Skip to content

Conversation

@ckoparkar
Copy link
Member

Fixes #154284

Add constexpr support for the following:

_mm_reduce_add_epi8 _mm_reduce_add_epi16 _mm256_reduce_add_epi8 _mm256_reduce_add_epi16 _mm_reduce_mul_epi8 _mm_reduce_mul_epi16 _mm256_reduce_mul_epi8 _mm256_reduce_mul_epi16 _mm_reduce_and_epi8 _mm_reduce_and_epi16 _mm256_reduce_and_epi8 _mm256_reduce_and_epi16 _mm_reduce_or_epi8 _mm_reduce_or_epi16 _mm256_reduce_or_epi8 _mm256_reduce_or_epi16

_mm_mask_reduce_add_epi8 _mm_mask_reduce_add_epi16 _mm256_mask_reduce_add_epi8 _mm256_mask_reduce_add_epi16 _mm_mask_reduce_mul_epi8 _mm_mask_reduce_mul_epi16 _mm256_mask_reduce_mul_epi8 _mm256_mask_reduce_mul_epi16 _mm_mask_reduce_and_epi8 _mm_mask_reduce_and_epi16 _mm256_mask_reduce_and_epi8 _mm256_mask_reduce_and_epi16 _mm_mask_reduce_or_epi8 _mm_mask_reduce_or_epi16 _mm256_mask_reduce_or_epi8 _mm256_mask_reduce_or_epi16

_mm_reduce_max_epi8 _mm_reduce_max_epi16 _mm256_reduce_max_epi8 _mm256_reduce_max_epi16 _mm_reduce_min_epi8 _mm_reduce_min_epi16 _mm256_reduce_min_epi8 _mm256_reduce_min_epi16 _mm_reduce_max_epu8 _mm_reduce_max_epu16 _mm256_reduce_max_epu8 _mm256_reduce_max_epu16 _mm_reduce_min_epu8 _mm_reduce_min_epu16 _mm256_reduce_min_epu8 _mm256_reduce_min_epu16

_mm_mask_reduce_max_epi8 _mm_mask_reduce_max_epi16 _mm256_mask_reduce_max_epi8 _mm256_mask_reduce_max_epi16 _mm_mask_reduce_min_epi8 _mm_mask_reduce_min_epi16 _mm256_mask_reduce_min_epi8 _mm256_mask_reduce_min_epi16 _mm_mask_reduce_max_epu8 _mm_mask_reduce_max_epu16 _mm256_mask_reduce_max_epu8 _mm256_mask_reduce_max_epu16 _mm_mask_reduce_min_epu8 _mm_mask_reduce_min_epu16 _mm256_mask_reduce_min_epu8 _mm256_mask_reduce_min_epu16

@llvmbot llvmbot added clang Clang issues not falling into any other category backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics labels Aug 25, 2025
@llvmbot
Copy link
Member

llvmbot commented Aug 25, 2025

@llvm/pr-subscribers-backend-x86

Author: Chaitanya Koparkar (ckoparkar)

Changes

Fixes #154284

Add constexpr support for the following:

_mm_reduce_add_epi8 _mm_reduce_add_epi16 _mm256_reduce_add_epi8 _mm256_reduce_add_epi16 _mm_reduce_mul_epi8 _mm_reduce_mul_epi16 _mm256_reduce_mul_epi8 _mm256_reduce_mul_epi16 _mm_reduce_and_epi8 _mm_reduce_and_epi16 _mm256_reduce_and_epi8 _mm256_reduce_and_epi16 _mm_reduce_or_epi8 _mm_reduce_or_epi16 _mm256_reduce_or_epi8 _mm256_reduce_or_epi16

_mm_mask_reduce_add_epi8 _mm_mask_reduce_add_epi16 _mm256_mask_reduce_add_epi8 _mm256_mask_reduce_add_epi16 _mm_mask_reduce_mul_epi8 _mm_mask_reduce_mul_epi16 _mm256_mask_reduce_mul_epi8 _mm256_mask_reduce_mul_epi16 _mm_mask_reduce_and_epi8 _mm_mask_reduce_and_epi16 _mm256_mask_reduce_and_epi8 _mm256_mask_reduce_and_epi16 _mm_mask_reduce_or_epi8 _mm_mask_reduce_or_epi16 _mm256_mask_reduce_or_epi8 _mm256_mask_reduce_or_epi16

_mm_reduce_max_epi8 _mm_reduce_max_epi16 _mm256_reduce_max_epi8 _mm256_reduce_max_epi16 _mm_reduce_min_epi8 _mm_reduce_min_epi16 _mm256_reduce_min_epi8 _mm256_reduce_min_epi16 _mm_reduce_max_epu8 _mm_reduce_max_epu16 _mm256_reduce_max_epu8 _mm256_reduce_max_epu16 _mm_reduce_min_epu8 _mm_reduce_min_epu16 _mm256_reduce_min_epu8 _mm256_reduce_min_epu16

_mm_mask_reduce_max_epi8 _mm_mask_reduce_max_epi16 _mm256_mask_reduce_max_epi8 _mm256_mask_reduce_max_epi16 _mm_mask_reduce_min_epi8 _mm_mask_reduce_min_epi16 _mm256_mask_reduce_min_epi8 _mm256_mask_reduce_min_epi16 _mm_mask_reduce_max_epu8 _mm_mask_reduce_max_epu16 _mm256_mask_reduce_max_epu8 _mm256_mask_reduce_max_epu16 _mm_mask_reduce_min_epu8 _mm_mask_reduce_min_epu16 _mm256_mask_reduce_min_epu8 _mm256_mask_reduce_min_epu16


Patch is 48.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155199.diff

2 Files Affected:

  • (modified) clang/lib/Headers/avx512vlbwintrin.h (+72-72)
  • (modified) clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c (+97)
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index 2e2052ad1b682..eaf292a0949f5 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -2197,7 +2197,7 @@ _mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B)
                                          (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
@@ -2205,7 +2205,7 @@ _mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
                 (__v8hi) __W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
@@ -2213,7 +2213,7 @@ _mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
                 (__v8hi) _mm_setzero_si128 ());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
@@ -2221,7 +2221,7 @@ _mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
                 (__v16hi) __W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
@@ -2229,7 +2229,7 @@ _mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A)
                 (__v16hi) _mm256_setzero_si256 ());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
@@ -2237,7 +2237,7 @@ _mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
                 (__v16qi) __W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
@@ -2245,7 +2245,7 @@ _mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
                 (__v16qi) _mm_setzero_si128 ());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
@@ -2253,7 +2253,7 @@ _mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
                 (__v32qi) __W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
@@ -2812,353 +2812,353 @@ _mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
                                   (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
                                   (__v16hi)_mm256_setzero_si256()))
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_add_epi16(__m128i __W) {
   return __builtin_reduce_add((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_mul_epi16(__m128i __W) {
   return __builtin_reduce_mul((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_and_epi16(__m128i __W) {
   return __builtin_reduce_and((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_or_epi16(__m128i __W) {
   return __builtin_reduce_or((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_add_epi16( __mmask8 __M, __m128i __W) {
   __W = _mm_maskz_mov_epi16(__M, __W);
   return __builtin_reduce_add((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_mul_epi16( __mmask8 __M, __m128i __W) {
   __W = _mm_mask_mov_epi16(_mm_set1_epi16(1), __M, __W);
   return __builtin_reduce_mul((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_and_epi16( __mmask8 __M, __m128i __W) {
   __W = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __W);
   return __builtin_reduce_and((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_or_epi16(__mmask8 __M, __m128i __W) {
   __W = _mm_maskz_mov_epi16(__M, __W);
   return __builtin_reduce_or((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_max_epi16(__m128i __V) {
   return __builtin_reduce_max((__v8hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS128
+static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_max_epu16(__m128i __V) {
   return __builtin_reduce_max((__v8hu)__V);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_min_epi16(__m128i __V) {
   return __builtin_reduce_min((__v8hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS128
+static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_min_epu16(__m128i __V) {
   return __builtin_reduce_min((__v8hu)__V);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_max_epi16(__mmask16 __M, __m128i __V) {
   __V = _mm_mask_mov_epi16(_mm_set1_epi16(-32767-1), __M, __V);
   return __builtin_reduce_max((__v8hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS128
+static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_max_epu16(__mmask16 __M, __m128i __V) {
   __V = _mm_maskz_mov_epi16(__M, __V);
   return __builtin_reduce_max((__v8hu)__V);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_min_epi16(__mmask16 __M, __m128i __V) {
   __V = _mm_mask_mov_epi16(_mm_set1_epi16(32767), __M, __V);
   return __builtin_reduce_min((__v8hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS128
+static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_min_epu16(__mmask16 __M, __m128i __V) {
   __V = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __V);
   return __builtin_reduce_min((__v8hu)__V);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_add_epi16(__m256i __W) {
   return __builtin_reduce_add((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_mul_epi16(__m256i __W) {
   return __builtin_reduce_mul((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_and_epi16(__m256i __W) {
   return __builtin_reduce_and((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_or_epi16(__m256i __W) {
   return __builtin_reduce_or((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_add_epi16( __mmask16 __M, __m256i __W) {
   __W = _mm256_maskz_mov_epi16(__M, __W);
   return __builtin_reduce_add((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_mul_epi16( __mmask16 __M, __m256i __W) {
   __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(1), __M, __W);
   return __builtin_reduce_mul((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_and_epi16( __mmask16 __M, __m256i __W) {
   __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __W);
   return __builtin_reduce_and((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_or_epi16(__mmask16 __M, __m256i __W) {
   __W = _mm256_maskz_mov_epi16(__M, __W);
   return __builtin_reduce_or((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_max_epi16(__m256i __V) {
   return __builtin_reduce_max((__v16hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS256
+static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_max_epu16(__m256i __V) {
   return __builtin_reduce_max((__v16hu)__V);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_min_epi16(__m256i __V) {
   return __builtin_reduce_min((__v16hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS256
+static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_min_epu16(__m256i __V) {
   return __builtin_reduce_min((__v16hu)__V);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_max_epi16(__mmask16 __M, __m256i __V) {
   __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-32767-1), __M, __V);
   return __builtin_reduce_max((__v16hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS256
+static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_max_epu16(__mmask16 __M, __m256i __V) {
   __V = _mm256_maskz_mov_epi16(__M, __V);
   return __builtin_reduce_max((__v16hu)__V);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_min_epi16(__mmask16 __M, __m256i __V) {
   __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(32767), __M, __V);
   return __builtin_reduce_min((__v16hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS256
+static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_min_epu16(__mmask16 __M, __m256i __V) {
   __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __V);
   return __builtin_reduce_min((__v16hu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_add_epi8(__m128i __W) {
   return __builtin_reduce_add((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_mul_epi8(__m128i __W) {
   return __builtin_reduce_mul((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_and_epi8(__m128i __W) {
   return __builtin_reduce_and((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_or_epi8(__m128i __W) {
   return __builtin_reduce_or((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_add_epi8(__mmask16 __M, __m128i __W) {
   __W = _mm_maskz_mov_epi8(__M, __W);
   return __builtin_reduce_add((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_mul_epi8(__mmask16 __M, __m128i __W) {
   __W = _mm_mask_mov_epi8(_mm_set1_epi8(1), __M, __W);
   return __builtin_reduce_mul((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_and_epi8(__mmask16 __M, __m128i __W) {
   __W = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __W);
   return __builtin_reduce_and((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_or_epi8(__mmask16 __M, __m128i __W) {
   __W = _mm_maskz_mov_epi8(__M, __W);
   return __builtin_reduce_or((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_max_epi8(__m128i __V) {
   return __builtin_reduce_max((__v16qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS128
+static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_max_epu8(__m128i __V) {
   return __builtin_reduce_max((__v16qu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_min_epi8(__m128i __V) {
   return __builtin_reduce_min((__v16qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS128
+static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_min_epu8(__m128i __V) {
   return __builtin_reduce_min((__v16qu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_max_epi8(__mmask16 __M, __m128i __V) {
   __V = _mm_mask_mov_epi8(_mm_set1_epi8(-127-1), __M, __V);
   return __builtin_reduce_max((__v16qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS128
+static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_max_epu8(__mmask16 __M, __m128i __V) {
   __V = _mm_maskz_mov_epi8(__M, __V);
   return __builtin_reduce_max((__v16qu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_min_epi8(__mmask16 __M, __m128i __V) {
   __V = _mm_mask_mov_epi8(_mm_set1_epi8(127), __M, __V);
   return __builtin_reduce_min((__v16qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS128
+static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_min_epu8(__mmask16 __M, __m128i __V) {
   __V = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __V);
   return __builtin_reduce_min((__v16qu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_add_epi8(__m256i __W) {
   return __builtin_reduce_add((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_mul_epi8(__m256i __W) {
   return __builtin_reduce_mul((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_and_epi8(__m256i __W) {
   return __builtin_reduce_and((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_or_epi8(__m256i __W) {
   return __builtin_reduce_or((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_add_epi8(__mmask32 __M, __m256i __W) {
   __W = _mm256_maskz_mov_epi8(__M, __W);
   return __builtin_reduce_add((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_mul_epi8(__mmask32 __M, __m256i __W) {
   __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(1), __M, __W);
   return __builtin_reduce_mul((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_and_epi8(__mmask32 __M, __m256i __W) {
   __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __W);
   return __builtin_reduce_and((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_or_epi8(__mmask32 __M, __m256i __W) {
   __W = _mm256_maskz_mov_epi8(__M, __W);
   return __builtin_reduce_or((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_max_epi8(__m256i __V) {
   return __builtin_reduce_max((__v32qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS256
+static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_max_epu8(__m256i __V) {
   return __builtin_reduce_max((__v32qu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_min_epi8(__m256i __V) {
   return __builtin_reduce_min((__v32qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS256
+static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_min_epu8(__m256i __V) {
   return __builtin_reduce_min((__v32qu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_max_epi8(__mmask32 __M, __m256i __V) {
   __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-127-1), __M, __V);
   return __builtin_reduce_max((__v32qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS256
+static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_max_epu8(__mmask32 __M, __m256i __V) {
   __V = _mm256_maskz_mov_epi8(__M, __V);
   return __builtin_reduce_max((__v32qu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_min_epi8(__mmask32 __M, __m256i __V) {
   __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(127), __M, __V);
   return __builtin_reduce_min((__v32qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS256
+static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __V) {
   __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __V);
   return __builtin_reduce_min((__v32qu)__V);
diff --git a/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c b/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c
index faa3b54624a77..8e53db3f45300 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c
@@ -8,30 +8,35 @@
 // RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=i386 -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
 
 #include <immintrin.h>
+#include "builtin_test_helpers.h"
 
 short test_mm_reduce_add_epi16(__m128i __W){
 // CHECK-LABEL: test_mm_reduce_add_epi16
 // CHECK: call {{.*}}i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %{{.*}})
   return _mm_reduce_add_epi16(__W);
 }
+TEST_CONSTEXPR(_mm_reduce_add_epi16((__m128i)(__v8hi){1,2,3,4,5,6,7,8}) == 36);
 
 short test_mm_reduce_mul_epi16(__m128i __W){
 // CHECK-LABEL: test_mm_reduce_mul_epi16
 // CHECK:    call {{.*}}i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %{{.*}})
   return _mm_reduce_mul_epi16(__W);
 }
+TEST_CONSTEXPR(_mm_reduce_mul_epi16((__m128i)(__v8hi){1,2,3,1,2,3,1,2}) == 72);
 
 short test_mm_reduce_or_epi16(__m128i __W){
 // CHECK-LABEL: test_mm_reduce_or_epi16
 // CHECK:    call {{.*}}i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %{{.*}})
   return _mm_reduce_or_epi16(__W);
 }
+TEST_CONSTEXPR(_mm_reduce_or_epi16((__m128i)(__v8hi){1,2,4,8,0,0,0,0}) == 15);
 
 short test_mm_reduce_and_epi16(__m128i __W){
 // CHECK-LABEL: test_mm_reduce_and_epi16
 // CHECK:    call {{.*}}i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %{{.*}})
   return _mm_reduce_and_epi16(__W);
 }
+TEST_CONSTEXPR(_mm_reduce_and_epi16((__m128i)(__v8hi){1,3,5,7,9,11,13,15}) == 1);
 
 short test_mm_mask_reduce_add_epi16(__mmask8 __M, __m128i __W){
 // CHECK-LABEL: test_mm_mask_reduce_add_epi16
@@ -39,6 +44,8 @@ short test_mm_mask_reduce_add_epi16(__mmask8 __M, __m128i __W){
 /...
[truncated]

@llvmbot
Copy link
Member

llvmbot commented Aug 25, 2025

@llvm/pr-subscribers-clang

Author: Chaitanya Koparkar (ckoparkar)

Changes

Fixes #154284

Add constexpr support for the following:

_mm_reduce_add_epi8 _mm_reduce_add_epi16 _mm256_reduce_add_epi8 _mm256_reduce_add_epi16 _mm_reduce_mul_epi8 _mm_reduce_mul_epi16 _mm256_reduce_mul_epi8 _mm256_reduce_mul_epi16 _mm_reduce_and_epi8 _mm_reduce_and_epi16 _mm256_reduce_and_epi8 _mm256_reduce_and_epi16 _mm_reduce_or_epi8 _mm_reduce_or_epi16 _mm256_reduce_or_epi8 _mm256_reduce_or_epi16

_mm_mask_reduce_add_epi8 _mm_mask_reduce_add_epi16 _mm256_mask_reduce_add_epi8 _mm256_mask_reduce_add_epi16 _mm_mask_reduce_mul_epi8 _mm_mask_reduce_mul_epi16 _mm256_mask_reduce_mul_epi8 _mm256_mask_reduce_mul_epi16 _mm_mask_reduce_and_epi8 _mm_mask_reduce_and_epi16 _mm256_mask_reduce_and_epi8 _mm256_mask_reduce_and_epi16 _mm_mask_reduce_or_epi8 _mm_mask_reduce_or_epi16 _mm256_mask_reduce_or_epi8 _mm256_mask_reduce_or_epi16

_mm_reduce_max_epi8 _mm_reduce_max_epi16 _mm256_reduce_max_epi8 _mm256_reduce_max_epi16 _mm_reduce_min_epi8 _mm_reduce_min_epi16 _mm256_reduce_min_epi8 _mm256_reduce_min_epi16 _mm_reduce_max_epu8 _mm_reduce_max_epu16 _mm256_reduce_max_epu8 _mm256_reduce_max_epu16 _mm_reduce_min_epu8 _mm_reduce_min_epu16 _mm256_reduce_min_epu8 _mm256_reduce_min_epu16

_mm_mask_reduce_max_epi8 _mm_mask_reduce_max_epi16 _mm256_mask_reduce_max_epi8 _mm256_mask_reduce_max_epi16 _mm_mask_reduce_min_epi8 _mm_mask_reduce_min_epi16 _mm256_mask_reduce_min_epi8 _mm256_mask_reduce_min_epi16 _mm_mask_reduce_max_epu8 _mm_mask_reduce_max_epu16 _mm256_mask_reduce_max_epu8 _mm256_mask_reduce_max_epu16 _mm_mask_reduce_min_epu8 _mm_mask_reduce_min_epu16 _mm256_mask_reduce_min_epu8 _mm256_mask_reduce_min_epu16


Patch is 48.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155199.diff

2 Files Affected:

  • (modified) clang/lib/Headers/avx512vlbwintrin.h (+72-72)
  • (modified) clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c (+97)
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index 2e2052ad1b682..eaf292a0949f5 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -2197,7 +2197,7 @@ _mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B)
                                          (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
@@ -2205,7 +2205,7 @@ _mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
                 (__v8hi) __W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
@@ -2213,7 +2213,7 @@ _mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
                 (__v8hi) _mm_setzero_si128 ());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
@@ -2221,7 +2221,7 @@ _mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
                 (__v16hi) __W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
@@ -2229,7 +2229,7 @@ _mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A)
                 (__v16hi) _mm256_setzero_si256 ());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
@@ -2237,7 +2237,7 @@ _mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
                 (__v16qi) __W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
@@ -2245,7 +2245,7 @@ _mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
                 (__v16qi) _mm_setzero_si128 ());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
@@ -2253,7 +2253,7 @@ _mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
                 (__v32qi) __W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
@@ -2812,353 +2812,353 @@ _mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
                                   (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
                                   (__v16hi)_mm256_setzero_si256()))
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_add_epi16(__m128i __W) {
   return __builtin_reduce_add((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_mul_epi16(__m128i __W) {
   return __builtin_reduce_mul((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_and_epi16(__m128i __W) {
   return __builtin_reduce_and((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_or_epi16(__m128i __W) {
   return __builtin_reduce_or((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_add_epi16( __mmask8 __M, __m128i __W) {
   __W = _mm_maskz_mov_epi16(__M, __W);
   return __builtin_reduce_add((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_mul_epi16( __mmask8 __M, __m128i __W) {
   __W = _mm_mask_mov_epi16(_mm_set1_epi16(1), __M, __W);
   return __builtin_reduce_mul((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_and_epi16( __mmask8 __M, __m128i __W) {
   __W = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __W);
   return __builtin_reduce_and((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_or_epi16(__mmask8 __M, __m128i __W) {
   __W = _mm_maskz_mov_epi16(__M, __W);
   return __builtin_reduce_or((__v8hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_max_epi16(__m128i __V) {
   return __builtin_reduce_max((__v8hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS128
+static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_max_epu16(__m128i __V) {
   return __builtin_reduce_max((__v8hu)__V);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_min_epi16(__m128i __V) {
   return __builtin_reduce_min((__v8hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS128
+static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_min_epu16(__m128i __V) {
   return __builtin_reduce_min((__v8hu)__V);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_max_epi16(__mmask16 __M, __m128i __V) {
   __V = _mm_mask_mov_epi16(_mm_set1_epi16(-32767-1), __M, __V);
   return __builtin_reduce_max((__v8hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS128
+static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_max_epu16(__mmask16 __M, __m128i __V) {
   __V = _mm_maskz_mov_epi16(__M, __V);
   return __builtin_reduce_max((__v8hu)__V);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS128
+static __inline__ short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_min_epi16(__mmask16 __M, __m128i __V) {
   __V = _mm_mask_mov_epi16(_mm_set1_epi16(32767), __M, __V);
   return __builtin_reduce_min((__v8hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS128
+static __inline__ unsigned short __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_min_epu16(__mmask16 __M, __m128i __V) {
   __V = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __V);
   return __builtin_reduce_min((__v8hu)__V);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_add_epi16(__m256i __W) {
   return __builtin_reduce_add((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_mul_epi16(__m256i __W) {
   return __builtin_reduce_mul((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_and_epi16(__m256i __W) {
   return __builtin_reduce_and((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_or_epi16(__m256i __W) {
   return __builtin_reduce_or((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_add_epi16( __mmask16 __M, __m256i __W) {
   __W = _mm256_maskz_mov_epi16(__M, __W);
   return __builtin_reduce_add((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_mul_epi16( __mmask16 __M, __m256i __W) {
   __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(1), __M, __W);
   return __builtin_reduce_mul((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_and_epi16( __mmask16 __M, __m256i __W) {
   __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __W);
   return __builtin_reduce_and((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_or_epi16(__mmask16 __M, __m256i __W) {
   __W = _mm256_maskz_mov_epi16(__M, __W);
   return __builtin_reduce_or((__v16hi)__W);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_max_epi16(__m256i __V) {
   return __builtin_reduce_max((__v16hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS256
+static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_max_epu16(__m256i __V) {
   return __builtin_reduce_max((__v16hu)__V);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_min_epi16(__m256i __V) {
   return __builtin_reduce_min((__v16hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS256
+static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_min_epu16(__m256i __V) {
   return __builtin_reduce_min((__v16hu)__V);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_max_epi16(__mmask16 __M, __m256i __V) {
   __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-32767-1), __M, __V);
   return __builtin_reduce_max((__v16hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS256
+static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_max_epu16(__mmask16 __M, __m256i __V) {
   __V = _mm256_maskz_mov_epi16(__M, __V);
   return __builtin_reduce_max((__v16hu)__V);
 }
 
-static __inline__ short __DEFAULT_FN_ATTRS256
+static __inline__ short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_min_epi16(__mmask16 __M, __m256i __V) {
   __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(32767), __M, __V);
   return __builtin_reduce_min((__v16hi)__V);
 }
 
-static __inline__ unsigned short __DEFAULT_FN_ATTRS256
+static __inline__ unsigned short __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_min_epu16(__mmask16 __M, __m256i __V) {
   __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __V);
   return __builtin_reduce_min((__v16hu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_add_epi8(__m128i __W) {
   return __builtin_reduce_add((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_mul_epi8(__m128i __W) {
   return __builtin_reduce_mul((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_and_epi8(__m128i __W) {
   return __builtin_reduce_and((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_or_epi8(__m128i __W) {
   return __builtin_reduce_or((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_add_epi8(__mmask16 __M, __m128i __W) {
   __W = _mm_maskz_mov_epi8(__M, __W);
   return __builtin_reduce_add((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_mul_epi8(__mmask16 __M, __m128i __W) {
   __W = _mm_mask_mov_epi8(_mm_set1_epi8(1), __M, __W);
   return __builtin_reduce_mul((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_and_epi8(__mmask16 __M, __m128i __W) {
   __W = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __W);
   return __builtin_reduce_and((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_or_epi8(__mmask16 __M, __m128i __W) {
   __W = _mm_maskz_mov_epi8(__M, __W);
   return __builtin_reduce_or((__v16qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_max_epi8(__m128i __V) {
   return __builtin_reduce_max((__v16qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS128
+static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_max_epu8(__m128i __V) {
   return __builtin_reduce_max((__v16qu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_min_epi8(__m128i __V) {
   return __builtin_reduce_min((__v16qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS128
+static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_reduce_min_epu8(__m128i __V) {
   return __builtin_reduce_min((__v16qu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_max_epi8(__mmask16 __M, __m128i __V) {
   __V = _mm_mask_mov_epi8(_mm_set1_epi8(-127-1), __M, __V);
   return __builtin_reduce_max((__v16qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS128
+static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_max_epu8(__mmask16 __M, __m128i __V) {
   __V = _mm_maskz_mov_epi8(__M, __V);
   return __builtin_reduce_max((__v16qu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS128
+static __inline__ signed char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_min_epi8(__mmask16 __M, __m128i __V) {
   __V = _mm_mask_mov_epi8(_mm_set1_epi8(127), __M, __V);
   return __builtin_reduce_min((__v16qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS128
+static __inline__ unsigned char __DEFAULT_FN_ATTRS128_CONSTEXPR
 _mm_mask_reduce_min_epu8(__mmask16 __M, __m128i __V) {
   __V = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __V);
   return __builtin_reduce_min((__v16qu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_add_epi8(__m256i __W) {
   return __builtin_reduce_add((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_mul_epi8(__m256i __W) {
   return __builtin_reduce_mul((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_and_epi8(__m256i __W) {
   return __builtin_reduce_and((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_or_epi8(__m256i __W) {
   return __builtin_reduce_or((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_add_epi8(__mmask32 __M, __m256i __W) {
   __W = _mm256_maskz_mov_epi8(__M, __W);
   return __builtin_reduce_add((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_mul_epi8(__mmask32 __M, __m256i __W) {
   __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(1), __M, __W);
   return __builtin_reduce_mul((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_and_epi8(__mmask32 __M, __m256i __W) {
   __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __W);
   return __builtin_reduce_and((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_or_epi8(__mmask32 __M, __m256i __W) {
   __W = _mm256_maskz_mov_epi8(__M, __W);
   return __builtin_reduce_or((__v32qs)__W);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_max_epi8(__m256i __V) {
   return __builtin_reduce_max((__v32qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS256
+static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_max_epu8(__m256i __V) {
   return __builtin_reduce_max((__v32qu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_min_epi8(__m256i __V) {
   return __builtin_reduce_min((__v32qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS256
+static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_reduce_min_epu8(__m256i __V) {
   return __builtin_reduce_min((__v32qu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_max_epi8(__mmask32 __M, __m256i __V) {
   __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-127-1), __M, __V);
   return __builtin_reduce_max((__v32qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS256
+static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_max_epu8(__mmask32 __M, __m256i __V) {
   __V = _mm256_maskz_mov_epi8(__M, __V);
   return __builtin_reduce_max((__v32qu)__V);
 }
 
-static __inline__ signed char __DEFAULT_FN_ATTRS256
+static __inline__ signed char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_min_epi8(__mmask32 __M, __m256i __V) {
   __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(127), __M, __V);
   return __builtin_reduce_min((__v32qs)__V);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS256
+static __inline__ unsigned char __DEFAULT_FN_ATTRS256_CONSTEXPR
 _mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __V) {
   __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __V);
   return __builtin_reduce_min((__v32qu)__V);
diff --git a/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c b/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c
index faa3b54624a77..8e53db3f45300 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-reduceIntrin.c
@@ -8,30 +8,35 @@
 // RUN: %clang_cc1 -x c++ -ffreestanding %s -O0 -triple=i386 -target-feature +avx512bw -target-feature +avx512vl -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
 
 #include <immintrin.h>
+#include "builtin_test_helpers.h"
 
 short test_mm_reduce_add_epi16(__m128i __W){
 // CHECK-LABEL: test_mm_reduce_add_epi16
 // CHECK: call {{.*}}i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %{{.*}})
   return _mm_reduce_add_epi16(__W);
 }
+TEST_CONSTEXPR(_mm_reduce_add_epi16((__m128i)(__v8hi){1,2,3,4,5,6,7,8}) == 36);
 
 short test_mm_reduce_mul_epi16(__m128i __W){
 // CHECK-LABEL: test_mm_reduce_mul_epi16
 // CHECK:    call {{.*}}i16 @llvm.vector.reduce.mul.v8i16(<8 x i16> %{{.*}})
   return _mm_reduce_mul_epi16(__W);
 }
+TEST_CONSTEXPR(_mm_reduce_mul_epi16((__m128i)(__v8hi){1,2,3,1,2,3,1,2}) == 72);
 
 short test_mm_reduce_or_epi16(__m128i __W){
 // CHECK-LABEL: test_mm_reduce_or_epi16
 // CHECK:    call {{.*}}i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %{{.*}})
   return _mm_reduce_or_epi16(__W);
 }
+TEST_CONSTEXPR(_mm_reduce_or_epi16((__m128i)(__v8hi){1,2,4,8,0,0,0,0}) == 15);
 
 short test_mm_reduce_and_epi16(__m128i __W){
 // CHECK-LABEL: test_mm_reduce_and_epi16
 // CHECK:    call {{.*}}i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %{{.*}})
   return _mm_reduce_and_epi16(__W);
 }
+TEST_CONSTEXPR(_mm_reduce_and_epi16((__m128i)(__v8hi){1,3,5,7,9,11,13,15}) == 1);
 
 short test_mm_mask_reduce_add_epi16(__mmask8 __M, __m128i __W){
 // CHECK-LABEL: test_mm_mask_reduce_add_epi16
@@ -39,6 +44,8 @@ short test_mm_mask_reduce_add_epi16(__mmask8 __M, __m128i __W){
 /...
[truncated]

@github-actions
Copy link

github-actions bot commented Aug 25, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@ckoparkar ckoparkar force-pushed the ckoparkar/154284 branch 2 times, most recently from 981fa7d to c5cf1b2 Compare August 25, 2025 03:06
@RKSimon RKSimon requested review from RKSimon and phoebewang August 26, 2025 11:16
@RKSimon
Copy link
Collaborator

RKSimon commented Aug 26, 2025

@ckoparkar please can you update to trunk latest to see if we can clear that CI failure?

…ed in constexpr

Add constexpr support for the following:

_mm_reduce_add_epi8 _mm_reduce_add_epi16 _mm256_reduce_add_epi8 _mm256_reduce_add_epi16
_mm_reduce_mul_epi8 _mm_reduce_mul_epi16 _mm256_reduce_mul_epi8 _mm256_reduce_mul_epi16
_mm_reduce_and_epi8 _mm_reduce_and_epi16 _mm256_reduce_and_epi8 _mm256_reduce_and_epi16
_mm_reduce_or_epi8  _mm_reduce_or_epi16  _mm256_reduce_or_epi8  _mm256_reduce_or_epi16

_mm_mask_reduce_add_epi8 _mm_mask_reduce_add_epi16 _mm256_mask_reduce_add_epi8 _mm256_mask_reduce_add_epi16
_mm_mask_reduce_mul_epi8 _mm_mask_reduce_mul_epi16 _mm256_mask_reduce_mul_epi8 _mm256_mask_reduce_mul_epi16
_mm_mask_reduce_and_epi8 _mm_mask_reduce_and_epi16 _mm256_mask_reduce_and_epi8 _mm256_mask_reduce_and_epi16
_mm_mask_reduce_or_epi8  _mm_mask_reduce_or_epi16  _mm256_mask_reduce_or_epi8  _mm256_mask_reduce_or_epi16

_mm_reduce_max_epi8 _mm_reduce_max_epi16 _mm256_reduce_max_epi8 _mm256_reduce_max_epi16
_mm_reduce_min_epi8 _mm_reduce_min_epi16 _mm256_reduce_min_epi8 _mm256_reduce_min_epi16
_mm_reduce_max_epu8 _mm_reduce_max_epu16 _mm256_reduce_max_epu8 _mm256_reduce_max_epu16
_mm_reduce_min_epu8 _mm_reduce_min_epu16 _mm256_reduce_min_epu8 _mm256_reduce_min_epu16

_mm_mask_reduce_max_epi8 _mm_mask_reduce_max_epi16 _mm256_mask_reduce_max_epi8 _mm256_mask_reduce_max_epi16
_mm_mask_reduce_min_epi8 _mm_mask_reduce_min_epi16 _mm256_mask_reduce_min_epi8 _mm256_mask_reduce_min_epi16
_mm_mask_reduce_max_epu8 _mm_mask_reduce_max_epu16 _mm256_mask_reduce_max_epu8 _mm256_mask_reduce_max_epu16
_mm_mask_reduce_min_epu8 _mm_mask_reduce_min_epu16 _mm256_mask_reduce_min_epu8 _mm256_mask_reduce_min_epu16
Copy link
Contributor

@phoebewang phoebewang left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@ckoparkar
Copy link
Member Author

D'oh, yes. I saw the Linux tests fail at the 7min mark on a lot of other PRs yesterday. Was waiting for that to be resolved, looks like it's OK now.

Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM - cheers

@RKSimon RKSimon merged commit abfc239 into llvm:main Aug 26, 2025
9 checks passed
@ckoparkar ckoparkar deleted the ckoparkar/154284 branch August 26, 2025 13:24
@ckoparkar
Copy link
Member Author

Thanks everyone for reviews :-)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category

Projects

None yet

Development

Successfully merging this pull request may close these issues.

[Headers][X86] Allow AVX512VLBW integer reduction intrinsics to be used in constexpr

4 participants