-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[Headers][X86] Enable constexpr handling for MMX/SSE/AVX/AVX512 avg intrinsics #157464
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
0bd1edf to
63351db
Compare
abf5cc7 to
1899bfe
Compare
…ntrinsics Updates the avg builtins to support constant expression handling.
This PR handles the __builtin_ia32_pavg builtins inside VectorExprEvaluator::VisitCallExpr.
4bf7d38 to
6422433
Compare
7a7116b to
25d55f2
Compare
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
25d55f2 to
810e0fc
Compare
|
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-clang Author: Bhasawut Singhaphan (markbhasawut) ChangesThis PR updates the avg builtins to support constant expression handling, by extending the VectorExprEvaluator::VisitCallExpr that handles elementwise integer binop builtins. Closes #155390 Patch is 33.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/157464.diff 13 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index b4ff550d27279..995708e8374fe 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -93,8 +93,6 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
}
let Features = "sse2" in {
- def pavgb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
- def pavgw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def packsswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">;
def packssdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
def packuswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">;
@@ -106,6 +104,8 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
}
let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def pavgb128 : X86Builtin<"_Vector<16, unsigned char>(_Vector<16, unsigned char>, _Vector<16, unsigned char>)">;
+ def pavgw128 : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, unsigned short>, _Vector<8, unsigned short>)">;
def pmulhw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def pmulhuw128 : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, unsigned short>, _Vector<8, unsigned short>)">;
}
@@ -575,8 +575,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;
def packusdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">;
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;
- def pavgb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
- def pavgw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def pblendw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">;
def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def phaddd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
@@ -618,6 +616,9 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
}
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+ def pavgb256 : X86Builtin<"_Vector<32, unsigned char>(_Vector<32, unsigned char>, _Vector<32, unsigned char>)">;
+ def pavgw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
+
def pblendvb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Vector<32, char>)">;
def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
@@ -1307,8 +1308,6 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512
def packsswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">;
def packusdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">;
def packuswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">;
- def pavgb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
- def pavgw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
def pshufb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
}
@@ -1350,6 +1349,8 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512
}
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+ def pavgb512 : X86Builtin<"_Vector<64, unsigned char>(_Vector<64, unsigned char>, _Vector<64, unsigned char>)">;
+ def pavgw512 : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, unsigned short>, _Vector<32, unsigned short>)">;
def pmulhuw512 : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, unsigned short>, _Vector<32, unsigned short>)">;
def pmulhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index a0dcdace854b9..b712222b32992 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3299,6 +3299,15 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
});
+ case clang::X86::BI__builtin_ia32_pavgb128:
+ case clang::X86::BI__builtin_ia32_pavgw128:
+ case clang::X86::BI__builtin_ia32_pavgb256:
+ case clang::X86::BI__builtin_ia32_pavgw256:
+ case clang::X86::BI__builtin_ia32_pavgb512:
+ case clang::X86::BI__builtin_ia32_pavgw512:
+ return interp__builtin_elementwise_int_binop(S, OpPC, Call,
+ llvm::APIntOps::avgCeilU);
+
case clang::X86::BI__builtin_ia32_pmulhuw128:
case clang::X86::BI__builtin_ia32_pmulhuw256:
case clang::X86::BI__builtin_ia32_pmulhuw512:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ca930737474df..47a3dfeae24d6 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11694,6 +11694,14 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
});
+ case clang::X86::BI__builtin_ia32_pavgb128:
+ case clang::X86::BI__builtin_ia32_pavgw128:
+ case clang::X86::BI__builtin_ia32_pavgb256:
+ case clang::X86::BI__builtin_ia32_pavgw256:
+ case clang::X86::BI__builtin_ia32_pavgb512:
+ case clang::X86::BI__builtin_ia32_pavgw512:
+ return EvaluateBinOpExpr(llvm::APIntOps::avgCeilU);
+
case clang::X86::BI__builtin_ia32_pmulhuw128:
case clang::X86::BI__builtin_ia32_pmulhuw256:
case clang::X86::BI__builtin_ia32_pmulhuw512:
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 2cacdc3c4596c..fc12a9bf15e57 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -496,10 +496,9 @@ _mm256_andnot_si256(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns A 256-bit integer vector containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_avg_epu8(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_avg_epu8(__m256i __a, __m256i __b) {
+ return (__m256i)__builtin_ia32_pavgb256((__v32qu)__a, (__v32qu)__b);
}
/// Computes the averages of the corresponding unsigned 16-bit integers in
@@ -522,10 +521,9 @@ _mm256_avg_epu8(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit vector of [16 x i16].
/// \returns A 256-bit vector of [16 x i16] containing the result.
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_avg_epu16(__m256i __a, __m256i __b)
-{
- return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_avg_epu16(__m256i __a, __m256i __b) {
+ return (__m256i)__builtin_ia32_pavgw256((__v16hu)__a, (__v16hu)__b);
}
/// Merges 8-bit integer values from either of the two 256-bit vectors
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 31e0a2242240c..8882048cdd07b 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -690,50 +690,40 @@ _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_avg_epu8 (__m512i __A, __m512i __B)
-{
- return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B);
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_avg_epu8(__m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_pavgb512((__v64qu)__A, (__v64qu)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
- __m512i __B)
-{
- return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
- (__v64qi)_mm512_avg_epu8(__A, __B),
- (__v64qi)__W);
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_avg_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_selectb_512(
+ (__mmask64)__U, (__v64qi)_mm512_avg_epu8(__A, __B), (__v64qi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_avg_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
- (__v64qi)_mm512_avg_epu8(__A, __B),
- (__v64qi)_mm512_setzero_si512());
+ (__v64qi)_mm512_avg_epu8(__A, __B),
+ (__v64qi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_avg_epu16 (__m512i __A, __m512i __B)
-{
- return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B);
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_avg_epu16(__m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_pavgw512((__v32hu)__A, (__v32hu)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
- __m512i __B)
-{
- return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
- (__v32hi)_mm512_avg_epu16(__A, __B),
- (__v32hi)__W);
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_avg_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_selectw_512(
+ (__mmask32)__U, (__v32hi)_mm512_avg_epu16(__A, __B), (__v32hi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
-{
- return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
- (__v32hi)_mm512_avg_epu16(__A, __B),
- (__v32hi) _mm512_setzero_si512());
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_avg_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_selectw_512(
+ (__mmask32)__U, (__v32hi)_mm512_avg_epu16(__A, __B),
+ (__v32hi)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index 846cda67bce3f..7067209f09cdf 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -795,68 +795,56 @@ _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B)
(__v16hi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
-{
- return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
- (__v16qi)_mm_avg_epu8(__A, __B),
- (__v16qi)__W);
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
+ return (__m128i)__builtin_ia32_selectb_128(
+ (__mmask16)__U, (__v16qi)_mm_avg_epu8(__A, __B), (__v16qi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
(__v16qi)_mm_avg_epu8(__A, __B),
(__v16qi)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
-{
- return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
- (__v32qi)_mm256_avg_epu8(__A, __B),
- (__v32qi)__W);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
+ return (__m256i)__builtin_ia32_selectb_256(
+ (__mmask32)__U, (__v32qi)_mm256_avg_epu8(__A, __B), (__v32qi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B)
-{
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
(__v32qi)_mm256_avg_epu8(__A, __B),
(__v32qi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
-{
- return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
- (__v8hi)_mm_avg_epu16(__A, __B),
- (__v8hi)__W);
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
+ return (__m128i)__builtin_ia32_selectw_128(
+ (__mmask8)__U, (__v8hi)_mm_avg_epu16(__A, __B), (__v8hi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B)
-{
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
(__v8hi)_mm_avg_epu16(__A, __B),
(__v8hi)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
-{
- return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
- (__v16hi)_mm256_avg_epu16(__A, __B),
- (__v16hi)__W);
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
+ return (__m256i)__builtin_ia32_selectw_256(
+ (__mmask16)__U, (__v16hi)_mm256_avg_epu16(__A, __B), (__v16hi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B)
-{
- return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
- (__v16hi)_mm256_avg_epu16(__A, __B),
- (__v16hi)_mm256_setzero_si256());
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
+ return (__m256i)__builtin_ia32_selectw_256(
+ (__mmask16)__U, (__v16hi)_mm256_avg_epu16(__A, __B),
+ (__v16hi)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index a366e0df407a9..e4fbe011239d6 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -27,7 +27,6 @@ typedef double __v2df __attribute__((__vector_size__(16)));
/* Unsigned types */
typedef unsigned long long __v2du __attribute__((__vector_size__(16)));
-typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
/* We need an explicitly signed variant for char. Note that this shouldn't
* appear in the interface though. */
@@ -2247,9 +2246,9 @@ _mm_adds_epu16(__m128i __a, __m128i __b) {
/// A 128-bit unsigned [16 x i8] vector.
/// \returns A 128-bit unsigned [16 x i8] vector containing the rounded
/// averages of both parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a,
- __m128i __b) {
- return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_avg_epu8(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_pavgb128((__v16qu)__a, (__v16qu)__b);
}
/// Computes the rounded averages of corresponding elements of two
@@ -2266,9 +2265,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a,
/// A 128-bit unsigned [8 x i16] vector.
/// \returns A 128-bit unsigned [8 x i16] vector containing the rounded
/// averages of both parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a,
- __m128i __b) {
- return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_avg_epu16(__m128i __a, __m128i __b) {
+ return (__m128i)__builtin_ia32_pavgw128((__v8hu)__a, (__v8hu)__b);
}
/// Multiplies the corresponding elements of two 128-bit signed [8 x i16]
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index 4b52904315451..6b70f245e2564 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -24,6 +24,7 @@ typedef float __m128_u __attribute__((__vector_size__(16), __aligned__(1)));
/* Unsigned types */
typedef unsigned int __v4su __attribute__((__vector_size__(16)));
typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
+typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
/* This header should only be included in a hosted environment as it depends on
* a standard library to provide allocation routines. */
@@ -2539,11 +2540,10 @@ _mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)
/// \param __b
/// A 64-bit integer vector containing one of the source operands.
/// \returns A 64-bit integer vector containing the averages of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_avg_pu8(__m64 __a, __m64 __b)
-{
- return __trunc64(__builtin_ia32_pavgb128((__v16qi)__anyext128(__a),
- (__v16qi)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_avg_pu8(__m64 __a, __m64 __b) {
+ return __trunc64(__builtin_ia32_pavgb128((__v16qu)__zext128(__a),
+ (__v16qu)__zext128(__b)));
}
/// Computes the rounded averages of the packed unsigned 16-bit integer
@@ -2559,11 +2559,10 @@ _mm_avg_pu8(__m64 __a, __m64 __b)
/// \param __b
/// A 64-bit integer vector containing one of the source operands.
/// \returns A 64-bit integer vector containing the averages of both operands.
-static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
-_mm_avg_pu16(__m64 __a, __m64 __b)
-{
- return __trunc64(__builtin_ia32_pavgw128((__v8hi)__anyext128(__a),
- (__v8hi)__anyext128(__b)));
+static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_avg_pu16(__m64 __a, __m64 __b) {
+ return __trunc64(
+ __builtin_ia32_pavgw128((__v8hu)__zext128(__a), (__v8hu)__zext128(__b)));
}
/// Subtracts the corresponding 8-bit unsigned integer values of the two
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index 724a5f693f9fe..aeb1aee4ea946 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -128,12 +128,14 @@ __m256i test_mm256_avg_epu8(__m256i a, __m256i b) {
// CHECK: call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
return _mm256_avg_epu8(a, b);
}
+TEST_CONSTEXPR(match_v32qu(_mm256_avg_epu8((__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, ...
[truncated]
|
RKSimon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers!
|
Thanks for your time to review this! @RKSimon |
This PR updates the avg builtins to support constant expression handling, by extending the VectorExprEvaluator::VisitCallExpr that handles elementwise integer binop builtins.
Closes #155390