From 491504e0c58087ad44dbc7be039e95658c444a82 Mon Sep 17 00:00:00 2001 From: AdityaC4 Date: Mon, 15 Sep 2025 23:19:21 -0500 Subject: [PATCH 1/7] [Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow AVX/AVX512 subvector insertion intrinsics to be used in constexpr #157709 --- clang/include/clang/Basic/BuiltinsX86.td | 22 +++-- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 68 +++++++++++++++ clang/lib/AST/ExprConstant.cpp | 50 +++++++++++ .../test/CodeGen/X86/avx-insert-constexpr.cpp | 87 +++++++++++++++++++ 4 files changed, 219 insertions(+), 8 deletions(-) create mode 100644 clang/test/CodeGen/X86/avx-insert-constexpr.cpp diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index aac502091b57e..adcbd379d4982 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -502,9 +502,6 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { def vpermilpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; def vpermilps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">; - def vinsertf128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<2, double>, _Constant int)">; - def vinsertf128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<4, float>, _Constant int)">; - def vinsertf128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>, _Constant int)">; def sqrtpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>)">; def sqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">; def rsqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">; @@ -513,6 +510,12 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in def roundps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">; } +let Features = "avx", Attributes = 
[NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def vinsertf128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<2, double>, _Constant int)">; + def vinsertf128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<4, float>, _Constant int)">; + def vinsertf128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>, _Constant int)">; +} + let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def vtestzpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">; def vtestcpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">; @@ -609,6 +612,9 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">; def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">; def extract128i256 : X86Builtin<"_Vector<2, long long int>(_Vector<4, long long int>, _Constant int)">; +} + +let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def insert128i256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>, _Constant int)">; } @@ -2945,29 +2951,29 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256 def extracti32x4_256_mask : X86Builtin<"_Vector<4, int>(_Vector<8, int>, _Constant int, _Vector<4, int>, unsigned char)">; } -let Features = "avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512dq", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def insertf32x8 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<8, float>, _Constant int)">; def insertf64x2_512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<2, double>, _Constant int)">; def inserti32x8 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, 
_Vector<8, int>, _Constant int)">; def inserti64x2_512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>, _Constant int)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def insertf64x4 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<4, double>, _Constant int)">; def inserti64x4 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<4, long long int>, _Constant int)">; } -let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def insertf64x2_256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<2, double>, _Constant int)">; def inserti64x2_256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>, _Constant int)">; } -let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def insertf32x4_256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<4, float>, _Constant int)">; def inserti32x4_256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>, _Constant int)">; } -let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def insertf32x4 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<4, float>, _Constant int)">; def inserti32x4 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>, _Constant int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index b7b6d65c38e97..19561246ca601 100644 --- 
a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2914,6 +2914,56 @@ static bool interp__builtin_elementwise_triop( return true; } +static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC, + const CallExpr *Call, + unsigned ID) { + assert(Call->getNumArgs() == 3); + + PrimType ImmPT = *S.getContext().classify(Call->getArg(2)); + APSInt ImmAPS = popToAPSInt(S.Stk, ImmPT); + uint64_t Index = ImmAPS.getZExtValue(); + + const Pointer &SubVec = S.Stk.pop<Pointer>(); + if (!SubVec.getFieldDesc()->isPrimitiveArray()) { + return Invalid(S, OpPC); + } + + const Pointer &DstVec = S.Stk.pop<Pointer>(); + if (!DstVec.getFieldDesc()->isPrimitiveArray()) { + return Invalid(S, OpPC); + } + + const Pointer &Result = S.Stk.peek<Pointer>(); + + unsigned DstElements = DstVec.getNumElems(); + unsigned SubElements = SubVec.getNumElems(); + + if (SubElements == 0 || DstElements == 0 || (DstElements % SubElements) != 0) + return Invalid(S, OpPC); + + unsigned NumLanes = DstElements / SubElements; + unsigned Lane = static_cast<unsigned>(Index % NumLanes); + + QualType ElemType = DstVec.getFieldDesc()->getElemQualType(); + PrimType ElemPT = *S.getContext().classify(ElemType); + + unsigned InsertPos = Lane * SubElements; + + TYPE_SWITCH(ElemPT, { + for (unsigned i = 0; i < DstElements; ++i) { + Result.elem<T>(i) = DstVec.elem<T>(i); + } + + for (unsigned i = 0; i < SubElements; ++i) { + Result.elem<T>(InsertPos + i) = SubVec.elem<T>(i); + } + }); + + Result.initializeAllElements(); + + return true; +} + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, uint32_t BuiltinID) { if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) @@ -3572,6 +3622,24 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return interp__builtin_elementwise_triop(S, OpPC, Call, llvm::APIntOps::fshr); + case X86::BI__builtin_ia32_insertf32x4_256: + case X86::BI__builtin_ia32_inserti32x4_256: + case X86::BI__builtin_ia32_insertf64x2_256: + 
case X86::BI__builtin_ia32_inserti64x2_256: + case X86::BI__builtin_ia32_insertf32x4: + case X86::BI__builtin_ia32_inserti32x4: + case X86::BI__builtin_ia32_insertf64x2_512: + case X86::BI__builtin_ia32_inserti64x2_512: + case X86::BI__builtin_ia32_insertf32x8: + case X86::BI__builtin_ia32_inserti32x8: + case X86::BI__builtin_ia32_insertf64x4: + case X86::BI__builtin_ia32_inserti64x4: + case X86::BI__builtin_ia32_vinsertf128_ps256: + case X86::BI__builtin_ia32_vinsertf128_pd256: + case X86::BI__builtin_ia32_vinsertf128_si256: + case X86::BI__builtin_ia32_insert128i256: + return interp__builtin_x86_insert_subvector(S, OpPC, Call, BuiltinID); + default: S.FFDiag(S.Current->getLocation(OpPC), diag::note_invalid_subexpr_in_const_expr) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index b2cb9e2b3c347..604d2e32d1980 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12128,6 +12128,56 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + + case X86::BI__builtin_ia32_insertf32x4_256: + case X86::BI__builtin_ia32_inserti32x4_256: + case X86::BI__builtin_ia32_insertf64x2_256: + case X86::BI__builtin_ia32_inserti64x2_256: + case X86::BI__builtin_ia32_insertf32x4: + case X86::BI__builtin_ia32_inserti32x4: + case X86::BI__builtin_ia32_insertf64x2_512: + case X86::BI__builtin_ia32_inserti64x2_512: + case X86::BI__builtin_ia32_insertf32x8: + case X86::BI__builtin_ia32_inserti32x8: + case X86::BI__builtin_ia32_insertf64x4: + case X86::BI__builtin_ia32_inserti64x4: + case X86::BI__builtin_ia32_vinsertf128_ps256: + case X86::BI__builtin_ia32_vinsertf128_pd256: + case X86::BI__builtin_ia32_vinsertf128_si256: + case X86::BI__builtin_ia32_insert128i256: { + APValue SourceDst, SourceSub; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceDst) || + !EvaluateAsRValue(Info, E->getArg(1), SourceSub)) + return false; + + APSInt Imm; + if 
(!EvaluateInteger(E->getArg(2), Imm, Info)) + return false; + + if (!SourceDst.isVector() || !SourceSub.isVector()) + return false; + + unsigned DstLen = SourceDst.getVectorLength(); + unsigned SubLen = SourceSub.getVectorLength(); + if (SubLen == 0 || DstLen == 0 || (DstLen % SubLen) != 0) + return false; + + unsigned NumLanes = DstLen / SubLen; + unsigned LaneIdx = (Imm.getZExtValue() % NumLanes) * SubLen; + + SmallVector<APValue, 16> ResultElements; + ResultElements.reserve(DstLen); + + for (unsigned EltNum = 0; EltNum < DstLen; ++EltNum) { + if (EltNum >= LaneIdx && EltNum < LaneIdx + SubLen) { + ResultElements.push_back(SourceSub.getVectorElt(EltNum - LaneIdx)); + } else { + ResultElements.push_back(SourceDst.getVectorElt(EltNum)); + } + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } } } diff --git a/clang/test/CodeGen/X86/avx-insert-constexpr.cpp b/clang/test/CodeGen/X86/avx-insert-constexpr.cpp new file mode 100644 index 0000000000000..30c1776d8ba6d --- /dev/null +++ b/clang/test/CodeGen/X86/avx-insert-constexpr.cpp @@ -0,0 +1,87 @@ +// REQUIRES: x86-registered-target +// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-linux-gnu -O0 -target-cpu skylake-avx512 -std=c++17 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-linux-gnu -O0 -target-cpu skylake-avx512 -std=c++17 -fexperimental-new-constant-interpreter -emit-llvm -o - %s | FileCheck %s + +#include <immintrin.h> +#include "builtin_test_helpers.h" + +// +// AVX256 Insert Tests +// + +__m256 test_mm256_insertf32x4(__m256 A, __m128 B) { + // CHECK-LABEL: test_mm256_insertf32x4 + return _mm256_insertf32x4(A, B, 1); +} + +// Insert 128-bit float vector into upper lane +TEST_CONSTEXPR(match_m256(_mm256_insertf32x4(_mm256_set1_ps(1.0f), _mm_set_ps(40.0f, 30.0f, 20.0f, 10.0f), 1), 1.0f, 1.0f, 1.0f, 1.0f, 10.0f, 20.0f, 30.0f, 40.0f)); + +__m256i test_mm256_inserti32x4(__m256i A, __m128i B) { + // CHECK-LABEL: test_mm256_inserti32x4 + return 
_mm256_inserti32x4(A, B, 0); +} + +// Insert 128-bit integer vector into lower lane +TEST_CONSTEXPR(match_v8si(_mm256_inserti32x4(_mm256_set1_epi32(1), _mm_set_epi32(40, 30, 20, 10), 0), 10, 20, 30, 40, 1, 1, 1, 1)); + +// +// AVX256 Masked Insert Test +// + +__m256 test_mm256_maskz_insertf32x4(__mmask8 U, __m256 A, __m128 B) { + // CHECK-LABEL: test_mm256_maskz_insertf32x4 + return _mm256_maskz_insertf32x4(U, A, B, 1); +} + +// Test zero mask produces all zeros +TEST_CONSTEXPR(match_m256( + _mm256_maskz_insertf32x4(0x00, _mm256_set1_ps(1.0f), + _mm_set_ps(40.0f, 30.0f, 20.0f, 10.0f), 1), + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)); + +// +// AVX Legacy Insert Test +// + +__m256 test_mm256_insertf128_ps(__m256 A, __m128 B) { + // CHECK-LABEL: test_mm256_insertf128_ps + return _mm256_insertf128_ps(A, B, 1); +} + +// Legacy insertf128 into upper lane +TEST_CONSTEXPR(match_m256(_mm256_insertf128_ps(_mm256_set1_ps(1.0f), _mm_set1_ps(7.0f), 1), 1.0f, 1.0f, 1.0f, 1.0f, 7.0f, 7.0f, 7.0f, 7.0f)); + +// +//AVX512 Insert Tests +// + +__m512 test_mm512_insertf32x4(__m512 A, __m128 B) { + // CHECK-LABEL: test_mm512_insertf32x4 + return _mm512_insertf32x4(A, B, 3); +} + +// Insert 128-bit into highest lane of 512-bit vector +TEST_CONSTEXPR(match_m512(_mm512_insertf32x4(_mm512_set1_ps(1.0f), _mm_set1_ps(5.0f), 3), 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 5.0f, 5.0f, 5.0f, 5.0f)); + +__m512 test_mm512_insertf32x8(__m512 A, __m256 B) { + // CHECK-LABEL: test_mm512_insertf32x8 + return _mm512_insertf32x8(A, B, 1); +} + +// Insert 256-bit into upper half of 512-bit vector +TEST_CONSTEXPR(match_m512(_mm512_insertf32x8(_mm512_set1_ps(1.0f), _mm256_set1_ps(2.0f), 1), 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f)); + +// +// AVX512 Masked Insert Test +// + +__m512 test_mm512_maskz_insertf32x4(__mmask16 U, __m512 A, __m128 B) { + // CHECK-LABEL: test_mm512_maskz_insertf32x4 + return 
_mm512_maskz_insertf32x4(U, A, B, 3); } + +// Test zero mask produces all zeros +TEST_CONSTEXPR(match_m512( + _mm512_maskz_insertf32x4(0x0000, _mm512_set1_ps(1.0f), _mm_set1_ps(5.0f), 3), + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)); From 257075d1509bf4c2053826b207fabea8df308d8c Mon Sep 17 00:00:00 2001 From: AdityaC4 Date: Tue, 16 Sep 2025 09:29:18 -0500 Subject: [PATCH 2/7] use getPrimType() instead & formatting --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 28 ++++++++++-------------- clang/lib/AST/ExprConstant.cpp | 5 ++--- 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 19561246ca601..d1b537217284f 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2924,14 +2924,12 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC, uint64_t Index = ImmAPS.getZExtValue(); const Pointer &SubVec = S.Stk.pop<Pointer>(); - if (!SubVec.getFieldDesc()->isPrimitiveArray()) { - return Invalid(S, OpPC); - } + if (!SubVec.getFieldDesc()->isPrimitiveArray()) + return false; const Pointer &DstVec = S.Stk.pop<Pointer>(); - if (!DstVec.getFieldDesc()->isPrimitiveArray()) { - return Invalid(S, OpPC); - } + if (!DstVec.getFieldDesc()->isPrimitiveArray()) + return false; const Pointer &Result = S.Stk.peek<Pointer>(); @@ -2939,24 +2937,20 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC, unsigned SubElements = SubVec.getNumElems(); if (SubElements == 0 || DstElements == 0 || (DstElements % SubElements) != 0) - return Invalid(S, OpPC); + return false; unsigned NumLanes = DstElements / SubElements; unsigned Lane = static_cast<unsigned>(Index % NumLanes); - - QualType ElemType = DstVec.getFieldDesc()->getElemQualType(); - PrimType ElemPT = *S.getContext().classify(ElemType); - unsigned InsertPos = Lane * SubElements; + PrimType ElemPT = 
DstVec.getFieldDesc()->getPrimType(); + TYPE_SWITCH(ElemPT, { - for (unsigned i = 0; i < DstElements; ++i) { - Result.elem<T>(i) = DstVec.elem<T>(i); - } + for (unsigned I = 0; I != DstElements; ++I) + Result.elem<T>(I) = DstVec.elem<T>(I); - for (unsigned i = 0; i < SubElements; ++i) { - Result.elem<T>(InsertPos + i) = SubVec.elem<T>(i); - } + for (unsigned I = 0; I != SubElements; ++I) + Result.elem<T>(InsertPos + I) = SubVec.elem<T>(I); }); Result.initializeAllElements(); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 604d2e32d1980..3a55f1481ffb5 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12169,11 +12169,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { ResultElements.reserve(DstLen); for (unsigned EltNum = 0; EltNum < DstLen; ++EltNum) { - if (EltNum >= LaneIdx && EltNum < LaneIdx + SubLen) { + if (EltNum >= LaneIdx && EltNum < LaneIdx + SubLen) ResultElements.push_back(SourceSub.getVectorElt(EltNum - LaneIdx)); - } else { + else ResultElements.push_back(SourceDst.getVectorElt(EltNum)); - } } return Success(APValue(ResultElements.data(), ResultElements.size()), E); From 69816f4f71e3ed161735c2c361146d43e210a68a Mon Sep 17 00:00:00 2001 From: AdityaC4 Date: Tue, 16 Sep 2025 10:35:51 -0500 Subject: [PATCH 3/7] Refactor vector insertion checks to use assertions --- clang/include/clang/Basic/BuiltinsX86.td | 13 +++++-------- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 3 +-- clang/lib/AST/ExprConstant.cpp | 6 ++---- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index adcbd379d4982..044c755d4d7cf 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -497,6 +497,9 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid def blendps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Constant int)">; def 
blendvpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>, _Vector<4, double>)">; def blendvps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, _Vector<8, float>)">; + def vinsertf128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<2, double>, _Constant int)">; + def vinsertf128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<4, float>, _Constant int)">; + def vinsertf128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>, _Constant int)">; } let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { @@ -510,11 +513,6 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in def roundps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">; } -let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { - def vinsertf128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<2, double>, _Constant int)">; - def vinsertf128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<4, float>, _Constant int)">; - def vinsertf128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>, _Constant int)">; -} let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def vtestzpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">; @@ -614,9 +612,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def extract128i256 : X86Builtin<"_Vector<2, long long int>(_Vector<4, long long int>, _Constant int)">; } -let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { - def insert128i256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>, _Constant int)">; -} let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def pavgb256 : X86Builtin<"_Vector<32, unsigned 
char>(_Vector<32, unsigned char>, _Vector<32, unsigned char>)">; @@ -650,6 +645,8 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi def psrlv8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; def psllv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">; def psrlv4di : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">; + + def insert128i256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>, _Constant int)">; } let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index d1b537217284f..88378144e0b9c 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2936,8 +2936,7 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC, unsigned DstElements = DstVec.getNumElems(); unsigned SubElements = SubVec.getNumElems(); - if (SubElements == 0 || DstElements == 0 || (DstElements % SubElements) != 0) - return false; + assert(SubElements != 0 && DstElements != 0 && (DstElements % SubElements) == 0); unsigned NumLanes = DstElements / SubElements; unsigned Lane = static_cast(Index % NumLanes); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 3a55f1481ffb5..001d636a8a960 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12156,12 +12156,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (!SourceDst.isVector() || !SourceSub.isVector()) return false; - + assert(SourceDst.isVector() && SourceSub.isVector()); unsigned DstLen = SourceDst.getVectorLength(); unsigned SubLen = SourceSub.getVectorLength(); - if (SubLen == 0 || DstLen == 0 || (DstLen % SubLen) != 0) - return false; - + assert(SubLen != 0 && 
DstLen != 0 && (DstLen % SubLen) == 0); unsigned NumLanes = DstLen / SubLen; unsigned LaneIdx = (Imm.getZExtValue() % NumLanes) * SubLen; From 5e32918302ecf33eaabf25c4c83bad58bf79dea6 Mon Sep 17 00:00:00 2001 From: AdityaC4 Date: Wed, 17 Sep 2025 18:00:34 -0500 Subject: [PATCH 4/7] Remove unnecessary new test file and add tests in corresponding files --- clang/test/CodeGen/X86/avx-builtins.c | 3 + .../test/CodeGen/X86/avx-insert-constexpr.cpp | 87 ------------------- clang/test/CodeGen/X86/avx2-builtins.c | 1 + clang/test/CodeGen/X86/avx512dq-builtins.c | 13 +++ clang/test/CodeGen/X86/avx512f-builtins.c | 12 +++ clang/test/CodeGen/X86/avx512vl-builtins.c | 6 ++ clang/test/CodeGen/X86/avx512vldq-builtins.c | 6 ++ 7 files changed, 41 insertions(+), 87 deletions(-) delete mode 100644 clang/test/CodeGen/X86/avx-insert-constexpr.cpp diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 7b1a9cc4d9a7f..347cd9ee6a667 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -1144,6 +1144,7 @@ __m256d test_mm256_insertf128_pd(__m256d A, __m128d B) { // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> return _mm256_insertf128_pd(A, B, 0); } +TEST_CONSTEXPR(match_m256d(_mm256_insertf128_pd(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m128d){5.0, 6.0}), 0), 5.0, 6.0, 3.0, 4.0)); __m256 test_mm256_insertf128_ps(__m256 A, __m128 B) { // CHECK-LABEL: test_mm256_insertf128_ps @@ -1151,6 +1152,7 @@ __m256 test_mm256_insertf128_ps(__m256 A, __m128 B) { // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> return _mm256_insertf128_ps(A, B, 1); } +TEST_CONSTEXPR(match_m256(_mm256_insertf128_ps(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m128){10.0f, 20.0f, 30.0f, 40.0f}), 1), 1.0f, 2.0f, 3.0f, 4.0f, 10.0f, 20.0f, 30.0f, 40.0f)); __m256i test_mm256_insertf128_si256(__m256i A, __m128i B) { // CHECK-LABEL: test_mm256_insertf128_si256 @@ -1158,6 
+1160,7 @@ __m256i test_mm256_insertf128_si256(__m256i A, __m128i B) { // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> return _mm256_insertf128_si256(A, B, 0); } +TEST_CONSTEXPR(match_m256i(_mm256_insertf128_si256(((__m256i){1ULL, 2ULL, 3ULL, 4ULL}), ((__m128i){10ULL, 20ULL}), 0), 10ULL, 20ULL, 3ULL, 4ULL)); __m256i test_mm256_lddqu_si256(__m256i* A) { // CHECK-LABEL: test_mm256_lddqu_si256 diff --git a/clang/test/CodeGen/X86/avx-insert-constexpr.cpp b/clang/test/CodeGen/X86/avx-insert-constexpr.cpp deleted file mode 100644 index 30c1776d8ba6d..0000000000000 --- a/clang/test/CodeGen/X86/avx-insert-constexpr.cpp +++ /dev/null @@ -1,87 +0,0 @@ -// REQUIRES: x86-registered-target -// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-linux-gnu -O0 -target-cpu skylake-avx512 -std=c++17 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -ffreestanding -triple x86_64-unknown-linux-gnu -O0 -target-cpu skylake-avx512 -std=c++17 -fexperimental-new-constant-interpreter -emit-llvm -o - %s | FileCheck %s - -#include -#include "builtin_test_helpers.h" - -// -// AVX256 Insert Tests -// - -__m256 test_mm256_insertf32x4(__m256 A, __m128 B) { - // CHECK-LABEL: test_mm256_insertf32x4 - return _mm256_insertf32x4(A, B, 1); -} - -// Insert 128-bit float vector into upper lane -TEST_CONSTEXPR(match_m256(_mm256_insertf32x4(_mm256_set1_ps(1.0f), _mm_set_ps(40.0f, 30.0f, 20.0f, 10.0f), 1), 1.0f, 1.0f, 1.0f, 1.0f, 10.0f, 20.0f, 30.0f, 40.0f)); - -__m256i test_mm256_inserti32x4(__m256i A, __m128i B) { - // CHECK-LABEL: test_mm256_inserti32x4 - return _mm256_inserti32x4(A, B, 0); -} - -// Insert 128-bit integer vector into lower lane -TEST_CONSTEXPR(match_v8si(_mm256_inserti32x4(_mm256_set1_epi32(1), _mm_set_epi32(40, 30, 20, 10), 0), 10, 20, 30, 40, 1, 1, 1, 1)); - -// -// AVX256 Masked Insert Test -// - -__m256 test_mm256_maskz_insertf32x4(__mmask8 U, __m256 A, __m128 B) { - // CHECK-LABEL: test_mm256_maskz_insertf32x4 - return _mm256_maskz_insertf32x4(U, A, B, 
1); -} - -// Test zero mask produces all zeros -TEST_CONSTEXPR(match_m256( - _mm256_maskz_insertf32x4(0x00, _mm256_set1_ps(1.0f), - _mm_set_ps(40.0f, 30.0f, 20.0f, 10.0f), 1), - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)); - -// -// AVX Legacy Insert Test -// - -__m256 test_mm256_insertf128_ps(__m256 A, __m128 B) { - // CHECK-LABEL: test_mm256_insertf128_ps - return _mm256_insertf128_ps(A, B, 1); -} - -// Legacy insertf128 into upper lane -TEST_CONSTEXPR(match_m256(_mm256_insertf128_ps(_mm256_set1_ps(1.0f), _mm_set1_ps(7.0f), 1), 1.0f, 1.0f, 1.0f, 1.0f, 7.0f, 7.0f, 7.0f, 7.0f)); - -// -//AVX512 Insert Tests -// - -__m512 test_mm512_insertf32x4(__m512 A, __m128 B) { - // CHECK-LABEL: test_mm512_insertf32x4 - return _mm512_insertf32x4(A, B, 3); -} - -// Insert 128-bit into highest lane of 512-bit vector -TEST_CONSTEXPR(match_m512(_mm512_insertf32x4(_mm512_set1_ps(1.0f), _mm_set1_ps(5.0f), 3), 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 5.0f, 5.0f, 5.0f, 5.0f)); - -__m512 test_mm512_insertf32x8(__m512 A, __m256 B) { - // CHECK-LABEL: test_mm512_insertf32x8 - return _mm512_insertf32x8(A, B, 1); -} - -// Insert 256-bit into upper half of 512-bit vector -TEST_CONSTEXPR(match_m512(_mm512_insertf32x8(_mm512_set1_ps(1.0f), _mm256_set1_ps(2.0f), 1), 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f)); - -// -// AVX512 Masked Insert Test -// - -__m512 test_mm512_maskz_insertf32x4(__mmask16 U, __m512 A, __m128 B) { - // CHECK-LABEL: test_mm512_maskz_insertf32x4 - return _mm512_maskz_insertf32x4(U, A, B, 3); -} - -// Test zero mask produces all zeros -TEST_CONSTEXPR(match_m512( - _mm512_maskz_insertf32x4(0x0000, _mm512_set1_ps(1.0f), _mm_set1_ps(5.0f), 3), - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)); diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 17ab47c72ad4b..b6b54172ea186 100644 --- 
a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -779,6 +779,7 @@ __m256i test0_mm256_inserti128_si256(__m256i a, __m128i b) { // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> return _mm256_inserti128_si256(a, b, 0); } +TEST_CONSTEXPR(match_m256i(_mm256_inserti128_si256(((__m256i){1ULL, 2ULL, 3ULL, 4ULL}), ((__m128i){10ULL, 20ULL}), 0), 10ULL, 20ULL, 3ULL, 4ULL)); __m256i test1_mm256_inserti128_si256(__m256i a, __m128i b) { // CHECK-LABEL: test1_mm256_inserti128_si256 diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c index df096e3607f30..a7c11eb655628 100644 --- a/clang/test/CodeGen/X86/avx512dq-builtins.c +++ b/clang/test/CodeGen/X86/avx512dq-builtins.c @@ -1437,6 +1437,7 @@ __m512 test_mm512_insertf32x8(__m512 __A, __m256 __B) { // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> return _mm512_insertf32x8(__A, __B, 1); } +TEST_CONSTEXPR(match_m512(_mm512_insertf32x8(((__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}), ((__m256){20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f, 90.0f}), 1), 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f, 90.0f)); __m512 test_mm512_mask_insertf32x8(__m512 __W, __mmask16 __U, __m512 __A, __m256 __B) { // CHECK-LABEL: test_mm512_mask_insertf32x8 @@ -1444,6 +1445,7 @@ __m512 test_mm512_mask_insertf32x8(__m512 __W, __mmask16 __U, __m512 __A, __m256 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_insertf32x8(__W, __U, __A, __B, 1); } +TEST_CONSTEXPR(match_m512(_mm512_mask_insertf32x8(((__m512){2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f,9.0f,10.0f,11.0f,12.0f,13.0f,14.0f,15.0f,16.0f,17.0f}), (0xF000), ((__m512){2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f,9.0f,10.0f,11.0f,12.0f,13.0f,14.0f,15.0f,16.0f,17.0f}), 
((__m256){20.0f,30.0f,40.0f,50.0f,60.0f,70.0f,80.0f,90.0f}), 1), 2.0f,3.0f,4.0f,5.0f,6.0f,7.0f,8.0f,9.0f,10.0f,11.0f,12.0f,13.0f,60.0f,70.0f,80.0f,90.0f)); __m512 test_mm512_maskz_insertf32x8(__mmask16 __U, __m512 __A, __m256 __B) { // CHECK-LABEL: test_mm512_maskz_insertf32x8 @@ -1451,12 +1453,14 @@ __m512 test_mm512_maskz_insertf32x8(__mmask16 __U, __m512 __A, __m256 __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_insertf32x8(__U, __A, __B, 1); } +TEST_CONSTEXPR(match_m512(_mm512_maskz_insertf32x8((0x0F00), ((__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}), ((__m256){20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0f, 90.0f}), 1), 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 20.0f, 30.0f, 40.0f, 50.0f, 0.0f, 0.0f, 0.0f, 0.0f)); __m512d test_mm512_insertf64x2(__m512d __A, __m128d __B) { // CHECK-LABEL: test_mm512_insertf64x2 // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> return _mm512_insertf64x2(__A, __B, 3); } +TEST_CONSTEXPR(match_m512d(_mm512_insertf64x2(((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}), ((__m128d){10.0, 20.0}), 3), 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 10.0, 20.0)); __m512d test_mm512_mask_insertf64x2(__m512d __W, __mmask8 __U, __m512d __A, __m128d __B) { // CHECK-LABEL: test_mm512_mask_insertf64x2 @@ -1464,6 +1468,7 @@ __m512d test_mm512_mask_insertf64x2(__m512d __W, __mmask8 __U, __m512d __A, __m1 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_insertf64x2(__W, __U, __A, __B, 3); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_insertf64x2(((__m512d){2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0}), (0x80), ((__m512d){2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0}), ((__m128d){10.0, 20.0}), 3), 2.0,3.0,4.0,5.0,6.0,7.0,8.0,20.0)); __m512d test_mm512_maskz_insertf64x2(__mmask8 __U, __m512d __A, __m128d __B) { // CHECK-LABEL: test_mm512_maskz_insertf64x2 @@ -1471,12 +1476,14 @@ __m512d 
test_mm512_maskz_insertf64x2(__mmask8 __U, __m512d __A, __m128d __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_insertf64x2(__U, __A, __B, 3); } +TEST_CONSTEXPR(match_m512d(_mm512_maskz_insertf64x2((0x80), ((__m512d){1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0}), ((__m128d){10.0,20.0}), 3), 0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0)); __m512i test_mm512_inserti32x8(__m512i __A, __m256i __B) { // CHECK-LABEL: test_mm512_inserti32x8 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> return _mm512_inserti32x8(__A, __B, 1); } +TEST_CONSTEXPR(match_v16si(_mm512_inserti32x8(((__m512i)(__v16si){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}), ((__m256i)(__v8si){20,30,40,50,60,70,80,90}), 1), 1, 2, 3, 4, 5, 6, 7, 8, 20, 30, 40, 50, 60, 70, 80, 90)); __m512i test_mm512_mask_inserti32x8(__m512i __W, __mmask16 __U, __m512i __A, __m256i __B) { // CHECK-LABEL: test_mm512_mask_inserti32x8 @@ -1484,6 +1491,7 @@ __m512i test_mm512_mask_inserti32x8(__m512i __W, __mmask16 __U, __m512i __A, __m // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_inserti32x8(__W, __U, __A, __B, 1); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_inserti32x8(((__m512i)(__v16si){2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17}), (0xF000), ((__m512i)(__v16si){2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17}), ((__m256i)(__v8si){20,30,40,50,60,70,80,90}), 1), 2,3,4,5,6,7,8,9,10,11,12,13,60,70,80,90)); __m512i test_mm512_maskz_inserti32x8(__mmask16 __U, __m512i __A, __m256i __B) { // CHECK-LABEL: test_mm512_maskz_inserti32x8 @@ -1491,12 +1499,14 @@ __m512i test_mm512_maskz_inserti32x8(__mmask16 __U, __m512i __A, __m256i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_inserti32x8(__U, __A, __B, 1); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_inserti32x8((0x0F00), ((__m512i)(__v16si){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}), ((__m256i)(__v8si){20,30,40,50,60,70,80,90}), 1), 
0,0,0,0,0,0,0,0,20,30,40,50,0,0,0,0)); __m512i test_mm512_inserti64x2(__m512i __A, __m128i __B) { // CHECK-LABEL: test_mm512_inserti64x2 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> return _mm512_inserti64x2(__A, __B, 1); } +TEST_CONSTEXPR(match_m512i(_mm512_inserti64x2(((__m512i){1, 2, 3, 4, 5, 6, 7, 8}), ((__m128i){10, 20}), 1), 1, 2, 10, 20, 5, 6, 7, 8)); __m512i test_mm512_mask_inserti64x2(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { // CHECK-LABEL: test_mm512_mask_inserti64x2 @@ -1504,6 +1514,7 @@ __m512i test_mm512_mask_inserti64x2(__m512i __W, __mmask8 __U, __m512i __A, __m1 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_inserti64x2(__W, __U, __A, __B, 1); } +TEST_CONSTEXPR(match_m512i(_mm512_mask_inserti64x2(((__m512i){1,2,3,4,5,6,7,8}), (0x08), ((__m512i){1,2,3,4,5,6,7,8}), ((__m128i){10, 20}), 1), 1, 2, 3, 20, 5, 6, 7, 8)); __m512i test_mm512_maskz_inserti64x2(__mmask8 __U, __m512i __A, __m128i __B) { // CHECK-LABEL: test_mm512_maskz_inserti64x2 @@ -1511,6 +1522,8 @@ __m512i test_mm512_maskz_inserti64x2(__mmask8 __U, __m512i __A, __m128i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_inserti64x2(__U, __A, __B, 1); } +TEST_CONSTEXPR(match_m512i(_mm512_maskz_inserti64x2((0x0C), ((__m512i){1,2,3,4,5,6,7,8}), ((__m128i){10, 20}), 1), 0, 0, 10, 20, 0, 0, 0, 0)); + __mmask8 test_mm512_mask_fpclass_pd_mask(__mmask8 __U, __m512d __A) { // CHECK-LABEL: test_mm512_mask_fpclass_pd_mask // CHECK: @llvm.x86.avx512.fpclass.pd.512 diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index f93216e546a63..41dc47a67da03 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -7020,6 +7020,7 @@ __m512d test_mm512_insertf64x4(__m512d __A, __m256d __B) { // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> return 
_mm512_insertf64x4(__A, __B, 1); } +TEST_CONSTEXPR(match_m512d(_mm512_insertf64x4(((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}), ((__m256d){10.0, 20.0, 30.0, 40.0}), 1), 1.0, 2.0, 3.0, 4.0, 10.0, 20.0, 30.0, 40.0)); __m512d test_mm512_mask_insertf64x4(__m512d __W, __mmask8 __U, __m512d __A, __m256d __B) { // CHECK-LABEL: test_mm512_mask_insertf64x4 @@ -7027,6 +7028,7 @@ __m512d test_mm512_mask_insertf64x4(__m512d __W, __mmask8 __U, __m512d __A, __m2 // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_mask_insertf64x4(__W, __U, __A, __B, 1); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_insertf64x4(((__m512d){2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}), (0x00C0), ((__m512d){2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}), ((__m256d){10.0, 20.0, 30.0, 40.0}), 1), 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 30.0, 40.0)); __m512d test_mm512_maskz_insertf64x4(__mmask8 __U, __m512d __A, __m256d __B) { // CHECK-LABEL: test_mm512_maskz_insertf64x4 @@ -7034,12 +7036,14 @@ __m512d test_mm512_maskz_insertf64x4(__mmask8 __U, __m512d __A, __m256d __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return _mm512_maskz_insertf64x4(__U, __A, __B, 1); } +TEST_CONSTEXPR(match_m512d(_mm512_maskz_insertf64x4((0x0030), ((__m512d){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}), ((__m256d){10.0, 20.0, 30.0, 40.0}), 1), 0.0, 0.0, 0.0, 0.0, 10.0, 20.0, 0.0, 0.0)); __m512i test_mm512_inserti64x4(__m512i __A, __m256i __B) { // CHECK-LABEL: test_mm512_inserti64x4 // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> return _mm512_inserti64x4(__A, __B, 1); } +TEST_CONSTEXPR(match_m512i(_mm512_inserti64x4(((__m512i){1, 2, 3, 4, 5, 6, 7, 8}), ((__m256i){10, 20, 30, 40}), 1), 1, 2, 3, 4, 10, 20, 30, 40)); __m512i test_mm512_mask_inserti64x4(__m512i __W, __mmask8 __U, __m512i __A, __m256i __B) { // CHECK-LABEL: test_mm512_mask_inserti64x4 @@ -7047,6 +7051,7 @@ __m512i test_mm512_mask_inserti64x4(__m512i __W, __mmask8 __U, __m512i __A, 
__m2 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_inserti64x4(__W, __U, __A, __B, 1); } +TEST_CONSTEXPR(match_m512i(_mm512_mask_inserti64x4(((__m512i){2, 3, 4, 5, 6, 7, 8, 9}), (0x00C0), ((__m512i){2, 3, 4, 5, 6, 7, 8, 9}), ((__m256i){10, 20, 30, 40}), 1), 2, 3, 4, 5, 6, 7, 30, 40)); __m512i test_mm512_maskz_inserti64x4(__mmask8 __U, __m512i __A, __m256i __B) { // CHECK-LABEL: test_mm512_maskz_inserti64x4 @@ -7054,12 +7059,14 @@ __m512i test_mm512_maskz_inserti64x4(__mmask8 __U, __m512i __A, __m256i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_inserti64x4(__U, __A, __B, 1); } +TEST_CONSTEXPR(match_m512i(_mm512_maskz_inserti64x4((0x0030), ((__m512i){1, 2, 3, 4, 5, 6, 7, 8}), ((__m256i){10, 20, 30, 40}), 1), 0, 0, 0, 0, 10, 20, 0, 0)); __m512 test_mm512_insertf32x4(__m512 __A, __m128 __B) { // CHECK-LABEL: test_mm512_insertf32x4 // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> return _mm512_insertf32x4(__A, __B, 1); } +TEST_CONSTEXPR(match_m512(_mm512_insertf32x4(((__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}), ((__m128){20.0f, 30.0f, 40.0f, 50.0f}), 1), 1.0f, 2.0f, 3.0f, 4.0f, 20.0f, 30.0f, 40.0f, 50.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f)); __m512 test_mm512_mask_insertf32x4(__m512 __W, __mmask16 __U, __m512 __A, __m128 __B) { // CHECK-LABEL: test_mm512_mask_insertf32x4 @@ -7067,6 +7074,7 @@ __m512 test_mm512_mask_insertf32x4(__m512 __W, __mmask16 __U, __m512 __A, __m128 // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_insertf32x4(__W, __U, __A, __B, 1); } +TEST_CONSTEXPR(match_m512(_mm512_mask_insertf32x4(((__m512){2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f}), (0x00F0), ((__m512){2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 
12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f}), ((__m128){20.0f, 30.0f, 40.0f, 50.0f}), 1), 2.0f, 3.0f, 4.0f, 5.0f, 20.0f, 30.0f, 40.0f, 50.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f)); __m512 test_mm512_maskz_insertf32x4(__mmask16 __U, __m512 __A, __m128 __B) { // CHECK-LABEL: test_mm512_maskz_insertf32x4 @@ -7074,12 +7082,14 @@ __m512 test_mm512_maskz_insertf32x4(__mmask16 __U, __m512 __A, __m128 __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_insertf32x4(__U, __A, __B, 1); } +TEST_CONSTEXPR(match_m512(_mm512_maskz_insertf32x4((0x0030), ((__m512){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f}), ((__m128){20.0f, 30.0f, 40.0f, 50.0f}), 1), 0.0f, 0.0f, 0.0f, 0.0f, 20.0f, 30.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)); __m512i test_mm512_inserti32x4(__m512i __A, __m128i __B) { // CHECK-LABEL: test_mm512_inserti32x4 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> return _mm512_inserti32x4(__A, __B, 1); } +TEST_CONSTEXPR(match_v16si(_mm512_inserti32x4(((__m512i)(__v16si){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}), ((__m128i)(__v4si){20,30,40,50}), 1), 1, 2, 3, 4, 20, 30, 40, 50, 9, 10, 11, 12, 13, 14, 15, 16)); __m512i test_mm512_mask_inserti32x4(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { // CHECK-LABEL: test_mm512_mask_inserti32x4 @@ -7087,6 +7097,7 @@ __m512i test_mm512_mask_inserti32x4(__m512i __W, __mmask16 __U, __m512i __A, __m // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_inserti32x4(__W, __U, __A, __B, 1); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_inserti32x4(((__m512i)(__v16si){2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17}), (0x00F0), ((__m512i)(__v16si){2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17}), ((__m128i)(__v4si){20,30,40,50}), 1), 2, 3, 4, 5, 20, 30, 40, 50, 10, 11, 12, 13, 14, 15, 16, 17)); __m512i 
test_mm512_maskz_inserti32x4(__mmask16 __U, __m512i __A, __m128i __B) { // CHECK-LABEL: test_mm512_maskz_inserti32x4 @@ -7094,6 +7105,7 @@ __m512i test_mm512_maskz_inserti32x4(__mmask16 __U, __m512i __A, __m128i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_inserti32x4(__U, __A, __B, 1); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_inserti32x4((0x0030), ((__m512i)(__v16si){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}), ((__m128i)(__v4si){20,30,40,50}), 1), 0, 0, 0, 0, 20, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m512d test_mm512_getmant_round_pd(__m512d __A) { // CHECK-LABEL: test_mm512_getmant_round_pd diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 8cef11b12fb93..465c43dff8493 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -9529,6 +9529,7 @@ __m256 test_mm256_insertf32x4(__m256 __A, __m128 __B) { // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> return _mm256_insertf32x4(__A, __B, 1); } +TEST_CONSTEXPR(match_m256(_mm256_insertf32x4(((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m128){10.0f, 20.0f, 30.0f, 40.0f}), 1), 1.0f, 2.0f, 3.0f, 4.0f, 10.0f, 20.0f, 30.0f, 40.0f)); __m256 test_mm256_mask_insertf32x4(__m256 __W, __mmask8 __U, __m256 __A, __m128 __B) { // CHECK-LABEL: test_mm256_mask_insertf32x4 @@ -9536,6 +9537,7 @@ __m256 test_mm256_mask_insertf32x4(__m256 __W, __mmask8 __U, __m256 __A, __m128 // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_mask_insertf32x4(__W, __U, __A, __B, 1); } +TEST_CONSTEXPR(match_m256(_mm256_mask_insertf32x4(((__m256){2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}), (0x60), ((__m256){2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}), ((__m128){10.0f, 20.0f, 30.0f, 40.0f}), 1), 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 20.0f, 30.0f, 9.0f)); __m256 test_mm256_maskz_insertf32x4(__mmask8 __U, __m256 
__A, __m128 __B) { // CHECK-LABEL: test_mm256_maskz_insertf32x4 @@ -9543,12 +9545,14 @@ __m256 test_mm256_maskz_insertf32x4(__mmask8 __U, __m256 __A, __m128 __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_maskz_insertf32x4(__U, __A, __B, 1); } +TEST_CONSTEXPR(match_m256(_mm256_maskz_insertf32x4((0x30), ((__m256){1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}), ((__m128){10.0f, 20.0f, 30.0f, 40.0f}), 1), 0.0f, 0.0f, 0.0f, 0.0f, 10.0f, 20.0f, 0.0f, 0.0f)); __m256i test_mm256_inserti32x4(__m256i __A, __m128i __B) { // CHECK-LABEL: test_mm256_inserti32x4 // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> return _mm256_inserti32x4(__A, __B, 1); } +TEST_CONSTEXPR(match_v8si(_mm256_inserti32x4(((__m256i)(__v8si){1,2,3,4,5,6,7,8}), ((__m128i)(__v4si){10,20,30,40}), 1), 1, 2, 3, 4, 10, 20, 30, 40)); __m256i test_mm256_mask_inserti32x4(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { // CHECK-LABEL: test_mm256_mask_inserti32x4 @@ -9556,6 +9560,7 @@ __m256i test_mm256_mask_inserti32x4(__m256i __W, __mmask8 __U, __m256i __A, __m1 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_inserti32x4(__W, __U, __A, __B, 1); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_inserti32x4(((__m256i)(__v8si){2,3,4,5,6,7,8,9}), (0x60), ((__m256i)(__v8si){2,3,4,5,6,7,8,9}), ((__m128i)(__v4si){10,20,30,40}), 1), 2, 3, 4, 5, 6, 20, 30, 9)); __m256i test_mm256_maskz_inserti32x4(__mmask8 __U, __m256i __A, __m128i __B) { // CHECK-LABEL: test_mm256_maskz_inserti32x4 @@ -9563,6 +9568,7 @@ __m256i test_mm256_maskz_inserti32x4(__mmask8 __U, __m256i __A, __m128i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_inserti32x4(__U, __A, __B, 1); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_inserti32x4((0x30), ((__m256i)(__v8si){1,2,3,4,5,6,7,8}), ((__m128i)(__v4si){10,20,30,40}), 1), 0, 0, 0, 0, 10, 20, 0, 0)); __m128d test_mm_getmant_pd(__m128d __A) { // 
CHECK-LABEL: test_mm_getmant_pd diff --git a/clang/test/CodeGen/X86/avx512vldq-builtins.c b/clang/test/CodeGen/X86/avx512vldq-builtins.c index 802784472163d..938845799acf5 100644 --- a/clang/test/CodeGen/X86/avx512vldq-builtins.c +++ b/clang/test/CodeGen/X86/avx512vldq-builtins.c @@ -1123,6 +1123,7 @@ __m256d test_mm256_insertf64x2(__m256d __A, __m128d __B) { // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> return _mm256_insertf64x2(__A, __B, 1); } +TEST_CONSTEXPR(match_m256d(_mm256_insertf64x2(((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m128d){5.0, 6.0}), 1), 1.0, 2.0, 5.0, 6.0)); __m256d test_mm256_mask_insertf64x2(__m256d __W, __mmask8 __U, __m256d __A, __m128d __B) { // CHECK-LABEL: test_mm256_mask_insertf64x2 @@ -1130,6 +1131,7 @@ __m256d test_mm256_mask_insertf64x2(__m256d __W, __mmask8 __U, __m256d __A, __m1 // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_mask_insertf64x2(__W, __U, __A, __B, 1); } +TEST_CONSTEXPR(match_m256d(_mm256_mask_insertf64x2(((__m256d){2.0, 3.0, 4.0, 5.0}), (0x8), ((__m256d){2.0, 3.0, 4.0, 5.0}), ((__m128d){6.0, 7.0}), 1), 2.0, 3.0, 4.0, 7.0)); __m256d test_mm256_maskz_insertf64x2(__mmask8 __U, __m256d __A, __m128d __B) { // CHECK-LABEL: test_mm256_maskz_insertf64x2 @@ -1137,12 +1139,14 @@ __m256d test_mm256_maskz_insertf64x2(__mmask8 __U, __m256d __A, __m128d __B) { // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return _mm256_maskz_insertf64x2(__U, __A, __B, 1); } +TEST_CONSTEXPR(match_m256d(_mm256_maskz_insertf64x2((0x8), ((__m256d){1.0, 2.0, 3.0, 4.0}), ((__m128d){5.0, 6.0}), 1), 0.0, 0.0, 0.0, 6.0)); __m256i test_mm256_inserti64x2(__m256i __A, __m128i __B) { // CHECK-LABEL: test_mm256_inserti64x2 // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> return _mm256_inserti64x2(__A, __B, 1); } +TEST_CONSTEXPR(match_m256i(_mm256_inserti64x2(((__m256i){1, 2, 3, 4}), ((__m128i){5, 6}), 1), 1, 2, 5, 6)); __m256i 
test_mm256_mask_inserti64x2(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { // CHECK-LABEL: test_mm256_mask_inserti64x2 @@ -1150,6 +1154,7 @@ __m256i test_mm256_mask_inserti64x2(__m256i __W, __mmask8 __U, __m256i __A, __m1 // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_inserti64x2(__W, __U, __A, __B, 1); } +TEST_CONSTEXPR(match_m256i(_mm256_mask_inserti64x2(((__m256i){2, 3, 4, 5}), (0x4), ((__m256i){2, 3, 4, 5}), ((__m128i){6, 7}), 1), 2, 3, 6, 5)); __m256i test_mm256_maskz_inserti64x2(__mmask8 __U, __m256i __A, __m128i __B) { // CHECK-LABEL: test_mm256_maskz_inserti64x2 @@ -1157,6 +1162,7 @@ __m256i test_mm256_maskz_inserti64x2(__mmask8 __U, __m256i __A, __m128i __B) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_inserti64x2(__U, __A, __B, 1); } +TEST_CONSTEXPR(match_m256i(_mm256_maskz_inserti64x2((0x4), ((__m256i){1, 2, 3, 4}), ((__m128i){5, 6}), 1), 0, 0, 5, 0)); __mmask8 test_mm_mask_fpclass_pd_mask(__mmask8 __U, __m128d __A) { // CHECK-LABEL: test_mm_mask_fpclass_pd_mask From c8fd4de189fa915f370cc9c543db70be4cf10d22 Mon Sep 17 00:00:00 2001 From: AdityaC4 Date: Wed, 17 Sep 2025 18:54:17 -0500 Subject: [PATCH 5/7] Use new popToAPSInt and rename variables for clarity --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 26 +++++++++++------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 88378144e0b9c..c0c641963f344 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2919,40 +2919,38 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC, unsigned ID) { assert(Call->getNumArgs() == 3); - PrimType ImmPT = *S.getContext().classify(Call->getArg(2)); - APSInt ImmAPS = popToAPSInt(S.Stk, ImmPT); + APSInt ImmAPS = popToAPSInt(S, Call->getArg(2)); uint64_t Index = ImmAPS.getZExtValue(); const 
Pointer &SubVec = S.Stk.pop(); if (!SubVec.getFieldDesc()->isPrimitiveArray()) return false; - const Pointer &DstVec = S.Stk.pop(); - if (!DstVec.getFieldDesc()->isPrimitiveArray()) + const Pointer &BaseVec = S.Stk.pop(); + if (!BaseVec.getFieldDesc()->isPrimitiveArray()) return false; - const Pointer &Result = S.Stk.peek(); + const Pointer &Dst = S.Stk.peek(); - unsigned DstElements = DstVec.getNumElems(); + unsigned BaseElements = BaseVec.getNumElems(); unsigned SubElements = SubVec.getNumElems(); - assert(SubElements != 0 && DstElements != 0 && (DstElements % SubElements) == 0); + assert(SubElements != 0 && BaseElements != 0 && (BaseElements % SubElements) == 0); - unsigned NumLanes = DstElements / SubElements; + unsigned NumLanes = BaseElements / SubElements; unsigned Lane = static_cast(Index % NumLanes); unsigned InsertPos = Lane * SubElements; - PrimType ElemPT = DstVec.getFieldDesc()->getPrimType(); + PrimType ElemPT = BaseVec.getFieldDesc()->getPrimType(); TYPE_SWITCH(ElemPT, { - for (unsigned I = 0; I != DstElements; ++I) - Result.elem(I) = DstVec.elem(I); - + for (unsigned I = 0; I != BaseElements; ++I) + Dst.elem(I) = BaseVec.elem(I); for (unsigned I = 0; I != SubElements; ++I) - Result.elem(InsertPos + I) = SubVec.elem(I); + Dst.elem(InsertPos + I) = SubVec.elem(I); }); - Result.initializeAllElements(); + Dst.initializeAllElements(); return true; } From 68a3be881ddc4f915b7ab8b5bc089cae6760819e Mon Sep 17 00:00:00 2001 From: AdityaC4 Date: Wed, 17 Sep 2025 19:28:20 -0500 Subject: [PATCH 6/7] remove redundant if statement --- clang/lib/AST/ExprConstant.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 001d636a8a960..f7fed6ba215dc 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12154,8 +12154,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (!EvaluateInteger(E->getArg(2), Imm, Info)) return false; - if (!SourceDst.isVector() 
|| !SourceSub.isVector()) - return false; assert(SourceDst.isVector() && SourceSub.isVector()); unsigned DstLen = SourceDst.getVectorLength(); unsigned SubLen = SourceSub.getVectorLength(); From 2fdbc9f2fccb905065cf568ac5731791c05fb6b4 Mon Sep 17 00:00:00 2001 From: AdityaC4 Date: Thu, 18 Sep 2025 07:42:16 -0500 Subject: [PATCH 7/7] clang-format InterpBuiltin.cpp changes --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index c0c641963f344..64962ee13d6b0 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2935,7 +2935,8 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC, unsigned BaseElements = BaseVec.getNumElems(); unsigned SubElements = SubVec.getNumElems(); - assert(SubElements != 0 && BaseElements != 0 && (BaseElements % SubElements) == 0); + assert(SubElements != 0 && BaseElements != 0 && + (BaseElements % SubElements) == 0); unsigned NumLanes = BaseElements / SubElements; unsigned Lane = static_cast(Index % NumLanes);