From d98ec0176f0425173c9ad4769513ccb72d313d0f Mon Sep 17 00:00:00 2001 From: Zach Goldthorpe Date: Mon, 7 Jul 2025 17:09:29 -0500 Subject: [PATCH 1/6] Added pattern for folding packed integer constructions. --- .../Transforms/Vectorize/VectorCombine.cpp | 125 ++++++++++++++++++ .../VectorCombine/packed-integers.ll | 108 +++++++++++++++ 2 files changed, 233 insertions(+) create mode 100644 llvm/test/Transforms/VectorCombine/packed-integers.ll diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index fe8d74c43dfdc..ce73a383d2555 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" @@ -125,6 +126,7 @@ class VectorCombine { bool scalarizeLoadExtract(Instruction &I); bool scalarizeExtExtract(Instruction &I); bool foldConcatOfBoolMasks(Instruction &I); + bool foldIntegerPackFromVector(Instruction &I); bool foldPermuteOfBinops(Instruction &I); bool foldShuffleOfBinops(Instruction &I); bool foldShuffleOfSelects(Instruction &I); @@ -1957,6 +1959,126 @@ bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) { return true; } +/// Match "shufflevector -> bitcast" or "extractelement -> zext -> shl" patterns +/// which extract vector elements and pack them in the same relative positions. +static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec, + uint64_t &VecOffset, + SmallBitVector &Mask) { + static const auto m_ConstShlOrSelf = [](const auto &Base, uint64_t &ShlAmt) { + ShlAmt = 0; + return m_CombineOr(m_Shl(Base, m_ConstantInt(ShlAmt)), Base); + }; + + // First try to match extractelement -> zext -> shl + uint64_t VecIdx, ShlAmt; + if (match(V, m_ConstShlOrSelf(m_ZExtOrSelf(m_ExtractElt( + m_Value(Vec), m_ConstantInt(VecIdx))), + ShlAmt))) { + auto *VecTy = dyn_cast(Vec->getType()); + if (!VecTy) + return false; + auto *EltTy = dyn_cast(VecTy->getElementType()); + if (!EltTy) + return false; + + const unsigned EltBitWidth = EltTy->getBitWidth(); + const unsigned TargetBitWidth = V->getType()->getIntegerBitWidth(); + if (TargetBitWidth % EltBitWidth != 0 || ShlAmt % EltBitWidth != 0) + return false; + const unsigned ShlEltAmt = ShlAmt / EltBitWidth; + + if (ShlEltAmt > VecIdx) + return false; + VecOffset = VecIdx - ShlEltAmt; + Mask.resize(V->getType()->getIntegerBitWidth() / EltBitWidth); + Mask.set(ShlEltAmt); + return true; + } + + // Now try to match shufflevector -> bitcast + Value *Lhs, *Rhs; + ArrayRef ShuffleMask; + if (!match(V, m_BitCast(m_Shuffle(m_Value(Lhs), m_Value(Rhs), + m_Mask(ShuffleMask))))) + return false; + Mask.resize(ShuffleMask.size()); + + if (isa(Lhs)) + std::swap(Lhs, Rhs); + + auto *RhsConst = dyn_cast(Rhs); + if (!RhsConst) + return false; + + auto *LhsTy = dyn_cast(Lhs->getType()); + if (!LhsTy) + return false; + + Vec = Lhs; + const unsigned NumLhsElts = LhsTy->getNumElements(); + bool FoundVecOffset = false; + for (unsigned Idx = 0; Idx < ShuffleMask.size(); ++Idx) { + if (ShuffleMask[Idx] == PoisonMaskElem) + return false; + const unsigned ShuffleIdx = ShuffleMask[Idx]; + if (ShuffleIdx >= NumLhsElts) { + const unsigned RhsIdx = ShuffleIdx - NumLhsElts; + auto *RhsElt = + dyn_cast(RhsConst->getAggregateElement(RhsIdx)); + if (!RhsElt || RhsElt->getZExtValue() != 0) + 
return false; + continue; + } + + if (FoundVecOffset) { + if (VecOffset + Idx != ShuffleIdx) + return false; + } else { + if (ShuffleIdx < Idx) + return false; + VecOffset = ShuffleIdx - Idx; + FoundVecOffset = true; + } + Mask.set(Idx); + } + return FoundVecOffset; +} +/// Try to fold the or of two scalar integers whose contents are packed elements +/// of the same vector. +bool VectorCombine::foldIntegerPackFromVector(Instruction &I) { + assert(I.getOpcode() == Instruction::Or); + Value *LhsVec, *RhsVec; + uint64_t LhsVecOffset, RhsVecOffset; + SmallBitVector Mask; + if (!matchSubIntegerPackFromVector(I.getOperand(0), LhsVec, LhsVecOffset, + Mask)) + return false; + if (!matchSubIntegerPackFromVector(I.getOperand(1), RhsVec, RhsVecOffset, + Mask)) + return false; + if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset) + return false; + + // Convert into shufflevector -> bitcast + SmallVector ShuffleMask; + ShuffleMask.reserve(Mask.size()); + const unsigned ZeroVecIdx = + cast(LhsVec->getType())->getNumElements(); + for (unsigned Idx = 0; Idx < Mask.size(); ++Idx) { + if (Mask.test(Idx)) + ShuffleMask.push_back(LhsVecOffset + Idx); + else + ShuffleMask.push_back(ZeroVecIdx); + } + + Value *MaskedVec = Builder.CreateShuffleVector( + LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask, + LhsVec->getName() + ".extract"); + Value *CastedVec = Builder.CreateBitCast(MaskedVec, I.getType(), I.getName()); + replaceValue(I, *CastedVec); + return true; +} + /// Try to convert "shuffle (binop (shuffle, shuffle)), undef" /// --> "binop (shuffle), (shuffle)". bool VectorCombine::foldPermuteOfBinops(Instruction &I) { @@ -3742,6 +3864,9 @@ bool VectorCombine::run() { if (Opcode == Instruction::Store) MadeChange |= foldSingleElementStore(I); + if (isa(I.getType()) && Opcode == Instruction::Or) + MadeChange |= foldIntegerPackFromVector(I); + // If this is an early pipeline invocation of this pass, we are done. 
if (TryEarlyFoldsOnly) return; diff --git a/llvm/test/Transforms/VectorCombine/packed-integers.ll b/llvm/test/Transforms/VectorCombine/packed-integers.ll new file mode 100644 index 0000000000000..f01179bbde13c --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/packed-integers.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=vector-combine %s | FileCheck %s + +define i32 @bitcast.v2i(<4 x i8> %v) { +; CHECK-LABEL: define i32 @bitcast.v2i( +; CHECK-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V]] to i32 +; CHECK-NEXT: ret i32 [[X_3]] +; + %v.0 = extractelement <4 x i8> %v, i32 0 + %z.0 = zext i8 %v.0 to i32 + + %v.1 = extractelement <4 x i8> %v, i32 1 + %z.1 = zext i8 %v.1 to i32 + %s.1 = shl i32 %z.1, 8 + %x.1 = or i32 %z.0, %s.1 + + %v.2 = extractelement <4 x i8> %v, i32 2 + %z.2 = zext i8 %v.2 to i32 + %s.2 = shl i32 %z.2, 16 + %x.2 = or i32 %x.1, %s.2 + + %v.3 = extractelement <4 x i8> %v, i32 3 + %z.3 = zext i8 %v.3 to i32 + %s.3 = shl i32 %z.3, 24 + %x.3 = or i32 %x.2, %s.3 + + ret i32 %x.3 +} + +define i32 @bitcast.v2i.tree(<4 x i8> %v) { +; CHECK-LABEL: define i32 @bitcast.v2i.tree( +; CHECK-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-NEXT: [[X:%.*]] = bitcast <4 x i8> [[V]] to i32 +; CHECK-NEXT: ret i32 [[X]] +; + %v.0 = extractelement <4 x i8> %v, i32 0 + %z.0 = zext i8 %v.0 to i32 + + %v.1 = extractelement <4 x i8> %v, i32 1 + %z.1 = zext i8 %v.1 to i32 + %s.1 = shl i32 %z.1, 8 + %x.1 = or i32 %z.0, %s.1 + + %v.2 = extractelement <4 x i8> %v, i32 2 + %z.2 = zext i8 %v.2 to i32 + %s.2 = shl i32 %z.2, 16 + + %v.3 = extractelement <4 x i8> %v, i32 3 + %z.3 = zext i8 %v.3 to i32 + %s.3 = shl i32 %z.3, 24 + %x.3 = or i32 %s.2, %s.3 + + %x = or i32 %x.1, %x.3 + + ret i32 %x +} + +define i32 @extract.i32(<8 x i8> %v) { +; CHECK-LABEL: define i32 @extract.i32( +; CHECK-SAME: <8 x i8> [[V:%.*]]) { +; CHECK-NEXT: [[V_EXTRACT4:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V_EXTRACT4]] to i32 +; CHECK-NEXT: ret i32 [[X_3]] +; + %v.0 = extractelement <8 x i8> %v, i32 3 + %z.0 = zext i8 %v.0 to i32 + + %v.1 = extractelement <8 x i8> %v, i32 4 + %z.1 = zext i8 %v.1 to i32 + %s.1 = shl i32 %z.1, 8 + %x.1 = or i32 %z.0, %s.1 + + %v.2 = extractelement <8 x i8> %v, i32 5 + %z.2 = zext i8 %v.2 to i32 + %s.2 = shl i32 %z.2, 16 + %x.2 = or i32 %x.1, %s.2 + + %v.3 = extractelement <8 x i8> %v, i32 6 + %z.3 = zext i8 %v.3 to i32 + %s.3 = shl i32 %z.3, 24 + %x.3 = or i32 %x.2, %s.3 + + ret i32 %x.3 +} + +define i32 @partial(<4 x i8> %v) { +; CHECK-LABEL: define i32 @partial( +; CHECK-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-NEXT: [[V_EXTRACT2:%.*]] = shufflevector <4 x i8> [[V]], <4 x i8> zeroinitializer, <4 x i32> +; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V_EXTRACT2]] to i32 +; CHECK-NEXT: ret i32 [[X_3]] +; + %v.0 = extractelement <4 x i8> %v, i32 0 + %z.0 = zext i8 %v.0 to i32 + + %v.1 = extractelement <4 x i8> %v, i32 1 + %z.1 = zext i8 %v.1 to i32 + %s.1 = shl i32 %z.1, 8 + %x.1 = or i32 %z.0, %s.1 + + %v.3 = extractelement <4 x i8> %v, i32 3 + %z.3 = zext i8 %v.3 to i32 + %s.3 = shl i32 %z.3, 24 + %x.3 = or i32 %x.1, %s.3 + + ret i32 %x.3 +} From 95e74dc814749b20434cf910f4103d0b1a8f3dc2 Mon Sep 17 00:00:00 2001 From: Zach Goldthorpe Date: Fri, 11 Jul 2025 09:02:16 -0500 Subject: [PATCH 2/6] Incorporated reviewer feedback. 
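For reference, the changes below switch the zero check to Constant::isNullValue() and build the
shuffle mask with SmallBitVector::set_bits(), per review. The standalone sketch here is illustrative
only and is not part of the patch (the Mask contents, VecOffset, and ZeroVecIdx values are made up);
it just shows the mask that the set_bits() loop produces when one packed element is not covered and
therefore reads the all-zero second shuffle operand. It builds against LLVM's ADT headers and
Support library.

  #include "llvm/ADT/SmallBitVector.h"
  #include "llvm/ADT/SmallVector.h"
  #include <cstdio>

  int main() {
    // Elements 0, 1, and 3 of the packed integer come from the source vector;
    // element 2 is not covered, so it must be taken from the zero operand.
    llvm::SmallBitVector Mask(4);
    Mask.set(0);
    Mask.set(1);
    Mask.set(3);

    const int ZeroVecIdx = 4; // first lane of the all-zero second operand
    const int VecOffset = 0;  // assumed offset into the source vector
    llvm::SmallVector<int, 8> ShuffleMask(Mask.size(), ZeroVecIdx);
    for (unsigned Idx : Mask.set_bits())
      ShuffleMask[Idx] = VecOffset + Idx;

    for (int M : ShuffleMask)
      std::printf("%d ", M); // prints "0 1 4 3": lane 2 comes from the zeros
    return 0;
  }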
--- llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index ce73a383d2555..4d67af7003de8 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -2025,7 +2025,7 @@ static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec, const unsigned RhsIdx = ShuffleIdx - NumLhsElts; auto *RhsElt = dyn_cast(RhsConst->getAggregateElement(RhsIdx)); - if (!RhsElt || RhsElt->getZExtValue() != 0) + if (!RhsElt || !RhsElt->isNullValue()) return false; continue; } @@ -2059,17 +2059,12 @@ bool VectorCombine::foldIntegerPackFromVector(Instruction &I) { if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset) return false; - // Convert into shufflevector -> bitcast - SmallVector ShuffleMask; - ShuffleMask.reserve(Mask.size()); + // Convert into shufflevector -> bitcast; const unsigned ZeroVecIdx = cast(LhsVec->getType())->getNumElements(); - for (unsigned Idx = 0; Idx < Mask.size(); ++Idx) { - if (Mask.test(Idx)) - ShuffleMask.push_back(LhsVecOffset + Idx); - else - ShuffleMask.push_back(ZeroVecIdx); - } + SmallVector ShuffleMask(Mask.size(), ZeroVecIdx); + for (unsigned Idx : Mask.set_bits()) + ShuffleMask[Idx] = LhsVecOffset + Idx; Value *MaskedVec = Builder.CreateShuffleVector( LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask, From 7abb1534cd5ce01023aca95c748852e344d8beb4 Mon Sep 17 00:00:00 2001 From: Zach Goldthorpe Date: Mon, 21 Jul 2025 15:13:37 -0500 Subject: [PATCH 3/6] Moved pattern to `instcombine` --- .../InstCombine/InstCombineAndOrXor.cpp | 141 ++++++++++++++++++ .../Transforms/Vectorize/VectorCombine.cpp | 120 --------------- .../or-packed-int-vecs.ll} | 13 +- 3 files changed, 148 insertions(+), 126 deletions(-) rename llvm/test/Transforms/{VectorCombine/packed-integers.ll => InstCombine/or-packed-int-vecs.ll} (81%) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index dd16cfaeecd45..49cd60baf9fd5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -11,10 +11,13 @@ //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/Analysis/CmpInstAnalysis.h" #include "llvm/Analysis/FloatingPointPredicateUtils.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/IR/ConstantRange.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" @@ -3592,6 +3595,141 @@ static Value *foldOrOfInversions(BinaryOperator &I, return nullptr; } +/// Match "shufflevector -> bitcast" or "extractelement -> zext -> shl" patterns +/// which extract vector elements and pack them in the same relative positions. 
+static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec, + uint64_t &VecOffset, + SmallBitVector &Mask) { + static const auto m_ConstShlOrSelf = [](const auto &Base, uint64_t &ShlAmt) { + ShlAmt = 0; + return m_CombineOr(m_Shl(Base, m_ConstantInt(ShlAmt)), Base); + }; + + // First try to match extractelement -> zext -> shl + uint64_t VecIdx, ShlAmt; + if (match(V, m_ConstShlOrSelf(m_ZExtOrSelf(m_ExtractElt( + m_Value(Vec), m_ConstantInt(VecIdx))), + ShlAmt))) { + auto *VecTy = dyn_cast(Vec->getType()); + if (!VecTy) + return false; + auto *EltTy = dyn_cast(VecTy->getElementType()); + if (!EltTy) + return false; + + const unsigned EltBitWidth = EltTy->getBitWidth(); + const unsigned TargetBitWidth = V->getType()->getIntegerBitWidth(); + if (TargetBitWidth % EltBitWidth != 0 || ShlAmt % EltBitWidth != 0) + return false; + const unsigned ShlEltAmt = ShlAmt / EltBitWidth; + + if (ShlEltAmt > VecIdx) + return false; + VecOffset = VecIdx - ShlEltAmt; + Mask.resize(V->getType()->getIntegerBitWidth() / EltBitWidth); + Mask.set(ShlEltAmt); + return true; + } + + // Now try to match a bitcasted subvector. + Instruction *DstVecI; + if (!match(V, m_BitCast(m_Instruction(DstVecI)))) + return false; + + auto *DstTy = dyn_cast(DstVecI->getType()); + if (!DstTy) + return false; + + Mask.resize(DstTy->getNumElements()); + + // First check for a subvector obtained from a shufflevector. + if (isa(DstVecI)) { + Constant *ConstVec; + ArrayRef ShuffleMask; + if (!match(DstVecI, m_Shuffle(m_Value(Vec), m_Constant(ConstVec), + m_Mask(ShuffleMask)))) + return false; + + auto *VecTy = dyn_cast(Vec->getType()); + if (!VecTy) + return false; + + const unsigned NumVecElts = VecTy->getNumElements(); + bool FoundVecOffset = false; + for (unsigned Idx = 0; Idx < ShuffleMask.size(); ++Idx) { + if (ShuffleMask[Idx] == PoisonMaskElem) + return false; + const unsigned ShuffleIdx = ShuffleMask[Idx]; + if (ShuffleIdx >= NumVecElts) { + const unsigned ConstIdx = ShuffleIdx - NumVecElts; + auto *ConstElt = + dyn_cast(ConstVec->getAggregateElement(ConstIdx)); + if (!ConstElt || !ConstElt->isNullValue()) + return false; + continue; + } + + if (FoundVecOffset) { + if (VecOffset + Idx != ShuffleIdx) + return false; + } else { + if (ShuffleIdx < Idx) + return false; + VecOffset = ShuffleIdx - Idx; + FoundVecOffset = true; + } + Mask.set(Idx); + } + return FoundVecOffset; + } + + // Check for a subvector obtained as an (insertelement V, 0, idx) + uint64_t InsertIdx; + if (!match(DstVecI, + m_InsertElt(m_Value(Vec), m_Zero(), m_ConstantInt(InsertIdx)))) + return false; + + auto *VecTy = dyn_cast(Vec->getType()); + if (!VecTy) + return false; + VecOffset = 0; + bool AlreadyInsertedMaskedElt = Mask.test(InsertIdx); + Mask.set(); + if (!AlreadyInsertedMaskedElt) + Mask.reset(InsertIdx); + return true; +} + +/// Try to fold the or of two scalar integers whose contents are packed elements +/// of the same vector. 
+Instruction *foldIntegerPackFromVector(Instruction &I, + InstCombiner::BuilderTy &Builder) { + assert(I.getOpcode() == Instruction::Or); + Value *LhsVec, *RhsVec; + uint64_t LhsVecOffset, RhsVecOffset; + SmallBitVector Mask; + if (!matchSubIntegerPackFromVector(I.getOperand(0), LhsVec, LhsVecOffset, + Mask)) + return nullptr; + if (!matchSubIntegerPackFromVector(I.getOperand(1), RhsVec, RhsVecOffset, + Mask)) + return nullptr; + if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset) + return nullptr; + + // Convert into shufflevector -> bitcast; + const unsigned ZeroVecIdx = + cast(LhsVec->getType())->getNumElements(); + SmallVector ShuffleMask(Mask.size(), ZeroVecIdx); + for (unsigned Idx : Mask.set_bits()) + ShuffleMask[Idx] = LhsVecOffset + Idx; + + Value *MaskedVec = Builder.CreateShuffleVector( + LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask, + I.getName() + ".v"); + return CastInst::Create(Instruction::BitCast, MaskedVec, I.getType()); +} + // A decomposition of ((X & Mask) * Factor). The NUW / NSW bools // track these properities for preservation. Note that we can decompose // equivalent select form of this expression (e.g. (!(X & Mask) ? 0 : Mask * @@ -3688,6 +3826,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { if (Instruction *X = foldComplexAndOrPatterns(I, Builder)) return X; + if (Instruction *X = foldIntegerPackFromVector(I, Builder)) + return X; + // (A & B) | (C & D) -> A ^ D where A == ~C && B == ~D // (A & B) | (C & D) -> A ^ C where A == ~D && B == ~C if (Value *V = foldOrOfInversions(I, Builder)) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 4d67af7003de8..fe8d74c43dfdc 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -16,7 +16,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" -#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" @@ -126,7 +125,6 @@ class VectorCombine { bool scalarizeLoadExtract(Instruction &I); bool scalarizeExtExtract(Instruction &I); bool foldConcatOfBoolMasks(Instruction &I); - bool foldIntegerPackFromVector(Instruction &I); bool foldPermuteOfBinops(Instruction &I); bool foldShuffleOfBinops(Instruction &I); bool foldShuffleOfSelects(Instruction &I); @@ -1959,121 +1957,6 @@ bool VectorCombine::foldConcatOfBoolMasks(Instruction &I) { return true; } -/// Match "shufflevector -> bitcast" or "extractelement -> zext -> shl" patterns -/// which extract vector elements and pack them in the same relative positions. 
-static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec, - uint64_t &VecOffset, - SmallBitVector &Mask) { - static const auto m_ConstShlOrSelf = [](const auto &Base, uint64_t &ShlAmt) { - ShlAmt = 0; - return m_CombineOr(m_Shl(Base, m_ConstantInt(ShlAmt)), Base); - }; - - // First try to match extractelement -> zext -> shl - uint64_t VecIdx, ShlAmt; - if (match(V, m_ConstShlOrSelf(m_ZExtOrSelf(m_ExtractElt( - m_Value(Vec), m_ConstantInt(VecIdx))), - ShlAmt))) { - auto *VecTy = dyn_cast(Vec->getType()); - if (!VecTy) - return false; - auto *EltTy = dyn_cast(VecTy->getElementType()); - if (!EltTy) - return false; - - const unsigned EltBitWidth = EltTy->getBitWidth(); - const unsigned TargetBitWidth = V->getType()->getIntegerBitWidth(); - if (TargetBitWidth % EltBitWidth != 0 || ShlAmt % EltBitWidth != 0) - return false; - const unsigned ShlEltAmt = ShlAmt / EltBitWidth; - - if (ShlEltAmt > VecIdx) - return false; - VecOffset = VecIdx - ShlEltAmt; - Mask.resize(V->getType()->getIntegerBitWidth() / EltBitWidth); - Mask.set(ShlEltAmt); - return true; - } - - // Now try to match shufflevector -> bitcast - Value *Lhs, *Rhs; - ArrayRef ShuffleMask; - if (!match(V, m_BitCast(m_Shuffle(m_Value(Lhs), m_Value(Rhs), - m_Mask(ShuffleMask))))) - return false; - Mask.resize(ShuffleMask.size()); - - if (isa(Lhs)) - std::swap(Lhs, Rhs); - - auto *RhsConst = dyn_cast(Rhs); - if (!RhsConst) - return false; - - auto *LhsTy = dyn_cast(Lhs->getType()); - if (!LhsTy) - return false; - - Vec = Lhs; - const unsigned NumLhsElts = LhsTy->getNumElements(); - bool FoundVecOffset = false; - for (unsigned Idx = 0; Idx < ShuffleMask.size(); ++Idx) { - if (ShuffleMask[Idx] == PoisonMaskElem) - return false; - const unsigned ShuffleIdx = ShuffleMask[Idx]; - if (ShuffleIdx >= NumLhsElts) { - const unsigned RhsIdx = ShuffleIdx - NumLhsElts; - auto *RhsElt = - dyn_cast(RhsConst->getAggregateElement(RhsIdx)); - if (!RhsElt || !RhsElt->isNullValue()) - return false; - continue; - } - - if (FoundVecOffset) { - if (VecOffset + Idx != ShuffleIdx) - return false; - } else { - if (ShuffleIdx < Idx) - return false; - VecOffset = ShuffleIdx - Idx; - FoundVecOffset = true; - } - Mask.set(Idx); - } - return FoundVecOffset; -} -/// Try to fold the or of two scalar integers whose contents are packed elements -/// of the same vector. -bool VectorCombine::foldIntegerPackFromVector(Instruction &I) { - assert(I.getOpcode() == Instruction::Or); - Value *LhsVec, *RhsVec; - uint64_t LhsVecOffset, RhsVecOffset; - SmallBitVector Mask; - if (!matchSubIntegerPackFromVector(I.getOperand(0), LhsVec, LhsVecOffset, - Mask)) - return false; - if (!matchSubIntegerPackFromVector(I.getOperand(1), RhsVec, RhsVecOffset, - Mask)) - return false; - if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset) - return false; - - // Convert into shufflevector -> bitcast; - const unsigned ZeroVecIdx = - cast(LhsVec->getType())->getNumElements(); - SmallVector ShuffleMask(Mask.size(), ZeroVecIdx); - for (unsigned Idx : Mask.set_bits()) - ShuffleMask[Idx] = LhsVecOffset + Idx; - - Value *MaskedVec = Builder.CreateShuffleVector( - LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask, - LhsVec->getName() + ".extract"); - Value *CastedVec = Builder.CreateBitCast(MaskedVec, I.getType(), I.getName()); - replaceValue(I, *CastedVec); - return true; -} - /// Try to convert "shuffle (binop (shuffle, shuffle)), undef" /// --> "binop (shuffle), (shuffle)". 
bool VectorCombine::foldPermuteOfBinops(Instruction &I) { @@ -3859,9 +3742,6 @@ bool VectorCombine::run() { if (Opcode == Instruction::Store) MadeChange |= foldSingleElementStore(I); - if (isa(I.getType()) && Opcode == Instruction::Or) - MadeChange |= foldIntegerPackFromVector(I); - // If this is an early pipeline invocation of this pass, we are done. if (TryEarlyFoldsOnly) return; diff --git a/llvm/test/Transforms/VectorCombine/packed-integers.ll b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll similarity index 81% rename from llvm/test/Transforms/VectorCombine/packed-integers.ll rename to llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll index f01179bbde13c..1bb520a323474 100644 --- a/llvm/test/Transforms/VectorCombine/packed-integers.ll +++ b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -passes=vector-combine %s | FileCheck %s +; RUN: opt -S -passes=instcombine %s | FileCheck %s define i32 @bitcast.v2i(<4 x i8> %v) { ; CHECK-LABEL: define i32 @bitcast.v2i( @@ -59,9 +59,10 @@ define i32 @bitcast.v2i.tree(<4 x i8> %v) { define i32 @extract.i32(<8 x i8> %v) { ; CHECK-LABEL: define i32 @extract.i32( ; CHECK-SAME: <8 x i8> [[V:%.*]]) { -; CHECK-NEXT: [[V_EXTRACT4:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> zeroinitializer, <4 x i32> -; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V_EXTRACT4]] to i32 -; CHECK-NEXT: ret i32 [[X_3]] +; CHECK-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> poison, <8 x i32> +; CHECK-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i8> [[X_3_V_EXTRACT]] to <2 x i32> +; CHECK-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i32> [[X_3_V_BC]], i64 0 +; CHECK-NEXT: ret i32 [[X_3_V_EXTRACT1]] ; %v.0 = extractelement <8 x i8> %v, i32 3 %z.0 = zext i8 %v.0 to i32 @@ -87,8 +88,8 @@ define i32 @extract.i32(<8 x i8> %v) { define i32 @partial(<4 x i8> %v) { ; CHECK-LABEL: define i32 @partial( ; CHECK-SAME: <4 x i8> [[V:%.*]]) { -; CHECK-NEXT: [[V_EXTRACT2:%.*]] = shufflevector <4 x i8> [[V]], <4 x i8> zeroinitializer, <4 x i32> -; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V_EXTRACT2]] to i32 +; CHECK-NEXT: [[X_3_V1:%.*]] = insertelement <4 x i8> [[V]], i8 0, i64 2 +; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[X_3_V1]] to i32 ; CHECK-NEXT: ret i32 [[X_3]] ; %v.0 = extractelement <4 x i8> %v, i32 0 From e3638d56a52c81d479f6ec32005e0c066f561a52 Mon Sep 17 00:00:00 2001 From: Zach Goldthorpe Date: Tue, 22 Jul 2025 09:53:10 -0500 Subject: [PATCH 4/6] Added endian coverage to tests. 
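Both byte orders need coverage because the same extract/zext/shl chain maps to different vector
lanes depending on the data layout: bitcasting <4 x i8> to i32 places element 0 in the low byte on
a little-endian target but in the high byte on a big-endian one. The follow-up commit derives the
mask index from the shift amount accordingly (MaskIdx in matchSubIntegerPackFromVector) and splits
the checks into LE/BE variants. The snippet below is only an illustration of that index arithmetic,
not part of the patch; the constants are chosen for an i8-to-i32 pack.

  #include <cstdio>

  int main() {
    const unsigned EltBitWidth = 8;     // <4 x i8> source elements
    const unsigned TargetBitWidth = 32; // packed scalar type (i32)
    const unsigned TargetEltWidth = TargetBitWidth / EltBitWidth;

    for (unsigned ShlAmt = 0; ShlAmt < TargetBitWidth; ShlAmt += EltBitWidth) {
      const unsigned ShlEltAmt = ShlAmt / EltBitWidth;
      // Mirrors the MaskIdx computation: same bit on LE, mirrored bit on BE.
      const unsigned LEIdx = ShlEltAmt;
      const unsigned BEIdx = TargetEltWidth - ShlEltAmt - 1;
      std::printf("shl %2u -> LE mask bit %u, BE mask bit %u\n", ShlAmt, LEIdx,
                  BEIdx);
    }
    return 0;
  }

So on big-endian targets an in-order pack starts from the highest lane, which is what the
endian-specific test variants exercise.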
--- llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll index 1bb520a323474..1d6442a28eb59 100644 --- a/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll +++ b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -passes=instcombine %s | FileCheck %s +; RUN: opt < %s -passes=instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=CHECK +; RUN: opt < %s -passes=instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=CHECK define i32 @bitcast.v2i(<4 x i8> %v) { ; CHECK-LABEL: define i32 @bitcast.v2i( From 2d929bd8aafbb3c48f57820fa33ac9eeb6c443a1 Mon Sep 17 00:00:00 2001 From: Zach Goldthorpe Date: Tue, 22 Jul 2025 16:20:26 -0500 Subject: [PATCH 5/6] Corrected handling of big-endian vectors. --- .../InstCombine/InstCombineAndOrXor.cpp | 47 +- .../InstCombine/or-packed-int-vecs.ll | 674 ++++++++++++++++-- 2 files changed, 664 insertions(+), 57 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 49cd60baf9fd5..852b5fff53b3d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -3595,11 +3595,19 @@ static Value *foldOrOfInversions(BinaryOperator &I, return nullptr; } -/// Match "shufflevector -> bitcast" or "extractelement -> zext -> shl" patterns -/// which extract vector elements and pack them in the same relative positions. +/// Match \p V as "shufflevector -> bitcast" or "extractelement -> zext -> shl" +/// patterns, which extract vector elements and pack them in the same relative +/// positions. +/// +/// \p Vec is the underlying vector being extracted from. +/// \p Mask is a bitmask identifying which packed elements are obtained from the +/// vector. +/// \p VecOffset is the vector element corresponding to index 0 of the +/// mask. static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec, - uint64_t &VecOffset, - SmallBitVector &Mask) { + int64_t &VecOffset, + SmallBitVector &Mask, + const DataLayout &DL) { static const auto m_ConstShlOrSelf = [](const auto &Base, uint64_t &ShlAmt) { ShlAmt = 0; return m_CombineOr(m_Shl(Base, m_ConstantInt(ShlAmt)), Base); @@ -3621,13 +3629,15 @@ static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec, const unsigned TargetBitWidth = V->getType()->getIntegerBitWidth(); if (TargetBitWidth % EltBitWidth != 0 || ShlAmt % EltBitWidth != 0) return false; + const unsigned TargetEltWidth = TargetBitWidth / EltBitWidth; const unsigned ShlEltAmt = ShlAmt / EltBitWidth; - if (ShlEltAmt > VecIdx) - return false; - VecOffset = VecIdx - ShlEltAmt; - Mask.resize(V->getType()->getIntegerBitWidth() / EltBitWidth); - Mask.set(ShlEltAmt); + const unsigned MaskIdx = + DL.isLittleEndian() ? ShlEltAmt : TargetEltWidth - ShlEltAmt - 1; + + VecOffset = static_cast(VecIdx) - static_cast(MaskIdx); + Mask.resize(TargetEltWidth); + Mask.set(MaskIdx); return true; } @@ -3700,19 +3710,20 @@ static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec, return true; } -/// Try to fold the or of two scalar integers whose contents are packed elements -/// of the same vector. 
+/// Try to fold the join of two scalar integers whose contents are packed +/// elements of the same vector. Instruction *foldIntegerPackFromVector(Instruction &I, - InstCombiner::BuilderTy &Builder) { + InstCombiner::BuilderTy &Builder, + const DataLayout &DL) { assert(I.getOpcode() == Instruction::Or); Value *LhsVec, *RhsVec; - uint64_t LhsVecOffset, RhsVecOffset; + int64_t LhsVecOffset, RhsVecOffset; SmallBitVector Mask; if (!matchSubIntegerPackFromVector(I.getOperand(0), LhsVec, LhsVecOffset, - Mask)) + Mask, DL)) return nullptr; if (!matchSubIntegerPackFromVector(I.getOperand(1), RhsVec, RhsVecOffset, - Mask)) + Mask, DL)) return nullptr; if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset) return nullptr; @@ -3721,8 +3732,10 @@ Instruction *foldIntegerPackFromVector(Instruction &I, const unsigned ZeroVecIdx = cast(LhsVec->getType())->getNumElements(); SmallVector ShuffleMask(Mask.size(), ZeroVecIdx); - for (unsigned Idx : Mask.set_bits()) + for (unsigned Idx : Mask.set_bits()) { + assert(LhsVecOffset + Idx >= 0); ShuffleMask[Idx] = LhsVecOffset + Idx; + } Value *MaskedVec = Builder.CreateShuffleVector( LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask, @@ -3826,7 +3839,7 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { if (Instruction *X = foldComplexAndOrPatterns(I, Builder)) return X; - if (Instruction *X = foldIntegerPackFromVector(I, Builder)) + if (Instruction *X = foldIntegerPackFromVector(I, Builder, DL)) return X; // (A & B) | (C & D) -> A ^ D where A == ~C && B == ~D diff --git a/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll index 1d6442a28eb59..24d16553b7792 100644 --- a/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll +++ b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll @@ -1,27 +1,45 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -passes=instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=CHECK -; RUN: opt < %s -passes=instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=CHECK +; RUN: opt < %s -passes=instcombine -data-layout="E" -S | FileCheck %s --check-prefixes=CHECK-BE +; RUN: opt < %s -passes=instcombine -data-layout="e" -S | FileCheck %s --check-prefixes=CHECK-LE -define i32 @bitcast.v2i(<4 x i8> %v) { -; CHECK-LABEL: define i32 @bitcast.v2i( -; CHECK-SAME: <4 x i8> [[V:%.*]]) { -; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V]] to i32 -; CHECK-NEXT: ret i32 [[X_3]] +define i32 @bitcast.v2i.le(<4 x i8> %v) { +; CHECK-BE-LABEL: define i32 @bitcast.v2i.le( +; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <4 x i8> [[V]], i64 0 +; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32 +; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 1 +; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32 +; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8 +; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]] +; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <4 x i8> [[V]], i64 2 +; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32 +; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16 +; CHECK-BE-NEXT: [[X_2:%.*]] = or disjoint i32 [[X_1]], [[S_2]] +; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 3 +; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32 +; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24 +; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_2]], [[S_3]] +; CHECK-BE-NEXT: ret i32 
[[X_3]] ; - %v.0 = extractelement <4 x i8> %v, i32 0 +; CHECK-LE-LABEL: define i32 @bitcast.v2i.le( +; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-LE-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V]] to i32 +; CHECK-LE-NEXT: ret i32 [[X_3]] +; + %v.0 = extractelement <4 x i8> %v, i64 0 %z.0 = zext i8 %v.0 to i32 - %v.1 = extractelement <4 x i8> %v, i32 1 + %v.1 = extractelement <4 x i8> %v, i64 1 %z.1 = zext i8 %v.1 to i32 %s.1 = shl i32 %z.1, 8 %x.1 = or i32 %z.0, %s.1 - %v.2 = extractelement <4 x i8> %v, i32 2 + %v.2 = extractelement <4 x i8> %v, i64 2 %z.2 = zext i8 %v.2 to i32 %s.2 = shl i32 %z.2, 16 %x.2 = or i32 %x.1, %s.2 - %v.3 = extractelement <4 x i8> %v, i32 3 + %v.3 = extractelement <4 x i8> %v, i64 3 %z.3 = zext i8 %v.3 to i32 %s.3 = shl i32 %z.3, 24 %x.3 = or i32 %x.2, %s.3 @@ -29,25 +47,178 @@ define i32 @bitcast.v2i(<4 x i8> %v) { ret i32 %x.3 } -define i32 @bitcast.v2i.tree(<4 x i8> %v) { -; CHECK-LABEL: define i32 @bitcast.v2i.tree( -; CHECK-SAME: <4 x i8> [[V:%.*]]) { -; CHECK-NEXT: [[X:%.*]] = bitcast <4 x i8> [[V]] to i32 -; CHECK-NEXT: ret i32 [[X]] +define i32 @bitcast.v2i.be(<4 x i8> %v) { +; CHECK-BE-LABEL: define i32 @bitcast.v2i.be( +; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-BE-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V]] to i32 +; CHECK-BE-NEXT: ret i32 [[X_3]] +; +; CHECK-LE-LABEL: define i32 @bitcast.v2i.be( +; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i8> [[V]], i64 3 +; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32 +; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 2 +; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32 +; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8 +; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]] +; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <4 x i8> [[V]], i64 1 +; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32 +; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16 +; CHECK-LE-NEXT: [[X_2:%.*]] = or disjoint i32 [[X_1]], [[S_2]] +; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 0 +; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32 +; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24 +; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_2]], [[S_3]] +; CHECK-LE-NEXT: ret i32 [[X_3]] ; - %v.0 = extractelement <4 x i8> %v, i32 0 + %v.0 = extractelement <4 x i8> %v, i64 3 %z.0 = zext i8 %v.0 to i32 - %v.1 = extractelement <4 x i8> %v, i32 1 + %v.1 = extractelement <4 x i8> %v, i64 2 %z.1 = zext i8 %v.1 to i32 %s.1 = shl i32 %z.1, 8 %x.1 = or i32 %z.0, %s.1 - %v.2 = extractelement <4 x i8> %v, i32 2 + %v.2 = extractelement <4 x i8> %v, i64 1 %z.2 = zext i8 %v.2 to i32 %s.2 = shl i32 %z.2, 16 + %x.2 = or i32 %x.1, %s.2 - %v.3 = extractelement <4 x i8> %v, i32 3 + %v.3 = extractelement <4 x i8> %v, i64 0 + %z.3 = zext i8 %v.3 to i32 + %s.3 = shl i32 %z.3, 24 + %x.3 = or i32 %x.2, %s.3 + + ret i32 %x.3 +} + +define i64 @bitcast.v2i.le.i16(<4 x i16> %v) { +; CHECK-BE-LABEL: define i64 @bitcast.v2i.le.i16( +; CHECK-BE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 0 +; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64 +; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <4 x i16> [[V]], i64 1 +; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64 +; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16 +; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]] +; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <4 x i16> [[V]], i64 2 +; CHECK-BE-NEXT: [[Z_2:%.*]] = zext 
i16 [[V_2]] to i64 +; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32 +; CHECK-BE-NEXT: [[X_2:%.*]] = or disjoint i64 [[X_1]], [[S_2]] +; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> [[V]], i64 3 +; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64 +; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48 +; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]] +; CHECK-BE-NEXT: ret i64 [[X_3]] +; +; CHECK-LE-LABEL: define i64 @bitcast.v2i.le.i16( +; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-LE-NEXT: [[X_3:%.*]] = bitcast <4 x i16> [[V]] to i64 +; CHECK-LE-NEXT: ret i64 [[X_3]] +; + %v.0 = extractelement <4 x i16> %v, i64 0 + %z.0 = zext i16 %v.0 to i64 + + %v.1 = extractelement <4 x i16> %v, i64 1 + %z.1 = zext i16 %v.1 to i64 + %s.1 = shl i64 %z.1, 16 + %x.1 = or i64 %z.0, %s.1 + + %v.2 = extractelement <4 x i16> %v, i64 2 + %z.2 = zext i16 %v.2 to i64 + %s.2 = shl i64 %z.2, 32 + %x.2 = or i64 %x.1, %s.2 + + %v.3 = extractelement <4 x i16> %v, i64 3 + %z.3 = zext i16 %v.3 to i64 + %s.3 = shl i64 %z.3, 48 + %x.3 = or i64 %x.2, %s.3 + + ret i64 %x.3 +} + +define i64 @bitcast.v2i.be.i16(<4 x i16> %v) { +; CHECK-BE-LABEL: define i64 @bitcast.v2i.be.i16( +; CHECK-BE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-BE-NEXT: [[X_3:%.*]] = bitcast <4 x i16> [[V]] to i64 +; CHECK-BE-NEXT: ret i64 [[X_3]] +; +; CHECK-LE-LABEL: define i64 @bitcast.v2i.be.i16( +; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 3 +; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64 +; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <4 x i16> [[V]], i64 2 +; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64 +; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16 +; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]] +; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <4 x i16> [[V]], i64 1 +; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64 +; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32 +; CHECK-LE-NEXT: [[X_2:%.*]] = or disjoint i64 [[X_1]], [[S_2]] +; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> [[V]], i64 0 +; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64 +; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48 +; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]] +; CHECK-LE-NEXT: ret i64 [[X_3]] +; + %v.0 = extractelement <4 x i16> %v, i64 3 + %z.0 = zext i16 %v.0 to i64 + + %v.1 = extractelement <4 x i16> %v, i64 2 + %z.1 = zext i16 %v.1 to i64 + %s.1 = shl i64 %z.1, 16 + %x.1 = or i64 %z.0, %s.1 + + %v.2 = extractelement <4 x i16> %v, i64 1 + %z.2 = zext i16 %v.2 to i64 + %s.2 = shl i64 %z.2, 32 + %x.2 = or i64 %x.1, %s.2 + + %v.3 = extractelement <4 x i16> %v, i64 0 + %z.3 = zext i16 %v.3 to i64 + %s.3 = shl i64 %z.3, 48 + %x.3 = or i64 %x.2, %s.3 + + ret i64 %x.3 +} + +define i32 @bitcast.v2i.le.tree(<4 x i8> %v) { +; CHECK-BE-LABEL: define i32 @bitcast.v2i.le.tree( +; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <4 x i8> [[V]], i64 0 +; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32 +; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 1 +; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32 +; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8 +; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]] +; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <4 x i8> [[V]], i64 2 +; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32 +; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16 +; CHECK-BE-NEXT: 
[[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 3 +; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32 +; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24 +; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i32 [[S_2]], [[S_3]] +; CHECK-BE-NEXT: [[X:%.*]] = or disjoint i32 [[X_1]], [[X_3]] +; CHECK-BE-NEXT: ret i32 [[X]] +; +; CHECK-LE-LABEL: define i32 @bitcast.v2i.le.tree( +; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-LE-NEXT: [[X:%.*]] = bitcast <4 x i8> [[V]] to i32 +; CHECK-LE-NEXT: ret i32 [[X]] +; + %v.0 = extractelement <4 x i8> %v, i64 0 + %z.0 = zext i8 %v.0 to i32 + + %v.1 = extractelement <4 x i8> %v, i64 1 + %z.1 = zext i8 %v.1 to i32 + %s.1 = shl i32 %z.1, 8 + %x.1 = or i32 %z.0, %s.1 + + %v.2 = extractelement <4 x i8> %v, i64 2 + %z.2 = zext i8 %v.2 to i32 + %s.2 = shl i32 %z.2, 16 + + %v.3 = extractelement <4 x i8> %v, i64 3 %z.3 = zext i8 %v.3 to i32 %s.3 = shl i32 %z.3, 24 %x.3 = or i32 %s.2, %s.3 @@ -57,28 +228,184 @@ define i32 @bitcast.v2i.tree(<4 x i8> %v) { ret i32 %x } -define i32 @extract.i32(<8 x i8> %v) { -; CHECK-LABEL: define i32 @extract.i32( -; CHECK-SAME: <8 x i8> [[V:%.*]]) { -; CHECK-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> poison, <8 x i32> -; CHECK-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i8> [[X_3_V_EXTRACT]] to <2 x i32> -; CHECK-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i32> [[X_3_V_BC]], i64 0 -; CHECK-NEXT: ret i32 [[X_3_V_EXTRACT1]] +define i32 @bitcast.v2i.be.tree(<4 x i8> %v) { +; CHECK-BE-LABEL: define i32 @bitcast.v2i.be.tree( +; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-BE-NEXT: [[X:%.*]] = bitcast <4 x i8> [[V]] to i32 +; CHECK-BE-NEXT: ret i32 [[X]] +; +; CHECK-LE-LABEL: define i32 @bitcast.v2i.be.tree( +; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i8> [[V]], i64 3 +; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32 +; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 2 +; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32 +; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8 +; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]] +; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <4 x i8> [[V]], i64 1 +; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32 +; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16 +; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 0 +; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32 +; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24 +; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i32 [[S_2]], [[S_3]] +; CHECK-LE-NEXT: [[X:%.*]] = or disjoint i32 [[X_1]], [[X_3]] +; CHECK-LE-NEXT: ret i32 [[X]] ; - %v.0 = extractelement <8 x i8> %v, i32 3 + %v.0 = extractelement <4 x i8> %v, i64 3 %z.0 = zext i8 %v.0 to i32 - %v.1 = extractelement <8 x i8> %v, i32 4 + %v.1 = extractelement <4 x i8> %v, i64 2 %z.1 = zext i8 %v.1 to i32 %s.1 = shl i32 %z.1, 8 %x.1 = or i32 %z.0, %s.1 - %v.2 = extractelement <8 x i8> %v, i32 5 + %v.2 = extractelement <4 x i8> %v, i64 1 + %z.2 = zext i8 %v.2 to i32 + %s.2 = shl i32 %z.2, 16 + + %v.3 = extractelement <4 x i8> %v, i64 0 + %z.3 = zext i8 %v.3 to i32 + %s.3 = shl i32 %z.3, 24 + %x.3 = or i32 %s.2, %s.3 + + %x = or i32 %x.1, %x.3 + + ret i32 %x +} + +define i64 @bitcast.v2i.le.tree.i16(<4 x i16> %v) { +; CHECK-BE-LABEL: define i64 @bitcast.v2i.le.tree.i16( +; CHECK-BE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 0 +; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64 +; CHECK-BE-NEXT: [[V_1:%.*]] = 
extractelement <4 x i16> [[V]], i64 1 +; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64 +; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16 +; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]] +; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <4 x i16> [[V]], i64 2 +; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64 +; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32 +; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> [[V]], i64 3 +; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64 +; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48 +; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i64 [[S_2]], [[S_3]] +; CHECK-BE-NEXT: [[X:%.*]] = or disjoint i64 [[X_1]], [[X_3]] +; CHECK-BE-NEXT: ret i64 [[X]] +; +; CHECK-LE-LABEL: define i64 @bitcast.v2i.le.tree.i16( +; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-LE-NEXT: [[X:%.*]] = bitcast <4 x i16> [[V]] to i64 +; CHECK-LE-NEXT: ret i64 [[X]] +; + %v.0 = extractelement <4 x i16> %v, i64 0 + %z.0 = zext i16 %v.0 to i64 + + %v.1 = extractelement <4 x i16> %v, i64 1 + %z.1 = zext i16 %v.1 to i64 + %s.1 = shl i64 %z.1, 16 + %x.1 = or i64 %z.0, %s.1 + + %v.2 = extractelement <4 x i16> %v, i64 2 + %z.2 = zext i16 %v.2 to i64 + %s.2 = shl i64 %z.2, 32 + + %v.3 = extractelement <4 x i16> %v, i64 3 + %z.3 = zext i16 %v.3 to i64 + %s.3 = shl i64 %z.3, 48 + %x.3 = or i64 %s.2, %s.3 + + %x = or i64 %x.1, %x.3 + + ret i64 %x +} + +define i64 @bitcast.v2i.be.tree.i16(<4 x i16> %v) { +; CHECK-BE-LABEL: define i64 @bitcast.v2i.be.tree.i16( +; CHECK-BE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-BE-NEXT: [[X:%.*]] = bitcast <4 x i16> [[V]] to i64 +; CHECK-BE-NEXT: ret i64 [[X]] +; +; CHECK-LE-LABEL: define i64 @bitcast.v2i.be.tree.i16( +; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 3 +; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64 +; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <4 x i16> [[V]], i64 2 +; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64 +; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16 +; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]] +; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <4 x i16> [[V]], i64 1 +; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64 +; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32 +; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> [[V]], i64 0 +; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64 +; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48 +; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i64 [[S_2]], [[S_3]] +; CHECK-LE-NEXT: [[X:%.*]] = or disjoint i64 [[X_1]], [[X_3]] +; CHECK-LE-NEXT: ret i64 [[X]] +; + %v.0 = extractelement <4 x i16> %v, i64 3 + %z.0 = zext i16 %v.0 to i64 + + %v.1 = extractelement <4 x i16> %v, i64 2 + %z.1 = zext i16 %v.1 to i64 + %s.1 = shl i64 %z.1, 16 + %x.1 = or i64 %z.0, %s.1 + + %v.2 = extractelement <4 x i16> %v, i64 1 + %z.2 = zext i16 %v.2 to i64 + %s.2 = shl i64 %z.2, 32 + + %v.3 = extractelement <4 x i16> %v, i64 0 + %z.3 = zext i16 %v.3 to i64 + %s.3 = shl i64 %z.3, 48 + %x.3 = or i64 %s.2, %s.3 + + %x = or i64 %x.1, %x.3 + + ret i64 %x +} + +define i32 @extract.le.i32(<8 x i8> %v) { +; CHECK-BE-LABEL: define i32 @extract.le.i32( +; CHECK-BE-SAME: <8 x i8> [[V:%.*]]) { +; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <8 x i8> [[V]], i64 3 +; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32 +; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <8 x i8> [[V]], i64 4 +; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32 +; CHECK-BE-NEXT: 
[[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8 +; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]] +; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <8 x i8> [[V]], i64 5 +; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32 +; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16 +; CHECK-BE-NEXT: [[X_2:%.*]] = or disjoint i32 [[X_1]], [[S_2]] +; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <8 x i8> [[V]], i64 6 +; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32 +; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24 +; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_2]], [[S_3]] +; CHECK-BE-NEXT: ret i32 [[X_3]] +; +; CHECK-LE-LABEL: define i32 @extract.le.i32( +; CHECK-LE-SAME: <8 x i8> [[V:%.*]]) { +; CHECK-LE-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> poison, <8 x i32> +; CHECK-LE-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i8> [[X_3_V_EXTRACT]] to <2 x i32> +; CHECK-LE-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i32> [[X_3_V_BC]], i64 0 +; CHECK-LE-NEXT: ret i32 [[X_3_V_EXTRACT1]] +; + %v.0 = extractelement <8 x i8> %v, i64 3 + %z.0 = zext i8 %v.0 to i32 + + %v.1 = extractelement <8 x i8> %v, i64 4 + %z.1 = zext i8 %v.1 to i32 + %s.1 = shl i32 %z.1, 8 + %x.1 = or i32 %z.0, %s.1 + + %v.2 = extractelement <8 x i8> %v, i64 5 %z.2 = zext i8 %v.2 to i32 %s.2 = shl i32 %z.2, 16 %x.2 = or i32 %x.1, %s.2 - %v.3 = extractelement <8 x i8> %v, i32 6 + %v.3 = extractelement <8 x i8> %v, i64 6 %z.3 = zext i8 %v.3 to i32 %s.3 = shl i32 %z.3, 24 %x.3 = or i32 %x.2, %s.3 @@ -86,25 +413,292 @@ define i32 @extract.i32(<8 x i8> %v) { ret i32 %x.3 } -define i32 @partial(<4 x i8> %v) { -; CHECK-LABEL: define i32 @partial( -; CHECK-SAME: <4 x i8> [[V:%.*]]) { -; CHECK-NEXT: [[X_3_V1:%.*]] = insertelement <4 x i8> [[V]], i8 0, i64 2 -; CHECK-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[X_3_V1]] to i32 -; CHECK-NEXT: ret i32 [[X_3]] +define i32 @extract.be.i32(<8 x i8> %v) { +; CHECK-BE-LABEL: define i32 @extract.be.i32( +; CHECK-BE-SAME: <8 x i8> [[V:%.*]]) { +; CHECK-BE-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> poison, <8 x i32> +; CHECK-BE-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i8> [[X_3_V_EXTRACT]] to <2 x i32> +; CHECK-BE-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i32> [[X_3_V_BC]], i64 0 +; CHECK-BE-NEXT: ret i32 [[X_3_V_EXTRACT1]] +; +; CHECK-LE-LABEL: define i32 @extract.be.i32( +; CHECK-LE-SAME: <8 x i8> [[V:%.*]]) { +; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <8 x i8> [[V]], i64 6 +; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32 +; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <8 x i8> [[V]], i64 5 +; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32 +; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8 +; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]] +; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <8 x i8> [[V]], i64 4 +; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32 +; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16 +; CHECK-LE-NEXT: [[X_2:%.*]] = or disjoint i32 [[X_1]], [[S_2]] +; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <8 x i8> [[V]], i64 3 +; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32 +; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24 +; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_2]], [[S_3]] +; CHECK-LE-NEXT: ret i32 [[X_3]] +; + %v.0 = extractelement <8 x i8> %v, i64 6 + %z.0 = zext i8 %v.0 to i32 + + %v.1 = extractelement <8 x i8> %v, i64 5 + %z.1 = zext i8 %v.1 to i32 + %s.1 = shl i32 %z.1, 8 + %x.1 = or i32 %z.0, %s.1 + + %v.2 = extractelement <8 x i8> 
%v, i64 4 + %z.2 = zext i8 %v.2 to i32 + %s.2 = shl i32 %z.2, 16 + %x.2 = or i32 %x.1, %s.2 + + %v.3 = extractelement <8 x i8> %v, i64 3 + %z.3 = zext i8 %v.3 to i32 + %s.3 = shl i32 %z.3, 24 + %x.3 = or i32 %x.2, %s.3 + + ret i32 %x.3 +} + +define i64 @extract.le.i64(<8 x i16> %v) { +; CHECK-BE-LABEL: define i64 @extract.le.i64( +; CHECK-BE-SAME: <8 x i16> [[V:%.*]]) { +; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <8 x i16> [[V]], i64 3 +; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64 +; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <8 x i16> [[V]], i64 4 +; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64 +; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16 +; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]] +; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <8 x i16> [[V]], i64 5 +; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64 +; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32 +; CHECK-BE-NEXT: [[X_2:%.*]] = or disjoint i64 [[X_1]], [[S_2]] +; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <8 x i16> [[V]], i64 6 +; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64 +; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48 +; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]] +; CHECK-BE-NEXT: ret i64 [[X_3]] +; +; CHECK-LE-LABEL: define i64 @extract.le.i64( +; CHECK-LE-SAME: <8 x i16> [[V:%.*]]) { +; CHECK-LE-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i16> [[V]], <8 x i16> poison, <8 x i32> +; CHECK-LE-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i16> [[X_3_V_EXTRACT]] to <2 x i64> +; CHECK-LE-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i64> [[X_3_V_BC]], i64 0 +; CHECK-LE-NEXT: ret i64 [[X_3_V_EXTRACT1]] +; + %v.0 = extractelement <8 x i16> %v, i64 3 + %z.0 = zext i16 %v.0 to i64 + + %v.1 = extractelement <8 x i16> %v, i64 4 + %z.1 = zext i16 %v.1 to i64 + %s.1 = shl i64 %z.1, 16 + %x.1 = or i64 %z.0, %s.1 + + %v.2 = extractelement <8 x i16> %v, i64 5 + %z.2 = zext i16 %v.2 to i64 + %s.2 = shl i64 %z.2, 32 + %x.2 = or i64 %x.1, %s.2 + + %v.3 = extractelement <8 x i16> %v, i64 6 + %z.3 = zext i16 %v.3 to i64 + %s.3 = shl i64 %z.3, 48 + %x.3 = or i64 %x.2, %s.3 + + ret i64 %x.3 +} + +define i64 @extract.be.i64(<8 x i16> %v) { +; CHECK-BE-LABEL: define i64 @extract.be.i64( +; CHECK-BE-SAME: <8 x i16> [[V:%.*]]) { +; CHECK-BE-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i16> [[V]], <8 x i16> poison, <8 x i32> +; CHECK-BE-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i16> [[X_3_V_EXTRACT]] to <2 x i64> +; CHECK-BE-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i64> [[X_3_V_BC]], i64 0 +; CHECK-BE-NEXT: ret i64 [[X_3_V_EXTRACT1]] +; +; CHECK-LE-LABEL: define i64 @extract.be.i64( +; CHECK-LE-SAME: <8 x i16> [[V:%.*]]) { +; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <8 x i16> [[V]], i64 6 +; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64 +; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <8 x i16> [[V]], i64 5 +; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64 +; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16 +; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]] +; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <8 x i16> [[V]], i64 4 +; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64 +; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32 +; CHECK-LE-NEXT: [[X_2:%.*]] = or disjoint i64 [[X_1]], [[S_2]] +; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <8 x i16> [[V]], i64 3 +; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64 +; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48 +; CHECK-LE-NEXT: 
[[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]] +; CHECK-LE-NEXT: ret i64 [[X_3]] +; + %v.0 = extractelement <8 x i16> %v, i64 6 + %z.0 = zext i16 %v.0 to i64 + + %v.1 = extractelement <8 x i16> %v, i64 5 + %z.1 = zext i16 %v.1 to i64 + %s.1 = shl i64 %z.1, 16 + %x.1 = or i64 %z.0, %s.1 + + %v.2 = extractelement <8 x i16> %v, i64 4 + %z.2 = zext i16 %v.2 to i64 + %s.2 = shl i64 %z.2, 32 + %x.2 = or i64 %x.1, %s.2 + + %v.3 = extractelement <8 x i16> %v, i64 3 + %z.3 = zext i16 %v.3 to i64 + %s.3 = shl i64 %z.3, 48 + %x.3 = or i64 %x.2, %s.3 + + ret i64 %x.3 +} + +define i32 @partial.le(<4 x i8> %v) { +; CHECK-BE-LABEL: define i32 @partial.le( +; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <4 x i8> [[V]], i64 0 +; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32 +; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 1 +; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32 +; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8 +; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]] +; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 3 +; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32 +; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24 +; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_1]], [[S_3]] +; CHECK-BE-NEXT: ret i32 [[X_3]] ; - %v.0 = extractelement <4 x i8> %v, i32 0 +; CHECK-LE-LABEL: define i32 @partial.le( +; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-LE-NEXT: [[X_3_V1:%.*]] = insertelement <4 x i8> [[V]], i8 0, i64 2 +; CHECK-LE-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[X_3_V1]] to i32 +; CHECK-LE-NEXT: ret i32 [[X_3]] +; + %v.0 = extractelement <4 x i8> %v, i64 0 %z.0 = zext i8 %v.0 to i32 - %v.1 = extractelement <4 x i8> %v, i32 1 + %v.1 = extractelement <4 x i8> %v, i64 1 %z.1 = zext i8 %v.1 to i32 %s.1 = shl i32 %z.1, 8 %x.1 = or i32 %z.0, %s.1 - %v.3 = extractelement <4 x i8> %v, i32 3 + %v.3 = extractelement <4 x i8> %v, i64 3 %z.3 = zext i8 %v.3 to i32 %s.3 = shl i32 %z.3, 24 %x.3 = or i32 %x.1, %s.3 ret i32 %x.3 } + +define i32 @partial.be(<4 x i8> %v) { +; CHECK-BE-LABEL: define i32 @partial.be( +; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-BE-NEXT: [[X_3_V1:%.*]] = insertelement <4 x i8> [[V]], i8 0, i64 2 +; CHECK-BE-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[X_3_V1]] to i32 +; CHECK-BE-NEXT: ret i32 [[X_3]] +; +; CHECK-LE-LABEL: define i32 @partial.be( +; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i8> [[V]], i64 3 +; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32 +; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 1 +; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32 +; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 16 +; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]] +; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 0 +; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32 +; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24 +; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_1]], [[S_3]] +; CHECK-LE-NEXT: ret i32 [[X_3]] +; + %v.0 = extractelement <4 x i8> %v, i64 3 + %z.0 = zext i8 %v.0 to i32 + + %v.1 = extractelement <4 x i8> %v, i64 1 + %z.1 = zext i8 %v.1 to i32 + %s.1 = shl i32 %z.1, 16 + %x.1 = or i32 %z.0, %s.1 + + %v.3 = extractelement <4 x i8> %v, i64 0 + %z.3 = zext i8 %v.3 to i32 + %s.3 = shl i32 %z.3, 24 + %x.3 = or i32 %x.1, %s.3 + + ret i32 %x.3 +} + + +define i64 @partial.le.i16(<4 x i16> %v) { +; CHECK-BE-LABEL: define i64 @partial.le.i16( +; 
CHECK-BE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 0 +; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64 +; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <4 x i16> [[V]], i64 1 +; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64 +; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16 +; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]] +; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> [[V]], i64 3 +; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64 +; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48 +; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_1]], [[S_3]] +; CHECK-BE-NEXT: ret i64 [[X_3]] +; +; CHECK-LE-LABEL: define i64 @partial.le.i16( +; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-LE-NEXT: [[X_3_V1:%.*]] = insertelement <4 x i16> [[V]], i16 0, i64 2 +; CHECK-LE-NEXT: [[X_3:%.*]] = bitcast <4 x i16> [[X_3_V1]] to i64 +; CHECK-LE-NEXT: ret i64 [[X_3]] +; + %v.0 = extractelement <4 x i16> %v, i64 0 + %z.0 = zext i16 %v.0 to i64 + + %v.1 = extractelement <4 x i16> %v, i64 1 + %z.1 = zext i16 %v.1 to i64 + %s.1 = shl i64 %z.1, 16 + %x.1 = or i64 %z.0, %s.1 + + %v.3 = extractelement <4 x i16> %v, i64 3 + %z.3 = zext i16 %v.3 to i64 + %s.3 = shl i64 %z.3, 48 + %x.3 = or i64 %x.1, %s.3 + + ret i64 %x.3 +} + +define i64 @partial.be.i16(<4 x i16> %v) { +; CHECK-BE-LABEL: define i64 @partial.be.i16( +; CHECK-BE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-BE-NEXT: [[X_3_V1:%.*]] = insertelement <4 x i16> [[V]], i16 0, i64 2 +; CHECK-BE-NEXT: [[X_3:%.*]] = bitcast <4 x i16> [[X_3_V1]] to i64 +; CHECK-BE-NEXT: ret i64 [[X_3]] +; +; CHECK-LE-LABEL: define i64 @partial.be.i16( +; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 3 +; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64 +; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <4 x i16> [[V]], i64 1 +; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64 +; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 32 +; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]] +; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> [[V]], i64 0 +; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64 +; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48 +; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_1]], [[S_3]] +; CHECK-LE-NEXT: ret i64 [[X_3]] +; + %v.0 = extractelement <4 x i16> %v, i64 3 + %z.0 = zext i16 %v.0 to i64 + + %v.1 = extractelement <4 x i16> %v, i64 1 + %z.1 = zext i16 %v.1 to i64 + %s.1 = shl i64 %z.1, 32 + %x.1 = or i64 %z.0, %s.1 + + %v.3 = extractelement <4 x i16> %v, i64 0 + %z.3 = zext i16 %v.3 to i64 + %s.3 = shl i64 %z.3, 48 + %x.3 = or i64 %x.1, %s.3 + + ret i64 %x.3 +} From d9477769e8997fbb2f6e0caf7d34fb7bda94f501 Mon Sep 17 00:00:00 2001 From: Zach Goldthorpe Date: Wed, 30 Jul 2025 10:11:25 -0500 Subject: [PATCH 6/6] Incorporated reviewer feedback --- .../InstCombine/InstCombineAndOrXor.cpp | 22 +- .../InstCombine/or-packed-int-vecs.ll | 226 +++++++++++++++++- 2 files changed, 235 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 852b5fff53b3d..7f337249629dd 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -3642,21 +3642,21 @@ static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec, } // Now try to match a bitcasted subvector. 
-  Instruction *DstVecI;
-  if (!match(V, m_BitCast(m_Instruction(DstVecI))))
+  Instruction *SrcVecI;
+  if (!match(V, m_BitCast(m_Instruction(SrcVecI))))
     return false;
 
-  auto *DstTy = dyn_cast<FixedVectorType>(DstVecI->getType());
-  if (!DstTy)
+  auto *SrcTy = dyn_cast<FixedVectorType>(SrcVecI->getType());
+  if (!SrcTy)
     return false;
 
-  Mask.resize(DstTy->getNumElements());
+  Mask.resize(SrcTy->getNumElements());
 
   // First check for a subvector obtained from a shufflevector.
-  if (isa<ShuffleVectorInst>(DstVecI)) {
+  if (isa<ShuffleVectorInst>(SrcVecI)) {
     Constant *ConstVec;
     ArrayRef<int> ShuffleMask;
-    if (!match(DstVecI, m_Shuffle(m_Value(Vec), m_Constant(ConstVec),
+    if (!match(SrcVecI, m_Shuffle(m_Value(Vec), m_Constant(ConstVec),
                                   m_Mask(ShuffleMask))))
       return false;
@@ -3695,7 +3695,7 @@ static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
 
   // Check for a subvector obtained as an (insertelement V, 0, idx)
   uint64_t InsertIdx;
-  if (!match(DstVecI,
+  if (!match(SrcVecI,
              m_InsertElt(m_Value(Vec), m_Zero(), m_ConstantInt(InsertIdx))))
     return false;
@@ -3712,9 +3712,9 @@ static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
 
 /// Try to fold the join of two scalar integers whose contents are packed
 /// elements of the same vector.
-Instruction *foldIntegerPackFromVector(Instruction &I,
-                                       InstCombiner::BuilderTy &Builder,
-                                       const DataLayout &DL) {
+static Instruction *foldIntegerPackFromVector(Instruction &I,
+                                              InstCombiner::BuilderTy &Builder,
+                                              const DataLayout &DL) {
   assert(I.getOpcode() == Instruction::Or);
   Value *LhsVec, *RhsVec;
   int64_t LhsVecOffset, RhsVecOffset;
diff --git a/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
index 24d16553b7792..9391fb5ddae97 100644
--- a/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
+++ b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt < %s -passes=instcombine -data-layout="E" -S | FileCheck %s --check-prefixes=CHECK-BE
-; RUN: opt < %s -passes=instcombine -data-layout="e" -S | FileCheck %s --check-prefixes=CHECK-LE
+; RUN: opt %s -passes=instcombine -data-layout="E" -S | FileCheck %s --check-prefixes=CHECK,CHECK-BE
+; RUN: opt %s -passes=instcombine -data-layout="e" -S | FileCheck %s --check-prefixes=CHECK,CHECK-LE
 
 define i32 @bitcast.v2i.le(<4 x i8> %v) {
 ; CHECK-BE-LABEL: define i32 @bitcast.v2i.le(
@@ -702,3 +702,225 @@ define i64 @partial.be.i16(<4 x i16> %v) {
 
   ret i64 %x.3
 }
+
+define i32 @partial.extract.le.i32(<8 x i8> %v) {
+; CHECK-BE-LABEL: define i32 @partial.extract.le.i32(
+; CHECK-BE-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-BE-NEXT:    [[V_0:%.*]] = extractelement <8 x i8> [[V]], i64 3
+; CHECK-BE-NEXT:    [[Z_0:%.*]] = zext i8 [[V_0]] to i32
+; CHECK-BE-NEXT:    [[V_1:%.*]] = extractelement <8 x i8> [[V]], i64 4
+; CHECK-BE-NEXT:    [[Z_1:%.*]] = zext i8 [[V_1]] to i32
+; CHECK-BE-NEXT:    [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8
+; CHECK-BE-NEXT:    [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]]
+; CHECK-BE-NEXT:    [[V_3:%.*]] = extractelement <8 x i8> [[V]], i64 6
+; CHECK-BE-NEXT:    [[Z_3:%.*]] = zext i8 [[V_3]] to i32
+; CHECK-BE-NEXT:    [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24
+; CHECK-BE-NEXT:    [[X_3:%.*]] = or disjoint i32 [[X_1]], [[S_3]]
+; CHECK-BE-NEXT:    ret i32 [[X_3]]
+;
+; CHECK-LE-LABEL: define i32 @partial.extract.le.i32(
+; CHECK-LE-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-LE-NEXT:    [[X_3_V:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> , <4 x i32> 
+; CHECK-LE-NEXT:    [[X_3:%.*]] = bitcast <4 x i8> [[X_3_V]] to i32
+; CHECK-LE-NEXT:    ret i32 [[X_3]]
+;
+  %v.0 = extractelement <8 x i8> %v, i64 3
+  %z.0 = zext i8 %v.0 to i32
+
+  %v.1 = extractelement <8 x i8> %v, i64 4
+  %z.1 = zext i8 %v.1 to i32
+  %s.1 = shl i32 %z.1, 8
+  %x.1 = or i32 %z.0, %s.1
+
+  %v.3 = extractelement <8 x i8> %v, i64 6
+  %z.3 = zext i8 %v.3 to i32
+  %s.3 = shl i32 %z.3, 24
+  %x.3 = or i32 %x.1, %s.3
+
+  ret i32 %x.3
+}
+
+define i32 @partial.extract.be.i32(<8 x i8> %v) {
+; CHECK-BE-LABEL: define i32 @partial.extract.be.i32(
+; CHECK-BE-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-BE-NEXT:    [[X_3_V:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> , <4 x i32> 
+; CHECK-BE-NEXT:    [[X_3:%.*]] = bitcast <4 x i8> [[X_3_V]] to i32
+; CHECK-BE-NEXT:    ret i32 [[X_3]]
+;
+; CHECK-LE-LABEL: define i32 @partial.extract.be.i32(
+; CHECK-LE-SAME: <8 x i8> [[V:%.*]]) {
+; CHECK-LE-NEXT:    [[V_0:%.*]] = extractelement <8 x i8> [[V]], i64 6
+; CHECK-LE-NEXT:    [[Z_0:%.*]] = zext i8 [[V_0]] to i32
+; CHECK-LE-NEXT:    [[V_2:%.*]] = extractelement <8 x i8> [[V]], i64 4
+; CHECK-LE-NEXT:    [[Z_2:%.*]] = zext i8 [[V_2]] to i32
+; CHECK-LE-NEXT:    [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16
+; CHECK-LE-NEXT:    [[X_2:%.*]] = or disjoint i32 [[S_2]], [[Z_0]]
+; CHECK-LE-NEXT:    [[V_3:%.*]] = extractelement <8 x i8> [[V]], i64 3
+; CHECK-LE-NEXT:    [[Z_3:%.*]] = zext i8 [[V_3]] to i32
+; CHECK-LE-NEXT:    [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24
+; CHECK-LE-NEXT:    [[X_3:%.*]] = or disjoint i32 [[X_2]], [[S_3]]
+; CHECK-LE-NEXT:    ret i32 [[X_3]]
+;
+  %v.0 = extractelement <8 x i8> %v, i64 6
+  %z.0 = zext i8 %v.0 to i32
+
+  %v.2 = extractelement <8 x i8> %v, i64 4
+  %z.2 = zext i8 %v.2 to i32
+  %s.2 = shl i32 %z.2, 16
+  %x.2 = or i32 %z.0, %s.2
+
+  %v.3 = extractelement <8 x i8> %v, i64 3
+  %z.3 = zext i8 %v.3 to i32
+  %s.3 = shl i32 %z.3, 24
+  %x.3 = or i32 %x.2, %s.3
+
+  ret i32 %x.3
+}
+
+define i64 @partial.extract.le.i64(<8 x i16> %v) {
+; CHECK-BE-LABEL: define i64 @partial.extract.le.i64(
+; CHECK-BE-SAME: <8 x i16> [[V:%.*]]) {
+; CHECK-BE-NEXT:    [[V_0:%.*]] = extractelement <8 x i16> [[V]], i64 3
+; CHECK-BE-NEXT:    [[Z_0:%.*]] = zext i16 [[V_0]] to i64
+; CHECK-BE-NEXT:    [[V_1:%.*]] = extractelement <8 x i16> [[V]], i64 4
+; CHECK-BE-NEXT:    [[Z_1:%.*]] = zext i16 [[V_1]] to i64
+; CHECK-BE-NEXT:    [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16
+; CHECK-BE-NEXT:    [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]]
+; CHECK-BE-NEXT:    [[V_2:%.*]] = extractelement <8 x i16> [[V]], i64 5
+; CHECK-BE-NEXT:    [[Z_2:%.*]] = zext i16 [[V_2]] to i64
+; CHECK-BE-NEXT:    [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32
+; CHECK-BE-NEXT:    [[X_2:%.*]] = or disjoint i64 [[X_1]], [[S_2]]
+; CHECK-BE-NEXT:    [[V_3:%.*]] = extractelement <8 x i16> [[V]], i64 6
+; CHECK-BE-NEXT:    [[Z_3:%.*]] = zext i16 [[V_3]] to i64
+; CHECK-BE-NEXT:    [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48
+; CHECK-BE-NEXT:    [[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]]
+; CHECK-BE-NEXT:    ret i64 [[X_3]]
+;
+; CHECK-LE-LABEL: define i64 @partial.extract.le.i64(
+; CHECK-LE-SAME: <8 x i16> [[V:%.*]]) {
+; CHECK-LE-NEXT:    [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i16> [[V]], <8 x i16> poison, <8 x i32> 
+; CHECK-LE-NEXT:    [[X_3_V_BC:%.*]] = bitcast <8 x i16> [[X_3_V_EXTRACT]] to <2 x i64>
+; CHECK-LE-NEXT:    [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i64> [[X_3_V_BC]], i64 0
+; CHECK-LE-NEXT:    ret i64 [[X_3_V_EXTRACT1]]
+;
+  %v.0 = extractelement <8 x i16> %v, i64 3
+  %z.0 = zext i16 %v.0 to i64
+
+  %v.1 = extractelement <8 x i16> %v, i64 4
+  %z.1 = zext i16 %v.1 to i64
+  %s.1 = shl i64 %z.1, 16
+  %x.1 = or i64 %z.0, %s.1
+
+  %v.2 = extractelement <8 x i16> %v, i64 5
+  %z.2 = zext i16 %v.2 to i64
+  %s.2 = shl i64 %z.2, 32
+  %x.2 = or i64 %x.1, %s.2
+
+  %v.3 = extractelement <8 x i16> %v, i64 6
+  %z.3 = zext i16 %v.3 to i64
+  %s.3 = shl i64 %z.3, 48
+  %x.3 = or i64 %x.2, %s.3
+
+  ret i64 %x.3
+}
+
+define i64 @partial.extract.be.i64(<8 x i16> %v) {
+; CHECK-BE-LABEL: define i64 @partial.extract.be.i64(
+; CHECK-BE-SAME: <8 x i16> [[V:%.*]]) {
+; CHECK-BE-NEXT:    [[X_3_V:%.*]] = shufflevector <8 x i16> [[V]], <8 x i16> , <4 x i32> 
+; CHECK-BE-NEXT:    [[X_3:%.*]] = bitcast <4 x i16> [[X_3_V]] to i64
+; CHECK-BE-NEXT:    ret i64 [[X_3]]
+;
+; CHECK-LE-LABEL: define i64 @partial.extract.be.i64(
+; CHECK-LE-SAME: <8 x i16> [[V:%.*]]) {
+; CHECK-LE-NEXT:    [[V_0:%.*]] = extractelement <8 x i16> [[V]], i64 6
+; CHECK-LE-NEXT:    [[Z_0:%.*]] = zext i16 [[V_0]] to i64
+; CHECK-LE-NEXT:    [[V_2:%.*]] = extractelement <8 x i16> [[V]], i64 4
+; CHECK-LE-NEXT:    [[Z_2:%.*]] = zext i16 [[V_2]] to i64
+; CHECK-LE-NEXT:    [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32
+; CHECK-LE-NEXT:    [[X_2:%.*]] = or disjoint i64 [[S_2]], [[Z_0]]
+; CHECK-LE-NEXT:    [[V_3:%.*]] = extractelement <8 x i16> [[V]], i64 3
+; CHECK-LE-NEXT:    [[Z_3:%.*]] = zext i16 [[V_3]] to i64
+; CHECK-LE-NEXT:    [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48
+; CHECK-LE-NEXT:    [[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]]
+; CHECK-LE-NEXT:    ret i64 [[X_3]]
+;
+  %v.0 = extractelement <8 x i16> %v, i64 6
+  %z.0 = zext i16 %v.0 to i64
+
+  %v.2 = extractelement <8 x i16> %v, i64 4
+  %z.2 = zext i16 %v.2 to i64
+  %s.2 = shl i64 %z.2, 32
+  %x.2 = or i64 %z.0, %s.2
+
+  %v.3 = extractelement <8 x i16> %v, i64 3
+  %z.3 = zext i16 %v.3 to i64
+  %s.3 = shl i64 %z.3, 48
+  %x.3 = or i64 %x.2, %s.3
+
+  ret i64 %x.3
+}
+
+define <2 x i16> @shufflecast.v2v(<4 x i8> %v) {
+; CHECK-LABEL: define <2 x i16> @shufflecast.v2v(
+; CHECK-SAME: <4 x i8> [[V:%.*]]) {
+; CHECK-NEXT:    [[W_3:%.*]] = bitcast <4 x i8> [[V]] to <2 x i16>
+; CHECK-NEXT:    ret <2 x i16> [[W_3]]
+;
+  %v.0 = shufflevector <4 x i8> %v, <4 x i8> zeroinitializer, <4 x i32> 
+  %c.0 = bitcast <4 x i8> %v.0 to <2 x i16>
+
+  %v.1 = shufflevector <4 x i8> %v, <4 x i8> zeroinitializer, <4 x i32> 
+  %c.1 = bitcast <4 x i8> %v.1 to <2 x i16>
+  %w.1 = or <2 x i16> %c.0, %c.1
+
+  %v.2 = shufflevector <4 x i8> %v, <4 x i8> zeroinitializer, <4 x i32> 
+  %c.2 = bitcast <4 x i8> %v.2 to <2 x i16>
+  %w.2 = or <2 x i16> %w.1, %c.2
+
+  %v.3 = shufflevector <4 x i8> %v, <4 x i8> zeroinitializer, <4 x i32> 
+  %c.3 = bitcast <4 x i8> %v.3 to <2 x i16>
+  %w.3 = or <2 x i16> %w.2, %c.3
+
+  ret <2 x i16> %w.3
+}
+
+define <2 x i32> @shufflecast.v2v.i16(<4 x i16> %v) {
+; CHECK-LABEL: define <2 x i32> @shufflecast.v2v.i16(
+; CHECK-SAME: <4 x i16> [[V:%.*]]) {
+; CHECK-NEXT:    [[W_3:%.*]] = bitcast <4 x i16> [[V]] to <2 x i32>
+; CHECK-NEXT:    ret <2 x i32> [[W_3]]
+;
+  %v.0 = shufflevector <4 x i16> %v, <4 x i16> zeroinitializer, <4 x i32> 
+  %c.0 = bitcast <4 x i16> %v.0 to <2 x i32>
+
+  %v.1 = shufflevector <4 x i16> %v, <4 x i16> zeroinitializer, <4 x i32> 
+  %c.1 = bitcast <4 x i16> %v.1 to <2 x i32>
+  %w.1 = or <2 x i32> %c.0, %c.1
+
+  %v.2 = shufflevector <4 x i16> %v, <4 x i16> zeroinitializer, <4 x i32> 
+  %c.2 = bitcast <4 x i16> %v.2 to <2 x i32>
+  %w.2 = or <2 x i32> %w.1, %c.2
+
+  %v.3 = shufflevector <4 x i16> %v, <4 x i16> zeroinitializer, <4 x i32> 
+  %c.3 = bitcast <4 x i16> %v.3 to <2 x i32>
+  %w.3 = or <2 x i32> %w.2, %c.3
+
+  ret <2 x i32> %w.3
+}
+
+define i32 @bitcast.v2i.half(<2 x half> %v) {
+; CHECK-LABEL: define i32 @bitcast.v2i.half(
+; CHECK-SAME: <2 x half> [[V:%.*]]) {
+; CHECK-NEXT:    [[X:%.*]] = bitcast <2 x half> [[V]] to i32
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %v.0 = insertelement <2 x half> %v, half 0.0, i64 1
+  %x.0 = bitcast <2 x half> %v.0 to i32
+
+  %v.1 = insertelement <2 x half> %v, half 0.0, i64 0
+  %x.1 = bitcast <2 x half> %v.1 to i32
+
+  %x = or i32 %x.0, %x.1
+  ret i32 %x
+}