diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index dd16cfaeecd45..7f337249629dd 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -11,10 +11,13 @@ //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/Analysis/CmpInstAnalysis.h" #include "llvm/Analysis/FloatingPointPredicateUtils.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/IR/ConstantRange.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" @@ -3592,6 +3595,154 @@ static Value *foldOrOfInversions(BinaryOperator &I, return nullptr; } +/// Match \p V as "shufflevector -> bitcast" or "extractelement -> zext -> shl" +/// patterns, which extract vector elements and pack them in the same relative +/// positions. +/// +/// \p Vec is the underlying vector being extracted from. +/// \p Mask is a bitmask identifying which packed elements are obtained from the +/// vector. +/// \p VecOffset is the vector element corresponding to index 0 of the +/// mask. 
+///
+/// \p Mask is resized to the number of packed elements and the matched
+/// positions are set in it; bits that are already set within that size are
+/// kept, so one mask can be passed to several calls to accumulate their
+/// results.
+///
+/// \returns true if \p V matched one of the patterns. On failure the output
+/// parameters may already have been written and must not be relied upon.
+static bool matchSubIntegerPackFromVector(Value *V, Value *&Vec,
+                                          int64_t &VecOffset,
+                                          SmallBitVector &Mask,
+                                          const DataLayout &DL) {
+  // Matches either (shl Base, C), binding C to ShlAmt, or Base itself, in
+  // which case ShlAmt keeps the value 0 assigned here.
+  static const auto m_ConstShlOrSelf = [](const auto &Base, uint64_t &ShlAmt) {
+    ShlAmt = 0;
+    return m_CombineOr(m_Shl(Base, m_ConstantInt(ShlAmt)), Base);
+  };
+
+  // First try to match extractelement -> zext -> shl
+  uint64_t VecIdx, ShlAmt;
+  if (match(V, m_ConstShlOrSelf(m_ZExtOrSelf(m_ExtractElt(
+                                    m_Value(Vec), m_ConstantInt(VecIdx))),
+                                ShlAmt))) {
+    auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+    if (!VecTy)
+      return false;
+    auto *EltTy = dyn_cast<IntegerType>(VecTy->getElementType());
+    if (!EltTy)
+      return false;
+
+    // An out-of-range extractelement index does not name a real element of
+    // the vector, so it cannot be described as a packed element.
+    if (VecIdx >= VecTy->getNumElements())
+      return false;
+
+    const unsigned EltBitWidth = EltTy->getBitWidth();
+    const unsigned TargetBitWidth = V->getType()->getIntegerBitWidth();
+    // The element has to land exactly on an element-sized slot of the target
+    // integer. Rejecting shifts at or beyond the integer width keeps MaskIdx
+    // (computed below) inside the bounds of Mask.
+    if (TargetBitWidth % EltBitWidth != 0 || ShlAmt % EltBitWidth != 0 ||
+        ShlAmt >= TargetBitWidth)
+      return false;
+    const unsigned TargetEltWidth = TargetBitWidth / EltBitWidth;
+    const unsigned ShlEltAmt = ShlAmt / EltBitWidth;
+
+    // Translate the bit position into the index the element would have if
+    // the integer were viewed as a vector: on big-endian layouts the
+    // numbering is reversed.
+    const unsigned MaskIdx =
+        DL.isLittleEndian() ? ShlEltAmt : TargetEltWidth - ShlEltAmt - 1;
+
+    // May be negative: the packed window is allowed to start "before" the
+    // extracted element.
+    VecOffset = static_cast<int64_t>(VecIdx) - static_cast<int64_t>(MaskIdx);
+    Mask.resize(TargetEltWidth);
+    Mask.set(MaskIdx);
+    return true;
+  }
+
+  // Now try to match a bitcasted subvector.
+  Instruction *SrcVecI;
+  if (!match(V, m_BitCast(m_Instruction(SrcVecI))))
+    return false;
+
+  auto *SrcTy = dyn_cast<FixedVectorType>(SrcVecI->getType());
+  if (!SrcTy)
+    return false;
+
+  Mask.resize(SrcTy->getNumElements());
+
+  // First check for a subvector obtained from a shufflevector.
+  if (isa<ShuffleVectorInst>(SrcVecI)) {
+    Constant *ConstVec;
+    ArrayRef<int> ShuffleMask;
+    if (!match(SrcVecI, m_Shuffle(m_Value(Vec), m_Constant(ConstVec),
+                                  m_Mask(ShuffleMask))))
+      return false;
+
+    auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+    if (!VecTy)
+      return false;
+
+    const unsigned NumVecElts = VecTy->getNumElements();
+    bool FoundVecOffset = false;
+    for (unsigned Idx = 0; Idx < ShuffleMask.size(); ++Idx) {
+      if (ShuffleMask[Idx] == PoisonMaskElem)
+        return false;
+      const unsigned ShuffleIdx = ShuffleMask[Idx];
+      if (ShuffleIdx >= NumVecElts) {
+        // Lane taken from the constant operand: it must be an integer zero
+        // so that it contributes nothing to the packed value.
+        const unsigned ConstIdx = ShuffleIdx - NumVecElts;
+        // getAggregateElement can return null when the element cannot be
+        // determined, so use the null-tolerant cast.
+        auto *ConstElt = dyn_cast_if_present<ConstantInt>(
+            ConstVec->getAggregateElement(ConstIdx));
+        if (!ConstElt || !ConstElt->isNullValue())
+          return false;
+        continue;
+      }
+
+      // Every lane taken from Vec has to agree on a single offset, i.e. the
+      // lanes must keep their relative positions.
+      if (FoundVecOffset) {
+        if (VecOffset + Idx != ShuffleIdx)
+          return false;
+      } else {
+        if (ShuffleIdx < Idx)
+          return false;
+        VecOffset = ShuffleIdx - Idx;
+        FoundVecOffset = true;
+      }
+      Mask.set(Idx);
+    }
+    return FoundVecOffset;
+  }
+
+  // Check for a subvector obtained as an (insertelement V, 0, idx)
+  uint64_t InsertIdx;
+  if (!match(SrcVecI,
+             m_InsertElt(m_Value(Vec), m_Zero(), m_ConstantInt(InsertIdx))))
+    return false;
+
+  auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
+  if (!VecTy)
+    return false;
+  // An out-of-range insert index would index past the end of Mask below.
+  if (InsertIdx >= Mask.size())
+    return false;
+  VecOffset = 0;
+  // The insertelement supplies every element of Vec except InsertIdx, which
+  // it zeroes. Keep that bit set only if an earlier match already supplied
+  // the element.
+  bool AlreadyInsertedMaskedElt = Mask.test(InsertIdx);
+  Mask.set();
+  if (!AlreadyInsertedMaskedElt)
+    Mask.reset(InsertIdx);
+  return true;
+}
+
+/// Try to fold the join of two scalar integers whose contents are packed
+/// elements of the same vector.
+///
+/// Both operands of the `or` \p I have to match
+/// matchSubIntegerPackFromVector against the same source vector and the same
+/// element offset. The replacement is a shufflevector that selects the packed
+/// elements (and a zero lane for every position neither operand supplies)
+/// followed by a bitcast to the type of \p I.
+///
+/// \returns the replacement bitcast, or nullptr if the pattern does not match.
+static Instruction *foldIntegerPackFromVector(Instruction &I,
+                                              InstCombiner::BuilderTy &Builder,
+                                              const DataLayout &DL) {
+  assert(I.getOpcode() == Instruction::Or && "expected an 'or' instruction");
+  Value *LhsVec, *RhsVec;
+  int64_t LhsVecOffset, RhsVecOffset;
+  // One mask is threaded through both matches so that it ends up describing
+  // every element supplied by either operand.
+  SmallBitVector Mask;
+  if (!matchSubIntegerPackFromVector(I.getOperand(0), LhsVec, LhsVecOffset,
+                                     Mask, DL))
+    return nullptr;
+  if (!matchSubIntegerPackFromVector(I.getOperand(1), RhsVec, RhsVecOffset,
+                                     Mask, DL))
+    return nullptr;
+  if (LhsVec != RhsVec || LhsVecOffset != RhsVecOffset)
+    return nullptr;
+
+  // Convert into shufflevector -> bitcast. Index ZeroVecIdx is the first lane
+  // of the second shuffle operand, which is an all-zero vector, so it is the
+  // default for every lane that is not supplied by the source vector.
+  const unsigned ZeroVecIdx =
+      cast<FixedVectorType>(LhsVec->getType())->getNumElements();
+  SmallVector<int> ShuffleMask(Mask.size(), ZeroVecIdx);
+  for (unsigned Idx : Mask.set_bits()) {
+    const int64_t SrcIdx = LhsVecOffset + Idx;
+    // Give up, before any IR has been created, if a supplied lane would not
+    // name a real element of the source vector.
+    if (SrcIdx < 0 || SrcIdx >= ZeroVecIdx)
+      return nullptr;
+    ShuffleMask[Idx] = static_cast<int>(SrcIdx);
+  }
+
+  Value *MaskedVec = Builder.CreateShuffleVector(
+      LhsVec, Constant::getNullValue(LhsVec->getType()), ShuffleMask,
+      I.getName() + ".v");
+  return CastInst::Create(Instruction::BitCast, MaskedVec, I.getType());
+}
+
 // A decomposition of ((X & Mask) * Factor). The NUW / NSW bools
 // track these properities for preservation. Note that we can decompose
 // equivalent select form of this expression (e.g. (!(X & Mask) ?
0 : Mask * @@ -3688,6 +3839,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { if (Instruction *X = foldComplexAndOrPatterns(I, Builder)) return X; + if (Instruction *X = foldIntegerPackFromVector(I, Builder, DL)) + return X; + // (A & B) | (C & D) -> A ^ D where A == ~C && B == ~D // (A & B) | (C & D) -> A ^ C where A == ~D && B == ~C if (Value *V = foldOrOfInversions(I, Builder)) diff --git a/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll new file mode 100644 index 0000000000000..9391fb5ddae97 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/or-packed-int-vecs.ll @@ -0,0 +1,926 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt %s -passes=instcombine -data-layout="E" -S | FileCheck %s --check-prefixes=CHECK,CHECK-BE +; RUN: opt %s -passes=instcombine -data-layout="e" -S | FileCheck %s --check-prefixes=CHECK,CHECK-LE + +define i32 @bitcast.v2i.le(<4 x i8> %v) { +; CHECK-BE-LABEL: define i32 @bitcast.v2i.le( +; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <4 x i8> [[V]], i64 0 +; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32 +; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 1 +; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32 +; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8 +; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]] +; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <4 x i8> [[V]], i64 2 +; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32 +; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16 +; CHECK-BE-NEXT: [[X_2:%.*]] = or disjoint i32 [[X_1]], [[S_2]] +; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 3 +; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32 +; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24 +; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_2]], [[S_3]] +; CHECK-BE-NEXT: ret i32 
[[X_3]] +; +; CHECK-LE-LABEL: define i32 @bitcast.v2i.le( +; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-LE-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V]] to i32 +; CHECK-LE-NEXT: ret i32 [[X_3]] +; + %v.0 = extractelement <4 x i8> %v, i64 0 + %z.0 = zext i8 %v.0 to i32 + + %v.1 = extractelement <4 x i8> %v, i64 1 + %z.1 = zext i8 %v.1 to i32 + %s.1 = shl i32 %z.1, 8 + %x.1 = or i32 %z.0, %s.1 + + %v.2 = extractelement <4 x i8> %v, i64 2 + %z.2 = zext i8 %v.2 to i32 + %s.2 = shl i32 %z.2, 16 + %x.2 = or i32 %x.1, %s.2 + + %v.3 = extractelement <4 x i8> %v, i64 3 + %z.3 = zext i8 %v.3 to i32 + %s.3 = shl i32 %z.3, 24 + %x.3 = or i32 %x.2, %s.3 + + ret i32 %x.3 +} + +define i32 @bitcast.v2i.be(<4 x i8> %v) { +; CHECK-BE-LABEL: define i32 @bitcast.v2i.be( +; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-BE-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[V]] to i32 +; CHECK-BE-NEXT: ret i32 [[X_3]] +; +; CHECK-LE-LABEL: define i32 @bitcast.v2i.be( +; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i8> [[V]], i64 3 +; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32 +; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 2 +; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32 +; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8 +; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]] +; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <4 x i8> [[V]], i64 1 +; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32 +; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16 +; CHECK-LE-NEXT: [[X_2:%.*]] = or disjoint i32 [[X_1]], [[S_2]] +; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 0 +; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32 +; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24 +; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_2]], [[S_3]] +; CHECK-LE-NEXT: ret i32 [[X_3]] +; + %v.0 = extractelement <4 x i8> %v, i64 3 + %z.0 = zext i8 %v.0 to i32 + + %v.1 = extractelement <4 x i8> 
%v, i64 2 + %z.1 = zext i8 %v.1 to i32 + %s.1 = shl i32 %z.1, 8 + %x.1 = or i32 %z.0, %s.1 + + %v.2 = extractelement <4 x i8> %v, i64 1 + %z.2 = zext i8 %v.2 to i32 + %s.2 = shl i32 %z.2, 16 + %x.2 = or i32 %x.1, %s.2 + + %v.3 = extractelement <4 x i8> %v, i64 0 + %z.3 = zext i8 %v.3 to i32 + %s.3 = shl i32 %z.3, 24 + %x.3 = or i32 %x.2, %s.3 + + ret i32 %x.3 +} + +define i64 @bitcast.v2i.le.i16(<4 x i16> %v) { +; CHECK-BE-LABEL: define i64 @bitcast.v2i.le.i16( +; CHECK-BE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 0 +; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64 +; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <4 x i16> [[V]], i64 1 +; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64 +; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16 +; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]] +; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <4 x i16> [[V]], i64 2 +; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64 +; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32 +; CHECK-BE-NEXT: [[X_2:%.*]] = or disjoint i64 [[X_1]], [[S_2]] +; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> [[V]], i64 3 +; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64 +; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48 +; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]] +; CHECK-BE-NEXT: ret i64 [[X_3]] +; +; CHECK-LE-LABEL: define i64 @bitcast.v2i.le.i16( +; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-LE-NEXT: [[X_3:%.*]] = bitcast <4 x i16> [[V]] to i64 +; CHECK-LE-NEXT: ret i64 [[X_3]] +; + %v.0 = extractelement <4 x i16> %v, i64 0 + %z.0 = zext i16 %v.0 to i64 + + %v.1 = extractelement <4 x i16> %v, i64 1 + %z.1 = zext i16 %v.1 to i64 + %s.1 = shl i64 %z.1, 16 + %x.1 = or i64 %z.0, %s.1 + + %v.2 = extractelement <4 x i16> %v, i64 2 + %z.2 = zext i16 %v.2 to i64 + %s.2 = shl i64 %z.2, 32 + %x.2 = or i64 %x.1, %s.2 + + %v.3 = extractelement <4 x i16> %v, i64 3 + %z.3 = 
zext i16 %v.3 to i64 + %s.3 = shl i64 %z.3, 48 + %x.3 = or i64 %x.2, %s.3 + + ret i64 %x.3 +} + +define i64 @bitcast.v2i.be.i16(<4 x i16> %v) { +; CHECK-BE-LABEL: define i64 @bitcast.v2i.be.i16( +; CHECK-BE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-BE-NEXT: [[X_3:%.*]] = bitcast <4 x i16> [[V]] to i64 +; CHECK-BE-NEXT: ret i64 [[X_3]] +; +; CHECK-LE-LABEL: define i64 @bitcast.v2i.be.i16( +; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 3 +; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64 +; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <4 x i16> [[V]], i64 2 +; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64 +; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16 +; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]] +; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <4 x i16> [[V]], i64 1 +; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64 +; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32 +; CHECK-LE-NEXT: [[X_2:%.*]] = or disjoint i64 [[X_1]], [[S_2]] +; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> [[V]], i64 0 +; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64 +; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48 +; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]] +; CHECK-LE-NEXT: ret i64 [[X_3]] +; + %v.0 = extractelement <4 x i16> %v, i64 3 + %z.0 = zext i16 %v.0 to i64 + + %v.1 = extractelement <4 x i16> %v, i64 2 + %z.1 = zext i16 %v.1 to i64 + %s.1 = shl i64 %z.1, 16 + %x.1 = or i64 %z.0, %s.1 + + %v.2 = extractelement <4 x i16> %v, i64 1 + %z.2 = zext i16 %v.2 to i64 + %s.2 = shl i64 %z.2, 32 + %x.2 = or i64 %x.1, %s.2 + + %v.3 = extractelement <4 x i16> %v, i64 0 + %z.3 = zext i16 %v.3 to i64 + %s.3 = shl i64 %z.3, 48 + %x.3 = or i64 %x.2, %s.3 + + ret i64 %x.3 +} + +define i32 @bitcast.v2i.le.tree(<4 x i8> %v) { +; CHECK-BE-LABEL: define i32 @bitcast.v2i.le.tree( +; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-BE-NEXT: [[V_0:%.*]] = 
extractelement <4 x i8> [[V]], i64 0 +; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32 +; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 1 +; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32 +; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8 +; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]] +; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <4 x i8> [[V]], i64 2 +; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32 +; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16 +; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 3 +; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32 +; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24 +; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i32 [[S_2]], [[S_3]] +; CHECK-BE-NEXT: [[X:%.*]] = or disjoint i32 [[X_1]], [[X_3]] +; CHECK-BE-NEXT: ret i32 [[X]] +; +; CHECK-LE-LABEL: define i32 @bitcast.v2i.le.tree( +; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-LE-NEXT: [[X:%.*]] = bitcast <4 x i8> [[V]] to i32 +; CHECK-LE-NEXT: ret i32 [[X]] +; + %v.0 = extractelement <4 x i8> %v, i64 0 + %z.0 = zext i8 %v.0 to i32 + + %v.1 = extractelement <4 x i8> %v, i64 1 + %z.1 = zext i8 %v.1 to i32 + %s.1 = shl i32 %z.1, 8 + %x.1 = or i32 %z.0, %s.1 + + %v.2 = extractelement <4 x i8> %v, i64 2 + %z.2 = zext i8 %v.2 to i32 + %s.2 = shl i32 %z.2, 16 + + %v.3 = extractelement <4 x i8> %v, i64 3 + %z.3 = zext i8 %v.3 to i32 + %s.3 = shl i32 %z.3, 24 + %x.3 = or i32 %s.2, %s.3 + + %x = or i32 %x.1, %x.3 + + ret i32 %x +} + +define i32 @bitcast.v2i.be.tree(<4 x i8> %v) { +; CHECK-BE-LABEL: define i32 @bitcast.v2i.be.tree( +; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-BE-NEXT: [[X:%.*]] = bitcast <4 x i8> [[V]] to i32 +; CHECK-BE-NEXT: ret i32 [[X]] +; +; CHECK-LE-LABEL: define i32 @bitcast.v2i.be.tree( +; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i8> [[V]], i64 3 +; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32 +; CHECK-LE-NEXT: 
[[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 2 +; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32 +; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8 +; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]] +; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <4 x i8> [[V]], i64 1 +; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32 +; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16 +; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 0 +; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32 +; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24 +; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i32 [[S_2]], [[S_3]] +; CHECK-LE-NEXT: [[X:%.*]] = or disjoint i32 [[X_1]], [[X_3]] +; CHECK-LE-NEXT: ret i32 [[X]] +; + %v.0 = extractelement <4 x i8> %v, i64 3 + %z.0 = zext i8 %v.0 to i32 + + %v.1 = extractelement <4 x i8> %v, i64 2 + %z.1 = zext i8 %v.1 to i32 + %s.1 = shl i32 %z.1, 8 + %x.1 = or i32 %z.0, %s.1 + + %v.2 = extractelement <4 x i8> %v, i64 1 + %z.2 = zext i8 %v.2 to i32 + %s.2 = shl i32 %z.2, 16 + + %v.3 = extractelement <4 x i8> %v, i64 0 + %z.3 = zext i8 %v.3 to i32 + %s.3 = shl i32 %z.3, 24 + %x.3 = or i32 %s.2, %s.3 + + %x = or i32 %x.1, %x.3 + + ret i32 %x +} + +define i64 @bitcast.v2i.le.tree.i16(<4 x i16> %v) { +; CHECK-BE-LABEL: define i64 @bitcast.v2i.le.tree.i16( +; CHECK-BE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 0 +; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64 +; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <4 x i16> [[V]], i64 1 +; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64 +; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16 +; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]] +; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <4 x i16> [[V]], i64 2 +; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64 +; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32 +; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> 
[[V]], i64 3 +; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64 +; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48 +; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i64 [[S_2]], [[S_3]] +; CHECK-BE-NEXT: [[X:%.*]] = or disjoint i64 [[X_1]], [[X_3]] +; CHECK-BE-NEXT: ret i64 [[X]] +; +; CHECK-LE-LABEL: define i64 @bitcast.v2i.le.tree.i16( +; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-LE-NEXT: [[X:%.*]] = bitcast <4 x i16> [[V]] to i64 +; CHECK-LE-NEXT: ret i64 [[X]] +; + %v.0 = extractelement <4 x i16> %v, i64 0 + %z.0 = zext i16 %v.0 to i64 + + %v.1 = extractelement <4 x i16> %v, i64 1 + %z.1 = zext i16 %v.1 to i64 + %s.1 = shl i64 %z.1, 16 + %x.1 = or i64 %z.0, %s.1 + + %v.2 = extractelement <4 x i16> %v, i64 2 + %z.2 = zext i16 %v.2 to i64 + %s.2 = shl i64 %z.2, 32 + + %v.3 = extractelement <4 x i16> %v, i64 3 + %z.3 = zext i16 %v.3 to i64 + %s.3 = shl i64 %z.3, 48 + %x.3 = or i64 %s.2, %s.3 + + %x = or i64 %x.1, %x.3 + + ret i64 %x +} + +define i64 @bitcast.v2i.be.tree.i16(<4 x i16> %v) { +; CHECK-BE-LABEL: define i64 @bitcast.v2i.be.tree.i16( +; CHECK-BE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-BE-NEXT: [[X:%.*]] = bitcast <4 x i16> [[V]] to i64 +; CHECK-BE-NEXT: ret i64 [[X]] +; +; CHECK-LE-LABEL: define i64 @bitcast.v2i.be.tree.i16( +; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 3 +; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64 +; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <4 x i16> [[V]], i64 2 +; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64 +; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16 +; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]] +; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <4 x i16> [[V]], i64 1 +; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64 +; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32 +; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> [[V]], i64 0 +; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64 +; 
CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48 +; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i64 [[S_2]], [[S_3]] +; CHECK-LE-NEXT: [[X:%.*]] = or disjoint i64 [[X_1]], [[X_3]] +; CHECK-LE-NEXT: ret i64 [[X]] +; + %v.0 = extractelement <4 x i16> %v, i64 3 + %z.0 = zext i16 %v.0 to i64 + + %v.1 = extractelement <4 x i16> %v, i64 2 + %z.1 = zext i16 %v.1 to i64 + %s.1 = shl i64 %z.1, 16 + %x.1 = or i64 %z.0, %s.1 + + %v.2 = extractelement <4 x i16> %v, i64 1 + %z.2 = zext i16 %v.2 to i64 + %s.2 = shl i64 %z.2, 32 + + %v.3 = extractelement <4 x i16> %v, i64 0 + %z.3 = zext i16 %v.3 to i64 + %s.3 = shl i64 %z.3, 48 + %x.3 = or i64 %s.2, %s.3 + + %x = or i64 %x.1, %x.3 + + ret i64 %x +} + +define i32 @extract.le.i32(<8 x i8> %v) { +; CHECK-BE-LABEL: define i32 @extract.le.i32( +; CHECK-BE-SAME: <8 x i8> [[V:%.*]]) { +; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <8 x i8> [[V]], i64 3 +; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32 +; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <8 x i8> [[V]], i64 4 +; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32 +; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8 +; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]] +; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <8 x i8> [[V]], i64 5 +; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32 +; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16 +; CHECK-BE-NEXT: [[X_2:%.*]] = or disjoint i32 [[X_1]], [[S_2]] +; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <8 x i8> [[V]], i64 6 +; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32 +; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24 +; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_2]], [[S_3]] +; CHECK-BE-NEXT: ret i32 [[X_3]] +; +; CHECK-LE-LABEL: define i32 @extract.le.i32( +; CHECK-LE-SAME: <8 x i8> [[V:%.*]]) { +; CHECK-LE-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> poison, <8 x i32> +; CHECK-LE-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i8> [[X_3_V_EXTRACT]] to <2 x 
i32> +; CHECK-LE-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i32> [[X_3_V_BC]], i64 0 +; CHECK-LE-NEXT: ret i32 [[X_3_V_EXTRACT1]] +; + %v.0 = extractelement <8 x i8> %v, i64 3 + %z.0 = zext i8 %v.0 to i32 + + %v.1 = extractelement <8 x i8> %v, i64 4 + %z.1 = zext i8 %v.1 to i32 + %s.1 = shl i32 %z.1, 8 + %x.1 = or i32 %z.0, %s.1 + + %v.2 = extractelement <8 x i8> %v, i64 5 + %z.2 = zext i8 %v.2 to i32 + %s.2 = shl i32 %z.2, 16 + %x.2 = or i32 %x.1, %s.2 + + %v.3 = extractelement <8 x i8> %v, i64 6 + %z.3 = zext i8 %v.3 to i32 + %s.3 = shl i32 %z.3, 24 + %x.3 = or i32 %x.2, %s.3 + + ret i32 %x.3 +} + +define i32 @extract.be.i32(<8 x i8> %v) { +; CHECK-BE-LABEL: define i32 @extract.be.i32( +; CHECK-BE-SAME: <8 x i8> [[V:%.*]]) { +; CHECK-BE-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> poison, <8 x i32> +; CHECK-BE-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i8> [[X_3_V_EXTRACT]] to <2 x i32> +; CHECK-BE-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i32> [[X_3_V_BC]], i64 0 +; CHECK-BE-NEXT: ret i32 [[X_3_V_EXTRACT1]] +; +; CHECK-LE-LABEL: define i32 @extract.be.i32( +; CHECK-LE-SAME: <8 x i8> [[V:%.*]]) { +; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <8 x i8> [[V]], i64 6 +; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32 +; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <8 x i8> [[V]], i64 5 +; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32 +; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8 +; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]] +; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <8 x i8> [[V]], i64 4 +; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32 +; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16 +; CHECK-LE-NEXT: [[X_2:%.*]] = or disjoint i32 [[X_1]], [[S_2]] +; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <8 x i8> [[V]], i64 3 +; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32 +; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24 +; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint 
i32 [[X_2]], [[S_3]] +; CHECK-LE-NEXT: ret i32 [[X_3]] +; + %v.0 = extractelement <8 x i8> %v, i64 6 + %z.0 = zext i8 %v.0 to i32 + + %v.1 = extractelement <8 x i8> %v, i64 5 + %z.1 = zext i8 %v.1 to i32 + %s.1 = shl i32 %z.1, 8 + %x.1 = or i32 %z.0, %s.1 + + %v.2 = extractelement <8 x i8> %v, i64 4 + %z.2 = zext i8 %v.2 to i32 + %s.2 = shl i32 %z.2, 16 + %x.2 = or i32 %x.1, %s.2 + + %v.3 = extractelement <8 x i8> %v, i64 3 + %z.3 = zext i8 %v.3 to i32 + %s.3 = shl i32 %z.3, 24 + %x.3 = or i32 %x.2, %s.3 + + ret i32 %x.3 +} + +define i64 @extract.le.i64(<8 x i16> %v) { +; CHECK-BE-LABEL: define i64 @extract.le.i64( +; CHECK-BE-SAME: <8 x i16> [[V:%.*]]) { +; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <8 x i16> [[V]], i64 3 +; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64 +; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <8 x i16> [[V]], i64 4 +; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64 +; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16 +; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]] +; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <8 x i16> [[V]], i64 5 +; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64 +; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32 +; CHECK-BE-NEXT: [[X_2:%.*]] = or disjoint i64 [[X_1]], [[S_2]] +; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <8 x i16> [[V]], i64 6 +; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64 +; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48 +; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]] +; CHECK-BE-NEXT: ret i64 [[X_3]] +; +; CHECK-LE-LABEL: define i64 @extract.le.i64( +; CHECK-LE-SAME: <8 x i16> [[V:%.*]]) { +; CHECK-LE-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i16> [[V]], <8 x i16> poison, <8 x i32> +; CHECK-LE-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i16> [[X_3_V_EXTRACT]] to <2 x i64> +; CHECK-LE-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i64> [[X_3_V_BC]], i64 0 +; CHECK-LE-NEXT: ret i64 [[X_3_V_EXTRACT1]] +; + %v.0 = 
extractelement <8 x i16> %v, i64 3 + %z.0 = zext i16 %v.0 to i64 + + %v.1 = extractelement <8 x i16> %v, i64 4 + %z.1 = zext i16 %v.1 to i64 + %s.1 = shl i64 %z.1, 16 + %x.1 = or i64 %z.0, %s.1 + + %v.2 = extractelement <8 x i16> %v, i64 5 + %z.2 = zext i16 %v.2 to i64 + %s.2 = shl i64 %z.2, 32 + %x.2 = or i64 %x.1, %s.2 + + %v.3 = extractelement <8 x i16> %v, i64 6 + %z.3 = zext i16 %v.3 to i64 + %s.3 = shl i64 %z.3, 48 + %x.3 = or i64 %x.2, %s.3 + + ret i64 %x.3 +} + +define i64 @extract.be.i64(<8 x i16> %v) { +; CHECK-BE-LABEL: define i64 @extract.be.i64( +; CHECK-BE-SAME: <8 x i16> [[V:%.*]]) { +; CHECK-BE-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i16> [[V]], <8 x i16> poison, <8 x i32> +; CHECK-BE-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i16> [[X_3_V_EXTRACT]] to <2 x i64> +; CHECK-BE-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i64> [[X_3_V_BC]], i64 0 +; CHECK-BE-NEXT: ret i64 [[X_3_V_EXTRACT1]] +; +; CHECK-LE-LABEL: define i64 @extract.be.i64( +; CHECK-LE-SAME: <8 x i16> [[V:%.*]]) { +; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <8 x i16> [[V]], i64 6 +; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64 +; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <8 x i16> [[V]], i64 5 +; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64 +; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16 +; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]] +; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <8 x i16> [[V]], i64 4 +; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64 +; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32 +; CHECK-LE-NEXT: [[X_2:%.*]] = or disjoint i64 [[X_1]], [[S_2]] +; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <8 x i16> [[V]], i64 3 +; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64 +; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48 +; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]] +; CHECK-LE-NEXT: ret i64 [[X_3]] +; + %v.0 = extractelement <8 x i16> %v, i64 6 + %z.0 = zext i16 %v.0 to 
i64 + + %v.1 = extractelement <8 x i16> %v, i64 5 + %z.1 = zext i16 %v.1 to i64 + %s.1 = shl i64 %z.1, 16 + %x.1 = or i64 %z.0, %s.1 + + %v.2 = extractelement <8 x i16> %v, i64 4 + %z.2 = zext i16 %v.2 to i64 + %s.2 = shl i64 %z.2, 32 + %x.2 = or i64 %x.1, %s.2 + + %v.3 = extractelement <8 x i16> %v, i64 3 + %z.3 = zext i16 %v.3 to i64 + %s.3 = shl i64 %z.3, 48 + %x.3 = or i64 %x.2, %s.3 + + ret i64 %x.3 +} + +define i32 @partial.le(<4 x i8> %v) { +; CHECK-BE-LABEL: define i32 @partial.le( +; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <4 x i8> [[V]], i64 0 +; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32 +; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 1 +; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32 +; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8 +; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]] +; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 3 +; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32 +; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24 +; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_1]], [[S_3]] +; CHECK-BE-NEXT: ret i32 [[X_3]] +; +; CHECK-LE-LABEL: define i32 @partial.le( +; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-LE-NEXT: [[X_3_V1:%.*]] = insertelement <4 x i8> [[V]], i8 0, i64 2 +; CHECK-LE-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[X_3_V1]] to i32 +; CHECK-LE-NEXT: ret i32 [[X_3]] +; + %v.0 = extractelement <4 x i8> %v, i64 0 + %z.0 = zext i8 %v.0 to i32 + + %v.1 = extractelement <4 x i8> %v, i64 1 + %z.1 = zext i8 %v.1 to i32 + %s.1 = shl i32 %z.1, 8 + %x.1 = or i32 %z.0, %s.1 + + %v.3 = extractelement <4 x i8> %v, i64 3 + %z.3 = zext i8 %v.3 to i32 + %s.3 = shl i32 %z.3, 24 + %x.3 = or i32 %x.1, %s.3 + + ret i32 %x.3 +} + +define i32 @partial.be(<4 x i8> %v) { +; CHECK-BE-LABEL: define i32 @partial.be( +; CHECK-BE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-BE-NEXT: [[X_3_V1:%.*]] = insertelement <4 x i8> [[V]], i8 0, i64 2 
+; CHECK-BE-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[X_3_V1]] to i32 +; CHECK-BE-NEXT: ret i32 [[X_3]] +; +; CHECK-LE-LABEL: define i32 @partial.be( +; CHECK-LE-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i8> [[V]], i64 3 +; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32 +; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <4 x i8> [[V]], i64 1 +; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32 +; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 16 +; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]] +; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i8> [[V]], i64 0 +; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32 +; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24 +; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_1]], [[S_3]] +; CHECK-LE-NEXT: ret i32 [[X_3]] +; + %v.0 = extractelement <4 x i8> %v, i64 3 + %z.0 = zext i8 %v.0 to i32 + + %v.1 = extractelement <4 x i8> %v, i64 1 + %z.1 = zext i8 %v.1 to i32 + %s.1 = shl i32 %z.1, 16 + %x.1 = or i32 %z.0, %s.1 + + %v.3 = extractelement <4 x i8> %v, i64 0 + %z.3 = zext i8 %v.3 to i32 + %s.3 = shl i32 %z.3, 24 + %x.3 = or i32 %x.1, %s.3 + + ret i32 %x.3 +} + + +define i64 @partial.le.i16(<4 x i16> %v) { +; CHECK-BE-LABEL: define i64 @partial.le.i16( +; CHECK-BE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 0 +; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64 +; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <4 x i16> [[V]], i64 1 +; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64 +; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16 +; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]] +; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> [[V]], i64 3 +; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64 +; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48 +; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_1]], [[S_3]] +; CHECK-BE-NEXT: ret i64 [[X_3]] +; +; 
CHECK-LE-LABEL: define i64 @partial.le.i16( +; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-LE-NEXT: [[X_3_V1:%.*]] = insertelement <4 x i16> [[V]], i16 0, i64 2 +; CHECK-LE-NEXT: [[X_3:%.*]] = bitcast <4 x i16> [[X_3_V1]] to i64 +; CHECK-LE-NEXT: ret i64 [[X_3]] +; + %v.0 = extractelement <4 x i16> %v, i64 0 + %z.0 = zext i16 %v.0 to i64 + + %v.1 = extractelement <4 x i16> %v, i64 1 + %z.1 = zext i16 %v.1 to i64 + %s.1 = shl i64 %z.1, 16 + %x.1 = or i64 %z.0, %s.1 + + %v.3 = extractelement <4 x i16> %v, i64 3 + %z.3 = zext i16 %v.3 to i64 + %s.3 = shl i64 %z.3, 48 + %x.3 = or i64 %x.1, %s.3 + + ret i64 %x.3 +} + +define i64 @partial.be.i16(<4 x i16> %v) { +; CHECK-BE-LABEL: define i64 @partial.be.i16( +; CHECK-BE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-BE-NEXT: [[X_3_V1:%.*]] = insertelement <4 x i16> [[V]], i16 0, i64 2 +; CHECK-BE-NEXT: [[X_3:%.*]] = bitcast <4 x i16> [[X_3_V1]] to i64 +; CHECK-BE-NEXT: ret i64 [[X_3]] +; +; CHECK-LE-LABEL: define i64 @partial.be.i16( +; CHECK-LE-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <4 x i16> [[V]], i64 3 +; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64 +; CHECK-LE-NEXT: [[V_1:%.*]] = extractelement <4 x i16> [[V]], i64 1 +; CHECK-LE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64 +; CHECK-LE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 32 +; CHECK-LE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]] +; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <4 x i16> [[V]], i64 0 +; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64 +; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48 +; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_1]], [[S_3]] +; CHECK-LE-NEXT: ret i64 [[X_3]] +; + %v.0 = extractelement <4 x i16> %v, i64 3 + %z.0 = zext i16 %v.0 to i64 + + %v.1 = extractelement <4 x i16> %v, i64 1 + %z.1 = zext i16 %v.1 to i64 + %s.1 = shl i64 %z.1, 32 + %x.1 = or i64 %z.0, %s.1 + + %v.3 = extractelement <4 x i16> %v, i64 0 + %z.3 = zext i16 %v.3 to i64 + %s.3 = shl i64 %z.3, 48 + %x.3 
= or i64 %x.1, %s.3 + + ret i64 %x.3 +} + +define i32 @partial.extract.le.i32(<8 x i8> %v) { +; CHECK-BE-LABEL: define i32 @partial.extract.le.i32( +; CHECK-BE-SAME: <8 x i8> [[V:%.*]]) { +; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <8 x i8> [[V]], i64 3 +; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i8 [[V_0]] to i32 +; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <8 x i8> [[V]], i64 4 +; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i8 [[V_1]] to i32 +; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i32 [[Z_1]], 8 +; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i32 [[S_1]], [[Z_0]] +; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <8 x i8> [[V]], i64 6 +; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32 +; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24 +; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_1]], [[S_3]] +; CHECK-BE-NEXT: ret i32 [[X_3]] +; +; CHECK-LE-LABEL: define i32 @partial.extract.le.i32( +; CHECK-LE-SAME: <8 x i8> [[V:%.*]]) { +; CHECK-LE-NEXT: [[X_3_V:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> , <4 x i32> +; CHECK-LE-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[X_3_V]] to i32 +; CHECK-LE-NEXT: ret i32 [[X_3]] +; + %v.0 = extractelement <8 x i8> %v, i64 3 + %z.0 = zext i8 %v.0 to i32 + + %v.1 = extractelement <8 x i8> %v, i64 4 + %z.1 = zext i8 %v.1 to i32 + %s.1 = shl i32 %z.1, 8 + %x.1 = or i32 %z.0, %s.1 + + %v.3 = extractelement <8 x i8> %v, i64 6 + %z.3 = zext i8 %v.3 to i32 + %s.3 = shl i32 %z.3, 24 + %x.3 = or i32 %x.1, %s.3 + + ret i32 %x.3 +} + +define i32 @partial.extract.be.i32(<8 x i8> %v) { +; CHECK-BE-LABEL: define i32 @partial.extract.be.i32( +; CHECK-BE-SAME: <8 x i8> [[V:%.*]]) { +; CHECK-BE-NEXT: [[X_3_V:%.*]] = shufflevector <8 x i8> [[V]], <8 x i8> , <4 x i32> +; CHECK-BE-NEXT: [[X_3:%.*]] = bitcast <4 x i8> [[X_3_V]] to i32 +; CHECK-BE-NEXT: ret i32 [[X_3]] +; +; CHECK-LE-LABEL: define i32 @partial.extract.be.i32( +; CHECK-LE-SAME: <8 x i8> [[V:%.*]]) { +; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <8 x i8> [[V]], i64 6 +; CHECK-LE-NEXT: [[Z_0:%.*]] 
= zext i8 [[V_0]] to i32 +; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <8 x i8> [[V]], i64 4 +; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i8 [[V_2]] to i32 +; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i32 [[Z_2]], 16 +; CHECK-LE-NEXT: [[X_2:%.*]] = or disjoint i32 [[S_2]], [[Z_0]] +; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <8 x i8> [[V]], i64 3 +; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i8 [[V_3]] to i32 +; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i32 [[Z_3]], 24 +; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i32 [[X_2]], [[S_3]] +; CHECK-LE-NEXT: ret i32 [[X_3]] +; + %v.0 = extractelement <8 x i8> %v, i64 6 + %z.0 = zext i8 %v.0 to i32 + + %v.2 = extractelement <8 x i8> %v, i64 4 + %z.2 = zext i8 %v.2 to i32 + %s.2 = shl i32 %z.2, 16 + %x.2 = or i32 %z.0, %s.2 + + %v.3 = extractelement <8 x i8> %v, i64 3 + %z.3 = zext i8 %v.3 to i32 + %s.3 = shl i32 %z.3, 24 + %x.3 = or i32 %x.2, %s.3 + + ret i32 %x.3 +} + +define i64 @partial.extract.le.i64(<8 x i16> %v) { +; CHECK-BE-LABEL: define i64 @partial.extract.le.i64( +; CHECK-BE-SAME: <8 x i16> [[V:%.*]]) { +; CHECK-BE-NEXT: [[V_0:%.*]] = extractelement <8 x i16> [[V]], i64 3 +; CHECK-BE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64 +; CHECK-BE-NEXT: [[V_1:%.*]] = extractelement <8 x i16> [[V]], i64 4 +; CHECK-BE-NEXT: [[Z_1:%.*]] = zext i16 [[V_1]] to i64 +; CHECK-BE-NEXT: [[S_1:%.*]] = shl nuw nsw i64 [[Z_1]], 16 +; CHECK-BE-NEXT: [[X_1:%.*]] = or disjoint i64 [[S_1]], [[Z_0]] +; CHECK-BE-NEXT: [[V_2:%.*]] = extractelement <8 x i16> [[V]], i64 5 +; CHECK-BE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64 +; CHECK-BE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32 +; CHECK-BE-NEXT: [[X_2:%.*]] = or disjoint i64 [[X_1]], [[S_2]] +; CHECK-BE-NEXT: [[V_3:%.*]] = extractelement <8 x i16> [[V]], i64 6 +; CHECK-BE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64 +; CHECK-BE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48 +; CHECK-BE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]] +; CHECK-BE-NEXT: ret i64 [[X_3]] +; +; CHECK-LE-LABEL: define i64 
@partial.extract.le.i64( +; CHECK-LE-SAME: <8 x i16> [[V:%.*]]) { +; CHECK-LE-NEXT: [[X_3_V_EXTRACT:%.*]] = shufflevector <8 x i16> [[V]], <8 x i16> poison, <8 x i32> +; CHECK-LE-NEXT: [[X_3_V_BC:%.*]] = bitcast <8 x i16> [[X_3_V_EXTRACT]] to <2 x i64> +; CHECK-LE-NEXT: [[X_3_V_EXTRACT1:%.*]] = extractelement <2 x i64> [[X_3_V_BC]], i64 0 +; CHECK-LE-NEXT: ret i64 [[X_3_V_EXTRACT1]] +; + %v.0 = extractelement <8 x i16> %v, i64 3 + %z.0 = zext i16 %v.0 to i64 + + %v.1 = extractelement <8 x i16> %v, i64 4 + %z.1 = zext i16 %v.1 to i64 + %s.1 = shl i64 %z.1, 16 + %x.1 = or i64 %z.0, %s.1 + + %v.2 = extractelement <8 x i16> %v, i64 5 + %z.2 = zext i16 %v.2 to i64 + %s.2 = shl i64 %z.2, 32 + %x.2 = or i64 %x.1, %s.2 + + %v.3 = extractelement <8 x i16> %v, i64 6 + %z.3 = zext i16 %v.3 to i64 + %s.3 = shl i64 %z.3, 48 + %x.3 = or i64 %x.2, %s.3 + + ret i64 %x.3 +} + +define i64 @partial.extract.be.i64(<8 x i16> %v) { +; CHECK-BE-LABEL: define i64 @partial.extract.be.i64( +; CHECK-BE-SAME: <8 x i16> [[V:%.*]]) { +; CHECK-BE-NEXT: [[X_3_V:%.*]] = shufflevector <8 x i16> [[V]], <8 x i16> , <4 x i32> +; CHECK-BE-NEXT: [[X_3:%.*]] = bitcast <4 x i16> [[X_3_V]] to i64 +; CHECK-BE-NEXT: ret i64 [[X_3]] +; +; CHECK-LE-LABEL: define i64 @partial.extract.be.i64( +; CHECK-LE-SAME: <8 x i16> [[V:%.*]]) { +; CHECK-LE-NEXT: [[V_0:%.*]] = extractelement <8 x i16> [[V]], i64 6 +; CHECK-LE-NEXT: [[Z_0:%.*]] = zext i16 [[V_0]] to i64 +; CHECK-LE-NEXT: [[V_2:%.*]] = extractelement <8 x i16> [[V]], i64 4 +; CHECK-LE-NEXT: [[Z_2:%.*]] = zext i16 [[V_2]] to i64 +; CHECK-LE-NEXT: [[S_2:%.*]] = shl nuw nsw i64 [[Z_2]], 32 +; CHECK-LE-NEXT: [[X_2:%.*]] = or disjoint i64 [[S_2]], [[Z_0]] +; CHECK-LE-NEXT: [[V_3:%.*]] = extractelement <8 x i16> [[V]], i64 3 +; CHECK-LE-NEXT: [[Z_3:%.*]] = zext i16 [[V_3]] to i64 +; CHECK-LE-NEXT: [[S_3:%.*]] = shl nuw i64 [[Z_3]], 48 +; CHECK-LE-NEXT: [[X_3:%.*]] = or disjoint i64 [[X_2]], [[S_3]] +; CHECK-LE-NEXT: ret i64 [[X_3]] +; + %v.0 = extractelement <8 x 
i16> %v, i64 6 + %z.0 = zext i16 %v.0 to i64 + + %v.2 = extractelement <8 x i16> %v, i64 4 + %z.2 = zext i16 %v.2 to i64 + %s.2 = shl i64 %z.2, 32 + %x.2 = or i64 %z.0, %s.2 + + %v.3 = extractelement <8 x i16> %v, i64 3 + %z.3 = zext i16 %v.3 to i64 + %s.3 = shl i64 %z.3, 48 + %x.3 = or i64 %x.2, %s.3 + + ret i64 %x.3 +} + +define <2 x i16> @shufflecast.v2v(<4 x i8> %v) { +; CHECK-LABEL: define <2 x i16> @shufflecast.v2v( +; CHECK-SAME: <4 x i8> [[V:%.*]]) { +; CHECK-NEXT: [[W_3:%.*]] = bitcast <4 x i8> [[V]] to <2 x i16> +; CHECK-NEXT: ret <2 x i16> [[W_3]] +; + %v.0 = shufflevector <4 x i8> %v, <4 x i8> zeroinitializer, <4 x i32> + %c.0 = bitcast <4 x i8> %v.0 to <2 x i16> + + %v.1 = shufflevector <4 x i8> %v, <4 x i8> zeroinitializer, <4 x i32> + %c.1 = bitcast <4 x i8> %v.1 to <2 x i16> + %w.1 = or <2 x i16> %c.0, %c.1 + + %v.2 = shufflevector <4 x i8> %v, <4 x i8> zeroinitializer, <4 x i32> + %c.2 = bitcast <4 x i8> %v.2 to <2 x i16> + %w.2 = or <2 x i16> %w.1, %c.2 + + %v.3 = shufflevector <4 x i8> %v, <4 x i8> zeroinitializer, <4 x i32> + %c.3 = bitcast <4 x i8> %v.3 to <2 x i16> + %w.3 = or <2 x i16> %w.2, %c.3 + + ret <2 x i16> %w.3 +} + +define <2 x i32> @shufflecast.v2v.i16(<4 x i16> %v) { +; CHECK-LABEL: define <2 x i32> @shufflecast.v2v.i16( +; CHECK-SAME: <4 x i16> [[V:%.*]]) { +; CHECK-NEXT: [[W_3:%.*]] = bitcast <4 x i16> [[V]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[W_3]] +; + %v.0 = shufflevector <4 x i16> %v, <4 x i16> zeroinitializer, <4 x i32> + %c.0 = bitcast <4 x i16> %v.0 to <2 x i32> + + %v.1 = shufflevector <4 x i16> %v, <4 x i16> zeroinitializer, <4 x i32> + %c.1 = bitcast <4 x i16> %v.1 to <2 x i32> + %w.1 = or <2 x i32> %c.0, %c.1 + + %v.2 = shufflevector <4 x i16> %v, <4 x i16> zeroinitializer, <4 x i32> + %c.2 = bitcast <4 x i16> %v.2 to <2 x i32> + %w.2 = or <2 x i32> %w.1, %c.2 + + %v.3 = shufflevector <4 x i16> %v, <4 x i16> zeroinitializer, <4 x i32> + %c.3 = bitcast <4 x i16> %v.3 to <2 x i32> + %w.3 = or <2 x i32> %w.2, %c.3 
+ + ret <2 x i32> %w.3 +} + +define i32 @bitcast.v2i.half(<2 x half> %v) { +; CHECK-LABEL: define i32 @bitcast.v2i.half( +; CHECK-SAME: <2 x half> [[V:%.*]]) { +; CHECK-NEXT: [[X:%.*]] = bitcast <2 x half> [[V]] to i32 +; CHECK-NEXT: ret i32 [[X]] +; + %v.0 = insertelement <2 x half> %v, half 0.0, i64 1 + %x.0 = bitcast <2 x half> %v.0 to i32 + + %v.1 = insertelement <2 x half> %v, half 0.0, i64 0 + %x.1 = bitcast <2 x half> %v.1 to i32 + + %x = or i32 %x.0, %x.1 + ret i32 %x +}