diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 8c156c93ba8d1..a3a0e31f887ab 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -842,6 +842,138 @@ static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL, return true; } +/// ValWidth bits starting at ValOffset of Val stored at PtrBase+PtrOffset. +struct PartStore { + Value *PtrBase; + APInt PtrOffset; + Value *Val; + uint64_t ValOffset; + uint64_t ValWidth; + StoreInst *Store; + + bool isCompatibleWith(const PartStore &Other) const { + return PtrBase == Other.PtrBase && Val == Other.Val; + } + + bool operator<(const PartStore &Other) const { + return PtrOffset.slt(Other.PtrOffset); + } +}; + +static std::optional<PartStore> matchPartStore(Instruction &I, + const DataLayout &DL) { + auto *Store = dyn_cast<StoreInst>(&I); + if (!Store || !Store->isSimple()) + return std::nullopt; + + Value *StoredVal = Store->getValueOperand(); + Type *StoredTy = StoredVal->getType(); + if (!StoredTy->isIntegerTy() || !DL.typeSizeEqualsStoreSize(StoredTy)) + return std::nullopt; + + uint64_t ValWidth = StoredTy->getPrimitiveSizeInBits(); + uint64_t ValOffset = 0; + Value *Val; + if (!match(StoredVal, m_CombineOr(m_Trunc(m_LShr(m_Value(Val), + m_ConstantInt(ValOffset))), + m_Trunc(m_Value(Val))))) + return std::nullopt; + + Value *Ptr = Store->getPointerOperand(); + APInt PtrOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); + Value *PtrBase = Ptr->stripAndAccumulateConstantOffsets( + DL, PtrOffset, /*AllowNonInbounds=*/true); + return {{PtrBase, PtrOffset, Val, ValOffset, ValWidth, Store}}; +} + +static bool mergePartStores(SmallVectorImpl<PartStore> &Parts, + const DataLayout &DL, TargetTransformInfo &TTI) { + if (Parts.size() < 2) + return false; + + // We now have multiple parts of the same value stored to the same pointer. 
+ // Sort the parts by pointer offset, and make sure they are consistent with + // the value offsets. Also check that the value is fully covered without + // overlaps. + // FIXME: We could support merging stores for only part of the value here. + llvm::sort(Parts); + int64_t LastEndOffsetFromFirst = 0; + const PartStore &First = Parts[0]; + for (const PartStore &Part : Parts) { + APInt PtrOffsetFromFirst = Part.PtrOffset - First.PtrOffset; + int64_t ValOffsetFromFirst = Part.ValOffset - First.ValOffset; + if (PtrOffsetFromFirst * 8 != ValOffsetFromFirst || + LastEndOffsetFromFirst != ValOffsetFromFirst) + return false; + LastEndOffsetFromFirst = ValOffsetFromFirst + Part.ValWidth; + } + + // Check whether combining the stores is profitable. + // FIXME: We could generate smaller stores if we can't produce a large one. + LLVMContext &Ctx = First.Store->getContext(); + Type *NewTy = Type::getIntNTy(Ctx, LastEndOffsetFromFirst); + unsigned Fast = 0; + if (!TTI.isTypeLegal(NewTy) || + !TTI.allowsMisalignedMemoryAccesses(Ctx, LastEndOffsetFromFirst, + First.Store->getPointerAddressSpace(), + First.Store->getAlign(), &Fast) || + !Fast) + return false; + + // Generate the combined store. + IRBuilder<> Builder(First.Store); + Value *Val = First.Val; + if (First.ValOffset != 0) + Val = Builder.CreateLShr(Val, First.ValOffset); + Val = Builder.CreateTrunc(Val, NewTy); + StoreInst *Store = Builder.CreateAlignedStore( + Val, First.Store->getPointerOperand(), First.Store->getAlign()); + + AAMDNodes AATags = First.Store->getAAMetadata(); + for (const PartStore &Part : drop_begin(Parts)) + AATags = AATags.concat(Part.Store->getAAMetadata()); + Store->setAAMetadata(AATags); + + // Remove the old stores. + for (const PartStore &Part : Parts) + Part.Store->eraseFromParent(); + + return true; +} + +static bool foldConsecutiveStores(BasicBlock &BB, const DataLayout &DL, + TargetTransformInfo &TTI, AliasAnalysis &AA) { + // FIXME: Add big endian support. 
+ if (DL.isBigEndian()) + return false; + + SmallVector<PartStore, 8> Parts; + bool MadeChange = false; + for (Instruction &I : make_early_inc_range(BB)) { + if (std::optional<PartStore> Part = matchPartStore(I, DL)) { + if (Parts.empty() || Part->isCompatibleWith(Parts[0])) { + Parts.push_back(std::move(*Part)); + continue; + } + + MadeChange |= mergePartStores(Parts, DL, TTI); + Parts.clear(); + Parts.push_back(std::move(*Part)); + continue; + } + + // FIXME: Use AA to make this more precise. + if (I.mayReadOrWriteMemory() || I.mayThrow()) { + MadeChange |= mergePartStores(Parts, DL, TTI); + Parts.clear(); + continue; + } + } + + MadeChange |= mergePartStores(Parts, DL, TTI); + return MadeChange; +} + /// Combine away instructions providing they are still equivalent when compared /// against 0. i.e do they have any bits set. static Value *optimizeShiftInOrChain(Value *V, IRBuilder<> &Builder) { @@ -1330,6 +1462,9 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT, // bugs. MadeChange |= foldLibCalls(I, TTI, TLI, AC, DT, DL, MadeCFGChange); } + + // Do this separately to avoid redundantly scanning stores multiple times. + MadeChange |= foldConsecutiveStores(BB, DL, TTI, AA); } // We're done with transforms, so remove dead instructions. diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/store-merge-be.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/store-merge-be.ll new file mode 100644 index 0000000000000..34f39245500b9 --- /dev/null +++ b/llvm/test/Transforms/AggressiveInstCombine/X86/store-merge-be.ll @@ -0,0 +1,106 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=aggressive-instcombine -mtriple=x86_64-unknown-linux-gnu -data-layout="E-n64" < %s | FileCheck %s + +; Pretend X86 is big endian. + +; FIXME: Big endian not supported yet. 
+ +define void @test_i32_be(i32 %x, ptr %p) { +; CHECK-LABEL: define void @test_i32_be( +; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[X_0:%.*]] = trunc i32 [[X]] to i8 +; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: store i8 [[X_0]], ptr [[GEP_0]], align 1 +; CHECK-NEXT: [[SHR_1:%.*]] = lshr i32 [[X]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[SHR_1]] to i8 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; CHECK-NEXT: store i8 [[X_1]], ptr [[GEP_1]], align 1 +; CHECK-NEXT: [[SHR_2:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[SHR_2]] to i8 +; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr i8, ptr [[P]], i64 1 +; CHECK-NEXT: store i8 [[X_2]], ptr [[GEP_2]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X]], 24 +; CHECK-NEXT: [[X_3:%.*]] = trunc i32 [[TMP1]] to i8 +; CHECK-NEXT: store i8 [[X_3]], ptr [[P]], align 1 +; CHECK-NEXT: ret void +; + %x.0 = trunc i32 %x to i8 + %gep.0 = getelementptr i8, ptr %p, i64 3 + store i8 %x.0, ptr %gep.0 + %shr.1 = lshr i32 %x, 8 + %x.1 = trunc i32 %shr.1 to i8 + %gep.1 = getelementptr i8, ptr %p, i64 2 + store i8 %x.1, ptr %gep.1 + %shr.2 = lshr i32 %x, 16 + %x.2 = trunc i32 %shr.2 to i8 + %gep.2 = getelementptr i8, ptr %p, i64 1 + store i8 %x.2, ptr %gep.2 + %shr.3 = lshr i32 %x, 24 + %x.3 = trunc i32 %shr.3 to i8 + store i8 %x.3, ptr %p + ret void +} + +define void @test_i32_le(i32 %x, ptr %p) { +; CHECK-LABEL: define void @test_i32_le( +; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[X_0:%.*]] = trunc i32 [[X]] to i8 +; CHECK-NEXT: store i8 [[X_0]], ptr [[P]], align 1 +; CHECK-NEXT: [[SHR_1:%.*]] = lshr i32 [[X]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[SHR_1]] to i8 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[P]], i64 1 +; CHECK-NEXT: store i8 [[X_1]], ptr [[GEP_1]], align 1 +; CHECK-NEXT: [[SHR_2:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[SHR_2]] to i8 +; CHECK-NEXT: [[GEP_2:%.*]] = 
getelementptr i8, ptr [[P]], i64 2 +; CHECK-NEXT: store i8 [[X_2]], ptr [[GEP_2]], align 1 +; CHECK-NEXT: [[SHR_3:%.*]] = lshr i32 [[X]], 24 +; CHECK-NEXT: [[X_3:%.*]] = trunc i32 [[SHR_3]] to i8 +; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: store i8 [[X_3]], ptr [[GEP_3]], align 1 +; CHECK-NEXT: ret void +; + %x.0 = trunc i32 %x to i8 + store i8 %x.0, ptr %p + %shr.1 = lshr i32 %x, 8 + %x.1 = trunc i32 %shr.1 to i8 + %gep.1 = getelementptr i8, ptr %p, i64 1 + store i8 %x.1, ptr %gep.1 + %shr.2 = lshr i32 %x, 16 + %x.2 = trunc i32 %shr.2 to i8 + %gep.2 = getelementptr i8, ptr %p, i64 2 + store i8 %x.2, ptr %gep.2 + %shr.3 = lshr i32 %x, 24 + %x.3 = trunc i32 %shr.3 to i8 + %gep.3 = getelementptr i8, ptr %p, i64 3 + store i8 %x.3, ptr %gep.3 + ret void +} + +define void @test_i32_mixed_parts(i32 %x, ptr %p) { +; CHECK-LABEL: define void @test_i32_mixed_parts( +; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[X_0:%.*]] = trunc i32 [[X]] to i8 +; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: store i8 [[X_0]], ptr [[GEP_0]], align 1 +; CHECK-NEXT: [[SHR_1:%.*]] = lshr i32 [[X]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[SHR_1]] to i16 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[P]], i64 1 +; CHECK-NEXT: store i16 [[X_1]], ptr [[GEP_1]], align 2 +; CHECK-NEXT: [[SHR_3:%.*]] = lshr i32 [[X]], 24 +; CHECK-NEXT: [[X_3:%.*]] = trunc i32 [[SHR_3]] to i8 +; CHECK-NEXT: store i8 [[X_3]], ptr [[P]], align 1 +; CHECK-NEXT: ret void +; + %x.0 = trunc i32 %x to i8 + %gep.0 = getelementptr i8, ptr %p, i64 3 + store i8 %x.0, ptr %gep.0 + %shr.1 = lshr i32 %x, 8 + %x.1 = trunc i32 %shr.1 to i16 + %gep.1 = getelementptr i8, ptr %p, i64 1 + store i16 %x.1, ptr %gep.1 + %shr.3 = lshr i32 %x, 24 + %x.3 = trunc i32 %shr.3 to i8 + store i8 %x.3, ptr %p + ret void +} diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/store-merge.ll 
b/llvm/test/Transforms/AggressiveInstCombine/X86/store-merge.ll new file mode 100644 index 0000000000000..2642e86a06bf6 --- /dev/null +++ b/llvm/test/Transforms/AggressiveInstCombine/X86/store-merge.ll @@ -0,0 +1,790 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=aggressive-instcombine -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +declare void @use.i16(i16) +declare void @use.i32(i32) + +define void @test_i16(i16 %x, ptr %p) { +; CHECK-LABEL: define void @test_i16( +; CHECK-SAME: i16 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: store i16 [[X]], ptr [[P]], align 1 +; CHECK-NEXT: ret void +; + %x.0 = trunc i16 %x to i8 + store i8 %x.0, ptr %p + %shr.1 = lshr i16 %x, 8 + %x.1 = trunc i16 %shr.1 to i8 + %gep.1 = getelementptr i8, ptr %p, i64 1 + store i8 %x.1, ptr %gep.1 + ret void +} + +define void @test_i32_i8_parts(i32 %x, ptr %p) { +; CHECK-LABEL: define void @test_i32_i8_parts( +; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: store i32 [[X]], ptr [[P]], align 1 +; CHECK-NEXT: ret void +; + %x.0 = trunc i32 %x to i8 + store i8 %x.0, ptr %p + %shr.1 = lshr i32 %x, 8 + %x.1 = trunc i32 %shr.1 to i8 + %gep.1 = getelementptr i8, ptr %p, i64 1 + store i8 %x.1, ptr %gep.1 + %shr.2 = lshr i32 %x, 16 + %x.2 = trunc i32 %shr.2 to i8 + %gep.2 = getelementptr i8, ptr %p, i64 2 + store i8 %x.2, ptr %gep.2 + %shr.3 = lshr i32 %x, 24 + %x.3 = trunc i32 %shr.3 to i8 + %gep.3 = getelementptr i8, ptr %p, i64 3 + store i8 %x.3, ptr %gep.3 + ret void +} + +define void @test_i32_i16_parts(i32 %x, ptr %p) { +; CHECK-LABEL: define void @test_i32_i16_parts( +; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: store i32 [[X]], ptr [[P]], align 2 +; CHECK-NEXT: ret void +; + %x.0 = trunc i32 %x to i16 + store i16 %x.0, ptr %p + %shr.1 = lshr i32 %x, 16 + %x.1 = trunc i32 %shr.1 to i16 + %gep.1 = getelementptr i8, ptr %p, i64 2 + store i16 
%x.1, ptr %gep.1 + ret void +} + +define void @test_i32_mixed_parts(i32 %x, ptr %p) { +; CHECK-LABEL: define void @test_i32_mixed_parts( +; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: store i32 [[X]], ptr [[P]], align 1 +; CHECK-NEXT: ret void +; + %x.0 = trunc i32 %x to i8 + store i8 %x.0, ptr %p + %shr.1 = lshr i32 %x, 8 + %x.1 = trunc i32 %shr.1 to i16 + %gep.1 = getelementptr i8, ptr %p, i64 1 + store i16 %x.1, ptr %gep.1 + %shr.3 = lshr i32 %x, 24 + %x.3 = trunc i32 %shr.3 to i8 + %gep.3 = getelementptr i8, ptr %p, i64 3 + store i8 %x.3, ptr %gep.3 + ret void +} + +define void @test_i64(i64 %x, ptr %p) { +; CHECK-LABEL: define void @test_i64( +; CHECK-SAME: i64 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: store i64 [[X]], ptr [[P]], align 1 +; CHECK-NEXT: ret void +; + %x.0 = trunc i64 %x to i8 + store i8 %x.0, ptr %p + %shr.1 = lshr i64 %x, 8 + %x.1 = trunc i64 %shr.1 to i8 + %gep.1 = getelementptr i8, ptr %p, i64 1 + store i8 %x.1, ptr %gep.1 + %shr.2 = lshr i64 %x, 16 + %x.2 = trunc i64 %shr.2 to i8 + %gep.2 = getelementptr i8, ptr %p, i64 2 + store i8 %x.2, ptr %gep.2 + %shr.3 = lshr i64 %x, 24 + %x.3 = trunc i64 %shr.3 to i8 + %gep.3 = getelementptr i8, ptr %p, i64 3 + store i8 %x.3, ptr %gep.3 + %shr.4 = lshr i64 %x, 32 + %x.4 = trunc i64 %shr.4 to i8 + %gep.4 = getelementptr i8, ptr %p, i64 4 + store i8 %x.4, ptr %gep.4 + %shr.5 = lshr i64 %x, 40 + %x.5 = trunc i64 %shr.5 to i8 + %gep.5 = getelementptr i8, ptr %p, i64 5 + store i8 %x.5, ptr %gep.5 + %shr.6 = lshr i64 %x, 48 + %x.6 = trunc i64 %shr.6 to i8 + %gep.6 = getelementptr i8, ptr %p, i64 6 + store i8 %x.6, ptr %gep.6 + %shr.7 = lshr i64 %x, 56 + %x.7 = trunc i64 %shr.7 to i8 + %gep.7 = getelementptr i8, ptr %p, i64 7 + store i8 %x.7, ptr %gep.7 + ret void +} + +define void @test_i128(i128 %x, ptr %p) { +; CHECK-LABEL: define void @test_i128( +; CHECK-SAME: i128 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[X_0:%.*]] = trunc i128 [[X]] to i8 +; CHECK-NEXT: store i8 [[X_0]], ptr [[P]], 
align 1 +; CHECK-NEXT: [[SHR_1:%.*]] = lshr i128 [[X]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i128 [[SHR_1]] to i8 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[P]], i64 1 +; CHECK-NEXT: store i8 [[X_1]], ptr [[GEP_1]], align 1 +; CHECK-NEXT: [[SHR_2:%.*]] = lshr i128 [[X]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i128 [[SHR_2]] to i8 +; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; CHECK-NEXT: store i8 [[X_2]], ptr [[GEP_2]], align 1 +; CHECK-NEXT: [[SHR_3:%.*]] = lshr i128 [[X]], 24 +; CHECK-NEXT: [[X_3:%.*]] = trunc i128 [[SHR_3]] to i8 +; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: store i8 [[X_3]], ptr [[GEP_3]], align 1 +; CHECK-NEXT: [[SHR_4:%.*]] = lshr i128 [[X]], 32 +; CHECK-NEXT: [[X_4:%.*]] = trunc i128 [[SHR_4]] to i8 +; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr i8, ptr [[P]], i64 4 +; CHECK-NEXT: store i8 [[X_4]], ptr [[GEP_4]], align 1 +; CHECK-NEXT: [[SHR_5:%.*]] = lshr i128 [[X]], 40 +; CHECK-NEXT: [[X_5:%.*]] = trunc i128 [[SHR_5]] to i8 +; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr i8, ptr [[P]], i64 5 +; CHECK-NEXT: store i8 [[X_5]], ptr [[GEP_5]], align 1 +; CHECK-NEXT: [[SHR_6:%.*]] = lshr i128 [[X]], 48 +; CHECK-NEXT: [[X_6:%.*]] = trunc i128 [[SHR_6]] to i8 +; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr i8, ptr [[P]], i64 6 +; CHECK-NEXT: store i8 [[X_6]], ptr [[GEP_6]], align 1 +; CHECK-NEXT: [[SHR_7:%.*]] = lshr i128 [[X]], 56 +; CHECK-NEXT: [[X_7:%.*]] = trunc i128 [[SHR_7]] to i8 +; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr i8, ptr [[P]], i64 7 +; CHECK-NEXT: store i8 [[X_7]], ptr [[GEP_7]], align 1 +; CHECK-NEXT: [[SHR_8:%.*]] = lshr i128 [[X]], 64 +; CHECK-NEXT: [[X_8:%.*]] = trunc i128 [[SHR_8]] to i8 +; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr i8, ptr [[P]], i64 8 +; CHECK-NEXT: store i8 [[X_8]], ptr [[GEP_8]], align 1 +; CHECK-NEXT: [[SHR_9:%.*]] = lshr i128 [[X]], 72 +; CHECK-NEXT: [[X_9:%.*]] = trunc i128 [[SHR_9]] to i8 +; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr i8, ptr 
[[P]], i64 9 +; CHECK-NEXT: store i8 [[X_9]], ptr [[GEP_9]], align 1 +; CHECK-NEXT: [[SHR_10:%.*]] = lshr i128 [[X]], 80 +; CHECK-NEXT: [[X_10:%.*]] = trunc i128 [[SHR_10]] to i8 +; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr i8, ptr [[P]], i64 10 +; CHECK-NEXT: store i8 [[X_10]], ptr [[GEP_10]], align 1 +; CHECK-NEXT: [[SHR_11:%.*]] = lshr i128 [[X]], 88 +; CHECK-NEXT: [[X_11:%.*]] = trunc i128 [[SHR_11]] to i8 +; CHECK-NEXT: [[GEP_11:%.*]] = getelementptr i8, ptr [[P]], i64 11 +; CHECK-NEXT: store i8 [[X_11]], ptr [[GEP_11]], align 1 +; CHECK-NEXT: [[SHR_12:%.*]] = lshr i128 [[X]], 96 +; CHECK-NEXT: [[X_12:%.*]] = trunc i128 [[SHR_12]] to i8 +; CHECK-NEXT: [[GEP_12:%.*]] = getelementptr i8, ptr [[P]], i64 12 +; CHECK-NEXT: store i8 [[X_12]], ptr [[GEP_12]], align 1 +; CHECK-NEXT: [[SHR_13:%.*]] = lshr i128 [[X]], 104 +; CHECK-NEXT: [[X_13:%.*]] = trunc i128 [[SHR_13]] to i8 +; CHECK-NEXT: [[GEP_13:%.*]] = getelementptr i8, ptr [[P]], i64 13 +; CHECK-NEXT: store i8 [[X_13]], ptr [[GEP_13]], align 1 +; CHECK-NEXT: [[SHR_14:%.*]] = lshr i128 [[X]], 112 +; CHECK-NEXT: [[X_14:%.*]] = trunc i128 [[SHR_14]] to i8 +; CHECK-NEXT: [[GEP_14:%.*]] = getelementptr i8, ptr [[P]], i64 14 +; CHECK-NEXT: store i8 [[X_14]], ptr [[GEP_14]], align 1 +; CHECK-NEXT: [[SHR_15:%.*]] = lshr i128 [[X]], 120 +; CHECK-NEXT: [[X_15:%.*]] = trunc i128 [[SHR_15]] to i8 +; CHECK-NEXT: [[GEP_15:%.*]] = getelementptr i8, ptr [[P]], i64 15 +; CHECK-NEXT: store i8 [[X_15]], ptr [[GEP_15]], align 1 +; CHECK-NEXT: ret void +; + %x.0 = trunc i128 %x to i8 + store i8 %x.0, ptr %p + %shr.1 = lshr i128 %x, 8 + %x.1 = trunc i128 %shr.1 to i8 + %gep.1 = getelementptr i8, ptr %p, i64 1 + store i8 %x.1, ptr %gep.1 + %shr.2 = lshr i128 %x, 16 + %x.2 = trunc i128 %shr.2 to i8 + %gep.2 = getelementptr i8, ptr %p, i64 2 + store i8 %x.2, ptr %gep.2 + %shr.3 = lshr i128 %x, 24 + %x.3 = trunc i128 %shr.3 to i8 + %gep.3 = getelementptr i8, ptr %p, i64 3 + store i8 %x.3, ptr %gep.3 + %shr.4 = lshr i128 %x, 32 + %x.4 = 
trunc i128 %shr.4 to i8 + %gep.4 = getelementptr i8, ptr %p, i64 4 + store i8 %x.4, ptr %gep.4 + %shr.5 = lshr i128 %x, 40 + %x.5 = trunc i128 %shr.5 to i8 + %gep.5 = getelementptr i8, ptr %p, i64 5 + store i8 %x.5, ptr %gep.5 + %shr.6 = lshr i128 %x, 48 + %x.6 = trunc i128 %shr.6 to i8 + %gep.6 = getelementptr i8, ptr %p, i64 6 + store i8 %x.6, ptr %gep.6 + %shr.7 = lshr i128 %x, 56 + %x.7 = trunc i128 %shr.7 to i8 + %gep.7 = getelementptr i8, ptr %p, i64 7 + store i8 %x.7, ptr %gep.7 + %shr.8 = lshr i128 %x, 64 + %x.8 = trunc i128 %shr.8 to i8 + %gep.8 = getelementptr i8, ptr %p, i64 8 + store i8 %x.8, ptr %gep.8 + %shr.9 = lshr i128 %x, 72 + %x.9 = trunc i128 %shr.9 to i8 + %gep.9 = getelementptr i8, ptr %p, i64 9 + store i8 %x.9, ptr %gep.9 + %shr.10 = lshr i128 %x, 80 + %x.10 = trunc i128 %shr.10 to i8 + %gep.10 = getelementptr i8, ptr %p, i64 10 + store i8 %x.10, ptr %gep.10 + %shr.11 = lshr i128 %x, 88 + %x.11 = trunc i128 %shr.11 to i8 + %gep.11 = getelementptr i8, ptr %p, i64 11 + store i8 %x.11, ptr %gep.11 + %shr.12 = lshr i128 %x, 96 + %x.12 = trunc i128 %shr.12 to i8 + %gep.12 = getelementptr i8, ptr %p, i64 12 + store i8 %x.12, ptr %gep.12 + %shr.13 = lshr i128 %x, 104 + %x.13 = trunc i128 %shr.13 to i8 + %gep.13 = getelementptr i8, ptr %p, i64 13 + store i8 %x.13, ptr %gep.13 + %shr.14 = lshr i128 %x, 112 + %x.14 = trunc i128 %shr.14 to i8 + %gep.14 = getelementptr i8, ptr %p, i64 14 + store i8 %x.14, ptr %gep.14 + %shr.15 = lshr i128 %x, 120 + %x.15 = trunc i128 %shr.15 to i8 + %gep.15 = getelementptr i8, ptr %p, i64 15 + store i8 %x.15, ptr %gep.15 + ret void +} + +define void @test_i32_lo(i32 %x, ptr %p) { +; CHECK-LABEL: define void @test_i32_lo( +; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X]] to i16 +; CHECK-NEXT: store i16 [[TMP1]], ptr [[P]], align 1 +; CHECK-NEXT: ret void +; + %x.0 = trunc i32 %x to i8 + store i8 %x.0, ptr %p + %shr.1 = lshr i32 %x, 8 + %x.1 = trunc i32 %shr.1 to i8 + %gep.1 = 
getelementptr i8, ptr %p, i64 1 + store i8 %x.1, ptr %gep.1 + ret void +} + +define void @test_i32_hi(i32 %x, ptr %p) { +; CHECK-LABEL: define void @test_i32_hi( +; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[SHR_0:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[SHR_0]] to i16 +; CHECK-NEXT: store i16 [[TMP2]], ptr [[P]], align 1 +; CHECK-NEXT: ret void +; + %shr.0 = lshr i32 %x, 16 + %x.0 = trunc i32 %shr.0 to i8 + store i8 %x.0, ptr %p + %shr.1 = lshr i32 %x, 24 + %x.1 = trunc i32 %shr.1 to i8 + %gep.1 = getelementptr i8, ptr %p, i64 1 + store i8 %x.1, ptr %gep.1 + ret void +} + +define void @test_i32_mid(i32 %x, ptr %p) { +; CHECK-LABEL: define void @test_i32_mid( +; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X]], 10 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +; CHECK-NEXT: store i16 [[TMP2]], ptr [[P]], align 1 +; CHECK-NEXT: ret void +; + %shr.0 = lshr i32 %x, 10 + %x.0 = trunc i32 %shr.0 to i8 + store i8 %x.0, ptr %p + %shr.1 = lshr i32 %x, 18 + %x.1 = trunc i32 %shr.1 to i8 + %gep.1 = getelementptr i8, ptr %p, i64 1 + store i8 %x.1, ptr %gep.1 + ret void +} + +define void @test_i32_shift_in_zeros(i32 %x, ptr %p) { +; CHECK-LABEL: define void @test_i32_shift_in_zeros( +; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[SHR_0:%.*]] = lshr i32 [[X]], 20 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[SHR_0]] to i16 +; CHECK-NEXT: store i16 [[TMP2]], ptr [[P]], align 1 +; CHECK-NEXT: ret void +; + %shr.0 = lshr i32 %x, 20 + %x.0 = trunc i32 %shr.0 to i8 + store i8 %x.0, ptr %p + %shr.1 = lshr i32 %x, 28 + %x.1 = trunc i32 %shr.1 to i8 + %gep.1 = getelementptr i8, ptr %p, i64 1 + store i8 %x.1, ptr %gep.1 + ret void +} + +define void @test_base_ptr_with_offset(i32 %x, ptr %p) { +; CHECK-LABEL: define void @test_base_ptr_with_offset( +; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P]], i64 7 +; CHECK-NEXT: store 
i32 [[X]], ptr [[TMP1]], align 2 +; CHECK-NEXT: ret void +; + %x.0 = trunc i32 %x to i16 + %gep.0 = getelementptr i8, ptr %p, i64 7 + store i16 %x.0, ptr %gep.0 + %shr.1 = lshr i32 %x, 16 + %x.1 = trunc i32 %shr.1 to i16 + %gep.1 = getelementptr i8, ptr %p, i64 9 + store i16 %x.1, ptr %gep.1 + ret void +} + +define void @test_aliasing_store(i16 %x, ptr %p, ptr %p2) { +; CHECK-LABEL: define void @test_aliasing_store( +; CHECK-SAME: i16 [[X:%.*]], ptr [[P:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: [[X_0:%.*]] = trunc i16 [[X]] to i8 +; CHECK-NEXT: store i8 [[X_0]], ptr [[P]], align 1 +; CHECK-NEXT: store i8 0, ptr [[P2]], align 1 +; CHECK-NEXT: [[SHR_1:%.*]] = lshr i16 [[X]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i16 [[SHR_1]] to i8 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[P]], i64 1 +; CHECK-NEXT: store i8 [[X_1]], ptr [[GEP_1]], align 1 +; CHECK-NEXT: ret void +; + %x.0 = trunc i16 %x to i8 + store i8 %x.0, ptr %p + store i8 0, ptr %p2 + %shr.1 = lshr i16 %x, 8 + %x.1 = trunc i16 %shr.1 to i8 + %gep.1 = getelementptr i8, ptr %p, i64 1 + store i8 %x.1, ptr %gep.1 + ret void +} + +define void @test_non_aliasing_store(i16 %x, ptr noalias %p, ptr noalias %p2) { +; CHECK-LABEL: define void @test_non_aliasing_store( +; CHECK-SAME: i16 [[X:%.*]], ptr noalias [[P:%.*]], ptr noalias [[P2:%.*]]) { +; CHECK-NEXT: [[X_0:%.*]] = trunc i16 [[X]] to i8 +; CHECK-NEXT: store i8 [[X_0]], ptr [[P]], align 1 +; CHECK-NEXT: store i8 0, ptr [[P2]], align 1 +; CHECK-NEXT: [[SHR_1:%.*]] = lshr i16 [[X]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i16 [[SHR_1]] to i8 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[P]], i64 1 +; CHECK-NEXT: store i8 [[X_1]], ptr [[GEP_1]], align 1 +; CHECK-NEXT: ret void +; + %x.0 = trunc i16 %x to i8 + store i8 %x.0, ptr %p + store i8 0, ptr %p2 + %shr.1 = lshr i16 %x, 8 + %x.1 = trunc i16 %shr.1 to i8 + %gep.1 = getelementptr i8, ptr %p, i64 1 + store i8 %x.1, ptr %gep.1 + ret void +} + +define i8 @test_aliasing_load(i16 %x, ptr %p, ptr %p2) { 
+; CHECK-LABEL: define i8 @test_aliasing_load( +; CHECK-SAME: i16 [[X:%.*]], ptr [[P:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: [[X_0:%.*]] = trunc i16 [[X]] to i8 +; CHECK-NEXT: store i8 [[X_0]], ptr [[P]], align 1 +; CHECK-NEXT: [[V:%.*]] = load i8, ptr [[P2]], align 1 +; CHECK-NEXT: [[SHR_1:%.*]] = lshr i16 [[X]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i16 [[SHR_1]] to i8 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[P]], i64 1 +; CHECK-NEXT: store i8 [[X_1]], ptr [[GEP_1]], align 1 +; CHECK-NEXT: ret i8 [[V]] +; + %x.0 = trunc i16 %x to i8 + store i8 %x.0, ptr %p + %v = load i8, ptr %p2 + %shr.1 = lshr i16 %x, 8 + %x.1 = trunc i16 %shr.1 to i8 + %gep.1 = getelementptr i8, ptr %p, i64 1 + store i8 %x.1, ptr %gep.1 + ret i8 %v +} + +define i8 @test_non_aliasing_load(i16 %x, ptr noalias %p, ptr noalias %p2) { +; CHECK-LABEL: define i8 @test_non_aliasing_load( +; CHECK-SAME: i16 [[X:%.*]], ptr noalias [[P:%.*]], ptr noalias [[P2:%.*]]) { +; CHECK-NEXT: [[X_0:%.*]] = trunc i16 [[X]] to i8 +; CHECK-NEXT: store i8 [[X_0]], ptr [[P]], align 1 +; CHECK-NEXT: [[V:%.*]] = load i8, ptr [[P2]], align 1 +; CHECK-NEXT: [[SHR_1:%.*]] = lshr i16 [[X]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i16 [[SHR_1]] to i8 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[P]], i64 1 +; CHECK-NEXT: store i8 [[X_1]], ptr [[GEP_1]], align 1 +; CHECK-NEXT: ret i8 [[V]] +; + %x.0 = trunc i16 %x to i8 + store i8 %x.0, ptr %p + %v = load i8, ptr %p2 + %shr.1 = lshr i16 %x, 8 + %x.1 = trunc i16 %shr.1 to i8 + %gep.1 = getelementptr i8, ptr %p, i64 1 + store i8 %x.1, ptr %gep.1 + ret i8 %v +} + +define i8 @test_aliasing_load_partially_mergeable(i32 %x, ptr %p, ptr %p2) { +; CHECK-LABEL: define i8 @test_aliasing_load_partially_mergeable( +; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[X]] to i16 +; CHECK-NEXT: store i16 [[TMP1]], ptr [[P]], align 1 +; CHECK-NEXT: [[V:%.*]] = load i8, ptr [[P2]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = 
getelementptr i8, ptr [[P]], i64 2 +; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16 +; CHECK-NEXT: store i16 [[TMP3]], ptr [[TMP4]], align 1 +; CHECK-NEXT: ret i8 [[V]] +; + %x.0 = trunc i32 %x to i8 + store i8 %x.0, ptr %p + %shr.1 = lshr i32 %x, 8 + %x.1 = trunc i32 %shr.1 to i8 + %gep.1 = getelementptr i8, ptr %p, i64 1 + store i8 %x.1, ptr %gep.1 + %v = load i8, ptr %p2 + %shr.2 = lshr i32 %x, 16 + %x.2 = trunc i32 %shr.2 to i8 + %gep.2 = getelementptr i8, ptr %p, i64 2 + store i8 %x.2, ptr %gep.2 + %shr.3 = lshr i32 %x, 24 + %x.3 = trunc i32 %shr.3 to i8 + %gep.3 = getelementptr i8, ptr %p, i64 3 + store i8 %x.3, ptr %gep.3 + ret i8 %v +} + +declare void @may_unwind() memory(none) + +define void @test_unwind(i16 %x, ptr %p, ptr %p2) { +; CHECK-LABEL: define void @test_unwind( +; CHECK-SAME: i16 [[X:%.*]], ptr [[P:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: [[X_0:%.*]] = trunc i16 [[X]] to i8 +; CHECK-NEXT: store i8 [[X_0]], ptr [[P]], align 1 +; CHECK-NEXT: call void @may_unwind() +; CHECK-NEXT: [[SHR_1:%.*]] = lshr i16 [[X]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i16 [[SHR_1]] to i8 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[P]], i64 1 +; CHECK-NEXT: store i8 [[X_1]], ptr [[GEP_1]], align 1 +; CHECK-NEXT: ret void +; + %x.0 = trunc i16 %x to i8 + store i8 %x.0, ptr %p + call void @may_unwind() + %shr.1 = lshr i16 %x, 8 + %x.1 = trunc i16 %shr.1 to i8 + %gep.1 = getelementptr i8, ptr %p, i64 1 + store i8 %x.1, ptr %gep.1 + ret void +} + +define void @test_stores_out_of_order(i32 %x, ptr %p) { +; CHECK-LABEL: define void @test_stores_out_of_order( +; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: store i32 [[X]], ptr [[P]], align 1 +; CHECK-NEXT: ret void +; + %shr.2 = lshr i32 %x, 16 + %x.2 = trunc i32 %shr.2 to i8 + %gep.2 = getelementptr i8, ptr %p, i64 2 + store i8 %x.2, ptr %gep.2 + %x.0 = trunc i32 %x to i8 + store i8 %x.0, ptr %p + %shr.3 = lshr i32 %x, 24 + %x.3 = trunc i32 
%shr.3 to i8 + %gep.3 = getelementptr i8, ptr %p, i64 3 + store i8 %x.3, ptr %gep.3 + %shr.1 = lshr i32 %x, 8 + %x.1 = trunc i32 %shr.1 to i8 + %gep.1 = getelementptr i8, ptr %p, i64 1 + store i8 %x.1, ptr %gep.1 + ret void +} + +define void @test_i32_gap(i32 %x, ptr %p) { +; CHECK-LABEL: define void @test_i32_gap( +; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[SHR_0:%.*]] = lshr i32 [[X]], 7 +; CHECK-NEXT: [[X_0:%.*]] = trunc i32 [[SHR_0]] to i8 +; CHECK-NEXT: store i8 [[X_0]], ptr [[P]], align 1 +; CHECK-NEXT: [[SHR_1:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[SHR_1]] to i8 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[P]], i64 1 +; CHECK-NEXT: store i8 [[X_1]], ptr [[GEP_1]], align 1 +; CHECK-NEXT: ret void +; + %shr.0 = lshr i32 %x, 7 + %x.0 = trunc i32 %shr.0 to i8 + store i8 %x.0, ptr %p + %shr.1 = lshr i32 %x, 16 + %x.1 = trunc i32 %shr.1 to i8 + %gep.1 = getelementptr i8, ptr %p, i64 1 + store i8 %x.1, ptr %gep.1 + ret void +} + +define void @test_i32_non_byte_sized(i32 %x, ptr %p) { +; CHECK-LABEL: define void @test_i32_non_byte_sized( +; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[X_0:%.*]] = trunc i32 [[X]] to i15 +; CHECK-NEXT: store i15 [[X_0]], ptr [[P]], align 2 +; CHECK-NEXT: [[SHR_1:%.*]] = lshr i32 [[X]], 15 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[SHR_1]] to i17 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; CHECK-NEXT: store i17 [[X_1]], ptr [[GEP_1]], align 4 +; CHECK-NEXT: ret void +; + %x.0 = trunc i32 %x to i15 + store i15 %x.0, ptr %p + %shr.1 = lshr i32 %x, 15 + %x.1 = trunc i32 %shr.1 to i17 + %gep.1 = getelementptr i8, ptr %p, i64 2 + store i17 %x.1, ptr %gep.1 + ret void +} + +define void @test_i32_wrong_ptr_offset(i32 %x, ptr %p) { +; CHECK-LABEL: define void @test_i32_wrong_ptr_offset( +; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[SHR_0:%.*]] = lshr i32 [[X]], 8 +; CHECK-NEXT: [[X_0:%.*]] = trunc i32 [[SHR_0]] to i8 +; 
; NOTE(review): This chunk is not C++ -- it is the tail of a unified diff
; adding an LLVM FileCheck regression test (.ll) for the consecutive-store
; merging fold (PartStore / mergePartStores / foldConsecutiveStores) shown
; in the C++ hunk above. The "+"-prefixed text below is raw patch residue
; with the original newlines collapsed; the "; CHECK*" lines are
; auto-generated expectations (llvm/utils/update_test_checks.py) and must
; stay byte-exact, so the test bodies are left untouched here.
; What the visible CHECK output establishes per test:
;   - @test_i32_wrong_endian: byte stores whose pointer offsets run opposite
;     to the value offsets are NOT merged (all four i8 stores remain);
;     presumably a little-endian target -- the RUN line / datalayout is
;     outside this chunk, TODO confirm (HEAD's FIXME says big-endian support
;     is missing).
;   - @test_i32_volatile / @test_i32_atomic: a volatile or atomic part store
;     blocks merging (matches the Store->isSimple() check in matchPartStore);
;     the stores stay separate in the CHECK output.
;   - @test_i32_multiple_pointers: two independent part-store chains (to %p
;     and %p2) are each merged into a single "store i32".
;   - @test_i32_multiple_pointers_interleaved: when the two noalias chains
;     interleave, the CHECK output shows all four i16 stores remaining, i.e.
;     no merge happens in this ordering.
;   - @test_i32_multi_use: extra uses of the trunc/lshr intermediates do not
;     block merging -- the combined "store i32 [[X]]" is emitted and the
;     scalar values are kept for the @use.* calls.
;   - @test_i32_scoped_aa_same / _different: the merged store's !noalias
;     metadata is the concat of the parts' metadata -- identical scopes are
;     preserved ([[META0]]), differing scopes degrade to the empty node
;     ([[META3]] = !{}).
;   - @test_i32_tbaa: !tbaa does not survive the concat; the merged store
;     carries no tbaa metadata.
CHECK-NEXT: store i8 [[X_0]], ptr [[P]], align 1 +; CHECK-NEXT: [[SHR_1:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[SHR_1]] to i8 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; CHECK-NEXT: store i8 [[X_1]], ptr [[GEP_1]], align 1 +; CHECK-NEXT: ret void +; + %shr.0 = lshr i32 %x, 8 + %x.0 = trunc i32 %shr.0 to i8 + store i8 %x.0, ptr %p + %shr.1 = lshr i32 %x, 16 + %x.1 = trunc i32 %shr.1 to i8 + %gep.1 = getelementptr i8, ptr %p, i64 2 + store i8 %x.1, ptr %gep.1 + ret void +} + +define void @test_i32_wrong_endian(i32 %x, ptr %p) { +; CHECK-LABEL: define void @test_i32_wrong_endian( +; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[X_0:%.*]] = trunc i32 [[X]] to i8 +; CHECK-NEXT: [[GEP_0:%.*]] = getelementptr i8, ptr [[P]], i64 3 +; CHECK-NEXT: store i8 [[X_0]], ptr [[GEP_0]], align 1 +; CHECK-NEXT: [[SHR_1:%.*]] = lshr i32 [[X]], 8 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[SHR_1]] to i8 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; CHECK-NEXT: store i8 [[X_1]], ptr [[GEP_1]], align 1 +; CHECK-NEXT: [[SHR_2:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_2:%.*]] = trunc i32 [[SHR_2]] to i8 +; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr i8, ptr [[P]], i64 1 +; CHECK-NEXT: store i8 [[X_2]], ptr [[GEP_2]], align 1 +; CHECK-NEXT: [[SHR_3:%.*]] = lshr i32 [[X]], 24 +; CHECK-NEXT: [[X_3:%.*]] = trunc i32 [[SHR_3]] to i8 +; CHECK-NEXT: store i8 [[X_3]], ptr [[P]], align 1 +; CHECK-NEXT: ret void +; + %x.0 = trunc i32 %x to i8 + %gep.0 = getelementptr i8, ptr %p, i64 3 + store i8 %x.0, ptr %gep.0 + %shr.1 = lshr i32 %x, 8 + %x.1 = trunc i32 %shr.1 to i8 + %gep.1 = getelementptr i8, ptr %p, i64 2 + store i8 %x.1, ptr %gep.1 + %shr.2 = lshr i32 %x, 16 + %x.2 = trunc i32 %shr.2 to i8 + %gep.2 = getelementptr i8, ptr %p, i64 1 + store i8 %x.2, ptr %gep.2 + %shr.3 = lshr i32 %x, 24 + %x.3 = trunc i32 %shr.3 to i8 + store i8 %x.3, ptr %p + ret void +} + +define void @test_i32_volatile(i32 %x, ptr %p) 
{ +; CHECK-LABEL: define void @test_i32_volatile( +; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X]], 8 +; CHECK-NEXT: [[X_0:%.*]] = trunc i32 [[TMP1]] to i8 +; CHECK-NEXT: store volatile i8 [[X_0]], ptr [[P]], align 1 +; CHECK-NEXT: [[SHR_1:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[SHR_1]] to i8 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[P]], i64 1 +; CHECK-NEXT: store i8 [[X_1]], ptr [[GEP_1]], align 1 +; CHECK-NEXT: ret void +; + %shr.0 = lshr i32 %x, 8 + %x.0 = trunc i32 %shr.0 to i8 + store volatile i8 %x.0, ptr %p + %shr.1 = lshr i32 %x, 16 + %x.1 = trunc i32 %shr.1 to i8 + %gep.1 = getelementptr i8, ptr %p, i64 1 + store i8 %x.1, ptr %gep.1 + ret void +} + +define void @test_i32_atomic(i32 %x, ptr %p) { +; CHECK-LABEL: define void @test_i32_atomic( +; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[SHR_0:%.*]] = lshr i32 [[X]], 8 +; CHECK-NEXT: [[X_0:%.*]] = trunc i32 [[SHR_0]] to i8 +; CHECK-NEXT: store atomic i8 [[X_0]], ptr [[P]] monotonic, align 1 +; CHECK-NEXT: [[SHR_1:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[SHR_1]] to i8 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[P]], i64 1 +; CHECK-NEXT: store i8 [[X_1]], ptr [[GEP_1]], align 1 +; CHECK-NEXT: ret void +; + %shr.0 = lshr i32 %x, 8 + %x.0 = trunc i32 %shr.0 to i8 + store atomic i8 %x.0, ptr %p monotonic, align 1 + %shr.1 = lshr i32 %x, 16 + %x.1 = trunc i32 %shr.1 to i8 + %gep.1 = getelementptr i8, ptr %p, i64 1 + store i8 %x.1, ptr %gep.1 + ret void +} + +define void @test_i32_multiple_pointers(i32 %x, i32 %y, ptr %p, ptr %p2) { +; CHECK-LABEL: define void @test_i32_multiple_pointers( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]], ptr [[P:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: store i32 [[X]], ptr [[P]], align 2 +; CHECK-NEXT: store i32 [[Y]], ptr [[P2]], align 2 +; CHECK-NEXT: ret void +; + %x.0 = trunc i32 %x to i16 + store i16 %x.0, ptr %p + %shr.1 = lshr i32 %x, 16 + 
%x.1 = trunc i32 %shr.1 to i16 + %gep.1 = getelementptr i8, ptr %p, i64 2 + store i16 %x.1, ptr %gep.1 + + %y.0 = trunc i32 %y to i16 + store i16 %y.0, ptr %p2 + %y.shr.1 = lshr i32 %y, 16 + %y.1 = trunc i32 %y.shr.1 to i16 + %p2.gep.1 = getelementptr i8, ptr %p2, i64 2 + store i16 %y.1, ptr %p2.gep.1 + ret void +} + +define void @test_i32_multiple_pointers_interleaved(i32 %x, i32 %y, ptr noalias %p, ptr noalias %p2) { +; CHECK-LABEL: define void @test_i32_multiple_pointers_interleaved( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]], ptr noalias [[P:%.*]], ptr noalias [[P2:%.*]]) { +; CHECK-NEXT: [[X_0:%.*]] = trunc i32 [[X]] to i16 +; CHECK-NEXT: store i16 [[X_0]], ptr [[P]], align 2 +; CHECK-NEXT: [[Y_0:%.*]] = trunc i32 [[Y]] to i16 +; CHECK-NEXT: store i16 [[Y_0]], ptr [[P2]], align 2 +; CHECK-NEXT: [[SHR_1:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[SHR_1]] to i16 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; CHECK-NEXT: store i16 [[X_1]], ptr [[GEP_1]], align 2 +; CHECK-NEXT: [[Y_SHR_1:%.*]] = lshr i32 [[Y]], 16 +; CHECK-NEXT: [[Y_1:%.*]] = trunc i32 [[Y_SHR_1]] to i16 +; CHECK-NEXT: [[P2_GEP_1:%.*]] = getelementptr i8, ptr [[P2]], i64 2 +; CHECK-NEXT: store i16 [[Y_1]], ptr [[P2_GEP_1]], align 2 +; CHECK-NEXT: ret void +; + %x.0 = trunc i32 %x to i16 + store i16 %x.0, ptr %p + %y.0 = trunc i32 %y to i16 + store i16 %y.0, ptr %p2 + + %shr.1 = lshr i32 %x, 16 + %x.1 = trunc i32 %shr.1 to i16 + %gep.1 = getelementptr i8, ptr %p, i64 2 + store i16 %x.1, ptr %gep.1 + %y.shr.1 = lshr i32 %y, 16 + %y.1 = trunc i32 %y.shr.1 to i16 + %p2.gep.1 = getelementptr i8, ptr %p2, i64 2 + store i16 %y.1, ptr %p2.gep.1 + ret void +} + +define void @test_i32_multi_use(i32 %x, ptr %p) { +; CHECK-LABEL: define void @test_i32_multi_use( +; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[X_0:%.*]] = trunc i32 [[X]] to i16 +; CHECK-NEXT: store i32 [[X]], ptr [[P]], align 2 +; CHECK-NEXT: [[SHR_1:%.*]] = lshr i32 [[X]], 16 +; 
CHECK-NEXT: [[X_1:%.*]] = trunc i32 [[SHR_1]] to i16 +; CHECK-NEXT: call void @use.i16(i16 [[X_0]]) +; CHECK-NEXT: call void @use.i16(i16 [[X_1]]) +; CHECK-NEXT: call void @use.i32(i32 [[SHR_1]]) +; CHECK-NEXT: ret void +; + %x.0 = trunc i32 %x to i16 + store i16 %x.0, ptr %p + %shr.1 = lshr i32 %x, 16 + %x.1 = trunc i32 %shr.1 to i16 + %gep.1 = getelementptr i8, ptr %p, i64 2 + store i16 %x.1, ptr %gep.1 + call void @use.i16(i16 %x.0) + call void @use.i16(i16 %x.1) + call void @use.i32(i32 %shr.1) + ret void +} + +define void @test_i32_scoped_aa_same(i32 %x, ptr %p) { +; CHECK-LABEL: define void @test_i32_scoped_aa_same( +; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: store i32 [[X]], ptr [[P]], align 2, !noalias [[META0:![0-9]+]] +; CHECK-NEXT: ret void +; + %x.0 = trunc i32 %x to i16 + store i16 %x.0, ptr %p, !noalias !0 + %shr.1 = lshr i32 %x, 16 + %x.1 = trunc i32 %shr.1 to i16 + %gep.1 = getelementptr i8, ptr %p, i64 2 + store i16 %x.1, ptr %gep.1, !noalias !0 + ret void +} + +define void @test_i32_scoped_aa_different(i32 %x, ptr %p) { +; CHECK-LABEL: define void @test_i32_scoped_aa_different( +; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: store i32 [[X]], ptr [[P]], align 2, !noalias [[META3:![0-9]+]] +; CHECK-NEXT: ret void +; + %x.0 = trunc i32 %x to i16 + store i16 %x.0, ptr %p, !noalias !0 + %shr.1 = lshr i32 %x, 16 + %x.1 = trunc i32 %shr.1 to i16 + %gep.1 = getelementptr i8, ptr %p, i64 2 + store i16 %x.1, ptr %gep.1, !noalias !3 + ret void +} + +define void @test_i32_tbaa(i32 %x, ptr %p) { +; CHECK-LABEL: define void @test_i32_tbaa( +; CHECK-SAME: i32 [[X:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: store i32 [[X]], ptr [[P]], align 2 +; CHECK-NEXT: ret void +; + %x.0 = trunc i32 %x to i16 + store i16 %x.0, ptr %p, !tbaa !6 + %shr.1 = lshr i32 %x, 16 + %x.1 = trunc i32 %shr.1 to i16 + %gep.1 = getelementptr i8, ptr %p, i64 2 + store i16 %x.1, ptr %gep.1, !tbaa !6 + ret void +} + +!0 = !{!1} +!1 = !{!1, !2} +!2 = !{!2} + +!3 = 
!{!4} +!4 = !{!4, !5} +!5 = !{!5} + +!6 = !{!7, !7, i64 0} +!7 = !{!"short", !8, i64 0} +!8 = !{!"omnipotent char", !9, i64 0} +!9 = !{!"Simple C/C++ TBAA"} +;. +; CHECK: [[META0]] = !{[[META1:![0-9]+]]} +; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]} +; CHECK: [[META2]] = distinct !{[[META2]]} +; CHECK: [[META3]] = !{} +;.