diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 40a7f8043034e..40de36d81ddd2 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -83,8 +83,8 @@ static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
     //  == (ShVal0 << ShAmt) | (ShVal1 >> (Width -ShAmt))
     if (match(V, m_OneUse(m_c_Or(
                      m_Shl(m_Value(ShVal0), m_Value(ShAmt)),
-                     m_LShr(m_Value(ShVal1),
-                            m_Sub(m_SpecificInt(Width), m_Deferred(ShAmt))))))) {
+                     m_LShr(m_Value(ShVal1), m_Sub(m_SpecificInt(Width),
+                                                   m_Deferred(ShAmt))))))) {
       return Intrinsic::fshl;
     }

@@ -617,7 +617,7 @@ struct LoadOps {
   LoadInst *RootInsert = nullptr;
   bool FoundRoot = false;
   uint64_t LoadSize = 0;
-  const APInt *Shift = nullptr;
+  uint64_t Shift = 0;
   Type *ZextType;
   AAMDNodes AATags;
 };
@@ -627,17 +627,15 @@ struct LoadOps {
 // (ZExt(L1) << shift1) | ZExt(L2) -> ZExt(L3)
 static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
                                AliasAnalysis &AA) {
-  const APInt *ShAmt2 = nullptr;
+  uint64_t ShAmt2;
   Value *X;
   Instruction *L1, *L2;

   // Go to the last node with loads.
-  if (match(V, m_OneUse(m_c_Or(
-                   m_Value(X),
-                   m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))),
-                                  m_APInt(ShAmt2)))))) ||
-      match(V, m_OneUse(m_Or(m_Value(X),
-                             m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))))))) {
+  if (match(V,
+            m_OneUse(m_c_Or(m_Value(X), m_OneUse(m_ShlOrSelf(
+                                            m_OneUse(m_ZExt(m_Instruction(L2))),
+                                            ShAmt2)))))) {
     if (!foldLoadsRecursive(X, LOps, DL, AA) && LOps.FoundRoot)
       // Avoid Partial chain merge.
       return false;
@@ -646,11 +644,10 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,

   // Check if the pattern has loads
   LoadInst *LI1 = LOps.Root;
-  const APInt *ShAmt1 = LOps.Shift;
+  uint64_t ShAmt1 = LOps.Shift;
   if (LOps.FoundRoot == false &&
-      (match(X, m_OneUse(m_ZExt(m_Instruction(L1)))) ||
-       match(X, m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L1)))),
-                               m_APInt(ShAmt1)))))) {
+      match(X, m_OneUse(
+                   m_ShlOrSelf(m_OneUse(m_ZExt(m_Instruction(L1))), ShAmt1)))) {
     LI1 = dyn_cast<LoadInst>(L1);
   }
   LoadInst *LI2 = dyn_cast<LoadInst>(L2);
@@ -726,13 +723,6 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
   if (IsBigEndian)
     std::swap(ShAmt1, ShAmt2);

-  // Find Shifts values.
-  uint64_t Shift1 = 0, Shift2 = 0;
-  if (ShAmt1)
-    Shift1 = ShAmt1->getZExtValue();
-  if (ShAmt2)
-    Shift2 = ShAmt2->getZExtValue();
-
   // First load is always LI1. This is where we put the new load.
   // Use the merged load size available from LI1 for forward loads.
   if (LOps.FoundRoot) {
@@ -747,7 +737,7 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
   uint64_t ShiftDiff = IsBigEndian ? LoadSize2 : LoadSize1;
   uint64_t PrevSize =
       DL.getTypeStoreSize(IntegerType::get(LI1->getContext(), LoadSize1));
-  if ((Shift2 - Shift1) != ShiftDiff || (Offset2 - Offset1) != PrevSize)
+  if ((ShAmt2 - ShAmt1) != ShiftDiff || (Offset2 - Offset1) != PrevSize)
     return false;

   // Update LOps
@@ -824,7 +814,7 @@ static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
   // Check if shift needed. We need to shift with the amount of load1
   // shift if not zero.
   if (LOps.Shift)
-    NewOp = Builder.CreateShl(NewOp, ConstantInt::get(I.getContext(), *LOps.Shift));
+    NewOp = Builder.CreateShl(NewOp, LOps.Shift);
   I.replaceAllUsesWith(NewOp);

   return true;
@@ -860,11 +850,9 @@ static std::optional<PartStore> matchPartStore(Instruction &I,
     return std::nullopt;

   uint64_t ValWidth = StoredTy->getPrimitiveSizeInBits();
-  uint64_t ValOffset = 0;
+  uint64_t ValOffset;
   Value *Val;
-  if (!match(StoredVal, m_CombineOr(m_Trunc(m_LShr(m_Value(Val),
-                                                   m_ConstantInt(ValOffset))),
-                                    m_Trunc(m_Value(Val)))))
+  if (!match(StoredVal, m_Trunc(m_LShrOrSelf(m_Value(Val), ValOffset))))
     return std::nullopt;

   Value *Ptr = Store->getPointerOperand();
diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
index f6716c817b739..46ec9e0a50842 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
@@ -101,6 +101,107 @@ define i32 @loadCombine_4consecutive(ptr %p) {
   ret i32 %o3
 }

+define i32 @loadCombine_4consecutive_commuted(ptr %p) {
+; LE-LABEL: @loadCombine_4consecutive_commuted(
+; LE-NEXT:    [[O3:%.*]] = load i32, ptr [[P:%.*]], align 1
+; LE-NEXT:    ret i32 [[O3]]
+;
+; BE-LABEL: @loadCombine_4consecutive_commuted(
+; BE-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT:    [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT:    [[O1:%.*]] = or i32 [[S2]], [[S3]]
+; BE-NEXT:    [[O2:%.*]] = or i32 [[S4]], [[O1]]
+; BE-NEXT:    [[O3:%.*]] = or i32 [[E1]], [[O2]]
+; BE-NEXT:    ret i32 [[O3]]
+;
+  %p1 = getelementptr i8, ptr %p, i32 1
+  %p2 = getelementptr i8, ptr %p, i32 2
+  %p3 = getelementptr i8, ptr %p, i32 3
+  %l1 = load i8, ptr %p
+  %l2 = load i8, ptr %p1
+  %l3 = load i8, ptr %p2
+  %l4 = load i8, ptr %p3
+
+  %e1 = zext i8 %l1 to i32
+  %e2 = zext i8 %l2 to i32
+  %e3 = zext i8 %l3 to i32
+  %e4 = zext i8 %l4 to i32
+
+  %s2 = shl i32 %e2, 8
+  %s3 = shl i32 %e3, 16
+  %s4 = shl i32 %e4, 24
+
+  %o1 = or i32 %s2, %s3
+  %o2 = or i32 %s4, %o1
+  %o3 = or i32 %e1, %o2
+  ret i32 %o3
+}
+
+define i32 @loadCombine_4consecutive_multiuse(ptr %p) {
+; LE-LABEL: @loadCombine_4consecutive_multiuse(
+; LE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 3
+; LE-NEXT:    [[O3:%.*]] = load i32, ptr [[P]], align 1
+; LE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; LE-NEXT:    call void @use(i8 [[L4]])
+; LE-NEXT:    ret i32 [[O3]]
+;
+; BE-LABEL: @loadCombine_4consecutive_multiuse(
+; BE-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; BE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; BE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
+; BE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1
+; BE-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1
+; BE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1
+; BE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1
+; BE-NEXT:    call void @use(i8 [[L4]])
+; BE-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
+; BE-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
+; BE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
+; BE-NEXT:    [[E4:%.*]] = zext i8 [[L4]] to i32
+; BE-NEXT:    [[S2:%.*]] = shl i32 [[E2]], 8
+; BE-NEXT:    [[S3:%.*]] = shl i32 [[E3]], 16
+; BE-NEXT:    [[S4:%.*]] = shl i32 [[E4]], 24
+; BE-NEXT:    [[O1:%.*]] = or i32 [[E1]], [[S2]]
+; BE-NEXT:    [[O2:%.*]] = or i32 [[O1]], [[S3]]
+; BE-NEXT:    [[O3:%.*]] = or i32 [[O2]], [[S4]]
+; BE-NEXT:    ret i32 [[O3]]
+;
+  %p1 = getelementptr i8, ptr %p, i32 1
+  %p2 = getelementptr i8, ptr %p, i32 2
+  %p3 = getelementptr i8, ptr %p, i32 3
+  %l1 = load i8, ptr %p
+  %l2 = load i8, ptr %p1
+  %l3 = load i8, ptr %p2
+  %l4 = load i8, ptr %p3
+  call void @use(i8 %l4)
+
+  %e1 = zext i8 %l1 to i32
+  %e2 = zext i8 %l2 to i32
+  %e3 = zext i8 %l3 to i32
+  %e4 = zext i8 %l4 to i32
+
+  %s2 = shl i32 %e2, 8
+  %s3 = shl i32 %e3, 16
+  %s4 = shl i32 %e4, 24
+
+  %o1 = or i32 %e1, %s2
+  %o2 = or i32 %o1, %s3
+  %o3 = or i32 %o2, %s4
+  ret i32 %o3
+}
+
 define i32 @loadCombine_4consecutive_BE(ptr %p) {
 ; LE-LABEL: @loadCombine_4consecutive_BE(
 ; LE-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1