diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index fbefa2bd074dd..1081357e734a6 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1028,8 +1028,10 @@ class ScalarEvolution { /// Test if the given expression is known to be non-zero. bool isKnownNonZero(const SCEV *S); - /// Test if the given expression is known to be a power of 2. - bool isKnownToBeAPowerOfTwo(const SCEV *S, bool OrZero = false); + /// Test if the given expression is known to be a power of 2. OrNegative + /// allows matching negative powers of 2, and OrZero allows matching 0. + bool isKnownToBeAPowerOfTwo(const SCEV *S, bool OrZero = false, + bool OrNegative = false); /// Splits SCEV expression \p S into two SCEVs. One of them is obtained from /// \p S by substitution of all AddRec sub-expression related to loop \p L diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 264ac392b16d1..21923cc80aa32 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -9158,7 +9158,8 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromICmp( InnerLHS = ZExt->getOperand(); if (const SCEVAddRecExpr *AR = dyn_cast(InnerLHS); AR && !AR->hasNoSelfWrap() && AR->getLoop() == L && AR->isAffine() && - isKnownToBeAPowerOfTwo(AR->getStepRecurrence(*this), /*OrZero=*/true)) { + isKnownToBeAPowerOfTwo(AR->getStepRecurrence(*this), /*OrZero=*/true, + /*OrNegative=*/true)) { auto Flags = AR->getNoWrapFlags(); Flags = setFlags(Flags, SCEV::FlagNW); SmallVector Operands{AR->operands()}; @@ -10843,10 +10844,13 @@ bool ScalarEvolution::isKnownNonZero(const SCEV *S) { return getUnsignedRangeMin(S) != 0; } -bool ScalarEvolution::isKnownToBeAPowerOfTwo(const SCEV *S, bool OrZero) { - auto NonRecursive = [this](const SCEV *S) { +bool ScalarEvolution::isKnownToBeAPowerOfTwo(const SCEV *S, bool OrZero, + bool OrNegative) 
{ + auto NonRecursive = [this, OrNegative](const SCEV *S) { if (auto *C = dyn_cast(S)) - return C->getAPInt().isPowerOf2(); + return C->getAPInt().isPowerOf2() || + (OrNegative && C->getAPInt().isNegatedPowerOf2()); + // The vscale_range indicates vscale is a power-of-two. return isa(S) && F.hasFnAttribute(Attribute::VScaleRange); }; @@ -12790,7 +12794,8 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, if (!isLoopInvariant(RHS, L)) return false; - if (!isKnownToBeAPowerOfTwo(AR->getStepRecurrence(*this), /*OrZero=*/true)) + if (!isKnownToBeAPowerOfTwo(AR->getStepRecurrence(*this), /*OrZero=*/true, + /*OrNegative=*/true)) return false; if (!ControlsOnlyExit || !loopHasNoAbnormalExits(L)) diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll index 7c9498304e939..6f26a8a64e718 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll @@ -444,8 +444,6 @@ for.end: ; preds = %for.body, %entry ; The next two cases check to see if we can infer the flags on the IV ; of a countdown loop using vscale strides. -; TODO: We should be able to because vscale is a power of two and these -; are finite loops by assumption. 
define void @vscale_countdown_ne(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) { ; CHECK-LABEL: 'vscale_countdown_ne' @@ -455,15 +453,16 @@ define void @vscale_countdown_ne(ptr nocapture %A, i32 %n) mustprogress vscale_r ; CHECK-NEXT: %start = sub i32 %n, %vscale ; CHECK-NEXT: --> ((-1 * vscale) + %n) U: full-set S: full-set ; CHECK-NEXT: %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ] -; CHECK-NEXT: --> {((-1 * vscale) + %n),+,(-1 * vscale)}<%for.body> U: full-set S: full-set Exits: <> LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {((-1 * vscale) + %n),+,(-1 * vscale)}<%for.body> U: full-set S: full-set Exits: ((vscale * (-1 + (-1 * (((-2 * vscale) + %n) /u vscale)))) + %n) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv -; CHECK-NEXT: --> {((4 * %n) + (-4 * vscale) + %A),+,(-4 * vscale)}<%for.body> U: full-set S: full-set Exits: <> LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {((4 * %n) + (-4 * vscale) + %A),+,(-4 * vscale)}<%for.body> U: full-set S: full-set Exits: ((4 * %n) + (vscale * (-4 + (-4 * (((-2 * vscale) + %n) /u vscale)))) + %A) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %sub = sub i32 %iv, %vscale -; CHECK-NEXT: --> {((-2 * vscale) + %n),+,(-1 * vscale)}<%for.body> U: full-set S: full-set Exits: <> LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {((-2 * vscale) + %n),+,(-1 * vscale)}<%for.body> U: full-set S: full-set Exits: ((vscale * (-2 + (-1 * (((-2 * vscale) + %n) /u vscale)))) + %n) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: Determining loop execution counts for: @vscale_countdown_ne -; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. -; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count. -; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count. 
+; CHECK-NEXT: Loop %for.body: backedge-taken count is (((-2 * vscale) + %n) /u vscale) +; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483647 +; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (((-2 * vscale) + %n) /u vscale) +; CHECK-NEXT: Loop %for.body: Trip multiple is 1 ; entry: %vscale = call i32 @llvm.vscale.i32() @@ -495,15 +494,16 @@ define void @vscalex4_countdown_ne(ptr nocapture %A, i32 %n) mustprogress vscale ; CHECK-NEXT: %start = sub i32 %n, %VF ; CHECK-NEXT: --> ((-4 * vscale) + %n) U: full-set S: full-set ; CHECK-NEXT: %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ] -; CHECK-NEXT: --> {((-4 * vscale) + %n),+,(-4 * vscale)}<%for.body> U: full-set S: full-set Exits: <> LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {((-4 * vscale) + %n),+,(-4 * vscale)}<%for.body> U: full-set S: full-set Exits: ((vscale * (-4 + (-4 * (((-8 * vscale) + %n) /u (4 * vscale))))) + %n) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv -; CHECK-NEXT: --> {((4 * %n) + (-16 * vscale) + %A),+,(-16 * vscale)}<%for.body> U: full-set S: full-set Exits: <> LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {((4 * %n) + (-16 * vscale) + %A),+,(-16 * vscale)}<%for.body> U: full-set S: full-set Exits: ((4 * %n) + (vscale * (-16 + (-16 * (((-8 * vscale) + %n) /u (4 * vscale))))) + %A) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %sub = sub i32 %iv, %VF -; CHECK-NEXT: --> {((-8 * vscale) + %n),+,(-4 * vscale)}<%for.body> U: full-set S: full-set Exits: <> LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {((-8 * vscale) + %n),+,(-4 * vscale)}<%for.body> U: full-set S: full-set Exits: ((vscale * (-8 + (-4 * (((-8 * vscale) + %n) /u (4 * vscale))))) + %n) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: Determining loop execution counts for: @vscalex4_countdown_ne -; CHECK-NEXT: Loop %for.body: Unpredictable 
backedge-taken count. -; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count. -; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count. +; CHECK-NEXT: Loop %for.body: backedge-taken count is (((-8 * vscale) + %n) /u (4 * vscale)) +; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 536870911 +; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (((-8 * vscale) + %n) /u (4 * vscale)) +; CHECK-NEXT: Loop %for.body: Trip multiple is 1 ; entry: %vscale = call i32 @llvm.vscale.i32()