Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/include/llvm/Analysis/LoopAccessAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -893,7 +893,7 @@ replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
/// result of this function is undefined.
LLVM_ABI std::optional<int64_t>
getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
const Loop *Lp,
const Loop *Lp, const DominatorTree &DT,
const DenseMap<Value *, const SCEV *> &StridesMap =
DenseMap<Value *, const SCEV *>(),
bool Assume = false, bool ShouldCheckWrap = true);
Expand Down
44 changes: 30 additions & 14 deletions llvm/lib/Analysis/LoopAccessAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -806,11 +806,11 @@ class AccessAnalysis {
typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;

AccessAnalysis(const Loop *TheLoop, AAResults *AA, const LoopInfo *LI,
MemoryDepChecker::DepCandidates &DA,
DominatorTree &DT, MemoryDepChecker::DepCandidates &DA,
PredicatedScalarEvolution &PSE,
SmallPtrSetImpl<MDNode *> &LoopAliasScopes)
: TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE),
LoopAliasScopes(LoopAliasScopes) {
: TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DT(DT), DepCands(DA),
PSE(PSE), LoopAliasScopes(LoopAliasScopes) {
// We're analyzing dependences across loop iterations.
BAA.enableCrossIterationMode();
}
Expand Down Expand Up @@ -934,6 +934,9 @@ class AccessAnalysis {
/// The LoopInfo of the loop being checked.
const LoopInfo *LI;

/// The dominator tree of the function.
DominatorTree &DT;

/// Sets of potentially dependent accesses - members of one set share an
/// underlying pointer. The set "CheckDeps" identifies which sets really need a
/// dependence check.
Expand Down Expand Up @@ -1015,6 +1018,7 @@ getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy,
/// information from the IR pointer value to determine no-wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE, const SCEVAddRecExpr *AR,
Value *Ptr, Type *AccessTy, const Loop *L, bool Assume,
const DominatorTree &DT,
std::optional<int64_t> Stride = std::nullopt) {
// FIXME: This should probably only return true for NUW.
if (AR->getNoWrapFlags(SCEV::NoWrapMask))
Expand All @@ -1029,8 +1033,18 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE, const SCEVAddRecExpr *AR,
// case, the GEP would be poison and any memory access dependent on it would
// be immediate UB when executed.
if (auto *GEP = dyn_cast_if_present<GetElementPtrInst>(Ptr);
GEP && GEP->hasNoUnsignedSignedWrap())
return true;
GEP && GEP->hasNoUnsignedSignedWrap()) {
// For the above reasoning to apply, the pointer must be dereferenced in
// every iteration.
if (L->getHeader() == L->getLoopLatch() ||
any_of(GEP->users(), [L, &DT, GEP](User *U) {
if (getLoadStorePointerOperand(U) != GEP)
return false;
BasicBlock *UserBB = cast<Instruction>(U)->getParent();
return !LoopAccessInfo::blockNeedsPredication(UserBB, L, &DT);
}))
return true;
}

if (!Stride)
Stride = getStrideFromAddRec(AR, L, AccessTy, Ptr, PSE);
Expand Down Expand Up @@ -1293,7 +1307,7 @@ bool AccessAnalysis::createCheckForAccess(
}

if (!isNoWrap(PSE, AR, RTCheckPtrs.size() == 1 ? Ptr : nullptr, AccessTy,
TheLoop, Assume))
TheLoop, Assume, DT))
return false;
}

Expand Down Expand Up @@ -1606,7 +1620,7 @@ void AccessAnalysis::processMemAccesses() {
/// Check whether the access through \p Ptr has a constant stride.
std::optional<int64_t>
llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
const Loop *Lp,
const Loop *Lp, const DominatorTree &DT,
const DenseMap<Value *, const SCEV *> &StridesMap,
bool Assume, bool ShouldCheckWrap) {
const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
Expand All @@ -1630,7 +1644,7 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
if (!ShouldCheckWrap || !Stride)
return Stride;

if (isNoWrap(PSE, AR, Ptr, AccessTy, Lp, Assume, Stride))
if (isNoWrap(PSE, AR, Ptr, AccessTy, Lp, Assume, DT, Stride))
return Stride;

LLVM_DEBUG(
Expand Down Expand Up @@ -2047,10 +2061,10 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
BPtr->getType()->getPointerAddressSpace())
return MemoryDepChecker::Dependence::Unknown;

std::optional<int64_t> StrideAPtr =
getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true, true);
std::optional<int64_t> StrideBPtr =
getPtrStride(PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true, true);
std::optional<int64_t> StrideAPtr = getPtrStride(
PSE, ATy, APtr, InnermostLoop, *DT, SymbolicStrides, true, true);
std::optional<int64_t> StrideBPtr = getPtrStride(
PSE, BTy, BPtr, InnermostLoop, *DT, SymbolicStrides, true, true);

const SCEV *Src = PSE.getSCEV(APtr);
const SCEV *Sink = PSE.getSCEV(BPtr);
Expand Down Expand Up @@ -2627,7 +2641,8 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
}

MemoryDepChecker::DepCandidates DepCands;
AccessAnalysis Accesses(TheLoop, AA, LI, DepCands, *PSE, LoopAliasScopes);
AccessAnalysis Accesses(TheLoop, AA, LI, *DT, DepCands, *PSE,
LoopAliasScopes);

// Holds the analyzed pointers. We don't want to call getUnderlyingObjects
// multiple times on the same object. If the ptr is accessed twice, once
Expand Down Expand Up @@ -2691,7 +2706,8 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
bool IsReadOnlyPtr = false;
Type *AccessTy = getLoadStoreType(LD);
if (Seen.insert({Ptr, AccessTy}).second ||
!getPtrStride(*PSE, AccessTy, Ptr, TheLoop, SymbolicStrides)) {
!getPtrStride(*PSE, AccessTy, Ptr, TheLoop, *DT, SymbolicStrides, false,
true)) {
++NumReads;
IsReadOnlyPtr = true;
}
Expand Down
11 changes: 6 additions & 5 deletions llvm/lib/Analysis/VectorUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1387,9 +1387,9 @@ void InterleavedAccessInfo::collectConstStrideAccesses(
// wrap around the address space we would do a memory access at nullptr
// even without the transformation. The wrapping checks are therefore
// deferred until after we've formed the interleaved groups.
int64_t Stride =
getPtrStride(PSE, ElementTy, Ptr, TheLoop, Strides,
/*Assume=*/true, /*ShouldCheckWrap=*/false).value_or(0);
int64_t Stride = getPtrStride(PSE, ElementTy, Ptr, TheLoop, *DT, Strides,
/*Assume=*/true, /*ShouldCheckWrap=*/false)
.value_or(0);

const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
Expand Down Expand Up @@ -1643,8 +1643,9 @@ void InterleavedAccessInfo::analyzeInterleaving(
assert(Member && "Group member does not exist");
Value *MemberPtr = getLoadStorePointerOperand(Member);
Type *AccessTy = getLoadStoreType(Member);
if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, Strides,
/*Assume=*/false, /*ShouldCheckWrap=*/true).value_or(0))
if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, *DT, Strides,
/*Assume=*/false, /*ShouldCheckWrap=*/true)
.value_or(0))
return false;
LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
<< FirstOrLast
Expand Down
10 changes: 6 additions & 4 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6207,7 +6207,8 @@ AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy,
}

static bool containsDecreasingPointers(Loop *TheLoop,
PredicatedScalarEvolution *PSE) {
PredicatedScalarEvolution *PSE,
const DominatorTree &DT) {
const auto &Strides = DenseMap<Value *, const SCEV *>();
for (BasicBlock *BB : TheLoop->blocks()) {
// Scan the instructions in the block and look for addresses that are
Expand All @@ -6216,8 +6217,8 @@ static bool containsDecreasingPointers(Loop *TheLoop,
if (isa<LoadInst>(&I) || isa<StoreInst>(&I)) {
Value *Ptr = getLoadStorePointerOperand(&I);
Type *AccessTy = getLoadStoreType(&I);
if (getPtrStride(*PSE, AccessTy, Ptr, TheLoop, Strides, /*Assume=*/true,
/*ShouldCheckWrap=*/false)
if (getPtrStride(*PSE, AccessTy, Ptr, TheLoop, DT, Strides,
/*Assume=*/true, /*ShouldCheckWrap=*/false)
.value_or(0) < 0)
return true;
}
Expand Down Expand Up @@ -6262,7 +6263,8 @@ bool AArch64TTIImpl::preferPredicateOverEpilogue(TailFoldingInfo *TFI) const {
// negative strides. This will require extra work to reverse the loop
// predicate, which may be expensive.
if (containsDecreasingPointers(TFI->LVL->getLoop(),
TFI->LVL->getPredicatedScalarEvolution()))
TFI->LVL->getPredicatedScalarEvolution(),
*TFI->LVL->getDominatorTree()))
Required |= TailFoldingOpts::Reverse;
if (Required == TailFoldingOpts::Disabled)
Required |= TailFoldingOpts::Simple;
Expand Down
9 changes: 6 additions & 3 deletions llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2448,7 +2448,8 @@ static bool canTailPredicateInstruction(Instruction &I, int &ICmpCount) {
//
static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
const DataLayout &DL,
const LoopAccessInfo *LAI) {
const LoopAccessInfo *LAI,
const DominatorTree &DT) {
LLVM_DEBUG(dbgs() << "Tail-predication: checking allowed instructions\n");

// If there are live-out values, it is probably a reduction. We can predicate
Expand Down Expand Up @@ -2498,7 +2499,8 @@ static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
if (isa<StoreInst>(I) || isa<LoadInst>(I)) {
Value *Ptr = getLoadStorePointerOperand(&I);
Type *AccessTy = getLoadStoreType(&I);
int64_t NextStride = getPtrStride(PSE, AccessTy, Ptr, L).value_or(0);
int64_t NextStride =
getPtrStride(PSE, AccessTy, Ptr, L, DT).value_or(0);
if (NextStride == 1) {
// TODO: for now only allow consecutive strides of 1. We could support
// other strides as long as it is uniform, but let's keep it simple
Expand Down Expand Up @@ -2585,7 +2587,8 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(TailFoldingInfo *TFI) const {
return false;
}

return canTailPredicateLoop(L, LI, *SE, DL, LVL->getLAI());
return canTailPredicateLoop(L, LI, *SE, DL, LVL->getLAI(),
*LVL->getDominatorTree());
}

TailFoldingStyle
Expand Down
16 changes: 9 additions & 7 deletions llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,8 @@ struct StoreToLoadForwardingCandidate {
/// Return true if the dependence from the store to the load has an
/// absolute distance of one.
/// E.g. A[i+1] = A[i] (or A[i-1] = A[i] for descending loop)
bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
Loop *L) const {
bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE, Loop *L,
const DominatorTree &DT) const {
Value *LoadPtr = Load->getPointerOperand();
Value *StorePtr = Store->getPointerOperand();
Type *LoadType = getLoadStoreType(Load);
Expand All @@ -102,8 +102,10 @@ struct StoreToLoadForwardingCandidate {
DL.getTypeSizeInBits(getLoadStoreType(Store)) &&
"Should be a known dependence");

int64_t StrideLoad = getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0);
int64_t StrideStore = getPtrStride(PSE, LoadType, StorePtr, L).value_or(0);
int64_t StrideLoad =
getPtrStride(PSE, LoadType, LoadPtr, L, DT).value_or(0);
int64_t StrideStore =
getPtrStride(PSE, LoadType, StorePtr, L, DT).value_or(0);
if (!StrideLoad || !StrideStore || StrideLoad != StrideStore)
return false;

Expand Down Expand Up @@ -287,8 +289,8 @@ class LoadEliminationForLoop {
// so deciding which one forwards is easy. The later one forwards as
// long as they both have a dependence distance of one to the load.
if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
Cand.isDependenceDistanceOfOne(PSE, L) &&
OtherCand->isDependenceDistanceOfOne(PSE, L)) {
Cand.isDependenceDistanceOfOne(PSE, L, *DT) &&
OtherCand->isDependenceDistanceOfOne(PSE, L, *DT)) {
// They are in the same block, the later one will forward to the load.
if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store))
OtherCand = &Cand;
Expand Down Expand Up @@ -538,7 +540,7 @@ class LoadEliminationForLoop {

// Check whether the SCEV difference is the same as the induction step,
// thus we load the value in the next iteration.
if (!Cand.isDependenceDistanceOfOne(PSE, L))
if (!Cand.isDependenceDistanceOfOne(PSE, L, *DT))
continue;

assert(isa<SCEVAddRecExpr>(PSE.getSCEV(Cand.Load->getPointerOperand())) &&
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -462,8 +462,9 @@ int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,

bool CanAddPredicate = !llvm::shouldOptimizeForSize(
TheLoop->getHeader(), PSI, BFI, PGSOQueryType::IRPass);
int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
CanAddPredicate, false).value_or(0);
int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, *DT, Strides,
CanAddPredicate, false)
.value_or(0);
if (Stride == 1 || Stride == -1)
return Stride;
return 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
; s0 += (1ULL << 62) + 1;
; s1 += (1ULL << 62) + 2;
; }
; FIXME: We cannot use inbounds on idx.0, idx.1 to infer no-wrap (and determine
; We cannot use inbounds on idx.0, idx.1 to infer no-wrap (and determine
; there are no dependences), as the pointers are not dereferenced in all loop iterations.
define void @test_inbounds_gep_used_in_predicated_block(ptr %A, i64 %n) {
; CHECK-LABEL: 'test_inbounds_gep_used_in_predicated_block'
Expand All @@ -19,9 +19,14 @@ define void @test_inbounds_gep_used_in_predicated_block(ptr %A, i64 %n) {
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: %A High: (-4611686018427387705 + %A))
; CHECK-NEXT: Member: {%A,+,4611686018427387906}<%loop.header>
; CHECK-NEXT: Member: {%A,+,4611686018427387905}<%loop.header>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-NEXT: {%A,+,4611686018427387906}<%loop.header> Added Flags: <nusw>
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
Expand Down Expand Up @@ -63,9 +68,14 @@ define void @test_inbounds_gep_used_in_predicated_block_stored_value_operand(ptr
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: %A High: (-4611686018427387705 + %A))
; CHECK-NEXT: Member: {%A,+,4611686018427387906}<%loop.header>
; CHECK-NEXT: Member: {%A,+,4611686018427387905}<%loop.header>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-NEXT: {%A,+,4611686018427387906}<%loop.header> Added Flags: <nusw>
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
Expand Down Expand Up @@ -109,9 +119,14 @@ define void @test_inbounds_gep_used_in_predicated_block_non_memop_user(ptr %A, i
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: %A High: (-4611686018427387705 + %A))
; CHECK-NEXT: Member: {%A,+,4611686018427387906}<%loop.header>
; CHECK-NEXT: Member: {%A,+,4611686018427387905}<%loop.header>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
; CHECK-NEXT: {%A,+,4611686018427387906}<%loop.header> Added Flags: <nusw>
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
;
Expand Down
Loading