Skip to content

Commit b54a78d

Browse files
committed
[LV,LAA] Don't vectorize loops with load and store to invar address.
Code checking stores to invariant addresses and reductions made an incorrect assumption that the case of both a load & store to the same invariant address does not need to be handled. In some cases when vectorizing with runtime checks, there may be dependences with a load and store to the same address, storing a reduction value. Update LAA to separately track if there was a store-store and a load-store dependence with an invariant addresses. Bail out early if there as a load-store dependence with invariant address. If there was a store-store one, still apply the logic checking if they all store a reduction.
1 parent 677ddde commit b54a78d

File tree

4 files changed

+43
-18
lines changed

4 files changed

+43
-18
lines changed

llvm/include/llvm/Analysis/LoopAccessAnalysis.h

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,11 @@ class LoopAccessInfo {
579579
AAResults *AA, DominatorTree *DT, LoopInfo *LI);
580580

581581
/// Return true we can analyze the memory accesses in the loop and there are
582-
/// no memory dependence cycles.
582+
/// no memory dependence cycles. Note that for dependences between loads &
583+
/// stores with uniform addresses,
584+
/// hasStoreStoreDependenceInvolvingLoopInvariantAddress and
585+
/// hasLoadStoreDependenceInvolvingLoopInvariantAddress also need to be
586+
/// checked.
583587
bool canVectorizeMemory() const { return CanVecMem; }
584588

585589
/// Return true if there is a convergent operation in the loop. There may
@@ -632,10 +636,16 @@ class LoopAccessInfo {
632636
/// Print the information about the memory accesses in the loop.
633637
void print(raw_ostream &OS, unsigned Depth = 0) const;
634638

635-
/// If the loop has memory dependence involving an invariant address, i.e. two
636-
/// stores or a store and a load, then return true, else return false.
637-
bool hasDependenceInvolvingLoopInvariantAddress() const {
638-
return HasDependenceInvolvingLoopInvariantAddress;
639+
/// Return true if the loop has memory dependence involving two stores to an
640+
/// invariant address, else return false.
641+
bool hasStoreStoreDependenceInvolvingLoopInvariantAddress() const {
642+
return HasStoreStoreDependenceInvolvingLoopInvariantAddress;
643+
}
644+
645+
/// Return true if the loop has memory dependence involving a load and a store
646+
/// to an invariant address, else return false.
647+
bool hasLoadStoreDependenceInvolvingLoopInvariantAddress() const {
648+
return HasLoadStoreDependenceInvolvingLoopInvariantAddress;
639649
}
640650

641651
/// Return the list of stores to invariant addresses.
@@ -697,8 +707,12 @@ class LoopAccessInfo {
697707
bool CanVecMem = false;
698708
bool HasConvergentOp = false;
699709

700-
/// Indicator that there are non vectorizable stores to a uniform address.
701-
bool HasDependenceInvolvingLoopInvariantAddress = false;
710+
/// Indicator that there are two non vectorizable stores to the same uniform
711+
/// address.
712+
bool HasStoreStoreDependenceInvolvingLoopInvariantAddress = false;
713+
/// Indicator that there is non vectorizable load and store to the same
714+
/// uniform address.
715+
bool HasLoadStoreDependenceInvolvingLoopInvariantAddress = false;
702716

703717
/// List of stores to invariant addresses.
704718
SmallVector<StoreInst *> StoresToInvariantAddresses;

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2537,7 +2537,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
25372537
if (isInvariant(Ptr)) {
25382538
// Record store instructions to loop invariant addresses
25392539
StoresToInvariantAddresses.push_back(ST);
2540-
HasDependenceInvolvingLoopInvariantAddress |=
2540+
HasStoreStoreDependenceInvolvingLoopInvariantAddress |=
25412541
!UniformStores.insert(Ptr).second;
25422542
}
25432543

@@ -2593,7 +2593,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
25932593
if (UniformStores.count(Ptr)) {
25942594
LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
25952595
"load and uniform store to the same address!\n");
2596-
HasDependenceInvolvingLoopInvariantAddress = true;
2596+
HasLoadStoreDependenceInvolvingLoopInvariantAddress = true;
25972597
}
25982598

25992599
MemoryLocation Loc = MemoryLocation::get(LD);
@@ -3057,9 +3057,13 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
30573057
PtrRtChecking->print(OS, Depth);
30583058
OS << "\n";
30593059

3060-
OS.indent(Depth) << "Non vectorizable stores to invariant address were "
3061-
<< (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
3062-
<< "found in loop.\n";
3060+
OS.indent(Depth)
3061+
<< "Non vectorizable stores to invariant address were "
3062+
<< (HasStoreStoreDependenceInvolvingLoopInvariantAddress ||
3063+
HasLoadStoreDependenceInvolvingLoopInvariantAddress
3064+
? ""
3065+
: "not ")
3066+
<< "found in loop.\n";
30633067

30643068
OS.indent(Depth) << "SCEV assumptions:\n";
30653069
PSE->getPredicate().print(OS, Depth);

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,6 +1067,15 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
10671067
if (!LAI->canVectorizeMemory())
10681068
return false;
10691069

1070+
if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
1071+
reportVectorizationFailure("We don't allow storing to uniform addresses",
1072+
"write to a loop invariant address could not "
1073+
"be vectorized",
1074+
"CantVectorizeStoreToLoopInvariantAddress", ORE,
1075+
TheLoop);
1076+
return false;
1077+
}
1078+
10701079
// We can vectorize stores to invariant address when final reduction value is
10711080
// guaranteed to be stored at the end of the loop. Also, if decision to
10721081
// vectorize loop is made, runtime checks are added so as to make sure that
@@ -1102,13 +1111,12 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
11021111
}
11031112
}
11041113

1105-
if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
1114+
if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {
11061115
// For each invariant address, check its last stored value is the result
11071116
// of one of our reductions.
11081117
//
1109-
// We do not check if dependence with loads exists because they are
1110-
// currently rejected earlier in LoopAccessInfo::analyzeLoop. In case this
1111-
// behaviour changes we have to modify this code.
1118+
// We do not check if dependence with loads exists because that is already
1119+
// checked via hasLoadStoreDependenceInvolvingLoopInvariantAddress.
11121120
ScalarEvolution *SE = PSE.getSE();
11131121
SmallVector<StoreInst *, 4> UnhandledStores;
11141122
for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {

llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,8 @@ exit:
123123
; @reduc_store_load with a non-constant dependence distance, resulting in
124124
; vectorization with runtime checks.
125125
;
126-
; FIXME: currently this gets vectorized incorrectly.
127126
; CHECK-LABEL: @reduc_store_load_with_non_constant_distance_dependence
128-
; CHECK: vector.body:
127+
; CHECK-NOT: vector.body:
129128
define void @reduc_store_load_with_non_constant_distance_dependence(ptr %dst, ptr noalias %dst.2, i64 %off) {
130129
entry:
131130
%gep.dst = getelementptr inbounds i32, ptr %dst, i64 42

0 commit comments

Comments
 (0)