Skip to content

Commit 20675ee

Browse files
committed
[SLP] Check all copyable children for non-schedulable parent nodes
If the parent node is non-schedulable and includes several copies of the same instruction, its operands might be replaced by copyable nodes in multiple child nodes, and if the instruction is commutative, they can be used in different operands. The compiler must account for this possibility, taking into account that non-copyable children are scheduled only once for the same parent instruction. Fixes #164242
1 parent 7aa6c62 commit 20675ee

File tree

2 files changed

+104
-50
lines changed

2 files changed

+104
-50
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 67 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -5577,62 +5577,79 @@ class BoUpSLP {
55775577
}
55785578
// Decrement the unscheduled counter and insert to ready list if
55795579
// ready.
5580-
auto DecrUnschedForInst = [&](Instruction *I, TreeEntry *UserTE,
5581-
unsigned OpIdx) {
5582-
if (!ScheduleCopyableDataMap.empty()) {
5583-
const EdgeInfo EI = {UserTE, OpIdx};
5584-
if (ScheduleCopyableData *CD = getScheduleCopyableData(EI, I)) {
5585-
DecrUnsched(CD, /*IsControl=*/false);
5586-
return;
5587-
}
5588-
}
5589-
auto It = OperandsUses.find(I);
5590-
assert(It != OperandsUses.end() && "Operand not found");
5591-
if (It->second > 0) {
5592-
--It->getSecond();
5593-
assert(TotalOpCount > 0 && "No more operands to decrement");
5594-
--TotalOpCount;
5595-
if (ScheduleData *OpSD = getScheduleData(I))
5596-
DecrUnsched(OpSD, /*IsControl=*/false);
5597-
}
5598-
};
5580+
auto DecrUnschedForInst =
5581+
[&](Instruction *I, TreeEntry *UserTE, unsigned OpIdx,
5582+
SmallDenseSet<std::pair<const ScheduleEntity *, unsigned>>
5583+
&Checked) {
5584+
if (!ScheduleCopyableDataMap.empty()) {
5585+
const EdgeInfo EI = {UserTE, OpIdx};
5586+
if (ScheduleCopyableData *CD =
5587+
getScheduleCopyableData(EI, I)) {
5588+
if (!Checked.insert(std::make_pair(CD, OpIdx)).second)
5589+
return;
5590+
DecrUnsched(CD, /*IsControl=*/false);
5591+
return;
5592+
}
5593+
}
5594+
auto It = OperandsUses.find(I);
5595+
assert(It != OperandsUses.end() && "Operand not found");
5596+
if (It->second > 0) {
5597+
--It->getSecond();
5598+
assert(TotalOpCount > 0 && "No more operands to decrement");
5599+
--TotalOpCount;
5600+
if (ScheduleData *OpSD = getScheduleData(I)) {
5601+
if (!Checked.insert(std::make_pair(OpSD, OpIdx)).second)
5602+
return;
5603+
DecrUnsched(OpSD, /*IsControl=*/false);
5604+
}
5605+
}
5606+
};
55995607

56005608
for (ScheduleBundle *Bundle : Bundles) {
56015609
if (ScheduleCopyableDataMap.empty() && TotalOpCount == 0)
56025610
break;
56035611
// Need to search for the lane since the tree entry can be
56045612
// reordered.
5605-
int Lane = std::distance(Bundle->getTreeEntry()->Scalars.begin(),
5606-
find(Bundle->getTreeEntry()->Scalars, In));
5607-
assert(Lane >= 0 && "Lane not set");
5608-
if (isa<StoreInst>(In) &&
5609-
!Bundle->getTreeEntry()->ReorderIndices.empty())
5610-
Lane = Bundle->getTreeEntry()->ReorderIndices[Lane];
5611-
assert(Lane < static_cast<int>(
5612-
Bundle->getTreeEntry()->Scalars.size()) &&
5613-
"Couldn't find extract lane");
5614-
5615-
// Since vectorization tree is being built recursively this
5616-
// assertion ensures that the tree entry has all operands set before
5617-
// reaching this code. Couple of exceptions known at the moment are
5618-
// extracts where their second (immediate) operand is not added.
5619-
// Since immediates do not affect scheduler behavior this is
5620-
// considered okay.
5621-
assert(In &&
5622-
(isa<ExtractValueInst, ExtractElementInst, CallBase>(In) ||
5623-
In->getNumOperands() ==
5624-
Bundle->getTreeEntry()->getNumOperands() ||
5625-
Bundle->getTreeEntry()->isCopyableElement(In)) &&
5626-
"Missed TreeEntry operands?");
5627-
5628-
for (unsigned OpIdx :
5629-
seq<unsigned>(Bundle->getTreeEntry()->getNumOperands()))
5630-
if (auto *I = dyn_cast<Instruction>(
5631-
Bundle->getTreeEntry()->getOperand(OpIdx)[Lane])) {
5632-
LLVM_DEBUG(dbgs() << "SLP: check for readiness (def): " << *I
5633-
<< "\n");
5634-
DecrUnschedForInst(I, Bundle->getTreeEntry(), OpIdx);
5635-
}
5613+
auto *It = find(Bundle->getTreeEntry()->Scalars, In);
5614+
SmallDenseSet<std::pair<const ScheduleEntity *, unsigned>> Checked;
5615+
do {
5616+
int Lane =
5617+
std::distance(Bundle->getTreeEntry()->Scalars.begin(), It);
5618+
assert(Lane >= 0 && "Lane not set");
5619+
if (isa<StoreInst>(In) &&
5620+
!Bundle->getTreeEntry()->ReorderIndices.empty())
5621+
Lane = Bundle->getTreeEntry()->ReorderIndices[Lane];
5622+
assert(Lane < static_cast<int>(
5623+
Bundle->getTreeEntry()->Scalars.size()) &&
5624+
"Couldn't find extract lane");
5625+
5626+
// Since vectorization tree is being built recursively this
5627+
// assertion ensures that the tree entry has all operands set
5628+
// before reaching this code. Couple of exceptions known at the
5629+
// moment are extracts where their second (immediate) operand is
5630+
// not added. Since immediates do not affect scheduler behavior
5631+
// this is considered okay.
5632+
assert(In &&
5633+
(isa<ExtractValueInst, ExtractElementInst, CallBase>(In) ||
5634+
In->getNumOperands() ==
5635+
Bundle->getTreeEntry()->getNumOperands() ||
5636+
Bundle->getTreeEntry()->isCopyableElement(In)) &&
5637+
"Missed TreeEntry operands?");
5638+
5639+
for (unsigned OpIdx :
5640+
seq<unsigned>(Bundle->getTreeEntry()->getNumOperands()))
5641+
if (auto *I = dyn_cast<Instruction>(
5642+
Bundle->getTreeEntry()->getOperand(OpIdx)[Lane])) {
5643+
LLVM_DEBUG(dbgs() << "SLP: check for readiness (def): "
5644+
<< *I << "\n");
5645+
DecrUnschedForInst(I, Bundle->getTreeEntry(), OpIdx, Checked);
5646+
}
5647+
// If the parent node is schedulable, it will be handled correctly.
5648+
if (!Bundle->getTreeEntry()->doesNotNeedToSchedule())
5649+
break;
5650+
It = std::find(std::next(It),
5651+
Bundle->getTreeEntry()->Scalars.end(), In);
5652+
} while (It != Bundle->getTreeEntry()->Scalars.end());
56365653
}
56375654
} else {
56385655
// If BundleMember is a stand-alone instruction, no operand reordering
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s -slp-threshold=-99999 | FileCheck %s
3+
4+
define void @test() {
5+
; CHECK-LABEL: define void @test() {
6+
; CHECK-NEXT: [[BB:.*]]:
7+
; CHECK-NEXT: br i1 false, label %[[BB1:.*]], label %[[BB6:.*]]
8+
; CHECK: [[BB1]]:
9+
; CHECK-NEXT: br label %[[BB6]]
10+
; CHECK: [[BB6]]:
11+
; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ <i32 0, i32 0, i32 poison, i32 0>, %[[BB]] ], [ <i32 0, i32 0, i32 -1, i32 -1>, %[[BB1]] ]
12+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
13+
; CHECK-NEXT: [[OR:%.*]] = or i32 [[TMP1]], 0
14+
; CHECK-NEXT: ret void
15+
;
16+
bb:
17+
br i1 false, label %bb1, label %bb6
18+
19+
bb1:
20+
%add = add i32 0, 0
21+
%shl = shl i32 %add, 0
22+
%sub = sub i32 0, 1
23+
%add2 = add i32 %sub, %shl
24+
%add3 = add i32 0, 0
25+
%shl4 = shl i32 %add3, 0
26+
%ashr = ashr i32 %shl4, 1
27+
%add5 = add i32 0, 0
28+
br label %bb6
29+
30+
bb6:
31+
%phi = phi i32 [ poison, %bb ], [ %add2, %bb1 ]
32+
%phi7 = phi i32 [ 0, %bb ], [ %ashr, %bb1 ]
33+
%phi8 = phi i32 [ 0, %bb ], [ %add2, %bb1 ]
34+
%phi9 = phi i32 [ 0, %bb ], [ %add5, %bb1 ]
35+
%or = or i32 %phi8, 0
36+
ret void
37+
}

0 commit comments

Comments
 (0)