Skip to content

Commit 83bb542

Browse files
committed
Create identity instruction when there are vector_sz-1 isomorphic instructions
Change-Id: Iba09dc8a4b7fe851b34c6405236ea72dd75a7953
1 parent 5d3eccb commit 83bb542

File tree

3 files changed

+75
-37
lines changed

3 files changed

+75
-37
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 71 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,8 @@ static const int MinScheduleRegionSize = 16;
226226
/// Maximum allowed number of operands in the PHI nodes.
227227
static const unsigned MaxPHINumOperands = 128;
228228

229+
/// Cache of the synthetic "identity" instructions created by
/// BoUpSLP::setIdentityInstr, keyed by the non-instruction scalar that the
/// instruction stands in for (the instruction computes `scalar op identity`).
/// Being a file-scope static, the map is shared by every user in this
/// translation unit; SLPVectorizerPass::runImpl erases the cached instructions
/// that still have a parent and then clears the map.
/// NOTE(review): the key is the scalar alone, not (scalar, opcode), so a scalar
/// can have at most one cached identity instruction at a time.
static SmallDenseMap<Value *, Value *> IdentityInstrsMp;
230+
229231
/// Predicate for the element types that the SLP vectorizer supports.
230232
///
231233
/// The most important thing to filter here are types which are invalid in LLVM
@@ -2075,6 +2077,55 @@ class BoUpSLP {
20752077

20762078
OptimizationRemarkEmitter *getORE() { return ORE; }
20772079

2080+
/// Rewrite a bundle that is "almost" all instructions so that every lane is an
/// instruction (or poison).
///
/// When \p VL has more than two lanes, holds no repeated instruction, and
/// exactly one lane is neither an Instruction nor a PoisonValue, that lane `X`
/// is replaced by a freshly created `X op Identity`, where `op` is cloned from
/// the first instruction found in \p VL and `Identity` comes from
/// ConstantExpr::getIdentity (RHS-only identities allowed, hence the constant
/// is always placed in operand 1). The new instruction is inserted right after
/// that first instruction and remembered in IdentityInstrsMp so that later
/// bundles containing the same scalar reuse it.
///
/// \param VL the scalars of the bundle being considered.
/// \returns a copy of \p VL, with the odd lane replaced when the rewrite
///          applies and unchanged in every other case.
static SmallVector<Value *, 8> setIdentityInstr(ArrayRef<Value *> VL) {
  SmallVector<Value *, 8> New_VL(VL.begin(), VL.end());
  if (VL.size() <= 2)
    return New_VL;

  auto It = find_if(VL, IsaPred<Instruction>);
  if (It == VL.end())
    return New_VL;
  Instruction *MainOp = cast<Instruction>(*It);

  // Work on a unique list of instructions only. Uniqueness is decided by
  // pointer identity: value names are not usable as keys because every unnamed
  // instruction shares the empty name.
  SmallDenseMap<Value *, bool> SeenInstrs;
  for (Value *V : VL)
    if (isa<Instruction>(V) && !SeenInstrs.try_emplace(V, true).second)
      return New_VL;

  // Exactly one lane may be something other than an instruction or poison.
  auto ValidOperands = count_if(VL, IsaPred<Instruction, PoisonValue>);
  if (static_cast<size_t>(ValidOperands) + 1 != VL.size())
    return New_VL;
  auto DifferentOperand = find_if_not(VL, IsaPred<Instruction, PoisonValue>);
  if (DifferentOperand == VL.end())
    return New_VL;

  Value *Scalar = *DifferentOperand;
  auto OperandIndex = std::distance(VL.begin(), DifferentOperand);

  // The scalar becomes operand 0 of a clone of MainOp, so it must have the
  // type MainOp produces; otherwise the clone would be ill-typed.
  if (Scalar->getType() != MainOp->getType())
    return New_VL;

  auto FoundIdentityInstrIt = IdentityInstrsMp.find(Scalar);
  if (FoundIdentityInstrIt != IdentityInstrsMp.end()) {
    // The cache is keyed by the scalar alone, so the cached instruction may
    // have been built for a different operation. Reuse it only when it really
    // is `Scalar <MainOp's operation> identity`: same opcode and type, and
    // every operand past the first two (e.g. the callee of an intrinsic call)
    // identical to MainOp's.
    auto *Cached = cast<Instruction>(FoundIdentityInstrIt->second);
    bool Reusable = Cached->getOpcode() == MainOp->getOpcode() &&
                    Cached->getType() == MainOp->getType() &&
                    Cached->getNumOperands() == MainOp->getNumOperands();
    for (unsigned Op = 2, E = MainOp->getNumOperands(); Reusable && Op < E;
         ++Op)
      Reusable = Cached->getOperand(Op) == MainOp->getOperand(Op);
    // On a mismatch the bundle is left untouched: the map has a single slot
    // per scalar, so a second identity instruction could not be tracked.
    if (Reusable)
      New_VL[OperandIndex] = Cached;
    return New_VL;
  }

  auto *Identity = ConstantExpr::getIdentity(MainOp, MainOp->getType(),
                                             /*AllowRHSConstant=*/true);
  if (!Identity)
    return New_VL;

  Instruction *NewInstr = MainOp->clone();
  NewInstr->setOperand(0, Scalar);
  NewInstr->setOperand(1, Identity);
  // The clone inherits MainOp's flags. For floating-point operations nnan/ninf
  // would turn `Scalar op identity` into poison when Scalar is NaN/Inf, while
  // the lane it replaces was the plain scalar, so drop them. Integer flags are
  // kept: `x op identity` cannot wrap or lose bits.
  if (NewInstr->getType()->isFPOrFPVectorTy())
    NewInstr->dropPoisonGeneratingFlags();
  NewInstr->insertAfter(MainOp);
  NewInstr->setName(Scalar->getName() + ".identity");

  New_VL[OperandIndex] = NewInstr;
  assert(find_if_not(New_VL, IsaPred<Instruction, PoisonValue>) ==
             New_VL.end() &&
         "Expected all operands to be instructions");
  IdentityInstrsMp.try_emplace(Scalar, NewInstr);
  return New_VL;
}
2128+
20782129
/// This structure holds any data we need about the edges being traversed
20792130
/// during buildTreeRec(). We keep track of:
20802131
/// (i) the user TreeEntry index, and
@@ -3786,7 +3837,8 @@ class BoUpSLP {
37863837
assert(OpVL.size() <= Scalars.size() &&
37873838
"Number of operands is greater than the number of scalars.");
37883839
Operands[OpIdx].resize(OpVL.size());
3789-
copy(OpVL, Operands[OpIdx].begin());
3840+
auto NewVL = BoUpSLP::setIdentityInstr(OpVL);
3841+
copy(NewVL, Operands[OpIdx].begin());
37903842
}
37913843

37923844
public:
@@ -4084,18 +4136,19 @@ class BoUpSLP {
40844136
"Reshuffling scalars not yet supported for nodes with padding");
40854137
Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
40864138
ReuseShuffleIndices.end());
4139+
SmallVector<Value*, 8> NewVL =BoUpSLP::setIdentityInstr(VL);
40874140
if (ReorderIndices.empty()) {
4088-
Last->Scalars.assign(VL.begin(), VL.end());
4141+
Last->Scalars.assign(NewVL.begin(), NewVL.end());
40894142
if (S)
40904143
Last->setOperations(S);
40914144
} else {
40924145
// Reorder scalars and build final mask.
4093-
Last->Scalars.assign(VL.size(), nullptr);
4146+
Last->Scalars.assign(NewVL.size(), nullptr);
40944147
transform(ReorderIndices, Last->Scalars.begin(),
4095-
[VL](unsigned Idx) -> Value * {
4096-
if (Idx >= VL.size())
4097-
return UndefValue::get(VL.front()->getType());
4098-
return VL[Idx];
4148+
[NewVL](unsigned Idx) -> Value * {
4149+
if (Idx >= NewVL.size())
4150+
return UndefValue::get(NewVL.front()->getType());
4151+
return NewVL[Idx];
40994152
});
41004153
InstructionsState S = getSameOpcode(Last->Scalars, *TLI);
41014154
if (S)
@@ -4106,7 +4159,7 @@ class BoUpSLP {
41064159
assert(S && "Split nodes must have operations.");
41074160
Last->setOperations(S);
41084161
SmallPtrSet<Value *, 4> Processed;
4109-
for (Value *V : VL) {
4162+
for (Value *V : NewVL) {
41104163
auto *I = dyn_cast<Instruction>(V);
41114164
if (!I)
41124165
continue;
@@ -4121,10 +4174,10 @@ class BoUpSLP {
41214174
}
41224175
}
41234176
} else if (!Last->isGather()) {
4124-
if (doesNotNeedToSchedule(VL))
4177+
if (doesNotNeedToSchedule(NewVL))
41254178
Last->setDoesNotNeedToSchedule();
41264179
SmallPtrSet<Value *, 4> Processed;
4127-
for (Value *V : VL) {
4180+
for (Value *V : NewVL) {
41284181
if (isa<PoisonValue>(V))
41294182
continue;
41304183
auto It = ScalarToTreeEntries.find(V);
@@ -4146,7 +4199,7 @@ class BoUpSLP {
41464199
#if !defined(NDEBUG) || defined(EXPENSIVE_CHECKS)
41474200
auto *BundleMember = Bundle.getBundle().begin();
41484201
SmallPtrSet<Value *, 4> Processed;
4149-
for (Value *V : VL) {
4202+
for (Value *V : NewVL) {
41504203
if (doesNotNeedToBeScheduled(V) || !Processed.insert(V).second)
41514204
continue;
41524205
++BundleMember;
@@ -4159,7 +4212,7 @@ class BoUpSLP {
41594212
} else {
41604213
// Build a map for gathered scalars to the nodes where they are used.
41614214
bool AllConstsOrCasts = true;
4162-
for (Value *V : VL)
4215+
for (Value *V : NewVL)
41634216
if (!isConstant(V)) {
41644217
auto *I = dyn_cast<CastInst>(V);
41654218
AllConstsOrCasts &= I && I->getType()->isIntegerTy();
@@ -4170,7 +4223,7 @@ class BoUpSLP {
41704223
if (AllConstsOrCasts)
41714224
CastMaxMinBWSizes =
41724225
std::make_pair(std::numeric_limits<unsigned>::max(), 1);
4173-
MustGather.insert_range(VL);
4226+
MustGather.insert_range(NewVL);
41744227
}
41754228

41764229
if (UserTreeIdx.UserTE)
@@ -20844,6 +20897,11 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
2084420897
}
2084520898
}
2084620899

20900+
for (auto &I : IdentityInstrsMp) {
20901+
if (I.second && cast<Instruction>(I.second)->getParent())
20902+
cast<Instruction>(I.second)->eraseFromParent();
20903+
}
20904+
IdentityInstrsMp.clear();
2084720905
if (Changed) {
2084820906
R.optimizeGatherSequence();
2084920907
LLVM_DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n");

llvm/test/Transforms/SLPVectorizer/X86/pr47642.ll

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,8 @@ target triple = "x86_64-unknown-linux-gnu"
77
define <4 x i32> @foo(<4 x i32> %x, i32 %f) {
88
; CHECK-LABEL: @foo(
99
; CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i32> poison, i32 [[F:%.*]], i64 0
10-
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[F]], 1
11-
; CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x i32> [[VECINIT]], i32 [[ADD]], i64 1
12-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[F]], i64 0
13-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> zeroinitializer
14-
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 2, i32 3>
15-
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
16-
; CHECK-NEXT: [[VECINIT51:%.*]] = shufflevector <4 x i32> [[VECINIT1]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
10+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECINIT]], <4 x i32> poison, <4 x i32> zeroinitializer
11+
; CHECK-NEXT: [[VECINIT51:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 0, i32 1, i32 2, i32 3>
1712
; CHECK-NEXT: ret <4 x i32> [[VECINIT51]]
1813
;
1914
%vecinit = insertelement <4 x i32> undef, i32 %f, i32 0

llvm/test/Transforms/SLPVectorizer/infer-missing-instruction.ll

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,24 +5,9 @@ define dso_local noundef i32 @_Z4testiPs(i32 noundef %a, ptr noundef readonly ca
55
; CHECK-LABEL: define dso_local noundef i32 @_Z4testiPs(
66
; CHECK-SAME: i32 noundef [[A:%.*]], ptr noundef readonly captures(none) [[B:%.*]]) local_unnamed_addr {
77
; CHECK-NEXT: [[ENTRY:.*:]]
8-
; CHECK-NEXT: [[SHR_1:%.*]] = lshr i32 [[A]], 1
9-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[A]], i64 0
10-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer
11-
; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i32> [[TMP1]], <i32 2, i32 3>
12-
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
13-
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> zeroinitializer
14-
; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], <i32 4, i32 5, i32 6, i32 7>
15-
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> poison, i32 [[A]], i64 0
16-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <8 x i32> zeroinitializer
17-
; CHECK-NEXT: [[TMP8:%.*]] = lshr <8 x i32> [[TMP7]], <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
188
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> poison, i32 [[A]], i64 0
19-
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[SHR_1]], i64 1
20-
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
21-
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
22-
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
23-
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i32> [[TMP12]], <16 x i32> [[TMP13]], <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
24-
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
25-
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x i32> [[TMP14]], <16 x i32> [[TMP15]], <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
9+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> zeroinitializer
10+
; CHECK-NEXT: [[TMP16:%.*]] = lshr <16 x i32> [[TMP1]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2611
; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i32> [[TMP16]], splat (i32 16)
2712
; CHECK-NEXT: [[TMP18:%.*]] = load <16 x i16>, ptr [[B]], align 2
2813
; CHECK-NEXT: [[TMP19:%.*]] = sext <16 x i16> [[TMP18]] to <16 x i32>

0 commit comments

Comments
 (0)