@@ -226,6 +226,8 @@ static const int MinScheduleRegionSize = 16;
226226/// Maximum allowed number of operands in the PHI nodes.
227227static const unsigned MaxPHINumOperands = 128;
228228
/// Cache of the identity instructions synthesized by
/// BoUpSLP::setIdentityInstr(). The key is the original non-instruction,
/// non-poison operand found in a bundle; the mapped value is the
/// `<operand> op <identity>` instruction created to stand in for it.
/// SLPVectorizerPass::runImpl() erases every mapped instruction that still has
/// a parent and then clears the map.
/// NOTE(review): this is mutable file-scope state shared by every user of this
/// translation unit - confirm that it cannot be reached concurrently (e.g. from
/// several LLVMContexts compiled on different threads).
229+ static SmallDenseMap<Value *, Value *> IdentityInstrsMp;
230+
229231/// Predicate for the element types that the SLP vectorizer supports.
230232///
231233/// The most important thing to filter here are types which are invalid in LLVM
@@ -2075,6 +2077,55 @@ class BoUpSLP {
20752077
20762078 OptimizationRemarkEmitter *getORE() { return ORE; }
20772079
2080+ static SmallVector<Value*, 8> setIdentityInstr(ArrayRef<Value *> VL) {
2081+ SmallVector<Value *, 8> New_VL(VL.begin(), VL.end());
2082+ if (VL.size() <= 2)
2083+ return New_VL;
2084+ auto It = find_if(VL, IsaPred<Instruction>);
2085+ if (It == VL.end())
2086+ return New_VL;
2087+ // work on unique list of instructions only:
2088+ SmallDenseMap<llvm::StringRef, bool> SeenInstrs;
2089+ for (auto *V : VL)
2090+ if (auto *I = dyn_cast<Instruction>(V)) {
2091+ if (!SeenInstrs[I->getName()])
2092+ SeenInstrs[I->getName()] = true;
2093+ else {
2094+ return New_VL;
2095+ }
2096+ }
2097+ Instruction *MainOp = cast<Instruction>(*It);
2098+ auto ValidOperands = count_if(VL, IsaPred<Instruction, PoisonValue>);
2099+ if (ValidOperands != (int) VL.size()-1)
2100+ return New_VL;
2101+ auto DifferentOperand = find_if_not(VL, IsaPred<Instruction, PoisonValue>);
2102+ if (DifferentOperand == VL.end())
2103+ return New_VL;
2104+ assert(!isa<Instruction>(*DifferentOperand) && !isa<PoisonValue>(*DifferentOperand) &&
2105+ "Expected different operand to be not an instruction");
2106+ auto FoundIdentityInstrIt = IdentityInstrsMp.find(*DifferentOperand);
2107+ if (FoundIdentityInstrIt != IdentityInstrsMp.end()) {
2108+ auto OperandIndex = std::distance(VL.begin(), DifferentOperand);
2109+ New_VL[OperandIndex] = FoundIdentityInstrIt->second;
2110+ return New_VL;
2111+ }
2112+ auto *Identity = ConstantExpr::getIdentity(MainOp, MainOp->getType(), true /*AllowRHSConstant*/);
2113+ if (!Identity)
2114+ return New_VL;
2115+ auto *NewInstr = MainOp->clone();
2116+ NewInstr->setOperand(0, *DifferentOperand);
2117+ NewInstr->setOperand(1, Identity);
2118+ NewInstr->insertAfter(cast<Instruction>(MainOp));
2119+ NewInstr->setName((*DifferentOperand)->getName() + ".identity");
2120+ auto OperandIndex = std::distance(VL.begin(), DifferentOperand);
2121+ New_VL[OperandIndex] = NewInstr;
2122+ assert(find_if_not(New_VL, IsaPred<Instruction, PoisonValue>) ==
2123+ New_VL.end() &&
2124+ "Expected all operands to be instructions");
2125+ IdentityInstrsMp.try_emplace(*DifferentOperand, NewInstr);
2126+ return New_VL;
2127+ }
2128+
20782129 /// This structure holds any data we need about the edges being traversed
20792130 /// during buildTreeRec(). We keep track of:
20802131 /// (i) the user TreeEntry index, and
@@ -3786,7 +3837,8 @@ class BoUpSLP {
37863837 assert(OpVL.size() <= Scalars.size() &&
37873838 "Number of operands is greater than the number of scalars.");
37883839 Operands[OpIdx].resize(OpVL.size());
3789- copy(OpVL, Operands[OpIdx].begin());
3840+ auto NewVL = BoUpSLP::setIdentityInstr(OpVL);
3841+ copy(NewVL, Operands[OpIdx].begin());
37903842 }
37913843
37923844 public:
@@ -4084,18 +4136,19 @@ class BoUpSLP {
40844136 "Reshuffling scalars not yet supported for nodes with padding");
40854137 Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
40864138 ReuseShuffleIndices.end());
4139+ SmallVector<Value*, 8> NewVL =BoUpSLP::setIdentityInstr(VL);
40874140 if (ReorderIndices.empty()) {
4088- Last->Scalars.assign(VL .begin(), VL .end());
4141+ Last->Scalars.assign(NewVL .begin(), NewVL .end());
40894142 if (S)
40904143 Last->setOperations(S);
40914144 } else {
40924145 // Reorder scalars and build final mask.
4093- Last->Scalars.assign(VL .size(), nullptr);
4146+ Last->Scalars.assign(NewVL .size(), nullptr);
40944147 transform(ReorderIndices, Last->Scalars.begin(),
4095- [VL ](unsigned Idx) -> Value * {
4096- if (Idx >= VL .size())
4097- return UndefValue::get(VL .front()->getType());
4098- return VL [Idx];
4148+ [NewVL ](unsigned Idx) -> Value * {
4149+ if (Idx >= NewVL .size())
4150+ return UndefValue::get(NewVL .front()->getType());
4151+ return NewVL [Idx];
40994152 });
41004153 InstructionsState S = getSameOpcode(Last->Scalars, *TLI);
41014154 if (S)
@@ -4106,7 +4159,7 @@ class BoUpSLP {
41064159 assert(S && "Split nodes must have operations.");
41074160 Last->setOperations(S);
41084161 SmallPtrSet<Value *, 4> Processed;
4109- for (Value *V : VL ) {
4162+ for (Value *V : NewVL ) {
41104163 auto *I = dyn_cast<Instruction>(V);
41114164 if (!I)
41124165 continue;
@@ -4121,10 +4174,10 @@ class BoUpSLP {
41214174 }
41224175 }
41234176 } else if (!Last->isGather()) {
4124- if (doesNotNeedToSchedule(VL ))
4177+ if (doesNotNeedToSchedule(NewVL ))
41254178 Last->setDoesNotNeedToSchedule();
41264179 SmallPtrSet<Value *, 4> Processed;
4127- for (Value *V : VL ) {
4180+ for (Value *V : NewVL ) {
41284181 if (isa<PoisonValue>(V))
41294182 continue;
41304183 auto It = ScalarToTreeEntries.find(V);
@@ -4146,7 +4199,7 @@ class BoUpSLP {
41464199#if !defined(NDEBUG) || defined(EXPENSIVE_CHECKS)
41474200 auto *BundleMember = Bundle.getBundle().begin();
41484201 SmallPtrSet<Value *, 4> Processed;
4149- for (Value *V : VL ) {
4202+ for (Value *V : NewVL ) {
41504203 if (doesNotNeedToBeScheduled(V) || !Processed.insert(V).second)
41514204 continue;
41524205 ++BundleMember;
@@ -4159,7 +4212,7 @@ class BoUpSLP {
41594212 } else {
41604213 // Build a map for gathered scalars to the nodes where they are used.
41614214 bool AllConstsOrCasts = true;
4162- for (Value *V : VL )
4215+ for (Value *V : NewVL )
41634216 if (!isConstant(V)) {
41644217 auto *I = dyn_cast<CastInst>(V);
41654218 AllConstsOrCasts &= I && I->getType()->isIntegerTy();
@@ -4170,7 +4223,7 @@ class BoUpSLP {
41704223 if (AllConstsOrCasts)
41714224 CastMaxMinBWSizes =
41724225 std::make_pair(std::numeric_limits<unsigned>::max(), 1);
4173- MustGather.insert_range(VL );
4226+ MustGather.insert_range(NewVL );
41744227 }
41754228
41764229 if (UserTreeIdx.UserTE)
@@ -20844,6 +20897,11 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
2084420897 }
2084520898 }
2084620899
20900+ for (auto &I : IdentityInstrsMp) {
20901+ if (I.second && cast<Instruction>(I.second)->getParent())
20902+ cast<Instruction>(I.second)->eraseFromParent();
20903+ }
20904+ IdentityInstrsMp.clear();
2084720905 if (Changed) {
2084820906 R.optimizeGatherSequence();
2084920907 LLVM_DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n");
0 commit comments