Skip to content

Commit 3d494bf

Browse files
authored
[SimplifyCFG] Increase budget for FoldTwoEntryPHINode() if the branch is unpredictable. (#98495)
The `!unpredictable` metadata has been present for a long time, but its usage in optimizations is still limited. This patch teaches `FoldTwoEntryPHINode()` to be more aggressive with an unpredictable branch to reduce mispredictions. A TTI interface `getBranchMispredictPenalty()` is added to distinguish between different hardware to ensure we don't go too far for simpler cores. For simplicity, only a naive x86 implementation is included for the time being.
1 parent b6dbda6 commit 3d494bf

File tree

12 files changed

+158
-13
lines changed

12 files changed

+158
-13
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,12 @@ class TargetTransformInfo {
419419
/// this factor, it is very likely to be predicted correctly.
420420
BranchProbability getPredictableBranchThreshold() const;
421421

422+
/// Returns estimated penalty of a branch misprediction in latency. Indicates
423+
/// how aggressive the target wants to be in eliminating unpredictable branches. A
424+
/// zero return value means extra optimization applied to them should be
425+
/// minimal.
426+
InstructionCost getBranchMispredictPenalty() const;
427+
422428
/// Return true if branch divergence exists.
423429
///
424430
/// Branch divergence has a significantly negative impact on GPU performance
@@ -1832,6 +1838,7 @@ class TargetTransformInfo::Concept {
18321838
ArrayRef<const Value *> Operands,
18331839
TargetCostKind CostKind) = 0;
18341840
virtual BranchProbability getPredictableBranchThreshold() = 0;
1841+
virtual InstructionCost getBranchMispredictPenalty() = 0;
18351842
virtual bool hasBranchDivergence(const Function *F = nullptr) = 0;
18361843
virtual bool isSourceOfDivergence(const Value *V) = 0;
18371844
virtual bool isAlwaysUniform(const Value *V) = 0;
@@ -2243,6 +2250,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
22432250
BranchProbability getPredictableBranchThreshold() override {
22442251
return Impl.getPredictableBranchThreshold();
22452252
}
2253+
InstructionCost getBranchMispredictPenalty() override {
2254+
return Impl.getBranchMispredictPenalty();
2255+
}
22462256
bool hasBranchDivergence(const Function *F = nullptr) override {
22472257
return Impl.hasBranchDivergence(F);
22482258
}

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,8 @@ class TargetTransformInfoImplBase {
9999
return BranchProbability(99, 100);
100100
}
101101

102+
InstructionCost getBranchMispredictPenalty() const { return 0; }
103+
102104
bool hasBranchDivergence(const Function *F = nullptr) const { return false; }
103105

104106
bool isSourceOfDivergence(const Value *V) const { return false; }

llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ struct SimplifyCFGOptions {
3030
bool SinkCommonInsts = false;
3131
bool SimplifyCondBranch = true;
3232
bool SpeculateBlocks = true;
33+
bool SpeculateUnpredictables = false;
3334

3435
AssumptionCache *AC = nullptr;
3536

@@ -75,6 +76,10 @@ struct SimplifyCFGOptions {
7576
SpeculateBlocks = B;
7677
return *this;
7778
}
79+
SimplifyCFGOptions &speculateUnpredictables(bool B) {
80+
SpeculateUnpredictables = B;
81+
return *this;
82+
}
7883
};
7984

8085
} // namespace llvm

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,10 @@ BranchProbability TargetTransformInfo::getPredictableBranchThreshold() const {
279279
: TTIImpl->getPredictableBranchThreshold();
280280
}
281281

282+
InstructionCost TargetTransformInfo::getBranchMispredictPenalty() const {
283+
return TTIImpl->getBranchMispredictPenalty();
284+
}
285+
282286
bool TargetTransformInfo::hasBranchDivergence(const Function *F) const {
283287
return TTIImpl->hasBranchDivergence(F);
284288
}

llvm/lib/Passes/PassBuilder.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -845,6 +845,8 @@ Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
845845
Result.hoistCommonInsts(Enable);
846846
} else if (ParamName == "sink-common-insts") {
847847
Result.sinkCommonInsts(Enable);
848+
} else if (ParamName == "speculate-unpredictables") {
849+
Result.speculateUnpredictables(Enable);
848850
} else if (Enable && ParamName.consume_front("bonus-inst-threshold=")) {
849851
APInt BonusInstThreshold;
850852
if (ParamName.getAsInteger(0, BonusInstThreshold))

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1515,8 +1515,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
15151515

15161516
// LoopSink (and other loop passes since the last simplifyCFG) might have
15171517
// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1518-
OptimizePM.addPass(
1519-
SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
1518+
OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
1519+
.convertSwitchRangeToICmp(true)
1520+
.speculateUnpredictables(true)));
15201521

15211522
// Add the core optimizing pipeline.
15221523
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
@@ -2034,9 +2035,10 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
20342035
LateFPM.addPass(DivRemPairsPass());
20352036

20362037
// Delete basic blocks, which optimization passes may have killed.
2037-
LateFPM.addPass(SimplifyCFGPass(
2038-
SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(
2039-
true)));
2038+
LateFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
2039+
.convertSwitchRangeToICmp(true)
2040+
.hoistCommonInsts(true)
2041+
.speculateUnpredictables(true)));
20402042
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
20412043

20422044
// Drop bodies of available externally objects to improve GlobalDCE.

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6756,3 +6756,8 @@ InstructionCost X86TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
67566756
return AM.Scale != 0;
67576757
return -1;
67586758
}
6759+
6760+
InstructionCost X86TTIImpl::getBranchMispredictPenalty() const {
6761+
// TODO: Hook MispredictPenalty of SchedMachineModel into this.
6762+
return 14;
6763+
}

llvm/lib/Target/X86/X86TargetTransformInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,8 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
294294
bool supportsEfficientVectorElementLoadStore() const;
295295
bool enableInterleavedAccessVectorization();
296296

297+
InstructionCost getBranchMispredictPenalty() const;
298+
297299
private:
298300
bool supportsGather() const;
299301
InstructionCost getGSVectorCost(unsigned Opcode, TTI::TargetCostKind CostKind,

llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ static cl::opt<bool> UserSinkCommonInsts(
7777
"sink-common-insts", cl::Hidden, cl::init(false),
7878
cl::desc("Sink common instructions (default = false)"));
7979

80+
static cl::opt<bool> UserSpeculateUnpredictables(
81+
"speculate-unpredictables", cl::Hidden, cl::init(false),
82+
cl::desc("Speculate unpredictable branches (default = false)"));
8083

8184
STATISTIC(NumSimpl, "Number of blocks simplified");
8285

@@ -325,6 +328,8 @@ static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) {
325328
Options.HoistCommonInsts = UserHoistCommonInsts;
326329
if (UserSinkCommonInsts.getNumOccurrences())
327330
Options.SinkCommonInsts = UserSinkCommonInsts;
331+
if (UserSpeculateUnpredictables.getNumOccurrences())
332+
Options.SpeculateUnpredictables = UserSpeculateUnpredictables;
328333
}
329334

330335
SimplifyCFGPass::SimplifyCFGPass() {
@@ -351,7 +356,9 @@ void SimplifyCFGPass::printPipeline(
351356
OS << (Options.HoistCommonInsts ? "" : "no-") << "hoist-common-insts;";
352357
OS << (Options.SinkCommonInsts ? "" : "no-") << "sink-common-insts;";
353358
OS << (Options.SpeculateBlocks ? "" : "no-") << "speculate-blocks;";
354-
OS << (Options.SimplifyCondBranch ? "" : "no-") << "simplify-cond-branch";
359+
OS << (Options.SimplifyCondBranch ? "" : "no-") << "simplify-cond-branch;";
360+
OS << (Options.SpeculateUnpredictables ? "" : "no-")
361+
<< "speculate-unpredictables";
355362
OS << '>';
356363
}
357364

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3476,7 +3476,8 @@ static bool FoldCondBranchOnValueKnownInPredecessor(BranchInst *BI,
34763476
/// Given a BB that starts with the specified two-entry PHI node,
34773477
/// see if we can eliminate it.
34783478
static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
3479-
DomTreeUpdater *DTU, const DataLayout &DL) {
3479+
DomTreeUpdater *DTU, const DataLayout &DL,
3480+
bool SpeculateUnpredictables) {
34803481
// Ok, this is a two entry PHI node. Check to see if this is a simple "if
34813482
// statement", which has a very simple dominance structure. Basically, we
34823483
// are trying to find the condition that is being branched on, which
@@ -3508,7 +3509,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
35083509
// jump to one specific 'then' block (if we have two of them).
35093510
// It isn't beneficial to speculatively execute the code
35103511
// from the block that we know is predictably not entered.
3511-
if (!DomBI->getMetadata(LLVMContext::MD_unpredictable)) {
3512+
bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
3513+
if (!IsUnpredictable) {
35123514
uint64_t TWeight, FWeight;
35133515
if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
35143516
(TWeight + FWeight) != 0) {
@@ -3551,6 +3553,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
35513553
InstructionCost Cost = 0;
35523554
InstructionCost Budget =
35533555
TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
3556+
if (SpeculateUnpredictables && IsUnpredictable)
3557+
Budget += TTI.getBranchMispredictPenalty();
35543558

35553559
bool Changed = false;
35563560
for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
@@ -3620,8 +3624,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
36203624
[](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
36213625
return Changed;
36223626

3623-
LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond
3624-
<< " T: " << IfTrue->getName()
3627+
LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3628+
if (IsUnpredictable) dbgs() << " (unpredictable)";
3629+
dbgs() << " T: " << IfTrue->getName()
36253630
<< " F: " << IfFalse->getName() << "\n");
36263631

36273632
// If we can still promote the PHI nodes after this gauntlet of tests,
@@ -7814,7 +7819,8 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
78147819
// eliminate it, do so now.
78157820
if (auto *PN = dyn_cast<PHINode>(BB->begin()))
78167821
if (PN->getNumIncomingValues() == 2)
7817-
if (FoldTwoEntryPHINode(PN, TTI, DTU, DL))
7822+
if (FoldTwoEntryPHINode(PN, TTI, DTU, DL,
7823+
Options.SpeculateUnpredictables))
78187824
return true;
78197825
}
78207826

0 commit comments

Comments
 (0)