@@ -288,6 +288,11 @@ class MachineBlockPlacement : public MachineFunctionPass {
288288 const BlockFilterSet *BlockFilter,
289289 BranchProbability SuccProb,
290290 BranchProbability HotProb);
291+ bool
292+ hasBetterLayoutPredecessor (MachineBasicBlock *BB, MachineBasicBlock *Succ,
293+ BlockChain &SuccChain, BranchProbability SuccProb,
294+ BranchProbability RealSuccProb, BlockChain &Chain,
295+ const BlockFilterSet *BlockFilter);
291296 MachineBasicBlock *selectBestSuccessor (MachineBasicBlock *BB,
292297 BlockChain &Chain,
293298 const BlockFilterSet *BlockFilter);
@@ -512,6 +517,128 @@ bool MachineBlockPlacement::shouldPredBlockBeOutlined(
512517 return false ;
513518}
514519
520+ // FIXME (PGO handling)
521+ // For now this method just returns a fixed threshold (StaticLikelyProb / 100).
522+ // It needs to be enhanced so that BB and Succ are passed in and the CFG shape
523+ // is examined, so the threshold is computed with a more precise cost model when PGO is on.
524+ static BranchProbability getLayoutSuccessorProbThreshold () {
525+ BranchProbability HotProb (StaticLikelyProb, 100 );
526+ return HotProb;
527+ }
528+
529+ /// Checks to see if the layout candidate block \p Succ has a better layout
530+ /// predecessor than \p BB. Returns true if it does (the caller then skips \p Succ), false otherwise.
531+ bool MachineBlockPlacement::hasBetterLayoutPredecessor (
532+ MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &SuccChain,
533+ BranchProbability SuccProb, BranchProbability RealSuccProb,
534+ BlockChain &Chain, const BlockFilterSet *BlockFilter) {
535+
536+ // SuccChain has no unscheduled predecessors, so there is no global conflict; just return false.
537+ if (SuccChain.UnscheduledPredecessors == 0 )
538+ return false ;
539+
540+ // There are two basic scenarios here:
541+ // -------------------------------------
542+ // Case 1: triangular shape CFG:
543+ //     BB
544+ //     | \
545+ //     |  \
546+ //     |   Pred
547+ //     |   /
548+ //     Succ
549+ // In this case, we are evaluating whether to select edge BB->Succ, i.e.
550+ // set Succ as the layout successor of BB. Picking Succ as BB's
551+ // successor breaks the CFG constraints. With this layout, block Pred
552+ // is forced to be outlined, so the overall cost will be the cost of the
553+ // taken branch from BB to Pred, plus the cost of the taken back branch
554+ // from Pred to Succ, as well as the additional cost associated
555+ // with the needed unconditional jump instruction from Pred to Succ.
556+ // The cost of the topological order layout is the taken branch cost
557+ // from BB to Succ, so to make BB->Succ a viable candidate, the following
558+ // must hold:
559+ // 2 * freq(BB->Pred) * taken_branch_cost + unconditional_jump_cost
560+ // < freq(BB->Succ) * taken_branch_cost.
561+ // Ignoring unconditional jump cost, we get
562+ // freq(BB->Succ) > 2 * freq(BB->Pred), i.e.,
563+ // prob(BB->Succ) > 2 * prob(BB->Pred)
564+ //
565+ // When real profile data is available, we can precisely compute the
566+ // probability threshold that is needed for edge BB->Succ to be considered.
567+ // Without profile data, the heuristic requires the branch bias to be
568+ // a lot larger to make sure the signal is very strong (e.g. 80% default).
569+ // -----------------------------------------------------------------
570+ // Case 2: diamond-like CFG:
571+ //       S
572+ //      / \
573+ //     |   \
574+ //     BB   Pred
575+ //      \   /
576+ //      Succ
577+ //      ..
578+ // In this case, edge S->BB has already been selected, and we are evaluating
579+ // candidate edge BB->Succ. Edge S->BB is selected because prob(S->BB)
580+ // is no less than prob(S->Pred). When real profile data is *available*, if
581+ // the condition is true, it will always be better to continue the trace with
582+ // edge BB->Succ instead of laying out with topological order (i.e. laying
583+ // Pred first). The cost of S->BB->Succ is 2 * freq(S->Pred), while with
584+ // the topo order, the cost is freq(S->Pred) + freq(S->BB), which is larger.
585+ // When profile data is not available, however, we need to be more
586+ // conservative. If the branch prediction is wrong, breaking the topo-order
587+ // will actually yield a layout with large cost. For this reason, we need
588+ // a strongly biased branch at block S with Prob(S->BB) in order to select
589+ // BB->Succ. This is equivalent to looking at the CFG backward with a backward
590+ // edge: Prob(Succ->BB) needs to be >= HotProb in order to be selected (without
591+ // profile data).
592+
593+ BranchProbability HotProb = getLayoutSuccessorProbThreshold ();
594+
595+ // Forward checking. For case 2, SuccProb will be 1.
596+ if (SuccProb < HotProb) {
597+ DEBUG (dbgs () << " " << getBlockName (Succ) << " -> " << SuccProb
598+ << " (prob) (CFG conflict)\n " );
599+ return true ;
600+ }
601+
602+ // Make sure that a hot successor doesn't have a globally more
603+ // important predecessor. The candidate edge frequency is freq(BB) * RealSuccProb.
604+ BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq (BB) * RealSuccProb;
605+ bool BadCFGConflict = false ;
606+
607+ for (MachineBasicBlock *Pred : Succ->predecessors ()) {
608+ if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
609+ (BlockFilter && !BlockFilter->count (Pred)) ||
610+ BlockToChain[Pred] == &Chain)
611+ continue ;
612+ // Do backward checking. For case 1, it is actually a redundant check. For
613+ // case 2 above, we need a backward check to filter out edges that are
614+ // not 'strongly' biased. With profile data available, the check is mostly
615+ // redundant too (when threshold prob is set at 50%) unless S has more than
616+ // two successors.
617+ //     BB   Pred
618+ //      \   /
619+ //      Succ
620+ // We select edge BB->Succ if
621+ // freq(BB->Succ) > freq(Succ) * HotProb
622+ // i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb + freq(Pred->Succ) *
623+ // HotProb
624+ // i.e. freq(BB->Succ) * (1 - HotProb) > freq(Pred->Succ) * HotProb
625+ BlockFrequency PredEdgeFreq =
626+ MBFI->getBlockFreq (Pred) * MBPI->getEdgeProbability (Pred, Succ);
627+ if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl ()) {
628+ BadCFGConflict = true ;
629+ break ;
630+ }
631+ }
632+
633+ if (BadCFGConflict) {
634+ DEBUG (dbgs () << " " << getBlockName (Succ) << " -> " << SuccProb
635+ << " (prob) (non-cold CFG conflict)\n " );
636+ return true ;
637+ }
638+
639+ return false ;
640+ }
641+
515642// / \brief Select the best successor for a block.
516643// /
517644// / This looks across all successors of a particular block and attempts to
@@ -545,51 +672,18 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
545672 HotProb))
546673 return Succ;
547674
548- // Only consider successors which are either "hot", or wouldn't violate
549- // any CFG constraints.
550675 BlockChain &SuccChain = *BlockToChain[Succ];
551- if (SuccChain.UnscheduledPredecessors != 0 ) {
552- if (SuccProb < HotProb) {
553- DEBUG (dbgs () << " " << getBlockName (Succ) << " -> " << SuccProb
554- << " (prob) (CFG conflict)\n " );
555- continue ;
556- }
557-
558- // Make sure that a hot successor doesn't have a globally more
559- // important predecessor.
560- BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq (BB) * RealSuccProb;
561- bool BadCFGConflict = false ;
562- for (MachineBasicBlock *Pred : Succ->predecessors ()) {
563- if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
564- (BlockFilter && !BlockFilter->count (Pred)) ||
565- BlockToChain[Pred] == &Chain)
566- continue ;
567- BlockFrequency PredEdgeFreq =
568- MBFI->getBlockFreq (Pred) * MBPI->getEdgeProbability (Pred, Succ);
569- // A B
570- // \ /
571- // C
572- // We layout ACB iff A.freq > C.freq * HotProb
573- // i.e. A.freq > A.freq * HotProb + B.freq * HotProb
574- // i.e. A.freq * (1 - HotProb) > B.freq * HotProb
575- // A: CandidateEdge
576- // B: PredEdge
577- if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl ()) {
578- BadCFGConflict = true ;
579- break ;
580- }
581- }
582- if (BadCFGConflict) {
583- DEBUG (dbgs () << " " << getBlockName (Succ) << " -> " << SuccProb
584- << " (prob) (non-cold CFG conflict)\n " );
585- continue ;
586- }
587- }
676+ // Skip the edge \c BB->Succ if block \c Succ has a better layout
677+ // predecessor that yields lower global cost.
678+ if (hasBetterLayoutPredecessor (BB, Succ, SuccChain, SuccProb, RealSuccProb,
679+ Chain, BlockFilter))
680+ continue ;
588681
589- DEBUG (dbgs () << " " << getBlockName (Succ) << " -> " << SuccProb
590- << " (prob)"
591- << (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : " " )
592- << " \n " );
682+ DEBUG (
683+ dbgs () << " " << getBlockName (Succ) << " -> " << SuccProb
684+ << " (prob)"
685+ << (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : " " )
686+ << " \n " );
593687 if (BestSucc && BestProb >= SuccProb)
594688 continue ;
595689 BestSucc = Succ;
0 commit comments