@@ -414,7 +414,7 @@ namespace {
414414
415415 // Check that the branch targets are within range and that we satisfy our
416416 // restrictions.
417- void CheckLegality (ARMBasicBlockUtils *BBUtils);
417+ void Validate (ARMBasicBlockUtils *BBUtils);
418418
419419 bool FoundAllComponents () const {
420420 return Start && Dec && End;
@@ -520,41 +520,20 @@ std::map<MachineInstr *,
520520INITIALIZE_PASS (ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME,
521521 false , false )
522522
523- MachineInstr *LowOverheadLoop::isSafeToDefineLR() {
524- // We can define LR because LR already contains the same value.
525- if (Start->getOperand (0 ).getReg () == ARM::LR)
526- return Start;
527-
528- unsigned CountReg = Start->getOperand (0 ).getReg ();
529- auto IsMoveLR = [&CountReg](MachineInstr *MI) {
530- return MI->getOpcode () == ARM::tMOVr &&
531- MI->getOperand (0 ).getReg () == ARM::LR &&
532- MI->getOperand (1 ).getReg () == CountReg &&
533- MI->getOperand (2 ).getImm () == ARMCC::AL;
534- };
535-
536- MachineBasicBlock *MBB = Start->getParent ();
537-
538- // Find an insertion point:
539- // - Is there a (mov lr, Count) before Start? If so, and nothing else writes
540- // to Count before Start, we can insert at that mov.
541- if (auto *LRDef = RDA.getUniqueReachingMIDef (Start, ARM::LR))
542- if (IsMoveLR (LRDef) && RDA.hasSameReachingDef (Start, LRDef, CountReg))
543- return LRDef;
544-
545- // - Is there a (mov lr, Count) after Start? If so, and nothing else writes
546- // to Count after Start, we can insert at that mov.
547- if (auto *LRDef = RDA.getLocalLiveOutMIDef (MBB, ARM::LR))
548- if (IsMoveLR (LRDef) && RDA.hasSameReachingDef (Start, LRDef, CountReg))
549- return LRDef;
550-
551- // We've found no suitable LR def and Start doesn't use LR directly. Can we
552- // just define LR anyway?
553- return RDA.isSafeToDefRegAt (Start, ARM::LR) ? Start : nullptr ;
554- }
555-
556523bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
524+ if (!StartInsertPt)
525+ return false ;
526+
527+ if (!IsTailPredicationLegal ()) {
528+ LLVM_DEBUG (if (VCTPs.empty ())
529+ dbgs () << " ARM Loops: Didn't find a VCTP instruction.\n " ;
530+ dbgs () << " ARM Loops: Tail-predication is not valid.\n " );
531+ return false ;
532+ }
533+
557534 assert (!VCTPs.empty () && " VCTP instruction expected but is not set" );
535+ assert (ML.getBlocks ().size () == 1 &&
536+ " Shouldn't be processing a loop with more than one block" );
558537
559538 if (DisableTailPredication) {
560539 LLVM_DEBUG (dbgs () << " ARM Loops: tail-predication is disabled\n " );
@@ -631,15 +610,15 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
631610 // width, the Loop Start instruction will immediately generate one or more
632611 // false lane masks which can, incorrectly, affect the subsequent MVE
633612 // instructions in the preheader.
634- auto cannotInsertWDLSTPBetween = [](MachineBasicBlock::iterator I,
613+ auto CannotInsertWDLSTPBetween = [](MachineBasicBlock::iterator I,
635614 MachineBasicBlock::iterator E) {
636615 for (; I != E; ++I)
637616 if (shouldInspect (*I))
638617 return true ;
639618 return false ;
640619 };
641620
642- if (cannotInsertWDLSTPBetween (StartInsertPt, InsertBB->end ()))
621+ if (CannotInsertWDLSTPBetween (StartInsertPt, InsertBB->end ()))
643622 return false ;
644623
645624 // Especially in the case of while loops, InsertBB may not be the
@@ -658,15 +637,9 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
658637 return false ;
659638 };
660639
661- // First, find the block that looks like the preheader .
640+ // Search backwards for a def, until we get to InsertBB .
662641 MachineBasicBlock *MBB = Preheader;
663- if (!MBB) {
664- LLVM_DEBUG (dbgs () << " ARM Loops: Didn't find preheader.\n " );
665- return false ;
666- }
667-
668- // Then search backwards for a def, until we get to InsertBB.
669- while (MBB != InsertBB) {
642+ while (MBB && MBB != InsertBB) {
670643 if (CannotProvideElements (MBB, NumElements)) {
671644 LLVM_DEBUG (dbgs () << " ARM Loops: Unable to provide element count.\n " );
672645 return false ;
@@ -944,59 +917,83 @@ bool LowOverheadLoop::ValidateLiveOuts() {
944917 return true ;
945918}
946919
947- void LowOverheadLoop::CheckLegality (ARMBasicBlockUtils *BBUtils) {
920+ void LowOverheadLoop::Validate (ARMBasicBlockUtils *BBUtils) {
948921 if (Revert)
949922 return ;
950923
951- if (!End->getOperand (1 ).isMBB ())
952- report_fatal_error (" Expected LoopEnd to target basic block" );
924+ auto ValidateRanges = [this , &BBUtils]() {
925+ if (!End->getOperand (1 ).isMBB ())
926+ report_fatal_error (" Expected LoopEnd to target basic block" );
953927
954- // TODO Maybe there's cases where the target doesn't have to be the header,
955- // but for now be safe and revert.
956- if (End->getOperand (1 ).getMBB () != ML.getHeader ()) {
957- LLVM_DEBUG (dbgs () << " ARM Loops: LoopEnd is not targetting header.\n " );
958- Revert = true ;
959- return ;
960- }
961-
962- // The WLS and LE instructions have 12-bits for the label offset. WLS
963- // requires a positive offset, while LE uses negative.
964- if (BBUtils->getOffsetOf (End) < BBUtils->getOffsetOf (ML.getHeader ()) ||
965- !BBUtils->isBBInRange (End, ML.getHeader (), 4094 )) {
966- LLVM_DEBUG (dbgs () << " ARM Loops: LE offset is out-of-range\n " );
967- Revert = true ;
968- return ;
969- }
928+ // TODO: Maybe there are cases where the target doesn't have to be the header,
929+ // but for now be safe and revert.
930+ if (End->getOperand (1 ).getMBB () != ML.getHeader ()) {
931+ LLVM_DEBUG (dbgs () << " ARM Loops: LoopEnd is not targetting header.\n " );
932+ return false ;
933+ }
970934
971- if (Start->getOpcode () == ARM::t2WhileLoopStart &&
972- (BBUtils->getOffsetOf (Start) >
973- BBUtils->getOffsetOf (Start->getOperand (1 ).getMBB ()) ||
974- !BBUtils->isBBInRange (Start, Start->getOperand (1 ).getMBB (), 4094 ))) {
975- LLVM_DEBUG (dbgs () << " ARM Loops: WLS offset is out-of-range!\n " );
976- Revert = true ;
977- return ;
978- }
935+ // The WLS and LE instructions have 12-bits for the label offset. WLS
936+ // requires a positive offset, while LE uses negative.
937+ if (BBUtils->getOffsetOf (End) < BBUtils->getOffsetOf (ML.getHeader ()) ||
938+ !BBUtils->isBBInRange (End, ML.getHeader (), 4094 )) {
939+ LLVM_DEBUG (dbgs () << " ARM Loops: LE offset is out-of-range\n " );
940+ return false ;
941+ }
979942
980- InsertPt = Revert ? nullptr : isSafeToDefineLR ();
981- if (!InsertPt) {
982- LLVM_DEBUG (dbgs () << " ARM Loops: Unable to find safe insertion point.\n " );
983- Revert = true ;
984- return ;
985- } else
986- LLVM_DEBUG (dbgs () << " ARM Loops: Start insertion point: " << *InsertPt);
943+ if (Start->getOpcode () == ARM::t2WhileLoopStart &&
944+ (BBUtils->getOffsetOf (Start) >
945+ BBUtils->getOffsetOf (Start->getOperand (1 ).getMBB ()) ||
946+ !BBUtils->isBBInRange (Start, Start->getOperand (1 ).getMBB (), 4094 ))) {
947+ LLVM_DEBUG (dbgs () << " ARM Loops: WLS offset is out-of-range!\n " );
948+ return false ;
949+ }
950+ return true ;
951+ };
987952
988- if (!IsTailPredicationLegal ()) {
989- LLVM_DEBUG (if (VCTPs.empty ())
990- dbgs () << " ARM Loops: Didn't find a VCTP instruction.\n " ;
991- dbgs () << " ARM Loops: Tail-predication is not valid.\n " );
992- return ;
993- }
953+ auto FindStartInsertionPoint = [this ]() -> MachineInstr* {
954+ // We can define LR because LR already contains the same value.
955+ if (Start->getOperand (0 ).getReg () == ARM::LR)
956+ return Start;
957+
958+ unsigned CountReg = Start->getOperand (0 ).getReg ();
959+ auto IsMoveLR = [&CountReg](MachineInstr *MI) {
960+ return MI->getOpcode () == ARM::tMOVr &&
961+ MI->getOperand (0 ).getReg () == ARM::LR &&
962+ MI->getOperand (1 ).getReg () == CountReg &&
963+ MI->getOperand (2 ).getImm () == ARMCC::AL;
964+ };
965+
966+ MachineBasicBlock *MBB = Start->getParent ();
967+
968+ // Find an insertion point:
969+ // - Is there a (mov lr, Count) before Start? If so, and nothing else
970+ // writes to Count before Start, we can insert at that mov.
971+ if (auto *LRDef = RDA.getUniqueReachingMIDef (Start, ARM::LR))
972+ if (IsMoveLR (LRDef) && RDA.hasSameReachingDef (Start, LRDef, CountReg))
973+ return LRDef;
974+
975+ // - Is there a (mov lr, Count) after Start? If so, and nothing else writes
976+ // to Count after Start, we can insert at that mov.
977+ if (auto *LRDef = RDA.getLocalLiveOutMIDef (MBB, ARM::LR))
978+ if (IsMoveLR (LRDef) && RDA.hasSameReachingDef (Start, LRDef, CountReg))
979+ return LRDef;
980+
981+ // We've found no suitable LR def and Start doesn't use LR directly. Can we
982+ // just define LR anyway?
983+ return RDA.isSafeToDefRegAt (Start, ARM::LR) ? Start : nullptr ;
984+ };
994985
995- assert (ML. getBlocks (). size () == 1 &&
996- " Shouldn't be processing a loop with more than one block " ) ;
986+ InsertPt = FindStartInsertionPoint ();
987+ Revert = ! ValidateRanges () || !InsertPt ;
997988 CannotTailPredicate = !ValidateTailPredicate (InsertPt);
998- LLVM_DEBUG (if (CannotTailPredicate)
999- dbgs () << " ARM Loops: Couldn't validate tail predicate.\n " );
989+
990+ LLVM_DEBUG (if (!InsertPt)
991+ dbgs () << " ARM Loops: Unable to find safe insertion point.\n " ;
992+ else
993+ dbgs () << " ARM Loops: Start insertion point: " << *InsertPt;
994+ if (CannotTailPredicate)
995+ dbgs () << " ARM Loops: Couldn't validate tail predicate.\n "
996+ );
1000997}
1001998
1002999bool LowOverheadLoop::AddVCTP (MachineInstr *MI) {
@@ -1206,7 +1203,7 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) {
12061203 LLVM_DEBUG (dbgs () << " ARM Loops: Unable to remove LoopDec.\n " );
12071204 LoLoop.Revert = true ;
12081205 }
1209- LoLoop.CheckLegality (BBUtils.get ());
1206+ LoLoop.Validate (BBUtils.get ());
12101207 Expand (LoLoop);
12111208 return true ;
12121209}
0 commit comments