diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp index aebfe154b3139..ccee5db9a3bb6 100644 --- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp +++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp @@ -480,7 +480,7 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy( LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, " "target occupancy = " << TgtOcc << '\n'); - GCNMaxOccupancySchedStrategy LStrgy(Context); + GCNMaxOccupancySchedStrategy LStrgy(Context, /*IsLegacyScheduler*/ true); unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy()); for (int I = 0; I < NumPasses; ++I) { diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp index 5c394e6d6296d..307b8477041c5 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -343,6 +343,48 @@ void GCNRPTracker::reset(const MachineInstr &MI, MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs); } +DenseMap +llvm::getLiveRegMap(DenseMap &R, bool After, + LiveIntervals &LIS) { + std::vector Indexes; + // Indexes.reserve(R.size()); + auto &SII = *LIS.getSlotIndexes(); + for (std::pair &Entry : R) { + auto SI = SII.getInstructionIndex(*Entry.first); + Indexes.push_back(After ? 
SI.getDeadSlot() : SI.getBaseIndex()); + } + llvm::sort(Indexes); + + auto &MRI = (*R.begin()).first->getParent()->getParent()->getRegInfo(); + DenseMap LiveRegMap; + SmallVector LiveIdxs, SRLiveIdxs; + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { + auto Reg = Register::index2VirtReg(I); + if (!LIS.hasInterval(Reg)) + continue; + auto &LI = LIS.getInterval(Reg); + LiveIdxs.clear(); + if (!LI.findIndexesLiveAt(Indexes, std::back_inserter(LiveIdxs))) + continue; + if (!LI.hasSubRanges()) { + for (auto SI : LiveIdxs) { + auto Idx = R[SII.getInstructionFromIndex(SI)]; + LiveRegMap[Idx][Reg] = MRI.getMaxLaneMaskForVReg(Reg); + } + } else + for (const auto &S : LI.subranges()) { + // constrain search for subranges by indexes live at main range + SRLiveIdxs.clear(); + S.findIndexesLiveAt(LiveIdxs, std::back_inserter(SRLiveIdxs)); + for (auto SI : SRLiveIdxs) { + auto Idx = R[SII.getInstructionFromIndex(SI)]; + LiveRegMap[Idx][Reg] |= S.LaneMask; + } + } + } + return LiveRegMap; +} + //////////////////////////////////////////////////////////////////////////////// // GCNUpwardRPTracker @@ -570,6 +612,127 @@ bool GCNUpwardRPTracker::isValid() const { return true; } +//////////////////////////////////////////////////////////////////////////////// +// GCNIterativeRPTrackers + +void GCNIterativeRPTracker::reset(const MachineRegisterInfo *MRI_, + const LiveRegSet *LiveRegsCopy) { + + MRI = MRI_; + if (LiveRegsCopy && &LiveRegs != LiveRegsCopy) + LiveRegs = *LiveRegsCopy; + if (!LiveRegsCopy) + LiveRegs.clear(); + MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs); +} + +// Mostly copy+paste from GCNUpwardRPTracker::recede +void GCNIterativeUpwardRPTracker::recede(const MachineInstr &MI, + LiveIntervals *LIS) { + assert(MRI && "call reset first"); + + if (MI.isDebugInstr()) + return; + + SmallVector RegUses; + collectVirtualRegUses(RegUses, MI, *LIS, *MRI); + + // calc pressure at the MI (defs + uses) + auto AtMIPressure = CurPressure; + for (const auto 
&U : RegUses) { + auto LiveMask = LiveRegs[U.RegUnit]; + AtMIPressure.inc(U.RegUnit, LiveMask, LiveMask | U.LaneMask, *MRI); + } + // update max pressure + MaxPressure = max(AtMIPressure, MaxPressure); + + for (const auto &MO : MI.all_defs()) { + if (!MO.getReg().isVirtual() || MO.isDead()) + continue; + + auto Reg = MO.getReg(); + auto I = LiveRegs.find(Reg); + if (I == LiveRegs.end()) + continue; + auto &LiveMask = I->second; + auto PrevMask = LiveMask; + LiveMask &= ~getDefRegMask(MO, *MRI); + CurPressure.inc(Reg, PrevMask, LiveMask, *MRI); + if (LiveMask.none()) + LiveRegs.erase(I); + } + for (const auto &U : RegUses) { + auto &LiveMask = LiveRegs[U.RegUnit]; + auto PrevMask = LiveMask; + LiveMask |= U.LaneMask; + CurPressure.inc(U.RegUnit, PrevMask, LiveMask, *MRI); + } + assert(CurPressure == getRegPressure(*MRI, LiveRegs)); +} + +// Mostly copy+paste from GCNDownwardRPTracker::(advanceBeforeNext + +// advanceToNext) +void GCNIterativeDownwardRPTracker::advance(const MachineInstr &MI, + LiveIntervals *LIS) { + assert(MRI && "call reset first"); + // Add new registers or mask bits. + for (const auto &MO : MI.all_defs()) { + Register Reg = MO.getReg(); + if (!Reg.isVirtual()) + continue; + if (MO.isDead()) + continue; + auto &LiveMask = LiveRegs[Reg]; + auto PrevMask = LiveMask; + LiveMask |= getDefRegMask(MO, *MRI); + CurPressure.inc(Reg, PrevMask, LiveMask, *MRI); + } + + SlotIndex SI = LIS->getInstructionIndex(MI).getBoundaryIndex(); + assert(SI.isValid()); + + // Remove dead registers or mask bits. 
+ SmallSet SeenRegs; + for (auto &MO : MI.operands()) { + if (!MO.isReg() || !MO.getReg().isVirtual()) + continue; + if (!MO.readsReg()) + continue; + if (!MO.isUse()) + continue; + if (!SeenRegs.insert(MO.getReg()).second) + continue; + + const LiveInterval &LI = LIS->getInterval(MO.getReg()); + if (LI.hasSubRanges()) { + auto It = LiveRegs.end(); + for (const auto &S : LI.subranges()) { + if (S.expiredAt(SI)) { + if (It == LiveRegs.end()) { + It = LiveRegs.find(MO.getReg()); + if (It == LiveRegs.end()) + llvm_unreachable("register isn't live"); + } + auto PrevMask = It->second; + It->second &= ~S.LaneMask; + CurPressure.inc(MO.getReg(), PrevMask, It->second, *MRI); + } + } + if (It != LiveRegs.end() && It->second.none()) { + LiveRegs.erase(It); + } + } else if (LI.expiredAt(SI)) { + auto It = LiveRegs.find(MO.getReg()); + if (It == LiveRegs.end()) + llvm_unreachable("register isn't live"); + CurPressure.inc(MO.getReg(), It->second, LaneBitmask::getNone(), *MRI); + LiveRegs.erase(It); + } + } + + MaxPressure = max(MaxPressure, CurPressure); +} + Printable llvm::print(const GCNRPTracker::LiveRegSet &LiveRegs, const MachineRegisterInfo &MRI) { return Printable([&LiveRegs, &MRI](raw_ostream &OS) { diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index 752f53752fa68..69a04e18185d7 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -259,6 +259,41 @@ class GCNDownwardRPTracker : public GCNRPTracker { const LiveRegSet *LiveRegsCopy = nullptr); }; +class GCNIterativeRPTracker { +public: + using LiveRegSet = DenseMap; + +protected: + LiveRegSet LiveRegs; + GCNRegPressure CurPressure, MaxPressure; + + mutable const MachineRegisterInfo *MRI = nullptr; + + GCNIterativeRPTracker(){}; + +public: + void reset(const MachineRegisterInfo *MRI_, const LiveRegSet *LiveRegsCopy); + + GCNRegPressure getPressure() const { return CurPressure; } + GCNRegPressure getMaxPressure() const { return 
MaxPressure; } +}; + +class GCNIterativeUpwardRPTracker : public GCNIterativeRPTracker { +public: + GCNIterativeUpwardRPTracker(){}; + + // Move to the state just before the MI. + void recede(const MachineInstr &MI, LiveIntervals *TheLIS); +}; + +class GCNIterativeDownwardRPTracker : public GCNIterativeRPTracker { +public: + GCNIterativeDownwardRPTracker(){}; + + // Move to the state just after the MI. + void advance(const MachineInstr &MI, LiveIntervals *TheLIS); +}; + LaneBitmask getLiveLaneMask(unsigned Reg, SlotIndex SI, const LiveIntervals &LIS, @@ -275,44 +310,8 @@ GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS, /// After - upon entry or exit of every instruction /// Note: there is no entry in the map for instructions with empty live reg set /// Complexity = O(NumVirtRegs * averageLiveRangeSegmentsPerReg * lg(R)) -template -DenseMap -getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) { - std::vector Indexes; - Indexes.reserve(std::distance(R.begin(), R.end())); - auto &SII = *LIS.getSlotIndexes(); - for (MachineInstr *I : R) { - auto SI = SII.getInstructionIndex(*I); - Indexes.push_back(After ? 
SI.getDeadSlot() : SI.getBaseIndex()); - } - llvm::sort(Indexes); - - auto &MRI = (*R.begin())->getParent()->getParent()->getRegInfo(); - DenseMap LiveRegMap; - SmallVector LiveIdxs, SRLiveIdxs; - for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - auto Reg = Register::index2VirtReg(I); - if (!LIS.hasInterval(Reg)) - continue; - auto &LI = LIS.getInterval(Reg); - LiveIdxs.clear(); - if (!LI.findIndexesLiveAt(Indexes, std::back_inserter(LiveIdxs))) - continue; - if (!LI.hasSubRanges()) { - for (auto SI : LiveIdxs) - LiveRegMap[SII.getInstructionFromIndex(SI)][Reg] = - MRI.getMaxLaneMaskForVReg(Reg); - } else - for (const auto &S : LI.subranges()) { - // constrain search for subranges by indexes live at main range - SRLiveIdxs.clear(); - S.findIndexesLiveAt(LiveIdxs, std::back_inserter(SRLiveIdxs)); - for (auto SI : SRLiveIdxs) - LiveRegMap[SII.getInstructionFromIndex(SI)][Reg] |= S.LaneMask; - } - } - return LiveRegMap; -} +DenseMap +getLiveRegMap(DenseMap &R, bool After, LiveIntervals &LIS); inline GCNRPTracker::LiveRegSet getLiveRegsAfter(const MachineInstr &MI, const LiveIntervals &LIS) { diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 94d93390d0916..29904d813d9f4 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -58,6 +58,11 @@ static cl::opt "Wave Limited (amdgpu-limit-wave-threshold)."), cl::init(false)); +static cl::opt GCNTrackers( + "amdgpu-use-gcn-iterative-trackers", cl::Hidden, + cl::desc("Use the GCN specific iterative RPTrackers during scheduling"), + cl::init(false)); + const unsigned ScheduleMetrics::ScaleFactor = 100; GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C) @@ -128,23 +133,46 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU, if (!DAG->isTrackingPressure()) return; - // getDownwardPressure() and getUpwardPressure() make temporary changes to - // the tracker, so we need to 
pass those function a non-const copy. - RegPressureTracker &TempTracker = const_cast(RPTracker); + unsigned NewSGPRPressure, NewVGPRPressure; + if (!GCNTrackers) { + // getDownwardPressure() and getUpwardPressure() make temporary changes to + // the tracker, so we need to pass those function a non-const copy. + RegPressureTracker &TempTracker = + const_cast(RPTracker); + + Pressure.clear(); + MaxPressure.clear(); + + if (AtTop) + TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure); + else { + // FIXME: I think for bottom up scheduling, the register pressure is + // cached and can be retrieved by DAG->getPressureDif(SU). + TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure); + } + NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32]; + NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32]; + } - Pressure.clear(); - MaxPressure.clear(); + if (GCNTrackers) { + if (AtTop) { + GCNIterativeDownwardRPTracker TempTopTracker(TheTracker); + auto MI = SU->getInstr(); + TempTopTracker.advance(*MI, DAG->getLIS()); - if (AtTop) - TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure); - else { - // FIXME: I think for bottom up scheduling, the register pressure is cached - // and can be retrieved by DAG->getPressureDif(SU). 
- TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure); - } + NewSGPRPressure = TempTopTracker.getPressure().getSGPRNum(); + NewVGPRPressure = TempTopTracker.getPressure().getVGPRNum(false); + } + + else { + GCNIterativeUpwardRPTracker TempBotTracker(TheUpwardTracker); + auto MI = SU->getInstr(); + TempBotTracker.recede(*MI, DAG->getLIS()); - unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32]; - unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32]; + NewSGPRPressure = TempBotTracker.getPressure().getSGPRNum(); + NewVGPRPressure = TempBotTracker.getPressure().getVGPRNum(false); + } + } // If two instructions increase the pressure of different register sets // by the same amount, the generic scheduler will prefer to schedule the @@ -213,12 +241,20 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone, unsigned SGPRPressure = 0; unsigned VGPRPressure = 0; if (DAG->isTrackingPressure()) { - SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32]; - VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32]; + SGPRPressure = + GCNTrackers + ? (Zone.isTop() ? TheTracker.getPressure().getSGPRNum() + : TheUpwardTracker.getPressure().getSGPRNum()) + : Pressure[AMDGPU::RegisterPressureSets::SReg_32]; + VGPRPressure = + GCNTrackers + ? (Zone.isTop() ? TheTracker.getPressure().getVGPRNum(false) + : TheUpwardTracker.getPressure().getVGPRNum(false)) + : Pressure[AMDGPU::RegisterPressureSets::VGPR_32]; } + ReadyQueue &Q = Zone.Available; for (SUnit *SU : Q) { - SchedCandidate TryCand(ZonePolicy); initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure, VGPRPressure); @@ -312,6 +348,16 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) { return Cand.SU; } +void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { + if (GCNTrackers) { + MachineInstr *MI = SU->getInstr(); + IsTopNode ? 
TheTracker.advance(*MI, DAG->getLIS()) + : TheUpwardTracker.recede(*MI, DAG->getLIS()); + } + + return GenericScheduler::schedNode(SU, IsTopNode); +} + // This function is mostly cut and pasted from // GenericScheduler::pickNode() SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) { @@ -383,12 +429,13 @@ GCNSchedStageID GCNSchedStrategy::getNextStage() const { } GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy( - const MachineSchedContext *C) + const MachineSchedContext *C, bool IsLegacyScheduler) : GCNSchedStrategy(C) { SchedStages.push_back(GCNSchedStageID::OccInitialSchedule); SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule); SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule); SchedStages.push_back(GCNSchedStageID::PreRARematerialize); + GCNTrackers = GCNTrackers && !IsLegacyScheduler; } GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C) @@ -565,7 +612,7 @@ void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx, MBBLiveIns.erase(LiveInIt); } else { I = Rgn.first; - auto LRS = BBLiveInMap.lookup(NonDbgMI); + auto LRS = BBLiveInMap.lookup(CurRegion); #ifdef EXPENSIVE_CHECKS assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS)); #endif @@ -599,23 +646,44 @@ void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx, } } -DenseMap +DenseMap GCNScheduleDAGMILive::getBBLiveInMap() const { assert(!Regions.empty()); - std::vector BBStarters; - BBStarters.reserve(Regions.size()); - auto I = Regions.rbegin(), E = Regions.rend(); - auto *BB = I->first->getParent(); - do { - auto *MI = &*skipDebugInstructionsForward(I->first, I->second); - BBStarters.push_back(MI); - do { - ++I; - } while (I != E && I->first->getParent() == BB); - } while (I != E); + DenseMap BBStarters; + for (unsigned I = Regions.size(); I > 0; I--) { + unsigned Idx = I - 1; + auto Rgn = Regions[Idx]; + auto *MI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second); + BBStarters.insert({MI, Idx}); + } 
return getLiveRegMap(BBStarters, false /*After*/, *LIS); } +DenseMap +GCNScheduleDAGMILive::getBBLiveOutMap() const { + assert(!Regions.empty()); + DenseMap BBEnders; + for (unsigned I = Regions.size(); I > 0; I--) { + unsigned Idx = I - 1; + auto Rgn = Regions[Idx]; + auto TheBB = Rgn.first->getParent(); + if (Rgn.second != TheBB->end() && !Rgn.second->isDebugInstr()) { + BBEnders.insert({&*Rgn.second, Idx}); + continue; + } + if (Rgn.second == TheBB->end()) { + auto *MI = &*prev_nodbg(Rgn.second, Rgn.first); + BBEnders.insert({&*MI, Idx}); + continue; + } + + auto *MI = &*skipDebugInstructionsBackward(Rgn.second, Rgn.first); + BBEnders.insert({MI, Idx}); + } + + return getLiveRegMap(BBEnders, true /*After*/, *LIS); +} + void GCNScheduleDAGMILive::finalizeSchedule() { // Start actual scheduling here. This function is called by the base // MachineScheduler after all regions have been recorded by @@ -639,9 +707,14 @@ void GCNScheduleDAGMILive::runSchedStages() { LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n"); - if (!Regions.empty()) + if (!Regions.empty()) { BBLiveInMap = getBBLiveInMap(); + if (GCNTrackers) { + BBLiveOutMap = getBBLiveOutMap(); + } + } + GCNSchedStrategy &S = static_cast(*SchedImpl); while (S.advanceStage()) { auto Stage = createSchedStage(S.getCurrentStage()); @@ -658,6 +731,27 @@ void GCNScheduleDAGMILive::runSchedStages() { continue; } + if (GCNTrackers) { + GCNIterativeDownwardRPTracker *TheTracker = S.getTracker(); + GCNIterativeUpwardRPTracker *TheUpwardTracker = S.getUpwardTracker(); + auto LiveInEntry = BBLiveInMap.find(Stage->getRegionIdx()); + GCNRPTracker::LiveRegSet *LiveIns = + LiveInEntry != BBLiveInMap.end() ? &LiveInEntry->second : nullptr; + auto LiveOutEntry = BBLiveOutMap.find(Stage->getRegionIdx()); + GCNRPTracker::LiveRegSet *LiveOuts = LiveOutEntry != BBLiveOutMap.end() + ? 
&LiveOutEntry->second + : nullptr; + TheTracker->reset( + &Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(), + LiveIns); + TheUpwardTracker->reset( + &Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(), + LiveOuts); + + S.setTracker(*TheTracker); + S.setUpwardTracker(*TheUpwardTracker); + } + ScheduleDAGMILive::schedule(); Stage->finalizeGCNRegion(); } @@ -1479,9 +1573,6 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST, MachineInstr *MI = Entry.first; MachineInstr *OldMI = Entry.second; - // Remove OldMI from BBLiveInMap since we are sinking it from its MBB. - DAG.BBLiveInMap.erase(OldMI); - // Remove OldMI and update LIS Register Reg = MI->getOperand(0).getReg(); LIS->RemoveMachineInstrFromMaps(*OldMI); @@ -1493,12 +1584,17 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST, // Update live-ins, register pressure, and regions caches. for (auto Idx : ImpactedRegions) { DAG.LiveIns[Idx] = NewLiveIns[Idx]; + DAG.BBLiveInMap[Idx] = NewLiveIns[Idx]; DAG.Pressure[Idx] = NewPressure[Idx]; DAG.MBBLiveIns.erase(DAG.Regions[Idx].first->getParent()); } + DAG.Regions = NewRegions; DAG.RescheduleRegions = NewRescheduleRegions; + if (GCNTrackers) + DAG.BBLiveOutMap = DAG.getBBLiveOutMap(); + SIMachineFunctionInfo &MFI = *MF.getInfo(); MFI.increaseOccupancy(MF, ++DAG.MinOccupancy); diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h index 2084aae4128ff..19c841df88b77 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -70,6 +70,12 @@ class GCNSchedStrategy : public GenericScheduler { // Pointer to the current SchedStageID. 
SmallVectorImpl::iterator CurrentStage = nullptr; + + // GCN RP Tracker for top-down scheduling + GCNIterativeDownwardRPTracker TheTracker; + + // GCN RP Tracker for bottom-up scheduling + GCNIterativeUpwardRPTracker TheUpwardTracker; + public: // schedule() have seen register pressure over the critical limits and had to // track register pressure for actual scheduling heuristics. @@ -102,6 +108,8 @@ class GCNSchedStrategy : public GenericScheduler { SUnit *pickNode(bool &IsTopNode) override; + void schedNode(SUnit *SU, bool IsTopNode) override; + void initialize(ScheduleDAGMI *DAG) override; unsigned getTargetOccupancy() { return TargetOccupancy; } @@ -116,13 +124,26 @@ class GCNSchedStrategy : public GenericScheduler { bool hasNextStage() const; GCNSchedStageID getNextStage() const; + + GCNIterativeDownwardRPTracker *getTracker() { return &TheTracker; } + + GCNIterativeUpwardRPTracker *getUpwardTracker() { return &TheUpwardTracker; } + + void setTracker(GCNIterativeDownwardRPTracker &Tracker) { + TheTracker = Tracker; + } + + void setUpwardTracker(GCNIterativeUpwardRPTracker &Tracker) { + TheUpwardTracker = Tracker; + } }; /// The goal of this scheduling strategy is to maximize kernel occupancy (i.e. /// maximum number of waves per simd). class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy { public: - GCNMaxOccupancySchedStrategy(const MachineSchedContext *C); + GCNMaxOccupancySchedStrategy(const MachineSchedContext *C, + bool IsLegacyScheduler = false); }; /// The goal of this scheduling strategy is to maximize ILP for a single wave @@ -211,9 +232,17 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive { // Temporary basic block live-in cache. 
DenseMap MBBLiveIns; - DenseMap BBLiveInMap; + // Map of RegionIdx->LiveIns + DenseMap BBLiveInMap; - DenseMap getBBLiveInMap() const; + // Calculate and return the per region map: RegionIdx->LiveIns + DenseMap getBBLiveInMap() const; + + // Map of RegionIdx->LiveOuts + DenseMap BBLiveOutMap; + + // Calculate and return the per region map: RegionIdx->LiveOuts + DenseMap getBBLiveOutMap() const; // Return current region pressure. GCNRegPressure getRealRegPressure(unsigned RegionIdx) const; @@ -311,6 +340,9 @@ class GCNSchedStage { return DAG.RegionsWithExcessRP[RegionIdx]; } + // The region number this stage is currently working on + unsigned getRegionIdx() { return RegionIdx; } + // Returns true if the new schedule may result in more spilling. bool mayCauseSpilling(unsigned WavesAfter);