Skip to content

Commit 1955aca

Browse files
committed
[AMDGPU] Optionally Use AMDGPU RPTrackers during scheduling
Change-Id: I6ae56149c1eb49ea85362267174cc6274c416330
1 parent f151fcb commit 1955aca

File tree

4 files changed

+98
-25
lines changed

4 files changed

+98
-25
lines changed

llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -480,7 +480,7 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
480480
LLVM_DEBUG(dbgs() << "Scheduling using default scheduler, "
481481
"target occupancy = "
482482
<< TgtOcc << '\n');
483-
GCNMaxOccupancySchedStrategy LStrgy(Context);
483+
GCNMaxOccupancySchedStrategy LStrgy(Context, /*IsLegacyScheduler*/ true);
484484
unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
485485

486486
for (int I = 0; I < NumPasses; ++I) {

llvm/lib/Target/AMDGPU/GCNRegPressure.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,6 @@ class GCNRPTracker {
162162
public:
163163
// reset tracker and set live register set to the specified value.
164164
void reset(const MachineRegisterInfo &MRI_, const LiveRegSet &LiveRegs_);
165-
166165
// live regs for the current state
167166
const decltype(LiveRegs) &getLiveRegs() const { return LiveRegs; }
168167
const MachineInstr *getLastTrackedMI() const { return LastTrackedMI; }

llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp

Lines changed: 79 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ const unsigned ScheduleMetrics::ScaleFactor = 100;
6767

6868
GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
6969
: GenericScheduler(C), TargetOccupancy(0), MF(nullptr),
70+
TheTracker(*C->LIS), TheUpwardTracker(*C->LIS),
7071
HasHighPressure(false) {}
7172

7273
void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
@@ -133,23 +134,46 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
133134
if (!DAG->isTrackingPressure())
134135
return;
135136

136-
// getDownwardPressure() and getUpwardPressure() make temporary changes to
137-
// the tracker, so we need to pass those function a non-const copy.
138-
RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
137+
unsigned NewSGPRPressure, NewVGPRPressure;
138+
if (!GCNTrackers) {
139+
// getDownwardPressure() and getUpwardPressure() make temporary changes to
140+
// the tracker, so we need to pass those functions a non-const copy.
141+
RegPressureTracker &TempTracker =
142+
const_cast<RegPressureTracker &>(RPTracker);
143+
144+
Pressure.clear();
145+
MaxPressure.clear();
146+
147+
if (AtTop)
148+
TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
149+
else {
150+
// FIXME: I think for bottom up scheduling, the register pressure is
151+
// cached and can be retrieved by DAG->getPressureDif(SU).
152+
TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
153+
}
154+
NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
155+
NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
156+
}
139157

140-
Pressure.clear();
141-
MaxPressure.clear();
158+
if (GCNTrackers) {
159+
if (AtTop) {
160+
GCNDownwardRPTracker TempTopTracker(TheTracker);
161+
auto MI = SU->getInstr();
162+
TempTopTracker.advance(MI, true, DAG->getLIS());
142163

143-
if (AtTop)
144-
TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
145-
else {
146-
// FIXME: I think for bottom up scheduling, the register pressure is cached
147-
// and can be retrieved by DAG->getPressureDif(SU).
148-
TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
149-
}
164+
NewSGPRPressure = TempTopTracker.getPressure().getSGPRNum();
165+
NewVGPRPressure = TempTopTracker.getPressure().getVGPRNum(false);
166+
}
150167

151-
unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
152-
unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
168+
else {
169+
GCNUpwardRPTracker TempBotTracker(TheUpwardTracker);
170+
auto MI = SU->getInstr();
171+
TempBotTracker.recede(*MI, true);
172+
173+
NewSGPRPressure = TempBotTracker.getPressure().getSGPRNum();
174+
NewVGPRPressure = TempBotTracker.getPressure().getVGPRNum(false);
175+
}
176+
}
153177

154178
// If two instructions increase the pressure of different register sets
155179
// by the same amount, the generic scheduler will prefer to schedule the
@@ -218,8 +242,16 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
218242
unsigned SGPRPressure = 0;
219243
unsigned VGPRPressure = 0;
220244
if (DAG->isTrackingPressure()) {
221-
SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
222-
VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
245+
SGPRPressure =
246+
GCNTrackers
247+
? (Zone.isTop() ? TheTracker.getPressure().getSGPRNum()
248+
: TheUpwardTracker.getPressure().getSGPRNum())
249+
: Pressure[AMDGPU::RegisterPressureSets::SReg_32];
250+
VGPRPressure =
251+
GCNTrackers
252+
? (Zone.isTop() ? TheTracker.getPressure().getVGPRNum(false)
253+
: TheUpwardTracker.getPressure().getVGPRNum(false))
254+
: Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
223255
}
224256
ReadyQueue &Q = Zone.Available;
225257
for (SUnit *SU : Q) {
@@ -362,6 +394,16 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
362394
return SU;
363395
}
364396

397+
/// Override of GenericScheduler::schedNode that keeps the GCN register
/// pressure trackers in step with the schedule.
///
/// When GCNTrackers is set, the tracker matching the zone the node came from
/// is moved over the node's instruction: TheTracker advances for a top node,
/// TheUpwardTracker recedes for a bottom node. The generic implementation is
/// then invoked unconditionally.
///
/// \param SU        The scheduling unit that has just been scheduled.
/// \param IsTopNode True if \p SU was taken from the top zone.
void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  if (GCNTrackers) {
    MachineInstr *MI = SU->getInstr();
    // Use a plain if/else instead of a conditional expression: both calls are
    // made only for their side effects, and a ?: would additionally require
    // advance() and recede() to have compatible result types.
    if (IsTopNode)
      TheTracker.advance(MI, true, DAG->getLIS());
    else
      TheUpwardTracker.recede(*MI, true);
  }

  GenericScheduler::schedNode(SU, IsTopNode);
}
406+
365407
GCNSchedStageID GCNSchedStrategy::getCurrentStage() {
366408
assert(CurrentStage && CurrentStage != SchedStages.end());
367409
return *CurrentStage;
@@ -388,12 +430,13 @@ GCNSchedStageID GCNSchedStrategy::getNextStage() const {
388430
}
389431

390432
GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
391-
const MachineSchedContext *C)
433+
const MachineSchedContext *C, bool IsLegacyScheduler)
392434
: GCNSchedStrategy(C) {
393435
SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
394436
SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
395437
SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
396438
SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
439+
GCNTrackers = GCNTrackers & !IsLegacyScheduler;
397440
}
398441

399442
GCNMaxILPSchedStrategy::GCNMaxILPSchedStrategy(const MachineSchedContext *C)
@@ -682,9 +725,8 @@ void GCNScheduleDAGMILive::runSchedStages() {
682725

683726
if (!Regions.empty()) {
684727
BBLiveInMap = getBBLiveInMap();
685-
if (GCNTrackers) {
728+
if (GCNTrackers)
686729
BBLiveOutMap = getBBLiveOutMap();
687-
}
688730
}
689731

690732
GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
@@ -703,6 +745,21 @@ void GCNScheduleDAGMILive::runSchedStages() {
703745
continue;
704746
}
705747

748+
if (GCNTrackers) {
749+
GCNDownwardRPTracker *TheTracker = S.getTracker();
750+
GCNUpwardRPTracker *TheUpwardTracker = S.getUpwardTracker();
751+
GCNRPTracker::LiveRegSet *RegionLiveIns = &LiveIns[Stage->getRegionIdx()];
752+
GCNRPTracker::LiveRegSet *RegionLiveOuts = &LiveOuts[Stage->getRegionIdx()];
753+
754+
reinterpret_cast<GCNRPTracker *>(TheTracker)->reset(
755+
Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
756+
*RegionLiveIns);
757+
reinterpret_cast<GCNRPTracker *>(TheUpwardTracker)->reset(
758+
Regions[Stage->getRegionIdx()].first->getMF()->getRegInfo(),
759+
*RegionLiveOuts);
760+
761+
}
762+
706763
ScheduleDAGMILive::schedule();
707764
Stage->finalizeGCNRegion();
708765
}
@@ -973,6 +1030,7 @@ void GCNSchedStage::finalizeGCNRegion() {
9731030
void GCNSchedStage::checkScheduling() {
9741031
// Check the results of scheduling.
9751032
PressureAfter = DAG.getRealRegPressure(RegionIdx);
1033+
9761034
LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
9771035
LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
9781036

@@ -1524,9 +1582,6 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
15241582
MachineInstr *MI = Entry.first;
15251583
MachineInstr *OldMI = Entry.second;
15261584

1527-
// Remove OldMI from BBLiveInMap since we are sinking it from its MBB.
1528-
DAG.BBLiveInMap.erase(OldMI);
1529-
15301585
// Remove OldMI and update LIS
15311586
Register Reg = MI->getOperand(0).getReg();
15321587
LIS->RemoveMachineInstrFromMaps(*OldMI);
@@ -1544,6 +1599,8 @@ bool PreRARematStage::sinkTriviallyRematInsts(const GCNSubtarget &ST,
15441599
DAG.Regions = NewRegions;
15451600
DAG.RescheduleRegions = NewRescheduleRegions;
15461601

1602+
DAG.BBLiveInMap = DAG.getBBLiveInMap();
1603+
15471604
if (GCNTrackers) {
15481605
DAG.BBLiveOutMap = DAG.getBBLiveOutMap();
15491606
auto I = DAG.Regions.begin(), E = DAG.Regions.end();

llvm/lib/Target/AMDGPU/GCNSchedStrategy.h

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,12 @@ class GCNSchedStrategy : public GenericScheduler {
7070
// Pointer to the current SchedStageID.
7171
SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;
7272

73+
// GCN RP Tracker for top-down scheduling
74+
mutable GCNDownwardRPTracker TheTracker;
75+
76+
// GCN RP Tracker for bottom-up scheduling
77+
mutable GCNUpwardRPTracker TheUpwardTracker;
78+
7379
public:
7480
// schedule() has seen register pressure over the critical limits and had to
7581
// track register pressure for actual scheduling heuristics.
@@ -102,6 +108,8 @@ class GCNSchedStrategy : public GenericScheduler {
102108

103109
SUnit *pickNode(bool &IsTopNode) override;
104110

111+
void schedNode(SUnit *SU, bool IsTopNode) override;
112+
105113
void initialize(ScheduleDAGMI *DAG) override;
106114

107115
unsigned getTargetOccupancy() { return TargetOccupancy; }
@@ -116,13 +124,19 @@ class GCNSchedStrategy : public GenericScheduler {
116124
bool hasNextStage() const;
117125

118126
GCNSchedStageID getNextStage() const;
127+
128+
// Returns a pointer to TheTracker, the GCN RP tracker this strategy uses for
// top-down scheduling.
GCNDownwardRPTracker *getTracker() { return &TheTracker; }
129+
130+
// Returns a pointer to TheUpwardTracker, the GCN RP tracker this strategy
// uses for bottom-up scheduling.
GCNUpwardRPTracker *getUpwardTracker() { return &TheUpwardTracker; }
131+
119132
};
120133

121134
/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
122135
/// maximum number of waves per simd).
123136
class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
124137
public:
125-
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);
138+
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C,
139+
bool IsLegacyScheduler = false);
126140
};
127141

128142
/// The goal of this scheduling strategy is to maximize ILP for a single wave
@@ -317,6 +331,9 @@ class GCNSchedStage {
317331
bool isRegionWithExcessRP() const {
318332
return DAG.RegionsWithExcessRP[RegionIdx];
319333
}
334+
335+
// The region number this stage is currently working on
336+
unsigned getRegionIdx() { return RegionIdx; }
320337

321338
// Returns true if the new schedule may result in more spilling.
322339
bool mayCauseSpilling(unsigned WavesAfter);

0 commit comments

Comments
 (0)