@@ -116,31 +116,112 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
116116 << " , SGPRExcessLimit = " << SGPRExcessLimit << " \n\n " );
117117}
118118
119+ // / Checks whether \p SU can use the cached DAG pressure diffs to compute the
120+ // / current register pressure.
121+ // /
122+ // / This works for the common case, but it has a few exceptions that have been
123+ // / observed through trial and error:
124+ // / - Explicit physical register operands
125+ // / - Subregister definitions
126+ // /
127+ // / In both of those cases, PressureDiff doesn't represent the actual pressure,
128+ // / and querying LiveIntervals through the RegPressureTracker is needed to get
129+ // / an accurate value.
130+ // /
131+ // / We should eventually only use PressureDiff for maximum performance, but this
132+ // / already allows 80% of SUs to take the fast path without changing scheduling
133+ // / at all. Further changes would either change scheduling, or require a lot
134+ // / more logic to recover an accurate pressure estimate from the PressureDiffs.
135+ static bool canUsePressureDiffs (const SUnit &SU) {
136+ if (!SU.isInstr ())
137+ return false ;
138+
139+ // Cannot use pressure diffs for subregister defs or with physregs, it's
140+ // imprecise in both cases.
141+ for (const auto &Op : SU.getInstr ()->operands ()) {
142+ if (!Op.isReg () || Op.isImplicit ())
143+ continue ;
144+ if (Op.getReg ().isPhysical () ||
145+ (Op.isDef () && Op.getSubReg () != AMDGPU::NoSubRegister))
146+ return false ;
147+ }
148+ return true ;
149+ }
150+
151+ static void getRegisterPressures (bool AtTop,
152+ const RegPressureTracker &RPTracker, SUnit *SU,
153+ std::vector<unsigned > &Pressure,
154+ std::vector<unsigned > &MaxPressure) {
155+ // getDownwardPressure() and getUpwardPressure() make temporary changes to
156+ // the tracker, so we need to pass those function a non-const copy.
157+ RegPressureTracker &TempTracker = const_cast <RegPressureTracker &>(RPTracker);
158+ if (AtTop)
159+ TempTracker.getDownwardPressure (SU->getInstr (), Pressure, MaxPressure);
160+ else
161+ TempTracker.getUpwardPressure (SU->getInstr (), Pressure, MaxPressure);
162+ }
163+
119164void GCNSchedStrategy::initCandidate (SchedCandidate &Cand, SUnit *SU,
120165 bool AtTop,
121166 const RegPressureTracker &RPTracker,
122167 const SIRegisterInfo *SRI,
123168 unsigned SGPRPressure,
124- unsigned VGPRPressure) {
169+ unsigned VGPRPressure, bool IsBottomUp ) {
125170 Cand.SU = SU;
126171 Cand.AtTop = AtTop;
127172
128173 if (!DAG->isTrackingPressure ())
129174 return ;
130175
131- // getDownwardPressure() and getUpwardPressure() make temporary changes to
132- // the tracker, so we need to pass those function a non-const copy.
133- RegPressureTracker &TempTracker = const_cast <RegPressureTracker&>(RPTracker);
134-
135176 Pressure.clear ();
136177 MaxPressure.clear ();
137178
138- if (AtTop)
139- TempTracker.getDownwardPressure (SU->getInstr (), Pressure, MaxPressure);
140- else {
141- // FIXME: I think for bottom up scheduling, the register pressure is cached
142- // and can be retrieved by DAG->getPressureDif(SU).
143- TempTracker.getUpwardPressure (SU->getInstr (), Pressure, MaxPressure);
179+ // We try to use the cached PressureDiffs in the ScheduleDAG whenever
180+ // possible over querying the RegPressureTracker.
181+ //
182+ // RegPressureTracker will make a lot of LIS queries which are very
183+ // expensive, it is considered a slow function in this context.
184+ //
185+ // PressureDiffs are precomputed and cached, and getPressureDiff is just a
186+ // trivial lookup into an array. It is pretty much free.
187+ //
188+ // In EXPENSIVE_CHECKS, we always query RPTracker to verify the results of
189+ // PressureDiffs.
190+ if (AtTop || !canUsePressureDiffs (*SU)) {
191+ getRegisterPressures (AtTop, RPTracker, SU, Pressure, MaxPressure);
192+ } else {
193+ // Reserve 4 slots.
194+ Pressure.resize (4 , 0 );
195+ Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
196+ Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;
197+
198+ for (const auto &Diff : DAG->getPressureDiff (SU)) {
199+ if (!Diff.isValid ())
200+ continue ;
201+ // PressureDiffs is always bottom-up so if we're working top-down we need
202+ // to invert its sign.
203+ Pressure[Diff.getPSet ()] +=
204+ (IsBottomUp ? Diff.getUnitInc () : -Diff.getUnitInc ());
205+ }
206+
207+ #ifdef EXPENSIVE_CHECKS
208+ std::vector<unsigned > CheckPressure, CheckMaxPressure;
209+ getRegisterPressures (AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure);
210+ if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
211+ CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
212+ Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
213+ CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
214+ errs () << " Register Pressure is inaccurate when calculated through "
215+ " PressureDiff\n "
216+ << " SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]
217+ << " , expected "
218+ << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << " \n "
219+ << " VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
220+ << " , expected "
221+ << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << " \n " ;
222+ report_fatal_error (" inaccurate register pressure calculation" );
223+ }
224+ #endif
144225 }
145226
146227 unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
@@ -158,7 +239,6 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
158239 bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
159240 bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
160241
161-
162242 // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
163243 // to increase the likelihood we don't go over the limits. We should improve
164244 // the analysis to look through dependencies to find the path with the least
@@ -207,7 +287,8 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
207287void GCNSchedStrategy::pickNodeFromQueue (SchedBoundary &Zone,
208288 const CandPolicy &ZonePolicy,
209289 const RegPressureTracker &RPTracker,
210- SchedCandidate &Cand) {
290+ SchedCandidate &Cand,
291+ bool IsBottomUp) {
211292 const SIRegisterInfo *SRI = static_cast <const SIRegisterInfo*>(TRI);
212293 ArrayRef<unsigned > Pressure = RPTracker.getRegSetPressureAtPos ();
213294 unsigned SGPRPressure = 0 ;
@@ -220,8 +301,8 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
220301 for (SUnit *SU : Q) {
221302
222303 SchedCandidate TryCand (ZonePolicy);
223- initCandidate (TryCand, SU, Zone.isTop (), RPTracker, SRI,
224- SGPRPressure, VGPRPressure );
304+ initCandidate (TryCand, SU, Zone.isTop (), RPTracker, SRI, SGPRPressure,
305+ VGPRPressure, IsBottomUp );
225306 // Pass SchedBoundary only when comparing nodes from the same boundary.
226307 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr ;
227308 tryCandidate (Cand, TryCand, ZoneArg);
@@ -262,15 +343,17 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
262343 if (!BotCand.isValid () || BotCand.SU ->isScheduled ||
263344 BotCand.Policy != BotPolicy) {
264345 BotCand.reset (CandPolicy ());
265- pickNodeFromQueue (Bot, BotPolicy, DAG->getBotRPTracker (), BotCand);
346+ pickNodeFromQueue (Bot, BotPolicy, DAG->getBotRPTracker (), BotCand,
347+ /* IsBottomUp=*/ true );
266348 assert (BotCand.Reason != NoCand && " failed to find the first candidate" );
267349 } else {
268350 LLVM_DEBUG (traceCandidate (BotCand));
269351#ifndef NDEBUG
270352 if (VerifyScheduling) {
271353 SchedCandidate TCand;
272354 TCand.reset (CandPolicy ());
273- pickNodeFromQueue (Bot, BotPolicy, DAG->getBotRPTracker (), TCand);
355+ pickNodeFromQueue (Bot, BotPolicy, DAG->getBotRPTracker (), TCand,
356+ /* IsBottomUp=*/ true );
274357 assert (TCand.SU == BotCand.SU &&
275358 " Last pick result should correspond to re-picking right now" );
276359 }
@@ -282,15 +365,17 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
282365 if (!TopCand.isValid () || TopCand.SU ->isScheduled ||
283366 TopCand.Policy != TopPolicy) {
284367 TopCand.reset (CandPolicy ());
285- pickNodeFromQueue (Top, TopPolicy, DAG->getTopRPTracker (), TopCand);
368+ pickNodeFromQueue (Top, TopPolicy, DAG->getTopRPTracker (), TopCand,
369+ /* IsBottomUp=*/ false );
286370 assert (TopCand.Reason != NoCand && " failed to find the first candidate" );
287371 } else {
288372 LLVM_DEBUG (traceCandidate (TopCand));
289373#ifndef NDEBUG
290374 if (VerifyScheduling) {
291375 SchedCandidate TCand;
292376 TCand.reset (CandPolicy ());
293- pickNodeFromQueue (Top, TopPolicy, DAG->getTopRPTracker (), TCand);
377+ pickNodeFromQueue (Top, TopPolicy, DAG->getTopRPTracker (), TCand,
378+ /* IsBottomUp=*/ false );
294379 assert (TCand.SU == TopCand.SU &&
295380 " Last pick result should correspond to re-picking right now" );
296381 }
@@ -327,7 +412,8 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
327412 if (!SU) {
328413 CandPolicy NoPolicy;
329414 TopCand.reset (NoPolicy);
330- pickNodeFromQueue (Top, NoPolicy, DAG->getTopRPTracker (), TopCand);
415+ pickNodeFromQueue (Top, NoPolicy, DAG->getTopRPTracker (), TopCand,
416+ /* IsBottomUp=*/ false );
331417 assert (TopCand.Reason != NoCand && " failed to find a candidate" );
332418 SU = TopCand.SU ;
333419 }
@@ -337,7 +423,8 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
337423 if (!SU) {
338424 CandPolicy NoPolicy;
339425 BotCand.reset (NoPolicy);
340- pickNodeFromQueue (Bot, NoPolicy, DAG->getBotRPTracker (), BotCand);
426+ pickNodeFromQueue (Bot, NoPolicy, DAG->getBotRPTracker (), BotCand,
427+ /* IsBottomUp=*/ true );
341428 assert (BotCand.Reason != NoCand && " failed to find a candidate" );
342429 SU = BotCand.SU ;
343430 }
0 commit comments