Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
299 changes: 275 additions & 24 deletions llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,102 @@ collectVirtualRegUses(SmallVectorImpl<RegisterMaskPair> &RegMaskPairs,
}
}

/// Look up the lane mask recorded for \p RegUnit in \p RegUnits.
/// (Adapted from CodeGen/RegisterPressure.cpp.)
///
/// \param RegUnits list of (register, lane mask) pairs to search.
/// \param RegUnit  register whose entry is wanted.
/// \returns the LaneMask of the first entry whose RegUnit equals \p RegUnit,
///          or LaneBitmask::getNone() if no entry matches.
static LaneBitmask getRegLanes(ArrayRef<RegisterMaskPair> RegUnits,
                               Register RegUnit) {
  for (const RegisterMaskPair &Entry : RegUnits)
    if (Entry.RegUnit == RegUnit)
      return Entry.LaneMask;
  return LaneBitmask::getNone();
}

/// Collect the lanes of \p RegUnit whose live range satisfies \p Property at
/// \p Pos. (Adapted from CodeGen/RegisterPressure.cpp.)
///
/// Virtual registers:
///  - with \p TrackLaneMasks set and subranges present, the result is the
///    union of the lane masks of every subrange for which \p Property holds;
///  - otherwise \p Property is evaluated on the main interval and, if it
///    holds, the result is the register's maximal lane mask (when tracking
///    lane masks) or LaneBitmask::getAll() (when not);
///  - if \p Property never holds, the result is empty.
///
/// Non-virtual register units: if no cached live range is available,
/// \p SafeDefault is returned; otherwise the result is all lanes or no lanes
/// depending on \p Property.
static LaneBitmask getLanesWithProperty(
    const LiveIntervals &LIS, const MachineRegisterInfo &MRI,
    bool TrackLaneMasks, Register RegUnit, SlotIndex Pos,
    LaneBitmask SafeDefault,
    function_ref<bool(const LiveRange &LR, SlotIndex Pos)> Property) {
  // Register-unit path: answered from the cached unit range, if there is one.
  if (!RegUnit.isVirtual()) {
    const LiveRange *UnitRange = LIS.getCachedRegUnit(RegUnit);
    if (UnitRange == nullptr)
      return SafeDefault;
    if (Property(*UnitRange, Pos))
      return LaneBitmask::getAll();
    return LaneBitmask::getNone();
  }

  const LiveInterval &Interval = LIS.getInterval(RegUnit);

  // Per-lane answer: accumulate the mask of each qualifying subrange.
  if (TrackLaneMasks && Interval.hasSubRanges()) {
    LaneBitmask Lanes;
    for (const LiveInterval::SubRange &Sub : Interval.subranges())
      if (Property(Sub, Pos))
        Lanes |= Sub.LaneMask;
    return Lanes;
  }

  // Whole-register answer based on the main range only.
  if (!Property(Interval, Pos))
    return LaneBitmask();
  if (TrackLaneMasks)
    return MRI.getMaxLaneMaskForVReg(RegUnit);
  return LaneBitmask::getAll();
}

/// Mostly copy/paste from CodeGen/RegisterPressure.cpp
/// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx).
/// The query starts with a lane bitmask which gets lanes/bits removed for every
/// use we find.
static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask,
SlotIndex PriorUseIdx, SlotIndex NextUseIdx,
const MachineRegisterInfo &MRI,
const SIRegisterInfo *TRI,
const LiveIntervals *LIS,
bool Upward = false) {
for (const MachineOperand &MO : MRI.use_nodbg_operands(Reg)) {
if (MO.isUndef())
continue;
Comment on lines +340 to +342
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This probably doesn't correctly handle subregister defs that read the register. I think you need to use one of the other iterators, and MO.readsReg

Copy link
Contributor Author

@jrbyrnes jrbyrnes Aug 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, I'm not quite sure what we're trying to accomplish here?

  // Definitions

  1.  undef %63.sub1:vreg_128 = COPY %49.sub18:areg_1024 
  2.  %49.sub1:areg_1024 = COPY %63.sub1:vreg_128
  3.  %63.sub0:vreg_128 = COPY %49.sub19:areg_1024

  // No uses of %49.sub19 or defs of %49.subx

If we were speculating the RP from schedule (1, 3, ..) we would invoke findUseBetween to find any uses of %49 between 3's original position and its candidate position. I think you are saying that we should consider the %49.sub1 def in 2. as an implicit use of %49.sub19? I know that the LIS requires subregister liveness to be modeled in this way somewhat (e.g. no multiple connected components), but from the perspective of RegisterPressure, it seems more accurate (and safe) to not model it this way -- that is, to only check use operands.

Copy link
Contributor

@arsenm arsenm Aug 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is a use, and I don't see how it could possibly be safer to ignore it as such. Deviation from the liveness model requires a much stronger rationale than this. I've fixed too many bugs from missing uses on subreg defs.

All of the rest of the code is following ordinary liveness tracking

Copy link
Contributor Author

@jrbyrnes jrbyrnes Aug 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ultimately, RP should agree with RA interference. AFAICT, RA interference is determined by the subrange segments, rather than the main range with the strange liveness concerns -- I'm still analyzing.

In https://godbolt.org/z/YY4P7f9oe we have a use of %49.sub0:areg_1024 occurring before a def of the same superreg, %49.sub1:areg_1024. According to the rule that a subreg def is a use of the other subregs, %49.sub0:areg_1024 should be live until the def of %49.sub1:areg_1024. However, when assigning %2:agpr_32, RA sees there is no interference with %49.sub0:areg_1024 and assigns it to the same PhysReg.

This is consistent with how the GCNTrackers currently calculate RP: we consider the use subrange and whether or not it is live at some given index

and don't check if there is some reaching subreg def.

const MachineInstr *MI = MO.getParent();
SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot();
bool InRange = Upward ? (InstSlot > PriorUseIdx && InstSlot <= NextUseIdx)
: (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx);
if (InRange) {
unsigned SubRegIdx = MO.getSubReg();
LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
LastUseMask &= ~UseMask;
if (LastUseMask.none())
return LaneBitmask::getNone();
}
}
return LastUseMask;
}

/// Return the lanes of \p RegUnit that are live at \p Pos.
/// (Adapted from CodeGen/RegisterPressure.cpp.)
///
/// This is getLanesWithProperty() with "the range is live at the queried
/// index" as the property and LaneBitmask::getAll() as the safe default.
static LaneBitmask getLiveLanesAt(const LiveIntervals &LIS,
                                  const MachineRegisterInfo &MRI,
                                  bool TrackLaneMasks, Register RegUnit,
                                  SlotIndex Pos) {
  auto IsLiveAt = [](const LiveRange &Range, SlotIndex Idx) {
    return Range.liveAt(Idx);
  };
  return getLanesWithProperty(LIS, MRI, TrackLaneMasks, RegUnit, Pos,
                              LaneBitmask::getAll(), IsLiveAt);
}

// Adapted from RegisterPressure.cpp (RegisterOperands::adjustLaneLiveness).
//
// Restrict every entry of \p Defs to the lanes that are live at the dead slot
// of \p Pos (queried with lane-mask tracking enabled). Entries that end up
// with no lanes are erased from \p Defs; the others have their LaneMask
// narrowed in place.
static void adjustDefLaneLiveness(SmallVectorImpl<RegisterMaskPair> &Defs,
                                  SlotIndex &Pos, const LiveIntervals &LIS,
                                  const MachineRegisterInfo &MRI) {
  auto *It = Defs.begin();
  while (It != Defs.end()) {
    LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, /*TrackLaneMasks=*/true,
                                           It->RegUnit, Pos.getDeadSlot());
    LaneBitmask StillLive = It->LaneMask & LiveAfter;
    if (StillLive.none()) {
      // Nothing written by this def is live afterwards: drop the entry.
      // erase() hands back the iterator to the element that follows.
      It = Defs.erase(It);
      continue;
    }
    It->LaneMask = StillLive;
    ++It;
  }
}

///////////////////////////////////////////////////////////////////////////////
// GCNRPTracker

Expand Down Expand Up @@ -343,17 +439,41 @@ void GCNRPTracker::reset(const MachineInstr &MI,
MaxPressure = CurPressure = getRegPressure(*MRI, LiveRegs);
}

////////////////////////////////////////////////////////////////////////////////
// GCNUpwardRPTracker

// NOTE(review): this span comes from a rendered diff, so it appears to show
// both the pre-change signature (GCNUpwardRPTracker::reset) and the
// post-change one (GCNRPTracker::reset) in front of a single body. Confirm
// against the real file which of the two headers is current.
void GCNUpwardRPTracker::reset(const MachineRegisterInfo &MRI_,
const LiveRegSet &LiveRegs_) {
/// Re-initialise the tracker from an explicit live-register set.
///
/// \param MRI_      register info to remember; its address is stored in MRI.
/// \param LiveRegs_ live registers to start from; copied into LiveRegs.
void GCNRPTracker::reset(const MachineRegisterInfo &MRI_,
const LiveRegSet &LiveRegs_) {
MRI = &MRI_;
LiveRegs = LiveRegs_;
// No instruction has been tracked yet for this starting state.
LastTrackedMI = nullptr;
// Current and maximum pressure both start at the pressure of the given set.
MaxPressure = CurPressure = getRegPressure(MRI_, LiveRegs_);
}

/// Fold the pressure of \p DeadDefs into MaxPressure without leaving them in
/// CurPressure.
///
/// All virtual-register dead defs are added to CurPressure together (each one
/// widening the register's current LiveRegs mask by the def's lanes),
/// MaxPressure is raised to cover that peak, and CurPressure is then put back
/// to the value it had on entry. Non-virtual entries are ignored.
void GCNRPTracker::bumpDeadDefs(ArrayRef<RegisterMaskPair> DeadDefs) {
  // Snapshot so the temporary bump can be undone at the end.
  GCNRegPressure SavedPressure = CurPressure;

  for (const RegisterMaskPair &Dead : DeadDefs) {
    Register VReg = Dead.RegUnit;
    if (VReg.isVirtual()) {
      LaneBitmask Before = LiveRegs[VReg];
      LaneBitmask After = Before | Dead.LaneMask;
      CurPressure.inc(VReg, Before, After, *MRI);
    }
  }

  MaxPressure = max(MaxPressure, CurPressure);
  CurPressure = SavedPressure;
}
/// Return the lanes of \p RegUnit whose live segment containing the base
/// index of \p Pos ends exactly at the register slot of that index.
/// (Adapted from CodeGen/RegisterPressure.cpp.)
///
/// The query goes through getLanesWithProperty() with lane-mask tracking
/// enabled and LaneBitmask::getNone() as the safe default.
LaneBitmask GCNRPTracker::getLastUsedLanes(Register RegUnit,
                                           SlotIndex Pos) const {
  auto EndsAtRegSlot = [](const LiveRange &Range, SlotIndex Idx) {
    const LiveRange::Segment *Seg = Range.getSegmentContaining(Idx);
    if (Seg == nullptr)
      return false;
    return Seg->end == Idx.getRegSlot();
  };
  return getLanesWithProperty(LIS, *MRI, /*TrackLaneMasks=*/true, RegUnit,
                              Pos.getBaseIndex(), LaneBitmask::getNone(),
                              EndsAtRegSlot);
}

////////////////////////////////////////////////////////////////////////////////
// GCNUpwardRPTracker

void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
assert(MRI && "call reset first");

Expand Down Expand Up @@ -414,6 +534,49 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
assert(CurPressure == getRegPressure(*MRI, LiveRegs));
}

void GCNUpwardRPTracker::bumpUpwardPressure(const MachineInstr *MI,
const SIRegisterInfo *TRI) {
assert(!MI->isDebugOrPseudoInstr() && "Expect a nondebug instruction.");

SlotIndex SlotIdx = LIS.getInstructionIndex(*MI).getRegSlot();

// Account for register pressure similar to RegPressureTracker::recede().
RegisterOperands RegOpers;

RegOpers.collect(*MI, *TRI, *MRI, true, /*IgnoreDead=*/true);
assert(RegOpers.DeadDefs.empty());
adjustDefLaneLiveness(RegOpers.Defs, SlotIdx, LIS, *MRI);
RegOpers.detectDeadDefs(*MI, LIS);

// Boost max pressure for all dead defs together.
// Since CurrSetPressure and MaxSetPressure
bumpDeadDefs(RegOpers.DeadDefs);

// Kill liveness at live defs.
for (const RegisterMaskPair &P : RegOpers.Defs) {
Register Reg = P.RegUnit;
if (!Reg.isVirtual())
continue;
LaneBitmask LiveAfter = LiveRegs[Reg];
LaneBitmask UseLanes = getRegLanes(RegOpers.Uses, Reg);
LaneBitmask DefLanes = P.LaneMask;
LaneBitmask LiveBefore = (LiveAfter & ~DefLanes) | UseLanes;

CurPressure.inc(Reg, LiveAfter, LiveAfter & LiveBefore, *MRI);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No decrease of pressure after?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MaxPressure = max(MaxPressure, CurPressure);
}
// Generate liveness for uses.
for (const RegisterMaskPair &P : RegOpers.Uses) {
Register Reg = P.RegUnit;
if (!Reg.isVirtual())
continue;
LaneBitmask LiveAfter = LiveRegs[Reg];
LaneBitmask LiveBefore = LiveAfter | P.LaneMask;
CurPressure.inc(Reg, LiveAfter, LiveBefore, *MRI);
}
MaxPressure = max(MaxPressure, CurPressure);
}

////////////////////////////////////////////////////////////////////////////////
// GCNDownwardRPTracker

Expand All @@ -430,28 +593,44 @@ bool GCNDownwardRPTracker::reset(const MachineInstr &MI,
return true;
}

bool GCNDownwardRPTracker::advanceBeforeNext() {
bool GCNDownwardRPTracker::advanceBeforeNext(MachineInstr *MI,
bool UseInternalIterator,
LiveIntervals *TheLIS) {
assert(MRI && "call reset first");
if (!LastTrackedMI)
return NextMI == MBBEnd;

assert(NextMI == MBBEnd || !NextMI->isDebugInstr());
SlotIndex SI;
const LiveIntervals *CurrLIS;
const MachineInstr *CurrMI;
if (UseInternalIterator) {
if (!LastTrackedMI)
return NextMI == MBBEnd;

assert(NextMI == MBBEnd || !NextMI->isDebugInstr());
CurrLIS = &LIS;
CurrMI = LastTrackedMI;

SI = NextMI == MBBEnd
? CurrLIS->getInstructionIndex(*LastTrackedMI).getDeadSlot()
: CurrLIS->getInstructionIndex(*NextMI).getBaseIndex();
} else { //! UseInternalIterator
CurrLIS = TheLIS;
SI = CurrLIS->getInstructionIndex(*MI).getBaseIndex();
CurrMI = MI;
}

SlotIndex SI = NextMI == MBBEnd
? LIS.getInstructionIndex(*LastTrackedMI).getDeadSlot()
: LIS.getInstructionIndex(*NextMI).getBaseIndex();
assert(SI.isValid());

// Remove dead registers or mask bits.
SmallSet<Register, 8> SeenRegs;
for (auto &MO : LastTrackedMI->operands()) {
for (auto &MO : CurrMI->operands()) {
if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
if (MO.isUse() && !MO.readsReg())
continue;
if (!UseInternalIterator && MO.isDef())
continue;
if (!SeenRegs.insert(MO.getReg()).second)
continue;
const LiveInterval &LI = LIS.getInterval(MO.getReg());
const LiveInterval &LI = CurrLIS->getInterval(MO.getReg());
if (LI.hasSubRanges()) {
auto It = LiveRegs.end();
for (const auto &S : LI.subranges()) {
Expand Down Expand Up @@ -481,15 +660,22 @@ bool GCNDownwardRPTracker::advanceBeforeNext() {

LastTrackedMI = nullptr;

return NextMI == MBBEnd;
return UseInternalIterator && (NextMI == MBBEnd);
}

void GCNDownwardRPTracker::advanceToNext() {
LastTrackedMI = &*NextMI++;
NextMI = skipDebugInstructionsForward(NextMI, MBBEnd);
void GCNDownwardRPTracker::advanceToNext(MachineInstr *MI,
bool UseInternalIterator) {
if (UseInternalIterator) {
LastTrackedMI = &*NextMI++;
NextMI = skipDebugInstructionsForward(NextMI, MBBEnd);
} else {
LastTrackedMI = MI;
}

const MachineInstr *CurrMI = LastTrackedMI;

// Add new registers or mask bits.
for (const auto &MO : LastTrackedMI->all_defs()) {
for (const auto &MO : CurrMI->all_defs()) {
Register Reg = MO.getReg();
if (!Reg.isVirtual())
continue;
Expand All @@ -502,11 +688,17 @@ void GCNDownwardRPTracker::advanceToNext() {
MaxPressure = max(MaxPressure, CurPressure);
}

// NOTE(review): this span comes from a rendered diff; the zero-argument
// signature, the bare `if (NextMI == MBBEnd)` and the argument-less
// advanceBeforeNext()/advanceToNext() calls appear to be the pre-change lines
// shown next to their replacements. Confirm against the real file.
bool GCNDownwardRPTracker::advance() {
if (NextMI == MBBEnd)
/// Move the tracker across one instruction.
///
/// \param MI                  instruction forwarded to advanceBeforeNext()
///                            and advanceToNext().
/// \param UseInternalIterator when true, NextMI is checked against MBBEnd
///                            before doing anything.
/// \param TheLIS              live intervals forwarded to advanceBeforeNext().
/// \returns false only when \p UseInternalIterator is set and NextMI is
///          already MBBEnd; true otherwise.
bool GCNDownwardRPTracker::advance(MachineInstr *MI, bool UseInternalIterator,
LiveIntervals *TheLIS) {
if (UseInternalIterator && NextMI == MBBEnd)
return false;
advanceBeforeNext();
advanceToNext();

advanceBeforeNext(MI, UseInternalIterator, TheLIS);
advanceToNext(MI, UseInternalIterator);
if (!UseInternalIterator) {
// We must remove any dead def lanes from the current RP
// NOTE(review): this second call passes UseInternalIterator=true, so
// advanceBeforeNext() takes its internal path (member LIS, LastTrackedMI and
// NextMI) instead of MI/TheLIS. advanceToNext() does not move NextMI in
// external mode, so the slot index is presumably derived from a NextMI that
// is unrelated to MI -- verify this is intended.
advanceBeforeNext(MI, true, TheLIS);
}
return true;
}

Expand Down Expand Up @@ -548,6 +740,65 @@ Printable llvm::reportMismatch(const GCNRPTracker::LiveRegSet &LISLR,
});
}

/// Apply the register-pressure effect of \p MI to CurPressure and MaxPressure.
///
/// Steps, in order:
///  1. Collect MI's register operands and adjust their lane liveness at MI's
///     register slot.
///  2. For every virtual-register use whose lanes are last-used at MI
///     (per getLastUsedLanes), discard lanes that findUseBetween() still sees
///     used between the tracker's current position and MI, then update
///     CurPressure for the lanes that remain.
///  3. For every virtual-register def, update CurPressure for the defined
///     lanes and raise MaxPressure to the resulting value.
///  4. Account for dead defs via bumpDeadDefs().
///
/// \param MI  instruction to account for; must not be a debug or pseudo
///            instruction.
/// \param TRI register info used for operand collection and sub-register
///            lane masks.
void GCNDownwardRPTracker::bumpDownwardPressure(const MachineInstr *MI,
                                                const SIRegisterInfo *TRI) {
  assert(!MI->isDebugOrPseudoInstr() && "Expect a nondebug instruction.");

  SlotIndex SlotIdx = LIS.getInstructionIndex(*MI).getRegSlot();

  // Account for register pressure similar to RegPressureTracker::recede().
  RegisterOperands RegOpers;
  RegOpers.collect(*MI, *TRI, *MRI, true, /*IgnoreDead=*/false);
  RegOpers.adjustLaneLiveness(LIS, *MRI, SlotIdx);

  for (const RegisterMaskPair &Use : RegOpers.Uses) {
    Register UseReg = Use.RegUnit;
    if (!UseReg.isVirtual())
      continue;

    LaneBitmask KilledLanes = getLastUsedLanes(UseReg, SlotIdx);
    if (KilledLanes.none())
      continue;

    // KilledLanes comes from the liveness information of an instruction that
    // may sit further down the schedule, so some of these lanes may not
    // really be last uses relative to the current position.
    // FIXME: allow the caller to pass in the list of vreg uses that remain
    // to be bottom-scheduled to avoid searching uses at each query.
    const MachineBasicBlock *MBB = MI->getParent();
    MachineBasicBlock::const_iterator IdxPos = skipDebugInstructionsForward(
        LastTrackedMI ? LastTrackedMI : MBB->begin(), MBB->end());
    SlotIndex CurrIdx = IdxPos == MBB->end()
                            ? LIS.getMBBEndIdx(MBB)
                            : LIS.getInstructionIndex(*IdxPos).getRegSlot();

    KilledLanes =
        findUseBetween(UseReg, KilledLanes, CurrIdx, SlotIdx, *MRI, TRI, &LIS);
    if (!KilledLanes.none()) {
      LaneBitmask Before = LiveRegs[UseReg];
      LaneBitmask After = Before & ~KilledLanes;
      CurPressure.inc(UseReg, Before, After, *MRI);
    }
  }

  // Generate liveness for defs.
  for (const RegisterMaskPair &Def : RegOpers.Defs) {
    Register DefReg = Def.RegUnit;
    if (DefReg.isVirtual()) {
      LaneBitmask Before = LiveRegs[DefReg];
      LaneBitmask After = Before | Def.LaneMask;
      CurPressure.inc(DefReg, Before, After, *MRI);
    }
  }
  MaxPressure = max(MaxPressure, CurPressure);

  // Boost pressure for all dead defs together.
  bumpDeadDefs(RegOpers.DeadDefs);
}

bool GCNUpwardRPTracker::isValid() const {
const auto &SI = LIS.getInstructionIndex(*LastTrackedMI).getBaseIndex();
const auto LISLR = llvm::getLiveRegs(SI, LIS, *MRI);
Expand Down
Loading