Skip to content

Commit a1b32f8

Browse files
committed
[RISCV] Improve stack clash probe loop
Limit the unrolled probe loop to small allocations and emit a variable-length probe loop for larger ones. This adds a new pseudo-instruction, RISCV::PROBED_STACKALLOC, which `inlineStackProbe` later expands into the actual probe loop.
1 parent f9c0f9c commit a1b32f8

File tree

5 files changed

+896
-33
lines changed

5 files changed

+896
-33
lines changed

llvm/lib/Target/RISCV/RISCVFrameLowering.cpp

Lines changed: 150 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -608,46 +608,97 @@ void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
608608
return;
609609
}
610610

611-
// Do an unrolled probe loop.
612-
uint64_t CurrentOffset = 0;
613-
bool IsRV64 = STI.is64Bit();
614-
while (CurrentOffset + ProbeSize <= Offset) {
615-
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
616-
StackOffset::getFixed(-ProbeSize), MachineInstr::FrameSetup,
617-
getStackAlign());
618-
// s[d|w] zero, 0(sp)
619-
BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
620-
.addReg(RISCV::X0)
621-
.addReg(SPReg)
622-
.addImm(0)
623-
.setMIFlags(MachineInstr::FrameSetup);
611+
// Unroll the probe loop depending on the number of iterations.
612+
if (Offset < ProbeSize * 5) {
613+
uint64_t CurrentOffset = 0;
614+
bool IsRV64 = STI.is64Bit();
615+
while (CurrentOffset + ProbeSize <= Offset) {
616+
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
617+
StackOffset::getFixed(-ProbeSize), MachineInstr::FrameSetup,
618+
getStackAlign());
619+
// s[d|w] zero, 0(sp)
620+
BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
621+
.addReg(RISCV::X0)
622+
.addReg(SPReg)
623+
.addImm(0)
624+
.setMIFlags(MachineInstr::FrameSetup);
625+
626+
CurrentOffset += ProbeSize;
627+
if (EmitCFI) {
628+
// Emit ".cfi_def_cfa_offset CurrentOffset"
629+
unsigned CFIIndex = MF.addFrameInst(
630+
MCCFIInstruction::cfiDefCfaOffset(nullptr, CurrentOffset));
631+
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
632+
.addCFIIndex(CFIIndex)
633+
.setMIFlag(MachineInstr::FrameSetup);
634+
}
635+
}
624636

625-
CurrentOffset += ProbeSize;
626-
if (EmitCFI) {
627-
// Emit ".cfi_def_cfa_offset CurrentOffset"
628-
unsigned CFIIndex = MF.addFrameInst(
629-
MCCFIInstruction::cfiDefCfaOffset(nullptr, CurrentOffset));
630-
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
631-
.addCFIIndex(CFIIndex)
632-
.setMIFlag(MachineInstr::FrameSetup);
637+
uint64_t Residual = Offset - CurrentOffset;
638+
if (Residual) {
639+
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
640+
StackOffset::getFixed(-Residual), MachineInstr::FrameSetup,
641+
getStackAlign());
642+
if (EmitCFI) {
643+
// Emit ".cfi_def_cfa_offset Offset"
644+
unsigned CFIIndex =
645+
MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
646+
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
647+
.addCFIIndex(CFIIndex)
648+
.setMIFlag(MachineInstr::FrameSetup);
649+
}
633650
}
651+
652+
return;
653+
}
654+
655+
// Emit a variable-length allocation probing loop.
656+
uint64_t RoundedSize = alignDown(Offset, ProbeSize);
657+
uint64_t Residual = Offset - RoundedSize;
658+
659+
Register TargetReg = RISCV::X6;
660+
// SUB TargetReg, SP, RoundedSize
661+
RI->adjustReg(MBB, MBBI, DL, TargetReg, SPReg,
662+
StackOffset::getFixed(-RoundedSize), MachineInstr::FrameSetup,
663+
getStackAlign());
664+
665+
if (EmitCFI) {
666+
// Set the CFA register to TargetReg.
667+
unsigned Reg = STI.getRegisterInfo()->getDwarfRegNum(TargetReg, true);
668+
unsigned CFIIndex =
669+
MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, RoundedSize));
670+
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
671+
.addCFIIndex(CFIIndex)
672+
.setMIFlags(MachineInstr::FrameSetup);
673+
}
674+
675+
// It will be expanded to a probe loop in `inlineStackProbe`.
676+
BuildMI(MBB, MBBI, DL, TII->get(RISCV::PROBED_STACKALLOC))
677+
.addReg(SPReg)
678+
.addReg(TargetReg);
679+
680+
if (EmitCFI) {
681+
// Set the CFA register back to SP.
682+
unsigned Reg = STI.getRegisterInfo()->getDwarfRegNum(SPReg, true);
683+
unsigned CFIIndex =
684+
MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
685+
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
686+
.addCFIIndex(CFIIndex)
687+
.setMIFlags(MachineInstr::FrameSetup);
634688
}
635689

636-
uint64_t Residual = Offset - CurrentOffset;
637-
if (Residual) {
690+
if (Residual)
638691
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Residual),
639692
MachineInstr::FrameSetup, getStackAlign());
640-
if (EmitCFI) {
641-
// Emit ".cfi_def_cfa_offset Offset"
642-
unsigned CFIIndex =
643-
MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
644-
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
645-
.addCFIIndex(CFIIndex)
646-
.setMIFlag(MachineInstr::FrameSetup);
647-
}
648-
}
649693

650-
return;
694+
if (EmitCFI) {
695+
// Emit ".cfi_def_cfa_offset Offset"
696+
unsigned CFIIndex =
697+
MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
698+
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
699+
.addCFIIndex(CFIIndex)
700+
.setMIFlags(MachineInstr::FrameSetup);
701+
}
651702
}
652703

653704
void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
@@ -1962,3 +2013,69 @@ bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
19622013
TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const {
19632014
return TargetStackID::ScalableVector;
19642015
}
2016+
2017+
// Synthesize the probe loop that stands in for the PROBED_STACKALLOC pseudo at
// MBBI: load ProbeSize into ScratchReg in MBB, then loop in a new block that
// lowers SP by ProbeSize and stores zero at 0(sp) until SP equals TargetReg.
// The pseudo itself is left in place; inlineStackProbe erases it afterwards.
2018+
static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
2019+
MachineBasicBlock::iterator MBBI,
2020+
DebugLoc DL) {
2021+
// Gather the subtarget hooks and the stack probe size for this function.
2022+
auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
2023+
const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
2024+
bool IsRV64 = Subtarget.is64Bit();
2025+
Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
2026+
const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
2027+
uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
2028+
// Create the loop and exit blocks; both are inserted right after MBB (loop first).
2029+
MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
2030+
MachineBasicBlock *LoopTestMBB =
2031+
MF.CreateMachineBasicBlock(MBB.getBasicBlock());
2032+
MF.insert(MBBInsertPoint, LoopTestMBB);
2033+
MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
2034+
MF.insert(MBBInsertPoint, ExitMBB);
2035+
MachineInstr::MIFlag Flags = MachineInstr::FrameSetup;
2036+
Register TargetReg = RISCV::X6;
2037+
Register ScratchReg = RISCV::X7;
2038+
// X6 must match the target register allocateStack sets to SP - RoundedSize; X7 is scratch.
2039+
// ScratchReg = ProbeSize (emitted in MBB, immediately before the pseudo at MBBI)
2040+
TII->movImm(MBB, MBBI, DL, ScratchReg, ProbeSize, Flags);
2041+
2042+
// LoopTest:
2043+
// SUB SP, SP, ScratchReg (ScratchReg holds ProbeSize)
2044+
BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
2045+
.addReg(SPReg)
2046+
.addReg(ScratchReg)
2047+
.setMIFlags(Flags);
2048+
2049+
// s[d|w] zero, 0(sp) -- the probe store: SD on RV64, SW otherwise
2050+
BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
2051+
TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
2052+
.addReg(RISCV::X0)
2053+
.addReg(SPReg)
2054+
.addImm(0)
2055+
.setMIFlags(Flags);
2056+
// Exits only when SP == TargetReg; allocateStack rounds the size down to a ProbeSize multiple.
2057+
// BNE SP, TargetReg, LoopTest
2058+
BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BNE))
2059+
.addReg(SPReg)
2060+
.addReg(TargetReg)
2061+
.addMBB(LoopTestMBB)
2062+
.setMIFlags(Flags);
2063+
// Move every instruction after the pseudo out of MBB and into ExitMBB.
2064+
ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
2065+
// NOTE(review): MBB's pre-existing successors and the new blocks' live-ins are not updated here -- confirm.
2066+
LoopTestMBB->addSuccessor(ExitMBB);
2067+
LoopTestMBB->addSuccessor(LoopTestMBB);
2068+
MBB.addSuccessor(LoopTestMBB);
2069+
}
2070+
2071+
// Expand the first PROBED_STACKALLOC pseudo found in MBB (if any) into an
// inline probe loop, then erase the pseudo. The search stops at the first
// match, so at most one pseudo per block is expanded. emitStackProbeInline
// splices only the instructions after the pseudo, so `Where` still refers to
// an instruction in MBB when it is erased.
void RISCVFrameLowering::inlineStackProbe(MachineFunction &MF,
2072+
MachineBasicBlock &MBB) const {
2073+
auto Where = llvm::find_if(MBB, [](MachineInstr &MI) {
2074+
return MI.getOpcode() == RISCV::PROBED_STACKALLOC;
2075+
});
2076+
if (Where != MBB.end()) {
2077+
DebugLoc DL = MBB.findDebugLoc(Where);
2078+
emitStackProbeInline(MF, MBB, Where, DL);
2079+
Where->eraseFromParent();
2080+
}
2081+
}

llvm/lib/Target/RISCV/RISCVFrameLowering.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -104,6 +104,9 @@ class RISCVFrameLowering : public TargetFrameLowering {
104104

105105
std::pair<int64_t, Align>
106106
assignRVVStackObjectOffsets(MachineFunction &MF) const;
107+
// Replace a PROBED_STACKALLOC pseudo (if any) in PrologueMBB with the actual inline probe loop.
108+
void inlineStackProbe(MachineFunction &MF,
109+
MachineBasicBlock &PrologueMBB) const override;
107110
};
108111
} // namespace llvm
109112
#endif

llvm/lib/Target/RISCV/RISCVInstrInfo.td

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1373,6 +1373,17 @@ def PseudoAddTPRel : Pseudo<(outs GPR:$rd),
13731373
def : Pat<(FrameAddrRegImm (iPTR GPR:$rs1), simm12:$imm12),
13741374
(ADDI GPR:$rs1, simm12:$imm12)>;
13751375

1376+
/// Stack probing
1377+
1378+
let hasSideEffects = 1, mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
1379+
// Probed stack allocation of a constant size, used in function prologues when
1380+
// stack-clash protection is enabled. It is a placeholder that
// RISCVFrameLowering::inlineStackProbe expands into the probe loop.
// NOTE(review): $sp is declared as an output, but allocateStack appends both
// SP and the target register with plain addReg() -- confirm the operand flags.
1381+
def PROBED_STACKALLOC : Pseudo<(outs GPR:$sp),
1382+
(ins GPR:$scratch),
1383+
[]>,
1384+
Sched<[]>;
1385+
}
1386+
13761387
/// HI and ADD_LO address nodes.
13771388

13781389
// Pseudo for a rematerializable LUI+ADDI sequence for loading an address.

0 commit comments

Comments
 (0)