Skip to content

Commit c722000

Browse files
committed
[RISCV] Improve stack clash probe loop
Limit the unrolled probe loop to small allocations and emit a variable-length probe loop for bigger ones. This adds a new pseudo-instruction, RISCV::PROBED_STACKALLOC, that is later expanded into a loop by `inlineStackProbe`.
1 parent 708dce3 commit c722000

File tree

5 files changed

+906
-30
lines changed

5 files changed

+906
-30
lines changed

llvm/lib/Target/RISCV/RISCVFrameLowering.cpp

Lines changed: 160 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -608,46 +608,107 @@ void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
608608
return;
609609
}
610610

611-
// Do an unrolled probe loop.
612-
uint64_t CurrentOffset = 0;
613-
bool IsRV64 = STI.is64Bit();
614-
while (CurrentOffset + ProbeSize <= Offset) {
615-
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
616-
StackOffset::getFixed(-ProbeSize), MachineInstr::FrameSetup,
617-
getStackAlign());
618-
// s[d|w] zero, 0(sp)
619-
BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
620-
.addReg(RISCV::X0)
621-
.addReg(SPReg)
622-
.addImm(0)
623-
.setMIFlags(MachineInstr::FrameSetup);
611+
// Unroll the probe loop depending on the number of iterations.
612+
if (Offset < ProbeSize * 5) {
613+
uint64_t CurrentOffset = 0;
614+
bool IsRV64 = STI.is64Bit();
615+
while (CurrentOffset + ProbeSize <= Offset) {
616+
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
617+
StackOffset::getFixed(-ProbeSize), MachineInstr::FrameSetup,
618+
getStackAlign());
619+
// s[d|w] zero, 0(sp)
620+
BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
621+
.addReg(RISCV::X0)
622+
.addReg(SPReg)
623+
.addImm(0)
624+
.setMIFlags(MachineInstr::FrameSetup);
625+
626+
CurrentOffset += ProbeSize;
627+
if (EmitCFI) {
628+
// Emit ".cfi_def_cfa_offset CurrentOffset"
629+
unsigned CFIIndex = MF.addFrameInst(
630+
MCCFIInstruction::cfiDefCfaOffset(nullptr, CurrentOffset));
631+
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
632+
.addCFIIndex(CFIIndex)
633+
.setMIFlag(MachineInstr::FrameSetup);
634+
}
635+
}
624636

625-
CurrentOffset += ProbeSize;
626-
if (EmitCFI) {
627-
// Emit ".cfi_def_cfa_offset CurrentOffset"
628-
unsigned CFIIndex = MF.addFrameInst(
629-
MCCFIInstruction::cfiDefCfaOffset(nullptr, CurrentOffset));
630-
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
631-
.addCFIIndex(CFIIndex)
632-
.setMIFlag(MachineInstr::FrameSetup);
637+
uint64_t Residual = Offset - CurrentOffset;
638+
if (Residual) {
639+
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
640+
StackOffset::getFixed(-Residual), MachineInstr::FrameSetup,
641+
getStackAlign());
642+
if (EmitCFI) {
643+
// Emit ".cfi_def_cfa_offset Offset"
644+
unsigned CFIIndex =
645+
MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
646+
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
647+
.addCFIIndex(CFIIndex)
648+
.setMIFlag(MachineInstr::FrameSetup);
649+
}
633650
}
651+
652+
return;
653+
}
654+
655+
// Emit a variable-length allocation probing loop.
656+
uint64_t RoundedSize = (Offset / ProbeSize) * ProbeSize;
657+
uint64_t Residual = Offset - RoundedSize;
658+
659+
Register TargetReg = RISCV::X6;
660+
// SUB TargetReg, SP, RoundedSize
661+
RI->adjustReg(MBB, MBBI, DL, TargetReg, SPReg,
662+
StackOffset::getFixed(-RoundedSize), MachineInstr::FrameSetup,
663+
getStackAlign());
664+
665+
if (EmitCFI) {
666+
// Set the CFA register to TargetReg.
667+
unsigned Reg = STI.getRegisterInfo()->getDwarfRegNum(TargetReg, true);
668+
unsigned CFIIndex =
669+
MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, RoundedSize));
670+
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
671+
.addCFIIndex(CFIIndex)
672+
.setMIFlags(MachineInstr::FrameSetup);
673+
}
674+
675+
// It will be expanded to a probe loop in `inlineStackProbe`.
676+
BuildMI(MBB, MBBI, DL, TII->get(RISCV::PROBED_STACKALLOC))
677+
.addReg(SPReg)
678+
.addReg(TargetReg);
679+
680+
if (EmitCFI) {
681+
// Set the CFA register back to SP.
682+
unsigned Reg = STI.getRegisterInfo()->getDwarfRegNum(SPReg, true);
683+
unsigned CFIIndex =
684+
MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
685+
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
686+
.addCFIIndex(CFIIndex)
687+
.setMIFlags(MachineInstr::FrameSetup);
634688
}
635689

636-
uint64_t Residual = Offset - CurrentOffset;
637690
if (Residual) {
638691
RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Residual),
639692
MachineInstr::FrameSetup, getStackAlign());
640-
if (EmitCFI) {
641-
// Emit ".cfi_def_cfa_offset Offset"
642-
unsigned CFIIndex =
643-
MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
644-
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
645-
.addCFIIndex(CFIIndex)
646-
.setMIFlag(MachineInstr::FrameSetup);
693+
if (Residual > ProbeSize) {
694+
// s[d|w] zero, 0(sp)
695+
bool IsRV64 = STI.is64Bit();
696+
BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
697+
.addReg(RISCV::X0)
698+
.addReg(SPReg)
699+
.addImm(0)
700+
.setMIFlags(MachineInstr::FrameSetup);
647701
}
648702
}
649703

650-
return;
704+
if (EmitCFI) {
705+
// Emit ".cfi_def_cfa_offset Offset"
706+
unsigned CFIIndex =
707+
MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
708+
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
709+
.addCFIIndex(CFIIndex)
710+
.setMIFlags(MachineInstr::FrameSetup);
711+
}
651712
}
652713

653714
void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
@@ -1962,3 +2023,72 @@ bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
19622023
TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const {
19632024
return TargetStackID::ScalableVector;
19642025
}
2026+
2027+
// Synthesize the probe loop.
2028+
static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
2029+
MachineBasicBlock::iterator MBBI,
2030+
DebugLoc DL) {
2031+
2032+
auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
2033+
const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
2034+
bool IsRV64 = Subtarget.is64Bit();
2035+
Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
2036+
const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
2037+
uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
2038+
2039+
MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
2040+
MachineBasicBlock *LoopTestMBB =
2041+
MF.CreateMachineBasicBlock(MBB.getBasicBlock());
2042+
MF.insert(MBBInsertPoint, LoopTestMBB);
2043+
MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
2044+
MF.insert(MBBInsertPoint, ExitMBB);
2045+
MachineInstr::MIFlag Flags = MachineInstr::FrameSetup;
2046+
Register TargetReg = RISCV::X6;
2047+
Register ScratchReg = RISCV::X7;
2048+
2049+
// ScratchReg = ProbeSize
2050+
TII->movImm(MBB, MBBI, DL, ScratchReg, ProbeSize, Flags);
2051+
2052+
// LoopTest:
2053+
// SUB SP, SP, ProbeSize
2054+
BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
2055+
.addReg(SPReg)
2056+
.addReg(ScratchReg)
2057+
.setMIFlags(Flags);
2058+
2059+
// s[d|w] zero, 0(sp)
2060+
BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
2061+
TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
2062+
.addReg(RISCV::X0)
2063+
.addReg(SPReg)
2064+
.addImm(0)
2065+
.setMIFlags(Flags);
2066+
2067+
// BNE SP, TargetReg, LoopTest
2068+
BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BNE))
2069+
.addReg(SPReg)
2070+
.addReg(TargetReg)
2071+
.addMBB(LoopTestMBB)
2072+
.setMIFlags(Flags);
2073+
2074+
ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
2075+
2076+
LoopTestMBB->addSuccessor(ExitMBB);
2077+
LoopTestMBB->addSuccessor(LoopTestMBB);
2078+
MBB.addSuccessor(LoopTestMBB);
2079+
}
2080+
2081+
void RISCVFrameLowering::inlineStackProbe(MachineFunction &MF,
2082+
MachineBasicBlock &MBB) const {
2083+
// Get the instructions that need to be replaced. We emit at most two of
2084+
// these. Remember them in order to avoid complications coming from the need
2085+
// to traverse the block while potentially creating more blocks.
2086+
auto Where = llvm::find_if(MBB, [](MachineInstr &MI) {
2087+
return MI.getOpcode() == RISCV::PROBED_STACKALLOC;
2088+
});
2089+
if (Where != MBB.end()) {
2090+
DebugLoc DL = MBB.findDebugLoc(Where);
2091+
emitStackProbeInline(MF, MBB, Where, DL);
2092+
Where->eraseFromParent();
2093+
}
2094+
}

llvm/lib/Target/RISCV/RISCVFrameLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@ class RISCVFrameLowering : public TargetFrameLowering {
104104

105105
std::pair<int64_t, Align>
106106
assignRVVStackObjectOffsets(MachineFunction &MF) const;
107+
// Replace a stack-probe pseudo (PROBED_STACKALLOC), if any, in PrologueMBB
// with the actual inline probe code.
108+
void inlineStackProbe(MachineFunction &MF,
109+
MachineBasicBlock &PrologueMBB) const override;
107110
};
108111
} // namespace llvm
109112
#endif

llvm/lib/Target/RISCV/RISCVInstrInfo.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1373,6 +1373,17 @@ def PseudoAddTPRel : Pseudo<(outs GPR:$rd),
13731373
def : Pat<(FrameAddrRegImm (iPTR GPR:$rs1), simm12:$imm12),
13741374
(ADDI GPR:$rs1, simm12:$imm12)>;
13751375

1376+
/// Stack probing
1377+
// Probed stack allocation of a constant size, used in function prologues when
// stack-clash protection is enabled. This is a codegen-only placeholder that
// is expanded into an explicit probing loop by
// RISCVFrameLowering::inlineStackProbe. It is conservatively marked as having
// side effects and as possibly loading and storing.
// NOTE(review): the frame lowering builds this pseudo with SP and the target
// register both added as plain register operands; confirm that matches the
// (outs GPR:$sp), (ins GPR:$scratch) operand list declared here.
def PROBED_STACKALLOC : Pseudo<(outs GPR:$sp),
                               (ins GPR:$scratch),
                               []>,
                        Sched<[]> {
  let hasSideEffects = 1;
  let mayLoad = 1;
  let mayStore = 1;
  let isCodeGenOnly = 1;
}
1386+
13761387
/// HI and ADD_LO address nodes.
13771388

13781389
// Pseudo for a rematerializable LUI+ADDI sequence for loading an address.

0 commit comments

Comments
 (0)