Commit 34a0b71
[RISCV] Improve stack clash probe loop
Limit the unrolled probe loop and emit a variable-length probe loop for larger allocations. We add a new pseudo instruction, RISCV::PROBED_STACKALLOC, which is later expanded into a probe loop by `inlineStackProbe`.
1 parent 6a09d85 commit 34a0b71
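For allocations of at least five probe intervals, the prologue now computes the post-allocation stack pointer into a target register and the pseudo expands into a loop that probes one interval at a time. A rough sketch of the resulting RV64 code shape (illustrative only: the 64 KiB frame and 4 KiB probe size are example values, the label is made up, and the exact instructions chosen to materialize the immediates may differ; the target register x6/t1 and scratch register x7/t2 match the patch):

    # example: 64 KiB probed allocation with a 4 KiB probe size
    lui   t1, 16                  # t1 = 65536, the rounded allocation size
    sub   t1, sp, t1              # t1 = target SP after the probed region
    li    t2, 4096                # t2 = probe interval (scratch register)
.Lprobe_loop:
    sub   sp, sp, t2              # step SP down by one probe interval
    sd    zero, 0(sp)             # touch the newly allocated page
    bne   sp, t1, .Lprobe_loop    # repeat until SP reaches the target
    # any residual (sub-interval) amount is allocated and probed afterwards

Smaller allocations keep the fully unrolled probe sequence, which is now bounded at four probes plus the residual adjustment.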

5 files changed: +906 −30 lines changed

llvm/lib/Target/RISCV/RISCVFrameLowering.cpp

Lines changed: 160 additions & 30 deletions
@@ -638,46 +638,107 @@ void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
     return;
   }
 
-  // Do an unrolled probe loop.
-  uint64_t CurrentOffset = 0;
-  bool IsRV64 = STI.is64Bit();
-  while (CurrentOffset + ProbeSize <= Offset) {
-    RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
-                  StackOffset::getFixed(-ProbeSize), MachineInstr::FrameSetup,
-                  getStackAlign());
-    // s[d|w] zero, 0(sp)
-    BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
-        .addReg(RISCV::X0)
-        .addReg(SPReg)
-        .addImm(0)
-        .setMIFlags(MachineInstr::FrameSetup);
+  // Unroll the probe loop depending on the number of iterations.
+  if (Offset < ProbeSize * 5) {
+    uint64_t CurrentOffset = 0;
+    bool IsRV64 = STI.is64Bit();
+    while (CurrentOffset + ProbeSize <= Offset) {
+      RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
+                    StackOffset::getFixed(-ProbeSize), MachineInstr::FrameSetup,
+                    getStackAlign());
+      // s[d|w] zero, 0(sp)
+      BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
+          .addReg(RISCV::X0)
+          .addReg(SPReg)
+          .addImm(0)
+          .setMIFlags(MachineInstr::FrameSetup);
+
+      CurrentOffset += ProbeSize;
+      if (EmitCFI) {
+        // Emit ".cfi_def_cfa_offset CurrentOffset"
+        unsigned CFIIndex = MF.addFrameInst(
+            MCCFIInstruction::cfiDefCfaOffset(nullptr, CurrentOffset));
+        BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+            .addCFIIndex(CFIIndex)
+            .setMIFlag(MachineInstr::FrameSetup);
+      }
+    }
 
-    CurrentOffset += ProbeSize;
-    if (EmitCFI) {
-      // Emit ".cfi_def_cfa_offset CurrentOffset"
-      unsigned CFIIndex = MF.addFrameInst(
-          MCCFIInstruction::cfiDefCfaOffset(nullptr, CurrentOffset));
-      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
-          .addCFIIndex(CFIIndex)
-          .setMIFlag(MachineInstr::FrameSetup);
+    uint64_t Residual = Offset - CurrentOffset;
+    if (Residual) {
+      RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
+                    StackOffset::getFixed(-Residual), MachineInstr::FrameSetup,
+                    getStackAlign());
+      if (EmitCFI) {
+        // Emit ".cfi_def_cfa_offset Offset"
+        unsigned CFIIndex =
+            MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
+        BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+            .addCFIIndex(CFIIndex)
+            .setMIFlag(MachineInstr::FrameSetup);
+      }
     }
+
+    return;
+  }
+
+  // Emit a variable-length allocation probing loop.
+  uint64_t RoundedSize = (Offset / ProbeSize) * ProbeSize;
+  uint64_t Residual = Offset - RoundedSize;
+
+  Register TargetReg = RISCV::X6;
+  // SUB TargetReg, SP, RoundedSize
+  RI->adjustReg(MBB, MBBI, DL, TargetReg, SPReg,
+                StackOffset::getFixed(-RoundedSize), MachineInstr::FrameSetup,
+                getStackAlign());
+
+  if (EmitCFI) {
+    // Set the CFA register to TargetReg.
+    unsigned Reg = STI.getRegisterInfo()->getDwarfRegNum(TargetReg, true);
+    unsigned CFIIndex =
+        MF.addFrameInst(MCCFIInstruction::cfiDefCfa(nullptr, Reg, RoundedSize));
+    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex)
+        .setMIFlags(MachineInstr::FrameSetup);
+  }
+
+  // It will be expanded to a probe loop in `inlineStackProbe`.
+  BuildMI(MBB, MBBI, DL, TII->get(RISCV::PROBED_STACKALLOC))
+      .addReg(SPReg)
+      .addReg(TargetReg);
+
+  if (EmitCFI) {
+    // Set the CFA register back to SP.
+    unsigned Reg = STI.getRegisterInfo()->getDwarfRegNum(SPReg, true);
+    unsigned CFIIndex =
+        MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex)
+        .setMIFlags(MachineInstr::FrameSetup);
   }
 
-  uint64_t Residual = Offset - CurrentOffset;
   if (Residual) {
     RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-Residual),
                   MachineInstr::FrameSetup, getStackAlign());
-    if (EmitCFI) {
-      // Emit ".cfi_def_cfa_offset Offset"
-      unsigned CFIIndex =
-          MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
-      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
-          .addCFIIndex(CFIIndex)
-          .setMIFlag(MachineInstr::FrameSetup);
+    if (Residual > ProbeSize) {
+      // s[d|w] zero, 0(sp)
+      bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit);
+      BuildMI(MBB, MBBI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
+          .addReg(RISCV::X0)
+          .addReg(SPReg)
+          .addImm(0)
+          .setMIFlags(MachineInstr::FrameSetup);
     }
   }
 
-  return;
+  if (EmitCFI) {
+    // Emit ".cfi_def_cfa_offset Offset"
+    unsigned CFIIndex =
+        MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Offset));
+    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+        .addCFIIndex(CFIIndex)
+        .setMIFlags(MachineInstr::FrameSetup);
+  }
 }
 
 void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
@@ -1988,3 +2049,72 @@ bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
 TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const {
   return TargetStackID::ScalableVector;
 }
+
+// Synthesize the probe loop.
+static void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MBBI,
+                                 DebugLoc DL) {
+
+  auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
+  const RISCVInstrInfo *TII = Subtarget.getInstrInfo();
+  bool IsRV64 = Subtarget.is64Bit();
+  Align StackAlign = Subtarget.getFrameLowering()->getStackAlign();
+  const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
+  uint64_t ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
+
+  MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
+  MachineBasicBlock *LoopTestMBB =
+      MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+  MF.insert(MBBInsertPoint, LoopTestMBB);
+  MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
+  MF.insert(MBBInsertPoint, ExitMBB);
+  MachineInstr::MIFlag Flags = MachineInstr::FrameSetup;
+  Register TargetReg = RISCV::X6;
+  Register ScratchReg = RISCV::X7;
+
+  // ScratchReg = ProbeSize
+  TII->movImm(MBB, MBBI, DL, ScratchReg, ProbeSize, Flags);
+
+  // LoopTest:
+  // SUB SP, SP, ProbeSize
+  BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::SUB), SPReg)
+      .addReg(SPReg)
+      .addReg(ScratchReg)
+      .setMIFlags(Flags);
+
+  // s[d|w] zero, 0(sp)
+  BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL,
+          TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
+      .addReg(RISCV::X0)
+      .addReg(SPReg)
+      .addImm(0)
+      .setMIFlags(Flags);
+
+  // BNE SP, TargetReg, LoopTest
+  BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(RISCV::BNE))
+      .addReg(SPReg)
+      .addReg(TargetReg)
+      .addMBB(LoopTestMBB)
+      .setMIFlags(Flags);
+
+  ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
+
+  LoopTestMBB->addSuccessor(ExitMBB);
+  LoopTestMBB->addSuccessor(LoopTestMBB);
+  MBB.addSuccessor(LoopTestMBB);
+}
+
+void RISCVFrameLowering::inlineStackProbe(MachineFunction &MF,
+                                          MachineBasicBlock &MBB) const {
+  // Get the instructions that need to be replaced. We emit at most two of
+  // these. Remember them in order to avoid complications coming from the need
+  // to traverse the block while potentially creating more blocks.
+  auto Where = llvm::find_if(MBB, [](MachineInstr &MI) {
+    return MI.getOpcode() == RISCV::PROBED_STACKALLOC;
+  });
+  if (Where != MBB.end()) {
+    DebugLoc DL = MBB.findDebugLoc(Where);
+    emitStackProbeInline(MF, MBB, Where, DL);
+    Where->eraseFromParent();
+  }
+}

llvm/lib/Target/RISCV/RISCVFrameLowering.h

Lines changed: 3 additions & 0 deletions
@@ -107,6 +107,9 @@ class RISCVFrameLowering : public TargetFrameLowering {
 
   std::pair<int64_t, Align>
   assignRVVStackObjectOffsets(MachineFunction &MF) const;
+  // Replace a StackProbe stub (if any) with the actual probe code inline
+  void inlineStackProbe(MachineFunction &MF,
+                        MachineBasicBlock &PrologueMBB) const override;
 };
 } // namespace llvm
 #endif

llvm/lib/Target/RISCV/RISCVInstrInfo.td

Lines changed: 11 additions & 0 deletions
@@ -1373,6 +1373,17 @@ def PseudoAddTPRel : Pseudo<(outs GPR:$rd),
 def : Pat<(FrameAddrRegImm (iPTR GPR:$rs1), simm12:$imm12),
           (ADDI GPR:$rs1, simm12:$imm12)>;
 
+/// Stack probing
+
+let hasSideEffects = 1, mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
+// Probed stack allocation of a constant size, used in function prologues when
+// stack-clash protection is enabled.
+def PROBED_STACKALLOC : Pseudo<(outs GPR:$sp),
+                               (ins GPR:$scratch),
+                               []>,
+                               Sched<[]>;
+}
+
 /// HI and ADD_LO address nodes.
 
 // Pseudo for a rematerializable LUI+ADDI sequence for loading an address.
