@@ -638,46 +638,107 @@ void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
638
638
return ;
639
639
}
640
640
641
- // Do an unrolled probe loop.
642
- uint64_t CurrentOffset = 0 ;
643
- bool IsRV64 = STI.is64Bit ();
644
- while (CurrentOffset + ProbeSize <= Offset) {
645
- RI->adjustReg (MBB, MBBI, DL, SPReg, SPReg,
646
- StackOffset::getFixed (-ProbeSize), MachineInstr::FrameSetup,
647
- getStackAlign ());
648
- // s[d|w] zero, 0(sp)
649
- BuildMI (MBB, MBBI, DL, TII->get (IsRV64 ? RISCV::SD : RISCV::SW))
650
- .addReg (RISCV::X0)
651
- .addReg (SPReg)
652
- .addImm (0 )
653
- .setMIFlags (MachineInstr::FrameSetup);
641
+ // Unroll the probe loop depending on the number of iterations.
642
+ if (Offset < ProbeSize * 5 ) {
643
+ uint64_t CurrentOffset = 0 ;
644
+ bool IsRV64 = STI.is64Bit ();
645
+ while (CurrentOffset + ProbeSize <= Offset) {
646
+ RI->adjustReg (MBB, MBBI, DL, SPReg, SPReg,
647
+ StackOffset::getFixed (-ProbeSize), MachineInstr::FrameSetup,
648
+ getStackAlign ());
649
+ // s[d|w] zero, 0(sp)
650
+ BuildMI (MBB, MBBI, DL, TII->get (IsRV64 ? RISCV::SD : RISCV::SW))
651
+ .addReg (RISCV::X0)
652
+ .addReg (SPReg)
653
+ .addImm (0 )
654
+ .setMIFlags (MachineInstr::FrameSetup);
655
+
656
+ CurrentOffset += ProbeSize;
657
+ if (EmitCFI) {
658
+ // Emit ".cfi_def_cfa_offset CurrentOffset"
659
+ unsigned CFIIndex = MF.addFrameInst (
660
+ MCCFIInstruction::cfiDefCfaOffset (nullptr , CurrentOffset));
661
+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
662
+ .addCFIIndex (CFIIndex)
663
+ .setMIFlag (MachineInstr::FrameSetup);
664
+ }
665
+ }
654
666
655
- CurrentOffset += ProbeSize;
656
- if (EmitCFI) {
657
- // Emit ".cfi_def_cfa_offset CurrentOffset"
658
- unsigned CFIIndex = MF.addFrameInst (
659
- MCCFIInstruction::cfiDefCfaOffset (nullptr , CurrentOffset));
660
- BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
661
- .addCFIIndex (CFIIndex)
662
- .setMIFlag (MachineInstr::FrameSetup);
667
+ uint64_t Residual = Offset - CurrentOffset;
668
+ if (Residual) {
669
+ RI->adjustReg (MBB, MBBI, DL, SPReg, SPReg,
670
+ StackOffset::getFixed (-Residual), MachineInstr::FrameSetup,
671
+ getStackAlign ());
672
+ if (EmitCFI) {
673
+ // Emit ".cfi_def_cfa_offset Offset"
674
+ unsigned CFIIndex =
675
+ MF.addFrameInst (MCCFIInstruction::cfiDefCfaOffset (nullptr , Offset));
676
+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
677
+ .addCFIIndex (CFIIndex)
678
+ .setMIFlag (MachineInstr::FrameSetup);
679
+ }
663
680
}
681
+
682
+ return ;
683
+ }
684
+
685
+ // Emit a variable-length allocation probing loop.
686
+ uint64_t RoundedSize = (Offset / ProbeSize) * ProbeSize;
687
+ uint64_t Residual = Offset - RoundedSize;
688
+
689
+ Register TargetReg = RISCV::X6;
690
+ // SUB TargetReg, SP, RoundedSize
691
+ RI->adjustReg (MBB, MBBI, DL, TargetReg, SPReg,
692
+ StackOffset::getFixed (-RoundedSize), MachineInstr::FrameSetup,
693
+ getStackAlign ());
694
+
695
+ if (EmitCFI) {
696
+ // Set the CFA register to TargetReg.
697
+ unsigned Reg = STI.getRegisterInfo ()->getDwarfRegNum (TargetReg, true );
698
+ unsigned CFIIndex =
699
+ MF.addFrameInst (MCCFIInstruction::cfiDefCfa (nullptr , Reg, RoundedSize));
700
+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
701
+ .addCFIIndex (CFIIndex)
702
+ .setMIFlags (MachineInstr::FrameSetup);
703
+ }
704
+
705
+ // It will be expanded to a probe loop in `inlineStackProbe`.
706
+ BuildMI (MBB, MBBI, DL, TII->get (RISCV::PROBED_STACKALLOC))
707
+ .addReg (SPReg)
708
+ .addReg (TargetReg);
709
+
710
+ if (EmitCFI) {
711
+ // Set the CFA register back to SP.
712
+ unsigned Reg = STI.getRegisterInfo ()->getDwarfRegNum (SPReg, true );
713
+ unsigned CFIIndex =
714
+ MF.addFrameInst (MCCFIInstruction::createDefCfaRegister (nullptr , Reg));
715
+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
716
+ .addCFIIndex (CFIIndex)
717
+ .setMIFlags (MachineInstr::FrameSetup);
664
718
}
665
719
666
- uint64_t Residual = Offset - CurrentOffset;
667
720
if (Residual) {
668
721
RI->adjustReg (MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed (-Residual),
669
722
MachineInstr::FrameSetup, getStackAlign ());
670
- if (EmitCFI) {
671
- // Emit ".cfi_def_cfa_offset Offset"
672
- unsigned CFIIndex =
673
- MF.addFrameInst (MCCFIInstruction::cfiDefCfaOffset (nullptr , Offset));
674
- BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
675
- .addCFIIndex (CFIIndex)
676
- .setMIFlag (MachineInstr::FrameSetup);
723
+ if (Residual > ProbeSize) {
724
+ // s[d|w] zero, 0(sp)
725
+ bool IsRV64 = STI.hasFeature (RISCV::Feature64Bit);
726
+ BuildMI (MBB, MBBI, DL, TII->get (IsRV64 ? RISCV::SD : RISCV::SW))
727
+ .addReg (RISCV::X0)
728
+ .addReg (SPReg)
729
+ .addImm (0 )
730
+ .setMIFlags (MachineInstr::FrameSetup);
677
731
}
678
732
}
679
733
680
- return ;
734
+ if (EmitCFI) {
735
+ // Emit ".cfi_def_cfa_offset Offset"
736
+ unsigned CFIIndex =
737
+ MF.addFrameInst (MCCFIInstruction::cfiDefCfaOffset (nullptr , Offset));
738
+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
739
+ .addCFIIndex (CFIIndex)
740
+ .setMIFlags (MachineInstr::FrameSetup);
741
+ }
681
742
}
682
743
683
744
void RISCVFrameLowering::emitPrologue (MachineFunction &MF,
@@ -1988,3 +2049,72 @@ bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
1988
2049
/// Report the stack ID this target uses for scalable-vector stack objects.
TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const {
  return TargetStackID::Value::ScalableVector;
}
2052
+
2053
+ // Synthesize the probe loop.
2054
+ static void emitStackProbeInline (MachineFunction &MF, MachineBasicBlock &MBB,
2055
+ MachineBasicBlock::iterator MBBI,
2056
+ DebugLoc DL) {
2057
+
2058
+ auto &Subtarget = MF.getSubtarget <RISCVSubtarget>();
2059
+ const RISCVInstrInfo *TII = Subtarget.getInstrInfo ();
2060
+ bool IsRV64 = Subtarget.is64Bit ();
2061
+ Align StackAlign = Subtarget.getFrameLowering ()->getStackAlign ();
2062
+ const RISCVTargetLowering *TLI = Subtarget.getTargetLowering ();
2063
+ uint64_t ProbeSize = TLI->getStackProbeSize (MF, StackAlign);
2064
+
2065
+ MachineFunction::iterator MBBInsertPoint = std::next (MBB.getIterator ());
2066
+ MachineBasicBlock *LoopTestMBB =
2067
+ MF.CreateMachineBasicBlock (MBB.getBasicBlock ());
2068
+ MF.insert (MBBInsertPoint, LoopTestMBB);
2069
+ MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock (MBB.getBasicBlock ());
2070
+ MF.insert (MBBInsertPoint, ExitMBB);
2071
+ MachineInstr::MIFlag Flags = MachineInstr::FrameSetup;
2072
+ Register TargetReg = RISCV::X6;
2073
+ Register ScratchReg = RISCV::X7;
2074
+
2075
+ // ScratchReg = ProbeSize
2076
+ TII->movImm (MBB, MBBI, DL, ScratchReg, ProbeSize, Flags);
2077
+
2078
+ // LoopTest:
2079
+ // SUB SP, SP, ProbeSize
2080
+ BuildMI (*LoopTestMBB, LoopTestMBB->end (), DL, TII->get (RISCV::SUB), SPReg)
2081
+ .addReg (SPReg)
2082
+ .addReg (ScratchReg)
2083
+ .setMIFlags (Flags);
2084
+
2085
+ // s[d|w] zero, 0(sp)
2086
+ BuildMI (*LoopTestMBB, LoopTestMBB->end (), DL,
2087
+ TII->get (IsRV64 ? RISCV::SD : RISCV::SW))
2088
+ .addReg (RISCV::X0)
2089
+ .addReg (SPReg)
2090
+ .addImm (0 )
2091
+ .setMIFlags (Flags);
2092
+
2093
+ // BNE SP, TargetReg, LoopTest
2094
+ BuildMI (*LoopTestMBB, LoopTestMBB->end (), DL, TII->get (RISCV::BNE))
2095
+ .addReg (SPReg)
2096
+ .addReg (TargetReg)
2097
+ .addMBB (LoopTestMBB)
2098
+ .setMIFlags (Flags);
2099
+
2100
+ ExitMBB->splice (ExitMBB->end (), &MBB, std::next (MBBI), MBB.end ());
2101
+
2102
+ LoopTestMBB->addSuccessor (ExitMBB);
2103
+ LoopTestMBB->addSuccessor (LoopTestMBB);
2104
+ MBB.addSuccessor (LoopTestMBB);
2105
+ }
2106
+
2107
+ void RISCVFrameLowering::inlineStackProbe (MachineFunction &MF,
2108
+ MachineBasicBlock &MBB) const {
2109
+ // Get the instructions that need to be replaced. We emit at most two of
2110
+ // these. Remember them in order to avoid complications coming from the need
2111
+ // to traverse the block while potentially creating more blocks.
2112
+ auto Where = llvm::find_if (MBB, [](MachineInstr &MI) {
2113
+ return MI.getOpcode () == RISCV::PROBED_STACKALLOC;
2114
+ });
2115
+ if (Where != MBB.end ()) {
2116
+ DebugLoc DL = MBB.findDebugLoc (Where);
2117
+ emitStackProbeInline (MF, MBB, Where, DL);
2118
+ Where->eraseFromParent ();
2119
+ }
2120
+ }
0 commit comments