1616// ===----------------------------------------------------------------------===//
1717
1818#include " AMDGPU.h"
19+ #include " AMDGPUGlobalISelUtils.h"
20+ #include " GCNSubtarget.h"
21+ #include " llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
1922#include " llvm/CodeGen/MachineFunctionPass.h"
23+ #include " llvm/CodeGen/MachineUniformityAnalysis.h"
2024#include " llvm/InitializePasses.h"
2125
2226#define DEBUG_TYPE " amdgpu-regbankselect"
@@ -40,6 +44,7 @@ class AMDGPURegBankSelect : public MachineFunctionPass {
4044 }
4145
// Declares the analyses this pass depends on: MachineUniformityAnalysisPass is
// added as a required analysis, then the request is forwarded to
// MachineFunctionPass::getAnalysisUsage for the base-class requirements.
4246 void getAnalysisUsage (AnalysisUsage &AU) const override {
47+ AU.addRequired <MachineUniformityAnalysisPass>();
4348 MachineFunctionPass::getAnalysisUsage (AU);
4449 }
4550
@@ -55,6 +60,7 @@ class AMDGPURegBankSelect : public MachineFunctionPass {
5560
// Pass registration for AMDGPURegBankSelect under the DEBUG_TYPE name.
// MachineUniformityAnalysisPass is listed as a dependency between the
// BEGIN/END macros; the BEGIN and END invocations take the same arguments.
5661INITIALIZE_PASS_BEGIN (AMDGPURegBankSelect, DEBUG_TYPE,
5762 " AMDGPU Register Bank Select" , false , false )
63+ INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
5864INITIALIZE_PASS_END(AMDGPURegBankSelect, DEBUG_TYPE,
5965 " AMDGPU Register Bank Select" , false , false )
6066
@@ -66,9 +72,220 @@ FunctionPass *llvm::createAMDGPURegBankSelectPass() {
6672 return new AMDGPURegBankSelect ();
6773}
6874
75+ class RegBankSelectHelper {
76+ MachineFunction &MF;
77+ MachineIRBuilder &B;
78+ MachineRegisterInfo &MRI;
79+ AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA;
80+ const MachineUniformityInfo &MUI;
81+ const SIRegisterInfo &TRI;
82+ const RegisterBank *SgprRB;
83+ const RegisterBank *VgprRB;
84+ const RegisterBank *VccRB;
85+
86+ public:
87+ RegBankSelectHelper (MachineFunction &MF, MachineIRBuilder &B,
88+ MachineRegisterInfo &MRI,
89+ AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA,
90+ const MachineUniformityInfo &MUI,
91+ const SIRegisterInfo &TRI, const RegisterBankInfo &RBI)
92+ : MF(MF), B(B), MRI(MRI), ILMA(ILMA), MUI(MUI), TRI(TRI),
93+ SgprRB (&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
94+ VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
95+ VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}
96+
97+ bool shouldRegBankSelect (MachineInstr &MI) {
98+ return MI.isPreISelOpcode () || MI.isCopy ();
99+ }
100+
101+ void setRBDef (MachineInstr &MI, MachineOperand &DefOP,
102+ const RegisterBank *RB) {
103+ Register Reg = DefOP.getReg ();
104+ // Register that already has Register class got it during pre-inst selection
105+ // of another instruction. Maybe cross bank copy was required so we insert a
106+ // copy that can be removed later. This simplifies post-rb-legalize artifact
107+ // combiner and avoids need to special case some patterns.
108+ if (MRI.getRegClassOrNull (Reg)) {
109+ LLT Ty = MRI.getType (Reg);
110+ Register NewReg = MRI.createVirtualRegister ({RB, Ty});
111+ DefOP.setReg (NewReg);
112+
113+ auto &MBB = *MI.getParent ();
114+ B.setInsertPt (MBB, MBB.SkipPHIsAndLabels (std::next (MI.getIterator ())));
115+ B.buildCopy (Reg, NewReg);
116+
117+ // The problem was discovered for uniform S1 that was used as both
118+ // lane mask(vcc) and regular sgpr S1.
119+ // - lane-mask(vcc) use was by si_if, this use is divergent and requires
120+ // non-trivial sgpr-S1-to-vcc copy. But pre-inst-selection of si_if sets
121+ // sreg_64_xexec(S1) on def of uniform S1 making it lane-mask.
122+ // - the regular sgpr S1(uniform) instruction is now broken since
123+ // it uses sreg_64_xexec(S1) which is divergent.
124+
125+ // "Clear" reg classes from uses on generic instructions and put register
126+ // banks instead.
127+ for (auto &UseMI : MRI.use_instructions (Reg)) {
128+ if (shouldRegBankSelect (UseMI)) {
129+ for (MachineOperand &Op : UseMI.operands ()) {
130+ if (Op.isReg () && Op.getReg () == Reg)
131+ Op.setReg (NewReg);
132+ }
133+ }
134+ }
135+
136+ } else {
137+ MRI.setRegBank (Reg, *RB);
138+ }
139+ }
140+
141+ void constrainRBUse (MachineInstr &MI, MachineOperand &UseOP,
142+ const RegisterBank *RB) {
143+ Register Reg = UseOP.getReg ();
144+
145+ LLT Ty = MRI.getType (Reg);
146+ Register NewReg = MRI.createVirtualRegister ({RB, Ty});
147+ UseOP.setReg (NewReg);
148+
149+ if (MI.isPHI ()) {
150+ auto DefMI = MRI.getVRegDef (Reg)->getIterator ();
151+ MachineBasicBlock *DefMBB = DefMI->getParent ();
152+ B.setInsertPt (*DefMBB, DefMBB->SkipPHIsAndLabels (std::next (DefMI)));
153+ } else {
154+ B.setInstr (MI);
155+ }
156+
157+ B.buildCopy (NewReg, Reg);
158+ }
159+
160+ std::optional<Register> tryGetVReg (MachineOperand &Op) {
161+ if (!Op.isReg ())
162+ return std::nullopt ;
163+
164+ Register Reg = Op.getReg ();
165+ if (!Reg.isVirtual ())
166+ return std::nullopt ;
167+
168+ return Reg;
169+ }
170+
171+ void assignBanksOnDefs () {
172+ for (MachineBasicBlock &MBB : MF) {
173+ for (MachineInstr &MI : MBB) {
174+ if (!shouldRegBankSelect (MI))
175+ continue ;
176+
177+ for (MachineOperand &DefOP : MI.defs ()) {
178+ auto MaybeDefReg = tryGetVReg (DefOP);
179+ if (!MaybeDefReg)
180+ continue ;
181+ Register DefReg = *MaybeDefReg;
182+
183+ // Copies can have register class on def registers.
184+ if (MI.isCopy () && MRI.getRegClassOrNull (DefReg)) {
185+ continue ;
186+ }
187+
188+ if (MUI.isUniform (DefReg) || ILMA.isS32S64LaneMask (DefReg)) {
189+ setRBDef (MI, DefOP, SgprRB);
190+ } else {
191+ if (MRI.getType (DefReg) == LLT::scalar (1 ))
192+ setRBDef (MI, DefOP, VccRB);
193+ else
194+ setRBDef (MI, DefOP, VgprRB);
195+ }
196+ }
197+ }
198+ }
199+ }
200+
201+ // Temporal divergence copy: COPY to vgpr with implicit use of $exec inside of
202+ // the cycle
203+ // Note: uniformity analysis does not consider that registers with vgpr def
204+ // are divergent (you can have uniform value in vgpr).
205+ // - TODO: implicit use of $exec could be implemented as indicator that
206+ // instruction is divergent
207+ bool isTemporalDivergenceCopy (Register Reg) {
208+ MachineInstr *MI = MRI.getVRegDef (Reg);
209+ if (!MI->isCopy ())
210+ return false ;
211+
212+ for (auto Op : MI->implicit_operands ()) {
213+ if (!Op.isReg ())
214+ continue ;
215+
216+ if (Op.getReg () == TRI.getExec ()) {
217+ return true ;
218+ }
219+ }
220+
221+ return false ;
222+ }
223+
224+ void constrainBanksOnUses () {
225+ for (MachineBasicBlock &MBB : MF) {
226+ for (MachineInstr &MI : MBB) {
227+ if (!shouldRegBankSelect (MI))
228+ continue ;
229+
230+ // Copies can have register class on use registers.
231+ if (MI.isCopy ())
232+ continue ;
233+
234+ for (MachineOperand &UseOP : MI.uses ()) {
235+ auto MaybeUseReg = tryGetVReg (UseOP);
236+ if (!MaybeUseReg)
237+ continue ;
238+ Register UseReg = *MaybeUseReg;
239+
240+ // UseReg already has register bank.
241+ if (MRI.getRegBankOrNull (UseReg))
242+ continue ;
243+
244+ if (!isTemporalDivergenceCopy (UseReg) &&
245+ (MUI.isUniform (UseReg) || ILMA.isS32S64LaneMask (UseReg))) {
246+ constrainRBUse (MI, UseOP, SgprRB);
247+ } else {
248+ if (MRI.getType (UseReg) == LLT::scalar (1 ))
249+ constrainRBUse (MI, UseOP, VccRB);
250+ else
251+ constrainRBUse (MI, UseOP, VgprRB);
252+ }
253+ }
254+ }
255+ }
256+ }
257+ };
258+
69259bool AMDGPURegBankSelect::runOnMachineFunction (MachineFunction &MF) {
70260 if (MF.getProperties ().hasProperty (
71261 MachineFunctionProperties::Property::FailedISel))
72262 return false ;
263+
264+ MachineUniformityInfo &MUI =
265+ getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo ();
266+ AMDGPU::IntrinsicLaneMaskAnalyzer ILMA (MF);
267+ MachineRegisterInfo &MRI = MF.getRegInfo ();
268+ const SIRegisterInfo &TRI =
269+ *MF.getSubtarget <GCNSubtarget>().getRegisterInfo ();
270+ const RegisterBankInfo &RBI = *MF.getSubtarget ().getRegBankInfo ();
271+
272+ MachineIRBuilder B (MF);
273+ RegBankSelectHelper RBSHelper (MF, B, MRI, ILMA, MUI, TRI, RBI);
274+
275+ // Assign register banks to ALL def registers on G_ instructions.
276+ // Same for copies if they have no register bank or class on def.
277+ RBSHelper.assignBanksOnDefs ();
278+
279+ // At this point all virtual registers have register class or bank
280+ // - Defs of G_ instructions have register banks.
281+ // - Defs and uses of inst-selected instructions have register class.
282+ // - Defs and uses of copies can have either register class or bank
283+ // and most notably
284+ // - Uses of G_ instructions can have either register class or bank
285+
286+ // Reassign uses of G_ instructions to only have register banks.
287+ RBSHelper.constrainBanksOnUses ();
288+
289+ // Defs and uses of G_ instructions have register banks exclusively.
73290 return true ;
74291}
0 commit comments