1616// ===----------------------------------------------------------------------===//
1717
1818#include " AMDGPU.h"
19+ #include " AMDGPUGlobalISelUtils.h"
20+ #include " AMDGPURegisterBankInfo.h"
21+ #include " MCTargetDesc/AMDGPUMCTargetDesc.h"
22+ #include " llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
1923#include " llvm/CodeGen/MachineFunctionPass.h"
24+ #include " llvm/CodeGen/MachineUniformityAnalysis.h"
2025#include " llvm/InitializePasses.h"
2126
2227#define DEBUG_TYPE " rb-select"
@@ -39,6 +44,7 @@ class AMDGPURBSelect : public MachineFunctionPass {
// Returns the human-readable name of this pass as a string literal.
3944 StringRef getPassName () const override { return " AMDGPU RB select" ; }
4045
4146 void getAnalysisUsage (AnalysisUsage &AU) const override {
47+ AU.addRequired <MachineUniformityAnalysisPass>();
4248 MachineFunctionPass::getAnalysisUsage (AU);
4349 }
4450
@@ -54,6 +60,7 @@ class AMDGPURBSelect : public MachineFunctionPass {
5460
// Pass registration for AMDGPURBSelect under the DEBUG_TYPE argument name.
// MachineUniformityAnalysisPass is declared as a dependency between the
// BEGIN/END macros, matching the addRequired<> call in getAnalysisUsage.
5561INITIALIZE_PASS_BEGIN (AMDGPURBSelect, DEBUG_TYPE, " AMDGPU RB select" , false ,
5662 false )
63+ INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
5764INITIALIZE_PASS_END(AMDGPURBSelect, DEBUG_TYPE, " AMDGPU RB select" , false ,
5865 false )
5966
@@ -63,4 +70,189 @@ char &llvm::AMDGPURBSelectID = AMDGPURBSelect::ID;
6370
6471FunctionPass *llvm::createAMDGPURBSelectPass () { return new AMDGPURBSelect (); }
6572
66- bool AMDGPURBSelect::runOnMachineFunction (MachineFunction &MF) { return true ; }
73+ bool shouldRBSelect (MachineInstr &MI) {
74+ if (isTargetSpecificOpcode (MI.getOpcode ()) && !MI.isPreISelOpcode ())
75+ return false ;
76+
77+ if (MI.getOpcode () == AMDGPU::PHI || MI.getOpcode () == AMDGPU::IMPLICIT_DEF)
78+ return false ;
79+
80+ if (MI.isInlineAsm ())
81+ return false ;
82+
83+ return true ;
84+ }
85+
86+ void setRB (MachineInstr &MI, MachineOperand &DefOP, MachineIRBuilder B,
87+ MachineRegisterInfo &MRI, const RegisterBank &RB) {
88+ Register Reg = DefOP.getReg ();
89+ // Register that already has Register class got it during pre-inst selection
90+ // of another instruction. Maybe cross bank copy was required so we insert a
91+ // copy trat can be removed later. This simplifies post-rb-legalize artifact
92+ // combiner and avoids need to special case some patterns.
93+ if (MRI.getRegClassOrNull (Reg)) {
94+ LLT Ty = MRI.getType (Reg);
95+ Register NewReg = MRI.createVirtualRegister ({&RB, Ty});
96+ DefOP.setReg (NewReg);
97+
98+ auto &MBB = *MI.getParent ();
99+ B.setInsertPt (MBB, MI.isPHI () ? MBB.getFirstNonPHI ()
100+ : std::next (MI.getIterator ()));
101+ B.buildCopy (Reg, NewReg);
102+
103+ // The problem was discoverd for uniform S1 that was used as both
104+ // lane mask(vcc) and regular sgpr S1.
105+ // - lane-mask(vcc) use was by si_if, this use is divergent and requires
106+ // non-trivial sgpr-S1-to-vcc copy. But pre-inst-selection of si_if sets
107+ // sreg_64_xexec(S1) on def of uniform S1 making it lane-mask.
108+ // - the regular regular sgpr S1(uniform) instruction is now broken since
109+ // it uses sreg_64_xexec(S1) which is divergent.
110+
111+ // "Clear" reg classes from uses on generic instructions and but register
112+ // banks instead.
113+ for (auto &UseMI : MRI.use_instructions (Reg)) {
114+ if (shouldRBSelect (UseMI)) {
115+ for (MachineOperand &Op : UseMI.operands ()) {
116+ if (Op.isReg () && Op.isUse () && Op.getReg () == Reg)
117+ Op.setReg (NewReg);
118+ }
119+ }
120+ }
121+
122+ } else {
123+ MRI.setRegBank (Reg, RB);
124+ }
125+ }
126+
127+ void setRBUse (MachineInstr &MI, MachineOperand &UseOP, MachineIRBuilder B,
128+ MachineRegisterInfo &MRI, const RegisterBank &RB) {
129+ Register Reg = UseOP.getReg ();
130+
131+ LLT Ty = MRI.getType (Reg);
132+ Register NewReg = MRI.createVirtualRegister ({&RB, Ty});
133+ UseOP.setReg (NewReg);
134+
135+ if (MI.isPHI ()) {
136+ auto DefMI = MRI.getVRegDef (Reg)->getIterator ();
137+ MachineBasicBlock *DefMBB = DefMI->getParent ();
138+ B.setInsertPt (*DefMBB, DefMBB->SkipPHIsAndLabels (std::next (DefMI)));
139+ } else {
140+ B.setInstr (MI);
141+ }
142+
143+ B.buildCopy (NewReg, Reg);
144+ }
145+
146+ // Temporal divergence copy: COPY to vgpr with implicit use of $exec inside of
147+ // the cycle
148+ // Note: uniformity analysis does not consider that registers with vgpr def are
149+ // divergent (you can have uniform value in vgpr).
150+ // - TODO: implicit use of $exec could be implemented as indicator that
151+ // instruction is divergent
152+ bool isTemporalDivergenceCopy (Register Reg, MachineRegisterInfo &MRI) {
153+ MachineInstr *MI = MRI.getVRegDef (Reg);
154+ if (MI->getOpcode () == AMDGPU::COPY) {
155+ for (auto Op : MI->implicit_operands ()) {
156+ if (!Op.isReg ())
157+ continue ;
158+ Register Reg = Op.getReg ();
159+ if (Reg == AMDGPU::EXEC) {
160+ return true ;
161+ }
162+ }
163+ }
164+
165+ return false ;
166+ }
167+
168+ Register getVReg (MachineOperand &Op) {
169+ if (!Op.isReg ())
170+ return 0 ;
171+
172+ Register Reg = Op.getReg ();
173+ if (!Reg.isVirtual ())
174+ return 0 ;
175+
176+ return Reg;
177+ }
178+
179+ bool AMDGPURBSelect::runOnMachineFunction (MachineFunction &MF) {
180+ MachineUniformityInfo &MUI =
181+ getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo ();
182+ AMDGPU::IntrinsicLaneMaskAnalyzer ILMA (MF);
183+ MachineRegisterInfo &MRI = MF.getRegInfo ();
184+ const RegisterBankInfo &RBI = *MF.getSubtarget ().getRegBankInfo ();
185+
186+ MachineIRBuilder B (MF);
187+
188+ // Assign register banks to ALL def registers on G_ instructions.
189+ // Same for copies if they have no register bank or class on def.
190+ for (MachineBasicBlock &MBB : MF) {
191+ for (MachineInstr &MI : MBB) {
192+ if (!shouldRBSelect (MI))
193+ continue ;
194+
195+ for (MachineOperand &DefOP : MI.defs ()) {
196+ Register DefReg = getVReg (DefOP);
197+ if (!DefReg)
198+ continue ;
199+
200+ // Copies can have register class on def registers.
201+ if (MI.isCopy () && MRI.getRegClassOrNull (DefReg)) {
202+ continue ;
203+ }
204+
205+ if (MUI.isUniform (DefReg) || ILMA.isS32S64LaneMask (DefReg)) {
206+ setRB (MI, DefOP, B, MRI, RBI.getRegBank (AMDGPU::SGPRRegBankID));
207+ } else {
208+ if (MRI.getType (DefReg) == LLT::scalar (1 ))
209+ setRB (MI, DefOP, B, MRI, RBI.getRegBank (AMDGPU::VCCRegBankID));
210+ else
211+ setRB (MI, DefOP, B, MRI, RBI.getRegBank (AMDGPU::VGPRRegBankID));
212+ }
213+ }
214+ }
215+ }
216+
217+ // At this point all virtual registers have register class or bank
218+ // - Defs of G_ instructions have register banks.
219+ // - Defs and uses of inst-selected instructions have register class.
220+ // - Defs and uses of copies can have either register class or bank
221+ // and most notably
222+ // - Uses of G_ instructions can have either register class or bank
223+
224+ // Reassign uses of G_ instructions to only have register banks.
225+ for (MachineBasicBlock &MBB : MF) {
226+ for (MachineInstr &MI : MBB) {
227+ if (!shouldRBSelect (MI))
228+ continue ;
229+
230+ // Copies can have register class on use registers.
231+ if (MI.isCopy ())
232+ continue ;
233+
234+ for (MachineOperand &UseOP : MI.uses ()) {
235+ Register UseReg = getVReg (UseOP);
236+ if (!UseReg)
237+ continue ;
238+
239+ if (!MRI.getRegClassOrNull (UseReg))
240+ continue ;
241+
242+ if (!isTemporalDivergenceCopy (UseReg, MRI) &&
243+ (MUI.isUniform (UseReg) || ILMA.isS32S64LaneMask (UseReg))) {
244+ setRBUse (MI, UseOP, B, MRI, RBI.getRegBank (AMDGPU::SGPRRegBankID));
245+ } else {
246+ if (MRI.getType (UseReg) == LLT::scalar (1 ))
247+ setRBUse (MI, UseOP, B, MRI, RBI.getRegBank (AMDGPU::VCCRegBankID));
248+ else
249+ setRBUse (MI, UseOP, B, MRI, RBI.getRegBank (AMDGPU::VGPRRegBankID));
250+ }
251+ }
252+ }
253+ }
254+
255+ // Defs and uses of G_ instructions have register banks exclusively.
256+
257+ return true ;
258+ }
0 commit comments