1616// ===----------------------------------------------------------------------===//
1717
1818#include " AMDGPU.h"
19+ #include " AMDGPUGlobalISelUtils.h"
20+ #include " GCNSubtarget.h"
21+ #include " llvm/CodeGen/GlobalISel/CSEInfo.h"
22+ #include " llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
23+ #include " llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
1924#include " llvm/CodeGen/MachineFunctionPass.h"
25+ #include " llvm/CodeGen/MachineInstr.h"
26+ #include " llvm/CodeGen/MachineUniformityAnalysis.h"
27+ #include " llvm/CodeGen/TargetPassConfig.h"
2028#include " llvm/InitializePasses.h"
2129
2230#define DEBUG_TYPE " amdgpu-regbankselect"
2331
2432using namespace llvm ;
33+ using namespace AMDGPU ;
2534
2635namespace {
2736
@@ -40,6 +49,9 @@ class AMDGPURegBankSelect : public MachineFunctionPass {
4049 }
4150
4251 void getAnalysisUsage (AnalysisUsage &AU) const override {
52+ AU.addRequired <TargetPassConfig>();
53+ AU.addRequired <GISelCSEAnalysisWrapperPass>();
54+ AU.addRequired <MachineUniformityAnalysisPass>();
4355 MachineFunctionPass::getAnalysisUsage (AU);
4456 }
4557
@@ -55,6 +67,9 @@ class AMDGPURegBankSelect : public MachineFunctionPass {
5567
// Pass registration for AMDGPURegBankSelect under DEBUG_TYPE. The three
// INITIALIZE_PASS_DEPENDENCY entries name the same passes that
// getAnalysisUsage() requests via addRequired: TargetPassConfig,
// GISelCSEAnalysisWrapperPass and MachineUniformityAnalysisPass.
5668INITIALIZE_PASS_BEGIN (AMDGPURegBankSelect, DEBUG_TYPE,
5769 " AMDGPU Register Bank Select" , false , false )
70+ INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
71+ INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
72+ INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
5873INITIALIZE_PASS_END(AMDGPURegBankSelect, DEBUG_TYPE,
5974 " AMDGPU Register Bank Select" , false , false )
6075
@@ -66,9 +81,209 @@ FunctionPass *llvm::createAMDGPURegBankSelectPass() {
6681 return new AMDGPURegBankSelect ();
6782}
6883
84+ class RegBankSelectHelper {
85+ MachineIRBuilder &B;
86+ MachineRegisterInfo &MRI;
87+ AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA;
88+ const MachineUniformityInfo &MUI;
89+ const SIRegisterInfo &TRI;
90+ const RegisterBank *SgprRB;
91+ const RegisterBank *VgprRB;
92+ const RegisterBank *VccRB;
93+
94+ public:
95+ RegBankSelectHelper (MachineIRBuilder &B,
96+ AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA,
97+ const MachineUniformityInfo &MUI,
98+ const SIRegisterInfo &TRI, const RegisterBankInfo &RBI)
99+ : B(B), MRI(*B.getMRI()), ILMA(ILMA), MUI(MUI), TRI(TRI),
100+ SgprRB (&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
101+ VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
102+ VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}
103+
104+ // Temporal divergence copy: COPY to vgpr with implicit use of $exec inside of
105+ // the cycle
106+ // Note: uniformity analysis does not consider that registers with vgpr def
107+ // are divergent (you can have uniform value in vgpr).
108+ // - TODO: implicit use of $exec could be implemented as indicator that
109+ // instruction is divergent
110+ bool isTemporalDivergenceCopy (Register Reg) {
111+ MachineInstr *MI = MRI.getVRegDef (Reg);
112+ if (!MI->isCopy ())
113+ return false ;
114+
115+ const MachineOperand *ImplicitExecUse = MI->implicit_operands ().begin ();
116+ return ImplicitExecUse && ImplicitExecUse->getReg () == TRI.getExec ();
117+ }
118+
119+ void setRegBankDef (MachineInstr &MI, MachineOperand &DefOP,
120+ const RegisterBank *RB) {
121+ Register Reg = DefOP.getReg ();
122+
123+ if (!MRI.getRegClassOrNull (Reg)) {
124+ MRI.setRegBank (Reg, *RB);
125+ return ;
126+ }
127+
128+ // Register that already has Register class got it during pre-inst selection
129+ // of another instruction. Maybe cross bank copy was required so we insert a
130+ // copy that can be removed later. This simplifies post regbanklegalize
131+ // combiner and avoids need to special case some patterns.
132+ LLT Ty = MRI.getType (Reg);
133+ Register NewReg = MRI.createVirtualRegister ({RB, Ty});
134+ DefOP.setReg (NewReg);
135+
136+ auto &MBB = *MI.getParent ();
137+ B.setInsertPt (MBB, MBB.SkipPHIsAndLabels (std::next (MI.getIterator ())));
138+ B.buildCopy (Reg, NewReg);
139+
140+ // The problem was discovered for uniform S1 that was used as both
141+ // lane mask(vcc) and regular sgpr S1.
142+ // - lane-mask(vcc) use was by si_if, this use is divergent and requires
143+ // non-trivial sgpr-S1-to-vcc copy. But pre-inst-selection of si_if sets
144+ // sreg_64_xexec(S1) on def of uniform S1 making it lane-mask.
145+ // - the regular sgpr S1(uniform) instruction is now broken since
146+ // it uses sreg_64_xexec(S1) which is divergent.
147+
148+ // Replace virtual registers with register class on generic instructions
149+ // uses with virtual registers with register bank.
150+ for (auto &UseMI : make_early_inc_range (MRI.use_instructions (Reg))) {
151+ if (UseMI.isPreISelOpcode ()) {
152+ for (MachineOperand &Op : UseMI.operands ()) {
153+ if (Op.isReg () && Op.getReg () == Reg)
154+ Op.setReg (NewReg);
155+ }
156+ }
157+ }
158+ }
159+
160+ Register tryGetVReg (MachineOperand &Op) {
161+ if (!Op.isReg ())
162+ return {};
163+
164+ Register Reg = Op.getReg ();
165+ if (!Reg.isVirtual ())
166+ return {};
167+
168+ return Reg;
169+ }
170+
171+ void assignBanksOnDefs (MachineInstr &MI) {
172+ for (MachineOperand &DefOP : MI.defs ()) {
173+ Register DefReg = tryGetVReg (DefOP);
174+ if (!DefReg.isValid ())
175+ continue ;
176+
177+ // Copies can have register class on def registers.
178+ if (MI.isCopy () && MRI.getRegClassOrNull (DefReg)) {
179+ continue ;
180+ }
181+
182+ if (MUI.isUniform (DefReg) || ILMA.isS32S64LaneMask (DefReg)) {
183+ setRegBankDef (MI, DefOP, SgprRB);
184+ } else {
185+ if (MRI.getType (DefReg) == LLT::scalar (1 ))
186+ setRegBankDef (MI, DefOP, VccRB);
187+ else
188+ setRegBankDef (MI, DefOP, VgprRB);
189+ }
190+ }
191+ }
192+
193+ void constrainRegBankUse (MachineInstr &MI, MachineOperand &UseOP,
194+ const RegisterBank *RB) {
195+ Register Reg = UseOP.getReg ();
196+
197+ LLT Ty = MRI.getType (Reg);
198+ Register NewReg = MRI.createVirtualRegister ({RB, Ty});
199+ UseOP.setReg (NewReg);
200+
201+ if (MI.isPHI ()) {
202+ auto DefMI = MRI.getVRegDef (Reg)->getIterator ();
203+ MachineBasicBlock *DefMBB = DefMI->getParent ();
204+ B.setInsertPt (*DefMBB, DefMBB->SkipPHIsAndLabels (std::next (DefMI)));
205+ } else {
206+ B.setInstr (MI);
207+ }
208+
209+ B.buildCopy (NewReg, Reg);
210+ }
211+
212+ void constrainBanksOnUses (MachineInstr &MI) {
213+ for (MachineOperand &UseOP : MI.uses ()) {
214+ auto UseReg = tryGetVReg (UseOP);
215+ if (!UseReg.isValid ())
216+ continue ;
217+
218+ // UseReg already has register bank.
219+ if (MRI.getRegBankOrNull (UseReg))
220+ continue ;
221+
222+ if (!isTemporalDivergenceCopy (UseReg) &&
223+ (MUI.isUniform (UseReg) || ILMA.isS32S64LaneMask (UseReg))) {
224+ constrainRegBankUse (MI, UseOP, SgprRB);
225+ } else {
226+ if (MRI.getType (UseReg) == LLT::scalar (1 ))
227+ constrainRegBankUse (MI, UseOP, VccRB);
228+ else
229+ constrainRegBankUse (MI, UseOP, VgprRB);
230+ }
231+ }
232+ }
233+ };
234+
69235bool AMDGPURegBankSelect::runOnMachineFunction (MachineFunction &MF) {
70236 if (MF.getProperties ().hasProperty (
71237 MachineFunctionProperties::Property::FailedISel))
72238 return false ;
239+
240+ // Setup the instruction builder with CSE.
241+ const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
242+ GISelCSEAnalysisWrapper &Wrapper =
243+ getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper ();
244+ GISelCSEInfo &CSEInfo = Wrapper.get (TPC.getCSEConfig ());
245+ GISelObserverWrapper Observer;
246+ Observer.addObserver (&CSEInfo);
247+
248+ CSEMIRBuilder B (MF);
249+ B.setCSEInfo (&CSEInfo);
250+ B.setChangeObserver (Observer);
251+
252+ RAIIDelegateInstaller DelegateInstaller (MF, &Observer);
253+ RAIIMFObserverInstaller MFObserverInstaller (MF, Observer);
254+
255+ IntrinsicLaneMaskAnalyzer ILMA (MF);
256+ MachineUniformityInfo &MUI =
257+ getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo ();
258+ const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
259+ RegBankSelectHelper RBSHelper (B, ILMA, MUI, *ST.getRegisterInfo (),
260+ *ST.getRegBankInfo ());
261+
262+ // Assign register banks to ALL def registers on G_ instructions.
263+ // Same for copies if they have no register bank or class on def.
264+ for (MachineBasicBlock &MBB : MF) {
265+ for (MachineInstr &MI : MBB) {
266+ if (MI.isPreISelOpcode () || MI.isCopy ())
267+ RBSHelper.assignBanksOnDefs (MI);
268+ }
269+ }
270+
271+ // At this point all virtual registers have register class or bank
272+ // - Defs of G_ instructions have register banks.
273+ // - Defs and uses of inst-selected instructions have register class.
274+ // - Defs and uses of copies can have either register class or bank
275+ // and most notably:
276+ // - Uses of G_ instructions can have either register class or bank.
277+
278+ // Reassign use registers of G_ instructions to only have register banks.
279+ for (MachineBasicBlock &MBB : MF) {
280+ for (MachineInstr &MI : MBB) {
281+ // Copies are skipped since they can have register class on use registers.
282+ if (MI.isPreISelOpcode ())
283+ RBSHelper.constrainBanksOnUses (MI);
284+ }
285+ }
286+
287+ // Defs and uses of G_ instructions have register banks exclusively.
73288 return true ;
74289}
0 commit comments