@@ -57,27 +57,33 @@ class AMDGPURewriteAGPRCopyMFMAImpl {
5757 TRI (*ST.getRegisterInfo()), MRI(MF.getRegInfo()), VRM(VRM), LRM(LRM),
5858 LIS(LIS) {}
5959
60+ bool isRewriteCandidate (const MachineInstr &MI) const { // True when MI is an MAI instruction for which getMFMASrcCVDstAGPROp yields an opcode.
61+ if (!TII.isMAI (MI)) // Anything TII does not classify as MAI is never a candidate.
62+ return false ;
63+ return AMDGPU::getMFMASrcCVDstAGPROp (MI.getOpcode ()) != -1 ; // -1 signals that no replacement opcode exists for this opcode.
64+ }
65+
6066 /// Compute the register class constraints based on the uses of \p Reg,
6167 /// excluding uses in instructions accepted by isRewriteCandidate. This should
6268 /// be nearly identical to MachineRegisterInfo::recomputeRegClass.
6369 const TargetRegisterClass *
64- recomputeRegClassExcept (Register Reg, const TargetRegisterClass *OldRC ,
65- const TargetRegisterClass *NewRC ,
66- const MachineInstr *ExceptMI ) const ;
70+ recomputeRegClassExceptRewritable (Register Reg,
71+ const TargetRegisterClass *OldRC ,
72+ const TargetRegisterClass *NewRC ) const ;
6773
6874 bool run (MachineFunction &MF) const ;
6975};
7076
7177const TargetRegisterClass *
72- AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExcept (
78+ AMDGPURewriteAGPRCopyMFMAImpl::recomputeRegClassExceptRewritable (
7379 Register Reg, const TargetRegisterClass *OldRC,
74- const TargetRegisterClass *NewRC, const MachineInstr *ExceptMI ) const {
80+ const TargetRegisterClass *NewRC) const {
7581
7682 // Accumulate constraints from all uses.
7783 for (MachineOperand &MO : MRI.reg_nodbg_operands (Reg)) {
7884 // Apply the effect of the given operand to NewRC.
7985 MachineInstr *MI = MO.getParent ();
80- if (MI == ExceptMI )
86+ if (isRewriteCandidate (*MI) )
8187 continue ;
8288
8389 unsigned OpNo = &MO - &MI->getOperand (0 );
@@ -182,10 +188,13 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
182188 // first place, as well as need to assign another register, and need to
183189 // figure out where to put them. The live range splitting is smarter than
184190 // anything we're doing here, so trust it did something reasonable.
185- const TargetRegisterClass *Src2ExceptRC = recomputeRegClassExcept (
186- Src2->getReg (), Src2VirtRegRC, VirtRegRC, CopySrcMI);
187- if (!Src2ExceptRC)
191+ const TargetRegisterClass *Src2ExceptRC =
192+ recomputeRegClassExceptRewritable (Src2->getReg (), Src2VirtRegRC,
193+ VirtRegRC);
194+ if (!Src2ExceptRC) {
195+ LLVM_DEBUG (dbgs () << " Could not recompute the regclass\n " );
188196 continue ;
197+ }
189198
190199 const TargetRegisterClass *NewSrc2ConstraintRC =
191200 TII.getRegClass (TII.get (AGPROp), Src2->getOperandNo (), &TRI, MF);
@@ -207,8 +216,19 @@ bool AMDGPURewriteAGPRCopyMFMAImpl::run(MachineFunction &MF) const {
207216
208217 CopySrcMI->setDesc (TII.get (AGPROp));
209218
210- // TODO: Is replacing too aggressive, fixup these instructions only?
211- MRI.replaceRegWith (CopySrcReg, VReg);
219+ // Perform replacement of the register, rewriting the rewritable uses.
220+ for (MachineInstr &UseMI :
221+ make_early_inc_range (MRI.reg_instructions (CopySrcReg))) {
222+ if (TII.isMAI (UseMI)) {
223+ // Note the register we need to rewrite may still appear in src0/src1,
224+ // but that's fine since those can use A or V anyway.
225+ int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp (UseMI.getOpcode ());
226+ if (ReplacementOp != -1 )
227+ UseMI.setDesc (TII.get (ReplacementOp));
228+ }
229+
230+ UseMI.substituteRegister (CopySrcReg, VReg, AMDGPU::NoSubRegister, TRI);
231+ }
212232
213233 LLVM_DEBUG (dbgs () << " Replaced VGPR MFMA with AGPR: " << *CopySrcMI);
214234
0 commit comments