@@ -2867,6 +2867,12 @@ void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
28672867 MachineRegisterInfo &MRI = MF->getRegInfo ();
28682868 const SIMachineFunctionInfo *MFI = MF->getInfo <SIMachineFunctionInfo>();
28692869
2870+ // Note: this runs after the hazard recognizer, so the required hazard
2871+ // workarounds must be inserted here directly.
2872+ const GCNSubtarget &ST = MF->getSubtarget <GCNSubtarget>();
2873+ const bool FlushSGPRWrites = (ST.isWave64 () && ST.hasVALUMaskWriteHazard ()) ||
2874+ ST.hasVALUReadSGPRHazard ();
2875+
28702876 // FIXME: Virtual register workaround for RegScavenger not working with empty
28712877 // blocks.
28722878 Register PCReg = MRI.createVirtualRegister (&AMDGPU::SReg_64RegClass);
@@ -2876,6 +2882,9 @@ void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
28762882 // We need to compute the offset relative to the instruction immediately after
28772883 // s_getpc_b64. Insert pc arithmetic code before last terminator.
28782884 MachineInstr *GetPC = BuildMI (MBB, I, DL, get (AMDGPU::S_GETPC_B64), PCReg);
2885+ if (FlushSGPRWrites)
2886+ BuildMI (MBB, I, DL, get (AMDGPU::S_WAITCNT_DEPCTR))
2887+ .addImm (AMDGPU::DepCtr::encodeFieldSaSdst (0 ));
28792888
28802889 auto &MCCtx = MF->getContext ();
28812890 MCSymbol *PostGetPCLabel =
@@ -2890,10 +2899,16 @@ void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
28902899 .addReg (PCReg, RegState::Define, AMDGPU::sub0)
28912900 .addReg (PCReg, 0 , AMDGPU::sub0)
28922901 .addSym (OffsetLo, MO_FAR_BRANCH_OFFSET);
2902+ if (FlushSGPRWrites)
2903+ BuildMI (MBB, I, DL, get (AMDGPU::S_WAITCNT_DEPCTR))
2904+ .addImm (AMDGPU::DepCtr::encodeFieldSaSdst (0 ));
28932905 BuildMI (MBB, I, DL, get (AMDGPU::S_ADDC_U32))
28942906 .addReg (PCReg, RegState::Define, AMDGPU::sub1)
28952907 .addReg (PCReg, 0 , AMDGPU::sub1)
28962908 .addSym (OffsetHi, MO_FAR_BRANCH_OFFSET);
2909+ if (FlushSGPRWrites)
2910+ BuildMI (MBB, I, DL, get (AMDGPU::S_WAITCNT_DEPCTR))
2911+ .addImm (AMDGPU::DepCtr::encodeFieldSaSdst (0 ));
28972912
28982913 // Insert the indirect branch after the other terminator.
28992914 BuildMI (&MBB, DL, get (AMDGPU::S_SETPC_B64))
0 commit comments