@@ -2873,9 +2873,20 @@ void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
28732873
28742874 auto I = MBB.end ();
28752875
2876+ // Note: as this is used after the hazard recognizer, we need to apply some
2877+ // hazard workarounds directly.
2878+ const bool FlushSGPRWrites = (ST.isWave64 () && ST.hasVALUMaskWriteHazard ()) ||
2879+ ST.hasVALUReadSGPRHazard ();
2880+ auto ApplyHazardWorkarounds = [this , &MBB, &I, &DL, FlushSGPRWrites]() {
2881+ if (FlushSGPRWrites)
2882+ BuildMI (MBB, I, DL, get (AMDGPU::S_WAITCNT_DEPCTR))
2883+ .addImm (AMDGPU::DepCtr::encodeFieldSaSdst (0 ));
2884+ };
2885+
28762886 // We need to compute the offset relative to the instruction immediately after
28772887 // s_getpc_b64. Insert pc arithmetic code before last terminator.
28782888 MachineInstr *GetPC = BuildMI (MBB, I, DL, get (AMDGPU::S_GETPC_B64), PCReg);
2889+ ApplyHazardWorkarounds ();
28792890
28802891 auto &MCCtx = MF->getContext ();
28812892 MCSymbol *PostGetPCLabel =
@@ -2894,6 +2905,7 @@ void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
28942905 .addReg (PCReg, RegState::Define, AMDGPU::sub1)
28952906 .addReg (PCReg, 0 , AMDGPU::sub1)
28962907 .addSym (OffsetHi, MO_FAR_BRANCH_OFFSET);
2908+ ApplyHazardWorkarounds ();
28972909
28982910 // Insert the indirect branch after the other terminator.
28992911 BuildMI (&MBB, DL, get (AMDGPU::S_SETPC_B64))
0 commit comments