@@ -2873,9 +2873,20 @@ void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
28732873
28742874 auto I = MBB.end ();
28752875
2876+ // Note: this is used after the hazard recognizer, so we need to apply some
2877+ // hazard workarounds directly here.
2878+ const bool FlushSGPRWrites = (ST.isWave64 () && ST.hasVALUMaskWriteHazard ()) ||
2879+ ST.hasVALUReadSGPRHazard ();
2880+ auto ApplyHazardWorkarounds = [this , &MBB, &I, &DL, FlushSGPRWrites]() {
2881+ if (FlushSGPRWrites)
2882+ BuildMI (MBB, I, DL, get (AMDGPU::S_WAITCNT_DEPCTR))
2883+ .addImm (AMDGPU::DepCtr::encodeFieldSaSdst (0 ));
2884+ };
2885+
28762886 // We need to compute the offset relative to the instruction immediately after
28772887 // s_getpc_b64. Insert pc arithmetic code before last terminator.
28782888 MachineInstr *GetPC = BuildMI (MBB, I, DL, get (AMDGPU::S_GETPC_B64), PCReg);
2889+ ApplyHazardWorkarounds ();
28792890
28802891 auto &MCCtx = MF->getContext ();
28812892 MCSymbol *PostGetPCLabel =
@@ -2890,10 +2901,12 @@ void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
28902901 .addReg (PCReg, RegState::Define, AMDGPU::sub0)
28912902 .addReg (PCReg, 0 , AMDGPU::sub0)
28922903 .addSym (OffsetLo, MO_FAR_BRANCH_OFFSET);
2904+ ApplyHazardWorkarounds ();
28932905 BuildMI (MBB, I, DL, get (AMDGPU::S_ADDC_U32))
28942906 .addReg (PCReg, RegState::Define, AMDGPU::sub1)
28952907 .addReg (PCReg, 0 , AMDGPU::sub1)
28962908 .addSym (OffsetHi, MO_FAR_BRANCH_OFFSET);
2909+ ApplyHazardWorkarounds ();
28972910
28982911 // Insert the indirect branch after the other terminator.
28992912 BuildMI (&MBB, DL, get (AMDGPU::S_SETPC_B64))
0 commit comments