Skip to content

Commit 651af28

Browse files
committed
[AMDGPU] Add hazard workarounds to insertIndirectBranch
BranchRelaxation runs after the hazard recognizer, so workarounds for SGPR access hazards must be inserted inline, directly into the code it generates.
1 parent 4b529f8 commit 651af28

File tree

2 files changed

+952
-0
lines changed

2 files changed

+952
-0
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2867,6 +2867,12 @@ void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
28672867
MachineRegisterInfo &MRI = MF->getRegInfo();
28682868
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
28692869

2870+
// Note: as this is used after hazard recognizer we need to apply some hazard
2871+
// workarounds directly.
2872+
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
2873+
const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
2874+
ST.hasVALUReadSGPRHazard();
2875+
28702876
// FIXME: Virtual register workaround for RegScavenger not working with empty
28712877
// blocks.
28722878
Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
@@ -2876,6 +2882,9 @@ void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
28762882
// We need to compute the offset relative to the instruction immediately after
28772883
// s_getpc_b64. Insert pc arithmetic code before last terminator.
28782884
MachineInstr *GetPC = BuildMI(MBB, I, DL, get(AMDGPU::S_GETPC_B64), PCReg);
2885+
if (FlushSGPRWrites)
2886+
BuildMI(MBB, I, DL, get(AMDGPU::S_WAITCNT_DEPCTR))
2887+
.addImm(AMDGPU::DepCtr::encodeFieldSaSdst(0));
28792888

28802889
auto &MCCtx = MF->getContext();
28812890
MCSymbol *PostGetPCLabel =
@@ -2890,10 +2899,16 @@ void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
28902899
.addReg(PCReg, RegState::Define, AMDGPU::sub0)
28912900
.addReg(PCReg, 0, AMDGPU::sub0)
28922901
.addSym(OffsetLo, MO_FAR_BRANCH_OFFSET);
2902+
if (FlushSGPRWrites)
2903+
BuildMI(MBB, I, DL, get(AMDGPU::S_WAITCNT_DEPCTR))
2904+
.addImm(AMDGPU::DepCtr::encodeFieldSaSdst(0));
28932905
BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32))
28942906
.addReg(PCReg, RegState::Define, AMDGPU::sub1)
28952907
.addReg(PCReg, 0, AMDGPU::sub1)
28962908
.addSym(OffsetHi, MO_FAR_BRANCH_OFFSET);
2909+
if (FlushSGPRWrites)
2910+
BuildMI(MBB, I, DL, get(AMDGPU::S_WAITCNT_DEPCTR))
2911+
.addImm(AMDGPU::DepCtr::encodeFieldSaSdst(0));
28972912

28982913
// Insert the indirect branch after the other terminator.
28992914
BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64))

0 commit comments

Comments
 (0)