Skip to content

Commit a80291c

Browse files
committed
Revert "[AMDGPU] Invert the handling of skip insertion."
This reverts commit 0dc6c24. The commit is reported to cause a regression in piglit/bin/glsl-vs-loop for Mesa.
1 parent e15fb06 commit a80291c

40 files changed: +380 / -390 lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -156,9 +156,6 @@ extern char &SIWholeQuadModeID;
156156
void initializeSILowerControlFlowPass(PassRegistry &);
157157
extern char &SILowerControlFlowID;
158158

159-
void initializeSIRemoveShortExecBranchesPass(PassRegistry &);
160-
extern char &SIRemoveShortExecBranchesID;
161-
162159
void initializeSIInsertSkipsPass(PassRegistry &);
163160
extern char &SIInsertSkipsPassID;
164161

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -228,7 +228,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
228228
initializeSIModeRegisterPass(*PR);
229229
initializeSIWholeQuadModePass(*PR);
230230
initializeSILowerControlFlowPass(*PR);
231-
initializeSIRemoveShortExecBranchesPass(*PR);
232231
initializeSIInsertSkipsPass(*PR);
233232
initializeSIMemoryLegalizerPass(*PR);
234233
initializeSIOptimizeExecMaskingPass(*PR);
@@ -994,7 +993,6 @@ void GCNPassConfig::addPreEmitPass() {
994993
// be better for it to emit S_NOP <N> when possible.
995994
addPass(&PostRAHazardRecognizerID);
996995

997-
addPass(&SIRemoveShortExecBranchesID);
998996
addPass(&SIInsertSkipsPassID);
999997
addPass(&BranchRelaxationPassID);
1000998
}

llvm/lib/Target/AMDGPU/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,6 @@ add_llvm_target(AMDGPUCodeGen
116116
SIOptimizeExecMaskingPreRA.cpp
117117
SIPeepholeSDWA.cpp
118118
SIRegisterInfo.cpp
119-
SIRemoveShortExecBranches.cpp
120119
SIShrinkInstructions.cpp
121120
SIWholeQuadMode.cpp
122121
GCNILPSched.cpp

llvm/lib/Target/AMDGPU/SIInsertSkips.cpp

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ using namespace llvm;
4141
#define DEBUG_TYPE "si-insert-skips"
4242

4343
static cl::opt<unsigned> SkipThresholdFlag(
44-
"amdgpu-skip-threshold-legacy",
44+
"amdgpu-skip-threshold",
4545
cl::desc("Number of instructions before jumping over divergent control flow"),
4646
cl::init(12), cl::Hidden);
4747

@@ -466,9 +466,6 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
466466
MachineInstr &MI = *I;
467467

468468
switch (MI.getOpcode()) {
469-
case AMDGPU::S_CBRANCH_EXECZ:
470-
ExecBranchStack.push_back(MI.getOperand(0).getMBB());
471-
break;
472469
case AMDGPU::SI_MASK_BRANCH:
473470
ExecBranchStack.push_back(MI.getOperand(0).getMBB());
474471
MadeChange |= skipMaskBranch(MI, MBB);

llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -244,9 +244,9 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
244244
BuildMI(MBB, I, DL, TII->get(MovTermOpc), Exec)
245245
.addReg(Tmp, RegState::Kill);
246246

247-
// Insert the S_CBRANCH_EXECZ instruction which will be optimized later
248-
// during SIRemoveShortExecBranches.
249-
MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
247+
// Insert a pseudo terminator to help keep the verifier happy. This will also
248+
// be used later when inserting skips.
249+
MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
250250
.add(MI.getOperand(2));
251251

252252
if (!LIS) {
@@ -323,8 +323,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
323323
.addReg(DstReg);
324324

325325
MachineInstr *Branch =
326-
BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
327-
.addMBB(DestBB);
326+
BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
327+
.addMBB(DestBB);
328328

329329
if (!LIS) {
330330
MI.eraseFromParent();

llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp

Lines changed: 0 additions & 158 deletions
This file was deleted.

llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,9 @@ define i32 @divergent_if_swap_brtarget_order0(i32 %value) {
1010
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
1111
; CHECK-NEXT: ; implicit-def: $vgpr0
1212
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
13+
; CHECK-NEXT: ; mask branch BB0_2
1314
; CHECK-NEXT: s_cbranch_execz BB0_2
14-
; CHECK-NEXT: ; %bb.1: ; %if.true
15+
; CHECK-NEXT: BB0_1: ; %if.true
1516
; CHECK-NEXT: global_load_dword v0, v[0:1], off
1617
; CHECK-NEXT: BB0_2: ; %endif
1718
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
@@ -37,10 +38,12 @@ define i32 @divergent_if_swap_brtarget_order1(i32 %value) {
3738
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
3839
; CHECK-NEXT: ; implicit-def: $vgpr0
3940
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
40-
; CHECK-NEXT: s_cbranch_execnz BB1_2
41-
; CHECK-NEXT: ; %bb.1: ; %if.true
41+
; CHECK-NEXT: ; mask branch BB1_2
42+
; CHECK-NEXT: BB1_1: ; %endif
43+
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
44+
; CHECK-NEXT: s_setpc_b64 s[30:31]
45+
; CHECK-NEXT: BB1_2: ; %if.true
4246
; CHECK-NEXT: global_load_dword v0, v[0:1], off
43-
; CHECK-NEXT: BB1_2: ; %endif
4447
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
4548
; CHECK-NEXT: s_waitcnt vmcnt(0)
4649
; CHECK-NEXT: s_setpc_b64 s[30:31]

0 commit comments

Comments
 (0)