Skip to content

Commit 1805a17

Browse files
committed
[AMDGPU] Alternative control flow lowering. Handling uniform if and loop inside the divergent CF in SIAnnotateControlFlow
1 parent 6b2a6ab commit 1805a17

File tree

2 files changed

+53
-80
lines changed

2 files changed

+53
-80
lines changed

llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp

Lines changed: 49 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "GCNSubtarget.h"
1616
#include "llvm/Analysis/LoopInfo.h"
1717
#include "llvm/Analysis/UniformityAnalysis.h"
18+
#include "llvm/Analysis/DomTreeUpdater.h"
1819
#include "llvm/CodeGen/TargetPassConfig.h"
1920
#include "llvm/IR/BasicBlock.h"
2021
#include "llvm/IR/Constants.h"
@@ -86,7 +87,7 @@ class SIAnnotateControlFlow : public FunctionPass {
8687

8788
bool handleLoop(BranchInst *Term);
8889

89-
bool insertWaveReconverge(BasicBlock *BB);
90+
bool tryWaveReconverge(BasicBlock *BB);
9091

9192
public:
9293
static char ID;
@@ -203,8 +204,6 @@ bool SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) {
203204

204205
/// Open a new "If" block
205206
bool SIAnnotateControlFlow::openIf(BranchInst *Term) {
206-
if (isUniform(Term))
207-
return false;
208207

209208
IRBuilder<> IRB(Term);
210209
Value *IfCall = IRB.CreateCall(If, {Term->getCondition()});
@@ -305,20 +304,44 @@ bool SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
305304
}
306305

307306
/// Close the last opened control flow
308-
bool SIAnnotateControlFlow::insertWaveReconverge(BasicBlock *BB) {
309-
assert(succ_empty(BB) || succ_size(BB) == 1);
310-
311-
if (succ_empty(BB))
312-
return false;
307+
bool SIAnnotateControlFlow::tryWaveReconverge(BasicBlock *BB) {
308+
309+
if (succ_empty(BB))
310+
return false;
313311

314-
BasicBlock *SingleSucc = *succ_begin(BB);
315-
BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
316-
BasicBlock::iterator InsPt = Term ? BasicBlock::iterator(Term) : BB->end();
312+
BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator());
313+
if (Term->getNumSuccessors() == 1) {
314+
// The current BB's single successor is the top of the stack. We need to
315+
// reconverge over that path.
316+
BasicBlock *SingleSucc = *succ_begin(BB);
317+
BasicBlock::iterator InsPt = Term ? BasicBlock::iterator(Term) : BB->end();
317318

318-
if (isTopOfStack(SingleSucc)) {
319-
Value *Exec = Stack.back().second;
320-
IRBuilder<>(BB, InsPt).CreateCall(WaveReconverge, {Exec});
319+
if (isTopOfStack(SingleSucc)) {
320+
Value *Exec = Stack.back().second;
321+
IRBuilder<>(BB, InsPt).CreateCall(WaveReconverge, {Exec});
322+
}
323+
} else {
324+
// We have a uniform conditional branch terminating the block.
325+
// This block may be the last in the Then path of the enclosing divergent
326+
// IF.
327+
if (!isUniform(Term))
328+
// Divergent loop is going to be further processed in another place
329+
return false;
330+
331+
for (auto Succ : Term->successors()) {
332+
if (isTopOfStack(Succ)) {
333+
// Just split to make room for further WAVE_RECONVERGE insertion
334+
SmallVector<BasicBlock*, 2> Preds;
335+
for (auto P : predecessors(Succ)) {
336+
if (DT->dominates(BB, P))
337+
Preds.push_back(P);
338+
}
339+
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
340+
SplitBlockPredecessors(Succ, Preds, ".reconverge", &DTU, LI,
341+
nullptr, false);
321342
}
343+
}
344+
}
322345

323346
return true;
324347
}
@@ -342,8 +365,8 @@ bool SIAnnotateControlFlow::runOnFunction(Function &F) {
342365
if (!Term || Term->isUnconditional()) {
343366
if (isTopOfStack(BB))
344367
Stack.pop_back();
345-
346-
insertWaveReconverge(BB);
368+
369+
Changed |= tryWaveReconverge(BB);
347370

348371
continue;
349372
}
@@ -352,6 +375,10 @@ bool SIAnnotateControlFlow::runOnFunction(Function &F) {
352375
if (isTopOfStack(BB))
353376
Stack.pop_back();
354377

378+
// Let's take care of uniform loop latch that may be closing the Then
379+
// path of the enclosing divergent branch.
380+
Changed |= tryWaveReconverge(BB);
381+
355382
if (DT->dominates(Term->getSuccessor(1), BB))
356383
Changed |= handleLoop(Term);
357384
continue;
@@ -368,7 +395,12 @@ bool SIAnnotateControlFlow::runOnFunction(Function &F) {
368395
Stack.pop_back();
369396
}
370397

371-
Changed |= openIf(Term);
398+
if (isUniform(Term))
399+
// Uniform conditional branch may be in the block that closes the Then
400+
// path of the divergent conditional branch.
401+
Changed |= tryWaveReconverge(BB);
402+
else
403+
Changed |= openIf(Term);
372404
}
373405

374406
if (!Stack.empty()) {

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 4 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -15741,9 +15741,8 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
1574115741
}
1574215742

1574315743
// ISel inserts copy to regs for the successor PHIs
15744-
// at the BB end. We need to move the SI_WAVE_RECONVERGE right before the branch.
15745-
// Even we don't have to move SI_WAVE_RECONVERGE we need to take care of the
15746-
// S_CBRANCH_SCC0/1 as SI_WAVE_RECONVERGE overwrites SCC
15744+
// at the BB end. We need to move the SI_WAVE_RECONVERGE right before the
15745+
// branch.
1574715746
for (auto &MBB : MF) {
1574815747
for (auto &MI : MBB) {
1574915748
if (MI.getOpcode() == AMDGPU::SI_WAVE_RECONVERGE) {
@@ -15755,66 +15754,8 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
1575515754
Next++;
1575615755
}
1575715756

15758-
// Lets take care of SCC users as SI_WAVE_RECONVERGE defines SCC
15759-
bool NeedPreserveSCC =
15760-
Next != MBB.end() && Next->readsRegister(AMDGPU::SCC);
15761-
MachineBasicBlock::iterator SCCDefUse(Next);
15762-
// This loop will be never taken as we always have S_CBRANCH_SCC1/0 at
15763-
// the end of the block.
15764-
while (!NeedPreserveSCC && SCCDefUse != MBB.end()) {
15765-
if (SCCDefUse->definesRegister(AMDGPU::SCC))
15766-
// This should never happen - SCC def after the branch reading SCC
15767-
break;
15768-
if (SCCDefUse->readsRegister(AMDGPU::SCC)) {
15769-
NeedPreserveSCC = true;
15770-
break;
15771-
}
15772-
SCCDefUse++;
15773-
}
15774-
if (NeedPreserveSCC) {
15775-
MachineBasicBlock::reverse_iterator BackSeeker(Next);
15776-
while (BackSeeker != MBB.rend()) {
15777-
if (BackSeeker != MI && BackSeeker->definesRegister(AMDGPU::SCC))
15778-
break;
15779-
BackSeeker++;
15780-
}
15781-
// we need this to makes some artificial MIR tests happy
15782-
bool NeedSetSCCUndef = false;
15783-
if (BackSeeker == MBB.rend()) {
15784-
// We have reached the begin of the block but haven't seen the SCC
15785-
// def Given that the MIR is correct, we either have SCC live in
15786-
// or SCCUser SCC operand is undef. In fact, we don't need to emit
15787-
// the instructions that preserve the SCC if the use is Undef. We
15788-
// do this just because the MIR looks weird otherwise.
15789-
MachineOperand *SCCUseOp =
15790-
SCCDefUse->findRegisterUseOperand(AMDGPU::SCC, false, TRI);
15791-
assert(SCCUseOp);
15792-
bool IsSCCLiveIn = MBB.isLiveIn(AMDGPU::SCC);
15793-
bool IsUseUndef = SCCUseOp->isUndef();
15794-
NeedSetSCCUndef = (!IsSCCLiveIn && IsUseUndef);
15795-
}
15796-
MachineBasicBlock::iterator InsPt(BackSeeker);
15797-
Register SavedSCC =
15798-
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
15799-
MachineInstr *SaveSCC =
15800-
BuildMI(MBB, InsPt, InsPt->getDebugLoc(),
15801-
TII->get(AMDGPU::S_CSELECT_B32), SavedSCC)
15802-
.addImm(1)
15803-
.addImm(0);
15804-
if (NeedSetSCCUndef) {
15805-
15806-
MachineOperand *SCCOp =
15807-
SaveSCC->findRegisterUseOperand(AMDGPU::SCC, false, TRI);
15808-
if (SCCOp)
15809-
SCCOp->setIsUndef();
15810-
}
15811-
Register Tmp =
15812-
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
15813-
Next = BuildMI(MBB, Next, Next->getDebugLoc(),
15814-
TII->get(AMDGPU::S_AND_B32_term), Tmp)
15815-
.addReg(SavedSCC)
15816-
.addImm(1);
15817-
}
15757+
assert((Next == MBB.end() || !Next->readsRegister(AMDGPU::SCC)) &&
15758+
"Malformed CFG detected!\n");
1581815759

1581915760
if (NeedToMove) {
1582015761
MBB.splice(Next, &MBB, &MI);

0 commit comments

Comments
 (0)