//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
- #include "SILowerI1Copies.h"
- #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
- #include "llvm/CodeGen/MachineUniformityAnalysis.h"
- #include "llvm/InitializePasses.h"

#define DEBUG_TYPE "amdgpu-global-isel-divergence-lowering"

@@ -46,146 +42,14 @@ class AMDGPUGlobalISelDivergenceLowering : public MachineFunctionPass {

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
-     AU.addRequired<MachineDominatorTree>();
-     AU.addRequired<MachinePostDominatorTree>();
-     AU.addRequired<MachineUniformityAnalysisPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

- class DivergenceLoweringHelper : public PhiLoweringHelper {
- public:
-   DivergenceLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT,
-                            MachinePostDominatorTree *PDT,
-                            MachineUniformityInfo *MUI);
-
- private:
-   MachineUniformityInfo *MUI = nullptr;
-   MachineIRBuilder B;
-   Register buildRegCopyToLaneMask(Register Reg);
-
- public:
-   void markAsLaneMask(Register DstReg) const override;
-   void getCandidatesForLowering(
-       SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override;
-   void collectIncomingValuesFromPhi(
-       const MachineInstr *MI,
-       SmallVectorImpl<Incoming> &Incomings) const override;
-   void replaceDstReg(Register NewReg, Register OldReg,
-                      MachineBasicBlock *MBB) override;
-   void buildMergeLaneMasks(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator I, const DebugLoc &DL,
-                            Register DstReg, Register PrevReg,
-                            Register CurReg) override;
-   void constrainAsLaneMask(Incoming &In) override;
- };
-
- DivergenceLoweringHelper::DivergenceLoweringHelper(
-     MachineFunction *MF, MachineDominatorTree *DT,
-     MachinePostDominatorTree *PDT, MachineUniformityInfo *MUI)
-     : PhiLoweringHelper(MF, DT, PDT), MUI(MUI), B(*MF) {}
-
- // _(s1) -> SReg_32/64(s1)
- void DivergenceLoweringHelper::markAsLaneMask(Register DstReg) const {
-   assert(MRI->getType(DstReg) == LLT::scalar(1));
-
-   if (MRI->getRegClassOrNull(DstReg)) {
-     if (MRI->constrainRegClass(DstReg, ST->getBoolRC()))
-       return;
-     llvm_unreachable("Failed to constrain register class");
-   }
-
-   MRI->setRegClass(DstReg, ST->getBoolRC());
- }
-
- void DivergenceLoweringHelper::getCandidatesForLowering(
-     SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
-   LLT S1 = LLT::scalar(1);
-
-   // Add divergent i1 phis to the list
-   for (MachineBasicBlock &MBB : *MF) {
-     for (MachineInstr &MI : MBB.phis()) {
-       Register Dst = MI.getOperand(0).getReg();
-       if (MRI->getType(Dst) == S1 && MUI->isDivergent(Dst))
-         Vreg1Phis.push_back(&MI);
-     }
-   }
- }
-
- void DivergenceLoweringHelper::collectIncomingValuesFromPhi(
-     const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
-   for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
-     Incomings.emplace_back(MI->getOperand(i).getReg(),
-                            MI->getOperand(i + 1).getMBB(), Register());
-   }
- }
-
- void DivergenceLoweringHelper::replaceDstReg(Register NewReg, Register OldReg,
-                                              MachineBasicBlock *MBB) {
-   BuildMI(*MBB, MBB->getFirstNonPHI(), {}, TII->get(AMDGPU::COPY), OldReg)
-       .addReg(NewReg);
- }
-
- // Copy Reg to new lane mask register, insert a copy after instruction that
- // defines Reg while skipping phis if needed.
- Register DivergenceLoweringHelper::buildRegCopyToLaneMask(Register Reg) {
-   Register LaneMask = createLaneMaskReg(MRI, LaneMaskRegAttrs);
-   MachineInstr *Instr = MRI->getVRegDef(Reg);
-   MachineBasicBlock *MBB = Instr->getParent();
-   B.setInsertPt(*MBB, MBB->SkipPHIsAndLabels(std::next(Instr->getIterator())));
-   B.buildCopy(LaneMask, Reg);
-   return LaneMask;
- }
-
- // bb.previous
- // %PrevReg = ...
- //
- // bb.current
- // %CurReg = ...
- //
- // %DstReg - not defined
- //
- // -> (wave32 example, new registers have sreg_32 reg class and S1 LLT)
- //
- // bb.previous
- // %PrevReg = ...
- // %PrevRegCopy:sreg_32(s1) = COPY %PrevReg
- //
- // bb.current
- // %CurReg = ...
- // %CurRegCopy:sreg_32(s1) = COPY %CurReg
- // ...
- // %PrevMaskedReg:sreg_32(s1) = ANDN2 %PrevRegCopy, ExecReg - active lanes 0
- // %CurMaskedReg:sreg_32(s1) = AND %ExecReg, CurRegCopy - inactive lanes to 0
- // %DstReg:sreg_32(s1) = OR %PrevMaskedReg, CurMaskedReg
- //
- // DstReg = for active lanes rewrite bit in PrevReg with bit from CurReg
- void DivergenceLoweringHelper::buildMergeLaneMasks(
-     MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
-     Register DstReg, Register PrevReg, Register CurReg) {
-   // DstReg = (PrevReg & !EXEC) | (CurReg & EXEC)
-   // TODO: check if inputs are constants or results of a compare.
-
-   Register PrevRegCopy = buildRegCopyToLaneMask(PrevReg);
-   Register CurRegCopy = buildRegCopyToLaneMask(CurReg);
-   Register PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
-   Register CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
-
-   B.setInsertPt(MBB, I);
-   B.buildInstr(AndN2Op, {PrevMaskedReg}, {PrevRegCopy, ExecReg});
-   B.buildInstr(AndOp, {CurMaskedReg}, {ExecReg, CurRegCopy});
-   B.buildInstr(OrOp, {DstReg}, {PrevMaskedReg, CurMaskedReg});
- }
-
- void DivergenceLoweringHelper::constrainAsLaneMask(Incoming &In) { return; }
-
} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
                      "AMDGPU GlobalISel divergence lowering", false, false)
- INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
- INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
- INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
INITIALIZE_PASS_END(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
                    "AMDGPU GlobalISel divergence lowering", false, false)

@@ -200,12 +64,5 @@ FunctionPass *llvm::createAMDGPUGlobalISelDivergenceLoweringPass() {

bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction(
    MachineFunction &MF) {
-   MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
-   MachinePostDominatorTree &PDT = getAnalysis<MachinePostDominatorTree>();
-   MachineUniformityInfo &MUI =
-       getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
-
-   DivergenceLoweringHelper Helper(&MF, &DT, &PDT, &MUI);
-
-   return Helper.lowerPhis();
+   return false;
}