@@ -105,6 +105,12 @@ class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
105105 : RegisterRegAllocBase(N, D, C) {}
106106};
107107
108+ class WWMRegisterRegAlloc : public RegisterRegAllocBase <WWMRegisterRegAlloc> {
109+ public:
110+ WWMRegisterRegAlloc (const char *N, const char *D, FunctionPassCtor C)
111+ : RegisterRegAllocBase(N, D, C) {}
112+ };
113+
108114static bool onlyAllocateSGPRs (const TargetRegisterInfo &TRI,
109115 const MachineRegisterInfo &MRI,
110116 const Register Reg) {
@@ -119,13 +125,24 @@ static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
119125 return !static_cast <const SIRegisterInfo &>(TRI).isSGPRClass (RC);
120126}
121127
122- // / -{sgpr|vgpr}-regalloc=... command line option.
128+ static bool onlyAllocateWWMRegs (const TargetRegisterInfo &TRI,
129+ const MachineRegisterInfo &MRI,
130+ const Register Reg) {
131+ const SIMachineFunctionInfo *MFI =
132+ MRI.getMF ().getInfo <SIMachineFunctionInfo>();
133+ const TargetRegisterClass *RC = MRI.getRegClass (Reg);
134+ return !static_cast <const SIRegisterInfo &>(TRI).isSGPRClass (RC) &&
135+ MFI->checkFlag (Reg, AMDGPU::VirtRegFlag::WWM_REG);
136+ }
137+
138+ // / -{sgpr|wwm|vgpr}-regalloc=... command line option.
123139static FunctionPass *useDefaultRegisterAllocator () { return nullptr ; }
124140
125141/// A dummy default pass factory indicates whether the register allocator is
126142/// overridden on the command line.
127143static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
128144static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
145+ static llvm::once_flag InitializeDefaultWWMRegisterAllocatorFlag;
129146
130147static SGPRRegisterRegAlloc
131148defaultSGPRRegAlloc (" default" ,
@@ -142,6 +159,11 @@ static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
142159VGPRRegAlloc (" vgpr-regalloc" , cl::Hidden, cl::init(&useDefaultRegisterAllocator),
143160 cl::desc (" Register allocator to use for VGPRs" ));
144161
162+ static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false ,
163+ RegisterPassParser<WWMRegisterRegAlloc>>
164+ WWMRegAlloc (" wwm-regalloc" , cl::Hidden,
165+ cl::init (&useDefaultRegisterAllocator),
166+ cl::desc(" Register allocator to use for WWM registers" ));
145167
146168static void initializeDefaultSGPRRegisterAllocatorOnce () {
147169 RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault ();
@@ -161,6 +183,15 @@ static void initializeDefaultVGPRRegisterAllocatorOnce() {
161183 }
162184}
163185
186+ static void initializeDefaultWWMRegisterAllocatorOnce () {
187+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault ();
188+
189+ if (!Ctor) {
190+ Ctor = WWMRegAlloc;
191+ WWMRegisterRegAlloc::setDefault (WWMRegAlloc);
192+ }
193+ }
194+
164195static FunctionPass *createBasicSGPRRegisterAllocator () {
165196 return createBasicRegisterAllocator (onlyAllocateSGPRs);
166197}
@@ -185,6 +216,18 @@ static FunctionPass *createFastVGPRRegisterAllocator() {
185216 return createFastRegisterAllocator (onlyAllocateVGPRs, true );
186217}
187218
219+ static FunctionPass *createBasicWWMRegisterAllocator () {
220+ return createBasicRegisterAllocator (onlyAllocateWWMRegs);
221+ }
222+
223+ static FunctionPass *createGreedyWWMRegisterAllocator () {
224+ return createGreedyRegisterAllocator (onlyAllocateWWMRegs);
225+ }
226+
227+ static FunctionPass *createFastWWMRegisterAllocator () {
228+ return createFastRegisterAllocator (onlyAllocateWWMRegs, false );
229+ }
230+
188231static SGPRRegisterRegAlloc basicRegAllocSGPR (
189232 " basic" , " basic register allocator" , createBasicSGPRRegisterAllocator);
190233static SGPRRegisterRegAlloc greedyRegAllocSGPR (
@@ -201,6 +244,14 @@ static VGPRRegisterRegAlloc greedyRegAllocVGPR(
201244
202245static VGPRRegisterRegAlloc fastRegAllocVGPR (
203246 " fast" , " fast register allocator" , createFastVGPRRegisterAllocator);
247+ static WWMRegisterRegAlloc basicRegAllocWWMReg (" basic" ,
248+ " basic register allocator" ,
249+ createBasicWWMRegisterAllocator);
250+ static WWMRegisterRegAlloc
251+ greedyRegAllocWWMReg (" greedy" , " greedy register allocator" ,
252+ createGreedyWWMRegisterAllocator);
253+ static WWMRegisterRegAlloc fastRegAllocWWMReg (" fast" , " fast register allocator" ,
254+ createFastWWMRegisterAllocator);
204255} // anonymous namespace
205256
206257static cl::opt<bool >
@@ -443,6 +494,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
443494 initializeAMDGPURemoveIncompatibleFunctionsPass (*PR);
444495 initializeAMDGPULowerModuleLDSLegacyPass (*PR);
445496 initializeAMDGPULowerBufferFatPointersPass (*PR);
497+ initializeAMDGPUReserveWWMRegsPass (*PR);
446498 initializeAMDGPURewriteOutArgumentsPass (*PR);
447499 initializeAMDGPURewriteUndefForPHILegacyPass (*PR);
448500 initializeAMDGPUUnifyMetadataPass (*PR);
@@ -994,6 +1046,7 @@ class GCNPassConfig final : public AMDGPUPassConfig {
9941046
9951047 FunctionPass *createSGPRAllocPass (bool Optimized);
9961048 FunctionPass *createVGPRAllocPass (bool Optimized);
1049+ FunctionPass *createWWMRegAllocPass (bool Optimized);
9971050 FunctionPass *createRegAllocPass (bool Optimized) override ;
9981051
9991052 bool addRegAssignAndRewriteFast () override ;
@@ -1387,7 +1440,6 @@ void GCNPassConfig::addOptimizedRegAlloc() {
13871440}
13881441
13891442bool GCNPassConfig::addPreRewrite () {
1390- addPass (&SILowerWWMCopiesID);
13911443 if (EnableRegReassign)
13921444 addPass (&GCNNSAReassignID);
13931445 return true ;
@@ -1423,12 +1475,28 @@ FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
14231475 return createFastVGPRRegisterAllocator ();
14241476}
14251477
1478+ FunctionPass *GCNPassConfig::createWWMRegAllocPass (bool Optimized) {
1479+ // Initialize the global default.
1480+ llvm::call_once (InitializeDefaultWWMRegisterAllocatorFlag,
1481+ initializeDefaultWWMRegisterAllocatorOnce);
1482+
1483+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault ();
1484+ if (Ctor != useDefaultRegisterAllocator)
1485+ return Ctor ();
1486+
1487+ if (Optimized)
1488+ return createGreedyWWMRegisterAllocator ();
1489+
1490+ return createFastWWMRegisterAllocator ();
1491+ }
1492+
14261493FunctionPass *GCNPassConfig::createRegAllocPass (bool Optimized) {
14271494 llvm_unreachable (" should not be used" );
14281495}
14291496
14301497static const char RegAllocOptNotSupportedMessage[] =
1431- " -regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc" ;
1498+ " -regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, "
1499+ " and -vgpr-regalloc" ;
14321500
14331501bool GCNPassConfig::addRegAssignAndRewriteFast () {
14341502 if (!usingDefaultRegAlloc ())
@@ -1440,11 +1508,19 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
14401508
14411509 // Equivalent of PEI for SGPRs.
14421510 addPass (&SILowerSGPRSpillsID);
1511+
1512+ // To allocate WWM registers used in whole quad mode operations (for shaders).
14431513 addPass (&SIPreAllocateWWMRegsID);
14441514
1445- addPass (createVGPRAllocPass (false ));
1515+ // For allocating other wwm register operands.
1516+ addPass (createWWMRegAllocPass (false ));
14461517
14471518 addPass (&SILowerWWMCopiesID);
1519+ addPass (&AMDGPUReserveWWMRegsID);
1520+
1521+ // For allocating per-thread VGPRs.
1522+ addPass (createVGPRAllocPass (false ));
1523+
14481524 return true ;
14491525}
14501526
@@ -1464,8 +1540,17 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
14641540
14651541 // Equivalent of PEI for SGPRs.
14661542 addPass (&SILowerSGPRSpillsID);
1543+
1544+ // To allocate WWM registers used in whole quad mode operations (for shaders).
14671545 addPass (&SIPreAllocateWWMRegsID);
14681546
1547+ // For allocating other whole wave mode registers.
1548+ addPass (createWWMRegAllocPass (true ));
1549+ addPass (&SILowerWWMCopiesID);
1550+ addPass (createVirtRegRewriter (false ));
1551+ addPass (&AMDGPUReserveWWMRegsID);
1552+
1553+ // For allocating per-thread VGPRs.
14691554 addPass (createVGPRAllocPass (true ));
14701555
14711556 addPreRewrite ();
0 commit comments