@@ -108,6 +108,12 @@ class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
108108 : RegisterRegAllocBase(N, D, C) {}
109109};
110110
111+ class WWMRegisterRegAlloc : public RegisterRegAllocBase <WWMRegisterRegAlloc> {
112+ public:
113+ WWMRegisterRegAlloc (const char *N, const char *D, FunctionPassCtor C)
114+ : RegisterRegAllocBase(N, D, C) {}
115+ };
116+
111117static bool onlyAllocateSGPRs (const TargetRegisterInfo &TRI,
112118 const MachineRegisterInfo &MRI,
113119 const Register Reg) {
@@ -122,13 +128,24 @@ static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
122128 return !static_cast <const SIRegisterInfo &>(TRI).isSGPRClass (RC);
123129}
124130
125- // / -{sgpr|vgpr}-regalloc=... command line option.
131+ static bool onlyAllocateWWMRegs (const TargetRegisterInfo &TRI,
132+ const MachineRegisterInfo &MRI,
133+ const Register Reg) {
134+ const SIMachineFunctionInfo *MFI =
135+ MRI.getMF ().getInfo <SIMachineFunctionInfo>();
136+ const TargetRegisterClass *RC = MRI.getRegClass (Reg);
137+ return !static_cast <const SIRegisterInfo &>(TRI).isSGPRClass (RC) &&
138+ MFI->checkFlag (Reg, AMDGPU::VirtRegFlag::WWM_REG);
139+ }
140+
141+ // / -{sgpr|wwm|vgpr}-regalloc=... command line option.
126142static FunctionPass *useDefaultRegisterAllocator () { return nullptr ; }
127143
128144/// A dummy default pass factory indicates whether the register allocator is
129145/// overridden on the command line.
130146static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
131147static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
148+ static llvm::once_flag InitializeDefaultWWMRegisterAllocatorFlag;
132149
133150static SGPRRegisterRegAlloc
134151defaultSGPRRegAlloc (" default" ,
@@ -145,6 +162,11 @@ static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
145162VGPRRegAlloc (" vgpr-regalloc" , cl::Hidden, cl::init(&useDefaultRegisterAllocator),
146163 cl::desc (" Register allocator to use for VGPRs" ));
147164
165+ static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false ,
166+ RegisterPassParser<WWMRegisterRegAlloc>>
167+ WWMRegAlloc (" wwm-regalloc" , cl::Hidden,
168+ cl::init (&useDefaultRegisterAllocator),
169+ cl::desc(" Register allocator to use for WWM registers" ));
148170
149171static void initializeDefaultSGPRRegisterAllocatorOnce () {
150172 RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault ();
@@ -164,6 +186,15 @@ static void initializeDefaultVGPRRegisterAllocatorOnce() {
164186 }
165187}
166188
189+ static void initializeDefaultWWMRegisterAllocatorOnce () {
190+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault ();
191+
192+ if (!Ctor) {
193+ Ctor = WWMRegAlloc;
194+ WWMRegisterRegAlloc::setDefault (WWMRegAlloc);
195+ }
196+ }
197+
167198static FunctionPass *createBasicSGPRRegisterAllocator () {
168199 return createBasicRegisterAllocator (onlyAllocateSGPRs);
169200}
@@ -188,6 +219,18 @@ static FunctionPass *createFastVGPRRegisterAllocator() {
188219 return createFastRegisterAllocator (onlyAllocateVGPRs, true );
189220}
190221
222+ static FunctionPass *createBasicWWMRegisterAllocator () {
223+ return createBasicRegisterAllocator (onlyAllocateWWMRegs);
224+ }
225+
226+ static FunctionPass *createGreedyWWMRegisterAllocator () {
227+ return createGreedyRegisterAllocator (onlyAllocateWWMRegs);
228+ }
229+
230+ static FunctionPass *createFastWWMRegisterAllocator () {
231+ return createFastRegisterAllocator (onlyAllocateWWMRegs, false );
232+ }
233+
191234static SGPRRegisterRegAlloc basicRegAllocSGPR (
192235 " basic" , " basic register allocator" , createBasicSGPRRegisterAllocator);
193236static SGPRRegisterRegAlloc greedyRegAllocSGPR (
@@ -204,6 +247,14 @@ static VGPRRegisterRegAlloc greedyRegAllocVGPR(
204247
205248static VGPRRegisterRegAlloc fastRegAllocVGPR (
206249 " fast" , " fast register allocator" , createFastVGPRRegisterAllocator);
250+ static WWMRegisterRegAlloc basicRegAllocWWMReg (" basic" ,
251+ " basic register allocator" ,
252+ createBasicWWMRegisterAllocator);
253+ static WWMRegisterRegAlloc
254+ greedyRegAllocWWMReg (" greedy" , " greedy register allocator" ,
255+ createGreedyWWMRegisterAllocator);
256+ static WWMRegisterRegAlloc fastRegAllocWWMReg (" fast" , " fast register allocator" ,
257+ createFastWWMRegisterAllocator);
207258} // anonymous namespace
208259
209260static cl::opt<bool >
@@ -440,6 +491,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
440491 initializeAMDGPURemoveIncompatibleFunctionsPass (*PR);
441492 initializeAMDGPULowerModuleLDSLegacyPass (*PR);
442493 initializeAMDGPULowerBufferFatPointersPass (*PR);
494+ initializeAMDGPUReserveWWMRegsPass (*PR);
443495 initializeAMDGPURewriteOutArgumentsPass (*PR);
444496 initializeAMDGPURewriteUndefForPHILegacyPass (*PR);
445497 initializeAMDGPUUnifyMetadataPass (*PR);
@@ -990,6 +1042,7 @@ class GCNPassConfig final : public AMDGPUPassConfig {
9901042
9911043 FunctionPass *createSGPRAllocPass (bool Optimized);
9921044 FunctionPass *createVGPRAllocPass (bool Optimized);
1045+ FunctionPass *createWWMRegAllocPass (bool Optimized);
9931046 FunctionPass *createRegAllocPass (bool Optimized) override ;
9941047
9951048 bool addRegAssignAndRewriteFast () override ;
@@ -1383,7 +1436,6 @@ void GCNPassConfig::addOptimizedRegAlloc() {
13831436}
13841437
13851438bool GCNPassConfig::addPreRewrite () {
1386- addPass (&SILowerWWMCopiesID);
13871439 if (EnableRegReassign)
13881440 addPass (&GCNNSAReassignID);
13891441 return true ;
@@ -1419,12 +1471,28 @@ FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
14191471 return createFastVGPRRegisterAllocator ();
14201472}
14211473
1474+ FunctionPass *GCNPassConfig::createWWMRegAllocPass (bool Optimized) {
1475+ // Initialize the global default.
1476+ llvm::call_once (InitializeDefaultWWMRegisterAllocatorFlag,
1477+ initializeDefaultWWMRegisterAllocatorOnce);
1478+
1479+ RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault ();
1480+ if (Ctor != useDefaultRegisterAllocator)
1481+ return Ctor ();
1482+
1483+ if (Optimized)
1484+ return createGreedyWWMRegisterAllocator ();
1485+
1486+ return createFastWWMRegisterAllocator ();
1487+ }
1488+
14221489FunctionPass *GCNPassConfig::createRegAllocPass (bool Optimized) {
14231490 llvm_unreachable (" should not be used" );
14241491}
14251492
14261493static const char RegAllocOptNotSupportedMessage[] =
1427- " -regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc" ;
1494+ " -regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, "
1495+ " and -vgpr-regalloc" ;
14281496
14291497bool GCNPassConfig::addRegAssignAndRewriteFast () {
14301498 if (!usingDefaultRegAlloc ())
@@ -1436,11 +1504,19 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
14361504
14371505 // Equivalent of PEI for SGPRs.
14381506 addPass (&SILowerSGPRSpillsLegacyID);
1507+
1508+ // To allocate WWM registers used in whole quad mode operations (for shaders).
14391509 addPass (&SIPreAllocateWWMRegsID);
14401510
1441- addPass (createVGPRAllocPass (false ));
1511+ // For allocating other wwm register operands.
1512+ addPass (createWWMRegAllocPass (false ));
14421513
14431514 addPass (&SILowerWWMCopiesID);
1515+ addPass (&AMDGPUReserveWWMRegsID);
1516+
1517+ // For allocating per-thread VGPRs.
1518+ addPass (createVGPRAllocPass (false ));
1519+
14441520 return true ;
14451521}
14461522
@@ -1460,8 +1536,17 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
14601536
14611537 // Equivalent of PEI for SGPRs.
14621538 addPass (&SILowerSGPRSpillsLegacyID);
1539+
1540+ // To allocate WWM registers used in whole quad mode operations (for shaders).
14631541 addPass (&SIPreAllocateWWMRegsID);
14641542
1543+ // For allocating other whole wave mode registers.
1544+ addPass (createWWMRegAllocPass (true ));
1545+ addPass (&SILowerWWMCopiesID);
1546+ addPass (createVirtRegRewriter (false ));
1547+ addPass (&AMDGPUReserveWWMRegsID);
1548+
1549+ // For allocating per-thread VGPRs.
14651550 addPass (createVGPRAllocPass (true ));
14661551
14671552 addPreRewrite ();
0 commit comments