diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h index 2367d8d04787d..7a2c23c13a3ce 100644 --- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h @@ -184,6 +184,8 @@ class MachineRegisterInfo { TheDelegate->MRI_NoteCloneVirtualRegister(NewReg, SrcReg); } + const MachineFunction &getMF() const { return *MF; } + //===--------------------------------------------------------------------===// // Function State //===--------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 4abb5a63ab6d2..342d55e828bca 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -57,6 +57,7 @@ FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *); ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *); FunctionPass *createAMDGPUCodeGenPreparePass(); FunctionPass *createAMDGPULateCodeGenPrepareLegacyPass(); +FunctionPass *createAMDGPUReserveWWMRegsPass(); FunctionPass *createAMDGPURewriteOutArgumentsPass(); ModulePass * createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM = nullptr); @@ -154,6 +155,9 @@ struct AMDGPULowerBufferFatPointersPass const TargetMachine &TM; }; +void initializeAMDGPUReserveWWMRegsPass(PassRegistry &); +extern char &AMDGPUReserveWWMRegsID; + void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &); extern char &AMDGPURewriteOutArgumentsID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUReserveWWMRegs.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReserveWWMRegs.cpp new file mode 100644 index 0000000000000..7dc492a8f7adf --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUReserveWWMRegs.cpp @@ -0,0 +1,96 @@ +//===-- AMDGPUReserveWWMRegs.cpp - Add WWM Regs to reserved regs list -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass should be invoked at the end of the wwm-regalloc pipeline. +/// It identifies the WWM regs allocated during this pipeline and adds +/// them to the list of reserved registers so that they won't be available for +/// per-thread VGPR allocation in the subsequent regalloc pipeline. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "GCNSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/InitializePasses.h" + +using namespace llvm; + +#define DEBUG_TYPE "amdgpu-reserve-wwm-regs" + +namespace { + +class AMDGPUReserveWWMRegs : public MachineFunctionPass { +public: + static char ID; + + AMDGPUReserveWWMRegs() : MachineFunctionPass(ID) { + initializeAMDGPUReserveWWMRegsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { + return "AMDGPU Reserve WWM Registers"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; + +} // End anonymous namespace. 
+ +INITIALIZE_PASS(AMDGPUReserveWWMRegs, DEBUG_TYPE, + "AMDGPU Reserve WWM Registers", false, false) + +char AMDGPUReserveWWMRegs::ID = 0; + +char &llvm::AMDGPUReserveWWMRegsID = AMDGPUReserveWWMRegs::ID; + +bool AMDGPUReserveWWMRegs::runOnMachineFunction(MachineFunction &MF) { + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + + bool Changed = false; + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + unsigned Opc = MI.getOpcode(); + if (Opc != AMDGPU::SI_SPILL_S32_TO_VGPR && + Opc != AMDGPU::SI_RESTORE_S32_FROM_VGPR) + continue; + + Register Reg = Opc == AMDGPU::SI_SPILL_S32_TO_VGPR + ? MI.getOperand(0).getReg() + : MI.getOperand(1).getReg(); + + assert(Reg.isPhysical() && + "All WWM registers should have been allocated by now."); + + MFI->reserveWWMRegister(Reg); + Changed = true; + } + } + + // The renamable flag can't be set for reserved registers. Reset the flag for + // MOs involving wwm-regs as they will be reserved during the vgpr-regalloc + // pipeline. + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (Register Reg : MFI->getWWMReservedRegs()) { + for (MachineOperand &MO : MRI.reg_operands(Reg)) + MO.setIsRenamable(false); + } + + // Now clear the NonWWMRegMask earlier set during wwm-regalloc. 
+ MFI->clearNonWWMRegAllocMask(); + + return Changed; +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index ce3d70332d0a6..1f2148c2922de 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -108,6 +108,12 @@ class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> { : RegisterRegAllocBase(N, D, C) {} }; +class WWMRegisterRegAlloc : public RegisterRegAllocBase<WWMRegisterRegAlloc> { +public: + WWMRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C) + : RegisterRegAllocBase(N, D, C) {} +}; + static bool onlyAllocateSGPRs(const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, const Register Reg) { @@ -122,13 +128,24 @@ static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI, return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC); } -/// -{sgpr|vgpr}-regalloc=... command line option. +static bool onlyAllocateWWMRegs(const TargetRegisterInfo &TRI, + const MachineRegisterInfo &MRI, + const Register Reg) { + const SIMachineFunctionInfo *MFI = + MRI.getMF().getInfo<SIMachineFunctionInfo>(); + const TargetRegisterClass *RC = MRI.getRegClass(Reg); + return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(RC) && + MFI->checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG); +} + +/// -{sgpr|wwm|vgpr}-regalloc=... command line option. static FunctionPass *useDefaultRegisterAllocator() { return nullptr; } /// A dummy default pass factory indicates whether the register allocator is /// overridden on the command line. 
static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag; static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag; +static llvm::once_flag InitializeDefaultWWMRegisterAllocatorFlag; static SGPRRegisterRegAlloc defaultSGPRRegAlloc("default", @@ -145,6 +162,11 @@ static cl::opt> + WWMRegAlloc("wwm-regalloc", cl::Hidden, + cl::init(&useDefaultRegisterAllocator), + cl::desc("Register allocator to use for WWM registers")); static void initializeDefaultSGPRRegisterAllocatorOnce() { RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault(); @@ -164,6 +186,15 @@ static void initializeDefaultVGPRRegisterAllocatorOnce() { } } +static void initializeDefaultWWMRegisterAllocatorOnce() { + RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault(); + + if (!Ctor) { + Ctor = WWMRegAlloc; + WWMRegisterRegAlloc::setDefault(WWMRegAlloc); + } +} + static FunctionPass *createBasicSGPRRegisterAllocator() { return createBasicRegisterAllocator(onlyAllocateSGPRs); } @@ -188,6 +219,18 @@ static FunctionPass *createFastVGPRRegisterAllocator() { return createFastRegisterAllocator(onlyAllocateVGPRs, true); } +static FunctionPass *createBasicWWMRegisterAllocator() { + return createBasicRegisterAllocator(onlyAllocateWWMRegs); +} + +static FunctionPass *createGreedyWWMRegisterAllocator() { + return createGreedyRegisterAllocator(onlyAllocateWWMRegs); +} + +static FunctionPass *createFastWWMRegisterAllocator() { + return createFastRegisterAllocator(onlyAllocateWWMRegs, false); +} + static SGPRRegisterRegAlloc basicRegAllocSGPR( "basic", "basic register allocator", createBasicSGPRRegisterAllocator); static SGPRRegisterRegAlloc greedyRegAllocSGPR( @@ -204,6 +247,14 @@ static VGPRRegisterRegAlloc greedyRegAllocVGPR( static VGPRRegisterRegAlloc fastRegAllocVGPR( "fast", "fast register allocator", createFastVGPRRegisterAllocator); +static WWMRegisterRegAlloc basicRegAllocWWMReg("basic", + "basic register allocator", + 
createBasicWWMRegisterAllocator); +static WWMRegisterRegAlloc + greedyRegAllocWWMReg("greedy", "greedy register allocator", + createGreedyWWMRegisterAllocator); +static WWMRegisterRegAlloc fastRegAllocWWMReg("fast", "fast register allocator", + createFastWWMRegisterAllocator); } // anonymous namespace static cl::opt @@ -440,6 +491,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeAMDGPURemoveIncompatibleFunctionsPass(*PR); initializeAMDGPULowerModuleLDSLegacyPass(*PR); initializeAMDGPULowerBufferFatPointersPass(*PR); + initializeAMDGPUReserveWWMRegsPass(*PR); initializeAMDGPURewriteOutArgumentsPass(*PR); initializeAMDGPURewriteUndefForPHILegacyPass(*PR); initializeAMDGPUUnifyMetadataPass(*PR); @@ -989,6 +1041,7 @@ class GCNPassConfig final : public AMDGPUPassConfig { FunctionPass *createSGPRAllocPass(bool Optimized); FunctionPass *createVGPRAllocPass(bool Optimized); + FunctionPass *createWWMRegAllocPass(bool Optimized); FunctionPass *createRegAllocPass(bool Optimized) override; bool addRegAssignAndRewriteFast() override; @@ -1382,7 +1435,6 @@ void GCNPassConfig::addOptimizedRegAlloc() { } bool GCNPassConfig::addPreRewrite() { - addPass(&SILowerWWMCopiesID); if (EnableRegReassign) addPass(&GCNNSAReassignID); return true; @@ -1418,12 +1470,28 @@ FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) { return createFastVGPRRegisterAllocator(); } +FunctionPass *GCNPassConfig::createWWMRegAllocPass(bool Optimized) { + // Initialize the global default. 
+ llvm::call_once(InitializeDefaultWWMRegisterAllocatorFlag, + initializeDefaultWWMRegisterAllocatorOnce); + + RegisterRegAlloc::FunctionPassCtor Ctor = WWMRegisterRegAlloc::getDefault(); + if (Ctor != useDefaultRegisterAllocator) + return Ctor(); + + if (Optimized) + return createGreedyWWMRegisterAllocator(); + + return createFastWWMRegisterAllocator(); +} + FunctionPass *GCNPassConfig::createRegAllocPass(bool Optimized) { llvm_unreachable("should not be used"); } static const char RegAllocOptNotSupportedMessage[] = - "-regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc"; + "-regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, " + "and -vgpr-regalloc"; bool GCNPassConfig::addRegAssignAndRewriteFast() { if (!usingDefaultRegAlloc()) @@ -1435,11 +1503,19 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() { // Equivalent of PEI for SGPRs. addPass(&SILowerSGPRSpillsLegacyID); + + // To Allocate wwm registers used in whole quad mode operations (for shaders). addPass(&SIPreAllocateWWMRegsID); - addPass(createVGPRAllocPass(false)); + // For allocating other wwm register operands. + addPass(createWWMRegAllocPass(false)); addPass(&SILowerWWMCopiesID); + addPass(&AMDGPUReserveWWMRegsID); + + // For allocating per-thread VGPRs. + addPass(createVGPRAllocPass(false)); + return true; } @@ -1459,8 +1535,17 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() { // Equivalent of PEI for SGPRs. addPass(&SILowerSGPRSpillsLegacyID); + + // To Allocate wwm registers used in whole quad mode operations (for shaders). addPass(&SIPreAllocateWWMRegsID); + // For allocating other whole wave mode registers. + addPass(createWWMRegAllocPass(true)); + addPass(&SILowerWWMCopiesID); + addPass(createVirtRegRewriter(false)); + addPass(&AMDGPUReserveWWMRegsID); + + // For allocating per-thread VGPRs. 
addPass(createVGPRAllocPass(true)); addPreRewrite(); diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index 4605be344f731..fed29c3e14aae 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -95,6 +95,7 @@ add_llvm_target(AMDGPUCodeGen AMDGPURegBankSelect.cpp AMDGPURegisterBankInfo.cpp AMDGPURemoveIncompatibleFunctions.cpp + AMDGPUReserveWWMRegs.cpp AMDGPUResourceUsageAnalysis.cpp AMDGPURewriteOutArguments.cpp AMDGPURewriteUndefForPHI.cpp diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 07505110476b5..3d1657392884f 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -1341,13 +1341,6 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( MachineRegisterInfo &MRI = MF.getRegInfo(); SIMachineFunctionInfo *FuncInfo = MF.getInfo(); - // Allocate spill slots for WWM reserved VGPRs. - for (Register Reg : FuncInfo->getWWMReservedRegs()) { - const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg); - FuncInfo->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC), - TRI->getSpillAlign(*RC)); - } - const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs() && EnableSpillVGPRToAGPR; @@ -1573,11 +1566,7 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall()) return; - MFI->shiftSpillPhysVGPRsToLowestRange(MF); - TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS); - if (MFI->isEntryFunction()) - return; const GCNSubtarget &ST = MF.getSubtarget(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); @@ -1587,19 +1576,9 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, MachineInstr *ReturnMI = nullptr; for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : MBB) { - // WRITELANE instructions used for SGPR spills can overwrite the inactive - // lanes of 
VGPRs and callee must spill and restore them even if they are - // marked Caller-saved. - - // TODO: Handle this elsewhere at an early point. Walking through all MBBs - // here would be a bad heuristic. A better way should be by calling - // allocateWWMSpill during the regalloc pipeline whenever a physical - // register is allocated for the intended virtual registers. - if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR) - MFI->allocateWWMSpill(MF, MI.getOperand(0).getReg()); - else if (MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR) - MFI->allocateWWMSpill(MF, MI.getOperand(1).getReg()); - else if (TII->isWWMRegSpillOpcode(MI.getOpcode())) + // TODO: Walking through all MBBs here would be a bad heuristic. Better + // handle them elsewhere. + if (TII->isWWMRegSpillOpcode(MI.getOpcode())) NeedExecCopyReservedReg = true; else if (MI.getOpcode() == AMDGPU::SI_RETURN || MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG || @@ -1614,6 +1593,23 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, } } + SmallVector SortedWWMVGPRs; + for (Register Reg : MFI->getWWMReservedRegs()) { + // The shift-back is needed only for the VGPRs used for SGPR spills and they + // are of 32-bit size. SIPreAllocateWWMRegs pass can add tuples into WWM + // reserved registers. + const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg); + if (TRI->getRegSizeInBits(*RC) > 32) + continue; + SortedWWMVGPRs.push_back(Reg); + } + + sort(SortedWWMVGPRs, std::greater()); + MFI->shiftWwmVGPRsToLowestRange(MF, SortedWWMVGPRs, SavedVGPRs); + + if (MFI->isEntryFunction()) + return; + // Remove any VGPRs used in the return value because these do not need to be saved. // This prevents CSR restore from clobbering return VGPRs. if (ReturnMI) { @@ -1623,6 +1619,13 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, } } + // Create the stack objects for WWM registers now. 
+ for (Register Reg : MFI->getWWMReservedRegs()) { + const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg); + MFI->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC), + TRI->getSpillAlign(*RC)); + } + // Ignore the SGPRs the default implementation found. SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask()); @@ -1638,14 +1641,6 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF, // allow the default insertion to handle them. for (auto &Reg : MFI->getWWMSpills()) SavedVGPRs.reset(Reg.first); - - // Mark all lane VGPRs as BB LiveIns. - for (MachineBasicBlock &MBB : MF) { - for (auto &Reg : MFI->getWWMSpills()) - MBB.addLiveIn(Reg.first); - - MBB.sortUniqueLiveIns(); - } } void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF, diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 9afb29d95abd7..8073aca7f197f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -931,6 +931,7 @@ def SI_SPILL_S32_TO_VGPR : PseudoInstSI <(outs VGPR_32:$vdst), let hasSideEffects = 0; let mayLoad = 0; let mayStore = 0; + let hasExtraDefRegAllocReq = 1; let Constraints = "$vdst = $vdst_in"; } @@ -941,6 +942,7 @@ def SI_RESTORE_S32_FROM_VGPR : PseudoInstSI <(outs SReg_32:$sdst), let hasSideEffects = 0; let mayLoad = 0; let mayStore = 0; + let hasExtraSrcRegAllocReq = 1; } } // End Spill = 1, VALU = 1, isConvergent = 1 diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index 35e5bea9ae16e..822336ebaf5dc 100644 --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -21,6 +21,7 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include 
"llvm/InitializePasses.h" @@ -33,12 +34,18 @@ using MBBVector = SmallVector; namespace { +static cl::opt MaxNumVGPRsForWwmAllocation( + "amdgpu-num-vgprs-for-wwm-alloc", + cl::desc("Max num VGPRs for whole-wave register allocation."), + cl::ReallyHidden, cl::init(10)); + class SILowerSGPRSpills { private: const SIRegisterInfo *TRI = nullptr; const SIInstrInfo *TII = nullptr; LiveIntervals *LIS = nullptr; SlotIndexes *Indexes = nullptr; + MachineDominatorTree *MDT = nullptr; // Save and Restore blocks of the current function. Typically there is a // single save block, unless Windows EH funclets are involved. @@ -46,13 +53,17 @@ class SILowerSGPRSpills { MBBVector RestoreBlocks; public: - SILowerSGPRSpills(LiveIntervals *LIS, SlotIndexes *Indexes) - : LIS(LIS), Indexes(Indexes) {} + SILowerSGPRSpills(LiveIntervals *LIS, SlotIndexes *Indexes, + MachineDominatorTree *MDT) + : LIS(LIS), Indexes(Indexes), MDT(MDT) {} bool run(MachineFunction &MF); void calculateSaveRestoreBlocks(MachineFunction &MF); bool spillCalleeSavedRegs(MachineFunction &MF, SmallVectorImpl &CalleeSavedFIs); - void extendWWMVirtRegLiveness(MachineFunction &MF, LiveIntervals *LIS); + void updateLaneVGPRDomInstr( + int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt, + DenseMap &LaneVGPRDomInstr); + void determineRegsForWWMAllocation(MachineFunction &MF, BitVector &RegMask); }; class SILowerSGPRSpillsLegacy : public MachineFunctionPass { @@ -64,6 +75,7 @@ class SILowerSGPRSpillsLegacy : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -84,6 +96,7 @@ INITIALIZE_PASS_BEGIN(SILowerSGPRSpillsLegacy, DEBUG_TYPE, "SI lower SGPR spill instructions", false, false) INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_END(SILowerSGPRSpillsLegacy, DEBUG_TYPE, "SI lower SGPR spill instructions", false, false) @@ -266,51 +279,90 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs( return false; } -void SILowerSGPRSpills::extendWWMVirtRegLiveness(MachineFunction &MF, - LiveIntervals *LIS) { - // TODO: This is a workaround to avoid the unmodelled liveness computed with - // whole-wave virtual registers when allocated together with the regular VGPR - // virtual registers. Presently, the liveness computed during the regalloc is - // only uniform (or single lane aware) and it doesn't take account of the - // divergent control flow that exists for our GPUs. Since the WWM registers - // can modify inactive lanes, the wave-aware liveness should be computed for - // the virtual registers to accurately plot their interferences. Without - // having the divergent CFG for the function, it is difficult to implement the - // wave-aware liveness info. Until then, we conservatively extend the liveness - // of the wwm registers into the entire function so that they won't be reused - // without first spilling/splitting their liveranges. - SIMachineFunctionInfo *MFI = MF.getInfo(); - - // Insert the IMPLICIT_DEF for the wwm-registers in the entry blocks. 
- for (auto Reg : MFI->getSGPRSpillVGPRs()) { - for (MachineBasicBlock *SaveBlock : SaveBlocks) { - MachineBasicBlock::iterator InsertBefore = SaveBlock->begin(); - DebugLoc DL = SaveBlock->findDebugLoc(InsertBefore); - auto MIB = BuildMI(*SaveBlock, InsertBefore, DL, - TII->get(AMDGPU::IMPLICIT_DEF), Reg); - MFI->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG); - // Set SGPR_SPILL asm printer flag - MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL); - if (LIS) { - LIS->InsertMachineInstrInMaps(*MIB); +void SILowerSGPRSpills::updateLaneVGPRDomInstr( + int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt, + DenseMap &LaneVGPRDomInstr) { + // For the Def of a virtual LaneVPGR to dominate all its uses, we should + // insert an IMPLICIT_DEF before the dominating spill. Switching to a + // depth first order doesn't really help since the machine function can be in + // the unstructured control flow post-SSA. For each virtual register, hence + // finding the common dominator to get either the dominating spill or a block + // dominating all spills. + SIMachineFunctionInfo *FuncInfo = + MBB->getParent()->getInfo(); + ArrayRef VGPRSpills = + FuncInfo->getSGPRSpillToVirtualVGPRLanes(FI); + Register PrevLaneVGPR; + for (auto &Spill : VGPRSpills) { + if (PrevLaneVGPR == Spill.VGPR) + continue; + + PrevLaneVGPR = Spill.VGPR; + auto I = LaneVGPRDomInstr.find(Spill.VGPR); + if (Spill.Lane == 0 && I == LaneVGPRDomInstr.end()) { + // Initially add the spill instruction itself for Insertion point. + LaneVGPRDomInstr[Spill.VGPR] = InsertPt; + } else { + assert(I != LaneVGPRDomInstr.end()); + auto PrevInsertPt = I->second; + MachineBasicBlock *DomMBB = PrevInsertPt->getParent(); + if (DomMBB == MBB) { + // The insertion point earlier selected in a predecessor block whose + // spills are currently being lowered. The earlier InsertPt would be + // the one just before the block terminator and it should be changed + // if we insert any new spill in it. 
+ if (MDT->dominates(&*InsertPt, &*PrevInsertPt)) + I->second = InsertPt; + + continue; } + + // Find the common dominator block between PrevInsertPt and the + // current spill. + DomMBB = MDT->findNearestCommonDominator(DomMBB, MBB); + if (DomMBB == MBB) + I->second = InsertPt; + else if (DomMBB != PrevInsertPt->getParent()) + I->second = &(*DomMBB->getFirstTerminator()); } } +} - // Insert the KILL in the return blocks to extend their liveness untill the - // end of function. Insert a separate KILL for each VGPR. - for (MachineBasicBlock *RestoreBlock : RestoreBlocks) { - MachineBasicBlock::iterator InsertBefore = - RestoreBlock->getFirstTerminator(); - DebugLoc DL = RestoreBlock->findDebugLoc(InsertBefore); - for (auto Reg : MFI->getSGPRSpillVGPRs()) { - auto MIB = BuildMI(*RestoreBlock, InsertBefore, DL, - TII->get(TargetOpcode::KILL)); - MIB.addReg(Reg); - if (LIS) - LIS->InsertMachineInstrInMaps(*MIB); +void SILowerSGPRSpills::determineRegsForWWMAllocation(MachineFunction &MF, + BitVector &RegMask) { + // Determine an optimal number of VGPRs for WWM allocation. The complement + // list will be available for allocating other VGPR virtual registers. + SIMachineFunctionInfo *MFI = MF.getInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + BitVector ReservedRegs = TRI->getReservedRegs(MF); + BitVector NonWwmAllocMask(TRI->getNumRegs()); + + // FIXME: MaxNumVGPRsForWwmAllocation might need to be adjusted in the future + // to have a balanced allocation between WWM values and per-thread vector + // register operands. + unsigned NumRegs = MaxNumVGPRsForWwmAllocation; + NumRegs = + std::min(static_cast(MFI->getSGPRSpillVGPRs().size()), NumRegs); + + auto [MaxNumVGPRs, MaxNumAGPRs] = TRI->getMaxNumVectorRegs(MF); + // Try to use the highest available registers for now. Later after + // vgpr-regalloc, they can be shifted to the lowest range. 
+ unsigned I = 0; + for (unsigned Reg = AMDGPU::VGPR0 + MaxNumVGPRs - 1; + (I < NumRegs) && (Reg >= AMDGPU::VGPR0); --Reg) { + if (!ReservedRegs.test(Reg) && + !MRI.isPhysRegUsed(Reg, /*SkipRegMaskTest=*/true)) { + TRI->markSuperRegs(RegMask, Reg); + ++I; } } + + if (I != NumRegs) { + // Reserve an arbitrary register and report the error. + TRI->markSuperRegs(RegMask, AMDGPU::VGPR0); + MF.getFunction().getContext().emitError( + "can't find enough VGPRs for wwm-regalloc"); + } } bool SILowerSGPRSpillsLegacy::runOnMachineFunction(MachineFunction &MF) { @@ -318,7 +370,9 @@ bool SILowerSGPRSpillsLegacy::runOnMachineFunction(MachineFunction &MF) { LiveIntervals *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr; auto *SIWrapper = getAnalysisIfAvailable(); SlotIndexes *Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr; - return SILowerSGPRSpills(LIS, Indexes).run(MF); + MachineDominatorTree *MDT = + &getAnalysis().getDomTree(); + return SILowerSGPRSpills(LIS, Indexes, MDT).run(MF); } bool SILowerSGPRSpills::run(MachineFunction &MF) { @@ -361,6 +415,9 @@ bool SILowerSGPRSpills::run(MachineFunction &MF) { // To track the spill frame indices handled in this pass. BitVector SpillFIs(MFI.getObjectIndexEnd(), false); + // To track the IMPLICIT_DEF insertion point for the lane vgprs. 
+ DenseMap LaneVGPRDomInstr; + for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { if (!TII->isSGPRSpill(MI)) @@ -390,6 +447,7 @@ bool SILowerSGPRSpills::run(MachineFunction &MF) { "failed to spill SGPR to physical VGPR lane when allocated"); } } else { + MachineInstrSpan MIS(&MI, &MBB); if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) { bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex( MI, FI, nullptr, Indexes, LIS); @@ -397,21 +455,47 @@ bool SILowerSGPRSpills::run(MachineFunction &MF) { llvm_unreachable( "failed to spill SGPR to virtual VGPR lane when allocated"); SpillFIs.set(FI); + updateLaneVGPRDomInstr(FI, &MBB, MIS.begin(), LaneVGPRDomInstr); SpilledToVirtVGPRLanes = true; } } } } - if (SpilledToVirtVGPRLanes) { - extendWWMVirtRegLiveness(MF, LIS); + for (auto Reg : FuncInfo->getSGPRSpillVGPRs()) { + auto InsertPt = LaneVGPRDomInstr[Reg]; + // Insert the IMPLICIT_DEF at the identified points. + MachineBasicBlock &Block = *InsertPt->getParent(); + DebugLoc DL = Block.findDebugLoc(InsertPt); + auto MIB = + BuildMI(Block, *InsertPt, DL, TII->get(AMDGPU::IMPLICIT_DEF), Reg); + + // Add WWM flag to the virtual register. + FuncInfo->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG); + + // Set SGPR_SPILL asm printer flag + MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL); if (LIS) { - // Compute the LiveInterval for the newly created virtual registers. - for (auto Reg : FuncInfo->getSGPRSpillVGPRs()) - LIS->createAndComputeVirtRegInterval(Reg); + LIS->InsertMachineInstrInMaps(*MIB); + LIS->createAndComputeVirtRegInterval(Reg); } } + // Determine the registers for WWM allocation and also compute the register + // mask for non-wwm VGPR allocation. 
+ if (FuncInfo->getSGPRSpillVGPRs().size()) { + BitVector WwmRegMask(TRI->getNumRegs()); + + determineRegsForWWMAllocation(MF, WwmRegMask); + + BitVector NonWwmRegMask(WwmRegMask); + NonWwmRegMask.flip().clearBitsNotInMask(TRI->getAllVGPRRegMask()); + + // The complement set will be the registers for non-wwm (per-thread) vgpr + // allocation. + FuncInfo->updateNonWWMRegMask(NonWwmRegMask); + } + for (MachineBasicBlock &MBB : MF) { // FIXME: The dead frame indices are replaced with a null register from // the debug value instructions. We should instead, update it with the @@ -468,6 +552,7 @@ SILowerSGPRSpillsPass::run(MachineFunction &MF, MFPropsModifier _(*this, MF); auto *LIS = MFAM.getCachedResult(MF); auto *Indexes = MFAM.getCachedResult(MF); - SILowerSGPRSpills(LIS, Indexes).run(MF); + MachineDominatorTree *MDT = &MFAM.getResult(MF); + SILowerSGPRSpills(LIS, Indexes, MDT).run(MF); return PreservedAnalyses::all(); } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index f59d29bd81403..8be9a082a7fd0 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -325,11 +325,13 @@ bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs, return false; } -void SIMachineFunctionInfo::shiftSpillPhysVGPRsToLowestRange( - MachineFunction &MF) { +void SIMachineFunctionInfo::shiftWwmVGPRsToLowestRange( + MachineFunction &MF, SmallVectorImpl &WWMVGPRs, + BitVector &SavedVGPRs) { const SIRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); - for (Register &Reg : SpillPhysVGPRs) { + for (unsigned I = 0, E = WWMVGPRs.size(); I < E; ++I) { + Register Reg = WWMVGPRs[I]; Register NewReg = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF); if (!NewReg || NewReg >= Reg) @@ -338,10 +340,22 @@ void SIMachineFunctionInfo::shiftSpillPhysVGPRsToLowestRange( MRI.replaceRegWith(Reg, NewReg); // 
Update various tables with the new VGPR. + WWMVGPRs[I] = NewReg; WWMReservedRegs.remove(Reg); WWMReservedRegs.insert(NewReg); - WWMSpills.insert(std::make_pair(NewReg, WWMSpills[Reg])); - WWMSpills.erase(Reg); + MRI.reserveReg(NewReg, TRI); + + // Replace the register in SpillPhysVGPRs. This is needed to look for free + // lanes while spilling special SGPRs like FP, BP, etc. during PEI. + auto RegItr = std::find(SpillPhysVGPRs.begin(), SpillPhysVGPRs.end(), Reg); + if (RegItr != SpillPhysVGPRs.end()) { + unsigned Idx = std::distance(SpillPhysVGPRs.begin(), RegItr); + SpillPhysVGPRs[Idx] = NewReg; + } + + // The generic `determineCalleeSaves` might have set the old register if it + // is in the CSR range. + SavedVGPRs.reset(Reg); for (MachineBasicBlock &MBB : MF) { MBB.removeLiveIn(Reg); @@ -386,7 +400,9 @@ bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills( return false; } - allocateWWMSpill(MF, LaneVGPR); + if (IsPrologEpilog) + allocateWWMSpill(MF, LaneVGPR); + reserveWWMRegister(LaneVGPR); for (MachineBasicBlock &MBB : MF) { MBB.addLiveIn(LaneVGPR); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index aff0b34947d68..669f98dd865d6 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -524,6 +524,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, // the VGPR and its stack slot index. WWMSpillsMap WWMSpills; + // Before allocation, the VGPR registers are partitioned into two distinct + // sets, the first one for WWM-values and the second set for non-WWM values. + // The latter set should be reserved during WWM-regalloc. + BitVector NonWWMRegMask; + using ReservedRegSet = SmallSetVector; // To track the VGPRs reserved for WWM instructions. 
They get stack slots // later during PrologEpilogInserter and get added into the superset WWMSpills @@ -590,6 +595,12 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, void reserveWWMRegister(Register Reg) { WWMReservedRegs.insert(Reg); } + void updateNonWWMRegMask(const BitVector &RegMask) { + NonWWMRegMask = RegMask; + } + const BitVector &getNonWWMRegMask() const { return NonWWMRegMask; } + void clearNonWWMRegAllocMask() { NonWWMRegMask.clear(); } + SIModeRegisterDefaults getMode() const { return Mode; } ArrayRef<SIRegisterInfo::SpilledReg> @@ -729,9 +740,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction, I->second.IsDead = true; } - // To bring the Physical VGPRs in the highest range allocated for CSR SGPR - // spilling into the lowest available range. - void shiftSpillPhysVGPRsToLowestRange(MachineFunction &MF); + // To bring the allocated WWM registers in \p WWMVGPRs to the lowest available + // range. + void shiftWwmVGPRsToLowestRange(MachineFunction &MF, + SmallVectorImpl<Register> &WWMVGPRs, + BitVector &SavedVGPRs); bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool SpillToPhysVGPRLane = false, diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 2d1cd1bda3afe..d7421a1ceff0f 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -561,6 +561,37 @@ MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg( return getAlignedHighSGPRForRC(MF, /*Align=*/4, &AMDGPU::SGPR_128RegClass); } +std::pair<unsigned, unsigned> +SIRegisterInfo::getMaxNumVectorRegs(const MachineFunction &MF) const { + const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF); + unsigned MaxNumAGPRs = MaxNumVGPRs; + unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs(); + + // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically, + // a wave may have up to 512 total vector registers combining together both + // VGPRs and AGPRs. 
Hence, in an entry function without calls and without + // AGPRs used within it, it is possible to use the whole vector register + // budget for VGPRs. + // + // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split + // register file accordingly. + if (ST.hasGFX90AInsts()) { + if (MFI->usesAGPRs(MF)) { + MaxNumVGPRs /= 2; + MaxNumAGPRs = MaxNumVGPRs; + } else { + if (MaxNumVGPRs > TotalNumVGPRs) { + MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs; + MaxNumVGPRs = TotalNumVGPRs; + } else + MaxNumAGPRs = 0; + } + } + + return std::pair(MaxNumVGPRs, MaxNumAGPRs); +} + BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); Reserved.set(AMDGPU::MODE); @@ -668,30 +699,7 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { // Reserve VGPRs/AGPRs. // - unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF); - unsigned MaxNumAGPRs = MaxNumVGPRs; - unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs(); - - // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically, - // a wave may have up to 512 total vector registers combining together both - // VGPRs and AGPRs. Hence, in an entry function without calls and without - // AGPRs used within it, it is possible to use the whole vector register - // budget for VGPRs. - // - // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split - // register file accordingly. 
- if (ST.hasGFX90AInsts()) { - if (MFI->usesAGPRs(MF)) { - MaxNumVGPRs /= 2; - MaxNumAGPRs = MaxNumVGPRs; - } else { - if (MaxNumVGPRs > TotalNumVGPRs) { - MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs; - MaxNumVGPRs = TotalNumVGPRs; - } else - MaxNumAGPRs = 0; - } - } + auto [MaxNumVGPRs, MaxNumAGPRs] = getMaxNumVectorRegs(MF); for (const TargetRegisterClass *RC : regclasses()) { if (RC->isBaseClass() && isVGPRClass(RC)) { @@ -724,6 +732,18 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { reserveRegisterTuples(Reserved, MFI->getVGPRForAGPRCopy()); } + // During wwm-regalloc, reserve the registers for perlane VGPR allocation. The + // MFI->getNonWWMRegMask() field will have a valid bitmask only during + // wwm-regalloc and it would be empty otherwise. + BitVector NonWWMRegMask = MFI->getNonWWMRegMask(); + if (!NonWWMRegMask.empty()) { + for (unsigned RegI = AMDGPU::VGPR0, RegE = AMDGPU::VGPR0 + MaxNumVGPRs; + RegI < RegE; ++RegI) { + if (NonWWMRegMask.test(RegI)) + reserveRegisterTuples(Reserved, RegI); + } + } + for (Register Reg : MFI->getWWMReservedRegs()) reserveRegisterTuples(Reserved, Reg); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index 88d5686720985..409e5418abc8e 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -82,6 +82,11 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { /// spilling is needed. MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const; + /// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number + /// of waves per execution unit required for the function \p MF. 
+ std::pair + getMaxNumVectorRegs(const MachineFunction &MF) const; + BitVector getReservedRegs(const MachineFunction &MF) const override; bool isAsmClobberable(const MachineFunction &MF, MCRegister PhysReg) const override; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll index de973481f8230..e9e7360733581 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll @@ -12,97 +12,90 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_xor_saveexec_b32 s4, -1 -; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b32 exec_lo, s4 -; CHECK-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane -; CHECK-NEXT: v_mov_b32_e32 v8, v0 -; CHECK-NEXT: s_or_saveexec_b32 s21, -1 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b32 exec_lo, s21 -; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill -; CHECK-NEXT: v_mov_b32_e32 v15, v1 -; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill -; CHECK-NEXT: v_mov_b32_e32 v14, v2 -; CHECK-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; CHECK-NEXT: v_mov_b32_e32 v13, v3 -; CHECK-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; CHECK-NEXT: v_mov_b32_e32 v12, v4 -; CHECK-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; CHECK-NEXT: v_mov_b32_e32 
v11, v5 -; CHECK-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; CHECK-NEXT: v_mov_b32_e32 v10, v6 -; CHECK-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; CHECK-NEXT: v_mov_b32_e32 v9, v7 -; CHECK-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; CHECK-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 killed $exec -; CHECK-NEXT: v_mov_b32_e32 v2, v15 -; CHECK-NEXT: v_mov_b32_e32 v3, v14 -; CHECK-NEXT: v_mov_b32_e32 v4, v13 -; CHECK-NEXT: v_mov_b32_e32 v5, v12 -; CHECK-NEXT: v_mov_b32_e32 v6, v11 -; CHECK-NEXT: v_mov_b32_e32 v7, v10 -; CHECK-NEXT: v_mov_b32_e32 v8, v9 -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; CHECK-NEXT: v_mov_b32_e32 v14, v1 +; CHECK-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill +; CHECK-NEXT: v_mov_b32_e32 v13, v2 +; CHECK-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; CHECK-NEXT: v_mov_b32_e32 v12, v3 +; CHECK-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; CHECK-NEXT: v_mov_b32_e32 v11, v4 +; CHECK-NEXT: buffer_store_dword v11, off, s[0:3], 
s32 offset:56 ; 4-byte Folded Spill +; CHECK-NEXT: v_mov_b32_e32 v10, v5 +; CHECK-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; CHECK-NEXT: v_mov_b32_e32 v9, v6 +; CHECK-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; CHECK-NEXT: v_mov_b32_e32 v8, v7 +; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 killed $exec +; CHECK-NEXT: v_mov_b32_e32 v1, v14 +; CHECK-NEXT: v_mov_b32_e32 v2, v13 +; CHECK-NEXT: v_mov_b32_e32 v3, v12 +; CHECK-NEXT: v_mov_b32_e32 v4, v11 +; CHECK-NEXT: v_mov_b32_e32 v5, v10 +; CHECK-NEXT: v_mov_b32_e32 v6, v9 +; CHECK-NEXT: v_mov_b32_e32 v7, v8 +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b32 s8, 0 ; CHECK-NEXT: s_mov_b32 s4, s8 ; CHECK-NEXT: s_mov_b32 s5, s8 ; CHECK-NEXT: s_mov_b32 s6, s8 ; CHECK-NEXT: s_mov_b32 s7, s8 -; CHECK-NEXT: v_writelane_b32 v0, s4, 0 -; CHECK-NEXT: v_writelane_b32 v0, s5, 1 -; CHECK-NEXT: v_writelane_b32 v0, s6, 2 -; CHECK-NEXT: v_writelane_b32 v0, s7, 3 +; CHECK-NEXT: ; implicit-def: $vgpr16 : SGPR spill to VGPR lane +; CHECK-NEXT: v_writelane_b32 v16, s4, 0 +; CHECK-NEXT: v_writelane_b32 v16, s5, 1 +; CHECK-NEXT: v_writelane_b32 v16, s6, 2 +; CHECK-NEXT: 
v_writelane_b32 v16, s7, 3 ; CHECK-NEXT: s_mov_b32 s6, 0 ; CHECK-NEXT: s_mov_b32 s4, s6 ; CHECK-NEXT: s_mov_b32 s5, s6 -; CHECK-NEXT: v_mov_b32_e32 v1, s4 -; CHECK-NEXT: v_mov_b32_e32 v2, s5 -; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; CHECK-NEXT: v_mov_b32_e32 v0, s4 +; CHECK-NEXT: v_mov_b32_e32 v1, s5 +; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b32 s4, exec_lo -; CHECK-NEXT: v_writelane_b32 v0, s4, 4 +; CHECK-NEXT: v_writelane_b32 v16, s4, 4 ; CHECK-NEXT: s_or_saveexec_b32 s21, -1 -; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b32 exec_lo, s21 ; CHECK-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload +; CHECK-NEXT: 
buffer_load_dword v3, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload ; CHECK-NEXT: s_or_saveexec_b32 s21, -1 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b32 exec_lo, s21 -; CHECK-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword 
v8, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_readfirstlane_b32 s12, v8 -; CHECK-NEXT: v_readfirstlane_b32 s10, v7 -; CHECK-NEXT: v_readfirstlane_b32 s9, v6 -; CHECK-NEXT: v_readfirstlane_b32 s8, v5 -; CHECK-NEXT: v_readfirstlane_b32 s7, v4 -; CHECK-NEXT: v_readfirstlane_b32 s6, v3 -; CHECK-NEXT: v_readfirstlane_b32 s5, v2 -; CHECK-NEXT: v_readfirstlane_b32 s4, v1 +; CHECK-NEXT: s_waitcnt vmcnt(1) +; CHECK-NEXT: v_readfirstlane_b32 s12, v7 +; CHECK-NEXT: v_readfirstlane_b32 s10, v6 +; CHECK-NEXT: v_readfirstlane_b32 s9, v5 +; CHECK-NEXT: v_readfirstlane_b32 s8, v4 +; CHECK-NEXT: v_readfirstlane_b32 s7, v3 +; CHECK-NEXT: v_readfirstlane_b32 s6, v2 +; CHECK-NEXT: v_readfirstlane_b32 s5, v1 +; CHECK-NEXT: v_readfirstlane_b32 s4, v0 ; CHECK-NEXT: ; kill: def $sgpr12 killed $sgpr12 def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; CHECK-NEXT: s_mov_b32 s13, s10 ; CHECK-NEXT: s_mov_b32 s14, s9 @@ -111,59 +104,59 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK-NEXT: s_mov_b32 s17, s6 ; CHECK-NEXT: s_mov_b32 s18, s5 ; CHECK-NEXT: s_mov_b32 s19, s4 -; CHECK-NEXT: v_writelane_b32 v0, s12, 5 -; CHECK-NEXT: v_writelane_b32 v0, s13, 6 -; CHECK-NEXT: v_writelane_b32 v0, s14, 7 -; CHECK-NEXT: v_writelane_b32 v0, s15, 8 -; CHECK-NEXT: v_writelane_b32 v0, s16, 9 -; CHECK-NEXT: v_writelane_b32 v0, s17, 10 -; CHECK-NEXT: v_writelane_b32 v0, s18, 11 -; CHECK-NEXT: v_writelane_b32 v0, s19, 12 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_writelane_b32 v16, s12, 5 +; CHECK-NEXT: v_writelane_b32 v16, s13, 6 +; CHECK-NEXT: v_writelane_b32 v16, s14, 7 +; CHECK-NEXT: v_writelane_b32 v16, s15, 8 +; CHECK-NEXT: v_writelane_b32 v16, s16, 9 +; CHECK-NEXT: v_writelane_b32 v16, s17, 10 +; CHECK-NEXT: v_writelane_b32 v16, s18, 11 +; CHECK-NEXT: v_writelane_b32 v16, s19, 12 +; CHECK-NEXT: v_mov_b32_e32 v6, v8 ; CHECK-NEXT: v_mov_b32_e32 v7, v9 -; CHECK-NEXT: v_mov_b32_e32 v8, v10 +; CHECK-NEXT: 
v_mov_b32_e32 v4, v10 ; CHECK-NEXT: v_mov_b32_e32 v5, v11 -; CHECK-NEXT: v_mov_b32_e32 v6, v12 +; CHECK-NEXT: v_mov_b32_e32 v2, v12 ; CHECK-NEXT: v_mov_b32_e32 v3, v13 -; CHECK-NEXT: v_mov_b32_e32 v4, v14 +; CHECK-NEXT: v_mov_b32_e32 v0, v14 ; CHECK-NEXT: v_mov_b32_e32 v1, v15 -; CHECK-NEXT: v_mov_b32_e32 v2, v16 ; CHECK-NEXT: s_mov_b64 s[4:5], s[12:13] ; CHECK-NEXT: s_mov_b64 s[10:11], s[14:15] ; CHECK-NEXT: s_mov_b64 s[8:9], s[16:17] ; CHECK-NEXT: s_mov_b64 s[6:7], s[18:19] -; CHECK-NEXT: v_cmp_eq_u64_e64 s4, s[4:5], v[7:8] -; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[10:11], v[5:6] +; CHECK-NEXT: v_cmp_eq_u64_e64 s4, s[4:5], v[6:7] +; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[10:11], v[4:5] ; CHECK-NEXT: s_and_b32 s4, s4, s5 -; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[8:9], v[3:4] +; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[8:9], v[2:3] ; CHECK-NEXT: s_and_b32 s4, s4, s5 -; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[6:7], v[1:2] +; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[6:7], v[0:1] ; CHECK-NEXT: s_and_b32 s4, s4, s5 ; CHECK-NEXT: s_and_saveexec_b32 s4, s4 -; CHECK-NEXT: v_writelane_b32 v0, s4, 13 +; CHECK-NEXT: v_writelane_b32 v16, s4, 13 ; CHECK-NEXT: s_or_saveexec_b32 s21, -1 -; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b32 exec_lo, s21 ; CHECK-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: s_or_saveexec_b32 s21, -1 -; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b32 exec_lo, s21 -; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_readlane_b32 s4, v2, 13 -; CHECK-NEXT: v_readlane_b32 s8, v2, 5 -; CHECK-NEXT: v_readlane_b32 s9, v2, 6 -; CHECK-NEXT: v_readlane_b32 s10, v2, 7 -; CHECK-NEXT: v_readlane_b32 s11, v2, 8 -; CHECK-NEXT: v_readlane_b32 s12, v2, 9 -; CHECK-NEXT: v_readlane_b32 s13, v2, 10 -; CHECK-NEXT: v_readlane_b32 s14, v2, 11 -; CHECK-NEXT: v_readlane_b32 s15, v2, 12 -; CHECK-NEXT: v_readlane_b32 
s16, v2, 0 -; CHECK-NEXT: v_readlane_b32 s17, v2, 1 -; CHECK-NEXT: v_readlane_b32 s18, v2, 2 -; CHECK-NEXT: v_readlane_b32 s19, v2, 3 ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; CHECK-NEXT: s_or_saveexec_b32 s21, -1 +; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload +; CHECK-NEXT: s_mov_b32 exec_lo, s21 ; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_readlane_b32 s4, v16, 13 +; CHECK-NEXT: v_readlane_b32 s8, v16, 5 +; CHECK-NEXT: v_readlane_b32 s9, v16, 6 +; CHECK-NEXT: v_readlane_b32 s10, v16, 7 +; CHECK-NEXT: v_readlane_b32 s11, v16, 8 +; CHECK-NEXT: v_readlane_b32 s12, v16, 9 +; CHECK-NEXT: v_readlane_b32 s13, v16, 10 +; CHECK-NEXT: v_readlane_b32 s14, v16, 11 +; CHECK-NEXT: v_readlane_b32 s15, v16, 12 +; CHECK-NEXT: v_readlane_b32 s16, v16, 0 +; CHECK-NEXT: v_readlane_b32 s17, v16, 1 +; CHECK-NEXT: v_readlane_b32 s18, v16, 2 +; CHECK-NEXT: v_readlane_b32 s19, v16, 3 ; CHECK-NEXT: image_sample v0, v[0:1], s[8:15], s[16:19] dmask:0x1 dim:SQ_RSRC_IMG_2D ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill @@ -171,24 +164,19 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK-NEXT: s_cbranch_execnz .LBB0_1 ; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: s_or_saveexec_b32 s21, -1 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b32 exec_lo, s21 ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_readlane_b32 s4, v0, 4 +; CHECK-NEXT: v_readlane_b32 s4, v16, 4 ; CHECK-NEXT: s_mov_b32 exec_lo, s4 ; CHECK-NEXT: ; %bb.4: -; CHECK-NEXT: s_or_saveexec_b32 s21, -1 -; CHECK-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b32 exec_lo, s21 ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], 
s32 offset:76 ; 4-byte Folded Reload ; CHECK-NEXT: ; implicit-def: $sgpr4 ; CHECK-NEXT: v_mov_b32_e32 v1, s4 ; CHECK-NEXT: v_mov_b32_e32 v2, s4 ; CHECK-NEXT: v_mov_b32_e32 v3, s4 -; CHECK-NEXT: ; kill: killed $vgpr4 ; CHECK-NEXT: s_xor_saveexec_b32 s4, -1 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b32 exec_lo, s4 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir index 9794130d2b000..c91b686697b9d 100644 --- a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir +++ b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir @@ -20,7 +20,7 @@ body: | ; GFX908-LABEL: name: agpr32_restore_clobber_scc ; GFX908: bb.0: ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX908-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) @@ -36,7 +36,7 @@ body: | ; GFX908-NEXT: S_NOP 0 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: - ; GFX908-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 
$vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; @@ -514,7 +514,7 @@ body: | ; GFX908-FLATSCR-LABEL: name: agpr32_restore_clobber_scc ; GFX908-FLATSCR: bb.0: ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX908-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-FLATSCR-NEXT: 
SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) @@ -531,7 +531,7 @@ body: | ; GFX908-FLATSCR-NEXT: S_NOP 0 ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: bb.2: - ; GFX908-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; @@ -1038,7 +1038,7 @@ body: | ; GFX908-LABEL: name: agpr64_restore_clobber_scc ; GFX908: bb.0: ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX908-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, 
$vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) @@ -1056,7 +1056,7 @@ body: | ; GFX908-NEXT: S_NOP 0 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: - ; GFX908-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; @@ -1535,7 +1535,7 @@ body: | ; GFX908-FLATSCR-LABEL: name: agpr64_restore_clobber_scc ; GFX908-FLATSCR: bb.0: ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX908-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 
$vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) @@ -1554,7 +1554,7 @@ body: | ; GFX908-FLATSCR-NEXT: S_NOP 0 ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: bb.2: - ; GFX908-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-FLATSCR-NEXT: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-FLATSCR-NEXT: {{ 
$}} ; GFX908-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; @@ -2061,7 +2061,7 @@ body: | ; GFX908-LABEL: name: agpr96_restore_clobber_scc ; GFX908: bb.0: ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX908-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) @@ -2081,7 +2081,7 @@ body: | ; GFX908-NEXT: S_NOP 0 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: - ; GFX908-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; @@ -2561,7 +2561,7 @@ body: | ; GFX908-FLATSCR-LABEL: name: agpr96_restore_clobber_scc ; GFX908-FLATSCR: bb.0: ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX908-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) @@ -2582,7 +2582,7 @@ body: | ; GFX908-FLATSCR-NEXT: S_NOP 0 ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: bb.2: - ; GFX908-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; @@ -3089,7 +3089,7 @@ body: | ; GFX908-LABEL: name: agpr32_save_clobber_scc ; GFX908: bb.0: ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX908-NEXT: liveins: $agpr0, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-NEXT: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0 ; GFX908-NEXT: {{ 
$}} ; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) @@ -3105,7 +3105,7 @@ body: | ; GFX908-NEXT: S_NOP 0 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: - ; GFX908-NEXT: liveins: $agpr0, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; @@ -3583,7 +3583,7 @@ body: | ; GFX908-FLATSCR-LABEL: name: agpr32_save_clobber_scc ; GFX908-FLATSCR: bb.0: ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX908-FLATSCR-NEXT: liveins: $agpr0, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, 
$vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, 
$vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0 ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) @@ -3600,7 +3600,7 @@ body: | ; GFX908-FLATSCR-NEXT: S_NOP 0 ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: bb.2: - ; GFX908-FLATSCR-NEXT: liveins: $agpr0, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 
$vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0 ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; @@ -4106,7 +4106,7 @@ body: | ; GFX908-LABEL: name: agpr64_save_clobber_scc ; GFX908: bb.0: ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX908-NEXT: liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 
5) @@ -4124,7 +4124,7 @@ body: | ; GFX908-NEXT: S_NOP 0 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: - ; GFX908-NEXT: liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; @@ -4603,7 +4603,7 @@ body: | ; GFX908-FLATSCR-LABEL: name: agpr64_save_clobber_scc ; GFX908-FLATSCR: bb.0: ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX908-FLATSCR-NEXT: liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) @@ -4622,7 +4622,7 @@ body: | ; GFX908-FLATSCR-NEXT: S_NOP 0 ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: bb.2: - ; GFX908-FLATSCR-NEXT: liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; @@ -5127,7 +5127,7 @@ body: | ; GFX908-LABEL: name: agpr96_save_clobber_scc ; GFX908: bb.0: ; GFX908-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX908-NEXT: liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 
$vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) @@ -5147,7 +5147,7 @@ body: | ; GFX908-NEXT: S_NOP 0 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: - ; GFX908-NEXT: liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-NEXT: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 ; 
GFX908-NEXT: {{ $}} ; GFX908-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; @@ -5627,7 +5627,7 @@ body: | ; GFX908-FLATSCR-LABEL: name: agpr96_save_clobber_scc ; GFX908-FLATSCR: bb.0: ; GFX908-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX908-FLATSCR-NEXT: liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) @@ -5648,7 +5648,7 @@ body: | ; GFX908-FLATSCR-NEXT: S_NOP 0 ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: bb.2: - ; GFX908-FLATSCR-NEXT: liveins: $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; diff --git a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir index 80923dfc6f522..3c3c9839755a2 100644 --- a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir +++ b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -start-before=greedy,0 -stop-after=virtregrewriter,1 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,GFX908 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -start-before=greedy,0 -stop-after=virtregrewriter,2 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,GFX908 %s --- # GCN-LABEL: name: alloc_vgpr_64 diff --git 
a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir index 0f0cd0e8171d1..c42b570b40812 100644 --- a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir +++ b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -start-before=greedy,0 -stop-after=virtregrewriter,1 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,GFX90A %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -start-before=greedy,0 -stop-after=virtregrewriter,2 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,GFX90A %s # Using the unaligned vector tuples are OK as long as they aren't used # in a real instruction. diff --git a/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll b/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll index 3ed2cb856eaea..2b98f61748066 100644 --- a/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -O0 -verify-machineinstrs --stop-after=regallocfast,1 -o - %s | FileCheck -check-prefix=REGALLOC %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -O0 -verify-machineinstrs --stop-after=regallocfast,2 -o - %s | FileCheck -check-prefix=REGALLOC %s ; Test to check if the bb prolog spills are inserted correctly during regalloc. 
define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) { @@ -8,22 +8,20 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) { ; REGALLOC-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) ; REGALLOC-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; REGALLOC-NEXT: {{ $}} - ; REGALLOC-NEXT: renamable $vgpr3 = IMPLICIT_DEF ; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr2, %stack.5, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) ; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.4, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) - ; REGALLOC-NEXT: renamable $vgpr1 = COPY $vgpr0 - ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) ; REGALLOC-NEXT: renamable $sgpr4 = S_MOV_B32 49 - ; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = V_CMP_GT_I32_e64 killed $vgpr1, killed $sgpr4, implicit $exec + ; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = V_CMP_GT_I32_e64 killed $vgpr0, killed $sgpr4, implicit $exec ; REGALLOC-NEXT: renamable $sgpr6 = IMPLICIT_DEF - ; REGALLOC-NEXT: renamable $vgpr1 = COPY killed renamable $sgpr6 - ; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; REGALLOC-NEXT: renamable $vgpr0 = COPY killed renamable $sgpr6 + ; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; REGALLOC-NEXT: renamable $sgpr6_sgpr7 = COPY $exec, implicit-def $exec ; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_AND_B64 renamable $sgpr6_sgpr7, killed renamable $sgpr4_sgpr5, implicit-def dead $scc ; REGALLOC-NEXT: renamable $sgpr6_sgpr7 = S_XOR_B64 renamable $sgpr4_sgpr5, killed renamable $sgpr6_sgpr7, implicit-def dead $scc - ; REGALLOC-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr6, 0, $vgpr0, implicit-def $sgpr6_sgpr7, implicit $sgpr6_sgpr7 - ; REGALLOC-NEXT: renamable $vgpr0 = 
SI_SPILL_S32_TO_VGPR killed $sgpr7, 1, $vgpr0, implicit killed $sgpr6_sgpr7 - ; REGALLOC-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr0, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; REGALLOC-NEXT: $vgpr63 = IMPLICIT_DEF + ; REGALLOC-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr6, 0, $vgpr63, implicit-def $sgpr6_sgpr7, implicit $sgpr6_sgpr7 + ; REGALLOC-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr7, 1, $vgpr63, implicit killed $sgpr6_sgpr7 + ; REGALLOC-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr63, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; REGALLOC-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr4_sgpr5 ; REGALLOC-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec ; REGALLOC-NEXT: S_BRANCH %bb.3 @@ -31,16 +29,16 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) { ; REGALLOC-NEXT: bb.1.Flow: ; REGALLOC-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; REGALLOC-NEXT: {{ $}} - ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) - ; REGALLOC-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr4_sgpr5 - ; REGALLOC-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1 + ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) + ; REGALLOC-NEXT: $vgpr63 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) + ; REGALLOC-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 0, implicit-def $sgpr4_sgpr5 + ; REGALLOC-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 1 ; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 killed renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; REGALLOC-NEXT: $vgpr1 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) - ; REGALLOC-NEXT: SI_SPILL_V32_SAVE 
killed $vgpr1, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) + ; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5) ; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_AND_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc - ; REGALLOC-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr4, 2, $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5 - ; REGALLOC-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr5, 3, $vgpr0, implicit $sgpr4_sgpr5 - ; REGALLOC-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr0, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; REGALLOC-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr4, 2, $vgpr63, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5 + ; REGALLOC-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr5, 3, $vgpr63, implicit $sgpr4_sgpr5 + ; REGALLOC-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr63, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; REGALLOC-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc ; REGALLOC-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; REGALLOC-NEXT: S_BRANCH %bb.2 @@ -64,13 +62,12 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) { ; REGALLOC-NEXT: S_BRANCH %bb.1 ; REGALLOC-NEXT: {{ $}} ; REGALLOC-NEXT: bb.4.bb.3: - ; REGALLOC-NEXT: $vgpr1 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) - ; REGALLOC-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 2, implicit-def $sgpr4_sgpr5 - ; REGALLOC-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 3 - ; REGALLOC-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc ; REGALLOC-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5) + ; REGALLOC-NEXT: $vgpr63 = SI_SPILL_WWM_V32_RESTORE %stack.2, 
$sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) + ; REGALLOC-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 2, implicit-def $sgpr4_sgpr5 + ; REGALLOC-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR killed $vgpr63, 3 + ; REGALLOC-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc ; REGALLOC-NEXT: renamable $vgpr0 = V_LSHL_ADD_U32_e64 killed $vgpr0, 2, $vgpr0, implicit $exec - ; REGALLOC-NEXT: KILL killed renamable $vgpr1 ; REGALLOC-NEXT: SI_RETURN implicit killed $vgpr0 bb.0: %cmp = icmp slt i32 %arg0, 50 diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll index adfc177c8bf74..0047b6b0ee934 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll @@ -971,12 +971,12 @@ define void @spill_func(ptr addrspace(1) %arg) #0 { ; CHECK-NEXT: v_writelane_b32 v1, s98, 3 ; CHECK-NEXT: v_writelane_b32 v0, s92, 61 ; CHECK-NEXT: v_writelane_b32 v1, s99, 4 +; CHECK-NEXT: s_mov_b32 s49, s12 ; CHECK-NEXT: v_writelane_b32 v0, s93, 62 ; CHECK-NEXT: v_writelane_b32 v1, s100, 5 -; CHECK-NEXT: s_mov_b32 s49, s12 +; CHECK-NEXT: s_cmp_eq_u32 s49, 0 ; CHECK-NEXT: v_writelane_b32 v0, s94, 63 ; CHECK-NEXT: v_writelane_b32 v1, s101, 6 -; CHECK-NEXT: s_cmp_eq_u32 s49, 0 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: s_mov_b32 s0, 0 ; CHECK-NEXT: ;;#ASMEND diff --git a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll index b46cdb8ab3ba0..3e25904aa044d 100644 --- a/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll +++ b/llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll @@ -36,66 +36,56 @@ define amdgpu_kernel void @test_loop(ptr addrspace(3) %ptr, i32 %n) nounwind { ; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000 ; GCN_DBG-NEXT: s_add_u32 s12, s12, s9 ; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0 -; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane ; GCN_DBG-NEXT: s_load_dword s0, s[2:3], 0x9 +; 
GCN_DBG-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 0 ; GCN_DBG-NEXT: s_load_dword s1, s[2:3], 0xa ; GCN_DBG-NEXT: s_mov_b32 s0, 0 ; GCN_DBG-NEXT: s_mov_b32 s2, -1 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) ; GCN_DBG-NEXT: s_cmp_lg_u32 s1, s2 -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 ; GCN_DBG-NEXT: s_mov_b64 s[4:5], exec ; GCN_DBG-NEXT: s_mov_b64 exec, -1 -; GCN_DBG-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GCN_DBG-NEXT: buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill ; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; GCN_DBG-NEXT: s_cbranch_scc1 .LBB0_2 ; GCN_DBG-NEXT: ; %bb.1: ; %for.exit -; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN_DBG-NEXT: s_waitcnt expcnt(0) -; GCN_DBG-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload -; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] -; GCN_DBG-NEXT: ; kill: killed $vgpr0 ; GCN_DBG-NEXT: s_endpgm ; GCN_DBG-NEXT: .LBB0_2: ; %for.body ; GCN_DBG-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN_DBG-NEXT: s_waitcnt expcnt(0) -; GCN_DBG-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN_DBG-NEXT: buffer_load_dword v2, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; GCN_DBG-NEXT: s_waitcnt vmcnt(0) -; GCN_DBG-NEXT: v_readlane_b32 s0, v0, 1 -; GCN_DBG-NEXT: v_readlane_b32 s2, v0, 0 +; GCN_DBG-NEXT: v_readlane_b32 s0, v2, 1 +; GCN_DBG-NEXT: v_readlane_b32 s2, v2, 0 ; GCN_DBG-NEXT: s_mov_b32 s1, 2 ; GCN_DBG-NEXT: s_lshl_b32 s1, s0, s1 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s2 ; GCN_DBG-NEXT: s_mov_b32 s2, 0x80 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s2 ; GCN_DBG-NEXT: s_mov_b32 m0, -1 -; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1 -; GCN_DBG-NEXT: ds_read_b32 v1, v1 +; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 +; GCN_DBG-NEXT: ds_read_b32 v0, v0 ; 
GCN_DBG-NEXT: s_mov_b32 s2, 1.0 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_add_f32_e64 v2, v1, s2 +; GCN_DBG-NEXT: v_add_f32_e64 v1, v0, s2 ; GCN_DBG-NEXT: s_mov_b32 m0, -1 -; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1 -; GCN_DBG-NEXT: ds_write_b32 v1, v2 +; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 +; GCN_DBG-NEXT: ds_write_b32 v0, v1 ; GCN_DBG-NEXT: s_mov_b32 s1, 1 ; GCN_DBG-NEXT: s_add_i32 s0, s0, s1 ; GCN_DBG-NEXT: s_mov_b64 s[2:3], -1 ; GCN_DBG-NEXT: s_and_b64 vcc, exec, s[2:3] -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 ; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN_DBG-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GCN_DBG-NEXT: buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill ; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; GCN_DBG-NEXT: s_cbranch_vccnz .LBB0_2 ; GCN_DBG-NEXT: ; %bb.3: ; %DummyReturnBlock -; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN_DBG-NEXT: s_waitcnt expcnt(0) -; GCN_DBG-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload -; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] -; GCN_DBG-NEXT: ; kill: killed $vgpr0 ; GCN_DBG-NEXT: s_endpgm entry: %cmp = icmp eq i32 %n, -1 @@ -144,53 +134,48 @@ define amdgpu_kernel void @loop_const_true(ptr addrspace(3) %ptr, i32 %n) nounwi ; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000 ; GCN_DBG-NEXT: s_add_u32 s12, s12, s9 ; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0 -; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane ; GCN_DBG-NEXT: s_load_dword s0, s[2:3], 0x9 +; GCN_DBG-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 0 ; GCN_DBG-NEXT: s_mov_b32 s0, 0 -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 ; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN_DBG-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GCN_DBG-NEXT: buffer_store_dword 
v2, off, s[12:15], 0 ; 4-byte Folded Spill ; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; GCN_DBG-NEXT: s_branch .LBB1_2 ; GCN_DBG-NEXT: .LBB1_1: ; %for.exit -; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN_DBG-NEXT: s_waitcnt expcnt(0) -; GCN_DBG-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload -; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] -; GCN_DBG-NEXT: ; kill: killed $vgpr0 ; GCN_DBG-NEXT: s_endpgm ; GCN_DBG-NEXT: .LBB1_2: ; %for.body ; GCN_DBG-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN_DBG-NEXT: s_waitcnt expcnt(0) -; GCN_DBG-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN_DBG-NEXT: buffer_load_dword v2, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; GCN_DBG-NEXT: s_waitcnt vmcnt(0) -; GCN_DBG-NEXT: v_readlane_b32 s0, v0, 1 -; GCN_DBG-NEXT: v_readlane_b32 s2, v0, 0 +; GCN_DBG-NEXT: v_readlane_b32 s0, v2, 1 +; GCN_DBG-NEXT: v_readlane_b32 s2, v2, 0 ; GCN_DBG-NEXT: s_mov_b32 s1, 2 ; GCN_DBG-NEXT: s_lshl_b32 s1, s0, s1 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s2 ; GCN_DBG-NEXT: s_mov_b32 s2, 0x80 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s2 ; GCN_DBG-NEXT: s_mov_b32 m0, -1 -; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1 -; GCN_DBG-NEXT: ds_read_b32 v1, v1 +; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 +; GCN_DBG-NEXT: ds_read_b32 v0, v0 ; GCN_DBG-NEXT: s_mov_b32 s2, 1.0 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_add_f32_e64 v2, v1, s2 +; GCN_DBG-NEXT: v_add_f32_e64 v1, v0, s2 ; GCN_DBG-NEXT: s_mov_b32 m0, -1 -; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1 -; GCN_DBG-NEXT: ds_write_b32 v1, v2 +; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 +; GCN_DBG-NEXT: ds_write_b32 v0, v1 ; GCN_DBG-NEXT: s_mov_b32 s1, 1 ; GCN_DBG-NEXT: s_add_i32 s0, s0, s1 ; GCN_DBG-NEXT: s_mov_b64 s[2:3], 0 ; GCN_DBG-NEXT: s_and_b64 vcc, exec, s[2:3] -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 ; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN_DBG-NEXT: 
buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GCN_DBG-NEXT: buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill ; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; GCN_DBG-NEXT: s_cbranch_vccnz .LBB1_1 ; GCN_DBG-NEXT: s_branch .LBB1_2 @@ -232,53 +217,48 @@ define amdgpu_kernel void @loop_const_false(ptr addrspace(3) %ptr, i32 %n) nounw ; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000 ; GCN_DBG-NEXT: s_add_u32 s12, s12, s9 ; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0 -; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane ; GCN_DBG-NEXT: s_load_dword s0, s[2:3], 0x9 +; GCN_DBG-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 0 ; GCN_DBG-NEXT: s_mov_b32 s0, 0 -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 ; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN_DBG-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GCN_DBG-NEXT: buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill ; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; GCN_DBG-NEXT: s_branch .LBB2_2 ; GCN_DBG-NEXT: .LBB2_1: ; %for.exit -; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN_DBG-NEXT: s_waitcnt expcnt(0) -; GCN_DBG-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload -; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] -; GCN_DBG-NEXT: ; kill: killed $vgpr0 ; GCN_DBG-NEXT: s_endpgm ; GCN_DBG-NEXT: .LBB2_2: ; %for.body ; GCN_DBG-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN_DBG-NEXT: s_waitcnt expcnt(0) -; GCN_DBG-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN_DBG-NEXT: buffer_load_dword v2, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; GCN_DBG-NEXT: s_waitcnt vmcnt(0) -; GCN_DBG-NEXT: v_readlane_b32 s0, v0, 1 -; GCN_DBG-NEXT: v_readlane_b32 s2, v0, 0 +; GCN_DBG-NEXT: v_readlane_b32 s0, v2, 1 
+; GCN_DBG-NEXT: v_readlane_b32 s2, v2, 0 ; GCN_DBG-NEXT: s_mov_b32 s1, 2 ; GCN_DBG-NEXT: s_lshl_b32 s1, s0, s1 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s2 ; GCN_DBG-NEXT: s_mov_b32 s2, 0x80 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s2 ; GCN_DBG-NEXT: s_mov_b32 m0, -1 -; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1 -; GCN_DBG-NEXT: ds_read_b32 v1, v1 +; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 +; GCN_DBG-NEXT: ds_read_b32 v0, v0 ; GCN_DBG-NEXT: s_mov_b32 s2, 1.0 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_add_f32_e64 v2, v1, s2 +; GCN_DBG-NEXT: v_add_f32_e64 v1, v0, s2 ; GCN_DBG-NEXT: s_mov_b32 m0, -1 -; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1 -; GCN_DBG-NEXT: ds_write_b32 v1, v2 +; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 +; GCN_DBG-NEXT: ds_write_b32 v0, v1 ; GCN_DBG-NEXT: s_mov_b32 s1, 1 ; GCN_DBG-NEXT: s_add_i32 s0, s0, s1 ; GCN_DBG-NEXT: s_mov_b64 s[2:3], -1 ; GCN_DBG-NEXT: s_and_b64 vcc, exec, s[2:3] -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 ; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN_DBG-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GCN_DBG-NEXT: buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill ; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; GCN_DBG-NEXT: s_cbranch_vccnz .LBB2_1 ; GCN_DBG-NEXT: s_branch .LBB2_2 @@ -321,51 +301,46 @@ define amdgpu_kernel void @loop_const_undef(ptr addrspace(3) %ptr, i32 %n) nounw ; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000 ; GCN_DBG-NEXT: s_add_u32 s12, s12, s9 ; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0 -; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane ; GCN_DBG-NEXT: s_load_dword s0, s[2:3], 0x9 +; GCN_DBG-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 0 ; GCN_DBG-NEXT: s_mov_b32 s0, 0 -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 ; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN_DBG-NEXT: 
buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GCN_DBG-NEXT: buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill ; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; GCN_DBG-NEXT: s_branch .LBB3_2 ; GCN_DBG-NEXT: .LBB3_1: ; %for.exit -; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN_DBG-NEXT: s_waitcnt expcnt(0) -; GCN_DBG-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload -; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] -; GCN_DBG-NEXT: ; kill: killed $vgpr0 ; GCN_DBG-NEXT: s_endpgm ; GCN_DBG-NEXT: .LBB3_2: ; %for.body ; GCN_DBG-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN_DBG-NEXT: s_waitcnt expcnt(0) -; GCN_DBG-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN_DBG-NEXT: buffer_load_dword v2, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; GCN_DBG-NEXT: s_waitcnt vmcnt(0) -; GCN_DBG-NEXT: v_readlane_b32 s0, v0, 1 -; GCN_DBG-NEXT: v_readlane_b32 s2, v0, 0 +; GCN_DBG-NEXT: v_readlane_b32 s0, v2, 1 +; GCN_DBG-NEXT: v_readlane_b32 s2, v2, 0 ; GCN_DBG-NEXT: s_mov_b32 s1, 2 ; GCN_DBG-NEXT: s_lshl_b32 s1, s0, s1 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s2 ; GCN_DBG-NEXT: s_mov_b32 s2, 0x80 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s2 ; GCN_DBG-NEXT: s_mov_b32 m0, -1 -; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1 -; GCN_DBG-NEXT: ds_read_b32 v1, v1 +; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 +; GCN_DBG-NEXT: ds_read_b32 v0, v0 ; GCN_DBG-NEXT: s_mov_b32 s2, 1.0 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_add_f32_e64 v2, v1, s2 +; GCN_DBG-NEXT: v_add_f32_e64 v1, v0, s2 ; GCN_DBG-NEXT: s_mov_b32 m0, -1 -; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1 -; GCN_DBG-NEXT: ds_write_b32 v1, v2 +; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 +; GCN_DBG-NEXT: ds_write_b32 v0, v1 ; GCN_DBG-NEXT: s_mov_b32 s1, 1 ; GCN_DBG-NEXT: s_add_i32 s0, s0, s1 -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 ; GCN_DBG-NEXT: s_or_saveexec_b64 s[4:5], -1 -; 
GCN_DBG-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GCN_DBG-NEXT: buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill ; GCN_DBG-NEXT: s_mov_b64 exec, s[4:5] ; GCN_DBG-NEXT: s_cbranch_scc1 .LBB3_1 ; GCN_DBG-NEXT: s_branch .LBB3_2 @@ -422,66 +397,61 @@ define amdgpu_kernel void @loop_arg_0(ptr addrspace(3) %ptr, i32 %n) nounwind { ; GCN_DBG-NEXT: s_mov_b32 s15, 0xe8f000 ; GCN_DBG-NEXT: s_add_u32 s12, s12, s9 ; GCN_DBG-NEXT: s_addc_u32 s13, s13, 0 -; GCN_DBG-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane ; GCN_DBG-NEXT: s_load_dword s0, s[2:3], 0x9 +; GCN_DBG-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 0 -; GCN_DBG-NEXT: v_mov_b32_e32 v1, 0 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 0 +; GCN_DBG-NEXT: v_mov_b32_e32 v0, 0 ; GCN_DBG-NEXT: s_mov_b32 m0, -1 -; GCN_DBG-NEXT: ds_read_u8 v1, v1 +; GCN_DBG-NEXT: ds_read_u8 v0, v0 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_readfirstlane_b32 s0, v1 +; GCN_DBG-NEXT: v_readfirstlane_b32 s0, v0 ; GCN_DBG-NEXT: s_and_b32 s0, 1, s0 ; GCN_DBG-NEXT: s_cmp_eq_u32 s0, 1 ; GCN_DBG-NEXT: s_cselect_b64 s[0:1], -1, 0 ; GCN_DBG-NEXT: s_mov_b64 s[2:3], -1 ; GCN_DBG-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 1 -; GCN_DBG-NEXT: v_writelane_b32 v0, s1, 2 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 1 +; GCN_DBG-NEXT: v_writelane_b32 v2, s1, 2 ; GCN_DBG-NEXT: s_mov_b32 s0, 0 -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 3 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 3 ; GCN_DBG-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN_DBG-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GCN_DBG-NEXT: buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded Spill ; GCN_DBG-NEXT: s_mov_b64 exec, s[6:7] ; GCN_DBG-NEXT: s_branch .LBB4_2 ; GCN_DBG-NEXT: .LBB4_1: ; %for.exit -; GCN_DBG-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN_DBG-NEXT: s_waitcnt expcnt(0) -; GCN_DBG-NEXT: 
buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload -; GCN_DBG-NEXT: s_mov_b64 exec, s[6:7] -; GCN_DBG-NEXT: ; kill: killed $vgpr0 ; GCN_DBG-NEXT: s_endpgm ; GCN_DBG-NEXT: .LBB4_2: ; %for.body ; GCN_DBG-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN_DBG-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN_DBG-NEXT: s_waitcnt expcnt(0) -; GCN_DBG-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN_DBG-NEXT: buffer_load_dword v2, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN_DBG-NEXT: s_mov_b64 exec, s[6:7] ; GCN_DBG-NEXT: s_waitcnt vmcnt(0) -; GCN_DBG-NEXT: v_readlane_b32 s0, v0, 3 -; GCN_DBG-NEXT: v_readlane_b32 s2, v0, 1 -; GCN_DBG-NEXT: v_readlane_b32 s3, v0, 2 -; GCN_DBG-NEXT: v_readlane_b32 s4, v0, 0 +; GCN_DBG-NEXT: v_readlane_b32 s0, v2, 3 +; GCN_DBG-NEXT: v_readlane_b32 s2, v2, 1 +; GCN_DBG-NEXT: v_readlane_b32 s3, v2, 2 +; GCN_DBG-NEXT: v_readlane_b32 s4, v2, 0 ; GCN_DBG-NEXT: s_mov_b32 s1, 2 ; GCN_DBG-NEXT: s_lshl_b32 s1, s0, s1 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s4 ; GCN_DBG-NEXT: s_mov_b32 s4, 0x80 ; GCN_DBG-NEXT: s_add_i32 s1, s1, s4 ; GCN_DBG-NEXT: s_mov_b32 m0, -1 -; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1 -; GCN_DBG-NEXT: ds_read_b32 v1, v1 +; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 +; GCN_DBG-NEXT: ds_read_b32 v0, v0 ; GCN_DBG-NEXT: s_mov_b32 s4, 1.0 ; GCN_DBG-NEXT: s_waitcnt lgkmcnt(0) -; GCN_DBG-NEXT: v_add_f32_e64 v2, v1, s4 +; GCN_DBG-NEXT: v_add_f32_e64 v1, v0, s4 ; GCN_DBG-NEXT: s_mov_b32 m0, -1 -; GCN_DBG-NEXT: v_mov_b32_e32 v1, s1 -; GCN_DBG-NEXT: ds_write_b32 v1, v2 +; GCN_DBG-NEXT: v_mov_b32_e32 v0, s1 +; GCN_DBG-NEXT: ds_write_b32 v0, v1 ; GCN_DBG-NEXT: s_mov_b32 s1, 1 ; GCN_DBG-NEXT: s_add_i32 s0, s0, s1 ; GCN_DBG-NEXT: s_and_b64 vcc, exec, s[2:3] -; GCN_DBG-NEXT: v_writelane_b32 v0, s0, 3 +; GCN_DBG-NEXT: v_writelane_b32 v2, s0, 3 ; GCN_DBG-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN_DBG-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GCN_DBG-NEXT: buffer_store_dword v2, off, s[12:15], 0 ; 4-byte Folded 
Spill ; GCN_DBG-NEXT: s_mov_b64 exec, s[6:7] ; GCN_DBG-NEXT: s_cbranch_vccnz .LBB4_1 ; GCN_DBG-NEXT: s_branch .LBB4_2 diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll index 67a084068941a..7cec15ea5be87 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll @@ -48,72 +48,67 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000 ; GCN-O0-NEXT: s_add_u32 s12, s12, s9 ; GCN-O0-NEXT: s_addc_u32 s13, s13, 0 -; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_mov_b32_e32 v1, v0 -; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] ; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: v_writelane_b32 v0, s0, 0 -; GCN-O0-NEXT: v_writelane_b32 v0, s1, 1 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v1 -; GCN-O0-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill +; GCN-O0-NEXT: ; implicit-def: $vgpr4 : SGPR spill to VGPR lane +; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) +; GCN-O0-NEXT: v_writelane_b32 v4, s0, 0 +; GCN-O0-NEXT: v_writelane_b32 v4, s1, 1 +; GCN-O0-NEXT: v_mov_b32_e32 v1, v0 +; GCN-O0-NEXT: buffer_store_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b32 s0, 1 -; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v1, s0 +; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v0, s0 ; GCN-O0-NEXT: s_mov_b64 s[0:1], exec -; GCN-O0-NEXT: v_writelane_b32 v0, s0, 2 -; GCN-O0-NEXT: v_writelane_b32 v0, s1, 3 +; GCN-O0-NEXT: v_writelane_b32 v4, s0, 2 +; GCN-O0-NEXT: v_writelane_b32 v4, s1, 3 ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill ; GCN-O0-NEXT: 
s_mov_b64 exec, s[8:9] ; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB0_4 ; GCN-O0-NEXT: ; %bb.1: ; %bb.outer.then +; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v0, 0 -; GCN-O0-NEXT: v_readlane_b32 s5, v0, 1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: v_readlane_b32 s4, v4, 0 +; GCN-O0-NEXT: v_readlane_b32 s5, v4, 1 ; GCN-O0-NEXT: s_mov_b32 s2, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s0, 0 ; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 ; GCN-O0-NEXT: s_mov_b32 s1, s2 ; GCN-O0-NEXT: ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-O0-NEXT: s_mov_b64 s[6:7], s[0:1] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_ashrrev_i32_e64 v4, 31, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v3, v4 +; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v0 +; GCN-O0-NEXT: v_mov_b32_e32 v1, v0 +; GCN-O0-NEXT: v_mov_b32_e32 v2, v3 ; GCN-O0-NEXT: s_mov_b32 s0, 2 -; GCN-O0-NEXT: v_lshl_b64 v[3:4], v[2:3], s0 -; GCN-O0-NEXT: v_mov_b32_e32 v2, 0 -; GCN-O0-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64 -; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[2:3], v1, s0 +; GCN-O0-NEXT: v_lshl_b64 v[2:3], v[1:2], s0 +; GCN-O0-NEXT: v_mov_b32_e32 v1, 0 +; GCN-O0-NEXT: buffer_store_dword v1, v[2:3], s[4:7], 0 addr64 +; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[2:3], v0, s0 ; GCN-O0-NEXT: s_mov_b64 s[0:1], exec -; GCN-O0-NEXT: v_writelane_b32 v0, s0, 4 -; GCN-O0-NEXT: v_writelane_b32 v0, s1, 5 +; GCN-O0-NEXT: v_writelane_b32 v4, s0, 4 +; GCN-O0-NEXT: 
v_writelane_b32 v4, s1, 5 ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] ; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB0_3 ; GCN-O0-NEXT: ; %bb.2: ; %bb.inner.then +; GCN-O0-NEXT: s_waitcnt expcnt(1) +; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s0, v0, 0 -; GCN-O0-NEXT: v_readlane_b32 s1, v0, 1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: v_readlane_b32 s0, v4, 0 +; GCN-O0-NEXT: v_readlane_b32 s1, v4, 1 ; GCN-O0-NEXT: v_mov_b32_e32 v0, 1 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0 ; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1 ; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec @@ -130,26 +125,25 @@ define amdgpu_kernel void @simple_nested_if(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: .LBB0_3: ; %Flow ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s0, v0, 4 -; GCN-O0-NEXT: v_readlane_b32 s1, v0, 5 +; GCN-O0-NEXT: v_readlane_b32 s0, v4, 4 +; GCN-O0-NEXT: v_readlane_b32 s1, v4, 5 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1] ; 
GCN-O0-NEXT: .LBB0_4: ; %bb.outer.end ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s0, v0, 2 -; GCN-O0-NEXT: v_readlane_b32 s1, v0, 3 +; GCN-O0-NEXT: v_readlane_b32 s0, v4, 2 +; GCN-O0-NEXT: v_readlane_b32 s1, v4, 3 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1] -; GCN-O0-NEXT: v_mov_b32_e32 v2, 3 -; GCN-O0-NEXT: v_mov_b32_e32 v1, 0 +; GCN-O0-NEXT: v_mov_b32_e32 v1, 3 +; GCN-O0-NEXT: v_mov_b32_e32 v0, 0 ; GCN-O0-NEXT: s_mov_b32 m0, -1 -; GCN-O0-NEXT: ds_write_b32 v1, v2 -; GCN-O0-NEXT: ; kill: killed $vgpr0 +; GCN-O0-NEXT: ds_write_b32 v0, v1 ; GCN-O0-NEXT: s_endpgm bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -222,72 +216,67 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a ; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000 ; GCN-O0-NEXT: s_add_u32 s12, s12, s9 ; GCN-O0-NEXT: s_addc_u32 s13, s13, 0 -; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_mov_b32_e32 v1, v0 -; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] ; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: v_writelane_b32 v0, s0, 0 -; GCN-O0-NEXT: v_writelane_b32 v0, s1, 1 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v1 -; GCN-O0-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill +; GCN-O0-NEXT: ; implicit-def: $vgpr4 : SGPR spill to VGPR lane +; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) +; GCN-O0-NEXT: v_writelane_b32 v4, s0, 0 +; GCN-O0-NEXT: v_writelane_b32 v4, s1, 1 +; GCN-O0-NEXT: v_mov_b32_e32 v1, v0 +; GCN-O0-NEXT: buffer_store_dword v1, off, s[12:15], 0 offset:4 ; 4-byte 
Folded Spill ; GCN-O0-NEXT: s_mov_b32 s0, 1 -; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v1, s0 +; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v0, s0 ; GCN-O0-NEXT: s_mov_b64 s[0:1], exec -; GCN-O0-NEXT: v_writelane_b32 v0, s0, 2 -; GCN-O0-NEXT: v_writelane_b32 v0, s1, 3 +; GCN-O0-NEXT: v_writelane_b32 v4, s0, 2 +; GCN-O0-NEXT: v_writelane_b32 v4, s1, 3 ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] ; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB1_3 ; GCN-O0-NEXT: ; %bb.1: ; %bb.outer.then +; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v0, 0 -; GCN-O0-NEXT: v_readlane_b32 s5, v0, 1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: v_readlane_b32 s4, v4, 0 +; GCN-O0-NEXT: v_readlane_b32 s5, v4, 1 ; GCN-O0-NEXT: s_mov_b32 s2, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s0, 0 ; GCN-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1 ; GCN-O0-NEXT: s_mov_b32 s1, s2 ; GCN-O0-NEXT: ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-O0-NEXT: s_mov_b64 s[6:7], s[0:1] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_ashrrev_i32_e64 v4, 31, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v1 -; GCN-O0-NEXT: v_mov_b32_e32 v3, v4 +; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v0 +; GCN-O0-NEXT: v_mov_b32_e32 v1, v0 +; GCN-O0-NEXT: v_mov_b32_e32 v2, v3 ; GCN-O0-NEXT: s_mov_b32 s0, 2 -; 
GCN-O0-NEXT: v_lshl_b64 v[3:4], v[2:3], s0 -; GCN-O0-NEXT: v_mov_b32_e32 v2, 0 -; GCN-O0-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64 -; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[2:3], v1, s0 +; GCN-O0-NEXT: v_lshl_b64 v[2:3], v[1:2], s0 +; GCN-O0-NEXT: v_mov_b32_e32 v1, 0 +; GCN-O0-NEXT: buffer_store_dword v1, v[2:3], s[4:7], 0 addr64 +; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[2:3], v0, s0 ; GCN-O0-NEXT: s_mov_b64 s[0:1], exec -; GCN-O0-NEXT: v_writelane_b32 v0, s0, 4 -; GCN-O0-NEXT: v_writelane_b32 v0, s1, 5 +; GCN-O0-NEXT: v_writelane_b32 v4, s0, 4 +; GCN-O0-NEXT: v_writelane_b32 v4, s1, 5 ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] ; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB1_4 ; GCN-O0-NEXT: ; %bb.2: ; %bb.inner.then +; GCN-O0-NEXT: s_waitcnt expcnt(1) +; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s0, v0, 0 -; GCN-O0-NEXT: v_readlane_b32 s1, v0, 1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: v_readlane_b32 s0, v4, 0 +; GCN-O0-NEXT: v_readlane_b32 s1, v4, 1 ; GCN-O0-NEXT: v_mov_b32_e32 v0, 1 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0 ; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1 ; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec @@ -305,27 +294,27 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr 
addrspace(1) nocapture %a ; GCN-O0-NEXT: .LBB1_3: ; %Flow ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s0, v0, 2 -; GCN-O0-NEXT: v_readlane_b32 s1, v0, 3 +; GCN-O0-NEXT: v_readlane_b32 s0, v4, 2 +; GCN-O0-NEXT: v_readlane_b32 s1, v4, 3 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1] ; GCN-O0-NEXT: s_branch .LBB1_5 ; GCN-O0-NEXT: .LBB1_4: ; %bb.inner.end +; GCN-O0-NEXT: s_waitcnt expcnt(1) +; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s2, v0, 4 -; GCN-O0-NEXT: v_readlane_b32 s3, v0, 5 +; GCN-O0-NEXT: v_readlane_b32 s2, v4, 4 +; GCN-O0-NEXT: v_readlane_b32 s3, v4, 5 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[2:3] -; GCN-O0-NEXT: v_readlane_b32 s0, v0, 0 -; GCN-O0-NEXT: v_readlane_b32 s1, v0, 1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: v_readlane_b32 s0, v4, 0 +; GCN-O0-NEXT: v_readlane_b32 s1, v4, 1 ; GCN-O0-NEXT: v_mov_b32_e32 v0, 2 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0 ; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1 ; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec @@ -340,14 +329,10 @@ define amdgpu_kernel void @uncollapsable_nested_if(ptr addrspace(1) nocapture %a ; GCN-O0-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 ; GCN-O0-NEXT: s_branch .LBB1_3 ; GCN-O0-NEXT: .LBB1_5: ; 
%bb.outer.end -; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] -; GCN-O0-NEXT: v_mov_b32_e32 v2, 3 -; GCN-O0-NEXT: v_mov_b32_e32 v1, 0 +; GCN-O0-NEXT: v_mov_b32_e32 v1, 3 +; GCN-O0-NEXT: v_mov_b32_e32 v0, 0 ; GCN-O0-NEXT: s_mov_b32 m0, -1 -; GCN-O0-NEXT: ds_write_b32 v1, v2 -; GCN-O0-NEXT: ; kill: killed $vgpr0 +; GCN-O0-NEXT: ds_write_b32 v0, v1 ; GCN-O0-NEXT: s_endpgm bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -433,19 +418,14 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000 ; GCN-O0-NEXT: s_add_u32 s12, s12, s9 ; GCN-O0-NEXT: s_addc_u32 s13, s13, 0 -; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_mov_b32_e32 v1, v0 -; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[6:7] ; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9 ; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) ; GCN-O0-NEXT: s_mov_b64 s[2:3], s[0:1] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_writelane_b32 v0, s2, 0 -; GCN-O0-NEXT: v_writelane_b32 v0, s3, 1 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v1 -; GCN-O0-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill +; GCN-O0-NEXT: ; implicit-def: $vgpr4 : SGPR spill to VGPR lane +; GCN-O0-NEXT: v_writelane_b32 v4, s2, 0 +; GCN-O0-NEXT: v_writelane_b32 v4, s3, 1 +; GCN-O0-NEXT: v_mov_b32_e32 v1, v0 +; GCN-O0-NEXT: buffer_store_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b32 s2, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s4, 0 ; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 @@ -453,42 +433,43 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3 ; 
GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5] ; GCN-O0-NEXT: s_mov_b32 s4, 2 -; GCN-O0-NEXT: v_lshlrev_b32_e64 v3, s4, v1 +; GCN-O0-NEXT: v_lshlrev_b32_e64 v2, s4, v0 ; GCN-O0-NEXT: s_mov_b32 s4, 0 ; GCN-O0-NEXT: ; implicit-def: $sgpr4 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: v_mov_b32_e32 v2, 0 -; GCN-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v4, v2 -; GCN-O0-NEXT: v_mov_b32_e32 v2, 0 -; GCN-O0-NEXT: buffer_store_dword v2, v[3:4], s[0:3], 0 addr64 +; GCN-O0-NEXT: v_mov_b32_e32 v1, 0 +; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GCN-O0-NEXT: v_mov_b32_e32 v3, v1 +; GCN-O0-NEXT: v_mov_b32_e32 v1, 0 +; GCN-O0-NEXT: buffer_store_dword v1, v[2:3], s[0:3], 0 addr64 ; GCN-O0-NEXT: s_mov_b32 s0, 1 -; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v1, s0 +; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v0, s0 ; GCN-O0-NEXT: s_mov_b64 s[0:1], exec -; GCN-O0-NEXT: v_writelane_b32 v0, s0, 2 -; GCN-O0-NEXT: v_writelane_b32 v0, s1, 3 +; GCN-O0-NEXT: v_writelane_b32 v4, s0, 2 +; GCN-O0-NEXT: v_writelane_b32 v4, s1, 3 ; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[6:7] ; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB2_6 ; GCN-O0-NEXT: ; %bb.1: ; %bb.outer.then +; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[6:7] -; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s0, 2 -; GCN-O0-NEXT: 
s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[0:1], v1, s0 +; GCN-O0-NEXT: s_waitcnt vmcnt(1) +; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[0:1], v0, s0 ; GCN-O0-NEXT: s_mov_b64 s[2:3], exec ; GCN-O0-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] ; GCN-O0-NEXT: s_xor_b64 s[2:3], s[0:1], s[2:3] -; GCN-O0-NEXT: v_writelane_b32 v0, s2, 4 -; GCN-O0-NEXT: v_writelane_b32 v0, s3, 5 +; GCN-O0-NEXT: s_waitcnt vmcnt(0) +; GCN-O0-NEXT: v_writelane_b32 v4, s2, 4 +; GCN-O0-NEXT: v_writelane_b32 v4, s3, 5 ; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[6:7] ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB2_2 @@ -496,31 +477,30 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: .LBB2_2: ; %Flow ; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[6:7] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s0, v0, 4 -; GCN-O0-NEXT: v_readlane_b32 s1, v0, 5 +; GCN-O0-NEXT: v_readlane_b32 s0, v4, 4 +; GCN-O0-NEXT: v_readlane_b32 s1, v4, 5 ; GCN-O0-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] ; GCN-O0-NEXT: s_and_b64 s[0:1], exec, s[0:1] -; GCN-O0-NEXT: v_writelane_b32 v0, s0, 6 -; GCN-O0-NEXT: v_writelane_b32 v0, s1, 7 +; GCN-O0-NEXT: v_writelane_b32 v4, s0, 6 +; GCN-O0-NEXT: v_writelane_b32 v4, s1, 7 ; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[6:7] ; GCN-O0-NEXT: s_xor_b64 exec, exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB2_5 ; 
GCN-O0-NEXT: ; %bb.3: ; %bb.then +; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[6:7] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s0, v0, 0 -; GCN-O0-NEXT: v_readlane_b32 s1, v0, 1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: v_readlane_b32 s0, v4, 0 +; GCN-O0-NEXT: v_readlane_b32 s1, v4, 1 ; GCN-O0-NEXT: v_mov_b32_e32 v0, 1 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0 ; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1 ; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec @@ -536,16 +516,15 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 ; GCN-O0-NEXT: s_branch .LBB2_5 ; GCN-O0-NEXT: .LBB2_4: ; %bb.else +; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[6:7] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s0, v0, 0 -; GCN-O0-NEXT: v_readlane_b32 s1, v0, 1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: v_readlane_b32 s0, v4, 0 +; GCN-O0-NEXT: v_readlane_b32 s1, v4, 1 ; GCN-O0-NEXT: v_mov_b32_e32 v0, 2 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_add_i32_e64 v1, s[2:3], v1, v0 ; GCN-O0-NEXT: v_ashrrev_i32_e64 v3, 31, v1 ; GCN-O0-NEXT: ; kill: def $vgpr1 killed 
$vgpr1 def $vgpr1_vgpr2 killed $exec @@ -562,26 +541,25 @@ define amdgpu_kernel void @nested_if_if_else(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: .LBB2_5: ; %Flow1 ; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[6:7] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s0, v0, 6 -; GCN-O0-NEXT: v_readlane_b32 s1, v0, 7 +; GCN-O0-NEXT: v_readlane_b32 s0, v4, 6 +; GCN-O0-NEXT: v_readlane_b32 s1, v4, 7 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1] ; GCN-O0-NEXT: .LBB2_6: ; %bb.outer.end ; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[6:7] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s0, v0, 2 -; GCN-O0-NEXT: v_readlane_b32 s1, v0, 3 +; GCN-O0-NEXT: v_readlane_b32 s0, v4, 2 +; GCN-O0-NEXT: v_readlane_b32 s1, v4, 3 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1] -; GCN-O0-NEXT: v_mov_b32_e32 v2, 3 -; GCN-O0-NEXT: v_mov_b32_e32 v1, 0 +; GCN-O0-NEXT: v_mov_b32_e32 v1, 3 +; GCN-O0-NEXT: v_mov_b32_e32 v0, 0 ; GCN-O0-NEXT: s_mov_b32 m0, -1 -; GCN-O0-NEXT: ds_write_b32 v1, v2 -; GCN-O0-NEXT: ; kill: killed $vgpr0 +; GCN-O0-NEXT: ds_write_b32 v0, v1 ; GCN-O0-NEXT: s_endpgm bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -681,51 +659,46 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000 ; GCN-O0-NEXT: s_add_u32 s12, s12, s9 ; GCN-O0-NEXT: s_addc_u32 s13, s13, 0 -; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_mov_b32_e32 v1, v0 -; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 -; GCN-O0-NEXT: 
buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] ; GCN-O0-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x9 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v1 -; GCN-O0-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill +; GCN-O0-NEXT: v_mov_b32_e32 v1, v0 +; GCN-O0-NEXT: buffer_store_dword v1, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b32 s0, 2 -; GCN-O0-NEXT: v_lshlrev_b32_e64 v3, s0, v1 +; GCN-O0-NEXT: v_lshlrev_b32_e64 v2, s0, v0 ; GCN-O0-NEXT: s_mov_b32 s1, 0 ; GCN-O0-NEXT: ; implicit-def: $sgpr1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: v_mov_b32_e32 v2, 0 -; GCN-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v4, v2 +; GCN-O0-NEXT: v_mov_b32_e32 v1, 0 +; GCN-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GCN-O0-NEXT: v_mov_b32_e32 v3, v1 ; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) ; GCN-O0-NEXT: s_mov_b32 s2, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v3 +; GCN-O0-NEXT: v_mov_b32_e32 v1, v2 ; GCN-O0-NEXT: s_mov_b32 s1, s5 -; GCN-O0-NEXT: v_mov_b32_e32 v6, v4 -; GCN-O0-NEXT: v_add_i32_e64 v5, s[2:3], s2, v2 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s1 -; GCN-O0-NEXT: v_addc_u32_e64 v2, s[2:3], v2, v6, s[2:3] -; GCN-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GCN-O0-NEXT: v_mov_b32_e32 v6, v2 -; GCN-O0-NEXT: buffer_store_dword v5, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v6, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill +; GCN-O0-NEXT: v_mov_b32_e32 v5, v3 +; GCN-O0-NEXT: v_add_i32_e64 v4, s[2:3], s2, v1 +; GCN-O0-NEXT: v_mov_b32_e32 v1, s1 +; GCN-O0-NEXT: v_addc_u32_e64 v1, s[2:3], v1, v5, s[2:3] +; GCN-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GCN-O0-NEXT: v_mov_b32_e32 v5, v1 +; GCN-O0-NEXT: buffer_store_dword v4, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v5, 
off, s[12:15], 0 offset:8 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b32 s1, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s2, 0 ; GCN-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 ; GCN-O0-NEXT: s_mov_b32 s3, s1 ; GCN-O0-NEXT: ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-O0-NEXT: s_mov_b64 s[6:7], s[2:3] -; GCN-O0-NEXT: v_mov_b32_e32 v2, 0 -; GCN-O0-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64 -; GCN-O0-NEXT: v_cmp_lt_u32_e64 s[0:1], v1, s0 +; GCN-O0-NEXT: v_mov_b32_e32 v1, 0 +; GCN-O0-NEXT: buffer_store_dword v1, v[2:3], s[4:7], 0 addr64 +; GCN-O0-NEXT: v_cmp_lt_u32_e64 s[0:1], v0, s0 ; GCN-O0-NEXT: s_mov_b64 s[2:3], exec ; GCN-O0-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] ; GCN-O0-NEXT: s_xor_b64 s[2:3], s[0:1], s[2:3] -; GCN-O0-NEXT: s_waitcnt vmcnt(4) -; GCN-O0-NEXT: v_writelane_b32 v0, s2, 0 -; GCN-O0-NEXT: v_writelane_b32 v0, s3, 1 +; GCN-O0-NEXT: ; implicit-def: $vgpr6 : SGPR spill to VGPR lane +; GCN-O0-NEXT: v_writelane_b32 v6, s2, 0 +; GCN-O0-NEXT: v_writelane_b32 v6, s3, 1 ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v6, off, s[12:15], 0 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB3_1 @@ -733,28 +706,28 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: .LBB3_1: ; %Flow2 ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v6, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s0, v0, 0 -; GCN-O0-NEXT: v_readlane_b32 s1, v0, 1 +; GCN-O0-NEXT: v_readlane_b32 s0, v6, 0 +; GCN-O0-NEXT: v_readlane_b32 s1, v6, 1 ; GCN-O0-NEXT: s_or_saveexec_b64 
s[0:1], s[0:1] ; GCN-O0-NEXT: s_and_b64 s[0:1], exec, s[0:1] -; GCN-O0-NEXT: v_writelane_b32 v0, s0, 2 -; GCN-O0-NEXT: v_writelane_b32 v0, s1, 3 +; GCN-O0-NEXT: v_writelane_b32 v6, s0, 2 +; GCN-O0-NEXT: v_writelane_b32 v6, s1, 3 ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v6, off, s[12:15], 0 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] ; GCN-O0-NEXT: s_xor_b64 exec, exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB3_8 ; GCN-O0-NEXT: ; %bb.2: ; %bb.outer.then +; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v6, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] -; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s0, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s2, 0 ; GCN-O0-NEXT: s_mov_b32 s4, s2 @@ -763,23 +736,24 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: s_mov_b32 s1, s2 ; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5] -; GCN-O0-NEXT: v_mov_b32_e32 v2, 1 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v2, v[3:4], s[0:3], 0 addr64 offset:4 +; GCN-O0-NEXT: v_mov_b32_e32 v1, 1 +; GCN-O0-NEXT: s_waitcnt vmcnt(1) +; GCN-O0-NEXT: buffer_store_dword 
v1, v[2:3], s[0:3], 0 addr64 offset:4 ; GCN-O0-NEXT: s_mov_b32 s0, 2 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v1, s0 +; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v0, s0 ; GCN-O0-NEXT: s_mov_b64 s[0:1], exec -; GCN-O0-NEXT: v_writelane_b32 v0, s0, 4 -; GCN-O0-NEXT: v_writelane_b32 v0, s1, 5 +; GCN-O0-NEXT: s_waitcnt vmcnt(1) +; GCN-O0-NEXT: v_writelane_b32 v6, s0, 4 +; GCN-O0-NEXT: v_writelane_b32 v6, s1, 5 ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v6, off, s[12:15], 0 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] ; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB3_7 ; GCN-O0-NEXT: ; %bb.3: ; %bb.inner.then -; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_waitcnt expcnt(1) +; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s0, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s2, 0 @@ -789,19 +763,18 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: s_mov_b32 s1, s2 ; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5] -; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: v_mov_b32_e32 v0, 2 ; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 offset:8 ; GCN-O0-NEXT: s_branch .LBB3_7 ; GCN-O0-NEXT: .LBB3_4: ; %bb.outer.else +; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 ; 
GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v6, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] -; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s1, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s0, 0 ; GCN-O0-NEXT: s_mov_b32 s2, s0 @@ -810,22 +783,23 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: s_mov_b32 s5, s0 ; GCN-O0-NEXT: ; kill: def $sgpr4_sgpr5 killed $sgpr4_sgpr5 def $sgpr4_sgpr5_sgpr6_sgpr7 ; GCN-O0-NEXT: s_mov_b64 s[6:7], s[2:3] -; GCN-O0-NEXT: v_mov_b32_e32 v2, 3 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64 offset:12 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v1, s0 +; GCN-O0-NEXT: v_mov_b32_e32 v1, 3 +; GCN-O0-NEXT: s_waitcnt vmcnt(1) +; GCN-O0-NEXT: buffer_store_dword v1, v[2:3], s[4:7], 0 addr64 offset:12 +; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v0, s0 ; GCN-O0-NEXT: s_mov_b64 s[0:1], exec -; GCN-O0-NEXT: v_writelane_b32 v0, s0, 6 -; GCN-O0-NEXT: v_writelane_b32 v0, s1, 7 +; GCN-O0-NEXT: s_waitcnt vmcnt(1) +; GCN-O0-NEXT: v_writelane_b32 v6, s0, 6 +; GCN-O0-NEXT: v_writelane_b32 v6, s1, 7 ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v6, off, s[12:15], 0 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] ; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB3_6 ; GCN-O0-NEXT: ; %bb.5: ; %bb.inner.then2 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_waitcnt 
expcnt(1) +; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s0, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s2, 0 @@ -835,43 +809,41 @@ define amdgpu_kernel void @nested_if_else_if(ptr addrspace(1) nocapture %arg) { ; GCN-O0-NEXT: s_mov_b32 s1, s2 ; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5] -; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: v_mov_b32_e32 v0, 4 ; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 offset:16 ; GCN-O0-NEXT: .LBB3_6: ; %Flow ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v6, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s0, v0, 6 -; GCN-O0-NEXT: v_readlane_b32 s1, v0, 7 +; GCN-O0-NEXT: v_readlane_b32 s0, v6, 6 +; GCN-O0-NEXT: v_readlane_b32 s1, v6, 7 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1] ; GCN-O0-NEXT: s_branch .LBB3_1 ; GCN-O0-NEXT: .LBB3_7: ; %Flow1 ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v6, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s0, v0, 4 -; GCN-O0-NEXT: v_readlane_b32 s1, v0, 5 +; GCN-O0-NEXT: v_readlane_b32 s0, v6, 4 +; GCN-O0-NEXT: v_readlane_b32 s1, v6, 5 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1] ; GCN-O0-NEXT: .LBB3_8: ; %bb.outer.end ; GCN-O0-NEXT: s_or_saveexec_b64 s[8:9], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte 
Folded Reload +; GCN-O0-NEXT: buffer_load_dword v6, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[8:9] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s0, v0, 2 -; GCN-O0-NEXT: v_readlane_b32 s1, v0, 3 +; GCN-O0-NEXT: v_readlane_b32 s0, v6, 2 +; GCN-O0-NEXT: v_readlane_b32 s1, v6, 3 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1] -; GCN-O0-NEXT: v_mov_b32_e32 v2, 3 -; GCN-O0-NEXT: v_mov_b32_e32 v1, 0 +; GCN-O0-NEXT: v_mov_b32_e32 v1, 3 +; GCN-O0-NEXT: v_mov_b32_e32 v0, 0 ; GCN-O0-NEXT: s_mov_b32 m0, -1 -; GCN-O0-NEXT: ds_write_b32 v1, v2 -; GCN-O0-NEXT: ; kill: killed $vgpr0 +; GCN-O0-NEXT: ds_write_b32 v0, v1 ; GCN-O0-NEXT: s_endpgm bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -935,44 +907,39 @@ define amdgpu_kernel void @s_endpgm_unsafe_barrier(ptr addrspace(1) nocapture %a ; GCN-O0-NEXT: s_mov_b32 s15, 0xe8f000 ; GCN-O0-NEXT: s_add_u32 s12, s12, s9 ; GCN-O0-NEXT: s_addc_u32 s13, s13, 0 -; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_mov_b32_e32 v1, v0 -; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[6:7] ; GCN-O0-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN-O0-NEXT: v_writelane_b32 v0, s0, 0 -; GCN-O0-NEXT: v_writelane_b32 v0, s1, 1 -; GCN-O0-NEXT: v_mov_b32_e32 v2, v1 -; GCN-O0-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill +; GCN-O0-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane +; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) +; GCN-O0-NEXT: v_writelane_b32 v3, s0, 0 +; GCN-O0-NEXT: v_writelane_b32 v3, s1, 1 +; GCN-O0-NEXT: v_mov_b32_e32 v1, v0 +; GCN-O0-NEXT: buffer_store_dword v1, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b32 s0, 1 -; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v1, s0 +; GCN-O0-NEXT: v_cmp_gt_u32_e64 s[2:3], v0, s0 ; GCN-O0-NEXT: s_mov_b64 s[0:1], exec -; 
GCN-O0-NEXT: v_writelane_b32 v0, s0, 2 -; GCN-O0-NEXT: v_writelane_b32 v0, s1, 3 +; GCN-O0-NEXT: v_writelane_b32 v3, s0, 2 +; GCN-O0-NEXT: v_writelane_b32 v3, s1, 3 ; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v3, off, s[12:15], 0 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[6:7] ; GCN-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] ; GCN-O0-NEXT: s_mov_b64 exec, s[0:1] ; GCN-O0-NEXT: s_cbranch_execz .LBB4_2 ; GCN-O0-NEXT: ; %bb.1: ; %bb.then +; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[12:15], 0 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt expcnt(0) +; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[6:7] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s0, v1, 0 -; GCN-O0-NEXT: v_readlane_b32 s1, v1, 1 -; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: v_readlane_b32 s0, v3, 0 +; GCN-O0-NEXT: v_readlane_b32 s1, v3, 1 ; GCN-O0-NEXT: s_mov_b32 s2, 0xf000 ; GCN-O0-NEXT: s_mov_b32 s4, 0 ; GCN-O0-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 ; GCN-O0-NEXT: s_mov_b32 s5, s2 ; GCN-O0-NEXT: ; kill: def $sgpr0_sgpr1 killed $sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-O0-NEXT: s_mov_b64 s[2:3], s[4:5] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_ashrrev_i32_e64 v2, 31, v0 ; GCN-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec ; GCN-O0-NEXT: v_mov_b32_e32 v1, v2 @@ -983,14 +950,13 @@ define amdgpu_kernel void @s_endpgm_unsafe_barrier(ptr addrspace(1) nocapture %a ; GCN-O0-NEXT: .LBB4_2: ; %bb.end ; GCN-O0-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[12:15], 
0 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v3, off, s[12:15], 0 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[6:7] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s0, v0, 2 -; GCN-O0-NEXT: v_readlane_b32 s1, v0, 3 +; GCN-O0-NEXT: v_readlane_b32 s0, v3, 2 +; GCN-O0-NEXT: v_readlane_b32 s1, v3, 3 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[0:1] ; GCN-O0-NEXT: s_barrier -; GCN-O0-NEXT: ; kill: killed $vgpr0 ; GCN-O0-NEXT: s_endpgm bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -1082,91 +1048,84 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0: ; %bb.0: ; %bb ; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] -; GCN-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane -; GCN-O0-NEXT: v_mov_b32_e32 v1, v0 -; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: s_waitcnt expcnt(1) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 s[4:5], 0 ; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-O0-NEXT: s_waitcnt vmcnt(1) -; GCN-O0-NEXT: v_writelane_b32 v0, s6, 0 -; GCN-O0-NEXT: v_writelane_b32 v0, s7, 1 -; GCN-O0-NEXT: v_writelane_b32 v0, s4, 2 -; GCN-O0-NEXT: v_writelane_b32 v0, s5, 3 +; GCN-O0-NEXT: ; implicit-def: $vgpr6 : SGPR spill to VGPR lane +; GCN-O0-NEXT: s_waitcnt expcnt(1) +; GCN-O0-NEXT: v_writelane_b32 v6, s6, 0 +; GCN-O0-NEXT: v_writelane_b32 v6, s7, 1 +; GCN-O0-NEXT: 
v_writelane_b32 v6, s4, 2 +; GCN-O0-NEXT: v_writelane_b32 v6, s5, 3 ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] ; GCN-O0-NEXT: .LBB5_1: ; %bb1 ; GCN-O0-NEXT: ; =>This Inner Loop Header: Depth=1 +; GCN-O0-NEXT: s_waitcnt expcnt(1) +; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s8, v0, 2 -; GCN-O0-NEXT: v_readlane_b32 s9, v0, 3 -; GCN-O0-NEXT: v_readlane_b32 s6, v0, 0 -; GCN-O0-NEXT: v_readlane_b32 s7, v0, 1 -; GCN-O0-NEXT: v_writelane_b32 v0, s6, 4 -; GCN-O0-NEXT: v_writelane_b32 v0, s7, 5 -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: v_readlane_b32 s8, v6, 2 +; GCN-O0-NEXT: v_readlane_b32 s9, v6, 3 +; GCN-O0-NEXT: v_readlane_b32 s6, v6, 0 +; GCN-O0-NEXT: v_readlane_b32 s7, v6, 1 +; GCN-O0-NEXT: v_writelane_b32 v6, s6, 4 +; GCN-O0-NEXT: v_writelane_b32 v6, s7, 5 ; GCN-O0-NEXT: s_mov_b32 s4, 0x207 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, s4 +; GCN-O0-NEXT: v_cmp_lt_i32_e64 s[4:5], v0, s4 ; GCN-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] -; GCN-O0-NEXT: v_writelane_b32 v0, s4, 6 -; GCN-O0-NEXT: v_writelane_b32 v0, s5, 7 -; GCN-O0-NEXT: v_writelane_b32 v0, s6, 0 -; GCN-O0-NEXT: v_writelane_b32 v0, s7, 1 +; GCN-O0-NEXT: v_writelane_b32 v6, s4, 6 +; GCN-O0-NEXT: v_writelane_b32 v6, s5, 7 +; GCN-O0-NEXT: v_writelane_b32 v6, s6, 0 +; GCN-O0-NEXT: v_writelane_b32 v6, s7, 1 ; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] 
-; GCN-O0-NEXT: v_writelane_b32 v0, s6, 2 -; GCN-O0-NEXT: v_writelane_b32 v0, s7, 3 +; GCN-O0-NEXT: v_writelane_b32 v6, s6, 2 +; GCN-O0-NEXT: v_writelane_b32 v6, s7, 3 ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] ; GCN-O0-NEXT: s_andn2_b64 exec, exec, s[4:5] ; GCN-O0-NEXT: s_cbranch_execnz .LBB5_1 ; GCN-O0-NEXT: ; %bb.2: ; %bb2 ; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 +; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v0, 6 -; GCN-O0-NEXT: v_readlane_b32 s5, v0, 7 +; GCN-O0-NEXT: v_readlane_b32 s4, v6, 6 +; GCN-O0-NEXT: v_readlane_b32 s5, v6, 7 ; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b32 s6, 0 -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[4:5], v1, s6 -; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v1, s6 -; GCN-O0-NEXT: v_writelane_b32 v0, s4, 8 -; GCN-O0-NEXT: v_writelane_b32 v0, s5, 9 +; GCN-O0-NEXT: v_cmp_ne_u32_e64 s[4:5], v0, s6 +; GCN-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v0, s6 +; GCN-O0-NEXT: v_writelane_b32 v6, s4, 8 +; GCN-O0-NEXT: v_writelane_b32 v6, s5, 9 ; GCN-O0-NEXT: s_mov_b32 s4, 0 ; GCN-O0-NEXT: s_mov_b32 s8, s4 ; GCN-O0-NEXT: s_mov_b32 s9, s4 ; GCN-O0-NEXT: s_mov_b32 s10, s4 ; GCN-O0-NEXT: s_mov_b32 s11, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v4, 
s11 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GCN-O0-NEXT: v_mov_b32_e32 v0, s8 +; GCN-O0-NEXT: v_mov_b32_e32 v1, s9 +; GCN-O0-NEXT: v_mov_b32_e32 v2, s10 +; GCN-O0-NEXT: v_mov_b32_e32 v3, s11 +; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: v_writelane_b32 v0, s4, 10 -; GCN-O0-NEXT: v_writelane_b32 v0, s5, 11 +; GCN-O0-NEXT: v_writelane_b32 v6, s4, 10 +; GCN-O0-NEXT: v_writelane_b32 v6, s5, 11 ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] ; GCN-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] @@ -1175,31 +1134,31 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] ; GCN-O0-NEXT: ; implicit-def: $sgpr4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s4 -; GCN-O0-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen +; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 +; GCN-O0-NEXT: 
buffer_load_dword v0, v0, s[0:3], 0 offen ; GCN-O0-NEXT: s_mov_b32 s4, 0 ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_cmp_lt_f32_e64 s[6:7], v1, s4 +; GCN-O0-NEXT: v_cmp_lt_f32_e64 s[6:7], v0, s4 ; GCN-O0-NEXT: s_mov_b32 s8, s4 ; GCN-O0-NEXT: s_mov_b32 s9, s4 ; GCN-O0-NEXT: s_mov_b32 s10, s4 ; GCN-O0-NEXT: s_mov_b32 s11, s4 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s8 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s9 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s10 -; GCN-O0-NEXT: v_mov_b32_e32 v4, s11 -; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GCN-O0-NEXT: v_mov_b32_e32 v0, s8 +; GCN-O0-NEXT: v_mov_b32_e32 v1, s9 +; GCN-O0-NEXT: v_mov_b32_e32 v2, s10 +; GCN-O0-NEXT: v_mov_b32_e32 v3, s11 +; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: v_writelane_b32 v0, s4, 12 -; GCN-O0-NEXT: v_writelane_b32 v0, s5, 13 +; GCN-O0-NEXT: v_writelane_b32 v6, s4, 12 +; GCN-O0-NEXT: v_writelane_b32 v6, s5, 13 ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] ; GCN-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] @@ -1217,7 +1176,7 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: s_mov_b32 s5, s10 ; 
GCN-O0-NEXT: s_mov_b32 s6, s9 ; GCN-O0-NEXT: s_mov_b32 s7, s8 -; GCN-O0-NEXT: s_waitcnt expcnt(0) +; GCN-O0-NEXT: s_waitcnt expcnt(1) ; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 ; GCN-O0-NEXT: v_mov_b32_e32 v1, s5 ; GCN-O0-NEXT: v_mov_b32_e32 v2, s6 @@ -1229,69 +1188,64 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: s_branch .LBB5_6 ; GCN-O0-NEXT: .LBB5_5: ; %Flow2 ; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: s_waitcnt expcnt(1) -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v4, 10 -; GCN-O0-NEXT: v_readlane_b32 s5, v4, 11 -; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: s_waitcnt expcnt(0) +; GCN-O0-NEXT: s_waitcnt expcnt(3) ; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt expcnt(2) ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt expcnt(1) ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(3) +; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 +; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] +; GCN-O0-NEXT: s_waitcnt vmcnt(0) +; GCN-O0-NEXT: v_readlane_b32 s4, v6, 10 +; GCN-O0-NEXT: v_readlane_b32 s5, v6, 11 +; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 
offset:48 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_branch .LBB5_7 ; GCN-O0-NEXT: .LBB5_6: ; %Flow ; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: s_waitcnt expcnt(1) -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v4, 12 -; GCN-O0-NEXT: v_readlane_b32 s5, v4, 13 -; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN-O0-NEXT: s_waitcnt expcnt(0) +; GCN-O0-NEXT: s_waitcnt expcnt(3) ; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt expcnt(2) ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt expcnt(1) ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_waitcnt vmcnt(3) +; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 +; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] +; GCN-O0-NEXT: s_waitcnt vmcnt(0) +; GCN-O0-NEXT: v_readlane_b32 s4, v6, 12 +; GCN-O0-NEXT: v_readlane_b32 s5, v6, 13 +; GCN-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_waitcnt vmcnt(3) ; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_branch 
.LBB5_5 ; GCN-O0-NEXT: .LBB5_7: ; %bb10 ; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: s_waitcnt expcnt(3) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s6, v0, 8 -; GCN-O0-NEXT: v_readlane_b32 s7, v0, 9 +; GCN-O0-NEXT: v_readlane_b32 s6, v6, 8 +; GCN-O0-NEXT: v_readlane_b32 s7, v6, 9 ; GCN-O0-NEXT: s_mov_b64 s[4:5], -1 -; GCN-O0-NEXT: v_writelane_b32 v0, s4, 14 -; GCN-O0-NEXT: v_writelane_b32 v0, s5, 15 +; GCN-O0-NEXT: v_writelane_b32 v6, s4, 14 +; GCN-O0-NEXT: v_writelane_b32 v6, s5, 15 ; GCN-O0-NEXT: s_mov_b64 s[4:5], exec -; GCN-O0-NEXT: v_writelane_b32 v0, s4, 16 -; GCN-O0-NEXT: v_writelane_b32 v0, s5, 17 +; GCN-O0-NEXT: v_writelane_b32 v6, s4, 16 +; GCN-O0-NEXT: v_writelane_b32 v6, s5, 17 ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] ; GCN-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] @@ -1300,103 +1254,99 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] ; GCN-O0-NEXT: s_mov_b64 s[4:5], 0 ; GCN-O0-NEXT: s_xor_b64 s[4:5], exec, -1 ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_writelane_b32 v0, s4, 14 -; GCN-O0-NEXT: v_writelane_b32 v0, s5, 15 +; GCN-O0-NEXT: v_writelane_b32 v6, s4, 14 +; GCN-O0-NEXT: v_writelane_b32 v6, s5, 
15 ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] ; GCN-O0-NEXT: .LBB5_9: ; %Flow3 ; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 -; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s8, v4, 16 -; GCN-O0-NEXT: v_readlane_b32 s9, v4, 17 -; GCN-O0-NEXT: s_or_b64 exec, exec, s[8:9] -; GCN-O0-NEXT: v_readlane_b32 s6, v4, 4 -; GCN-O0-NEXT: v_readlane_b32 s7, v4, 5 -; GCN-O0-NEXT: v_readlane_b32 s4, v4, 14 -; GCN-O0-NEXT: v_readlane_b32 s5, v4, 15 -; GCN-O0-NEXT: s_waitcnt expcnt(0) +; GCN-O0-NEXT: s_waitcnt expcnt(4) ; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt expcnt(3) ; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt expcnt(2) ; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt expcnt(1) ; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 +; GCN-O0-NEXT: s_waitcnt expcnt(0) +; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] +; GCN-O0-NEXT: s_waitcnt vmcnt(0) +; GCN-O0-NEXT: v_readlane_b32 s8, v6, 16 +; GCN-O0-NEXT: v_readlane_b32 s9, v6, 17 +; GCN-O0-NEXT: s_or_b64 exec, exec, s[8:9] +; GCN-O0-NEXT: v_readlane_b32 s6, v6, 4 +; GCN-O0-NEXT: v_readlane_b32 s7, v6, 5 +; GCN-O0-NEXT: v_readlane_b32 s4, v6, 14 +; GCN-O0-NEXT: v_readlane_b32 s5, v6, 15 ; GCN-O0-NEXT: s_and_b64 s[4:5], exec, s[4:5] ; GCN-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] ; GCN-O0-NEXT: s_mov_b64 
s[6:7], 0 ; GCN-O0-NEXT: s_mov_b64 s[8:9], s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v4, s8, 0 -; GCN-O0-NEXT: v_writelane_b32 v4, s9, 1 -; GCN-O0-NEXT: v_writelane_b32 v4, s6, 2 -; GCN-O0-NEXT: v_writelane_b32 v4, s7, 3 +; GCN-O0-NEXT: v_writelane_b32 v6, s8, 0 +; GCN-O0-NEXT: v_writelane_b32 v6, s9, 1 +; GCN-O0-NEXT: v_writelane_b32 v6, s6, 2 +; GCN-O0-NEXT: v_writelane_b32 v6, s7, 3 ; GCN-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GCN-O0-NEXT: v_writelane_b32 v4, s6, 18 -; GCN-O0-NEXT: v_writelane_b32 v4, s7, 19 +; GCN-O0-NEXT: v_writelane_b32 v6, s6, 18 +; GCN-O0-NEXT: v_writelane_b32 v6, s7, 19 ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] -; GCN-O0-NEXT: s_waitcnt vmcnt(4) ; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_waitcnt vmcnt(4) ; GCN-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_waitcnt vmcnt(4) ; GCN-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; GCN-O0-NEXT: s_waitcnt vmcnt(4) ; GCN-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_andn2_b64 exec, exec, s[4:5] ; GCN-O0-NEXT: s_cbranch_execnz .LBB5_1 ; GCN-O0-NEXT: ; %bb.10: ; %bb12 ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: s_waitcnt expcnt(3) -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GCN-O0-NEXT: s_waitcnt expcnt(4) +; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v0, 18 -; GCN-O0-NEXT: v_readlane_b32 s5, v0, 19 +; GCN-O0-NEXT: v_readlane_b32 s4, v6, 18 +; GCN-O0-NEXT: v_readlane_b32 s5, v6, 19 ; GCN-O0-NEXT: s_or_b64 exec, exec, 
s[4:5] ; GCN-O0-NEXT: ; %bb.11: ; %bb12 -; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] +; GCN-O0-NEXT: s_waitcnt expcnt(3) +; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_waitcnt expcnt(2) -; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_waitcnt expcnt(1) -; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_waitcnt expcnt(0) -; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_mov_b32_e32 v5, v4 +; GCN-O0-NEXT: v_mov_b32_e32 v4, v3 ; GCN-O0-NEXT: ; implicit-def: $sgpr4 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s4 -; GCN-O0-NEXT: buffer_store_dword v5, v6, s[0:3], 0 offen +; GCN-O0-NEXT: v_mov_b32_e32 v5, s4 +; GCN-O0-NEXT: buffer_store_dword v4, v5, s[0:3], 0 offen ; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) -; GCN-O0-NEXT: v_mov_b32_e32 v5, v3 +; GCN-O0-NEXT: v_mov_b32_e32 v4, v2 ; GCN-O0-NEXT: ; implicit-def: $sgpr4 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s4 -; GCN-O0-NEXT: buffer_store_dword v5, v6, s[0:3], 0 offen +; GCN-O0-NEXT: v_mov_b32_e32 v5, s4 +; GCN-O0-NEXT: buffer_store_dword v4, v5, s[0:3], 0 offen ; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) -; GCN-O0-NEXT: v_mov_b32_e32 v5, v2 +; GCN-O0-NEXT: v_mov_b32_e32 v4, v1 ; GCN-O0-NEXT: ; implicit-def: $sgpr4 -; GCN-O0-NEXT: v_mov_b32_e32 v6, s4 -; GCN-O0-NEXT: buffer_store_dword v5, v6, s[0:3], 0 offen +; 
GCN-O0-NEXT: v_mov_b32_e32 v5, s4 +; GCN-O0-NEXT: buffer_store_dword v4, v5, s[0:3], 0 offen ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec +; GCN-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec ; GCN-O0-NEXT: ; implicit-def: $sgpr4 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s4 -; GCN-O0-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen +; GCN-O0-NEXT: v_mov_b32_e32 v1, s4 +; GCN-O0-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: ; kill: killed $vgpr0 ; GCN-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; GCN-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[4:5] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) ; GCN-O0-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll index 789150f690d52..7c09fec908f93 100644 --- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -46,6 +46,9 @@ ; VMEM: [[ENDIF]]: +; Restore val +; VGPR: buffer_load_dword [[RELOAD_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload + ; Reload and restore exec mask ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] @@ -58,7 +61,7 @@ ; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]] ; Restore val -; GCN: buffer_load_dword [[RELOAD_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload +; VMEM: buffer_load_dword [[RELOAD_VAL:v[0-9]+]], off, s[0:3], 
0 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RELOAD_VAL]] @@ -120,6 +123,7 @@ endif: ; GCN: buffer_store_dword v[[VAL_LOOP_RELOAD]], off, s[0:3], 0 offset:[[VAL_SUB_OFFSET:[0-9]+]] ; 4-byte Folded Spill ; GCN: [[END]]: +; VGPR: buffer_load_dword v[[VAL_END:[0-9]+]], off, s[0:3], 0 offset:[[VAL_SUB_OFFSET]] ; 4-byte Folded Reload ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] @@ -129,7 +133,8 @@ endif: ; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 1 ; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]] -; GCN: buffer_load_dword v[[VAL_END:[0-9]+]], off, s[0:3], 0 offset:[[VAL_SUB_OFFSET]] ; 4-byte Folded Reload + +; VMEM: buffer_load_dword v[[VAL_END:[0-9]+]], off, s[0:3], 0 offset:[[VAL_SUB_OFFSET]] ; 4-byte Folded Reload ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[VAL_END]] @@ -189,6 +194,7 @@ end: ; GCN-NEXT: s_branch [[ELSE:.LBB[0-9]+_[0-9]+]] ; GCN: [[FLOW]]: ; %Flow +; VGPR: buffer_load_dword [[FLOW_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[FLOW_VAL_OFFSET:[0-9]+]] ; 4-byte Folded Reload ; VGPR: buffer_load_dword [[SPILL_VGPR:v[0-9]+]], off, s[0:3], 0 ; 4-byte Folded Reload ; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] @@ -200,7 +206,7 @@ end: ; GCN: s_or_saveexec_b64 s[[[FLOW_S_RELOAD_SAVEEXEC_LO_SAVEEXEC:[0-9]+]]:[[FLOW_S_RELOAD_SAVEEXEC_HI_SAVEEXEC:[0-9]+]]], s[[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]] -; GCN: buffer_load_dword [[FLOW_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[FLOW_VAL_OFFSET:[0-9]+]] ; 4-byte Folded Reload +; VMEM: buffer_load_dword [[FLOW_VAL:v[0-9]+]], off, s[0:3], 0 offset:[[FLOW_VAL_OFFSET:[0-9]+]] ; 4-byte Folded 
Reload ; Regular spill value restored after exec modification ; Followed by spill @@ -234,6 +240,7 @@ end: ; GCN-NEXT: s_branch [[FLOW]] ; GCN: [[ENDIF]]: +; VGPR: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[FLOW_SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[FLOW_SAVEEXEC_HI_LANE]] @@ -245,7 +252,7 @@ end: ; GCN: s_or_b64 exec, exec, s[[[S_RELOAD_SAVEEXEC_LO]]:[[S_RELOAD_SAVEEXEC_HI]]] -; GCN: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload +; VMEM: buffer_load_dword v[[RESULT:[0-9]+]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Reload ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v[[RESULT]] define amdgpu_kernel void @divergent_if_else_endif(ptr addrspace(1) %out) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir index d5cdf584a75de..a14d515688a8b 100644 --- a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir +++ b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir @@ -14,10 +14,10 @@ body: | ; CHECK-LABEL: name: def_csr_sgpr ; CHECK: bb.0: ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $sgpr42, $sgpr43, $sgpr46, $sgpr47, $vgpr0 + ; CHECK-NEXT: liveins: $sgpr42, $sgpr43, $sgpr46, $sgpr47 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR 
$sgpr42, 0, $vgpr0 ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr43, 1, $vgpr0 @@ -26,8 +26,6 @@ body: | ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $sgpr42 = S_MOV_B32 0 ; CHECK-NEXT: $sgpr43 = S_MOV_B32 1 ; CHECK-NEXT: $sgpr46_sgpr47 = S_MOV_B64 2 diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll index b541be9f5aa44..6686742e449f5 100644 --- a/llvm/test/CodeGen/AMDGPU/div_i128.ll +++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll @@ -220,334 +220,327 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0: ; %bb.0: ; %_udiv-special-cases ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane -; GFX9-O0-NEXT: v_mov_b32_e32 v21, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v4 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v2 -; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] +; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v20, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, 
v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v22, v7 +; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_mov_b32_e32 v21, v0 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v9, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v2 +; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v3 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 +; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4 -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v10 -; GFX9-O0-NEXT: v_mov_b32_e32 v20, v11 +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v9 +; GFX9-O0-NEXT: v_mov_b32_e32 v19, v10 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 
0 -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_writelane_b32 v0, s6, 0 -; GFX9-O0-NEXT: v_writelane_b32 v0, s7, 1 +; GFX9-O0-NEXT: ; implicit-def: $vgpr30 : SGPR spill to VGPR lane +; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 0 +; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 1 ; GFX9-O0-NEXT: s_mov_b32 s10, s6 -; GFX9-O0-NEXT: v_writelane_b32 v0, s10, 2 +; GFX9-O0-NEXT: v_writelane_b32 v30, s10, 2 ; GFX9-O0-NEXT: s_mov_b32 s11, s7 -; GFX9-O0-NEXT: v_writelane_b32 v0, s11, 3 -; GFX9-O0-NEXT: v_sub_co_u32_e32 v6, vcc, s10, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s11 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v1, v3, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v1, v14, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s11 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v20, vcc +; GFX9-O0-NEXT: v_writelane_b32 v30, s11, 3 +; GFX9-O0-NEXT: v_sub_co_u32_e32 v5, vcc, s10, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s11 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v3, vcc, v0, v2, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s10 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v0, v13, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s11 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v19, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v4 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v7 -; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[6:7] -; GFX9-O0-NEXT: v_cmp_lt_i64_e64 s[4:5], v[10:11], s[4:5] -; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[4:5] +; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v3 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6 +; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[6:7] +; GFX9-O0-NEXT: v_cmp_lt_i64_e64 s[4:5], v[9:10], s[4:5] ; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[4:5] +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] ; 
GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v2 ; GFX9-O0-NEXT: v_mov_b32_e32 v16, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, v4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v20, v1, s[4:5] -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v14, v1, s[4:5] +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v5 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v19, v0, s[4:5] +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v13, v0, s[4:5] ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0 ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v8 -; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 killed $vgpr8_vgpr9 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v21 -; GFX9-O0-NEXT: v_mov_b32_e32 v15, v22 -; GFX9-O0-NEXT: v_sub_co_u32_e32 v18, vcc, s10, v7 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, s11 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v8, v9, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v8, s10 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v8, v13, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v8, s11 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v8, v15, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7 +; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $vgpr7_vgpr8 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v20 +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v21 +; GFX9-O0-NEXT: v_sub_co_u32_e32 v17, vcc, s10, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, s11 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v9, vcc, v7, v8, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v7, s10 
+; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v7, v12, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v7, s11 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v14, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v19, v10 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v19 -; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[6:7] -; GFX9-O0-NEXT: v_cmp_lt_i64_e64 s[4:5], v[21:22], s[4:5] -; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, v9, v10, s[4:5] +; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v18, v9 ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[4:5] +; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[6:7] +; GFX9-O0-NEXT: v_cmp_lt_i64_e64 s[4:5], v[20:21], s[4:5] +; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, v8, v9, s[4:5] +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v8, s[4:5] ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v7 -; GFX9-O0-NEXT: v_mov_b32_e32 v19, v10 +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v18, v9 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v8 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v12 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, v15, v8, s[4:5] -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v11 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v13, v8, s[4:5] +; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v11 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v14, v7, s[4:5] +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v10 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v12, v7, s[4:5] ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; 
GFX9-O0-NEXT: ; implicit-def: $sgpr4 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v7 ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v8 -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v9 -; GFX9-O0-NEXT: v_xor_b32_e64 v15, v15, v20 -; GFX9-O0-NEXT: v_xor_b32_e64 v13, v13, v14 -; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15 +; GFX9-O0-NEXT: v_xor_b32_e64 v14, v14, v19 +; GFX9-O0-NEXT: v_xor_b32_e64 v12, v12, v13 +; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v14 ; GFX9-O0-NEXT: s_mov_b32 s4, 63 -; GFX9-O0-NEXT: v_ashrrev_i64 v[13:14], s4, v[13:14] -; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_ashrrev_i64 v[12:13], s4, v[12:13] +; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v12 +; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill ; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11 -; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v10 +; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; 
GFX9-O0-NEXT: v_mov_b32_e32 v12, v17 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, v18 -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v19 -; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v6 +; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-O0-NEXT: v_mov_b32_e32 v13, v5 -; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v4 +; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v15 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16 -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v17 -; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v12 -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v19 -; GFX9-O0-NEXT: v_or_b32_e64 v15, v13, v14 -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v11 -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v18 -; GFX9-O0-NEXT: v_or_b32_e64 v13, v13, v14 -; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15 -; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[13:14], s[6:7] -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17 -; GFX9-O0-NEXT: v_or_b32_e64 v15, v13, v14 -; GFX9-O0-NEXT: v_mov_b32_e32 
v14, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16 -; GFX9-O0-NEXT: v_or_b32_e64 v13, v13, v14 -; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15 -; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[13:14], s[6:7] +; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11 +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v18 +; GFX9-O0-NEXT: v_or_b32_e64 v14, v12, v13 +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v10 +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v17 +; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v13 +; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v14 +; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[12:13], s[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v16 +; GFX9-O0-NEXT: v_or_b32_e64 v14, v12, v13 +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v15 +; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v13 +; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v14 +; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[12:13], s[6:7] ; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[11:12], s[8:9] -; GFX9-O0-NEXT: v_ffbh_u32_e64 v8, v8 +; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[10:11], s[8:9] +; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7 ; GFX9-O0-NEXT: s_mov_b32 s13, 32 -; GFX9-O0-NEXT: v_add_u32_e64 v8, v8, s13 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v9, v9 -; GFX9-O0-NEXT: v_min_u32_e64 v8, v8, v9 +; GFX9-O0-NEXT: v_add_u32_e64 v7, v7, s13 +; GFX9-O0-NEXT: v_ffbh_u32_e64 v8, v8 +; GFX9-O0-NEXT: v_min_u32_e64 v7, v7, v8 ; GFX9-O0-NEXT: s_mov_b32 s12, 0 ; GFX9-O0-NEXT: ; implicit-def: $sgpr14 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, s12 -; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec -; 
GFX9-O0-NEXT: v_mov_b32_e32 v9, v11 -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v9 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7 -; GFX9-O0-NEXT: v_add_u32_e64 v7, v7, s13 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v10, v10 -; GFX9-O0-NEXT: v_min_u32_e64 v13, v7, v10 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, s12 +; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v10 +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v8 +; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v6 +; GFX9-O0-NEXT: v_add_u32_e64 v6, v6, s13 +; GFX9-O0-NEXT: v_ffbh_u32_e64 v9, v9 +; GFX9-O0-NEXT: v_min_u32_e64 v12, v6, v9 ; GFX9-O0-NEXT: ; implicit-def: $sgpr14 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, s12 -; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, s12 +; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v6 ; GFX9-O0-NEXT: s_mov_b64 s[14:15], 64 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v13 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v12 ; GFX9-O0-NEXT: s_mov_b32 s16, s14 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v13 ; GFX9-O0-NEXT: s_mov_b32 s18, s15 -; GFX9-O0-NEXT: v_add_co_u32_e64 v10, s[16:17], v10, s16 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, s18 -; GFX9-O0-NEXT: v_addc_co_u32_e64 v7, s[16:17], v7, v11, s[16:17] -; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v7 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v11 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v12, s[8:9] -; GFX9-O0-NEXT: v_mov_b32_e32 v9, v8 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v10 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, v8, v9, s[8:9] +; GFX9-O0-NEXT: v_add_co_u32_e64 v9, s[16:17], v9, s16 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, s18 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v6, s[16:17], v6, v10, s[16:17] +; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec +; 
GFX9-O0-NEXT: v_mov_b32_e32 v10, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v11, s[8:9] +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v9 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v7, v8, s[8:9] ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v7 +; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v6 ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[5:6], s[8:9] +; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[4:5], s[8:9] +; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v0 +; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s13 ; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v1 -; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s13 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v2 -; GFX9-O0-NEXT: v_min_u32_e64 v6, v5, v6 +; GFX9-O0-NEXT: v_min_u32_e64 v5, v4, v5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr16 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s12 -; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v3 -; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s13 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v11, v4 -; GFX9-O0-NEXT: v_min_u32_e64 v12, v5, v11 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s12 +; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 +; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v2 +; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s13 +; GFX9-O0-NEXT: v_ffbh_u32_e64 v10, v3 +; GFX9-O0-NEXT: v_min_u32_e64 v11, v4, v10 ; GFX9-O0-NEXT: ; implicit-def: $sgpr13 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s12 -; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v5 -; GFX9-O0-NEXT: 
v_mov_b32_e32 v11, v12 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s12 +; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v11 ; GFX9-O0-NEXT: s_mov_b32 s12, s14 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v13 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12 ; GFX9-O0-NEXT: s_mov_b32 s14, s15 -; GFX9-O0-NEXT: v_add_co_u32_e64 v11, s[12:13], v11, s12 -; GFX9-O0-NEXT: v_mov_b32_e32 v12, s14 -; GFX9-O0-NEXT: v_addc_co_u32_e64 v5, s[12:13], v5, v12, s[12:13] -; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[8:9] -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[8:9] +; GFX9-O0-NEXT: v_add_co_u32_e64 v10, s[12:13], v10, s12 +; GFX9-O0-NEXT: v_mov_b32_e32 v11, s14 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[12:13], v4, v11, s[12:13] +; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v11 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[8:9] +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[8:9] ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec +; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 +; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 -; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec -; 
GFX9-O0-NEXT: v_mov_b32_e32 v6, v10 -; GFX9-O0-NEXT: v_sub_co_u32_e32 v5, vcc, v5, v8 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v9, vcc, v6, v7, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v7, s10 +; GFX9-O0-NEXT: v_sub_co_u32_e32 v4, vcc, v4, v7 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v5, v6, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v6, s10 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v6, v7, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v7, s11 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, s10 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v6, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v6, s11 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v6, v7, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v5, v6, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9 -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8 +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7 -; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6 +; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded 
Spill -; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7] +; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7] ; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f -; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13] -; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[14:15] -; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7] -; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[14:15] -; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[8:9] -; GFX9-O0-NEXT: v_and_b32_e64 v7, 1, v7 -; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v7, 1 +; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13] +; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15] +; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7] +; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15] +; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9] +; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6 +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v6, 1 ; GFX9-O0-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9] ; GFX9-O0-NEXT: s_mov_b64 s[4:5], -1 ; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5] -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 ; GFX9-O0-NEXT: s_mov_b32 s14, s13 -; GFX9-O0-NEXT: v_xor_b32_e64 v7, v7, s14 +; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14 ; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13 -; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, s12 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v9 -; GFX9-O0-NEXT: v_or_b32_e64 v7, v7, v10 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8 -; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v6 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7 -; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], 
v[5:6], s[6:7] -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v2, v5, s[8:9] -; GFX9-O0-NEXT: v_mov_b32_e32 v2, s10 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[8:9] +; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12 +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v8 +; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v9 +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 +; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6 +; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[8:9] +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[8:9] ; GFX9-O0-NEXT: ; implicit-def: $sgpr12 ; GFX9-O0-NEXT: ; implicit-def: $sgpr12 -; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v4, v5, s[8:9] -; GFX9-O0-NEXT: v_mov_b32_e32 v4, s10 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[8:9] +; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[8:9] +; GFX9-O0-NEXT: v_mov_b32_e32 v3, s10 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[8:9] ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 
v3, v4 ; GFX9-O0-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7] -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 s[4:5], exec -; GFX9-O0-NEXT: v_writelane_b32 v0, s4, 4 -; GFX9-O0-NEXT: v_writelane_b32 v0, s5, 5 +; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 4 +; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 5 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] @@ -555,11 +548,11 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_branch .LBB0_8 ; GFX9-O0-NEXT: .LBB0_1: ; %Flow ; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s4, v0, 6 -; GFX9-O0-NEXT: v_readlane_b32 s5, v0, 7 +; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 6 +; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 7 ; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; 
GFX9-O0-NEXT: ; %bb.2: ; %Flow ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload @@ -588,20 +581,19 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB0_5 ; GFX9-O0-NEXT: .LBB0_3: ; %Flow2 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s4, v4, 4 -; GFX9-O0-NEXT: v_readlane_b32 s5, v4, 5 -; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 4 +; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 5 +; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: s_nop 0 ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -648,13 +640,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB0_3 ; GFX9-O0-NEXT: .LBB0_5: ; %Flow1 -; 
GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s4, v8, 8 -; GFX9-O0-NEXT: v_readlane_b32 s5, v8, 9 -; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload @@ -663,9 +648,15 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 8 +; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 9 +; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: s_nop 0 ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -679,92 +670,87 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_branch .LBB0_4 ; GFX9-O0-NEXT: .LBB0_6: ; %udiv-do-while ; GFX9-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload -; 
GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s6, v16, 10 -; GFX9-O0-NEXT: v_readlane_b32 s7, v16, 11 ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload -; GFX9-O0-NEXT: 
buffer_load_dword v19, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s6, v30, 10 +; GFX9-O0-NEXT: v_readlane_b32 s7, v30, 11 ; GFX9-O0-NEXT: s_mov_b32 s4, 63 -; GFX9-O0-NEXT: s_waitcnt vmcnt(16) -; GFX9-O0-NEXT: v_lshrrev_b64 v[29:30], s4, v[2:3] -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v30 +; GFX9-O0-NEXT: v_lshrrev_b64 v[28:29], s4, v[2:3] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v29 ; GFX9-O0-NEXT: s_mov_b32 
s5, 1 -; GFX9-O0-NEXT: v_lshlrev_b64 v[23:24], s5, v[23:24] -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v24 +; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], s5, v[22:23] +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v23 ; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v29 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 -; GFX9-O0-NEXT: v_or_b32_e64 v23, v5, v10 -; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v24, v4 -; GFX9-O0-NEXT: v_lshlrev_b64 v[29:30], s5, v[2:3] +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v28 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v22 +; GFX9-O0-NEXT: v_or_b32_e64 v22, v5, v10 +; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v23, v4 +; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[2:3] ; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], s4, v[6:7] -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v30 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v29 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5 ; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v29 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v28 ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec ; GFX9-O0-NEXT: v_or_b32_e64 v4, v3, v4 ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2 ; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s5, v[0:1] -; GFX9-O0-NEXT: v_lshlrev_b64 v[29:30], s5, v[6:7] +; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[6:7] ; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v30 -; GFX9-O0-NEXT: s_waitcnt vmcnt(10) -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v28 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v29 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v27 ; GFX9-O0-NEXT: v_or3_b32 v6, v6, v7, v10 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, v29 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v27 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v28 +; GFX9-O0-NEXT: 
v_mov_b32_e32 v7, v26 ; GFX9-O0-NEXT: v_or3_b32 v0, v0, v1, v7 ; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v3 -; GFX9-O0-NEXT: s_waitcnt vmcnt(8) -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v26 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v25 ; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v25 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v24 ; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 ; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6 ; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v23 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v24 -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v22 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11 ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v14 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v15 ; GFX9-O0-NEXT: v_sub_co_u32_e32 v13, vcc, v13, v6 -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_subb_co_u32_e32 v12, vcc, v12, v10, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v11, v4, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v5, vcc @@ -784,22 +770,22 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v15, v12 ; GFX9-O0-NEXT: v_mov_b32_e32 v12, 0 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, 0 -; GFX9-O0-NEXT: v_mov_b32_e32 v23, v22 -; GFX9-O0-NEXT: v_and_b32_e64 v23, v7, v23 -; GFX9-O0-NEXT: v_and_b32_e64 v21, v11, v21 -; GFX9-O0-NEXT: ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v22, v23 -; GFX9-O0-NEXT: v_mov_b32_e32 v23, v20 -; GFX9-O0-NEXT: v_and_b32_e64 v7, v7, v23 -; GFX9-O0-NEXT: v_and_b32_e64 v23, v11, v19 -; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v24, v7 -; GFX9-O0-NEXT: 
v_mov_b32_e32 v20, v23 -; GFX9-O0-NEXT: v_mov_b32_e32 v19, v24 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v21 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v22 -; GFX9-O0-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v20 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v19, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v22, v21 +; GFX9-O0-NEXT: v_and_b32_e64 v22, v7, v22 +; GFX9-O0-NEXT: v_and_b32_e64 v20, v11, v20 +; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v21, v22 +; GFX9-O0-NEXT: v_mov_b32_e32 v22, v19 +; GFX9-O0-NEXT: v_and_b32_e64 v7, v7, v22 +; GFX9-O0-NEXT: v_and_b32_e64 v22, v11, v18 +; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v23, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v19, v22 +; GFX9-O0-NEXT: v_mov_b32_e32 v18, v23 +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v21 +; GFX9-O0-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v19 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v18, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v11, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v7, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 @@ -815,66 +801,66 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_mov_b64 s[8:9], -1 ; GFX9-O0-NEXT: s_mov_b32 s5, s8 ; GFX9-O0-NEXT: s_mov_b32 s4, s9 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v18 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, s5 -; GFX9-O0-NEXT: v_add_co_u32_e32 v20, vcc, v11, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5 +; GFX9-O0-NEXT: v_add_co_u32_e32 v19, vcc, v11, v16 ; GFX9-O0-NEXT: v_mov_b32_e32 v11, s4 ; GFX9-O0-NEXT: v_addc_co_u32_e32 v9, vcc, v9, v11, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5 -; GFX9-O0-NEXT: v_addc_co_u32_e32 v17, vcc, v10, v11, vcc +; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v10, v11, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v10, s4 ; GFX9-O0-NEXT: 
v_addc_co_u32_e32 v8, vcc, v8, v10, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v21, v9 +; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v20, v9 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v8 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v20 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v21 -; GFX9-O0-NEXT: v_mov_b32_e32 v22, v18 -; GFX9-O0-NEXT: v_mov_b32_e32 v19, v21 -; GFX9-O0-NEXT: v_or_b32_e64 v19, v19, v22 -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, v20 -; GFX9-O0-NEXT: v_or_b32_e64 v17, v17, v18 -; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v19 -; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[17:18], v[12:13] +; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v19 +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20 +; GFX9-O0-NEXT: v_mov_b32_e32 v21, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v18, v20 +; GFX9-O0-NEXT: v_or_b32_e64 v18, v18, v21 +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, v19 +; GFX9-O0-NEXT: v_or_b32_e64 v16, v16, v17 +; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v18 +; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[16:17], v[12:13] ; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, v2 -; 
GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, v2 +; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, v0 -; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, v0 +; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v15 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, v14 -; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v15 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, v14 +; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v13 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, v12 -; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v13 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, v12 +; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:148 ; 4-byte 
Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX9-O0-NEXT: v_writelane_b32 v16, s6, 6 -; GFX9-O0-NEXT: v_writelane_b32 v16, s7, 7 +; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 6 +; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 7 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX9-O0-NEXT: v_writelane_b32 v16, s6, 10 -; GFX9-O0-NEXT: v_writelane_b32 v16, s7, 11 +; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 10 +; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 11 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -912,52 +898,52 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] -; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v19, off, 
s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-O0-NEXT: s_waitcnt vmcnt(10) ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10 -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[21:22] +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[20:21] ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 ; GFX9-O0-NEXT: s_mov_b32 s6, 64 ; GFX9-O0-NEXT: v_sub_u32_e64 v12, s6, v4 -; GFX9-O0-NEXT: v_lshlrev_b64 v[23:24], v12, v[19:20] -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v24 +; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], v12, v[18:19] +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v23 ; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v12 ; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v23 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v22 ; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7 ; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 ; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7 ; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v4, s6 ; GFX9-O0-NEXT: v_sub_u32_e64 v5, v4, s6 -; GFX9-O0-NEXT: v_lshrrev_b64 v[23:24], v5, v[19:20] -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v24 +; GFX9-O0-NEXT: 
v_lshrrev_b64 v[22:23], v5, v[18:19] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 ; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[4:5] ; GFX9-O0-NEXT: s_mov_b32 s6, 0 ; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v4, s6 -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v22 +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v21 ; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[6:7] ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v23 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v22 ; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5] -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v21 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v20 ; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[6:7] ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 -; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], v4, v[19:20] +; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], v4, v[18:19] ; GFX9-O0-NEXT: v_mov_b32_e32 v15, v5 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 ; GFX9-O0-NEXT: s_mov_b32 s8, s7 @@ -976,12 +962,12 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_mov_b64 s[8:9], -1 ; GFX9-O0-NEXT: s_mov_b32 s5, s8 ; GFX9-O0-NEXT: s_mov_b32 s4, s9 -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v18 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, s5 -; GFX9-O0-NEXT: v_add_co_u32_e32 v12, vcc, v12, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, s4 -; GFX9-O0-NEXT: v_addc_co_u32_e32 v17, vcc, v15, v17, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5 +; GFX9-O0-NEXT: v_add_co_u32_e32 v12, vcc, v12, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, s4 +; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v15, v16, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v15, s5 ; GFX9-O0-NEXT: v_addc_co_u32_e32 v14, vcc, v14, v15, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v15, s4 @@ -993,7 +979,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: ; 
implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16 ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -1006,10 +992,11 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v14, s8 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, s7 ; GFX9-O0-NEXT: v_mov_b32_e32 v12, s6 -; GFX9-O0-NEXT: v_writelane_b32 v16, s4, 10 -; GFX9-O0-NEXT: v_writelane_b32 v16, s5, 11 +; GFX9-O0-NEXT: s_waitcnt vmcnt(4) +; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 10 +; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 11 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -1037,201 +1024,194 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB0_6 ; GFX9-O0-NEXT: .LBB0_8: ; %udiv-bb1 +; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded 
Reload +; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] -; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1 ; GFX9-O0-NEXT: s_mov_b32 s5, s6 -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 +; GFX9-O0-NEXT: s_waitcnt vmcnt(2) +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-O0-NEXT: s_mov_b32 s4, s7 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 ; GFX9-O0-NEXT: s_mov_b32 s8, s6 ; GFX9-O0-NEXT: s_mov_b32 s9, s7 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5 -; GFX9-O0-NEXT: v_add_co_u32_e32 v9, vcc, v4, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s4 -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v5, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s8 -; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v5, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s9 -; GFX9-O0-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 
v4, s5 +; GFX9-O0-NEXT: v_add_co_u32_e32 v8, vcc, v3, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s4 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v4, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 +; GFX9-O0-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v4, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s9 +; GFX9-O0-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v4, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v2 +; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v1 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2 +; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1 -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10 -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill 
; GFX9-O0-NEXT: s_mov_b32 s4, 0x7f -; GFX9-O0-NEXT: v_sub_u32_e64 v3, s4, v4 -; GFX9-O0-NEXT: v_lshlrev_b64 v[5:6], v3, v[11:12] -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v6 +; GFX9-O0-NEXT: v_sub_u32_e64 v2, s4, v3 +; GFX9-O0-NEXT: v_lshlrev_b64 v[4:5], v2, v[10:11] +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5 ; GFX9-O0-NEXT: s_mov_b32 s4, 64 -; GFX9-O0-NEXT: v_sub_u32_e64 v14, s4, v3 -; GFX9-O0-NEXT: v_lshrrev_b64 v[14:15], v14, v[7:8] -; GFX9-O0-NEXT: v_mov_b32_e32 v16, v15 -; GFX9-O0-NEXT: v_or_b32_e64 v13, v13, v16 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v14 -; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v6 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v13 -; GFX9-O0-NEXT: v_mov_b32_e32 v15, v6 -; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v3, s4 -; GFX9-O0-NEXT: s_mov_b32 s10, 63 -; GFX9-O0-NEXT: v_sub_u32_e64 v4, s10, v4 -; GFX9-O0-NEXT: v_lshlrev_b64 v[13:14], v4, v[7:8] -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v15, s[4:5] -; GFX9-O0-NEXT: s_mov_b32 s10, 0 -; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[10:11], v3, s10 -; GFX9-O0-NEXT: v_mov_b32_e32 v15, v12 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v15, s[10:11] -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 +; GFX9-O0-NEXT: v_sub_u32_e64 v13, s4, v2 +; GFX9-O0-NEXT: v_lshrrev_b64 v[13:14], v13, v[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v14 +; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v15 +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v13 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[10:11] +; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12 +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v5 +; GFX9-O0-NEXT: 
v_cmp_lt_u32_e64 s[4:5], v2, s4 +; GFX9-O0-NEXT: s_mov_b32 s10, 63 +; GFX9-O0-NEXT: v_sub_u32_e64 v3, s10, v3 +; GFX9-O0-NEXT: v_lshlrev_b64 v[12:13], v3, v[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[4:5] +; GFX9-O0-NEXT: s_mov_b32 s10, 0 +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[10:11], v2, s10 +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v11 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[10:11] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[10:11] ; GFX9-O0-NEXT: ; implicit-def: $sgpr10 ; GFX9-O0-NEXT: ; implicit-def: $sgpr10 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 -; GFX9-O0-NEXT: v_lshlrev_b64 v[7:8], v3, v[7:8] -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, s9 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5] -; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr7_vgpr8 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v4, v7, s[4:5] +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v3 +; GFX9-O0-NEXT: v_lshlrev_b64 v[6:7], v2, v[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s9 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5] +; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v3, s8 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v3, v6, s[4:5] ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v3 -; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill +; GFX9-O0-NEXT: ; 
kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v2 +; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10 -; GFX9-O0-NEXT: v_or_b32_e64 v3, v3, v4 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v9 -; GFX9-O0-NEXT: v_or_b32_e64 v1, v1, v2 -; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 -; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[1:2], s[6:7] +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9 +; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v8 +; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v1 +; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 +; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[0:1], s[6:7] ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX9-O0-NEXT: v_mov_b32_e32 v3, s8 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, s9 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, s7 -; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s8 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, s9 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s6 +; 
GFX9-O0-NEXT: v_mov_b32_e32 v1, s7 +; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 s[6:7], exec ; GFX9-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] ; GFX9-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] -; GFX9-O0-NEXT: v_writelane_b32 v0, s6, 8 -; GFX9-O0-NEXT: v_writelane_b32 v0, s7, 9 +; GFX9-O0-NEXT: s_waitcnt vmcnt(16) +; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 8 +; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 9 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v30, off, 
s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-O0-NEXT: s_cbranch_execz .LBB0_5 ; GFX9-O0-NEXT: s_branch .LBB0_7 ; GFX9-O0-NEXT: .LBB0_9: ; %udiv-end -; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6 ; GFX9-O0-NEXT: v_xor_b32_e64 v3, v3, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v7 ; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec -; 
GFX9-O0-NEXT: v_mov_b32_e32 v6, v9 -; GFX9-O0-NEXT: v_xor_b32_e64 v9, v6, v5 -; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8 +; GFX9-O0-NEXT: v_xor_b32_e64 v8, v5, v4 +; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 ; GFX9-O0-NEXT: v_xor_b32_e64 v3, v3, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v10 ; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec -; GFX9-O0-NEXT: v_xor_b32_e64 v0, v0, v8 +; GFX9-O0-NEXT: v_xor_b32_e64 v0, v0, v7 ; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v10 -; GFX9-O0-NEXT: v_sub_co_u32_e32 v7, vcc, v7, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v9 +; GFX9-O0-NEXT: v_sub_co_u32_e32 v5, vcc, v5, v7 ; GFX9-O0-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v6, vcc -; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v3, v5, vcc +; GFX9-O0-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v4, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v1 +; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v0 -; GFX9-O0-NEXT: v_mov_b32_e32 
v0, v7 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 ; GFX9-O0-NEXT: s_mov_b32 s4, 32 -; GFX9-O0-NEXT: v_lshrrev_b64 v[7:8], s4, v[7:8] -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v7 ; GFX9-O0-NEXT: v_lshrrev_b64 v[5:6], s4, v[5:6] -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5 -; GFX9-O0-NEXT: ; kill: killed $vgpr4 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5 +; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s4, v[3:4] +; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec ; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: s_setpc_b64 s[30:31] @@ -1444,258 +1424,252 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0: ; %bb.0: ; %_udiv-special-cases ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-G-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill +; 
GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-G-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v0 -; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9_vgpr10_vgpr11 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v2 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v3 -; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v1 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v2 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v3 +; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v10 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v9 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v8 +; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 -; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v4 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v6 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v7 -; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: ; 
kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14_vgpr15_vgpr16 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v3 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v2 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v1 +; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v4 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v6 +; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13_vgpr14_vgpr15 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v1 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v0 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v7 ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0 ; GFX9-G-O0-NEXT: s_mov_b64 s[12:13], 0x7f -; GFX9-G-O0-NEXT: ; kill: def $vgpr1_vgpr2 killed $vgpr4_vgpr5 killed $exec +; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr3_vgpr4 killed $exec ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v5 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v6 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v7 -; GFX9-G-O0-NEXT: ; kill: def $vgpr3 killed $vgpr1 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v2 +; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v1 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 31 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s6 -; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v12, v3, v8 -; GFX9-G-O0-NEXT: ; kill: def $vgpr3 killed $vgpr1 killed $exec -; GFX9-G-O0-NEXT: ; kill: 
def $vgpr2 killed $vgpr2 killed $vgpr1_vgpr2 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v11, v2, v7 +; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec +; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec ; GFX9-G-O0-NEXT: s_mov_b32 s6, 31 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v10, v1, v2 -; GFX9-G-O0-NEXT: ; kill: def $vgpr1_vgpr2 killed $vgpr13_vgpr14 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v9, v0, v1 +; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr12_vgpr13 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v14 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v15 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v16 -; GFX9-G-O0-NEXT: ; kill: def $vgpr3 killed $vgpr1 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v2 +; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v1 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 31 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s6 -; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v11, v3, v8 -; GFX9-G-O0-NEXT: ; kill: def $vgpr3 killed $vgpr1 killed $exec -; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $vgpr1_vgpr2 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v10, v2, v7 +; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec +; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec ; GFX9-G-O0-NEXT: s_mov_b32 s6, 31 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v9, v1, v2 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-G-O0-NEXT: v_ashrrev_i32_e64 v8, v0, v1 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v4 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v5 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v6 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v7 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, 
v1 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v2 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v3 -; GFX9-G-O0-NEXT: v_xor_b32_e64 v1, v12, v1 -; GFX9-G-O0-NEXT: v_xor_b32_e64 v2, v12, v2 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v6 -; GFX9-G-O0-NEXT: v_xor_b32_e64 v4, v10, v4 -; GFX9-G-O0-NEXT: v_xor_b32_e64 v3, v10, v3 -; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v1, s[6:7], v1, v12 -; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v2, s[6:7], v2, v12, s[6:7] -; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v6, s[6:7], v4, v10, s[6:7] -; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v3, s[6:7], v3, v10, s[6:7] -; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_xor_b32_e64 v0, v11, v0 +; GFX9-G-O0-NEXT: v_xor_b32_e64 v1, v11, v1 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v4 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v5 +; GFX9-G-O0-NEXT: v_xor_b32_e64 v3, v9, v3 +; GFX9-G-O0-NEXT: v_xor_b32_e64 v2, v9, v2 +; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v0, s[6:7], v0, v11 +; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v1, s[6:7], v1, v11, s[6:7] +; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v5, s[6:7], v3, v9, s[6:7] +; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v2, s[6:7], v2, v9, s[6:7] +; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v12 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v13 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v14 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v14 ; 
GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v15 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v16 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v7 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v8 -; GFX9-G-O0-NEXT: v_xor_b32_e64 v8, v11, v5 -; GFX9-G-O0-NEXT: v_xor_b32_e64 v5, v11, v4 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v13 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v14 -; GFX9-G-O0-NEXT: v_xor_b32_e64 v7, v9, v7 -; GFX9-G-O0-NEXT: v_xor_b32_e64 v4, v9, v4 -; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v8, s[6:7], v8, v11 -; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v5, s[6:7], v5, v11, s[6:7] -; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v7, s[6:7], v7, v9, s[6:7] -; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v4, s[6:7], v4, v9, s[6:7] -; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_xor_b32_e64 v13, v11, v12 -; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_xor_b32_e64 v11, v11, v12 -; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_xor_b32_e64 v11, v9, v10 -; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_xor_b32_e64 v9, v9, v10 -; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v8, v7 -; GFX9-G-O0-NEXT: v_or_b32_e64 v11, v5, v4 -; GFX9-G-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v11 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s4 -; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[6:7], v[9:10], v[11:12] -; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v1, v6 -; 
GFX9-G-O0-NEXT: v_or_b32_e64 v11, v2, v3 -; GFX9-G-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v11 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s4 -; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[9:10], v[11:12] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v6 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v7 +; GFX9-G-O0-NEXT: v_xor_b32_e64 v7, v10, v4 +; GFX9-G-O0-NEXT: v_xor_b32_e64 v4, v10, v3 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v12 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v13 +; GFX9-G-O0-NEXT: v_xor_b32_e64 v6, v8, v6 +; GFX9-G-O0-NEXT: v_xor_b32_e64 v3, v8, v3 +; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v7, s[6:7], v7, v10 +; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v4, s[6:7], v4, v10, s[6:7] +; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v6, s[6:7], v6, v8, s[6:7] +; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v3, s[6:7], v3, v8, s[6:7] +; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_xor_b32_e64 v12, v10, v11 +; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_xor_b32_e64 v10, v10, v11 +; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_xor_b32_e64 v10, v8, v9 +; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_xor_b32_e64 v8, v8, v9 +; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_or_b32_e64 v8, v7, v6 +; GFX9-G-O0-NEXT: v_or_b32_e64 v10, v4, v3 +; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; 
GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s4 +; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[6:7], v[8:9], v[10:11] +; GFX9-G-O0-NEXT: v_or_b32_e64 v8, v0, v5 +; GFX9-G-O0-NEXT: v_or_b32_e64 v10, v1, v2 +; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s4 +; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], v[10:11] ; GFX9-G-O0-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v7 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v4 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s4 -; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[9:10], v[11:12] -; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v5, v5 -; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v8, v8 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, 32 -; GFX9-G-O0-NEXT: v_add_u32_e64 v8, v8, v9 -; GFX9-G-O0-NEXT: v_min_u32_e64 v5, v5, v8 -; GFX9-G-O0-NEXT: s_mov_b32 s10, 64 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s10 -; GFX9-G-O0-NEXT: v_add_u32_e64 v5, v5, v8 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v6 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v3 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s4 +; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], v[10:11] ; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v4, v4 ; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v7, v7 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, 32 ; GFX9-G-O0-NEXT: v_add_u32_e64 v7, v7, v8 ; GFX9-G-O0-NEXT: v_min_u32_e64 v4, v4, v7 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[8:9] +; GFX9-G-O0-NEXT: s_mov_b32 s10, 64 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s10 +; GFX9-G-O0-NEXT: v_add_u32_e64 v4, v4, v7 +; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v3, v3 +; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v6, v6 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 32 +; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v6, v7 +; GFX9-G-O0-NEXT: v_min_u32_e64 v3, v3, v6 +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, 
v3, v4, s[8:9] ; GFX9-G-O0-NEXT: s_mov_b32 s16, 0 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v6 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v3 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s4 -; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], v[9:10] -; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v5, v2 -; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v7, v1 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v2 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s4 +; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[6:7], v[8:9] +; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v4, v1 +; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v6, v0 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 32 +; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v6, v7 +; GFX9-G-O0-NEXT: v_min_u32_e64 v4, v4, v6 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s10 +; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v4, v6 +; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v4, v2 +; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v7, v5 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, 32 ; GFX9-G-O0-NEXT: v_add_u32_e64 v7, v7, v8 -; GFX9-G-O0-NEXT: v_min_u32_e64 v5, v5, v7 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s10 -; GFX9-G-O0-NEXT: v_add_u32_e64 v7, v5, v7 -; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v5, v3 -; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v8, v6 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, 32 -; GFX9-G-O0-NEXT: v_add_u32_e64 v8, v8, v9 -; GFX9-G-O0-NEXT: v_min_u32_e64 v5, v5, v8 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v7, s[8:9] +; GFX9-G-O0-NEXT: v_min_u32_e64 v4, v4, v7 +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[8:9] ; GFX9-G-O0-NEXT: s_mov_b32 s15, 0 ; GFX9-G-O0-NEXT: s_mov_b32 s11, 0 ; GFX9-G-O0-NEXT: s_mov_b32 s14, 0 ; GFX9-G-O0-NEXT: s_mov_b32 s10, 0 -; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v7, s[8:9], v4, v5 -; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v6, s[8:9], v3, v4 +; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s16 
; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s16 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s16 -; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v4, s[8:9], v4, v5, s[8:9] -; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s15 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s14 -; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v9, s[8:9], v5, v8, s[8:9] -; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s11 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s10 -; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v8, s[8:9], v5, v8, s[8:9] -; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v7 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v4 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v9 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v8 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s4 -; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[10:11], v[12:13], v[14:15] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s4 -; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[12:13], v[14:15] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s12 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s13 -; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[12:13], v[10:11], v[12:13] +; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v3, s[8:9], v3, v4, s[8:9] +; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s15 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s14 +; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v8, s[8:9], v4, v7, s[8:9] +; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s11 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s10 +; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v7, s[8:9], v4, v7, s[8:9] +; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v6 +; 
GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v3 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v8 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v7 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s4 +; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[10:11], v[11:12], v[13:14] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s4 +; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[11:12], v[13:14] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s12 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s13 +; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[12:13], v[9:10], v[11:12] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, 1 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v4, v9, s[12:13] ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, 1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, 0 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v10, v5, v10, s[12:13] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, 1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, 0 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[10:11] -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v10, v5, v10, s[8:9] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, 1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, 0 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[6:7] -; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v5, v10 -; GFX9-G-O0-NEXT: s_mov_b32 s7, 0x7f -; GFX9-G-O0-NEXT: s_mov_b32 s6, 0 -; GFX9-G-O0-NEXT: v_xor_b32_e64 v7, v7, s7 -; GFX9-G-O0-NEXT: v_xor_b32_e64 v4, v4, s6 -; GFX9-G-O0-NEXT: v_or_b32_e64 v7, v7, v9 -; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v8 -; GFX9-G-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v4 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s4 -; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[7:8], v[9:10] -; GFX9-G-O0-NEXT: v_and_b32_e32 v4, 1, v5 -; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v4 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[6:7] -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v2, v4, 
s[6:7] -; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v4 -; GFX9-G-O0-NEXT: v_and_b32_e32 v4, 1, v5 -; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v4 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 0 +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[10:11] +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v4, v9, s[8:9] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, 1 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[6:7] -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[6:7] +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[6:7] +; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v9 +; GFX9-G-O0-NEXT: s_mov_b32 s7, 0x7f +; GFX9-G-O0-NEXT: s_mov_b32 s6, 0 +; GFX9-G-O0-NEXT: v_xor_b32_e64 v6, v6, s7 +; GFX9-G-O0-NEXT: v_xor_b32_e64 v3, v3, s6 +; GFX9-G-O0-NEXT: v_or_b32_e64 v6, v6, v8 +; GFX9-G-O0-NEXT: v_or_b32_e64 v3, v3, v7 ; GFX9-G-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v3 -; GFX9-G-O0-NEXT: ; kill: def $vgpr1_vgpr2 killed $vgpr1_vgpr2 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v6 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v7 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 1 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s4 +; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[6:7], v[8:9] +; GFX9-G-O0-NEXT: v_and_b32_e32 v3, 1, v4 +; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v3 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 0 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5] -; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v5, v6 -; GFX9-G-O0-NEXT: v_and_b32_e32 v5, 1, v5 -; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0 +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[6:7] +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v1, v3, s[6:7] +; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v3 +; 
GFX9-G-O0-NEXT: v_and_b32_e32 v3, 1, v4 +; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v3 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 0 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0 +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[6:7] +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[6:7] +; GFX9-G-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v2 +; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v6 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 1 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, 0 +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] +; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v5 +; GFX9-G-O0-NEXT: v_and_b32_e32 v4, 1, v4 +; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v4 ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], -1 ; GFX9-G-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] -; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 -; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], exec -; GFX9-G-O0-NEXT: v_writelane_b32 v0, s4, 0 -; GFX9-G-O0-NEXT: v_writelane_b32 v0, s5, 1 +; GFX9-G-O0-NEXT: ; implicit-def: $vgpr34 : SGPR spill to VGPR lane +; GFX9-G-O0-NEXT: v_writelane_b32 v34, s4, 0 +; GFX9-G-O0-NEXT: v_writelane_b32 v34, s5, 1 ; 
GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-G-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5] @@ -1703,11 +1677,11 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: s_branch .LBB0_8 ; GFX9-G-O0-NEXT: .LBB0_1: ; %Flow ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-G-O0-NEXT: v_readlane_b32 s4, v0, 2 -; GFX9-G-O0-NEXT: v_readlane_b32 s5, v0, 3 +; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 2 +; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 3 ; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-G-O0-NEXT: ; %bb.2: ; %Flow ; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload @@ -1736,24 +1710,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB0_5 ; GFX9-G-O0-NEXT: .LBB0_3: ; %Flow2 -; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-G-O0-NEXT: v_readlane_b32 s4, v4, 0 -; GFX9-G-O0-NEXT: v_readlane_b32 s5, v4, 1 -; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; 
GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) +; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 0 +; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 1 +; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) +; GFX9-G-O0-NEXT: s_nop 0 ; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB0_9 ; GFX9-G-O0-NEXT: .LBB0_4: ; %udiv-loop-exit @@ -1813,13 +1784,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB0_3 ; GFX9-G-O0-NEXT: .LBB0_5: ; %Flow1 -; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-G-O0-NEXT: v_readlane_b32 s4, v8, 4 -; GFX9-G-O0-NEXT: v_readlane_b32 s5, v8, 5 -; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload @@ -1828,13 +1792,17 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; 
GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) +; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 4 +; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 5 +; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) +; GFX9-G-O0-NEXT: s_nop 0 ; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) ; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 @@ -1844,41 +1812,39 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: s_branch .LBB0_4 ; GFX9-G-O0-NEXT: .LBB0_6: ; %udiv-do-while ; GFX9-G-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-G-O0-NEXT: v_readlane_b32 s6, v16, 6 -; GFX9-G-O0-NEXT: v_readlane_b32 s7, v16, 7 -; GFX9-G-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: 
buffer_load_dword v25, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:252 ; 
4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 +; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-G-O0-NEXT: v_readlane_b32 s6, v34, 6 +; GFX9-G-O0-NEXT: v_readlane_b32 s7, v34, 7 ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(18) ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v3 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(16) ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v4 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v5 ; GFX9-G-O0-NEXT: s_mov_b32 s8, 1 @@ -1897,9 +1863,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; 
GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v5 ; GFX9-G-O0-NEXT: v_or_b32_e64 v7, v2, v3 ; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v0, v1 -; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr23_vgpr24 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v25 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v26 +; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr22_vgpr23 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v24 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v25 ; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec ; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec ; GFX9-G-O0-NEXT: s_mov_b32 s9, 31 @@ -1911,47 +1877,44 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v15 ; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v2, v3 ; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v0, v1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v23 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v24 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v25 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v26 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v22 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v23 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v24 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v25 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8 -; GFX9-G-O0-NEXT: v_lshlrev_b64 v[27:28], v0, v[2:3] +; GFX9-G-O0-NEXT: v_lshlrev_b64 v[26:27], v0, v[2:3] ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[0:1], v0, v[14:15] ; GFX9-G-O0-NEXT: ; kill: def $vgpr14 killed $vgpr2 killed $exec ; GFX9-G-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr2_vgpr3 killed $exec ; GFX9-G-O0-NEXT: s_mov_b32 s8, 31 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8 -; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v23, v2, v3 +; GFX9-G-O0-NEXT: v_lshrrev_b32_e64 v22, v2, v3 ; GFX9-G-O0-NEXT: s_mov_b32 s8, 0 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s8 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v0 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v1 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(10) +; GFX9-G-O0-NEXT: v_mov_b32_e32 v28, v30 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v29, v31 -; 
GFX9-G-O0-NEXT: v_mov_b32_e32 v30, v32 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(8) +; GFX9-G-O0-NEXT: v_mov_b32_e32 v24, v32 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v25, v33 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v26, v34 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v29 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v30 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v24, v27 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v28 -; GFX9-G-O0-NEXT: v_or_b32_e64 v0, v0, v24 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v28 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v29 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v26 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v27 +; GFX9-G-O0-NEXT: v_or_b32_e64 v0, v0, v23 ; GFX9-G-O0-NEXT: v_or_b32_e64 v15, v1, v15 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v15 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v24, v25 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v26 -; GFX9-G-O0-NEXT: v_or3_b32 v14, v14, v23, v24 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v24 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v25 +; GFX9-G-O0-NEXT: v_or3_b32 v14, v14, v22, v23 ; GFX9-G-O0-NEXT: v_or3_b32 v2, v2, v3, v15 ; GFX9-G-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec ; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v2 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v14 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v15 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v13, s[8:9], v13, v4 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v12, s[8:9], v12, v9, s[8:9] ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v10, s[8:9], v10, v7, s[8:9] @@ -1968,15 +1931,15 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: v_and_b32_e64 v14, v10, s8 ; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v14 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v24, s5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, s4 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, s5 
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, s4 ; GFX9-G-O0-NEXT: ; kill: def $vgpr12_vgpr13 killed $vgpr12_vgpr13 def $vgpr12_vgpr13_vgpr14_vgpr15 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v23 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v24 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v22 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v23 ; GFX9-G-O0-NEXT: v_and_b32_e64 v11, v10, v11 -; GFX9-G-O0-NEXT: v_and_b32_e64 v10, v10, v22 +; GFX9-G-O0-NEXT: v_and_b32_e64 v10, v10, v21 ; GFX9-G-O0-NEXT: v_and_b32_e64 v8, v6, v8 -; GFX9-G-O0-NEXT: v_and_b32_e64 v6, v6, v21 +; GFX9-G-O0-NEXT: v_and_b32_e64 v6, v6, v20 ; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v4, s[8:9], v4, v11 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v10, s[8:9], v9, v10, s[8:9] ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v9, s[8:9], v7, v8, s[8:9] @@ -1985,60 +1948,60 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v10 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v9 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v17 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v18 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v19 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v20 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v16 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v17 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v18 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v19 ; GFX9-G-O0-NEXT: s_mov_b32 s8, -1 ; GFX9-G-O0-NEXT: s_mov_b32 s12, -1 ; GFX9-G-O0-NEXT: s_mov_b32 s11, -1 ; GFX9-G-O0-NEXT: s_mov_b32 s10, -1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, s8 -; GFX9-G-O0-NEXT: v_add_co_u32_e64 v17, s[8:9], v11, v17 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, s8 +; GFX9-G-O0-NEXT: v_add_co_u32_e64 v16, s[8:9], v11, v16 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s12 -; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v18, s[8:9], v10, v11, s[8:9] +; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v17, s[8:9], v10, v11, s[8:9] ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s11 -; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v20, s[8:9], v9, v10, s[8:9] +; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v19, s[8:9], v9, v10, s[8:9] ; 
GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s10 -; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v19, s[8:9], v8, v9, s[8:9] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v17 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v18 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v20 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v19 -; GFX9-G-O0-NEXT: v_or_b32_e64 v17, v17, v20 -; GFX9-G-O0-NEXT: v_or_b32_e64 v19, v18, v19 -; GFX9-G-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v19 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, s5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, s4 -; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[17:18], v[19:20] +; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v18, s[8:9], v8, v9, s[8:9] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v16 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v17 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v19 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v18 +; GFX9-G-O0-NEXT: v_or_b32_e64 v16, v16, v19 +; GFX9-G-O0-NEXT: v_or_b32_e64 v18, v17, v18 +; GFX9-G-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v18 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, s5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, s4 +; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[16:17], v[18:19] ; GFX9-G-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v3 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v2 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v0 -; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v3 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v2 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v1 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v0 +; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 -; GFX9-G-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:140 ; 4-byte Folded 
Spill -; GFX9-G-O0-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v15 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v14 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v13 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v12 -; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v15 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v14 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v13 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v12 +; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 -; GFX9-G-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX9-G-O0-NEXT: v_writelane_b32 v16, s6, 2 -; GFX9-G-O0-NEXT: v_writelane_b32 v16, s7, 3 +; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 2 +; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 3 ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX9-G-O0-NEXT: v_writelane_b32 v16, s6, 6 -; GFX9-G-O0-NEXT: v_writelane_b32 v16, s7, 7 +; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 6 +; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 7 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; 
GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 @@ -2072,87 +2035,88 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 
offset:60 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: s_mov_b32 s4, 64 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) -; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v18 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v24, v17 -; GFX9-G-O0-NEXT: ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1) -; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v4 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4) +; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v17 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v16 +; GFX9-G-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2) +; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v4 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s4 -; GFX9-G-O0-NEXT: v_sub_u32_e64 v4, v19, v4 +; GFX9-G-O0-NEXT: v_sub_u32_e64 v4, v18, v4 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s4 -; GFX9-G-O0-NEXT: v_sub_u32_e64 v5, v5, v19 +; GFX9-G-O0-NEXT: v_sub_u32_e64 v5, v5, v18 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 0 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s4 -; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v19, v6 +; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v18, v6 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s6 -; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v19, v6 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-G-O0-NEXT: v_lshrrev_b64 v[6:7], v19, v[21:22] -; GFX9-G-O0-NEXT: v_lshrrev_b64 v[26:27], v19, v[23:24] -; GFX9-G-O0-NEXT: v_lshlrev_b64 v[24:25], v5, v[21:22] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v26 -; GFX9-G-O0-NEXT: 
v_mov_b32_e32 v5, v27 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v24 +; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v18, v6 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-G-O0-NEXT: v_lshrrev_b64 v[6:7], v18, v[20:21] +; GFX9-G-O0-NEXT: v_lshrrev_b64 v[25:26], v18, v[22:23] +; GFX9-G-O0-NEXT: v_lshlrev_b64 v[23:24], v5, v[20:21] ; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v25 -; GFX9-G-O0-NEXT: v_or_b32_e64 v20, v20, v23 -; GFX9-G-O0-NEXT: v_or_b32_e64 v19, v5, v19 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v26 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v23 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v24 +; GFX9-G-O0-NEXT: v_or_b32_e64 v19, v19, v22 +; GFX9-G-O0-NEXT: v_or_b32_e64 v18, v5, v18 ; GFX9-G-O0-NEXT: s_mov_b64 s[8:9], 0 -; GFX9-G-O0-NEXT: v_lshrrev_b64 v[21:22], v4, v[21:22] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v21 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v22 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v20, s[4:5] -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v19, s[4:5] -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v18, s[6:7] -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v17, v5, v17, s[6:7] +; GFX9-G-O0-NEXT: v_lshrrev_b64 v[20:21], v4, v[20:21] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v20 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v21 +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v19, s[4:5] +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v18, s[4:5] +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v17, s[6:7] +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v16, v5, v16, s[6:7] ; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v17 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v6 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v16 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v6 ; GFX9-G-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, 0 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, 0 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 0 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v17, v17, v18, s[4:5] +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v16, v16, v17, 
s[4:5] ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5] -; GFX9-G-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v6 +; GFX9-G-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v6 ; GFX9-G-O0-NEXT: ; kill: def $vgpr4_vgpr5 killed $vgpr4_vgpr5 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v17 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v18 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v16 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v17 ; GFX9-G-O0-NEXT: s_mov_b32 s4, -1 ; GFX9-G-O0-NEXT: s_mov_b32 s10, -1 ; GFX9-G-O0-NEXT: s_mov_b32 s7, -1 ; GFX9-G-O0-NEXT: s_mov_b32 s6, -1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, s4 -; GFX9-G-O0-NEXT: v_add_co_u32_e64 v16, s[4:5], v16, v17 -; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, s10 -; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v15, s[4:5], v15, v16, s[4:5] -; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s7 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, s4 +; GFX9-G-O0-NEXT: v_add_co_u32_e64 v15, s[4:5], v15, v16 +; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s10 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v14, s[4:5], v14, v15, s[4:5] -; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s6 +; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s7 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v13, s[4:5], v13, v14, s[4:5] -; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: 
v_mov_b32_e32 v13, s6 +; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v12, s[4:5], v12, v13, s[4:5] +; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], s[8:9] ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[8:9] -; GFX9-G-O0-NEXT: v_writelane_b32 v12, s8, 6 -; GFX9-G-O0-NEXT: v_writelane_b32 v12, s9, 7 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4) +; GFX9-G-O0-NEXT: v_writelane_b32 v34, s8, 6 +; GFX9-G-O0-NEXT: v_writelane_b32 v34, s9, 7 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s7 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s6 @@ -2180,165 +2144,157 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB0_6 ; GFX9-G-O0-NEXT: .LBB0_8: ; %udiv-bb1 +; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte 
Folded Reload ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 1 ; GFX9-G-O0-NEXT: s_mov_b32 s10, 0 ; GFX9-G-O0-NEXT: s_mov_b32 s9, 0 ; GFX9-G-O0-NEXT: s_mov_b32 s8, 0 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s6 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) -; GFX9-G-O0-NEXT: v_add_co_u32_e64 v5, s[6:7], v3, v5 -; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s10 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1) -; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v6, s[6:7], v6, v8, s[6:7] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s9 -; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v8, s[6:7], v7, v8, s[6:7] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s8 -; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v7, s[6:7], v2, v7, s[6:7] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s6 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4) +; GFX9-G-O0-NEXT: v_add_co_u32_e64 v4, s[6:7], v2, v4 +; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s10 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2) +; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v5, s[6:7], v5, v7, s[6:7] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s9 +; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v7, s[6:7], v6, 
v7, s[6:7] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s8 +; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v6, s[6:7], v1, v6, s[6:7] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v4 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v6 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v8 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v7 -; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v7 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v6 +; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 -; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b32 s6, 0x7f -; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s6 -; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v9, s[6:7], v2, v3 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s6 +; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v8, s[6:7], v1, v2 ; GFX9-G-O0-NEXT: s_mov_b32 s7, 64 -; GFX9-G-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v10 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v4 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s7 -; GFX9-G-O0-NEXT: v_sub_u32_e64 v3, v9, v1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s7 -; GFX9-G-O0-NEXT: v_sub_u32_e64 v15, v1, v9 +; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v0 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v9 +; GFX9-G-O0-NEXT: 
v_mov_b32_e32 v11, v3 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7 +; GFX9-G-O0-NEXT: v_sub_u32_e64 v2, v8, v0 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7 +; GFX9-G-O0-NEXT: v_sub_u32_e64 v14, v0, v8 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 0 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s7 -; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[8:9], v9, v1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v9, v1 -; GFX9-G-O0-NEXT: v_lshlrev_b64 v[1:2], v9, v[13:14] -; GFX9-G-O0-NEXT: v_lshrrev_b64 v[18:19], v15, v[13:14] -; GFX9-G-O0-NEXT: v_lshlrev_b64 v[16:17], v9, v[11:12] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v18 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v19 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v16 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7 +; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[8:9], v8, v0 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v8, v0 +; GFX9-G-O0-NEXT: v_lshlrev_b64 v[0:1], v8, v[12:13] +; GFX9-G-O0-NEXT: v_lshrrev_b64 v[17:18], v14, v[12:13] +; GFX9-G-O0-NEXT: v_lshlrev_b64 v[15:16], v8, v[10:11] ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v17 -; GFX9-G-O0-NEXT: v_or_b32_e64 v12, v12, v15 -; GFX9-G-O0-NEXT: v_or_b32_e64 v11, v9, v11 -; GFX9-G-O0-NEXT: v_lshlrev_b64 v[13:14], v3, v[13:14] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v2 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v18 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v15 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v16 +; GFX9-G-O0-NEXT: v_or_b32_e64 v11, v11, v14 +; GFX9-G-O0-NEXT: v_or_b32_e64 v10, v8, v10 +; GFX9-G-O0-NEXT: v_lshlrev_b64 v[12:13], v2, v[12:13] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v0 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v1 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[8:9] -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[8:9] -; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; GFX9-G-O0-NEXT: 
v_mov_b32_e32 v2, v3 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v13 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v14 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v9, v12, s[8:9] -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[8:9] -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v9, v10, s[6:7] -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[6:7] -; GFX9-G-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v3 -; GFX9-G-O0-NEXT: ; kill: def $vgpr1_vgpr2 killed $vgpr1_vgpr2 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[8:9] +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[8:9] +; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v2 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v12 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v13 +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v8, v8, v11, s[8:9] +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[8:9] +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[6:7] +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[6:7] +; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v2 +; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v8 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v9 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v10 -; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 -; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 
offset:296 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX9-G-O0-NEXT: s_mov_b64 s[10:11], s[4:5] -; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v5, v8 -; GFX9-G-O0-NEXT: v_or_b32_e64 v7, v6, v7 -; GFX9-G-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v7 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s4 -; GFX9-G-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[5:6], v[7:8] -; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v7 +; GFX9-G-O0-NEXT: v_or_b32_e64 v6, v5, v6 +; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v6 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s4 +; GFX9-G-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[4:5], v[6:7] +; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 -; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s9 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s10 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s11 -; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:128 ; 
4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s9 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s10 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s11 +; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 -; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], exec ; GFX9-G-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] ; GFX9-G-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] -; GFX9-G-O0-NEXT: v_writelane_b32 v0, s6, 4 -; GFX9-G-O0-NEXT: v_writelane_b32 v0, s7, 5 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(17) +; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 4 +; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 5 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-G-O0-NEXT: s_cbranch_execz .LBB0_5 ; GFX9-G-O0-NEXT: s_branch .LBB0_7 ; GFX9-G-O0-NEXT: .LBB0_9: ; %udiv-end -; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; 
GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2) -; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v9 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v10 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v8 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v9 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v10 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v11 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v12 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v2 -; GFX9-G-O0-NEXT: v_xor_b32_e64 v0, v0, v8 -; GFX9-G-O0-NEXT: v_xor_b32_e64 v1, v1, v7 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v9 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v10 -; GFX9-G-O0-NEXT: v_xor_b32_e64 v2, v2, v6 -; GFX9-G-O0-NEXT: v_xor_b32_e64 v3, v3, v5 -; GFX9-G-O0-NEXT: 
v_sub_co_u32_e64 v0, s[4:5], v0, v8 -; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v1, s[4:5], v1, v7, s[4:5] -; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v2, s[4:5], v2, v6, s[4:5] -; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v3, s[4:5], v3, v5, s[4:5] -; GFX9-G-O0-NEXT: ; kill: killed $vgpr4 +; GFX9-G-O0-NEXT: v_xor_b32_e64 v0, v0, v7 +; GFX9-G-O0-NEXT: v_xor_b32_e64 v1, v1, v6 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v8 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v9 +; GFX9-G-O0-NEXT: v_xor_b32_e64 v2, v2, v5 +; GFX9-G-O0-NEXT: v_xor_b32_e64 v3, v3, v4 +; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v0, s[4:5], v0, v7 +; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v1, s[4:5], v1, v6, s[4:5] +; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v2, s[4:5], v2, v5, s[4:5] +; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v3, s[4:5], v3, v4, s[4:5] ; GFX9-G-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_nop 0 -; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-G-O0-NEXT: s_setpc_b64 s[30:31] @@ -2533,246 +2489,238 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0: ; %bb.0: ; %_udiv-special-cases ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:304 ; 4-byte 
Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v6 ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1 -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; GFX9-O0-NEXT: v_mov_b32_e32 v9, v0 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v0 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v2 +; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v1 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v3 +; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v3 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; 
GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7 +; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v7 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v10 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(3) -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2 +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_waitcnt vmcnt(2) ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v12 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:48 ; 
4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v12 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v2 -; GFX9-O0-NEXT: v_or_b32_e64 v3, v8, v7 +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1 -; GFX9-O0-NEXT: v_or_b32_e64 v1, v5, v6 -; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v1 +; GFX9-O0-NEXT: v_or_b32_e64 v2, v7, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-O0-NEXT: v_or_b32_e64 v0, v4, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 -; GFX9-O0-NEXT: s_waitcnt vmcnt(8) -; GFX9-O0-NEXT: v_writelane_b32 v0, s6, 0 -; GFX9-O0-NEXT: v_writelane_b32 v0, s7, 1 -; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[1:2], s[6:7] -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v14 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10 -; GFX9-O0-NEXT: v_or_b32_e64 v15, v4, v2 +; GFX9-O0-NEXT: ; implicit-def: $vgpr30 : SGPR spill to VGPR lane +; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 0 +; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 1 +; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[0:1], s[6:7] ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v13 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9 -; GFX9-O0-NEXT: v_or_b32_e64 v9, v3, v1 -; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v15 -; GFX9-O0-NEXT: v_cmp_eq_u64_e64 
s[8:9], v[9:10], s[6:7] +; GFX9-O0-NEXT: v_or_b32_e64 v14, v3, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v12 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8 +; GFX9-O0-NEXT: v_or_b32_e64 v8, v2, v0 +; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v14 +; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7] ; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] -; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v6 +; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v5 ; GFX9-O0-NEXT: s_mov_b32 s9, 32 -; GFX9-O0-NEXT: v_add_u32_e64 v6, v6, s9 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7 -; GFX9-O0-NEXT: v_min_u32_e64 v6, v6, v7 +; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s9 +; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v6 +; GFX9-O0-NEXT: v_min_u32_e64 v5, v5, v6 ; GFX9-O0-NEXT: s_mov_b32 s8, 0 ; GFX9-O0-NEXT: ; implicit-def: $sgpr10 -; GFX9-O0-NEXT: v_mov_b32_e32 v9, s8 -; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v9 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v7 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v5 -; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s9 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v8, v8 -; GFX9-O0-NEXT: v_min_u32_e64 v15, v5, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, s8 +; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v6 +; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v4 +; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9 +; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7 +; GFX9-O0-NEXT: v_min_u32_e64 v14, v4, v7 ; GFX9-O0-NEXT: ; implicit-def: $sgpr10 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s8 -; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v16, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 +; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v4 ; GFX9-O0-NEXT: s_mov_b64 s[10:11], 64 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v15 +; 
GFX9-O0-NEXT: v_mov_b32_e32 v7, v14 ; GFX9-O0-NEXT: s_mov_b32 s12, s10 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v15 ; GFX9-O0-NEXT: s_mov_b32 s14, s11 -; GFX9-O0-NEXT: v_add_co_u32_e64 v8, s[12:13], v8, s12 -; GFX9-O0-NEXT: v_mov_b32_e32 v9, s14 -; GFX9-O0-NEXT: v_addc_co_u32_e64 v5, s[12:13], v5, v9, s[12:13] -; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v9, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 +; GFX9-O0-NEXT: v_add_co_u32_e64 v7, s[12:13], v7, s12 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, s14 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[12:13], v4, v8, s[12:13] +; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 ; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[6:7] -; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[11:12], s[12:13] -; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[12:13] -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, v6, v7, s[12:13] +; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[10:11], s[12:13] +; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v9, s[12:13] +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v5, v6, s[12:13] ; GFX9-O0-NEXT: ; implicit-def: $sgpr12 ; GFX9-O0-NEXT: ; implicit-def: $sgpr12 -; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4 +; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v0 +; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9 ; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v1 -; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s9 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v2 -; GFX9-O0-NEXT: v_min_u32_e64 v6, v5, v6 +; GFX9-O0-NEXT: v_min_u32_e64 v5, v4, v5 ; GFX9-O0-NEXT: ; implicit-def: 
$sgpr12 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s8 -; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v3 -; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s9 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v11, v4 -; GFX9-O0-NEXT: v_min_u32_e64 v15, v5, v11 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 +; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 +; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v2 +; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9 +; GFX9-O0-NEXT: v_ffbh_u32_e64 v10, v3 +; GFX9-O0-NEXT: v_min_u32_e64 v14, v4, v10 ; GFX9-O0-NEXT: ; implicit-def: $sgpr9 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s8 -; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v16, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v15 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 +; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v14 ; GFX9-O0-NEXT: s_mov_b32 s8, s10 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v15 ; GFX9-O0-NEXT: s_mov_b32 s10, s11 -; GFX9-O0-NEXT: v_add_co_u32_e64 v11, s[8:9], v11, s8 -; GFX9-O0-NEXT: v_mov_b32_e32 v12, s10 -; GFX9-O0-NEXT: v_addc_co_u32_e64 v5, s[8:9], v5, v12, s[8:9] -; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12 +; GFX9-O0-NEXT: v_add_co_u32_e64 v10, s[8:9], v10, s8 +; GFX9-O0-NEXT: v_mov_b32_e32 v11, s10 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[8:9], v4, v11, s[8:9] +; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v11 ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] -; 
GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[13:14], s[8:9] -; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[8:9] -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[8:9] +; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[12:13], s[8:9] +; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[8:9] +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[8:9] ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec +; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 +; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 -; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10 ; GFX9-O0-NEXT: s_mov_b32 s10, s6 ; GFX9-O0-NEXT: s_mov_b32 s11, s7 -; GFX9-O0-NEXT: v_sub_co_u32_e32 v5, vcc, v5, v8 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v9, vcc, v6, v7, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v7, s10 +; GFX9-O0-NEXT: v_sub_co_u32_e32 v4, vcc, v4, v7 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v5, v6, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v6, s10 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v6, v7, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v7, s11 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, s10 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v6, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v6, s11 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v6, v7, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v5, v6, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 
killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9 -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8 +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7 -; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6 +; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7] +; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7] ; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f -; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13] -; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[14:15] -; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7] -; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[14:15] -; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[8:9] -; GFX9-O0-NEXT: v_and_b32_e64 v7, 1, v7 -; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v7, 1 +; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13] +; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15] +; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7] +; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15] 
+; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9] +; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6 +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v6, 1 ; GFX9-O0-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9] ; GFX9-O0-NEXT: s_mov_b64 s[4:5], -1 ; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5] -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 ; GFX9-O0-NEXT: s_mov_b32 s14, s13 -; GFX9-O0-NEXT: v_xor_b32_e64 v7, v7, s14 +; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14 ; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13 -; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, s12 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v9 -; GFX9-O0-NEXT: v_or_b32_e64 v7, v7, v10 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8 -; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v6 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7 -; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[5:6], s[6:7] -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v2, v5, s[8:9] -; GFX9-O0-NEXT: v_mov_b32_e32 v2, s10 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[8:9] +; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12 +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v8 +; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v9 +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 +; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6 +; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v4, 
s11 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[8:9] +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[8:9] ; GFX9-O0-NEXT: ; implicit-def: $sgpr12 ; GFX9-O0-NEXT: ; implicit-def: $sgpr12 -; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v4, v5, s[8:9] -; GFX9-O0-NEXT: v_mov_b32_e32 v4, s10 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[8:9] +; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[8:9] +; GFX9-O0-NEXT: v_mov_b32_e32 v3, s10 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[8:9] ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4 ; GFX9-O0-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7] -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 s[4:5], exec -; 
GFX9-O0-NEXT: v_writelane_b32 v0, s4, 2 -; GFX9-O0-NEXT: v_writelane_b32 v0, s5, 3 +; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 2 +; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 3 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] @@ -2780,11 +2728,11 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_branch .LBB1_8 ; GFX9-O0-NEXT: .LBB1_1: ; %Flow ; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s4, v0, 4 -; GFX9-O0-NEXT: v_readlane_b32 s5, v0, 5 +; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 4 +; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 5 ; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-O0-NEXT: ; %bb.2: ; %Flow ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload @@ -2813,20 +2761,19 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB1_5 ; GFX9-O0-NEXT: .LBB1_3: ; %Flow2 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s4, v4, 2 -; GFX9-O0-NEXT: v_readlane_b32 s5, v4, 3 -; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded 
Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 2 +; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 3 +; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: s_nop 0 ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -2873,13 +2820,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB1_3 ; GFX9-O0-NEXT: .LBB1_5: ; %Flow1 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s4, v8, 6 -; GFX9-O0-NEXT: v_readlane_b32 s5, v8, 7 -; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload @@ -2888,9 +2828,15 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload ; 
GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 6 +; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 7 +; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: s_nop 0 ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -2904,92 +2850,87 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_branch .LBB1_4 ; GFX9-O0-NEXT: .LBB1_6: ; %udiv-do-while ; GFX9-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s6, v16, 8 -; GFX9-O0-NEXT: v_readlane_b32 s7, v16, 9 ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 
offset:204 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:56 
; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s6, v30, 8 +; GFX9-O0-NEXT: v_readlane_b32 s7, v30, 9 ; GFX9-O0-NEXT: s_mov_b32 s4, 63 -; GFX9-O0-NEXT: s_waitcnt vmcnt(16) -; GFX9-O0-NEXT: v_lshrrev_b64 v[29:30], s4, v[2:3] -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v30 +; GFX9-O0-NEXT: v_lshrrev_b64 v[28:29], s4, v[2:3] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v29 ; GFX9-O0-NEXT: s_mov_b32 s5, 1 -; GFX9-O0-NEXT: v_lshlrev_b64 v[23:24], s5, v[23:24] -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v24 +; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], s5, v[22:23] +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v23 ; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v29 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 -; GFX9-O0-NEXT: v_or_b32_e64 v23, v5, v10 -; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v24, v4 -; GFX9-O0-NEXT: v_lshlrev_b64 v[29:30], s5, v[2:3] +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v28 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v22 +; GFX9-O0-NEXT: v_or_b32_e64 v22, v5, v10 +; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v23, v4 +; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[2:3] ; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], 
s4, v[6:7] -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v30 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v29 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5 ; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v29 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v28 ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec ; GFX9-O0-NEXT: v_or_b32_e64 v4, v3, v4 ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2 ; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s5, v[0:1] -; GFX9-O0-NEXT: v_lshlrev_b64 v[29:30], s5, v[6:7] +; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[6:7] ; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v30 -; GFX9-O0-NEXT: s_waitcnt vmcnt(10) -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v28 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v29 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v27 ; GFX9-O0-NEXT: v_or3_b32 v6, v6, v7, v10 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, v29 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v27 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v28 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v26 ; GFX9-O0-NEXT: v_or3_b32 v0, v0, v1, v7 ; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v3 -; GFX9-O0-NEXT: s_waitcnt vmcnt(8) -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v26 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v25 ; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v25 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v24 ; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 ; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6 ; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v23 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v24 -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v22 
+; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11 ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v14 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v15 ; GFX9-O0-NEXT: v_sub_co_u32_e32 v13, vcc, v13, v6 -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_subb_co_u32_e32 v12, vcc, v12, v10, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v11, v4, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v5, vcc @@ -3009,22 +2950,22 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v15, v12 ; GFX9-O0-NEXT: v_mov_b32_e32 v12, 0 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, 0 -; GFX9-O0-NEXT: v_mov_b32_e32 v23, v22 -; GFX9-O0-NEXT: v_and_b32_e64 v23, v7, v23 -; GFX9-O0-NEXT: v_and_b32_e64 v21, v11, v21 -; GFX9-O0-NEXT: ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v22, v23 -; GFX9-O0-NEXT: v_mov_b32_e32 v23, v20 -; GFX9-O0-NEXT: v_and_b32_e64 v7, v7, v23 -; GFX9-O0-NEXT: v_and_b32_e64 v23, v11, v19 -; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v24, v7 -; GFX9-O0-NEXT: v_mov_b32_e32 v20, v23 -; GFX9-O0-NEXT: v_mov_b32_e32 v19, v24 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v21 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v22 -; GFX9-O0-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v20 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v19, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v22, v21 +; GFX9-O0-NEXT: v_and_b32_e64 v22, v7, v22 +; GFX9-O0-NEXT: v_and_b32_e64 v20, v11, v20 +; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v21, v22 +; GFX9-O0-NEXT: v_mov_b32_e32 v22, v19 +; GFX9-O0-NEXT: v_and_b32_e64 v7, v7, v22 +; GFX9-O0-NEXT: v_and_b32_e64 v22, v11, v18 +; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v23, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v19, v22 +; GFX9-O0-NEXT: v_mov_b32_e32 v18, v23 +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20 
+; GFX9-O0-NEXT: v_mov_b32_e32 v7, v21 +; GFX9-O0-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v19 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v18, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v11, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v7, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 @@ -3040,66 +2981,66 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_mov_b64 s[8:9], -1 ; GFX9-O0-NEXT: s_mov_b32 s5, s8 ; GFX9-O0-NEXT: s_mov_b32 s4, s9 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v18 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, s5 -; GFX9-O0-NEXT: v_add_co_u32_e32 v20, vcc, v11, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5 +; GFX9-O0-NEXT: v_add_co_u32_e32 v19, vcc, v11, v16 ; GFX9-O0-NEXT: v_mov_b32_e32 v11, s4 ; GFX9-O0-NEXT: v_addc_co_u32_e32 v9, vcc, v9, v11, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5 -; GFX9-O0-NEXT: v_addc_co_u32_e32 v17, vcc, v10, v11, vcc +; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v10, v11, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v10, s4 ; GFX9-O0-NEXT: v_addc_co_u32_e32 v8, vcc, v8, v10, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v21, v9 +; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v20, v9 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v8 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v20 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v21 -; GFX9-O0-NEXT: v_mov_b32_e32 v22, v18 -; GFX9-O0-NEXT: v_mov_b32_e32 v19, v21 -; GFX9-O0-NEXT: v_or_b32_e64 v19, v19, v22 -; GFX9-O0-NEXT: 
v_mov_b32_e32 v18, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, v20 -; GFX9-O0-NEXT: v_or_b32_e64 v17, v17, v18 -; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v19 -; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[17:18], v[12:13] +; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v19 +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20 +; GFX9-O0-NEXT: v_mov_b32_e32 v21, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v18, v20 +; GFX9-O0-NEXT: v_or_b32_e64 v18, v18, v21 +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, v19 +; GFX9-O0-NEXT: v_or_b32_e64 v16, v16, v17 +; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v18 +; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[16:17], v[12:13] ; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, v2 -; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, v2 +; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, v0 -; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, v0 +; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword 
v18, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v15 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, v14 -; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v15 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, v14 +; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v13 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, v12 -; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v13 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, v12 +; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX9-O0-NEXT: v_writelane_b32 v16, s6, 4 -; GFX9-O0-NEXT: v_writelane_b32 v16, s7, 5 +; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 4 +; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 5 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX9-O0-NEXT: v_writelane_b32 v16, s6, 8 -; GFX9-O0-NEXT: v_writelane_b32 v16, s7, 9 +; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 8 +; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 9 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:240 ; 
4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -3137,52 +3078,52 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] ; 
GFX9-O0-NEXT: s_waitcnt vmcnt(10) ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10 -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[21:22] +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[20:21] ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 ; GFX9-O0-NEXT: s_mov_b32 s6, 64 ; GFX9-O0-NEXT: v_sub_u32_e64 v12, s6, v4 -; GFX9-O0-NEXT: v_lshlrev_b64 v[23:24], v12, v[19:20] -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v24 +; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], v12, v[18:19] +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v23 ; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v12 ; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v23 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v22 ; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7 ; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 ; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7 ; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v4, s6 ; GFX9-O0-NEXT: v_sub_u32_e64 v5, v4, s6 -; GFX9-O0-NEXT: v_lshrrev_b64 v[23:24], v5, v[19:20] -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v24 +; GFX9-O0-NEXT: v_lshrrev_b64 v[22:23], v5, v[18:19] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 ; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[4:5] ; GFX9-O0-NEXT: s_mov_b32 s6, 0 ; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v4, s6 -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v22 +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v21 ; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[6:7] ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v23 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v22 ; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5] -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v21 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v20 ; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[6:7] ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 -; GFX9-O0-NEXT: 
v_lshrrev_b64 v[4:5], v4, v[19:20] +; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], v4, v[18:19] ; GFX9-O0-NEXT: v_mov_b32_e32 v15, v5 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 ; GFX9-O0-NEXT: s_mov_b32 s8, s7 @@ -3201,12 +3142,12 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_mov_b64 s[8:9], -1 ; GFX9-O0-NEXT: s_mov_b32 s5, s8 ; GFX9-O0-NEXT: s_mov_b32 s4, s9 -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v18 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, s5 -; GFX9-O0-NEXT: v_add_co_u32_e32 v12, vcc, v12, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, s4 -; GFX9-O0-NEXT: v_addc_co_u32_e32 v17, vcc, v15, v17, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5 +; GFX9-O0-NEXT: v_add_co_u32_e32 v12, vcc, v12, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, s4 +; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v15, v16, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v15, s5 ; GFX9-O0-NEXT: v_addc_co_u32_e32 v14, vcc, v14, v15, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v15, s4 @@ -3218,7 +3159,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16 ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -3231,10 +3172,11 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v14, s8 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, s7 ; GFX9-O0-NEXT: v_mov_b32_e32 v12, s6 -; GFX9-O0-NEXT: v_writelane_b32 v16, s4, 8 -; GFX9-O0-NEXT: v_writelane_b32 v16, s5, 9 +; GFX9-O0-NEXT: s_waitcnt vmcnt(4) +; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 8 +; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 9 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: 
buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -3262,165 +3204,158 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB1_6 ; GFX9-O0-NEXT: .LBB1_8: ; %udiv-bb1 +; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 
offset:24 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1 ; GFX9-O0-NEXT: s_mov_b32 s5, s6 -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 +; GFX9-O0-NEXT: s_waitcnt vmcnt(2) +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-O0-NEXT: s_mov_b32 s4, s7 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 ; GFX9-O0-NEXT: s_mov_b32 s8, s6 ; GFX9-O0-NEXT: s_mov_b32 s9, s7 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5 -; GFX9-O0-NEXT: v_add_co_u32_e32 v9, vcc, v4, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s4 -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v5, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s8 -; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v5, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s9 -; GFX9-O0-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5 +; GFX9-O0-NEXT: v_add_co_u32_e32 v8, vcc, v3, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s4 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v4, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 +; GFX9-O0-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v4, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s9 +; GFX9-O0-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v4, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v2 +; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v1 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed 
$exec -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2 +; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1 -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10 -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b32 s4, 0x7f -; GFX9-O0-NEXT: v_sub_u32_e64 v3, s4, v4 -; GFX9-O0-NEXT: v_lshlrev_b64 v[5:6], v3, v[11:12] -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v6 +; GFX9-O0-NEXT: v_sub_u32_e64 v2, s4, v3 +; GFX9-O0-NEXT: v_lshlrev_b64 v[4:5], v2, v[10:11] +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5 ; GFX9-O0-NEXT: s_mov_b32 s4, 64 -; GFX9-O0-NEXT: v_sub_u32_e64 v14, s4, v3 -; GFX9-O0-NEXT: v_lshrrev_b64 v[14:15], v14, v[7:8] -; GFX9-O0-NEXT: v_mov_b32_e32 v16, v15 -; GFX9-O0-NEXT: v_or_b32_e64 v13, v13, v16 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v14 -; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v6 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v13 -; GFX9-O0-NEXT: v_mov_b32_e32 v15, v6 -; 
GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v3, s4 +; GFX9-O0-NEXT: v_sub_u32_e64 v13, s4, v2 +; GFX9-O0-NEXT: v_lshrrev_b64 v[13:14], v13, v[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v14 +; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v15 +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v13 +; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12 +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v5 +; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v2, s4 ; GFX9-O0-NEXT: s_mov_b32 s10, 63 -; GFX9-O0-NEXT: v_sub_u32_e64 v4, s10, v4 -; GFX9-O0-NEXT: v_lshlrev_b64 v[13:14], v4, v[7:8] -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v15, s[4:5] +; GFX9-O0-NEXT: v_sub_u32_e64 v3, s10, v3 +; GFX9-O0-NEXT: v_lshlrev_b64 v[12:13], v3, v[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[4:5] ; GFX9-O0-NEXT: s_mov_b32 s10, 0 -; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[10:11], v3, s10 -; GFX9-O0-NEXT: v_mov_b32_e32 v15, v12 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v15, s[10:11] -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v13 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[10:11] +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[10:11], v2, s10 +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v11 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[10:11] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[10:11] ; GFX9-O0-NEXT: ; implicit-def: $sgpr10 ; GFX9-O0-NEXT: ; implicit-def: $sgpr10 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 -; 
GFX9-O0-NEXT: v_lshlrev_b64 v[7:8], v3, v[7:8] -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, s9 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5] -; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr7_vgpr8 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v4, v7, s[4:5] +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v3 +; GFX9-O0-NEXT: v_lshlrev_b64 v[6:7], v2, v[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s9 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5] +; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v3, s8 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v3, v6, s[4:5] ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v3 -; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill +; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v2 +; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10 -; GFX9-O0-NEXT: v_or_b32_e64 v3, v3, v4 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1 -; GFX9-O0-NEXT: 
v_mov_b32_e32 v1, v9 -; GFX9-O0-NEXT: v_or_b32_e64 v1, v1, v2 -; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 -; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[1:2], s[6:7] +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9 +; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v8 +; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v1 +; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 +; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[0:1], s[6:7] ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX9-O0-NEXT: v_mov_b32_e32 v3, s8 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, s9 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, s7 -; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s8 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, s9 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s7 +; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:80 
; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 s[6:7], exec ; GFX9-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] ; GFX9-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] -; GFX9-O0-NEXT: v_writelane_b32 v0, s6, 6 -; GFX9-O0-NEXT: v_writelane_b32 v0, s7, 7 +; GFX9-O0-NEXT: s_waitcnt vmcnt(16) +; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 6 +; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 7 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-O0-NEXT: s_cbranch_execz .LBB1_5 ; GFX9-O0-NEXT: s_branch .LBB1_7 ; GFX9-O0-NEXT: .LBB1_9: ; %udiv-end -; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload +; GFX9-O0-NEXT: 
buffer_load_dword v5, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b32 s4, 32 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[7:8] +; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[6:7] ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-O0-NEXT: v_lshrrev_b64 v[2:3], s4, v[5:6] +; GFX9-O0-NEXT: v_lshrrev_b64 v[2:3], s4, v[4:5] ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, v7 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 -; GFX9-O0-NEXT: ; kill: killed $vgpr4 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v4 ; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: s_setpc_b64 s[30:31] @@ -3610,83 +3545,94 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0: ; %bb.0: ; %_udiv-special-cases ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-G-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:312 ; 
4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-G-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v0 -; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9_vgpr10_vgpr11 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v2 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v3 -; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_nop 0 -; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v4 -; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v1 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v2 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v3 +; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v10 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v9 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v8 +; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 
-; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v5 -; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v6 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v7 -; GFX9-G-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6_vgpr7_vgpr8 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v11 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v10 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v9 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v6 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v7 +; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v10 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v9 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8 +; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 -; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v6 
; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v8 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v4 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v7 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v6 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v9 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v12 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v13 +; GFX9-G-O0-NEXT: v_or_b32_e64 v8, v8, v11 +; GFX9-G-O0-NEXT: v_or_b32_e64 v10, v9, v10 +; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v11 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v13 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v14 -; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v9, v12 -; GFX9-G-O0-NEXT: v_or_b32_e64 v11, v10, v11 -; GFX9-G-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v11 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s4 -; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[6:7], v[9:10], v[11:12] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v2 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s4 +; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[6:7], v[8:9], v[10:11] ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v4 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v0 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v3 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v2 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v9 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v12 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v13 +; GFX9-G-O0-NEXT: v_or_b32_e64 v8, v8, v11 +; GFX9-G-O0-NEXT: v_or_b32_e64 v10, v9, v10 +; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v11 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v13 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v14 -; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v9, v12 -; 
GFX9-G-O0-NEXT: v_or_b32_e64 v11, v10, v11 -; GFX9-G-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v11 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s4 -; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[9:10], v[11:12] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s4 +; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], v[10:11] ; GFX9-G-O0-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v6 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v8 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v4 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v7 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v6 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s4 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s5 +; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], v[4:5] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v9 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v10 +; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v4, v4 +; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v5, v5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 32 +; GFX9-G-O0-NEXT: v_add_u32_e64 v5, v5, v6 +; GFX9-G-O0-NEXT: v_min_u32_e64 v4, v4, v5 +; GFX9-G-O0-NEXT: s_mov_b32 s10, 64 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s10 +; GFX9-G-O0-NEXT: v_add_u32_e64 v5, v4, v5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v7 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v8 +; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v4, v4 +; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v6, v6 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 32 +; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v6, v7 +; GFX9-G-O0-NEXT: v_min_u32_e64 v4, v4, v6 +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[8:9] +; GFX9-G-O0-NEXT: s_mov_b32 s14, 0 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v1 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v0 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v3 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v2 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s5 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s4 ; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], v[5:6] @@ -3697,7 +3643,6 @@ 
define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 32 ; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v6, v7 ; GFX9-G-O0-NEXT: v_min_u32_e64 v5, v5, v6 -; GFX9-G-O0-NEXT: s_mov_b32 s10, 64 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s10 ; GFX9-G-O0-NEXT: v_add_u32_e64 v6, v5, v6 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8 @@ -3708,130 +3653,106 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: v_add_u32_e64 v7, v7, v8 ; GFX9-G-O0-NEXT: v_min_u32_e64 v5, v5, v7 ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[8:9] -; GFX9-G-O0-NEXT: s_mov_b32 s14, 0 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v2 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v4 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v3 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s4 -; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[9:10], v[6:7] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v11 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v12 -; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v6, v6 -; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v7, v7 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, 32 -; GFX9-G-O0-NEXT: v_add_u32_e64 v7, v7, v8 -; GFX9-G-O0-NEXT: v_min_u32_e64 v6, v6, v7 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s10 -; GFX9-G-O0-NEXT: v_add_u32_e64 v7, v6, v7 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v9 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v10 -; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v6, v6 -; GFX9-G-O0-NEXT: v_ffbh_u32_e64 v8, v8 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, 32 -; GFX9-G-O0-NEXT: v_add_u32_e64 v8, v8, v9 -; GFX9-G-O0-NEXT: v_min_u32_e64 v6, v6, v8 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[8:9] ; GFX9-G-O0-NEXT: s_mov_b32 s13, 0 ; GFX9-G-O0-NEXT: s_mov_b32 s11, 0 ; GFX9-G-O0-NEXT: s_mov_b32 s12, 0 ; GFX9-G-O0-NEXT: s_mov_b32 s10, 0 -; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v6, s[8:9], v5, v6 -; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s14 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s14 -; 
GFX9-G-O0-NEXT: v_subb_co_u32_e64 v7, s[8:9], v5, v7, s[8:9] -; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s13 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s12 -; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v9, s[8:9], v5, v8, s[8:9] -; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s11 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s10 -; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v8, s[8:9], v5, v8, s[8:9] -; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v5, s[8:9], v4, v5 +; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s14 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s14 +; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v6, s[8:9], v4, v6, s[8:9] +; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s13 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s12 +; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v8, s[8:9], v4, v7, s[8:9] +; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s11 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s10 +; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v7, s[8:9], v4, v7, s[8:9] +; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 s[12:13], 0x7f +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v5 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v6 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v7 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v9 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v8 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s4 -; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[10:11], v[12:13], v[14:15] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s4 -; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 
s[8:9], v[12:13], v[14:15] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s12 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s13 -; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[12:13], v[10:11], v[12:13] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v8 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v7 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s4 +; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[10:11], v[11:12], v[13:14] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s4 +; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[11:12], v[13:14] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s12 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s13 +; GFX9-G-O0-NEXT: v_cmp_gt_u64_e64 s[12:13], v[9:10], v[11:12] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, 1 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v4, v9, s[12:13] ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, 1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, 0 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v10, v5, v10, s[12:13] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, 1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, 0 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[10:11] -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v10, v5, v10, s[8:9] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, 1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, 0 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[6:7] -; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v5, v10 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[10:11] +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v4, v9, s[8:9] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, 1 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[6:7] +; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v9 ; GFX9-G-O0-NEXT: s_mov_b32 s7, 0x7f ; GFX9-G-O0-NEXT: s_mov_b32 s6, 0 -; GFX9-G-O0-NEXT: v_xor_b32_e64 v6, v6, s7 -; GFX9-G-O0-NEXT: v_xor_b32_e64 v7, v7, s6 -; GFX9-G-O0-NEXT: v_or_b32_e64 v6, v6, v9 -; GFX9-G-O0-NEXT: v_or_b32_e64 v8, v7, v8 -; GFX9-G-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 
killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s4 -; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[6:7], v[8:9] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v2 +; GFX9-G-O0-NEXT: v_xor_b32_e64 v5, v5, s7 +; GFX9-G-O0-NEXT: v_xor_b32_e64 v6, v6, s6 +; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v5, v8 +; GFX9-G-O0-NEXT: v_or_b32_e64 v7, v6, v7 +; GFX9-G-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v7 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s4 +; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[5:6], v[7:8] ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v4 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v0 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v3 -; GFX9-G-O0-NEXT: v_and_b32_e32 v1, 1, v5 -; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v2 +; GFX9-G-O0-NEXT: v_and_b32_e32 v0, 1, v4 +; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v0 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v5 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v6 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v7 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[6:7] -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[6:7] -; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v3 -; GFX9-G-O0-NEXT: v_and_b32_e32 v3, 1, v5 -; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v3 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 0 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, 0 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v8 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v9 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[6:7] -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[6:7] -; GFX9-G-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 
v7, v3 -; GFX9-G-O0-NEXT: ; kill: def $vgpr1_vgpr2 killed $vgpr1_vgpr2 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v6 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v7 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, 1 +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[6:7] +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[6:7] +; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v2 +; GFX9-G-O0-NEXT: v_and_b32_e32 v2, 1, v4 +; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v2 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 0 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5] -; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v5, v6 -; GFX9-G-O0-NEXT: v_and_b32_e32 v5, 1, v5 -; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, 0 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v7 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v8 +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[6:7] +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[6:7] +; GFX9-G-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v2 +; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v6 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 1 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, 0 +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] +; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v5 +; GFX9-G-O0-NEXT: v_and_b32_e32 v4, 1, v4 +; GFX9-G-O0-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v4 ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], -1 ; GFX9-G-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] -; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 -; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; 
GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], exec -; GFX9-G-O0-NEXT: v_writelane_b32 v0, s4, 0 -; GFX9-G-O0-NEXT: v_writelane_b32 v0, s5, 1 +; GFX9-G-O0-NEXT: ; implicit-def: $vgpr34 : SGPR spill to VGPR lane +; GFX9-G-O0-NEXT: v_writelane_b32 v34, s4, 0 +; GFX9-G-O0-NEXT: v_writelane_b32 v34, s5, 1 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-G-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5] @@ -3839,68 +3760,65 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: s_branch .LBB1_8 ; GFX9-G-O0-NEXT: .LBB1_1: ; %Flow ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-G-O0-NEXT: v_readlane_b32 s4, v0, 2 -; GFX9-G-O0-NEXT: v_readlane_b32 s5, v0, 3 +; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 2 +; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 3 ; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-G-O0-NEXT: ; %bb.2: ; %Flow -; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: 
buffer_load_dword v6, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) -; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) -; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) -; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) -; GFX9-G-O0-NEXT: 
buffer_store_dword v7, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) -; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) -; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) -; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(7) -; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB1_5 ; GFX9-G-O0-NEXT: .LBB1_3: ; %Flow2 -; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-G-O0-NEXT: v_readlane_b32 s4, v4, 0 -; GFX9-G-O0-NEXT: v_readlane_b32 s5, v4, 1 -; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) -; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) -; 
GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) -; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) -; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 0 +; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 1 +; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB1_9 ; GFX9-G-O0-NEXT: .LBB1_4: ; %udiv-loop-exit -; GFX9-G-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:148 ; 4-byte 
Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2) ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v4 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v5 @@ -3949,77 +3867,72 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB1_3 ; GFX9-G-O0-NEXT: .LBB1_5: ; %Flow1 +; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] ; 
GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-G-O0-NEXT: v_readlane_b32 s4, v8, 4 -; GFX9-G-O0-NEXT: v_readlane_b32 s5, v8, 5 +; GFX9-G-O0-NEXT: v_readlane_b32 s4, v34, 4 +; GFX9-G-O0-NEXT: v_readlane_b32 s5, v34, 5 ; GFX9-G-O0-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) -; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) -; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) -; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) -; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 -; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill +; 
GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB1_4 ; GFX9-G-O0-NEXT: .LBB1_6: ; %udiv-do-while ; GFX9-G-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-G-O0-NEXT: v_readlane_b32 s6, v16, 6 -; GFX9-G-O0-NEXT: v_readlane_b32 s7, v16, 7 -; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 
offset:184 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: 
buffer_load_dword v8, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-G-O0-NEXT: v_readlane_b32 s6, v34, 6 +; GFX9-G-O0-NEXT: v_readlane_b32 s7, v34, 7 ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(18) ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v3 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(16) ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v4 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v5 ; GFX9-G-O0-NEXT: s_mov_b32 s8, 1 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8 -; GFX9-G-O0-NEXT: v_lshlrev_b64 v[21:22], v2, v[0:1] +; GFX9-G-O0-NEXT: v_lshlrev_b64 v[20:21], v2, v[0:1] ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[4:5], v2, v[3:4] ; GFX9-G-O0-NEXT: ; kill: def $vgpr2 killed $vgpr0 killed $exec @@ -4043,8 +3956,8 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: 
v_lshrrev_b32_e64 v3, v0, v1 ; GFX9-G-O0-NEXT: s_mov_b32 s9, 0 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s9 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v21 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v22 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v20 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v21 ; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v2, v3 ; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v0, v1 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v12 @@ -4052,7 +3965,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v14 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v15 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8 -; GFX9-G-O0-NEXT: v_lshlrev_b64 v[23:24], v0, v[2:3] +; GFX9-G-O0-NEXT: v_lshlrev_b64 v[22:23], v0, v[2:3] ; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8 ; GFX9-G-O0-NEXT: v_lshlrev_b64 v[0:1], v0, v[12:13] ; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr2 killed $exec @@ -4064,22 +3977,20 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s8 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v0 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v1 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(10) +; GFX9-G-O0-NEXT: v_mov_b32_e32 v28, v30 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v29, v31 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v30, v32 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(8) +; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v32 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v33 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v34 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v29 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v30 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v23 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v24 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v28 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v29 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v22 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v23 ; GFX9-G-O0-NEXT: v_or_b32_e64 v0, v0, v15 ; GFX9-G-O0-NEXT: v_or_b32_e64 v13, v1, v13 ; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v13 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v21 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v22 +; 
GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v20 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v21 ; GFX9-G-O0-NEXT: v_or3_b32 v12, v12, v14, v15 ; GFX9-G-O0-NEXT: v_or3_b32 v2, v2, v3, v13 ; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec @@ -4087,7 +3998,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec ; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v12 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v13 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v11, s[8:9], v11, v4 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v10, s[8:9], v10, v9, s[8:9] ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v8, s[8:9], v8, v7, s[8:9] @@ -4109,18 +4019,18 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: ; kill: def $vgpr12_vgpr13 killed $vgpr12_vgpr13 def $vgpr12_vgpr13_vgpr14_vgpr15 killed $exec ; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v11 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v10 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v24 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v25 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v24, v26 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v26 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v27 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v28 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v23 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v24 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v22 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v23 ; GFX9-G-O0-NEXT: v_and_b32_e64 v11, v8, v11 ; GFX9-G-O0-NEXT: v_and_b32_e64 v10, v8, v10 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v21 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v22 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v20 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v21 ; GFX9-G-O0-NEXT: v_and_b32_e64 v8, v6, v8 -; GFX9-G-O0-NEXT: v_and_b32_e64 v6, v6, v21 +; GFX9-G-O0-NEXT: v_and_b32_e64 v6, v6, v20 ; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v4, s[8:9], v4, v11 ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v10, s[8:9], v9, v10, s[8:9] ; GFX9-G-O0-NEXT: v_subb_co_u32_e64 v9, s[8:9], v7, v8, s[8:9] 
@@ -4129,351 +4039,344 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v10 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v9 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v8 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v17 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v18 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v19 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v20 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v16 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v17 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v18 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v19 ; GFX9-G-O0-NEXT: s_mov_b32 s8, -1 ; GFX9-G-O0-NEXT: s_mov_b32 s12, -1 ; GFX9-G-O0-NEXT: s_mov_b32 s11, -1 ; GFX9-G-O0-NEXT: s_mov_b32 s10, -1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, s8 -; GFX9-G-O0-NEXT: v_add_co_u32_e64 v17, s[8:9], v11, v17 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, s8 +; GFX9-G-O0-NEXT: v_add_co_u32_e64 v16, s[8:9], v11, v16 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, s12 -; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v18, s[8:9], v10, v11, s[8:9] +; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v17, s[8:9], v10, v11, s[8:9] ; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, s11 -; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v20, s[8:9], v9, v10, s[8:9] +; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v19, s[8:9], v9, v10, s[8:9] ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, s10 -; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v19, s[8:9], v8, v9, s[8:9] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v17 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v18 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v20 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v19 -; GFX9-G-O0-NEXT: v_or_b32_e64 v17, v17, v20 -; GFX9-G-O0-NEXT: v_or_b32_e64 v19, v18, v19 -; GFX9-G-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v19 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, s5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, s4 -; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[17:18], v[19:20] +; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v18, s[8:9], v8, v9, s[8:9] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v16 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v17 
+; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v19 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v11, v18 +; GFX9-G-O0-NEXT: v_or_b32_e64 v16, v16, v19 +; GFX9-G-O0-NEXT: v_or_b32_e64 v18, v17, v18 +; GFX9-G-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v18 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, s5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, s4 +; GFX9-G-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[16:17], v[18:19] ; GFX9-G-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v3 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v2 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v0 -; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v3 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v2 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v1 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v0 +; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v15 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v14 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v13 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v12 -; GFX9-G-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v19, v15 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v18, v14 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, v13 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v12 +; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: buffer_store_dword v17, 
off, s[0:3], s32 offset:120 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: buffer_store_dword v19, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v20, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX9-G-O0-NEXT: v_writelane_b32 v16, s6, 2 -; GFX9-G-O0-NEXT: v_writelane_b32 v16, s7, 3 +; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 2 +; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 3 ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX9-G-O0-NEXT: v_writelane_b32 v16, s6, 6 -; GFX9-G-O0-NEXT: v_writelane_b32 v16, s7, 7 +; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 6 +; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 7 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 -; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill ; 
GFX9-G-O0-NEXT: s_nop 0 -; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 -; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 -; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:188 ; 4-byte 
Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_andn2_b64 exec, exec, s[4:5] ; GFX9-G-O0-NEXT: s_cbranch_execnz .LBB1_6 ; GFX9-G-O0-NEXT: s_branch .LBB1_1 ; GFX9-G-O0-NEXT: .LBB1_7: ; %udiv-preheader -; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 
offset:44 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-G-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: s_mov_b32 s4, 64 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2) -; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v4 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v7 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v6 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) +; GFX9-G-O0-NEXT: 
v_mov_b32_e32 v15, v5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v4 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-G-O0-NEXT: v_mov_b32_e32 v21, v7 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v20, v6 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s4 -; GFX9-G-O0-NEXT: v_sub_u32_e64 v4, v13, v4 +; GFX9-G-O0-NEXT: v_sub_u32_e64 v4, v12, v4 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s4 -; GFX9-G-O0-NEXT: v_sub_u32_e64 v5, v5, v13 +; GFX9-G-O0-NEXT: v_sub_u32_e64 v5, v5, v12 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 0 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s4 -; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v13, v6 +; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v12, v6 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s6 -; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v13, v6 -; GFX9-G-O0-NEXT: v_lshrrev_b64 v[6:7], v13, v[21:22] -; GFX9-G-O0-NEXT: v_lshrrev_b64 v[26:27], v13, v[15:16] -; GFX9-G-O0-NEXT: v_lshlrev_b64 v[24:25], v5, v[21:22] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v26 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v27 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v23, v24 +; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v12, v6 +; GFX9-G-O0-NEXT: v_lshrrev_b64 v[6:7], v12, v[20:21] +; GFX9-G-O0-NEXT: v_lshrrev_b64 v[25:26], v12, v[14:15] +; GFX9-G-O0-NEXT: v_lshlrev_b64 v[23:24], v5, v[20:21] ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v25 -; GFX9-G-O0-NEXT: v_or_b32_e64 v14, v14, v23 -; GFX9-G-O0-NEXT: v_or_b32_e64 v13, v5, v13 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v26 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v22, v23 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v24 +; GFX9-G-O0-NEXT: v_or_b32_e64 v13, v13, v22 +; GFX9-G-O0-NEXT: v_or_b32_e64 v12, v5, v12 ; GFX9-G-O0-NEXT: s_mov_b64 s[8:9], 0 -; GFX9-G-O0-NEXT: v_lshrrev_b64 v[21:22], v4, v[21:22] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v21 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v22 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v14, s[4:5] -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[4:5] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v15 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v16 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v14, 
s[6:7] -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v13, v5, v13, s[6:7] +; GFX9-G-O0-NEXT: v_lshrrev_b64 v[20:21], v4, v[20:21] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v20 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v21 +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v13, s[4:5] +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[4:5] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v14 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v15 +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v4, v4, v13, s[6:7] +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v12, v5, v12, s[6:7] ; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v13 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v6 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v12 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v6 ; GFX9-G-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, 0 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, 0 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, 0 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v13, v13, v14, s[4:5] +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v12, v12, v13, s[4:5] ; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5] -; GFX9-G-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v6 +; GFX9-G-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v6 ; GFX9-G-O0-NEXT: ; kill: def $vgpr4_vgpr5 killed $vgpr4_vgpr5 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v13 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v14 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v17 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v18 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v19 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v20 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v12 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, v13 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v16 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v17 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v18 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v19 ; 
GFX9-G-O0-NEXT: s_mov_b32 s4, -1 ; GFX9-G-O0-NEXT: s_mov_b32 s10, -1 ; GFX9-G-O0-NEXT: s_mov_b32 s7, -1 ; GFX9-G-O0-NEXT: s_mov_b32 s6, -1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v17, s4 -; GFX9-G-O0-NEXT: v_add_co_u32_e64 v16, s[4:5], v16, v17 -; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, s10 -; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v15, s[4:5], v15, v16, s[4:5] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, s4 +; GFX9-G-O0-NEXT: v_add_co_u32_e64 v15, s[4:5], v15, v16 ; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s7 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s10 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v14, s[4:5], v14, v15, s[4:5] ; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s6 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s7 ; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v13, s[4:5], v13, v14, s[4:5] ; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s6 +; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v12, s[4:5], v12, v13, s[4:5] +; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], s[8:9] ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], s[8:9] -; GFX9-G-O0-NEXT: v_writelane_b32 v12, s8, 6 -; GFX9-G-O0-NEXT: v_writelane_b32 v12, s9, 7 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4) +; GFX9-G-O0-NEXT: v_writelane_b32 v34, s8, 6 +; GFX9-G-O0-NEXT: v_writelane_b32 v34, s9, 7 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, s7 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, s6 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, s5 ; 
GFX9-G-O0-NEXT: v_mov_b32_e32 v12, s4 -; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 -; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 -; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 -; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: 
buffer_store_dword v7, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 -; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_branch .LBB1_6 ; GFX9-G-O0-NEXT: .LBB1_8: ; %udiv-bb1 -; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] +; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; 
GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-G-O0-NEXT: s_mov_b64 s[4:5], 0 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 1 ; GFX9-G-O0-NEXT: s_mov_b32 s10, 0 ; GFX9-G-O0-NEXT: s_mov_b32 s9, 0 ; GFX9-G-O0-NEXT: s_mov_b32 s8, 0 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s6 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) -; GFX9-G-O0-NEXT: v_add_co_u32_e64 v5, s[6:7], v2, v5 -; GFX9-G-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s10 -; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1) -; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v6, s[6:7], v4, v6, s[6:7] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s9 -; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v8, s[6:7], v3, v4, s[6:7] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s8 -; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v7, s[6:7], v1, v3, s[6:7] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s6 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(4) +; GFX9-G-O0-NEXT: v_add_co_u32_e64 v4, s[6:7], v1, v4 +; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, s10 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2) +; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v5, s[6:7], v3, v5, s[6:7] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s9 +; GFX9-G-O0-NEXT: v_addc_co_u32_e64 v7, s[6:7], v2, v3, s[6:7] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s8 +; 
GFX9-G-O0-NEXT: v_addc_co_u32_e64 v6, s[6:7], v0, v2, s[6:7] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v4 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v5 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v6 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v8 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v16, v7 -; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v7 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v6 +; GFX9-G-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b32 s6, 0x7f -; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v4, s[6:7], v1, v2 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-G-O0-NEXT: v_sub_co_u32_e64 v3, s[6:7], v0, v1 ; GFX9-G-O0-NEXT: s_mov_b32 s7, 64 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v10 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v13, v9 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s7 -; GFX9-G-O0-NEXT: v_sub_u32_e64 v3, v4, v1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s7 -; GFX9-G-O0-NEXT: v_sub_u32_e64 v9, v1, v4 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v12, v8 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7 +; GFX9-G-O0-NEXT: v_sub_u32_e64 v2, v3, v0 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7 +; GFX9-G-O0-NEXT: v_sub_u32_e64 v8, v0, v3 ; GFX9-G-O0-NEXT: s_mov_b32 s6, 0 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s7 -; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[8:9], v4, v1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v4, v1 -; GFX9-G-O0-NEXT: v_lshlrev_b64 v[1:2], v4, v[13:14] -; GFX9-G-O0-NEXT: v_lshrrev_b64 v[18:19], v9, v[13:14] -; GFX9-G-O0-NEXT: 
v_lshlrev_b64 v[16:17], v4, v[11:12] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v18 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v19 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v15, v16 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s7 +; GFX9-G-O0-NEXT: v_cmp_lt_u32_e64 s[8:9], v3, v0 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-G-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v3, v0 +; GFX9-G-O0-NEXT: v_lshlrev_b64 v[0:1], v3, v[12:13] +; GFX9-G-O0-NEXT: v_lshrrev_b64 v[17:18], v8, v[12:13] +; GFX9-G-O0-NEXT: v_lshlrev_b64 v[15:16], v3, v[10:11] ; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v17 -; GFX9-G-O0-NEXT: v_or_b32_e64 v10, v10, v15 -; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v9 -; GFX9-G-O0-NEXT: v_lshlrev_b64 v[13:14], v3, v[13:14] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v1 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v2 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v18 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v14, v15 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v16 +; GFX9-G-O0-NEXT: v_or_b32_e64 v9, v9, v14 +; GFX9-G-O0-NEXT: v_or_b32_e64 v3, v3, v8 +; GFX9-G-O0-NEXT: v_lshlrev_b64 v[12:13], v2, v[12:13] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v0 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v1 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[8:9] -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[8:9] -; GFX9-G-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v3 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v13 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v14 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v9, v10, s[8:9] -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[8:9] -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v11 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v12 -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v9, v9, v10, s[6:7] -; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[6:7] -; GFX9-G-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v10, v3 -; 
GFX9-G-O0-NEXT: ; kill: def $vgpr1_vgpr2 killed $vgpr1_vgpr2 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[8:9] +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[8:9] +; GFX9-G-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v2 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, v12 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v13 +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[8:9] +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[8:9] +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v10 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v11 +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[6:7] +; GFX9-G-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[6:7] +; GFX9-G-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v9, v2 +; GFX9-G-O0-NEXT: ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v8 ; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v9 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, v10 -; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX9-G-O0-NEXT: s_mov_b64 s[10:11], s[4:5] -; GFX9-G-O0-NEXT: v_or_b32_e64 v5, v5, v8 -; GFX9-G-O0-NEXT: v_or_b32_e64 v7, v6, v7 -; GFX9-G-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, v7 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v8, s5 -; 
GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s4 -; GFX9-G-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[5:6], v[7:8] -; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_or_b32_e64 v4, v4, v7 +; GFX9-G-O0-NEXT: v_or_b32_e64 v6, v5, v6 +; GFX9-G-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-G-O0-NEXT: v_mov_b32_e32 v5, v6 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v7, s5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v6, s4 +; GFX9-G-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[4:5], v[6:7] +; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s8 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s9 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s10 -; GFX9-G-O0-NEXT: v_mov_b32_e32 v4, s11 -; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, s8 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, s9 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, s10 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, s11 +; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_nop 0 +; GFX9-G-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill -; GFX9-G-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 s[6:7], exec ; GFX9-G-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] 
; GFX9-G-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] -; GFX9-G-O0-NEXT: v_writelane_b32 v0, s6, 4 -; GFX9-G-O0-NEXT: v_writelane_b32 v0, s7, 5 +; GFX9-G-O0-NEXT: s_waitcnt vmcnt(17) +; GFX9-G-O0-NEXT: v_writelane_b32 v34, s6, 4 +; GFX9-G-O0-NEXT: v_writelane_b32 v34, s7, 5 ; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-G-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-G-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-G-O0-NEXT: s_cbranch_execz .LBB1_5 ; GFX9-G-O0-NEXT: s_branch .LBB1_7 ; GFX9-G-O0-NEXT: .LBB1_9: ; %udiv-end -; GFX9-G-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(3) -; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v5 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v0, v3 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(2) -; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v6 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v1, v4 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(1) -; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v7 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v2, v5 ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) -; 
GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v8 -; GFX9-G-O0-NEXT: ; kill: killed $vgpr4 +; GFX9-G-O0-NEXT: v_mov_b32_e32 v3, v6 ; GFX9-G-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX9-G-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: s_nop 0 -; GFX9-G-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload -; GFX9-G-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload +; GFX9-G-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload ; GFX9-G-O0-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-G-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-G-O0-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir index 2695fdbda8755..a680b63a34b9a 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir @@ -294,7 +294,7 @@ body: | liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; MUBUFW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required - ; MUBUFW32: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; MUBUFW32: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 
$vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; MUBUFW32-NEXT: {{ $}} ; MUBUFW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc ; MUBUFW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 killed $sgpr4, $sgpr8, 0, implicit $exec @@ -302,7 +302,7 @@ body: | ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW32-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required - ; FLATSCRW32: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, 
$vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; FLATSCRW32: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; 
FLATSCRW32-NEXT: {{ $}} ; FLATSCRW32-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc ; FLATSCRW32-NEXT: renamable $vgpr0, renamable $vcc_lo = V_ADD_CO_U32_e64 killed $sgpr4, $sgpr8, 0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir index 6ec296144bf19..fa442aa849d17 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir @@ -1192,7 +1192,7 @@ body: | liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX7-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required - ; GFX7: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, 
$vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX7: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec @@ -1204,7 +1204,7 @@ body: | ; GFX7-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX8-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required - ; GFX8: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec @@ -1216,7 +1216,7 @@ body: | ; GFX8-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX900-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required - ; GFX900: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX900: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX900-NEXT: {{ $}} ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, 
$sgpr32, implicit $exec @@ -1227,7 +1227,7 @@ body: | ; GFX900-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX90A-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required - ; GFX90A: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX90A: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec @@ -1238,7 +1238,7 @@ body: | ; GFX90A-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX10-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required - ; GFX10: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, 
$vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX10-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec @@ -1249,7 +1249,7 @@ body: | ; GFX10-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX940-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required - ; GFX940: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX940: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX940-NEXT: {{ $}} ; GFX940-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; GFX940-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc @@ -1260,7 +1260,7 @@ body: | ; GFX940-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX11-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required - ; GFX11: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX11: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $sgpr4, $sgpr8, 0, implicit $exec @@ -1268,7 +1268,7 @@ body: | ; 
GFX11-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX12-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required - ; GFX12: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX12: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $sgpr4, $sgpr8, 0, implicit $exec @@ -1296,7 +1296,7 @@ body: | liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX7-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required - ; GFX7: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX7: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec @@ -1308,7 +1308,7 @@ body: | ; GFX7-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX8-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required - ; GFX8: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec @@ -1320,7 +1320,7 @@ body: | ; GFX8-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX900-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required - ; GFX900: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX900: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX900-NEXT: {{ $}} ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec @@ -1331,7 +1331,7 @@ body: | ; GFX900-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX90A-LABEL: name: 
v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required - ; GFX90A: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX90A: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec @@ -1342,7 +1342,7 @@ body: | ; GFX90A-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX10-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required - ; GFX10: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX10-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec @@ -1353,7 +1353,7 @@ body: | ; GFX10-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required - ; FLATSCRW64: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; FLATSCRW64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; FLATSCRW64-NEXT: {{ $}} ; FLATSCRW64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc @@ -1384,7 +1384,7 @@ body: | liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 
$vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX7-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required - ; GFX7: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX7: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX7-NEXT: {{ $}} ; GFX7-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX7-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, 
implicit $exec @@ -1396,7 +1396,7 @@ body: | ; GFX7-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX8-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required - ; GFX8: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec @@ -1408,7 +1408,7 @@ body: | ; GFX8-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX900-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required - ; GFX900: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, 
$vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX900: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX900-NEXT: {{ $}} ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec @@ -1419,7 +1419,7 @@ body: | ; GFX900-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX90A-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required - ; GFX90A: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX90A: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec @@ -1430,7 +1430,7 @@ body: | ; GFX90A-NEXT: SI_RETURN implicit $vgpr0 ; ; GFX10-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required - ; GFX10: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; GFX10-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, 
$sgpr32, implicit $exec @@ -1441,7 +1441,7 @@ body: | ; GFX10-NEXT: SI_RETURN implicit $vgpr0 ; ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required - ; FLATSCRW64: liveins: $sgpr8, $vgpr254, $vgpr255, $vgpr252_vgpr253, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; FLATSCRW64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, $sgpr8 ; FLATSCRW64-NEXT: {{ $}} ; FLATSCRW64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; FLATSCRW64-NEXT: $sgpr4 = S_ADD_I32 $sgpr32, 128, implicit-def $scc diff --git a/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir b/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir index 3bf7e7b8c5696..2f43c8264bf90 100644 --- a/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir +++ b/llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir @@ -25,13 +25,12 @@ body: | ; GCN-LABEL: name: test_single_block ; GCN: liveins: $sgpr4, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0 + ; GCN-NEXT: renamable $vgpr63 = IMPLICIT_DEF + ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr63 ; GCN-NEXT: S_NOP 0 - ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0 - ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec - ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec - ; GCN-NEXT: KILL killed renamable $vgpr0 + ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR killed $vgpr63, 0 + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec + ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, [[V_MOV_B32_e32_]], 0, 0, implicit $exec ; GCN-NEXT: SI_RETURN SI_SPILL_S32_SAVE killed $sgpr4, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 S_NOP 0 @@ -63,32 +62,31 @@ body: | ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $sgpr6, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: 
renamable $vgpr0 = IMPLICIT_DEF ; GCN-NEXT: S_BRANCH %bb.1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) - ; GCN-NEXT: liveins: $sgpr6, $vgpr0, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr6, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 10, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec ; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: ; GCN-NEXT: successors: %bb.3(0x80000000) - ; GCN-NEXT: liveins: $sgpr6, $vgpr0, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr6, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr6, 0, killed $vgpr0 + ; GCN-NEXT: renamable $vgpr63 = IMPLICIT_DEF + ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr6, 0, killed $vgpr63 ; GCN-NEXT: S_NOP 0 - ; GCN-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0 - ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec + ; GCN-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR killed $vgpr63, 0 + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec ; GCN-NEXT: S_BRANCH %bb.3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.3: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $sgpr5 = V_READFIRSTLANE_B32 killed $vgpr1, implicit $exec + ; GCN-NEXT: $sgpr5 = V_READFIRSTLANE_B32 [[V_MOV_B32_e32_]], implicit $exec ; GCN-NEXT: S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: KILL killed renamable $vgpr0 ; GCN-NEXT: SI_RETURN bb.0: liveins: $sgpr6, $sgpr10_sgpr11 @@ -135,52 +133,50 @@ body: | ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GCN-NEXT: liveins: $sgpr4, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF - ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 10, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit 
$exec ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x80000000) - ; GCN-NEXT: liveins: $sgpr4, $vgpr0, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr4, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0 + ; GCN-NEXT: renamable $vgpr63 = IMPLICIT_DEF + ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr63 ; GCN-NEXT: S_NOP 0 - ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0 - ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec + ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR killed $vgpr63, 0 + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: ; GCN-NEXT: successors: %bb.3(0x80000000) - ; GCN-NEXT: liveins: $sgpr4, $vgpr0, $vgpr1, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr4, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: S_STORE_DWORD_IMM $sgpr4, $sgpr10_sgpr11, 0, 0 - ; GCN-NEXT: $sgpr5 = V_READFIRSTLANE_B32 killed $vgpr1, implicit $exec + ; GCN-NEXT: $sgpr5 = V_READFIRSTLANE_B32 [[V_MOV_B32_e32_]], implicit $exec ; GCN-NEXT: S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 4 - ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 5, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 5, implicit $exec ; GCN-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec ; GCN-NEXT: S_BRANCH %bb.3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.3: ; GCN-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) - ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $vcc = V_CMP_EQ_U32_e64 0, $vgpr1, implicit $exec + ; GCN-NEXT: $vcc = V_CMP_EQ_U32_e64 0, [[V_MOV_B32_e32_1]], implicit $exec ; GCN-NEXT: $sgpr6_sgpr7 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec ; GCN-NEXT: S_CBRANCH_SCC1 %bb.5, implicit $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.4: ; GCN-NEXT: successors: 
%bb.3(0x80000000) - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr6_sgpr7 + ; GCN-NEXT: liveins: $sgpr6_sgpr7 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_SUB_U32_e32 1, killed $vgpr1, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 killed $vgpr1, implicit $exec + ; GCN-NEXT: [[V_SUB_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 1, [[V_MOV_B32_e32_1]], implicit $exec + ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[V_SUB_U32_e32_]], implicit $exec ; GCN-NEXT: S_BRANCH %bb.3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.5: - ; GCN-NEXT: liveins: $vgpr0, $sgpr6_sgpr7 + ; GCN-NEXT: liveins: $sgpr6_sgpr7 ; GCN-NEXT: {{ $}} ; GCN-NEXT: $exec = S_OR_B64 $exec, $sgpr6_sgpr7, implicit-def $scc - ; GCN-NEXT: KILL killed renamable $vgpr0 ; GCN-NEXT: SI_RETURN bb.0: liveins: $sgpr4, $sgpr10_sgpr11 @@ -239,26 +235,24 @@ body: | ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GCN-NEXT: liveins: $sgpr4, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: - ; GCN-NEXT: liveins: $sgpr4, $vgpr0, $vgpr2_vgpr3 + ; GCN-NEXT: liveins: $sgpr4, $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0 + ; GCN-NEXT: renamable $vgpr63 = IMPLICIT_DEF + ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr63 ; GCN-NEXT: S_NOP 0 - ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0 - ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 10, implicit $exec - ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec - ; GCN-NEXT: KILL killed renamable $vgpr0 + ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR killed $vgpr63, 0 + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec + ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, [[V_MOV_B32_e32_]], 0, 0, implicit $exec ; GCN-NEXT: SI_RETURN ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: - ; 
GCN-NEXT: liveins: $vgpr0, $vgpr2_vgpr3 + ; GCN-NEXT: liveins: $vgpr2_vgpr3 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec - ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec - ; GCN-NEXT: KILL killed renamable $vgpr0 + ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec + ; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, [[V_MOV_B32_e32_1]], 0, 0, implicit $exec ; GCN-NEXT: SI_RETURN bb.0: liveins: $sgpr4, $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll index 431b7d5400f43..798cd6239d262 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll @@ -108,255 +108,114 @@ define amdgpu_kernel void @kernel_calls_no_stack() { } define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %in) { -; FLAT_SCR_OPT-LABEL: test: -; FLAT_SCR_OPT: ; %bb.0: -; FLAT_SCR_OPT-NEXT: s_add_u32 s6, s6, s11 -; FLAT_SCR_OPT-NEXT: s_addc_u32 s7, s7, 0 -; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6 -; FLAT_SCR_OPT-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7 -; FLAT_SCR_OPT-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; FLAT_SCR_OPT-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane -; FLAT_SCR_OPT-NEXT: s_waitcnt lgkmcnt(0) -; FLAT_SCR_OPT-NEXT: v_writelane_b32 v0, s0, 0 -; FLAT_SCR_OPT-NEXT: v_writelane_b32 v0, s1, 1 -; FLAT_SCR_OPT-NEXT: s_or_saveexec_b32 s105, -1 -; FLAT_SCR_OPT-NEXT: s_mov_b32 s0, 0 -; FLAT_SCR_OPT-NEXT: scratch_store_dword off, v0, s0 ; 4-byte Folded Spill -; FLAT_SCR_OPT-NEXT: s_waitcnt_depctr 0xffe3 -; FLAT_SCR_OPT-NEXT: s_mov_b32 exec_lo, s105 -; FLAT_SCR_OPT-NEXT: s_load_dword vcc_lo, s[2:3], 0x8 -; FLAT_SCR_OPT-NEXT: ; kill: killed $sgpr2_sgpr3 -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; 
FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: 
;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: s_waitcnt lgkmcnt(0) -; FLAT_SCR_OPT-NEXT: v_mov_b32_e32 v0, vcc_lo -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: ;;#ASMSTART -; FLAT_SCR_OPT-NEXT: ;;#ASMEND -; FLAT_SCR_OPT-NEXT: s_or_saveexec_b32 s105, -1 -; FLAT_SCR_OPT-NEXT: s_mov_b32 s0, 0 -; FLAT_SCR_OPT-NEXT: scratch_load_dword v1, off, s0 ; 4-byte Folded Reload -; FLAT_SCR_OPT-NEXT: s_waitcnt_depctr 0xffe3 -; FLAT_SCR_OPT-NEXT: s_mov_b32 exec_lo, s105 -; FLAT_SCR_OPT-NEXT: s_waitcnt vmcnt(0) -; FLAT_SCR_OPT-NEXT: v_readlane_b32 s0, v1, 0 -; FLAT_SCR_OPT-NEXT: v_readlane_b32 s1, v1, 1 -; FLAT_SCR_OPT-NEXT: s_or_saveexec_b32 s105, -1 -; FLAT_SCR_OPT-NEXT: s_mov_b32 exec_lo, s105 -; FLAT_SCR_OPT-NEXT: v_mov_b32_e32 v2, 0 -; FLAT_SCR_OPT-NEXT: ; kill: killed $vgpr1 -; FLAT_SCR_OPT-NEXT: global_store_dword v2, v0, s[0:1] -; FLAT_SCR_OPT-NEXT: s_endpgm -; -; FLAT_SCR_ARCH-LABEL: test: -; FLAT_SCR_ARCH: ; %bb.0: -; FLAT_SCR_ARCH-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; FLAT_SCR_ARCH-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane -; FLAT_SCR_ARCH-NEXT: s_waitcnt lgkmcnt(0) -; FLAT_SCR_ARCH-NEXT: v_writelane_b32 v0, s0, 0 -; FLAT_SCR_ARCH-NEXT: v_writelane_b32 v0, s1, 1 -; FLAT_SCR_ARCH-NEXT: s_or_saveexec_b32 s105, -1 
-; FLAT_SCR_ARCH-NEXT: s_mov_b32 s0, 0 -; FLAT_SCR_ARCH-NEXT: scratch_store_dword off, v0, s0 ; 4-byte Folded Spill -; FLAT_SCR_ARCH-NEXT: s_waitcnt_depctr 0xffe3 -; FLAT_SCR_ARCH-NEXT: s_mov_b32 exec_lo, s105 -; FLAT_SCR_ARCH-NEXT: s_load_dword vcc_lo, s[2:3], 0x8 -; FLAT_SCR_ARCH-NEXT: ; kill: killed $sgpr2_sgpr3 -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; 
FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: s_waitcnt lgkmcnt(0) -; FLAT_SCR_ARCH-NEXT: v_mov_b32_e32 v0, vcc_lo -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: ;;#ASMSTART -; FLAT_SCR_ARCH-NEXT: ;;#ASMEND -; FLAT_SCR_ARCH-NEXT: s_or_saveexec_b32 s105, -1 -; FLAT_SCR_ARCH-NEXT: s_mov_b32 s0, 0 -; FLAT_SCR_ARCH-NEXT: scratch_load_dword v1, off, s0 ; 4-byte Folded Reload -; FLAT_SCR_ARCH-NEXT: s_waitcnt_depctr 0xffe3 -; FLAT_SCR_ARCH-NEXT: s_mov_b32 exec_lo, s105 -; FLAT_SCR_ARCH-NEXT: s_waitcnt vmcnt(0) -; FLAT_SCR_ARCH-NEXT: v_readlane_b32 s0, v1, 0 -; FLAT_SCR_ARCH-NEXT: v_readlane_b32 s1, v1, 1 -; FLAT_SCR_ARCH-NEXT: s_or_saveexec_b32 s105, -1 
-; FLAT_SCR_ARCH-NEXT: s_mov_b32 exec_lo, s105 -; FLAT_SCR_ARCH-NEXT: v_mov_b32_e32 v2, 0 -; FLAT_SCR_ARCH-NEXT: ; kill: killed $vgpr1 -; FLAT_SCR_ARCH-NEXT: global_store_dword v2, v0, s[0:1] -; FLAT_SCR_ARCH-NEXT: s_endpgm +; GCN-LABEL: test: +; GCN: ; %bb.0: +; GCN-NEXT: s_clause 0x1 +; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 +; GCN-NEXT: s_load_dword vcc_lo, s[2:3], 0x8 +; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane +; GCN-NEXT: ; kill: killed $sgpr2_sgpr3 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_writelane_b32 v0, s0, 0 +; GCN-NEXT: v_writelane_b32 v0, s1, 1 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_readlane_b32 s0, v0, 0 +; GCN-NEXT: v_mov_b32_e32 v1, vcc_lo +; GCN-NEXT: v_readlane_b32 s1, v0, 1 +; GCN-NEXT: v_mov_b32_e32 v2, 0 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; 
GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: global_store_dword v2, v1, s[0:1] +; GCN-NEXT: s_endpgm call void asm sideeffect "", "~{s[0:7]}" () call void asm sideeffect "", "~{s[8:15]}" () call void asm sideeffect "", "~{s[16:23]}" () @@ -371,7 +230,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %in) { call void asm sideeffect "", "~{s[88:95]}" () call void asm sideeffect "", "~{s[96:103]}" () call void asm sideeffect "", "~{s[104:105]}" () - call void asm sideeffect "", "~{v[0:7]}" () + call void asm sideeffect "", "~{v[1:7]}" () call void asm sideeffect "", "~{v[8:15]}" () call void asm sideeffect "", "~{v[16:23]}" () call void asm sideeffect "", "~{v[24:31]}" () diff --git a/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir b/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir index ba619a659f1b0..5f36d5403ebcf 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-reload-into-exec.mir @@ -12,13 +12,13 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: 
merge_sgpr_spill_into_copy_from_exec_lo - ; CHECK: renamable $vgpr0 = IMPLICIT_DEF - ; CHECK-NEXT: S_NOP 0, implicit-def $exec_lo + ; CHECK: S_NOP 0, implicit-def $exec_lo ; CHECK-NEXT: $sgpr0 = S_MOV_B32 $exec_lo - ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0 + ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0 ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0 - ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 + ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0 ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec S_NOP 0, implicit-def $exec_lo @@ -37,13 +37,13 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_exec_hi - ; CHECK: renamable $vgpr0 = IMPLICIT_DEF - ; CHECK-NEXT: S_NOP 0, implicit-def $exec_hi + ; CHECK: S_NOP 0, implicit-def $exec_hi ; CHECK-NEXT: $sgpr0 = S_MOV_B32 $exec_hi - ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0 + ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0 ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0 - ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 + ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK-NEXT: $exec_hi = S_MOV_B32 killed $sgpr0 ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec S_NOP 0, implicit-def $exec_hi @@ -62,16 +62,16 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_exec - ; CHECK: renamable $vgpr0 = IMPLICIT_DEF - ; CHECK-NEXT: S_NOP 0, implicit-def $exec + ; CHECK: S_NOP 0, implicit-def $exec ; CHECK-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec 
- ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1 - ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1 + ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1 + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1 ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1 ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1 ; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1, implicit killed renamable $sgpr0_sgpr1 ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1 - ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 killed $vgpr0, 1 + ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1 ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1 ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec S_NOP 0, implicit-def $exec @@ -93,12 +93,12 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec_lo - ; CHECK: renamable $vgpr0 = IMPLICIT_DEF - ; CHECK-NEXT: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_lo - ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0 + ; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_lo + ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0 ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0 - ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 + ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0 ; CHECK-NEXT: S_SENDMSG 0, 
implicit $m0, implicit $exec S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $exec_lo @@ -116,12 +116,12 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec_hi - ; CHECK: renamable $vgpr0 = IMPLICIT_DEF - ; CHECK-NEXT: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_hi - ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0 + ; CHECK: S_NOP 0, implicit-def renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def $exec_hi + ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0 ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0 - ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 + ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK-NEXT: $exec_hi = S_MOV_B32 killed $sgpr0 ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec S_NOP 0, implicit-def %0:sreg_32, implicit-def %1:sreg_32, implicit-def $exec_hi @@ -139,15 +139,15 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_exec - ; CHECK: renamable $vgpr0 = IMPLICIT_DEF - ; CHECK-NEXT: S_NOP 0, implicit-def renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def $exec - ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1 - ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1 + ; CHECK: S_NOP 0, implicit-def renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def $exec + ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1 + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr1, 1, 
$vgpr0, implicit $sgpr0_sgpr1 ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1 ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1 ; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0_sgpr1, implicit-def dead renamable $sgpr2_sgpr3, implicit-def dead renamable $sgpr0_sgpr1 ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr0_sgpr1 - ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 killed $vgpr0, 1 + ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 $vgpr0, 1 ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1 ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec S_NOP 0, implicit-def %0:sreg_64, implicit-def %1:sreg_64, implicit-def $exec diff --git a/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir b/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir index 1c7896fcb4f14..1c2436bd6b6cd 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-reload-into-m0.mir @@ -13,13 +13,13 @@ body: | bb.0: ; CHECK-LABEL: name: merge_sgpr_spill_into_copy_from_m0 - ; CHECK: renamable $vgpr0 = IMPLICIT_DEF - ; CHECK-NEXT: S_NOP 0, implicit-def $m0 + ; CHECK: S_NOP 0, implicit-def $m0 ; CHECK-NEXT: $sgpr0 = S_MOV_B32 $m0 - ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0 + ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0 ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK-NEXT: S_NOP 0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0, implicit killed renamable $sgpr0 - ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 + ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK-NEXT: $m0 = S_MOV_B32 killed $sgpr0 ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec @@ -43,12 +43,12 @@ body: | bb.0: ; CHECK-LABEL: name: reload_sgpr_spill_into_copy_to_m0 - ; CHECK: renamable $vgpr0 = IMPLICIT_DEF + ; CHECK: $vgpr0 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def renamable $sgpr0, 
implicit-def dead renamable $sgpr1, implicit-def $m0 - ; CHECK-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, killed $vgpr0 + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr0, 0, $vgpr0 ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK-NEXT: S_NOP 0, implicit killed renamable $sgpr0, implicit-def dead renamable $sgpr1, implicit-def dead renamable $sgpr0 - ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 killed $vgpr0, 0 + ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr0, 0 ; CHECK-NEXT: $m0 = S_MOV_B32 killed $sgpr0 ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: S_SENDMSG 0, implicit $m0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir index f388aeb047029..0309a156171d7 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-index.mir +++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir @@ -166,7 +166,7 @@ body: | bb.0: liveins: $sgpr30_sgpr31, $sgpr10 ; GCN-LABEL: name: func_add_constant_to_fi_uniform_live_SCC_i32 - ; GCN: liveins: $sgpr10, $sgpr30_sgpr31 + ; GCN: liveins: $sgpr30_sgpr31, $sgpr10 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 $sgpr10, 4, implicit-def $scc ; GCN-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll index 3b078c41f4a84..7d07641f455e3 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll @@ -2635,7 +2635,7 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX9-NEXT: s_add_i32 s33, s32, 0x7fc0 ; GFX9-NEXT: s_and_b32 s33, s33, 0xffff8000 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:1536 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:1564 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_add_i32 s32, s32, 0x28000 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 @@ -2775,25 
+2775,25 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:796 ; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:516 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1540 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1536 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:520 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1544 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1540 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:524 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1548 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1544 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:528 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1552 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1548 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:532 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1556 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1552 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:536 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1560 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1556 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:540 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1564 ; 4-byte Folded Spill +; GFX9-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:1560 ; 
4-byte Folded Spill ; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:544 ; GFX9-NEXT: s_nop 0 ; GFX9-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:548 @@ -2861,13 +2861,13 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX9-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:152 ; GFX9-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:156 ; GFX9-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:160 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:1540 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:1544 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:1548 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:1552 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:1556 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:1560 ; 4-byte Folded Reload -; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:1564 ; 4-byte Folded Reload +; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:1536 ; 4-byte Folded Reload +; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:1540 ; 4-byte Folded Reload +; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:1544 ; 4-byte Folded Reload +; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:1548 ; 4-byte Folded Reload +; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:1552 ; 4-byte Folded Reload +; GFX9-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:1556 ; 4-byte Folded Reload +; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:1560 ; 4-byte Folded Reload ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX9-NEXT: v_add_u32_e32 v0, 0x400, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 42 @@ -2890,7 +2890,7 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX9-NEXT: v_readlane_b32 s31, v63, 1 ; GFX9-NEXT: v_readlane_b32 s30, v63, 0 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword 
v63, off, s[0:3], s33 offset:1536 ; 4-byte Folded Reload +; GFX9-NEXT: buffer_load_dword v63, off, s[0:3], s33 offset:1564 ; 4-byte Folded Reload ; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: s_add_i32 s32, s32, 0xfffd8000 ; GFX9-NEXT: s_mov_b32 s33, s36 @@ -2904,7 +2904,7 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX10-NEXT: s_add_i32 s33, s32, 0x3fe0 ; GFX10-NEXT: s_and_b32 s33, s33, 0xffffc000 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:1536 ; 4-byte Folded Spill +; GFX10-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:1568 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 @@ -3046,28 +3046,28 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX10-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:796 ; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:516 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1540 ; 4-byte Folded Spill +; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1536 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:520 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1544 ; 4-byte Folded Spill +; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1540 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:524 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1548 ; 4-byte Folded Spill +; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1544 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:528 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1552 ; 4-byte Folded Spill +; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1548 ; 4-byte Folded Spill ; GFX10-NEXT: 
buffer_load_dword v0, off, s[0:3], s33 offset:532 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1556 ; 4-byte Folded Spill +; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1552 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:536 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1560 ; 4-byte Folded Spill +; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1556 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:540 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1564 ; 4-byte Folded Spill +; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1560 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:544 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1568 ; 4-byte Folded Spill +; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:1564 ; 4-byte Folded Spill ; GFX10-NEXT: s_clause 0x15 ; GFX10-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:548 ; GFX10-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:552 @@ -3134,14 +3134,14 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX10-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:156 ; GFX10-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:160 ; GFX10-NEXT: s_clause 0x7 -; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:1540 -; GFX10-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:1544 -; GFX10-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:1548 -; GFX10-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:1552 -; GFX10-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:1556 -; GFX10-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:1560 -; GFX10-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:1564 -; GFX10-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:1568 +; 
GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:1536 +; GFX10-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:1540 +; GFX10-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:1544 +; GFX10-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:1548 +; GFX10-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:1552 +; GFX10-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:1556 +; GFX10-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:1560 +; GFX10-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:1564 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 ; GFX10-NEXT: v_mov_b32_e32 v1, 42 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 0x400, v0 @@ -3165,7 +3165,7 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX10-NEXT: v_readlane_b32 s31, v63, 1 ; GFX10-NEXT: v_readlane_b32 s30, v63, 0 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v63, off, s[0:3], s33 offset:1536 ; 4-byte Folded Reload +; GFX10-NEXT: buffer_load_dword v63, off, s[0:3], s33 offset:1568 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: s_add_i32 s32, s32, 0xfffec000 @@ -3181,7 +3181,7 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_and_b32 s33, s33, 0xfffffe00 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_store_b32 off, v60, s33 offset:1536 ; 4-byte Folded Spill +; GFX11-NEXT: scratch_store_b32 off, v60, s33 offset:1600 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_mov_b32 s0, 0 ; GFX11-NEXT: v_mov_b32_e32 v4, 0 @@ -3267,7 +3267,7 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: s_waitcnt vmcnt(2) ; GFX11-NEXT: v_dual_mov_b32 v14, v1 :: v_dual_mov_b32 v1, v4 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b128 off, v[16:19], s33 offset:1588 ; 16-byte Folded Spill +; GFX11-NEXT: scratch_store_b128 off, v[16:19], s33 offset:1584 ; 16-byte Folded Spill ; GFX11-NEXT: s_clause 0x3 ; 
GFX11-NEXT: scratch_load_b128 v[16:19], off, s33 offset:528 ; GFX11-NEXT: scratch_load_b128 v[20:23], off, s33 offset:544 @@ -3277,13 +3277,13 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: s_waitcnt vmcnt(2) ; GFX11-NEXT: v_mov_b32_e32 v10, v21 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b128 off, v[28:31], s33 offset:1572 ; 16-byte Folded Spill +; GFX11-NEXT: scratch_store_b128 off, v[28:31], s33 offset:1568 ; 16-byte Folded Spill ; GFX11-NEXT: scratch_load_b128 v[28:31], off, s33 offset:592 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b128 off, v[28:31], s33 offset:1556 ; 16-byte Folded Spill +; GFX11-NEXT: scratch_store_b128 off, v[28:31], s33 offset:1552 ; 16-byte Folded Spill ; GFX11-NEXT: scratch_load_b128 v[28:31], off, s33 offset:608 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b128 off, v[28:31], s33 offset:1540 ; 16-byte Folded Spill +; GFX11-NEXT: scratch_store_b128 off, v[28:31], s33 offset:1536 ; 16-byte Folded Spill ; GFX11-NEXT: scratch_store_b128 off, v[32:35], s32 ; GFX11-NEXT: v_dual_mov_b32 v31, v47 :: v_dual_mov_b32 v32, v36 ; GFX11-NEXT: v_dual_mov_b32 v33, v48 :: v_dual_mov_b32 v34, v49 @@ -3333,13 +3333,13 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: scratch_store_b128 off, v[48:51], s2 ; GFX11-NEXT: s_add_i32 s2, s32, 16 ; GFX11-NEXT: scratch_store_b128 off, v[32:35], s2 -; GFX11-NEXT: scratch_load_b128 v[1:4], off, s33 offset:1588 ; 16-byte Folded Reload +; GFX11-NEXT: scratch_load_b128 v[1:4], off, s33 offset:1584 ; 16-byte Folded Reload ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v1, 42 ; GFX11-NEXT: s_clause 0x2 -; GFX11-NEXT: scratch_load_b128 v[17:20], off, s33 offset:1572 -; GFX11-NEXT: scratch_load_b128 v[21:24], off, s33 offset:1556 -; GFX11-NEXT: scratch_load_b128 v[25:28], off, s33 offset:1540 +; GFX11-NEXT: scratch_load_b128 v[17:20], off, s33 offset:1568 +; GFX11-NEXT: scratch_load_b128 v[21:24], off, s33 offset:1552 +; GFX11-NEXT: 
scratch_load_b128 v[25:28], off, s33 offset:1536 ; GFX11-NEXT: s_add_i32 s2, s33, 0x400 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v0, s2 @@ -3360,7 +3360,7 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: v_readlane_b32 s31, v60, 1 ; GFX11-NEXT: v_readlane_b32 s30, v60, 0 ; GFX11-NEXT: s_or_saveexec_b32 s0, -1 -; GFX11-NEXT: scratch_load_b32 v60, off, s33 offset:1536 ; 4-byte Folded Reload +; GFX11-NEXT: scratch_load_b32 v60, off, s33 offset:1600 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_addk_i32 s32, 0xf600 ; GFX11-NEXT: s_mov_b32 s33, s34 diff --git a/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir b/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir index 742498cdd8bd1..c76a84cb1c5d4 100644 --- a/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir +++ b/llvm/test/CodeGen/AMDGPU/greedy-instruction-split-subrange.mir @@ -21,14 +21,10 @@ body: | ; CHECK-LABEL: name: split_instruction_subranges ; CHECK: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %1:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1) ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %3:vgpr_32, 4, 0, implicit $exec :: (load (s64), addrspace 1) - ; CHECK-NEXT: SI_SPILL_V64_SAVE [[GLOBAL_LOAD_DWORDX2_SADDR1]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 8, 0, implicit $exec :: (load (s64), addrspace 1) - ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub1:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1 - ; CHECK-NEXT: S_NOP 0, implicit [[COPY]].sub1 - ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 
4, addrspace 5) - ; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V64_RESTORE]].sub0 - ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub1:vreg_64 = COPY [[GLOBAL_LOAD_DWORDX2_SADDR2]].sub1 - ; CHECK-NEXT: S_NOP 0, implicit [[COPY1]].sub1 + ; CHECK-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1 + ; CHECK-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORDX2_SADDR1]].sub0 + ; CHECK-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORDX2_SADDR2]].sub1 ; CHECK-NEXT: S_ENDPGM 0 %1:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %4:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1) %2:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 4, 0, implicit $exec :: (load (s64), addrspace 1) @@ -61,23 +57,13 @@ body: | ; CHECK-LABEL: name: split_instruction_subranges_use_is_subreg_def ; CHECK: [[GLOBAL_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %1:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1) ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR1:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %3:vgpr_32, 4, 0, implicit $exec :: (load (s64), addrspace 1) - ; CHECK-NEXT: SI_SPILL_V64_SAVE [[GLOBAL_LOAD_DWORDX2_SADDR1]], %stack.1, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.1, align 4, addrspace 5) ; CHECK-NEXT: [[GLOBAL_LOAD_DWORDX2_SADDR2:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 8, 0, implicit $exec :: (load (s64), addrspace 1) - ; CHECK-NEXT: SI_SPILL_V64_SAVE [[GLOBAL_LOAD_DWORDX2_SADDR2]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) ; CHECK-NEXT: S_NOP 0, implicit-def [[GLOBAL_LOAD_DWORDX2_SADDR]].sub0 - ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:vreg_64 = COPY [[SI_SPILL_V64_RESTORE]].sub0 - ; CHECK-NEXT: S_NOP 0, 
implicit-def [[COPY]].sub1 - ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:vreg_64 = COPY [[COPY]].sub0 - ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE1:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub1:vreg_64 = COPY [[SI_SPILL_V64_RESTORE1]].sub1 - ; CHECK-NEXT: S_NOP 0, implicit-def [[COPY2]].sub0 - ; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub1:vreg_64 = COPY [[COPY2]].sub1 + ; CHECK-NEXT: S_NOP 0, implicit-def [[GLOBAL_LOAD_DWORDX2_SADDR1]].sub1 + ; CHECK-NEXT: S_NOP 0, implicit-def [[GLOBAL_LOAD_DWORDX2_SADDR2]].sub0 ; CHECK-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORDX2_SADDR]].sub1 - ; CHECK-NEXT: undef [[COPY4:%[0-9]+]].sub0:vreg_64 = COPY [[COPY1]].sub0 - ; CHECK-NEXT: S_NOP 0, implicit [[COPY4]].sub0 - ; CHECK-NEXT: undef [[COPY5:%[0-9]+]].sub1:vreg_64 = COPY [[COPY3]].sub1 - ; CHECK-NEXT: S_NOP 0, implicit [[COPY5]].sub1 + ; CHECK-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORDX2_SADDR1]].sub0 + ; CHECK-NEXT: S_NOP 0, implicit [[GLOBAL_LOAD_DWORDX2_SADDR2]].sub1 ; CHECK-NEXT: S_ENDPGM 0 %1:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %4:vgpr_32, 0, 0, implicit $exec :: (load (s64), addrspace 1) %2:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR undef $sgpr0_sgpr1, undef %5:vgpr_32, 4, 0, implicit $exec :: (load (s64), addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll index 5abd4c9069c91..a4a8f43646d4b 100644 --- a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll +++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll @@ -6,209 +6,209 @@ define void @main(i1 %arg) #0 { ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:4 
; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[4:5] -; CHECK-NEXT: v_writelane_b32 v8, s30, 0 -; CHECK-NEXT: v_writelane_b32 v8, s31, 1 -; CHECK-NEXT: v_writelane_b32 v8, s36, 2 -; CHECK-NEXT: v_writelane_b32 v8, s37, 3 -; CHECK-NEXT: v_writelane_b32 v8, s38, 4 -; CHECK-NEXT: v_writelane_b32 v8, s39, 5 -; CHECK-NEXT: v_writelane_b32 v8, s40, 6 -; CHECK-NEXT: v_writelane_b32 v8, s41, 7 -; CHECK-NEXT: v_writelane_b32 v8, s42, 8 -; CHECK-NEXT: v_writelane_b32 v8, s43, 9 -; CHECK-NEXT: v_writelane_b32 v8, s44, 10 -; CHECK-NEXT: v_writelane_b32 v8, s45, 11 -; CHECK-NEXT: v_writelane_b32 v8, s46, 12 -; CHECK-NEXT: v_writelane_b32 v8, s47, 13 -; CHECK-NEXT: v_writelane_b32 v8, s48, 14 -; CHECK-NEXT: v_writelane_b32 v8, s49, 15 +; CHECK-NEXT: v_writelane_b32 v5, s30, 0 +; CHECK-NEXT: v_writelane_b32 v5, s31, 1 +; CHECK-NEXT: v_writelane_b32 v5, s36, 2 +; CHECK-NEXT: v_writelane_b32 v5, s37, 3 +; CHECK-NEXT: v_writelane_b32 v5, s38, 4 +; CHECK-NEXT: v_writelane_b32 v5, s39, 5 +; CHECK-NEXT: v_writelane_b32 v5, s40, 6 +; CHECK-NEXT: v_writelane_b32 v5, s41, 7 +; CHECK-NEXT: v_writelane_b32 v5, s42, 8 +; CHECK-NEXT: v_writelane_b32 v5, s43, 9 +; CHECK-NEXT: v_writelane_b32 v5, s44, 10 +; CHECK-NEXT: v_writelane_b32 v5, s45, 11 +; CHECK-NEXT: v_writelane_b32 v5, s46, 12 +; CHECK-NEXT: v_writelane_b32 v5, s47, 13 +; CHECK-NEXT: v_writelane_b32 v5, s48, 14 +; CHECK-NEXT: v_writelane_b32 v5, s49, 15 ; CHECK-NEXT: s_getpc_b64 s[24:25] -; CHECK-NEXT: v_writelane_b32 v8, s50, 16 +; CHECK-NEXT: v_writelane_b32 v5, s50, 16 ; CHECK-NEXT: s_movk_i32 s4, 0xf0 ; CHECK-NEXT: s_mov_b32 s5, s24 -; CHECK-NEXT: v_writelane_b32 v8, s51, 17 +; CHECK-NEXT: 
v_writelane_b32 v5, s51, 17 ; CHECK-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x0 -; CHECK-NEXT: ; implicit-def: $vgpr4 : SGPR spill to VGPR lane +; CHECK-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane ; CHECK-NEXT: s_mov_b64 s[4:5], 0 ; CHECK-NEXT: s_load_dwordx4 s[28:31], s[4:5], 0x0 ; CHECK-NEXT: s_movk_i32 s20, 0x130 ; CHECK-NEXT: s_mov_b32 s21, s24 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_writelane_b32 v4, s36, 0 -; CHECK-NEXT: v_writelane_b32 v4, s37, 1 -; CHECK-NEXT: v_writelane_b32 v4, s38, 2 -; CHECK-NEXT: v_writelane_b32 v4, s39, 3 -; CHECK-NEXT: v_writelane_b32 v4, s40, 4 -; CHECK-NEXT: v_writelane_b32 v4, s41, 5 -; CHECK-NEXT: v_writelane_b32 v4, s42, 6 -; CHECK-NEXT: v_writelane_b32 v4, s43, 7 -; CHECK-NEXT: v_writelane_b32 v4, s44, 8 -; CHECK-NEXT: v_writelane_b32 v4, s45, 9 -; CHECK-NEXT: v_writelane_b32 v4, s46, 10 +; CHECK-NEXT: v_writelane_b32 v7, s36, 0 +; CHECK-NEXT: v_writelane_b32 v7, s37, 1 +; CHECK-NEXT: v_writelane_b32 v7, s38, 2 +; CHECK-NEXT: v_writelane_b32 v7, s39, 3 +; CHECK-NEXT: v_writelane_b32 v7, s40, 4 +; CHECK-NEXT: v_writelane_b32 v7, s41, 5 +; CHECK-NEXT: v_writelane_b32 v7, s42, 6 +; CHECK-NEXT: v_writelane_b32 v7, s43, 7 +; CHECK-NEXT: v_writelane_b32 v7, s44, 8 +; CHECK-NEXT: v_writelane_b32 v7, s45, 9 +; CHECK-NEXT: v_writelane_b32 v7, s46, 10 ; CHECK-NEXT: s_load_dwordx16 s[4:19], s[20:21], 0x0 -; CHECK-NEXT: v_writelane_b32 v4, s47, 11 -; CHECK-NEXT: v_writelane_b32 v4, s48, 12 -; CHECK-NEXT: v_writelane_b32 v4, s49, 13 +; CHECK-NEXT: v_writelane_b32 v7, s47, 11 +; CHECK-NEXT: v_writelane_b32 v7, s48, 12 ; CHECK-NEXT: s_mov_b32 s20, 0 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: v_writelane_b32 v4, s50, 14 -; CHECK-NEXT: v_mov_b32_e32 v5, s28 -; CHECK-NEXT: v_mov_b32_e32 v6, v1 +; CHECK-NEXT: v_writelane_b32 v7, s49, 13 +; CHECK-NEXT: v_mov_b32_e32 v2, s28 +; CHECK-NEXT: v_mov_b32_e32 v3, v1 ; CHECK-NEXT: s_mov_b32 s21, s20 ; CHECK-NEXT: s_mov_b32 s22, s20 ; CHECK-NEXT: s_mov_b32 s23, s20 -; 
CHECK-NEXT: v_writelane_b32 v4, s51, 15 +; CHECK-NEXT: v_writelane_b32 v7, s50, 14 +; CHECK-NEXT: v_writelane_b32 v7, s51, 15 +; CHECK-NEXT: image_sample_lz v3, v[2:3], s[44:51], s[20:23] dmask:0x1 ; CHECK-NEXT: v_mov_b32_e32 v2, v1 -; CHECK-NEXT: image_sample_lz v5, v[5:6], s[44:51], s[20:23] dmask:0x1 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_writelane_b32 v4, s4, 16 -; CHECK-NEXT: v_writelane_b32 v4, s5, 17 -; CHECK-NEXT: v_writelane_b32 v4, s6, 18 -; CHECK-NEXT: v_writelane_b32 v4, s7, 19 -; CHECK-NEXT: v_writelane_b32 v4, s8, 20 -; CHECK-NEXT: v_writelane_b32 v4, s9, 21 -; CHECK-NEXT: image_sample_lz v6, v[1:2], s[4:11], s[20:23] dmask:0x1 -; CHECK-NEXT: v_writelane_b32 v4, s10, 22 -; CHECK-NEXT: v_writelane_b32 v4, s11, 23 -; CHECK-NEXT: v_writelane_b32 v4, s12, 24 -; CHECK-NEXT: v_writelane_b32 v4, s13, 25 -; CHECK-NEXT: v_writelane_b32 v4, s14, 26 -; CHECK-NEXT: v_writelane_b32 v4, s15, 27 -; CHECK-NEXT: v_writelane_b32 v8, s52, 18 -; CHECK-NEXT: v_writelane_b32 v4, s16, 28 -; CHECK-NEXT: v_writelane_b32 v8, s53, 19 -; CHECK-NEXT: v_writelane_b32 v4, s17, 29 -; CHECK-NEXT: v_writelane_b32 v8, s54, 20 -; CHECK-NEXT: v_writelane_b32 v4, s18, 30 +; CHECK-NEXT: v_writelane_b32 v7, s4, 16 +; CHECK-NEXT: v_writelane_b32 v7, s5, 17 +; CHECK-NEXT: v_writelane_b32 v7, s6, 18 +; CHECK-NEXT: v_writelane_b32 v7, s7, 19 +; CHECK-NEXT: v_writelane_b32 v7, s8, 20 +; CHECK-NEXT: v_writelane_b32 v7, s9, 21 +; CHECK-NEXT: image_sample_lz v4, v[1:2], s[4:11], s[20:23] dmask:0x1 +; CHECK-NEXT: v_writelane_b32 v7, s10, 22 +; CHECK-NEXT: v_writelane_b32 v7, s11, 23 +; CHECK-NEXT: v_writelane_b32 v7, s12, 24 +; CHECK-NEXT: v_writelane_b32 v7, s13, 25 +; CHECK-NEXT: v_writelane_b32 v7, s14, 26 +; CHECK-NEXT: v_writelane_b32 v7, s15, 27 +; CHECK-NEXT: v_writelane_b32 v5, s52, 18 +; CHECK-NEXT: v_writelane_b32 v7, s16, 28 +; CHECK-NEXT: v_writelane_b32 v5, s53, 19 +; CHECK-NEXT: v_writelane_b32 v7, s17, 29 +; CHECK-NEXT: v_writelane_b32 v5, s54, 20 +; CHECK-NEXT: 
v_writelane_b32 v7, s18, 30 ; CHECK-NEXT: s_mov_b32 s26, 48 ; CHECK-NEXT: s_mov_b32 s27, s24 -; CHECK-NEXT: v_writelane_b32 v8, s55, 21 -; CHECK-NEXT: v_writelane_b32 v4, s19, 31 +; CHECK-NEXT: v_writelane_b32 v5, s55, 21 +; CHECK-NEXT: v_writelane_b32 v7, s19, 31 ; CHECK-NEXT: s_load_dwordx8 s[4:11], s[26:27], 0x0 -; CHECK-NEXT: v_writelane_b32 v8, s56, 22 -; CHECK-NEXT: v_writelane_b32 v8, s57, 23 -; CHECK-NEXT: v_writelane_b32 v8, s58, 24 -; CHECK-NEXT: v_writelane_b32 v8, s59, 25 -; CHECK-NEXT: v_writelane_b32 v8, s60, 26 +; CHECK-NEXT: v_writelane_b32 v5, s56, 22 +; CHECK-NEXT: v_writelane_b32 v5, s57, 23 +; CHECK-NEXT: v_writelane_b32 v5, s58, 24 +; CHECK-NEXT: v_writelane_b32 v5, s59, 25 +; CHECK-NEXT: v_writelane_b32 v5, s60, 26 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_writelane_b32 v4, s4, 32 -; CHECK-NEXT: v_writelane_b32 v8, s61, 27 -; CHECK-NEXT: v_writelane_b32 v4, s5, 33 -; CHECK-NEXT: v_writelane_b32 v8, s62, 28 -; CHECK-NEXT: v_writelane_b32 v4, s6, 34 -; CHECK-NEXT: v_writelane_b32 v8, s63, 29 -; CHECK-NEXT: v_writelane_b32 v4, s7, 35 -; CHECK-NEXT: v_writelane_b32 v8, s64, 30 -; CHECK-NEXT: v_writelane_b32 v4, s8, 36 -; CHECK-NEXT: v_writelane_b32 v8, s65, 31 -; CHECK-NEXT: v_writelane_b32 v4, s9, 37 -; CHECK-NEXT: v_writelane_b32 v8, s66, 32 +; CHECK-NEXT: v_writelane_b32 v7, s4, 32 +; CHECK-NEXT: v_writelane_b32 v5, s61, 27 +; CHECK-NEXT: v_writelane_b32 v7, s5, 33 +; CHECK-NEXT: v_writelane_b32 v5, s62, 28 +; CHECK-NEXT: v_writelane_b32 v7, s6, 34 +; CHECK-NEXT: v_writelane_b32 v5, s63, 29 +; CHECK-NEXT: v_writelane_b32 v7, s7, 35 +; CHECK-NEXT: v_writelane_b32 v5, s64, 30 +; CHECK-NEXT: v_writelane_b32 v7, s8, 36 +; CHECK-NEXT: v_writelane_b32 v5, s65, 31 +; CHECK-NEXT: v_writelane_b32 v7, s9, 37 +; CHECK-NEXT: v_writelane_b32 v5, s66, 32 ; CHECK-NEXT: s_movk_i32 s28, 0x1f0 ; CHECK-NEXT: s_movk_i32 s30, 0x2f0 ; CHECK-NEXT: s_mov_b32 s29, s24 ; CHECK-NEXT: s_mov_b32 s31, s24 -; CHECK-NEXT: v_writelane_b32 v4, s10, 38 -; CHECK-NEXT: 
v_writelane_b32 v8, s67, 33 -; CHECK-NEXT: v_writelane_b32 v4, s11, 39 +; CHECK-NEXT: v_writelane_b32 v7, s10, 38 +; CHECK-NEXT: v_writelane_b32 v5, s67, 33 +; CHECK-NEXT: v_writelane_b32 v7, s11, 39 ; CHECK-NEXT: s_load_dwordx16 s[52:67], s[28:29], 0x0 ; CHECK-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 ; CHECK-NEXT: v_and_b32_e32 v0, 1, v0 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; CHECK-NEXT: s_xor_b64 s[24:25], vcc, -1 -; CHECK-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_mul_f32_e32 v0, v6, v5 +; CHECK-NEXT: v_mul_f32_e32 v0, v4, v3 ; CHECK-NEXT: s_and_saveexec_b64 s[26:27], s[24:25] ; CHECK-NEXT: s_xor_b64 s[26:27], exec, s[26:27] ; CHECK-NEXT: s_cbranch_execz .LBB0_3 ; CHECK-NEXT: ; %bb.1: ; %bb48 -; CHECK-NEXT: v_readlane_b32 s36, v4, 0 -; CHECK-NEXT: v_readlane_b32 s44, v4, 8 -; CHECK-NEXT: v_readlane_b32 s45, v4, 9 -; CHECK-NEXT: v_readlane_b32 s46, v4, 10 -; CHECK-NEXT: v_readlane_b32 s47, v4, 11 -; CHECK-NEXT: v_readlane_b32 s48, v4, 12 -; CHECK-NEXT: v_readlane_b32 s49, v4, 13 -; CHECK-NEXT: v_readlane_b32 s50, v4, 14 -; CHECK-NEXT: v_readlane_b32 s51, v4, 15 +; CHECK-NEXT: v_readlane_b32 s36, v7, 0 +; CHECK-NEXT: v_readlane_b32 s44, v7, 8 +; CHECK-NEXT: v_readlane_b32 s45, v7, 9 +; CHECK-NEXT: v_readlane_b32 s46, v7, 10 +; CHECK-NEXT: v_readlane_b32 s47, v7, 11 +; CHECK-NEXT: v_readlane_b32 s48, v7, 12 +; CHECK-NEXT: v_readlane_b32 s49, v7, 13 +; CHECK-NEXT: v_readlane_b32 s50, v7, 14 +; CHECK-NEXT: v_readlane_b32 s51, v7, 15 ; CHECK-NEXT: s_and_b64 vcc, exec, -1 -; CHECK-NEXT: v_readlane_b32 s37, v4, 1 -; CHECK-NEXT: v_readlane_b32 s38, v4, 2 -; CHECK-NEXT: v_readlane_b32 s39, v4, 3 -; CHECK-NEXT: v_readlane_b32 s40, v4, 4 -; CHECK-NEXT: image_sample_lz v5, v[1:2], s[44:51], s[20:23] dmask:0x1 +; CHECK-NEXT: v_readlane_b32 s37, v7, 1 +; CHECK-NEXT: v_readlane_b32 s38, v7, 2 +; CHECK-NEXT: v_readlane_b32 s39, v7, 3 +; CHECK-NEXT: v_readlane_b32 s40, v7, 4 +; CHECK-NEXT: image_sample_lz v3, 
v[1:2], s[44:51], s[20:23] dmask:0x1 ; CHECK-NEXT: v_mov_b32_e32 v2, 0 -; CHECK-NEXT: v_readlane_b32 s41, v4, 5 -; CHECK-NEXT: v_readlane_b32 s42, v4, 6 -; CHECK-NEXT: v_readlane_b32 s43, v4, 7 +; CHECK-NEXT: v_readlane_b32 s41, v7, 5 +; CHECK-NEXT: v_readlane_b32 s42, v7, 6 +; CHECK-NEXT: v_readlane_b32 s43, v7, 7 ; CHECK-NEXT: .LBB0_2: ; %bb50 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: v_readlane_b32 s36, v4, 32 -; CHECK-NEXT: v_readlane_b32 s40, v4, 36 -; CHECK-NEXT: v_readlane_b32 s41, v4, 37 -; CHECK-NEXT: v_readlane_b32 s42, v4, 38 -; CHECK-NEXT: v_readlane_b32 s43, v4, 39 +; CHECK-NEXT: v_readlane_b32 s36, v7, 32 +; CHECK-NEXT: v_readlane_b32 s40, v7, 36 +; CHECK-NEXT: v_readlane_b32 s41, v7, 37 +; CHECK-NEXT: v_readlane_b32 s42, v7, 38 +; CHECK-NEXT: v_readlane_b32 s43, v7, 39 ; CHECK-NEXT: s_mov_b32 s21, s20 ; CHECK-NEXT: s_mov_b32 s22, s20 ; CHECK-NEXT: s_mov_b32 s23, s20 -; CHECK-NEXT: v_readlane_b32 s37, v4, 33 -; CHECK-NEXT: v_readlane_b32 s38, v4, 34 +; CHECK-NEXT: v_readlane_b32 s37, v7, 33 +; CHECK-NEXT: v_readlane_b32 s38, v7, 34 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: image_sample_lz v6, v[1:2], s[60:67], s[40:43] dmask:0x1 -; CHECK-NEXT: v_readlane_b32 s39, v4, 35 +; CHECK-NEXT: image_sample_lz v4, v[1:2], s[60:67], s[40:43] dmask:0x1 +; CHECK-NEXT: v_readlane_b32 s39, v7, 35 ; CHECK-NEXT: image_sample_lz v1, v[1:2], s[12:19], s[20:23] dmask:0x1 ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_sub_f32_e32 v1, v1, v6 +; CHECK-NEXT: v_sub_f32_e32 v1, v1, v4 ; CHECK-NEXT: v_mul_f32_e32 v1, v1, v0 -; CHECK-NEXT: v_mul_f32_e32 v1, v1, v5 +; CHECK-NEXT: v_mul_f32_e32 v1, v1, v3 ; CHECK-NEXT: s_mov_b64 vcc, vcc ; CHECK-NEXT: s_cbranch_vccnz .LBB0_2 ; CHECK-NEXT: .LBB0_3: ; %Flow14 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_readlane_b32 s12, v4, 32 -; CHECK-NEXT: v_readlane_b32 s13, v4, 33 -; CHECK-NEXT: v_readlane_b32 s14, v4, 34 -; CHECK-NEXT: v_readlane_b32 s15, v4, 35 -; CHECK-NEXT: v_readlane_b32 s16, v4, 36 
-; CHECK-NEXT: v_readlane_b32 s17, v4, 37 -; CHECK-NEXT: v_readlane_b32 s18, v4, 38 -; CHECK-NEXT: v_readlane_b32 s19, v4, 39 -; CHECK-NEXT: v_writelane_b32 v4, s4, 40 -; CHECK-NEXT: v_writelane_b32 v4, s5, 41 -; CHECK-NEXT: v_writelane_b32 v4, s6, 42 -; CHECK-NEXT: v_writelane_b32 v4, s7, 43 -; CHECK-NEXT: v_writelane_b32 v4, s8, 44 -; CHECK-NEXT: v_writelane_b32 v4, s9, 45 -; CHECK-NEXT: v_writelane_b32 v4, s10, 46 -; CHECK-NEXT: v_writelane_b32 v4, s11, 47 -; CHECK-NEXT: v_writelane_b32 v4, s12, 48 -; CHECK-NEXT: v_writelane_b32 v4, s13, 49 -; CHECK-NEXT: v_writelane_b32 v4, s14, 50 -; CHECK-NEXT: v_writelane_b32 v4, s15, 51 -; CHECK-NEXT: v_writelane_b32 v4, s16, 52 -; CHECK-NEXT: v_writelane_b32 v4, s17, 53 -; CHECK-NEXT: v_writelane_b32 v4, s18, 54 -; CHECK-NEXT: v_writelane_b32 v4, s19, 55 -; CHECK-NEXT: v_writelane_b32 v4, s52, 56 -; CHECK-NEXT: v_writelane_b32 v3, s60, 0 -; CHECK-NEXT: v_writelane_b32 v4, s53, 57 -; CHECK-NEXT: v_writelane_b32 v3, s61, 1 -; CHECK-NEXT: v_writelane_b32 v4, s54, 58 -; CHECK-NEXT: v_writelane_b32 v3, s62, 2 -; CHECK-NEXT: v_writelane_b32 v4, s55, 59 -; CHECK-NEXT: v_writelane_b32 v3, s63, 3 -; CHECK-NEXT: v_writelane_b32 v4, s56, 60 -; CHECK-NEXT: v_writelane_b32 v3, s64, 4 -; CHECK-NEXT: v_writelane_b32 v4, s57, 61 -; CHECK-NEXT: v_writelane_b32 v3, s65, 5 -; CHECK-NEXT: v_writelane_b32 v4, s58, 62 -; CHECK-NEXT: v_writelane_b32 v3, s66, 6 -; CHECK-NEXT: v_writelane_b32 v4, s59, 63 -; CHECK-NEXT: v_writelane_b32 v3, s67, 7 +; CHECK-NEXT: v_readlane_b32 s12, v7, 32 +; CHECK-NEXT: v_readlane_b32 s13, v7, 33 +; CHECK-NEXT: v_readlane_b32 s14, v7, 34 +; CHECK-NEXT: v_readlane_b32 s15, v7, 35 +; CHECK-NEXT: v_readlane_b32 s16, v7, 36 +; CHECK-NEXT: v_readlane_b32 s17, v7, 37 +; CHECK-NEXT: v_readlane_b32 s18, v7, 38 +; CHECK-NEXT: v_readlane_b32 s19, v7, 39 +; CHECK-NEXT: v_writelane_b32 v7, s4, 40 +; CHECK-NEXT: v_writelane_b32 v7, s5, 41 +; CHECK-NEXT: v_writelane_b32 v7, s6, 42 +; CHECK-NEXT: v_writelane_b32 v7, s7, 43 +; 
CHECK-NEXT: v_writelane_b32 v7, s8, 44 +; CHECK-NEXT: v_writelane_b32 v7, s9, 45 +; CHECK-NEXT: v_writelane_b32 v7, s10, 46 +; CHECK-NEXT: v_writelane_b32 v7, s11, 47 +; CHECK-NEXT: v_writelane_b32 v7, s12, 48 +; CHECK-NEXT: v_writelane_b32 v7, s13, 49 +; CHECK-NEXT: v_writelane_b32 v7, s14, 50 +; CHECK-NEXT: v_writelane_b32 v7, s15, 51 +; CHECK-NEXT: v_writelane_b32 v7, s16, 52 +; CHECK-NEXT: v_writelane_b32 v7, s17, 53 +; CHECK-NEXT: v_writelane_b32 v7, s18, 54 +; CHECK-NEXT: v_writelane_b32 v7, s19, 55 +; CHECK-NEXT: ; implicit-def: $vgpr6 : SGPR spill to VGPR lane +; CHECK-NEXT: v_writelane_b32 v7, s52, 56 +; CHECK-NEXT: v_writelane_b32 v6, s60, 0 +; CHECK-NEXT: v_writelane_b32 v7, s53, 57 +; CHECK-NEXT: v_writelane_b32 v6, s61, 1 +; CHECK-NEXT: v_writelane_b32 v7, s54, 58 +; CHECK-NEXT: v_writelane_b32 v6, s62, 2 +; CHECK-NEXT: v_writelane_b32 v7, s55, 59 +; CHECK-NEXT: v_writelane_b32 v6, s63, 3 +; CHECK-NEXT: v_writelane_b32 v7, s56, 60 +; CHECK-NEXT: v_writelane_b32 v6, s64, 4 +; CHECK-NEXT: v_writelane_b32 v7, s57, 61 +; CHECK-NEXT: v_writelane_b32 v6, s65, 5 +; CHECK-NEXT: v_writelane_b32 v7, s58, 62 +; CHECK-NEXT: v_writelane_b32 v6, s66, 6 +; CHECK-NEXT: v_writelane_b32 v7, s59, 63 +; CHECK-NEXT: v_writelane_b32 v6, s67, 7 ; CHECK-NEXT: s_andn2_saveexec_b64 s[20:21], s[26:27] ; CHECK-NEXT: s_cbranch_execz .LBB0_10 ; CHECK-NEXT: ; %bb.4: ; %bb32 @@ -219,68 +219,68 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: s_mov_b32 s8, 0 ; CHECK-NEXT: s_mov_b32 s9, s8 ; CHECK-NEXT: v_mov_b32_e32 v0, s8 -; CHECK-NEXT: v_readlane_b32 s36, v4, 0 +; CHECK-NEXT: v_readlane_b32 s36, v7, 0 ; CHECK-NEXT: v_mov_b32_e32 v1, s9 ; CHECK-NEXT: s_mov_b32 s10, s8 ; CHECK-NEXT: s_mov_b32 s11, s8 -; CHECK-NEXT: v_readlane_b32 s37, v4, 1 -; CHECK-NEXT: v_readlane_b32 s38, v4, 2 -; CHECK-NEXT: v_readlane_b32 s39, v4, 3 -; CHECK-NEXT: v_readlane_b32 s40, v4, 4 -; CHECK-NEXT: v_readlane_b32 s41, v4, 5 -; CHECK-NEXT: v_readlane_b32 s42, v4, 6 -; CHECK-NEXT: v_readlane_b32 s43, v4, 7 -; 
CHECK-NEXT: v_readlane_b32 s44, v4, 8 -; CHECK-NEXT: v_readlane_b32 s45, v4, 9 -; CHECK-NEXT: v_readlane_b32 s46, v4, 10 -; CHECK-NEXT: v_readlane_b32 s47, v4, 11 -; CHECK-NEXT: v_readlane_b32 s48, v4, 12 -; CHECK-NEXT: v_readlane_b32 s49, v4, 13 -; CHECK-NEXT: v_readlane_b32 s50, v4, 14 -; CHECK-NEXT: v_readlane_b32 s51, v4, 15 -; CHECK-NEXT: image_sample_lz v5, v[0:1], s[36:43], s[8:11] dmask:0x1 -; CHECK-NEXT: v_readlane_b32 s36, v4, 16 -; CHECK-NEXT: v_readlane_b32 s44, v4, 24 -; CHECK-NEXT: v_readlane_b32 s45, v4, 25 -; CHECK-NEXT: v_readlane_b32 s46, v4, 26 -; CHECK-NEXT: v_readlane_b32 s47, v4, 27 -; CHECK-NEXT: v_readlane_b32 s48, v4, 28 -; CHECK-NEXT: v_readlane_b32 s49, v4, 29 -; CHECK-NEXT: v_readlane_b32 s50, v4, 30 -; CHECK-NEXT: v_readlane_b32 s51, v4, 31 -; CHECK-NEXT: v_mov_b32_e32 v6, 0 -; CHECK-NEXT: v_mov_b32_e32 v7, v6 -; CHECK-NEXT: v_readlane_b32 s37, v4, 17 -; CHECK-NEXT: v_readlane_b32 s38, v4, 18 -; CHECK-NEXT: v_readlane_b32 s39, v4, 19 +; CHECK-NEXT: v_readlane_b32 s37, v7, 1 +; CHECK-NEXT: v_readlane_b32 s38, v7, 2 +; CHECK-NEXT: v_readlane_b32 s39, v7, 3 +; CHECK-NEXT: v_readlane_b32 s40, v7, 4 +; CHECK-NEXT: v_readlane_b32 s41, v7, 5 +; CHECK-NEXT: v_readlane_b32 s42, v7, 6 +; CHECK-NEXT: v_readlane_b32 s43, v7, 7 +; CHECK-NEXT: v_readlane_b32 s44, v7, 8 +; CHECK-NEXT: v_readlane_b32 s45, v7, 9 +; CHECK-NEXT: v_readlane_b32 s46, v7, 10 +; CHECK-NEXT: v_readlane_b32 s47, v7, 11 +; CHECK-NEXT: v_readlane_b32 s48, v7, 12 +; CHECK-NEXT: v_readlane_b32 s49, v7, 13 +; CHECK-NEXT: v_readlane_b32 s50, v7, 14 +; CHECK-NEXT: v_readlane_b32 s51, v7, 15 +; CHECK-NEXT: image_sample_lz v2, v[0:1], s[36:43], s[8:11] dmask:0x1 +; CHECK-NEXT: v_readlane_b32 s36, v7, 16 +; CHECK-NEXT: v_readlane_b32 s44, v7, 24 +; CHECK-NEXT: v_readlane_b32 s45, v7, 25 +; CHECK-NEXT: v_readlane_b32 s46, v7, 26 +; CHECK-NEXT: v_readlane_b32 s47, v7, 27 +; CHECK-NEXT: v_readlane_b32 s48, v7, 28 +; CHECK-NEXT: v_readlane_b32 s49, v7, 29 +; CHECK-NEXT: v_readlane_b32 s50, 
v7, 30 +; CHECK-NEXT: v_readlane_b32 s51, v7, 31 +; CHECK-NEXT: v_mov_b32_e32 v3, 0 +; CHECK-NEXT: v_mov_b32_e32 v4, v3 +; CHECK-NEXT: v_readlane_b32 s37, v7, 17 +; CHECK-NEXT: v_readlane_b32 s38, v7, 18 +; CHECK-NEXT: v_readlane_b32 s39, v7, 19 ; CHECK-NEXT: image_sample_lz v0, v[0:1], s[44:51], s[12:15] dmask:0x1 -; CHECK-NEXT: v_readlane_b32 s40, v4, 20 -; CHECK-NEXT: v_readlane_b32 s41, v4, 21 -; CHECK-NEXT: v_readlane_b32 s42, v4, 22 -; CHECK-NEXT: v_readlane_b32 s43, v4, 23 +; CHECK-NEXT: v_readlane_b32 s40, v7, 20 +; CHECK-NEXT: v_readlane_b32 s41, v7, 21 +; CHECK-NEXT: v_readlane_b32 s42, v7, 22 +; CHECK-NEXT: v_readlane_b32 s43, v7, 23 ; CHECK-NEXT: s_waitcnt vmcnt(1) -; CHECK-NEXT: buffer_store_dwordx3 v[5:7], off, s[8:11], 0 +; CHECK-NEXT: buffer_store_dwordx3 v[2:4], off, s[8:11], 0 ; CHECK-NEXT: s_waitcnt vmcnt(1) ; CHECK-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 ; CHECK-NEXT: ; implicit-def: $vgpr0 ; CHECK-NEXT: .LBB0_6: ; %Flow12 ; CHECK-NEXT: s_or_saveexec_b64 s[4:5], s[22:23] -; CHECK-NEXT: v_readlane_b32 s52, v4, 40 -; CHECK-NEXT: v_readlane_b32 s53, v4, 41 -; CHECK-NEXT: v_readlane_b32 s54, v4, 42 -; CHECK-NEXT: v_readlane_b32 s55, v4, 43 -; CHECK-NEXT: v_readlane_b32 s56, v4, 44 -; CHECK-NEXT: v_readlane_b32 s57, v4, 45 -; CHECK-NEXT: v_readlane_b32 s58, v4, 46 -; CHECK-NEXT: v_readlane_b32 s59, v4, 47 -; CHECK-NEXT: v_readlane_b32 s60, v4, 48 -; CHECK-NEXT: v_readlane_b32 s61, v4, 49 -; CHECK-NEXT: v_readlane_b32 s62, v4, 50 -; CHECK-NEXT: v_readlane_b32 s63, v4, 51 -; CHECK-NEXT: v_readlane_b32 s64, v4, 52 -; CHECK-NEXT: v_readlane_b32 s65, v4, 53 -; CHECK-NEXT: v_readlane_b32 s66, v4, 54 -; CHECK-NEXT: v_readlane_b32 s67, v4, 55 +; CHECK-NEXT: v_readlane_b32 s52, v7, 40 +; CHECK-NEXT: v_readlane_b32 s53, v7, 41 +; CHECK-NEXT: v_readlane_b32 s54, v7, 42 +; CHECK-NEXT: v_readlane_b32 s55, v7, 43 +; CHECK-NEXT: v_readlane_b32 s56, v7, 44 +; CHECK-NEXT: v_readlane_b32 s57, v7, 45 +; CHECK-NEXT: v_readlane_b32 s58, v7, 46 +; CHECK-NEXT: 
v_readlane_b32 s59, v7, 47 +; CHECK-NEXT: v_readlane_b32 s60, v7, 48 +; CHECK-NEXT: v_readlane_b32 s61, v7, 49 +; CHECK-NEXT: v_readlane_b32 s62, v7, 50 +; CHECK-NEXT: v_readlane_b32 s63, v7, 51 +; CHECK-NEXT: v_readlane_b32 s64, v7, 52 +; CHECK-NEXT: v_readlane_b32 s65, v7, 53 +; CHECK-NEXT: v_readlane_b32 s66, v7, 54 +; CHECK-NEXT: v_readlane_b32 s67, v7, 55 ; CHECK-NEXT: s_xor_b64 exec, exec, s[4:5] ; CHECK-NEXT: s_cbranch_execz .LBB0_9 ; CHECK-NEXT: ; %bb.7: ; %bb33.preheader @@ -288,32 +288,32 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: s_mov_b32 s6, s8 ; CHECK-NEXT: s_mov_b32 s7, s8 ; CHECK-NEXT: v_mov_b32_e32 v1, s6 -; CHECK-NEXT: v_readlane_b32 s36, v4, 56 +; CHECK-NEXT: v_readlane_b32 s36, v7, 56 ; CHECK-NEXT: s_mov_b32 s9, s8 ; CHECK-NEXT: s_mov_b32 s10, s8 ; CHECK-NEXT: s_mov_b32 s11, s8 ; CHECK-NEXT: v_mov_b32_e32 v2, s7 -; CHECK-NEXT: v_readlane_b32 s37, v4, 57 -; CHECK-NEXT: v_readlane_b32 s38, v4, 58 -; CHECK-NEXT: v_readlane_b32 s39, v4, 59 -; CHECK-NEXT: v_readlane_b32 s40, v4, 60 -; CHECK-NEXT: v_readlane_b32 s41, v4, 61 -; CHECK-NEXT: v_readlane_b32 s42, v4, 62 -; CHECK-NEXT: v_readlane_b32 s43, v4, 63 +; CHECK-NEXT: v_readlane_b32 s37, v7, 57 +; CHECK-NEXT: v_readlane_b32 s38, v7, 58 +; CHECK-NEXT: v_readlane_b32 s39, v7, 59 +; CHECK-NEXT: v_readlane_b32 s40, v7, 60 +; CHECK-NEXT: v_readlane_b32 s41, v7, 61 +; CHECK-NEXT: v_readlane_b32 s42, v7, 62 +; CHECK-NEXT: v_readlane_b32 s43, v7, 63 ; CHECK-NEXT: s_nop 4 -; CHECK-NEXT: image_sample_lz v5, v[1:2], s[36:43], s[8:11] dmask:0x1 -; CHECK-NEXT: image_sample_lz v6, v[1:2], s[52:59], s[8:11] dmask:0x1 +; CHECK-NEXT: image_sample_lz v3, v[1:2], s[36:43], s[8:11] dmask:0x1 +; CHECK-NEXT: image_sample_lz v4, v[1:2], s[52:59], s[8:11] dmask:0x1 ; CHECK-NEXT: ; kill: killed $vgpr1_vgpr2 ; CHECK-NEXT: s_mov_b64 s[12:13], s[36:37] ; CHECK-NEXT: s_and_b64 vcc, exec, 0 -; CHECK-NEXT: v_readlane_b32 s44, v3, 0 -; CHECK-NEXT: v_readlane_b32 s45, v3, 1 -; CHECK-NEXT: v_readlane_b32 s46, v3, 2 -; 
CHECK-NEXT: v_readlane_b32 s47, v3, 3 -; CHECK-NEXT: v_readlane_b32 s48, v3, 4 -; CHECK-NEXT: v_readlane_b32 s49, v3, 5 -; CHECK-NEXT: v_readlane_b32 s50, v3, 6 -; CHECK-NEXT: v_readlane_b32 s51, v3, 7 +; CHECK-NEXT: v_readlane_b32 s44, v6, 0 +; CHECK-NEXT: v_readlane_b32 s45, v6, 1 +; CHECK-NEXT: v_readlane_b32 s46, v6, 2 +; CHECK-NEXT: v_readlane_b32 s47, v6, 3 +; CHECK-NEXT: v_readlane_b32 s48, v6, 4 +; CHECK-NEXT: v_readlane_b32 s49, v6, 5 +; CHECK-NEXT: v_readlane_b32 s50, v6, 6 +; CHECK-NEXT: v_readlane_b32 s51, v6, 7 ; CHECK-NEXT: s_mov_b64 s[14:15], s[38:39] ; CHECK-NEXT: s_mov_b64 s[16:17], s[40:41] ; CHECK-NEXT: s_mov_b64 s[18:19], s[42:43] @@ -321,7 +321,7 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: ; kill: killed $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59 ; CHECK-NEXT: ; kill: killed $sgpr8_sgpr9_sgpr10 killed $sgpr11 ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_sub_f32_e32 v1, v6, v5 +; CHECK-NEXT: v_sub_f32_e32 v1, v4, v3 ; CHECK-NEXT: v_mul_f32_e32 v0, v1, v0 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: .LBB0_8: ; %bb33 @@ -334,46 +334,44 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] ; CHECK-NEXT: .LBB0_10: ; %UnifiedReturnBlock ; CHECK-NEXT: s_or_b64 exec, exec, s[20:21] -; CHECK-NEXT: v_readlane_b32 s67, v8, 33 -; CHECK-NEXT: v_readlane_b32 s66, v8, 32 -; CHECK-NEXT: v_readlane_b32 s65, v8, 31 -; CHECK-NEXT: v_readlane_b32 s64, v8, 30 -; CHECK-NEXT: v_readlane_b32 s63, v8, 29 -; CHECK-NEXT: v_readlane_b32 s62, v8, 28 -; CHECK-NEXT: v_readlane_b32 s61, v8, 27 -; CHECK-NEXT: v_readlane_b32 s60, v8, 26 -; CHECK-NEXT: v_readlane_b32 s59, v8, 25 -; CHECK-NEXT: v_readlane_b32 s58, v8, 24 -; CHECK-NEXT: v_readlane_b32 s57, v8, 23 -; CHECK-NEXT: v_readlane_b32 s56, v8, 22 -; CHECK-NEXT: v_readlane_b32 s55, v8, 21 -; CHECK-NEXT: v_readlane_b32 s54, v8, 20 -; CHECK-NEXT: v_readlane_b32 s53, v8, 19 -; CHECK-NEXT: v_readlane_b32 s52, v8, 18 -; CHECK-NEXT: v_readlane_b32 s51, v8, 17 -; CHECK-NEXT: 
v_readlane_b32 s50, v8, 16 -; CHECK-NEXT: v_readlane_b32 s49, v8, 15 -; CHECK-NEXT: v_readlane_b32 s48, v8, 14 -; CHECK-NEXT: v_readlane_b32 s47, v8, 13 -; CHECK-NEXT: v_readlane_b32 s46, v8, 12 -; CHECK-NEXT: v_readlane_b32 s45, v8, 11 -; CHECK-NEXT: v_readlane_b32 s44, v8, 10 -; CHECK-NEXT: v_readlane_b32 s43, v8, 9 -; CHECK-NEXT: v_readlane_b32 s42, v8, 8 -; CHECK-NEXT: v_readlane_b32 s41, v8, 7 -; CHECK-NEXT: v_readlane_b32 s40, v8, 6 -; CHECK-NEXT: v_readlane_b32 s39, v8, 5 -; CHECK-NEXT: v_readlane_b32 s38, v8, 4 -; CHECK-NEXT: v_readlane_b32 s37, v8, 3 -; CHECK-NEXT: v_readlane_b32 s36, v8, 2 -; CHECK-NEXT: v_readlane_b32 s31, v8, 1 -; CHECK-NEXT: v_readlane_b32 s30, v8, 0 -; CHECK-NEXT: ; kill: killed $vgpr4 -; CHECK-NEXT: ; kill: killed $vgpr3 +; CHECK-NEXT: v_readlane_b32 s67, v5, 33 +; CHECK-NEXT: v_readlane_b32 s66, v5, 32 +; CHECK-NEXT: v_readlane_b32 s65, v5, 31 +; CHECK-NEXT: v_readlane_b32 s64, v5, 30 +; CHECK-NEXT: v_readlane_b32 s63, v5, 29 +; CHECK-NEXT: v_readlane_b32 s62, v5, 28 +; CHECK-NEXT: v_readlane_b32 s61, v5, 27 +; CHECK-NEXT: v_readlane_b32 s60, v5, 26 +; CHECK-NEXT: v_readlane_b32 s59, v5, 25 +; CHECK-NEXT: v_readlane_b32 s58, v5, 24 +; CHECK-NEXT: v_readlane_b32 s57, v5, 23 +; CHECK-NEXT: v_readlane_b32 s56, v5, 22 +; CHECK-NEXT: v_readlane_b32 s55, v5, 21 +; CHECK-NEXT: v_readlane_b32 s54, v5, 20 +; CHECK-NEXT: v_readlane_b32 s53, v5, 19 +; CHECK-NEXT: v_readlane_b32 s52, v5, 18 +; CHECK-NEXT: v_readlane_b32 s51, v5, 17 +; CHECK-NEXT: v_readlane_b32 s50, v5, 16 +; CHECK-NEXT: v_readlane_b32 s49, v5, 15 +; CHECK-NEXT: v_readlane_b32 s48, v5, 14 +; CHECK-NEXT: v_readlane_b32 s47, v5, 13 +; CHECK-NEXT: v_readlane_b32 s46, v5, 12 +; CHECK-NEXT: v_readlane_b32 s45, v5, 11 +; CHECK-NEXT: v_readlane_b32 s44, v5, 10 +; CHECK-NEXT: v_readlane_b32 s43, v5, 9 +; CHECK-NEXT: v_readlane_b32 s42, v5, 8 +; CHECK-NEXT: v_readlane_b32 s41, v5, 7 +; CHECK-NEXT: v_readlane_b32 s40, v5, 6 +; CHECK-NEXT: v_readlane_b32 s39, v5, 5 +; CHECK-NEXT: 
v_readlane_b32 s38, v5, 4 +; CHECK-NEXT: v_readlane_b32 s37, v5, 3 +; CHECK-NEXT: v_readlane_b32 s36, v5, 2 +; CHECK-NEXT: v_readlane_b32 s31, v5, 1 +; CHECK-NEXT: v_readlane_b32 s30, v5, 0 ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v5, off, s[0:3], s32 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir b/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir index 96fb7cfeb2775..40089ed82b5db 100644 --- a/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir +++ b/llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: not llc -mtriple=amdgcn -mcpu=gfx900 -start-before=greedy,0 -stop-after=virtregrewriter,1 -o - 2>%t.err %s | FileCheck %s +# RUN: not llc -mtriple=amdgcn -mcpu=gfx900 -start-before=greedy,0 -stop-after=virtregrewriter,2 -o - 2>%t.err %s | FileCheck %s # RUN: FileCheck -check-prefix=ERR %s < %t.err # This testcase cannot be compiled. 
An attempted eviction legality diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll index 60946956547a7..f1f4abe580c00 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -1510,12 +1510,7 @@ define amdgpu_kernel void @extract_neg_offset_vgpr(ptr addrspace(1) %out) { ; NOOPT-NEXT: s_mov_b32 s23, 0xe8f000 ; NOOPT-NEXT: s_add_u32 s20, s20, s9 ; NOOPT-NEXT: s_addc_u32 s21, s21, 0 -; NOOPT-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane -; NOOPT-NEXT: v_mov_b32_e32 v1, v0 -; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 -; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[16:17] -; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:68 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:68 ; 4-byte Folded Spill ; NOOPT-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9 ; NOOPT-NEXT: s_waitcnt lgkmcnt(0) ; NOOPT-NEXT: s_mov_b32 s6, s1 @@ -1526,11 +1521,11 @@ define amdgpu_kernel void @extract_neg_offset_vgpr(ptr addrspace(1) %out) { ; NOOPT-NEXT: s_mov_b32 s1, s6 ; NOOPT-NEXT: s_mov_b32 s2, s5 ; NOOPT-NEXT: s_mov_b32 s3, s4 -; NOOPT-NEXT: s_waitcnt vmcnt(1) -; NOOPT-NEXT: v_writelane_b32 v0, s0, 0 -; NOOPT-NEXT: v_writelane_b32 v0, s1, 1 -; NOOPT-NEXT: v_writelane_b32 v0, s2, 2 -; NOOPT-NEXT: v_writelane_b32 v0, s3, 3 +; NOOPT-NEXT: ; implicit-def: $vgpr31 : SGPR spill to VGPR lane +; NOOPT-NEXT: v_writelane_b32 v31, s0, 0 +; NOOPT-NEXT: v_writelane_b32 v31, s1, 1 +; NOOPT-NEXT: v_writelane_b32 v31, s2, 2 +; NOOPT-NEXT: v_writelane_b32 v31, s3, 3 ; NOOPT-NEXT: s_mov_b32 s0, 16 ; NOOPT-NEXT: s_mov_b32 s1, 15 ; NOOPT-NEXT: s_mov_b32 s2, 14 @@ -1548,126 +1543,130 @@ define amdgpu_kernel void @extract_neg_offset_vgpr(ptr addrspace(1) %out) { ; NOOPT-NEXT: s_mov_b32 s14, 1 ; NOOPT-NEXT: s_mov_b32 s15, 0 ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: 
v_mov_b32_e32 v1, s15 -; NOOPT-NEXT: v_mov_b32_e32 v31, s14 -; NOOPT-NEXT: v_mov_b32_e32 v30, s13 -; NOOPT-NEXT: v_mov_b32_e32 v29, s12 -; NOOPT-NEXT: v_mov_b32_e32 v28, s11 -; NOOPT-NEXT: v_mov_b32_e32 v27, s10 -; NOOPT-NEXT: v_mov_b32_e32 v26, s9 -; NOOPT-NEXT: v_mov_b32_e32 v25, s8 -; NOOPT-NEXT: v_mov_b32_e32 v24, s7 -; NOOPT-NEXT: v_mov_b32_e32 v23, s6 -; NOOPT-NEXT: v_mov_b32_e32 v22, s5 -; NOOPT-NEXT: v_mov_b32_e32 v21, s4 -; NOOPT-NEXT: v_mov_b32_e32 v20, s3 -; NOOPT-NEXT: v_mov_b32_e32 v19, s2 -; NOOPT-NEXT: v_mov_b32_e32 v18, s1 -; NOOPT-NEXT: v_mov_b32_e32 v17, s0 -; NOOPT-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16 killed $exec -; NOOPT-NEXT: v_mov_b32_e32 v2, v31 -; NOOPT-NEXT: v_mov_b32_e32 v3, v30 -; NOOPT-NEXT: v_mov_b32_e32 v4, v29 -; NOOPT-NEXT: v_mov_b32_e32 v5, v28 -; NOOPT-NEXT: v_mov_b32_e32 v6, v27 -; NOOPT-NEXT: v_mov_b32_e32 v7, v26 -; NOOPT-NEXT: v_mov_b32_e32 v8, v25 -; NOOPT-NEXT: v_mov_b32_e32 v9, v24 -; NOOPT-NEXT: v_mov_b32_e32 v10, v23 -; NOOPT-NEXT: v_mov_b32_e32 v11, v22 -; NOOPT-NEXT: v_mov_b32_e32 v12, v21 -; NOOPT-NEXT: v_mov_b32_e32 v13, v20 -; NOOPT-NEXT: v_mov_b32_e32 v14, v19 -; NOOPT-NEXT: v_mov_b32_e32 v15, v18 -; NOOPT-NEXT: v_mov_b32_e32 v16, v17 -; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:12 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:16 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:20 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:24 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:28 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:32 ; 4-byte 
Folded Spill -; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:36 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:40 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:44 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:48 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:52 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:56 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:60 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill +; NOOPT-NEXT: v_mov_b32_e32 v0, s15 +; NOOPT-NEXT: v_mov_b32_e32 v30, s14 +; NOOPT-NEXT: v_mov_b32_e32 v29, s13 +; NOOPT-NEXT: v_mov_b32_e32 v28, s12 +; NOOPT-NEXT: v_mov_b32_e32 v27, s11 +; NOOPT-NEXT: v_mov_b32_e32 v26, s10 +; NOOPT-NEXT: v_mov_b32_e32 v25, s9 +; NOOPT-NEXT: v_mov_b32_e32 v24, s8 +; NOOPT-NEXT: v_mov_b32_e32 v23, s7 +; NOOPT-NEXT: v_mov_b32_e32 v22, s6 +; NOOPT-NEXT: v_mov_b32_e32 v21, s5 +; NOOPT-NEXT: v_mov_b32_e32 v20, s4 +; NOOPT-NEXT: v_mov_b32_e32 v19, s3 +; NOOPT-NEXT: v_mov_b32_e32 v18, s2 +; NOOPT-NEXT: v_mov_b32_e32 v17, s1 +; NOOPT-NEXT: v_mov_b32_e32 v16, s0 +; NOOPT-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 killed $exec +; NOOPT-NEXT: v_mov_b32_e32 v1, v30 +; NOOPT-NEXT: v_mov_b32_e32 v2, v29 +; NOOPT-NEXT: v_mov_b32_e32 v3, v28 +; NOOPT-NEXT: v_mov_b32_e32 v4, v27 +; NOOPT-NEXT: v_mov_b32_e32 v5, v26 +; NOOPT-NEXT: v_mov_b32_e32 v6, v25 +; NOOPT-NEXT: v_mov_b32_e32 v7, v24 +; NOOPT-NEXT: v_mov_b32_e32 v8, v23 +; NOOPT-NEXT: v_mov_b32_e32 v9, v22 +; NOOPT-NEXT: v_mov_b32_e32 v10, v21 +; NOOPT-NEXT: v_mov_b32_e32 v11, v20 +; NOOPT-NEXT: v_mov_b32_e32 v12, v19 +; NOOPT-NEXT: v_mov_b32_e32 v13, v18 +; NOOPT-NEXT: v_mov_b32_e32 v14, 
v17 +; NOOPT-NEXT: v_mov_b32_e32 v15, v16 +; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:12 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:16 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:20 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:24 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:28 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:32 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:36 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:40 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:44 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:48 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:52 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:56 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:60 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 s[0:1], exec -; NOOPT-NEXT: v_writelane_b32 v0, s0, 4 -; NOOPT-NEXT: v_writelane_b32 v0, s1, 5 +; NOOPT-NEXT: v_writelane_b32 v31, s0, 4 +; NOOPT-NEXT: v_writelane_b32 v31, s1, 5 ; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 -; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v31, off, s[20:23], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[16:17] ; NOOPT-NEXT: ; implicit-def: $vgpr0 ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: .LBB5_1: ; =>This Inner 
Loop Header: Depth=1 +; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:72 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) +; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v1, off, s[20:23], 0 offset:8 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v2, off, s[20:23], 0 offset:12 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v3, off, s[20:23], 0 offset:16 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v4, off, s[20:23], 0 offset:20 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v5, off, s[20:23], 0 offset:24 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v6, off, s[20:23], 0 offset:28 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v7, off, s[20:23], 0 offset:32 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v8, off, s[20:23], 0 offset:36 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v9, off, s[20:23], 0 offset:40 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(6) +; NOOPT-NEXT: buffer_load_dword v10, off, s[20:23], 0 offset:44 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(5) +; NOOPT-NEXT: buffer_load_dword v11, off, s[20:23], 0 offset:48 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(4) +; NOOPT-NEXT: buffer_load_dword v12, off, s[20:23], 0 offset:52 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(3) +; NOOPT-NEXT: buffer_load_dword v13, off, s[20:23], 0 offset:56 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(2) +; NOOPT-NEXT: buffer_load_dword v14, off, s[20:23], 0 offset:60 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) +; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:64 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:68 ; 4-byte Folded Reload ; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 ; 4-byte Folded Reload +; 
NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[16:17] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 6 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 7 -; NOOPT-NEXT: buffer_load_dword v18, off, s[20:23], 0 offset:72 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v1, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v2, off, s[20:23], 0 offset:8 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v3, off, s[20:23], 0 offset:12 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v4, off, s[20:23], 0 offset:16 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v5, off, s[20:23], 0 offset:20 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v6, off, s[20:23], 0 offset:24 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v7, off, s[20:23], 0 offset:28 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v8, off, s[20:23], 0 offset:32 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v9, off, s[20:23], 0 offset:36 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v10, off, s[20:23], 0 offset:40 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v11, off, s[20:23], 0 offset:44 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v12, off, s[20:23], 0 offset:48 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v13, off, s[20:23], 0 offset:52 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v14, off, s[20:23], 0 offset:56 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:60 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:64 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:68 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readfirstlane_b32 s2, v17 -; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17 +; NOOPT-NEXT: v_readlane_b32 s0, v31, 6 +; NOOPT-NEXT: v_readlane_b32 s1, 
v31, 7 +; NOOPT-NEXT: v_readfirstlane_b32 s2, v16 +; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v16 ; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] ; NOOPT-NEXT: s_add_i32 m0, s2, 0xfffffe00 -; NOOPT-NEXT: v_movrels_b32_e32 v1, v1 -; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:76 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:72 ; 4-byte Folded Spill +; NOOPT-NEXT: v_movrels_b32_e32 v0, v0 +; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:76 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:72 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 s[2:3], s[0:1] -; NOOPT-NEXT: v_writelane_b32 v0, s2, 6 -; NOOPT-NEXT: v_writelane_b32 v0, s3, 7 +; NOOPT-NEXT: v_writelane_b32 v31, s2, 6 +; NOOPT-NEXT: v_writelane_b32 v31, s3, 7 ; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 -; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v31, off, s[20:23], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[16:17] ; NOOPT-NEXT: s_xor_b64 exec, exec, s[0:1] ; NOOPT-NEXT: s_cbranch_execnz .LBB5_1 ; NOOPT-NEXT: ; %bb.2: ; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[16:17] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 4 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 5 +; NOOPT-NEXT: v_readlane_b32 s0, v31, 4 +; NOOPT-NEXT: v_readlane_b32 s1, v31, 5 ; NOOPT-NEXT: s_mov_b64 exec, s[0:1] ; NOOPT-NEXT: ; %bb.3: +; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:76 ; 4-byte Folded Reload ; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 -; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: 
s_mov_b64 exec, s[16:17] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 0 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 1 -; NOOPT-NEXT: v_readlane_b32 s2, v0, 2 -; NOOPT-NEXT: v_readlane_b32 s3, v0, 3 -; NOOPT-NEXT: buffer_load_dword v1, off, s[20:23], 0 offset:76 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: buffer_store_dword v1, off, s[0:3], 0 -; NOOPT-NEXT: ; kill: killed $vgpr0 +; NOOPT-NEXT: v_readlane_b32 s0, v31, 0 +; NOOPT-NEXT: v_readlane_b32 s1, v31, 1 +; NOOPT-NEXT: v_readlane_b32 s2, v31, 2 +; NOOPT-NEXT: v_readlane_b32 s3, v31, 3 +; NOOPT-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; NOOPT-NEXT: s_endpgm ; ; SI-MOVREL-LABEL: extract_neg_offset_vgpr: @@ -4022,7 +4021,6 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr ; NOOPT-NEXT: s_mov_b32 s23, 0xe8f000 ; NOOPT-NEXT: s_add_u32 s20, s20, s9 ; NOOPT-NEXT: s_addc_u32 s21, s21, 0 -; NOOPT-NEXT: ; implicit-def: $vgpr16 : SGPR spill to VGPR lane ; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:136 ; 4-byte Folded Spill ; NOOPT-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xb ; NOOPT-NEXT: s_waitcnt lgkmcnt(0) @@ -4034,10 +4032,11 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr ; NOOPT-NEXT: s_mov_b32 s1, s6 ; NOOPT-NEXT: s_mov_b32 s2, s5 ; NOOPT-NEXT: s_mov_b32 s3, s4 -; NOOPT-NEXT: v_writelane_b32 v16, s0, 0 -; NOOPT-NEXT: v_writelane_b32 v16, s1, 1 -; NOOPT-NEXT: v_writelane_b32 v16, s2, 2 -; NOOPT-NEXT: v_writelane_b32 v16, s3, 3 +; NOOPT-NEXT: ; implicit-def: $vgpr31 : SGPR spill to VGPR lane +; NOOPT-NEXT: v_writelane_b32 v31, s0, 0 +; NOOPT-NEXT: v_writelane_b32 v31, s1, 1 +; NOOPT-NEXT: v_writelane_b32 v31, s2, 2 +; NOOPT-NEXT: v_writelane_b32 v31, s3, 3 ; NOOPT-NEXT: s_mov_b32 s0, 16 ; NOOPT-NEXT: s_mov_b32 s1, 15 ; NOOPT-NEXT: s_mov_b32 s2, 14 @@ -4056,37 +4055,37 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr ; NOOPT-NEXT: s_mov_b32 s15, 1 ; 
NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: v_mov_b32_e32 v0, s15 -; NOOPT-NEXT: v_mov_b32_e32 v31, s14 -; NOOPT-NEXT: v_mov_b32_e32 v30, s13 -; NOOPT-NEXT: v_mov_b32_e32 v29, s12 -; NOOPT-NEXT: v_mov_b32_e32 v28, s11 -; NOOPT-NEXT: v_mov_b32_e32 v27, s10 -; NOOPT-NEXT: v_mov_b32_e32 v26, s9 -; NOOPT-NEXT: v_mov_b32_e32 v25, s8 -; NOOPT-NEXT: v_mov_b32_e32 v24, s7 -; NOOPT-NEXT: v_mov_b32_e32 v23, s6 -; NOOPT-NEXT: v_mov_b32_e32 v22, s5 -; NOOPT-NEXT: v_mov_b32_e32 v21, s4 -; NOOPT-NEXT: v_mov_b32_e32 v20, s3 -; NOOPT-NEXT: v_mov_b32_e32 v19, s2 -; NOOPT-NEXT: v_mov_b32_e32 v18, s1 -; NOOPT-NEXT: v_mov_b32_e32 v17, s0 +; NOOPT-NEXT: v_mov_b32_e32 v30, s14 +; NOOPT-NEXT: v_mov_b32_e32 v29, s13 +; NOOPT-NEXT: v_mov_b32_e32 v28, s12 +; NOOPT-NEXT: v_mov_b32_e32 v27, s11 +; NOOPT-NEXT: v_mov_b32_e32 v26, s10 +; NOOPT-NEXT: v_mov_b32_e32 v25, s9 +; NOOPT-NEXT: v_mov_b32_e32 v24, s8 +; NOOPT-NEXT: v_mov_b32_e32 v23, s7 +; NOOPT-NEXT: v_mov_b32_e32 v22, s6 +; NOOPT-NEXT: v_mov_b32_e32 v21, s5 +; NOOPT-NEXT: v_mov_b32_e32 v20, s4 +; NOOPT-NEXT: v_mov_b32_e32 v19, s3 +; NOOPT-NEXT: v_mov_b32_e32 v18, s2 +; NOOPT-NEXT: v_mov_b32_e32 v17, s1 +; NOOPT-NEXT: v_mov_b32_e32 v16, s0 ; NOOPT-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 killed $exec -; NOOPT-NEXT: v_mov_b32_e32 v1, v31 -; NOOPT-NEXT: v_mov_b32_e32 v2, v30 -; NOOPT-NEXT: v_mov_b32_e32 v3, v29 -; NOOPT-NEXT: v_mov_b32_e32 v4, v28 -; NOOPT-NEXT: v_mov_b32_e32 v5, v27 -; NOOPT-NEXT: v_mov_b32_e32 v6, v26 -; NOOPT-NEXT: v_mov_b32_e32 v7, v25 -; NOOPT-NEXT: v_mov_b32_e32 v8, v24 -; NOOPT-NEXT: v_mov_b32_e32 v9, v23 -; NOOPT-NEXT: v_mov_b32_e32 v10, v22 -; NOOPT-NEXT: v_mov_b32_e32 v11, v21 -; NOOPT-NEXT: v_mov_b32_e32 v12, v20 -; NOOPT-NEXT: v_mov_b32_e32 v13, v19 -; NOOPT-NEXT: v_mov_b32_e32 v14, v18 -; NOOPT-NEXT: v_mov_b32_e32 v15, v17 +; NOOPT-NEXT: v_mov_b32_e32 v1, v30 +; NOOPT-NEXT: v_mov_b32_e32 v2, v29 +; 
NOOPT-NEXT: v_mov_b32_e32 v3, v28 +; NOOPT-NEXT: v_mov_b32_e32 v4, v27 +; NOOPT-NEXT: v_mov_b32_e32 v5, v26 +; NOOPT-NEXT: v_mov_b32_e32 v6, v25 +; NOOPT-NEXT: v_mov_b32_e32 v7, v24 +; NOOPT-NEXT: v_mov_b32_e32 v8, v23 +; NOOPT-NEXT: v_mov_b32_e32 v9, v22 +; NOOPT-NEXT: v_mov_b32_e32 v10, v21 +; NOOPT-NEXT: v_mov_b32_e32 v11, v20 +; NOOPT-NEXT: v_mov_b32_e32 v12, v19 +; NOOPT-NEXT: v_mov_b32_e32 v13, v18 +; NOOPT-NEXT: v_mov_b32_e32 v14, v17 +; NOOPT-NEXT: v_mov_b32_e32 v15, v16 ; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:72 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:76 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:80 ; 4-byte Folded Spill @@ -4103,202 +4102,195 @@ define amdgpu_kernel void @insert_neg_offset_vgpr(ptr addrspace(1) %in, ptr addr ; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:124 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:128 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:132 ; 4-byte Folded Spill -; NOOPT-NEXT: v_mov_b32_e32 v17, 33 -; NOOPT-NEXT: buffer_store_dword v17, off, s[20:23], 0 offset:68 ; 4-byte Folded Spill +; NOOPT-NEXT: v_mov_b32_e32 v16, 33 +; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:68 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 s[0:1], exec -; NOOPT-NEXT: v_writelane_b32 v16, s0, 4 -; NOOPT-NEXT: v_writelane_b32 v16, s1, 5 +; NOOPT-NEXT: v_writelane_b32 v31, s0, 4 +; NOOPT-NEXT: v_writelane_b32 v31, s1, 5 ; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 -; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v31, off, s[20:23], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[16:17] -; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill -; NOOPT-NEXT: 
buffer_store_dword v2, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:12 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:16 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:20 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:24 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:28 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:32 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:36 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:40 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:44 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:48 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:52 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:56 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:60 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:12 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:16 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:20 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:24 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:28 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:32 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:36 ; 4-byte Folded Spill +; 
NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:40 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:44 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:48 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:52 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:56 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:60 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: .LBB14_1: ; =>This Inner Loop Header: Depth=1 -; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 -; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:64 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[16:17] -; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 6 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 7 -; NOOPT-NEXT: buffer_load_dword v1, off, s[20:23], 0 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v2, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v3, off, s[20:23], 0 offset:8 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v4, off, s[20:23], 0 offset:12 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v5, off, s[20:23], 0 offset:16 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v6, off, s[20:23], 0 offset:20 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v7, off, s[20:23], 0 offset:24 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v8, off, s[20:23], 0 offset:28 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v1, off, s[20:23], 0 offset:8 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v2, off, s[20:23], 0 offset:12 ; 4-byte Folded Reload +; NOOPT-NEXT: 
buffer_load_dword v3, off, s[20:23], 0 offset:16 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v4, off, s[20:23], 0 offset:20 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v5, off, s[20:23], 0 offset:24 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v6, off, s[20:23], 0 offset:28 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v7, off, s[20:23], 0 offset:32 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v8, off, s[20:23], 0 offset:36 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(6) -; NOOPT-NEXT: buffer_load_dword v9, off, s[20:23], 0 offset:32 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v9, off, s[20:23], 0 offset:40 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(5) -; NOOPT-NEXT: buffer_load_dword v10, off, s[20:23], 0 offset:36 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v10, off, s[20:23], 0 offset:44 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(4) -; NOOPT-NEXT: buffer_load_dword v11, off, s[20:23], 0 offset:40 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v11, off, s[20:23], 0 offset:48 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(3) -; NOOPT-NEXT: buffer_load_dword v12, off, s[20:23], 0 offset:44 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v12, off, s[20:23], 0 offset:52 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(2) -; NOOPT-NEXT: buffer_load_dword v13, off, s[20:23], 0 offset:48 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v13, off, s[20:23], 0 offset:56 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(1) -; NOOPT-NEXT: buffer_load_dword v14, off, s[20:23], 0 offset:52 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v14, off, s[20:23], 0 offset:60 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:56 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:60 ; 4-byte Folded Reload -; NOOPT-NEXT: 
buffer_load_dword v17, off, s[20:23], 0 offset:68 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v18, off, s[20:23], 0 offset:136 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:64 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:68 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:136 ; 4-byte Folded Reload +; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 +; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: s_mov_b64 exec, s[16:17] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readfirstlane_b32 s2, v18 -; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v18 +; NOOPT-NEXT: v_readlane_b32 s0, v31, 6 +; NOOPT-NEXT: v_readlane_b32 s1, v31, 7 +; NOOPT-NEXT: v_readfirstlane_b32 s2, v17 +; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17 ; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] ; NOOPT-NEXT: s_add_i32 m0, s2, 0xfffffe00 -; NOOPT-NEXT: v_movreld_b32_e32 v1, v17 -; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:140 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:144 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:148 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:152 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:156 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:160 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:164 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:168 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:172 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:176 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:180 ; 4-byte Folded Spill -; 
NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:184 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:188 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:192 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:196 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:200 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:12 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:16 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:20 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:24 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:28 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:32 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:36 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:40 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:44 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:48 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:52 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:56 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:60 ; 4-byte Folded Spill +; NOOPT-NEXT: v_movreld_b32_e32 v0, v16 +; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:140 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v1, 
off, s[20:23], 0 offset:144 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:148 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:152 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:156 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:160 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:164 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:168 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:172 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:176 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:180 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:184 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:188 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:192 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:196 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:200 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:12 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:16 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:20 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:24 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:28 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:32 ; 4-byte Folded Spill +; NOOPT-NEXT: 
buffer_store_dword v8, off, s[20:23], 0 offset:36 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:40 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:44 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:48 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:52 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:56 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:60 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 s[2:3], s[0:1] -; NOOPT-NEXT: v_writelane_b32 v0, s2, 6 -; NOOPT-NEXT: v_writelane_b32 v0, s3, 7 +; NOOPT-NEXT: v_writelane_b32 v31, s2, 6 +; NOOPT-NEXT: v_writelane_b32 v31, s3, 7 ; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 -; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v31, off, s[20:23], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[16:17] ; NOOPT-NEXT: s_xor_b64 exec, exec, s[0:1] ; NOOPT-NEXT: s_cbranch_execnz .LBB14_1 ; NOOPT-NEXT: ; %bb.2: ; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:64 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[16:17] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 4 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 5 +; NOOPT-NEXT: v_readlane_b32 s0, v31, 4 +; NOOPT-NEXT: v_readlane_b32 s1, v31, 5 ; NOOPT-NEXT: s_mov_b64 exec, s[0:1] ; NOOPT-NEXT: ; %bb.3: +; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:140 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:144 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword 
v17, off, s[20:23], 0 offset:148 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v18, off, s[20:23], 0 offset:152 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v19, off, s[20:23], 0 offset:156 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v20, off, s[20:23], 0 offset:160 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v21, off, s[20:23], 0 offset:164 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v22, off, s[20:23], 0 offset:168 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v23, off, s[20:23], 0 offset:172 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v24, off, s[20:23], 0 offset:176 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v25, off, s[20:23], 0 offset:180 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v26, off, s[20:23], 0 offset:184 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v27, off, s[20:23], 0 offset:188 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v28, off, s[20:23], 0 offset:192 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v29, off, s[20:23], 0 offset:196 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v30, off, s[20:23], 0 offset:200 ; 4-byte Folded Reload ; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 -; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:64 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[16:17] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 0 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 1 -; NOOPT-NEXT: v_readlane_b32 s2, v0, 2 -; NOOPT-NEXT: v_readlane_b32 s3, v0, 3 -; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:140 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:144 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v18, off, s[20:23], 0 offset:148 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v19, off, s[20:23], 0 offset:152 ; 4-byte 
Folded Reload -; NOOPT-NEXT: buffer_load_dword v20, off, s[20:23], 0 offset:156 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v21, off, s[20:23], 0 offset:160 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v22, off, s[20:23], 0 offset:164 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v23, off, s[20:23], 0 offset:168 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v24, off, s[20:23], 0 offset:172 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v25, off, s[20:23], 0 offset:176 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v26, off, s[20:23], 0 offset:180 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v27, off, s[20:23], 0 offset:184 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v28, off, s[20:23], 0 offset:188 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v29, off, s[20:23], 0 offset:192 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v30, off, s[20:23], 0 offset:196 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 offset:200 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt vmcnt(12) -; NOOPT-NEXT: v_mov_b32_e32 v5, v19 -; NOOPT-NEXT: v_mov_b32_e32 v6, v18 -; NOOPT-NEXT: v_mov_b32_e32 v7, v17 -; NOOPT-NEXT: v_mov_b32_e32 v1, v16 -; NOOPT-NEXT: s_waitcnt vmcnt(8) -; NOOPT-NEXT: v_mov_b32_e32 v2, v23 -; NOOPT-NEXT: v_mov_b32_e32 v3, v22 -; NOOPT-NEXT: v_mov_b32_e32 v4, v21 -; NOOPT-NEXT: v_mov_b32_e32 v8, v20 -; NOOPT-NEXT: s_waitcnt vmcnt(4) -; NOOPT-NEXT: v_mov_b32_e32 v13, v27 -; NOOPT-NEXT: v_mov_b32_e32 v14, v26 -; NOOPT-NEXT: v_mov_b32_e32 v15, v25 -; NOOPT-NEXT: v_mov_b32_e32 v9, v24 -; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_mov_b32_e32 v10, v31 -; NOOPT-NEXT: v_mov_b32_e32 v11, v30 -; NOOPT-NEXT: v_mov_b32_e32 v12, v29 -; NOOPT-NEXT: v_mov_b32_e32 v16, v28 +; NOOPT-NEXT: v_readlane_b32 s0, v31, 0 +; NOOPT-NEXT: v_readlane_b32 s1, v31, 1 +; NOOPT-NEXT: v_readlane_b32 s2, v31, 2 +; NOOPT-NEXT: v_readlane_b32 s3, v31, 3 +; NOOPT-NEXT: 
v_mov_b32_e32 v4, v18 +; NOOPT-NEXT: v_mov_b32_e32 v5, v17 +; NOOPT-NEXT: v_mov_b32_e32 v6, v16 +; NOOPT-NEXT: v_mov_b32_e32 v0, v15 +; NOOPT-NEXT: v_mov_b32_e32 v1, v22 +; NOOPT-NEXT: v_mov_b32_e32 v2, v21 +; NOOPT-NEXT: v_mov_b32_e32 v3, v20 +; NOOPT-NEXT: v_mov_b32_e32 v7, v19 +; NOOPT-NEXT: v_mov_b32_e32 v12, v26 +; NOOPT-NEXT: v_mov_b32_e32 v13, v25 +; NOOPT-NEXT: v_mov_b32_e32 v14, v24 +; NOOPT-NEXT: v_mov_b32_e32 v8, v23 +; NOOPT-NEXT: v_mov_b32_e32 v9, v30 +; NOOPT-NEXT: v_mov_b32_e32 v10, v29 +; NOOPT-NEXT: v_mov_b32_e32 v11, v28 +; NOOPT-NEXT: v_mov_b32_e32 v15, v27 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 -; NOOPT-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17_vgpr18_vgpr19 killed $exec -; NOOPT-NEXT: v_mov_b32_e32 v17, v12 -; NOOPT-NEXT: v_mov_b32_e32 v18, v11 -; NOOPT-NEXT: v_mov_b32_e32 v19, v10 -; NOOPT-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48 +; NOOPT-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16_vgpr17_vgpr18 killed $exec +; NOOPT-NEXT: v_mov_b32_e32 v16, v11 +; NOOPT-NEXT: v_mov_b32_e32 v17, v10 +; NOOPT-NEXT: v_mov_b32_e32 v18, v9 +; NOOPT-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:48 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 -; NOOPT-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10_vgpr11_vgpr12 killed $exec -; NOOPT-NEXT: v_mov_b32_e32 v10, v15 -; NOOPT-NEXT: v_mov_b32_e32 v11, v14 -; NOOPT-NEXT: v_mov_b32_e32 v12, v13 -; NOOPT-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:32 +; NOOPT-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9_vgpr10_vgpr11 killed $exec +; NOOPT-NEXT: v_mov_b32_e32 v9, v14 +; NOOPT-NEXT: v_mov_b32_e32 v10, v13 +; NOOPT-NEXT: v_mov_b32_e32 v11, v12 +; NOOPT-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32 ; 
NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 -; NOOPT-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9_vgpr10_vgpr11 killed $exec +; NOOPT-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8_vgpr9_vgpr10 killed $exec ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: v_mov_b32_e32 v9, v4 -; NOOPT-NEXT: v_mov_b32_e32 v10, v3 -; NOOPT-NEXT: v_mov_b32_e32 v11, v2 -; NOOPT-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16 +; NOOPT-NEXT: v_mov_b32_e32 v8, v3 +; NOOPT-NEXT: v_mov_b32_e32 v9, v2 +; NOOPT-NEXT: v_mov_b32_e32 v10, v1 +; NOOPT-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:16 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 -; NOOPT-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec -; NOOPT-NEXT: v_mov_b32_e32 v2, v7 -; NOOPT-NEXT: v_mov_b32_e32 v3, v6 -; NOOPT-NEXT: v_mov_b32_e32 v4, v5 -; NOOPT-NEXT: buffer_store_dwordx4 v[1:4], off, s[0:3], 0 -; NOOPT-NEXT: ; kill: killed $vgpr0 +; NOOPT-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec +; NOOPT-NEXT: v_mov_b32_e32 v1, v6 +; NOOPT-NEXT: v_mov_b32_e32 v2, v5 +; NOOPT-NEXT: v_mov_b32_e32 v3, v4 +; NOOPT-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; NOOPT-NEXT: s_endpgm ; ; SI-MOVREL-LABEL: insert_neg_offset_vgpr: @@ -4512,7 +4504,6 @@ define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p ; NOOPT-NEXT: s_mov_b32 s23, 0xe8f000 ; NOOPT-NEXT: s_add_u32 s20, s20, s9 ; NOOPT-NEXT: s_addc_u32 s21, s21, 0 -; NOOPT-NEXT: ; implicit-def: $vgpr16 : SGPR spill to VGPR lane ; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:136 ; 4-byte Folded Spill ; NOOPT-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xb ; NOOPT-NEXT: s_waitcnt lgkmcnt(0) @@ -4524,10 +4515,11 @@ define amdgpu_kernel void 
@insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p ; NOOPT-NEXT: s_mov_b32 s1, s6 ; NOOPT-NEXT: s_mov_b32 s2, s5 ; NOOPT-NEXT: s_mov_b32 s3, s4 -; NOOPT-NEXT: v_writelane_b32 v16, s0, 0 -; NOOPT-NEXT: v_writelane_b32 v16, s1, 1 -; NOOPT-NEXT: v_writelane_b32 v16, s2, 2 -; NOOPT-NEXT: v_writelane_b32 v16, s3, 3 +; NOOPT-NEXT: ; implicit-def: $vgpr31 : SGPR spill to VGPR lane +; NOOPT-NEXT: v_writelane_b32 v31, s0, 0 +; NOOPT-NEXT: v_writelane_b32 v31, s1, 1 +; NOOPT-NEXT: v_writelane_b32 v31, s2, 2 +; NOOPT-NEXT: v_writelane_b32 v31, s3, 3 ; NOOPT-NEXT: s_mov_b32 s0, 16 ; NOOPT-NEXT: s_mov_b32 s1, 15 ; NOOPT-NEXT: s_mov_b32 s2, 14 @@ -4546,37 +4538,37 @@ define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p ; NOOPT-NEXT: s_mov_b32 s15, 1 ; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: v_mov_b32_e32 v0, s15 -; NOOPT-NEXT: v_mov_b32_e32 v31, s14 -; NOOPT-NEXT: v_mov_b32_e32 v30, s13 -; NOOPT-NEXT: v_mov_b32_e32 v29, s12 -; NOOPT-NEXT: v_mov_b32_e32 v28, s11 -; NOOPT-NEXT: v_mov_b32_e32 v27, s10 -; NOOPT-NEXT: v_mov_b32_e32 v26, s9 -; NOOPT-NEXT: v_mov_b32_e32 v25, s8 -; NOOPT-NEXT: v_mov_b32_e32 v24, s7 -; NOOPT-NEXT: v_mov_b32_e32 v23, s6 -; NOOPT-NEXT: v_mov_b32_e32 v22, s5 -; NOOPT-NEXT: v_mov_b32_e32 v21, s4 -; NOOPT-NEXT: v_mov_b32_e32 v20, s3 -; NOOPT-NEXT: v_mov_b32_e32 v19, s2 -; NOOPT-NEXT: v_mov_b32_e32 v18, s1 -; NOOPT-NEXT: v_mov_b32_e32 v17, s0 +; NOOPT-NEXT: v_mov_b32_e32 v30, s14 +; NOOPT-NEXT: v_mov_b32_e32 v29, s13 +; NOOPT-NEXT: v_mov_b32_e32 v28, s12 +; NOOPT-NEXT: v_mov_b32_e32 v27, s11 +; NOOPT-NEXT: v_mov_b32_e32 v26, s10 +; NOOPT-NEXT: v_mov_b32_e32 v25, s9 +; NOOPT-NEXT: v_mov_b32_e32 v24, s8 +; NOOPT-NEXT: v_mov_b32_e32 v23, s7 +; NOOPT-NEXT: v_mov_b32_e32 v22, s6 +; NOOPT-NEXT: v_mov_b32_e32 v21, s5 +; NOOPT-NEXT: v_mov_b32_e32 v20, s4 +; NOOPT-NEXT: v_mov_b32_e32 v19, s3 +; NOOPT-NEXT: v_mov_b32_e32 v18, s2 +; NOOPT-NEXT: v_mov_b32_e32 v17, s1 +; NOOPT-NEXT: v_mov_b32_e32 v16, s0 ; NOOPT-NEXT: ; kill: def 
$vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 killed $exec -; NOOPT-NEXT: v_mov_b32_e32 v1, v31 -; NOOPT-NEXT: v_mov_b32_e32 v2, v30 -; NOOPT-NEXT: v_mov_b32_e32 v3, v29 -; NOOPT-NEXT: v_mov_b32_e32 v4, v28 -; NOOPT-NEXT: v_mov_b32_e32 v5, v27 -; NOOPT-NEXT: v_mov_b32_e32 v6, v26 -; NOOPT-NEXT: v_mov_b32_e32 v7, v25 -; NOOPT-NEXT: v_mov_b32_e32 v8, v24 -; NOOPT-NEXT: v_mov_b32_e32 v9, v23 -; NOOPT-NEXT: v_mov_b32_e32 v10, v22 -; NOOPT-NEXT: v_mov_b32_e32 v11, v21 -; NOOPT-NEXT: v_mov_b32_e32 v12, v20 -; NOOPT-NEXT: v_mov_b32_e32 v13, v19 -; NOOPT-NEXT: v_mov_b32_e32 v14, v18 -; NOOPT-NEXT: v_mov_b32_e32 v15, v17 +; NOOPT-NEXT: v_mov_b32_e32 v1, v30 +; NOOPT-NEXT: v_mov_b32_e32 v2, v29 +; NOOPT-NEXT: v_mov_b32_e32 v3, v28 +; NOOPT-NEXT: v_mov_b32_e32 v4, v27 +; NOOPT-NEXT: v_mov_b32_e32 v5, v26 +; NOOPT-NEXT: v_mov_b32_e32 v6, v25 +; NOOPT-NEXT: v_mov_b32_e32 v7, v24 +; NOOPT-NEXT: v_mov_b32_e32 v8, v23 +; NOOPT-NEXT: v_mov_b32_e32 v9, v22 +; NOOPT-NEXT: v_mov_b32_e32 v10, v21 +; NOOPT-NEXT: v_mov_b32_e32 v11, v20 +; NOOPT-NEXT: v_mov_b32_e32 v12, v19 +; NOOPT-NEXT: v_mov_b32_e32 v13, v18 +; NOOPT-NEXT: v_mov_b32_e32 v14, v17 +; NOOPT-NEXT: v_mov_b32_e32 v15, v16 ; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:72 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:76 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:80 ; 4-byte Folded Spill @@ -4593,202 +4585,195 @@ define amdgpu_kernel void @insert_neg_inline_offset_vgpr(ptr addrspace(1) %in, p ; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:124 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:128 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:132 ; 4-byte Folded Spill -; NOOPT-NEXT: v_mov_b32_e32 v17, 0x1f4 -; NOOPT-NEXT: buffer_store_dword v17, off, s[20:23], 0 offset:68 ; 
4-byte Folded Spill +; NOOPT-NEXT: v_mov_b32_e32 v16, 0x1f4 +; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:68 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 s[0:1], exec -; NOOPT-NEXT: v_writelane_b32 v16, s0, 4 -; NOOPT-NEXT: v_writelane_b32 v16, s1, 5 +; NOOPT-NEXT: v_writelane_b32 v31, s0, 4 +; NOOPT-NEXT: v_writelane_b32 v31, s1, 5 ; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 -; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v31, off, s[20:23], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[16:17] -; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:12 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:16 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:20 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:24 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:28 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:32 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:36 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:40 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:44 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:48 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:52 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:56 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:60 ; 4-byte Folded Spill +; 
NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:12 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:16 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:20 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:24 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:28 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:32 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:36 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:40 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:44 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:48 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:52 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:56 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:60 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1 -; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 -; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:64 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[16:17] -; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 6 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 7 -; NOOPT-NEXT: buffer_load_dword v1, off, s[20:23], 0 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v2, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload -; NOOPT-NEXT: 
buffer_load_dword v3, off, s[20:23], 0 offset:8 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v4, off, s[20:23], 0 offset:12 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v5, off, s[20:23], 0 offset:16 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v6, off, s[20:23], 0 offset:20 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v7, off, s[20:23], 0 offset:24 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v8, off, s[20:23], 0 offset:28 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v1, off, s[20:23], 0 offset:8 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v2, off, s[20:23], 0 offset:12 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v3, off, s[20:23], 0 offset:16 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v4, off, s[20:23], 0 offset:20 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v5, off, s[20:23], 0 offset:24 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v6, off, s[20:23], 0 offset:28 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v7, off, s[20:23], 0 offset:32 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v8, off, s[20:23], 0 offset:36 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(6) -; NOOPT-NEXT: buffer_load_dword v9, off, s[20:23], 0 offset:32 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v9, off, s[20:23], 0 offset:40 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(5) -; NOOPT-NEXT: buffer_load_dword v10, off, s[20:23], 0 offset:36 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v10, off, s[20:23], 0 offset:44 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(4) -; NOOPT-NEXT: buffer_load_dword v11, off, s[20:23], 0 offset:40 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v11, off, s[20:23], 0 offset:48 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(3) -; NOOPT-NEXT: buffer_load_dword v12, off, 
s[20:23], 0 offset:44 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v12, off, s[20:23], 0 offset:52 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(2) -; NOOPT-NEXT: buffer_load_dword v13, off, s[20:23], 0 offset:48 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v13, off, s[20:23], 0 offset:56 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(1) -; NOOPT-NEXT: buffer_load_dword v14, off, s[20:23], 0 offset:52 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v14, off, s[20:23], 0 offset:60 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:56 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:60 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:68 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v18, off, s[20:23], 0 offset:136 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:64 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:68 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:136 ; 4-byte Folded Reload +; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 +; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: s_mov_b64 exec, s[16:17] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readfirstlane_b32 s2, v18 -; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v18 +; NOOPT-NEXT: v_readlane_b32 s0, v31, 6 +; NOOPT-NEXT: v_readlane_b32 s1, v31, 7 +; NOOPT-NEXT: v_readfirstlane_b32 s2, v17 +; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17 ; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] ; NOOPT-NEXT: s_add_i32 m0, s2, -16 -; NOOPT-NEXT: v_movreld_b32_e32 v1, v17 -; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:140 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:144 ; 4-byte Folded Spill -; NOOPT-NEXT: 
buffer_store_dword v3, off, s[20:23], 0 offset:148 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:152 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:156 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:160 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:164 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:168 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:172 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:176 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:180 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:184 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:188 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:192 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:196 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:200 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:12 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:16 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:20 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:24 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:28 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:32 ; 4-byte Folded Spill -; 
NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:36 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:40 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:44 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:48 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:52 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:56 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v16, off, s[20:23], 0 offset:60 ; 4-byte Folded Spill +; NOOPT-NEXT: v_movreld_b32_e32 v0, v16 +; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:140 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:144 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:148 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:152 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:156 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:160 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:164 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:168 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:172 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:176 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:180 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:184 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:188 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:192 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:196 ; 4-byte Folded Spill +; NOOPT-NEXT: 
buffer_store_dword v15, off, s[20:23], 0 offset:200 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v1, off, s[20:23], 0 offset:8 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v2, off, s[20:23], 0 offset:12 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:16 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v4, off, s[20:23], 0 offset:20 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v5, off, s[20:23], 0 offset:24 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v6, off, s[20:23], 0 offset:28 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v7, off, s[20:23], 0 offset:32 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v8, off, s[20:23], 0 offset:36 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v9, off, s[20:23], 0 offset:40 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v10, off, s[20:23], 0 offset:44 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v11, off, s[20:23], 0 offset:48 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v12, off, s[20:23], 0 offset:52 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v13, off, s[20:23], 0 offset:56 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v14, off, s[20:23], 0 offset:60 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v15, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 s[2:3], s[0:1] -; NOOPT-NEXT: v_writelane_b32 v0, s2, 6 -; NOOPT-NEXT: v_writelane_b32 v0, s3, 7 +; NOOPT-NEXT: v_writelane_b32 v31, s2, 6 +; NOOPT-NEXT: v_writelane_b32 v31, s3, 7 ; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 -; NOOPT-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:64 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v31, off, s[20:23], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[16:17] ; NOOPT-NEXT: s_xor_b64 exec, exec, s[0:1] ; NOOPT-NEXT: s_cbranch_execnz 
.LBB15_1 ; NOOPT-NEXT: ; %bb.2: ; NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:64 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[16:17] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 4 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 5 +; NOOPT-NEXT: v_readlane_b32 s0, v31, 4 +; NOOPT-NEXT: v_readlane_b32 s1, v31, 5 ; NOOPT-NEXT: s_mov_b64 exec, s[0:1] ; NOOPT-NEXT: ; %bb.3: +; NOOPT-NEXT: buffer_load_dword v15, off, s[20:23], 0 offset:140 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:144 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:148 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v18, off, s[20:23], 0 offset:152 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v19, off, s[20:23], 0 offset:156 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v20, off, s[20:23], 0 offset:160 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v21, off, s[20:23], 0 offset:164 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v22, off, s[20:23], 0 offset:168 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v23, off, s[20:23], 0 offset:172 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v24, off, s[20:23], 0 offset:176 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v25, off, s[20:23], 0 offset:180 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v26, off, s[20:23], 0 offset:184 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v27, off, s[20:23], 0 offset:188 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v28, off, s[20:23], 0 offset:192 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v29, off, s[20:23], 0 offset:196 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v30, off, s[20:23], 0 offset:200 ; 4-byte Folded Reload ; 
NOOPT-NEXT: s_or_saveexec_b64 s[16:17], -1 -; NOOPT-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:64 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[16:17] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 0 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 1 -; NOOPT-NEXT: v_readlane_b32 s2, v0, 2 -; NOOPT-NEXT: v_readlane_b32 s3, v0, 3 -; NOOPT-NEXT: buffer_load_dword v16, off, s[20:23], 0 offset:140 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v17, off, s[20:23], 0 offset:144 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v18, off, s[20:23], 0 offset:148 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v19, off, s[20:23], 0 offset:152 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v20, off, s[20:23], 0 offset:156 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v21, off, s[20:23], 0 offset:160 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v22, off, s[20:23], 0 offset:164 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v23, off, s[20:23], 0 offset:168 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v24, off, s[20:23], 0 offset:172 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v25, off, s[20:23], 0 offset:176 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v26, off, s[20:23], 0 offset:180 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v27, off, s[20:23], 0 offset:184 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v28, off, s[20:23], 0 offset:188 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v29, off, s[20:23], 0 offset:192 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v30, off, s[20:23], 0 offset:196 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v31, off, s[20:23], 0 offset:200 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt vmcnt(12) -; NOOPT-NEXT: v_mov_b32_e32 v5, v19 -; NOOPT-NEXT: v_mov_b32_e32 v6, v18 -; NOOPT-NEXT: 
v_mov_b32_e32 v7, v17 -; NOOPT-NEXT: v_mov_b32_e32 v1, v16 -; NOOPT-NEXT: s_waitcnt vmcnt(8) -; NOOPT-NEXT: v_mov_b32_e32 v2, v23 -; NOOPT-NEXT: v_mov_b32_e32 v3, v22 -; NOOPT-NEXT: v_mov_b32_e32 v4, v21 -; NOOPT-NEXT: v_mov_b32_e32 v8, v20 -; NOOPT-NEXT: s_waitcnt vmcnt(4) -; NOOPT-NEXT: v_mov_b32_e32 v13, v27 -; NOOPT-NEXT: v_mov_b32_e32 v14, v26 -; NOOPT-NEXT: v_mov_b32_e32 v15, v25 -; NOOPT-NEXT: v_mov_b32_e32 v9, v24 -; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_mov_b32_e32 v10, v31 -; NOOPT-NEXT: v_mov_b32_e32 v11, v30 -; NOOPT-NEXT: v_mov_b32_e32 v12, v29 -; NOOPT-NEXT: v_mov_b32_e32 v16, v28 +; NOOPT-NEXT: v_readlane_b32 s0, v31, 0 +; NOOPT-NEXT: v_readlane_b32 s1, v31, 1 +; NOOPT-NEXT: v_readlane_b32 s2, v31, 2 +; NOOPT-NEXT: v_readlane_b32 s3, v31, 3 +; NOOPT-NEXT: v_mov_b32_e32 v4, v18 +; NOOPT-NEXT: v_mov_b32_e32 v5, v17 +; NOOPT-NEXT: v_mov_b32_e32 v6, v16 +; NOOPT-NEXT: v_mov_b32_e32 v0, v15 +; NOOPT-NEXT: v_mov_b32_e32 v1, v22 +; NOOPT-NEXT: v_mov_b32_e32 v2, v21 +; NOOPT-NEXT: v_mov_b32_e32 v3, v20 +; NOOPT-NEXT: v_mov_b32_e32 v7, v19 +; NOOPT-NEXT: v_mov_b32_e32 v12, v26 +; NOOPT-NEXT: v_mov_b32_e32 v13, v25 +; NOOPT-NEXT: v_mov_b32_e32 v14, v24 +; NOOPT-NEXT: v_mov_b32_e32 v8, v23 +; NOOPT-NEXT: v_mov_b32_e32 v9, v30 +; NOOPT-NEXT: v_mov_b32_e32 v10, v29 +; NOOPT-NEXT: v_mov_b32_e32 v11, v28 +; NOOPT-NEXT: v_mov_b32_e32 v15, v27 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 -; NOOPT-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17_vgpr18_vgpr19 killed $exec -; NOOPT-NEXT: v_mov_b32_e32 v17, v12 -; NOOPT-NEXT: v_mov_b32_e32 v18, v11 -; NOOPT-NEXT: v_mov_b32_e32 v19, v10 -; NOOPT-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48 +; NOOPT-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16_vgpr17_vgpr18 killed $exec +; NOOPT-NEXT: v_mov_b32_e32 v16, v11 +; NOOPT-NEXT: v_mov_b32_e32 v17, v10 +; NOOPT-NEXT: 
v_mov_b32_e32 v18, v9 +; NOOPT-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:48 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 -; NOOPT-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10_vgpr11_vgpr12 killed $exec -; NOOPT-NEXT: v_mov_b32_e32 v10, v15 -; NOOPT-NEXT: v_mov_b32_e32 v11, v14 -; NOOPT-NEXT: v_mov_b32_e32 v12, v13 -; NOOPT-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:32 +; NOOPT-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9_vgpr10_vgpr11 killed $exec +; NOOPT-NEXT: v_mov_b32_e32 v9, v14 +; NOOPT-NEXT: v_mov_b32_e32 v10, v13 +; NOOPT-NEXT: v_mov_b32_e32 v11, v12 +; NOOPT-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 -; NOOPT-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9_vgpr10_vgpr11 killed $exec +; NOOPT-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8_vgpr9_vgpr10 killed $exec ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: v_mov_b32_e32 v9, v4 -; NOOPT-NEXT: v_mov_b32_e32 v10, v3 -; NOOPT-NEXT: v_mov_b32_e32 v11, v2 -; NOOPT-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16 +; NOOPT-NEXT: v_mov_b32_e32 v8, v3 +; NOOPT-NEXT: v_mov_b32_e32 v9, v2 +; NOOPT-NEXT: v_mov_b32_e32 v10, v1 +; NOOPT-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:16 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 -; NOOPT-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec -; NOOPT-NEXT: v_mov_b32_e32 v2, v7 -; NOOPT-NEXT: v_mov_b32_e32 v3, v6 -; NOOPT-NEXT: v_mov_b32_e32 v4, v5 -; NOOPT-NEXT: buffer_store_dwordx4 v[1:4], off, s[0:3], 0 -; NOOPT-NEXT: ; kill: killed $vgpr0 +; NOOPT-NEXT: ; kill: def $vgpr0 killed 
$vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec +; NOOPT-NEXT: v_mov_b32_e32 v1, v6 +; NOOPT-NEXT: v_mov_b32_e32 v2, v5 +; NOOPT-NEXT: v_mov_b32_e32 v3, v4 +; NOOPT-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; NOOPT-NEXT: s_endpgm ; ; SI-MOVREL-LABEL: insert_neg_inline_offset_vgpr: @@ -5053,13 +5038,8 @@ define amdgpu_kernel void @extract_vgpr_offset_multiple_in_block(ptr addrspace(1 ; NOOPT-NEXT: s_mov_b32 s39, 0xe8f000 ; NOOPT-NEXT: s_add_u32 s36, s36, s9 ; NOOPT-NEXT: s_addc_u32 s37, s37, 0 -; NOOPT-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane ; NOOPT-NEXT: s_mov_b64 s[0:1], s[2:3] -; NOOPT-NEXT: v_mov_b32_e32 v1, v0 -; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1 -; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[28:29] -; NOOPT-NEXT: buffer_store_dword v1, off, s[36:39], 0 offset:76 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:76 ; 4-byte Folded Spill ; NOOPT-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x9 ; NOOPT-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd ; NOOPT-NEXT: s_waitcnt lgkmcnt(0) @@ -5071,32 +5051,32 @@ define amdgpu_kernel void @extract_vgpr_offset_multiple_in_block(ptr addrspace(1 ; NOOPT-NEXT: s_mov_b32 s5, s8 ; NOOPT-NEXT: s_mov_b32 s6, s3 ; NOOPT-NEXT: s_mov_b32 s7, s2 -; NOOPT-NEXT: s_waitcnt vmcnt(1) -; NOOPT-NEXT: v_writelane_b32 v0, s4, 0 -; NOOPT-NEXT: v_writelane_b32 v0, s5, 1 -; NOOPT-NEXT: v_writelane_b32 v0, s6, 2 -; NOOPT-NEXT: v_writelane_b32 v0, s7, 3 +; NOOPT-NEXT: ; implicit-def: $vgpr18 : SGPR spill to VGPR lane +; NOOPT-NEXT: v_writelane_b32 v18, s4, 0 +; NOOPT-NEXT: v_writelane_b32 v18, s5, 1 +; NOOPT-NEXT: v_writelane_b32 v18, s6, 2 +; NOOPT-NEXT: v_writelane_b32 v18, s7, 3 ; NOOPT-NEXT: s_mov_b32 s4, 0 -; NOOPT-NEXT: v_writelane_b32 v0, s4, 4 +; NOOPT-NEXT: v_writelane_b32 v18, s4, 4 ; NOOPT-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5 ; NOOPT-NEXT: s_mov_b32 s5, s2 ; NOOPT-NEXT: ; kill: def $sgpr0_sgpr1 killed 
$sgpr0_sgpr1 def $sgpr0_sgpr1_sgpr2_sgpr3 ; NOOPT-NEXT: s_mov_b64 s[2:3], s[4:5] ; NOOPT-NEXT: s_mov_b32 s4, 2 ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: v_lshlrev_b32_e64 v1, s4, v1 +; NOOPT-NEXT: v_lshlrev_b32_e64 v0, s4, v0 ; NOOPT-NEXT: s_mov_b32 s4, 0 ; NOOPT-NEXT: ; implicit-def: $sgpr4 -; NOOPT-NEXT: v_mov_b32_e32 v3, 0 -; NOOPT-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; NOOPT-NEXT: v_mov_b32_e32 v2, v3 -; NOOPT-NEXT: buffer_load_dword v1, v[1:2], s[0:3], 0 addr64 glc +; NOOPT-NEXT: v_mov_b32_e32 v2, 0 +; NOOPT-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; NOOPT-NEXT: v_mov_b32_e32 v1, v2 +; NOOPT-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: buffer_store_dword v1, off, s[36:39], 0 offset:72 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:72 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b32 s0, 1 ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: v_add_i32_e64 v1, s[0:1], v1, s0 -; NOOPT-NEXT: buffer_store_dword v1, off, s[36:39], 0 offset:68 ; 4-byte Folded Spill +; NOOPT-NEXT: v_add_i32_e64 v0, s[0:1], v0, s0 +; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:68 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b32 s16, 16 ; NOOPT-NEXT: s_mov_b32 s17, 15 ; NOOPT-NEXT: s_mov_b32 s18, 14 @@ -5125,255 +5105,266 @@ define amdgpu_kernel void @extract_vgpr_offset_multiple_in_block(ptr addrspace(1 ; NOOPT-NEXT: s_mov_b32 s13, s18 ; NOOPT-NEXT: s_mov_b32 s14, s17 ; NOOPT-NEXT: s_mov_b32 s15, s16 -; NOOPT-NEXT: v_writelane_b32 v0, s0, 5 -; NOOPT-NEXT: v_writelane_b32 v0, s1, 6 -; NOOPT-NEXT: v_writelane_b32 v0, s2, 7 -; NOOPT-NEXT: v_writelane_b32 v0, s3, 8 -; NOOPT-NEXT: v_writelane_b32 v0, s4, 9 -; NOOPT-NEXT: v_writelane_b32 v0, s5, 10 -; NOOPT-NEXT: v_writelane_b32 v0, s6, 11 -; NOOPT-NEXT: v_writelane_b32 v0, s7, 12 -; NOOPT-NEXT: v_writelane_b32 v0, s8, 13 -; NOOPT-NEXT: v_writelane_b32 v0, s9, 14 -; 
NOOPT-NEXT: v_writelane_b32 v0, s10, 15 -; NOOPT-NEXT: v_writelane_b32 v0, s11, 16 -; NOOPT-NEXT: v_writelane_b32 v0, s12, 17 -; NOOPT-NEXT: v_writelane_b32 v0, s13, 18 -; NOOPT-NEXT: v_writelane_b32 v0, s14, 19 -; NOOPT-NEXT: v_writelane_b32 v0, s15, 20 +; NOOPT-NEXT: v_writelane_b32 v18, s0, 5 +; NOOPT-NEXT: v_writelane_b32 v18, s1, 6 +; NOOPT-NEXT: v_writelane_b32 v18, s2, 7 +; NOOPT-NEXT: v_writelane_b32 v18, s3, 8 +; NOOPT-NEXT: v_writelane_b32 v18, s4, 9 +; NOOPT-NEXT: v_writelane_b32 v18, s5, 10 +; NOOPT-NEXT: v_writelane_b32 v18, s6, 11 +; NOOPT-NEXT: v_writelane_b32 v18, s7, 12 +; NOOPT-NEXT: v_writelane_b32 v18, s8, 13 +; NOOPT-NEXT: v_writelane_b32 v18, s9, 14 +; NOOPT-NEXT: v_writelane_b32 v18, s10, 15 +; NOOPT-NEXT: v_writelane_b32 v18, s11, 16 +; NOOPT-NEXT: v_writelane_b32 v18, s12, 17 +; NOOPT-NEXT: v_writelane_b32 v18, s13, 18 +; NOOPT-NEXT: v_writelane_b32 v18, s14, 19 +; NOOPT-NEXT: v_writelane_b32 v18, s15, 20 ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: v_mov_b32_e32 v16, s15 -; NOOPT-NEXT: v_mov_b32_e32 v15, s14 -; NOOPT-NEXT: v_mov_b32_e32 v14, s13 -; NOOPT-NEXT: v_mov_b32_e32 v13, s12 -; NOOPT-NEXT: v_mov_b32_e32 v12, s11 -; NOOPT-NEXT: v_mov_b32_e32 v11, s10 -; NOOPT-NEXT: v_mov_b32_e32 v10, s9 -; NOOPT-NEXT: v_mov_b32_e32 v9, s8 -; NOOPT-NEXT: v_mov_b32_e32 v8, s7 -; NOOPT-NEXT: v_mov_b32_e32 v7, s6 -; NOOPT-NEXT: v_mov_b32_e32 v6, s5 -; NOOPT-NEXT: v_mov_b32_e32 v5, s4 -; NOOPT-NEXT: v_mov_b32_e32 v4, s3 -; NOOPT-NEXT: v_mov_b32_e32 v3, s2 -; NOOPT-NEXT: v_mov_b32_e32 v2, s1 -; NOOPT-NEXT: v_mov_b32_e32 v1, s0 -; NOOPT-NEXT: buffer_store_dword v1, off, s[36:39], 0 offset:4 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v2, off, s[36:39], 0 offset:8 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v3, off, s[36:39], 0 offset:12 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v4, off, s[36:39], 0 offset:16 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v5, off, s[36:39], 0 offset:20 ; 4-byte Folded Spill 
-; NOOPT-NEXT: buffer_store_dword v6, off, s[36:39], 0 offset:24 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v7, off, s[36:39], 0 offset:28 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v8, off, s[36:39], 0 offset:32 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v9, off, s[36:39], 0 offset:36 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v10, off, s[36:39], 0 offset:40 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v11, off, s[36:39], 0 offset:44 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v12, off, s[36:39], 0 offset:48 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v13, off, s[36:39], 0 offset:52 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v14, off, s[36:39], 0 offset:56 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v15, off, s[36:39], 0 offset:60 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v16, off, s[36:39], 0 offset:64 ; 4-byte Folded Spill +; NOOPT-NEXT: v_mov_b32_e32 v0, s0 +; NOOPT-NEXT: v_mov_b32_e32 v1, s1 +; NOOPT-NEXT: v_mov_b32_e32 v2, s2 +; NOOPT-NEXT: v_mov_b32_e32 v3, s3 +; NOOPT-NEXT: v_mov_b32_e32 v4, s4 +; NOOPT-NEXT: v_mov_b32_e32 v5, s5 +; NOOPT-NEXT: v_mov_b32_e32 v6, s6 +; NOOPT-NEXT: v_mov_b32_e32 v7, s7 +; NOOPT-NEXT: v_mov_b32_e32 v8, s8 +; NOOPT-NEXT: v_mov_b32_e32 v9, s9 +; NOOPT-NEXT: v_mov_b32_e32 v10, s10 +; NOOPT-NEXT: v_mov_b32_e32 v11, s11 +; NOOPT-NEXT: v_mov_b32_e32 v12, s12 +; NOOPT-NEXT: v_mov_b32_e32 v13, s13 +; NOOPT-NEXT: v_mov_b32_e32 v14, s14 +; NOOPT-NEXT: v_mov_b32_e32 v15, s15 +; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v1, off, s[36:39], 0 offset:8 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v2, off, s[36:39], 0 offset:12 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v3, off, s[36:39], 0 offset:16 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v4, off, s[36:39], 0 offset:20 ; 4-byte Folded Spill +; NOOPT-NEXT: 
buffer_store_dword v5, off, s[36:39], 0 offset:24 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v6, off, s[36:39], 0 offset:28 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v7, off, s[36:39], 0 offset:32 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v8, off, s[36:39], 0 offset:36 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v9, off, s[36:39], 0 offset:40 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v10, off, s[36:39], 0 offset:44 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v11, off, s[36:39], 0 offset:48 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v12, off, s[36:39], 0 offset:52 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v13, off, s[36:39], 0 offset:56 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v14, off, s[36:39], 0 offset:60 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v15, off, s[36:39], 0 offset:64 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 s[0:1], exec -; NOOPT-NEXT: v_writelane_b32 v0, s0, 21 -; NOOPT-NEXT: v_writelane_b32 v0, s1, 22 +; NOOPT-NEXT: v_writelane_b32 v18, s0, 21 +; NOOPT-NEXT: v_writelane_b32 v18, s1, 22 ; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1 -; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v18, off, s[36:39], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[28:29] ; NOOPT-NEXT: ; implicit-def: $vgpr0 ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: .LBB16_1: ; =>This Inner Loop Header: Depth=1 +; NOOPT-NEXT: buffer_load_dword v17, off, s[36:39], 0 offset:80 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) +; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:8 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v2, off, s[36:39], 0 offset:12 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v3, off, s[36:39], 0 offset:16 ; 4-byte Folded 
Reload +; NOOPT-NEXT: buffer_load_dword v4, off, s[36:39], 0 offset:20 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v5, off, s[36:39], 0 offset:24 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v6, off, s[36:39], 0 offset:28 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v7, off, s[36:39], 0 offset:32 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v8, off, s[36:39], 0 offset:36 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v9, off, s[36:39], 0 offset:40 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(6) +; NOOPT-NEXT: buffer_load_dword v10, off, s[36:39], 0 offset:44 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(5) +; NOOPT-NEXT: buffer_load_dword v11, off, s[36:39], 0 offset:48 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(4) +; NOOPT-NEXT: buffer_load_dword v12, off, s[36:39], 0 offset:52 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(3) +; NOOPT-NEXT: buffer_load_dword v13, off, s[36:39], 0 offset:56 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(2) +; NOOPT-NEXT: buffer_load_dword v14, off, s[36:39], 0 offset:60 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) +; NOOPT-NEXT: buffer_load_dword v15, off, s[36:39], 0 offset:64 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v16, off, s[36:39], 0 offset:72 ; 4-byte Folded Reload ; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1 ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[28:29] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 23 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 24 -; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 offset:80 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:4 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v2, off, s[36:39], 0 offset:8 ; 4-byte Folded 
Reload -; NOOPT-NEXT: buffer_load_dword v3, off, s[36:39], 0 offset:12 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v4, off, s[36:39], 0 offset:16 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v5, off, s[36:39], 0 offset:20 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v6, off, s[36:39], 0 offset:24 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v7, off, s[36:39], 0 offset:28 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v8, off, s[36:39], 0 offset:32 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v9, off, s[36:39], 0 offset:36 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v10, off, s[36:39], 0 offset:40 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v11, off, s[36:39], 0 offset:44 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v12, off, s[36:39], 0 offset:48 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v13, off, s[36:39], 0 offset:52 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v14, off, s[36:39], 0 offset:56 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v15, off, s[36:39], 0 offset:60 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v16, off, s[36:39], 0 offset:64 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v17, off, s[36:39], 0 offset:72 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readfirstlane_b32 s2, v17 -; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17 +; NOOPT-NEXT: v_readlane_b32 s0, v18, 23 +; NOOPT-NEXT: v_readlane_b32 s1, v18, 24 +; NOOPT-NEXT: v_readfirstlane_b32 s2, v16 +; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v16 ; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] ; NOOPT-NEXT: s_mov_b32 m0, s2 -; NOOPT-NEXT: v_movrels_b32_e32 v1, v1 -; NOOPT-NEXT: buffer_store_dword v1, off, s[36:39], 0 offset:84 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v1, off, s[36:39], 0 offset:80 ; 4-byte Folded Spill +; NOOPT-NEXT: v_movrels_b32_e32 v0, v0 +; NOOPT-NEXT: buffer_store_dword v0, off, 
s[36:39], 0 offset:84 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:80 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 s[2:3], s[0:1] -; NOOPT-NEXT: v_writelane_b32 v0, s2, 23 -; NOOPT-NEXT: v_writelane_b32 v0, s3, 24 +; NOOPT-NEXT: v_writelane_b32 v18, s2, 23 +; NOOPT-NEXT: v_writelane_b32 v18, s3, 24 ; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1 -; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v18, off, s[36:39], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[28:29] ; NOOPT-NEXT: s_xor_b64 exec, exec, s[0:1] ; NOOPT-NEXT: s_cbranch_execnz .LBB16_1 ; NOOPT-NEXT: ; %bb.2: ; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1 ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[28:29] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 21 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 22 +; NOOPT-NEXT: v_readlane_b32 s0, v18, 21 +; NOOPT-NEXT: v_readlane_b32 s1, v18, 22 ; NOOPT-NEXT: s_mov_b64 exec, s[0:1] ; NOOPT-NEXT: ; %bb.3: ; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1 -; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[28:29] ; NOOPT-NEXT: ;;#ASMSTART ; NOOPT-NEXT: s_mov_b32 s4, 17 ; NOOPT-NEXT: ;;#ASMEND ; NOOPT-NEXT: s_mov_b32 s16, s4 ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 5 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 6 -; NOOPT-NEXT: v_readlane_b32 s2, v0, 7 -; NOOPT-NEXT: v_readlane_b32 s3, v0, 8 -; NOOPT-NEXT: v_readlane_b32 s4, v0, 9 -; NOOPT-NEXT: v_readlane_b32 s5, v0, 10 -; NOOPT-NEXT: v_readlane_b32 s6, v0, 11 -; NOOPT-NEXT: v_readlane_b32 s7, v0, 12 -; NOOPT-NEXT: v_readlane_b32 s8, v0, 13 -; NOOPT-NEXT: v_readlane_b32 s9, 
v0, 14 -; NOOPT-NEXT: v_readlane_b32 s10, v0, 15 -; NOOPT-NEXT: v_readlane_b32 s11, v0, 16 -; NOOPT-NEXT: v_readlane_b32 s12, v0, 17 -; NOOPT-NEXT: v_readlane_b32 s13, v0, 18 -; NOOPT-NEXT: v_readlane_b32 s14, v0, 19 -; NOOPT-NEXT: v_readlane_b32 s15, v0, 20 -; NOOPT-NEXT: v_writelane_b32 v0, s16, 25 -; NOOPT-NEXT: v_mov_b32_e32 v16, s15 -; NOOPT-NEXT: v_mov_b32_e32 v15, s14 -; NOOPT-NEXT: v_mov_b32_e32 v14, s13 -; NOOPT-NEXT: v_mov_b32_e32 v13, s12 -; NOOPT-NEXT: v_mov_b32_e32 v12, s11 -; NOOPT-NEXT: v_mov_b32_e32 v11, s10 -; NOOPT-NEXT: v_mov_b32_e32 v10, s9 -; NOOPT-NEXT: v_mov_b32_e32 v9, s8 -; NOOPT-NEXT: v_mov_b32_e32 v8, s7 -; NOOPT-NEXT: v_mov_b32_e32 v7, s6 -; NOOPT-NEXT: v_mov_b32_e32 v6, s5 -; NOOPT-NEXT: v_mov_b32_e32 v5, s4 -; NOOPT-NEXT: v_mov_b32_e32 v4, s3 -; NOOPT-NEXT: v_mov_b32_e32 v3, s2 -; NOOPT-NEXT: v_mov_b32_e32 v2, s1 -; NOOPT-NEXT: v_mov_b32_e32 v1, s0 -; NOOPT-NEXT: buffer_store_dword v1, off, s[36:39], 0 offset:88 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v2, off, s[36:39], 0 offset:92 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v3, off, s[36:39], 0 offset:96 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v4, off, s[36:39], 0 offset:100 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v5, off, s[36:39], 0 offset:104 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v6, off, s[36:39], 0 offset:108 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v7, off, s[36:39], 0 offset:112 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v8, off, s[36:39], 0 offset:116 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v9, off, s[36:39], 0 offset:120 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v10, off, s[36:39], 0 offset:124 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v11, off, s[36:39], 0 offset:128 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v12, off, s[36:39], 0 offset:132 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v13, off, 
s[36:39], 0 offset:136 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v14, off, s[36:39], 0 offset:140 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v15, off, s[36:39], 0 offset:144 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v16, off, s[36:39], 0 offset:148 ; 4-byte Folded Spill +; NOOPT-NEXT: v_readlane_b32 s0, v18, 5 +; NOOPT-NEXT: v_readlane_b32 s1, v18, 6 +; NOOPT-NEXT: v_readlane_b32 s2, v18, 7 +; NOOPT-NEXT: v_readlane_b32 s3, v18, 8 +; NOOPT-NEXT: v_readlane_b32 s4, v18, 9 +; NOOPT-NEXT: v_readlane_b32 s5, v18, 10 +; NOOPT-NEXT: v_readlane_b32 s6, v18, 11 +; NOOPT-NEXT: v_readlane_b32 s7, v18, 12 +; NOOPT-NEXT: v_readlane_b32 s8, v18, 13 +; NOOPT-NEXT: v_readlane_b32 s9, v18, 14 +; NOOPT-NEXT: v_readlane_b32 s10, v18, 15 +; NOOPT-NEXT: v_readlane_b32 s11, v18, 16 +; NOOPT-NEXT: v_readlane_b32 s12, v18, 17 +; NOOPT-NEXT: v_readlane_b32 s13, v18, 18 +; NOOPT-NEXT: v_readlane_b32 s14, v18, 19 +; NOOPT-NEXT: v_readlane_b32 s15, v18, 20 +; NOOPT-NEXT: v_writelane_b32 v18, s16, 25 +; NOOPT-NEXT: v_mov_b32_e32 v0, s0 +; NOOPT-NEXT: v_mov_b32_e32 v1, s1 +; NOOPT-NEXT: v_mov_b32_e32 v2, s2 +; NOOPT-NEXT: v_mov_b32_e32 v3, s3 +; NOOPT-NEXT: v_mov_b32_e32 v4, s4 +; NOOPT-NEXT: v_mov_b32_e32 v5, s5 +; NOOPT-NEXT: v_mov_b32_e32 v6, s6 +; NOOPT-NEXT: v_mov_b32_e32 v7, s7 +; NOOPT-NEXT: v_mov_b32_e32 v8, s8 +; NOOPT-NEXT: v_mov_b32_e32 v9, s9 +; NOOPT-NEXT: v_mov_b32_e32 v10, s10 +; NOOPT-NEXT: v_mov_b32_e32 v11, s11 +; NOOPT-NEXT: v_mov_b32_e32 v12, s12 +; NOOPT-NEXT: v_mov_b32_e32 v13, s13 +; NOOPT-NEXT: v_mov_b32_e32 v14, s14 +; NOOPT-NEXT: v_mov_b32_e32 v15, s15 +; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:88 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v1, off, s[36:39], 0 offset:92 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v2, off, s[36:39], 0 offset:96 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v3, off, s[36:39], 0 offset:100 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword 
v4, off, s[36:39], 0 offset:104 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v5, off, s[36:39], 0 offset:108 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v6, off, s[36:39], 0 offset:112 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v7, off, s[36:39], 0 offset:116 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v8, off, s[36:39], 0 offset:120 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v9, off, s[36:39], 0 offset:124 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v10, off, s[36:39], 0 offset:128 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v11, off, s[36:39], 0 offset:132 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v12, off, s[36:39], 0 offset:136 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v13, off, s[36:39], 0 offset:140 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v14, off, s[36:39], 0 offset:144 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v15, off, s[36:39], 0 offset:148 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 s[0:1], exec -; NOOPT-NEXT: v_writelane_b32 v0, s0, 26 -; NOOPT-NEXT: v_writelane_b32 v0, s1, 27 +; NOOPT-NEXT: v_writelane_b32 v18, s0, 26 +; NOOPT-NEXT: v_writelane_b32 v18, s1, 27 ; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1 -; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v18, off, s[36:39], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[28:29] ; NOOPT-NEXT: ; implicit-def: $vgpr0 ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: .LBB16_4: ; =>This Inner Loop Header: Depth=1 +; NOOPT-NEXT: buffer_load_dword v17, off, s[36:39], 0 offset:152 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) +; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:88 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:92 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v2, off, s[36:39], 0 offset:96 ; 4-byte Folded Reload 
+; NOOPT-NEXT: buffer_load_dword v3, off, s[36:39], 0 offset:100 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v4, off, s[36:39], 0 offset:104 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v5, off, s[36:39], 0 offset:108 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v6, off, s[36:39], 0 offset:112 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v7, off, s[36:39], 0 offset:116 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v8, off, s[36:39], 0 offset:120 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v9, off, s[36:39], 0 offset:124 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(6) +; NOOPT-NEXT: buffer_load_dword v10, off, s[36:39], 0 offset:128 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(5) +; NOOPT-NEXT: buffer_load_dword v11, off, s[36:39], 0 offset:132 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(4) +; NOOPT-NEXT: buffer_load_dword v12, off, s[36:39], 0 offset:136 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(3) +; NOOPT-NEXT: buffer_load_dword v13, off, s[36:39], 0 offset:140 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(2) +; NOOPT-NEXT: buffer_load_dword v14, off, s[36:39], 0 offset:144 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(1) +; NOOPT-NEXT: buffer_load_dword v15, off, s[36:39], 0 offset:148 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v16, off, s[36:39], 0 offset:68 ; 4-byte Folded Reload ; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1 ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[28:29] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 28 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 29 -; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 offset:152 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:88 ; 4-byte 
Folded Reload -; NOOPT-NEXT: buffer_load_dword v2, off, s[36:39], 0 offset:92 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v3, off, s[36:39], 0 offset:96 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v4, off, s[36:39], 0 offset:100 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v5, off, s[36:39], 0 offset:104 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v6, off, s[36:39], 0 offset:108 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v7, off, s[36:39], 0 offset:112 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v8, off, s[36:39], 0 offset:116 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v9, off, s[36:39], 0 offset:120 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v10, off, s[36:39], 0 offset:124 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v11, off, s[36:39], 0 offset:128 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v12, off, s[36:39], 0 offset:132 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v13, off, s[36:39], 0 offset:136 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v14, off, s[36:39], 0 offset:140 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v15, off, s[36:39], 0 offset:144 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v16, off, s[36:39], 0 offset:148 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v17, off, s[36:39], 0 offset:68 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readfirstlane_b32 s2, v17 -; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17 +; NOOPT-NEXT: v_readlane_b32 s0, v18, 28 +; NOOPT-NEXT: v_readlane_b32 s1, v18, 29 +; NOOPT-NEXT: v_readfirstlane_b32 s2, v16 +; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v16 ; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] ; NOOPT-NEXT: s_mov_b32 m0, s2 -; NOOPT-NEXT: v_movrels_b32_e32 v1, v1 -; NOOPT-NEXT: buffer_store_dword v1, off, s[36:39], 0 offset:156 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v1, off, s[36:39], 0 offset:152 
; 4-byte Folded Spill +; NOOPT-NEXT: v_movrels_b32_e32 v0, v0 +; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:156 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:152 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 s[2:3], s[0:1] -; NOOPT-NEXT: v_writelane_b32 v0, s2, 28 -; NOOPT-NEXT: v_writelane_b32 v0, s3, 29 +; NOOPT-NEXT: v_writelane_b32 v18, s2, 28 +; NOOPT-NEXT: v_writelane_b32 v18, s3, 29 ; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1 -; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v18, off, s[36:39], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[28:29] ; NOOPT-NEXT: s_xor_b64 exec, exec, s[0:1] ; NOOPT-NEXT: s_cbranch_execnz .LBB16_4 ; NOOPT-NEXT: ; %bb.5: ; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1 ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[28:29] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 26 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 27 +; NOOPT-NEXT: v_readlane_b32 s0, v18, 26 +; NOOPT-NEXT: v_readlane_b32 s1, v18, 27 ; NOOPT-NEXT: s_mov_b64 exec, s[0:1] ; NOOPT-NEXT: ; %bb.6: +; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:76 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:156 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v2, off, s[36:39], 0 offset:84 ; 4-byte Folded Reload ; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1 -; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[28:29] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 4 -; NOOPT-NEXT: v_readlane_b32 s4, v0, 0 -; NOOPT-NEXT: v_readlane_b32 s5, v0, 1 -; NOOPT-NEXT: 
v_readlane_b32 s6, v0, 2 -; NOOPT-NEXT: v_readlane_b32 s7, v0, 3 -; NOOPT-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:76 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v2, off, s[36:39], 0 offset:156 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v3, off, s[36:39], 0 offset:84 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: buffer_store_dword v3, off, s[4:7], 0 -; NOOPT-NEXT: s_waitcnt vmcnt(0) +; NOOPT-NEXT: v_readlane_b32 s0, v18, 4 +; NOOPT-NEXT: v_readlane_b32 s4, v18, 0 +; NOOPT-NEXT: v_readlane_b32 s5, v18, 1 +; NOOPT-NEXT: v_readlane_b32 s6, v18, 2 +; NOOPT-NEXT: v_readlane_b32 s7, v18, 3 ; NOOPT-NEXT: buffer_store_dword v2, off, s[4:7], 0 ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_cmp_eq_u32_e64 s[2:3], v1, s0 +; NOOPT-NEXT: buffer_store_dword v1, off, s[4:7], 0 +; NOOPT-NEXT: s_waitcnt vmcnt(0) +; NOOPT-NEXT: v_cmp_eq_u32_e64 s[2:3], v0, s0 ; NOOPT-NEXT: s_mov_b64 s[0:1], exec -; NOOPT-NEXT: v_writelane_b32 v0, s0, 30 -; NOOPT-NEXT: v_writelane_b32 v0, s1, 31 +; NOOPT-NEXT: v_writelane_b32 v18, s0, 30 +; NOOPT-NEXT: v_writelane_b32 v18, s1, 31 ; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1 -; NOOPT-NEXT: buffer_store_dword v0, off, s[36:39], 0 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v18, off, s[36:39], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[28:29] ; NOOPT-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] ; NOOPT-NEXT: s_mov_b64 exec, s[0:1] @@ -5381,10 +5372,10 @@ define amdgpu_kernel void @extract_vgpr_offset_multiple_in_block(ptr addrspace(1 ; NOOPT-NEXT: ; %bb.7: ; %bb1 ; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1 ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[28:29] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s4, v0, 25 +; NOOPT-NEXT: v_readlane_b32 s4, v18, 25 ; NOOPT-NEXT: ; 
implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: s_mov_b32 s7, s1 ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 @@ -5401,13 +5392,12 @@ define amdgpu_kernel void @extract_vgpr_offset_multiple_in_block(ptr addrspace(1 ; NOOPT-NEXT: .LBB16_8: ; %bb2 ; NOOPT-NEXT: s_or_saveexec_b64 s[28:29], -1 ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[36:39], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[28:29] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 30 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 31 +; NOOPT-NEXT: v_readlane_b32 s0, v18, 30 +; NOOPT-NEXT: v_readlane_b32 s1, v18, 31 ; NOOPT-NEXT: s_or_b64 exec, exec, s[0:1] -; NOOPT-NEXT: ; kill: killed $vgpr0 ; NOOPT-NEXT: s_endpgm ; ; SI-MOVREL-LABEL: extract_vgpr_offset_multiple_in_block: @@ -5827,7 +5817,6 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: s_mov_b32 s31, 0xe8f000 ; NOOPT-NEXT: s_add_u32 s28, s28, s9 ; NOOPT-NEXT: s_addc_u32 s29, s29, 0 -; NOOPT-NEXT: ; implicit-def: $vgpr16 : SGPR spill to VGPR lane ; NOOPT-NEXT: buffer_store_dword v0, off, s[28:31], 0 offset:84 ; 4-byte Folded Spill ; NOOPT-NEXT: s_load_dwordx2 s[18:19], s[2:3], 0x9 ; NOOPT-NEXT: s_load_dwordx2 s[16:17], s[2:3], 0xd @@ -5841,12 +5830,13 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: s_mov_b32 s21, s24 ; NOOPT-NEXT: s_mov_b32 s22, s19 ; NOOPT-NEXT: s_mov_b32 s23, s18 -; NOOPT-NEXT: v_writelane_b32 v16, s20, 0 -; NOOPT-NEXT: v_writelane_b32 v16, s21, 1 -; NOOPT-NEXT: v_writelane_b32 v16, s22, 2 -; NOOPT-NEXT: v_writelane_b32 v16, s23, 3 +; NOOPT-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane +; NOOPT-NEXT: v_writelane_b32 v32, s20, 0 +; NOOPT-NEXT: v_writelane_b32 v32, s21, 1 +; NOOPT-NEXT: v_writelane_b32 v32, s22, 2 +; NOOPT-NEXT: v_writelane_b32 v32, s23, 3 ; NOOPT-NEXT: s_mov_b32 s20, 0 -; NOOPT-NEXT: 
v_writelane_b32 v16, s20, 4 +; NOOPT-NEXT: v_writelane_b32 v32, s20, 4 ; NOOPT-NEXT: ; kill: def $sgpr20 killed $sgpr20 def $sgpr20_sgpr21 ; NOOPT-NEXT: s_mov_b32 s21, s18 ; NOOPT-NEXT: ; kill: def $sgpr16_sgpr17 killed $sgpr16_sgpr17 def $sgpr16_sgpr17_sgpr18_sgpr19 @@ -5890,115 +5880,113 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: v_mov_b32_e32 v14, s14 ; NOOPT-NEXT: v_mov_b32_e32 v15, s15 ; NOOPT-NEXT: s_mov_b64 s[0:1], exec -; NOOPT-NEXT: v_writelane_b32 v16, s0, 5 -; NOOPT-NEXT: v_writelane_b32 v16, s1, 6 +; NOOPT-NEXT: v_writelane_b32 v32, s0, 5 +; NOOPT-NEXT: v_writelane_b32 v32, s1, 6 ; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1 -; NOOPT-NEXT: buffer_store_dword v16, off, s[28:31], 0 offset:64 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v32, off, s[28:31], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[26:27] -; NOOPT-NEXT: buffer_store_dword v0, off, s[28:31], 0 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:4 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:8 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:12 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v4, off, s[28:31], 0 offset:16 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v5, off, s[28:31], 0 offset:20 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v6, off, s[28:31], 0 offset:24 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v7, off, s[28:31], 0 offset:28 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v8, off, s[28:31], 0 offset:32 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v9, off, s[28:31], 0 offset:36 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v10, off, s[28:31], 0 offset:40 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v11, off, s[28:31], 0 offset:44 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v12, off, s[28:31], 0 
offset:48 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v13, off, s[28:31], 0 offset:52 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:56 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:60 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v0, off, s[28:31], 0 offset:4 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:8 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:12 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:16 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v4, off, s[28:31], 0 offset:20 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v5, off, s[28:31], 0 offset:24 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v6, off, s[28:31], 0 offset:28 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v7, off, s[28:31], 0 offset:32 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v8, off, s[28:31], 0 offset:36 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v9, off, s[28:31], 0 offset:40 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v10, off, s[28:31], 0 offset:44 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v11, off, s[28:31], 0 offset:48 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v12, off, s[28:31], 0 offset:52 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v13, off, s[28:31], 0 offset:56 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:60 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:64 ; 4-byte Folded Spill ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: .LBB17_1: ; =>This Inner Loop Header: Depth=1 -; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1 -; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:64 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, 
s[26:27] -; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 7 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 8 -; NOOPT-NEXT: buffer_load_dword v1, off, s[28:31], 0 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v2, off, s[28:31], 0 offset:4 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v3, off, s[28:31], 0 offset:8 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v4, off, s[28:31], 0 offset:12 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v5, off, s[28:31], 0 offset:16 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v6, off, s[28:31], 0 offset:20 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v7, off, s[28:31], 0 offset:24 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v8, off, s[28:31], 0 offset:28 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:4 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v1, off, s[28:31], 0 offset:8 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v2, off, s[28:31], 0 offset:12 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v3, off, s[28:31], 0 offset:16 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v4, off, s[28:31], 0 offset:20 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v5, off, s[28:31], 0 offset:24 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v6, off, s[28:31], 0 offset:28 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v7, off, s[28:31], 0 offset:32 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v8, off, s[28:31], 0 offset:36 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(6) -; NOOPT-NEXT: buffer_load_dword v9, off, s[28:31], 0 offset:32 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v9, off, s[28:31], 0 offset:40 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(5) -; NOOPT-NEXT: buffer_load_dword v10, off, s[28:31], 0 offset:36 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v10, off, s[28:31], 0 offset:44 ; 4-byte Folded 
Reload ; NOOPT-NEXT: s_waitcnt expcnt(4) -; NOOPT-NEXT: buffer_load_dword v11, off, s[28:31], 0 offset:40 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v11, off, s[28:31], 0 offset:48 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(3) -; NOOPT-NEXT: buffer_load_dword v12, off, s[28:31], 0 offset:44 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v12, off, s[28:31], 0 offset:52 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(2) -; NOOPT-NEXT: buffer_load_dword v13, off, s[28:31], 0 offset:48 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v13, off, s[28:31], 0 offset:56 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(1) -; NOOPT-NEXT: buffer_load_dword v14, off, s[28:31], 0 offset:52 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v14, off, s[28:31], 0 offset:60 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v15, off, s[28:31], 0 offset:56 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v16, off, s[28:31], 0 offset:60 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v17, off, s[28:31], 0 offset:72 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v18, off, s[28:31], 0 offset:80 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v15, off, s[28:31], 0 offset:64 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v16, off, s[28:31], 0 offset:72 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v17, off, s[28:31], 0 offset:80 ; 4-byte Folded Reload +; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1 +; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: s_mov_b64 exec, s[26:27] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readfirstlane_b32 s2, v18 -; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v18 +; NOOPT-NEXT: v_readlane_b32 s0, v32, 7 +; NOOPT-NEXT: v_readlane_b32 s1, v32, 8 +; NOOPT-NEXT: v_readfirstlane_b32 s2, v17 +; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17 ; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], 
s[0:1] ; NOOPT-NEXT: s_mov_b32 m0, s2 -; NOOPT-NEXT: v_movreld_b32_e32 v1, v17 -; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:88 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:92 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:96 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v4, off, s[28:31], 0 offset:100 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v5, off, s[28:31], 0 offset:104 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v6, off, s[28:31], 0 offset:108 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v7, off, s[28:31], 0 offset:112 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v8, off, s[28:31], 0 offset:116 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v9, off, s[28:31], 0 offset:120 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v10, off, s[28:31], 0 offset:124 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v11, off, s[28:31], 0 offset:128 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v12, off, s[28:31], 0 offset:132 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v13, off, s[28:31], 0 offset:136 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:140 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:144 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v16, off, s[28:31], 0 offset:148 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:4 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:8 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v4, off, s[28:31], 0 offset:12 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v5, off, s[28:31], 0 offset:16 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v6, off, s[28:31], 0 offset:20 ; 4-byte Folded 
Spill -; NOOPT-NEXT: buffer_store_dword v7, off, s[28:31], 0 offset:24 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v8, off, s[28:31], 0 offset:28 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v9, off, s[28:31], 0 offset:32 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v10, off, s[28:31], 0 offset:36 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v11, off, s[28:31], 0 offset:40 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v12, off, s[28:31], 0 offset:44 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v13, off, s[28:31], 0 offset:48 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:52 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:56 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v16, off, s[28:31], 0 offset:60 ; 4-byte Folded Spill +; NOOPT-NEXT: v_movreld_b32_e32 v0, v16 +; NOOPT-NEXT: buffer_store_dword v0, off, s[28:31], 0 offset:88 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:92 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:96 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:100 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v4, off, s[28:31], 0 offset:104 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v5, off, s[28:31], 0 offset:108 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v6, off, s[28:31], 0 offset:112 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v7, off, s[28:31], 0 offset:116 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v8, off, s[28:31], 0 offset:120 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v9, off, s[28:31], 0 offset:124 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v10, off, s[28:31], 0 offset:128 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v11, off, s[28:31], 0 offset:132 ; 4-byte Folded Spill +; NOOPT-NEXT: 
buffer_store_dword v12, off, s[28:31], 0 offset:136 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v13, off, s[28:31], 0 offset:140 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:144 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:148 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v0, off, s[28:31], 0 offset:4 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:8 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:12 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:16 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v4, off, s[28:31], 0 offset:20 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v5, off, s[28:31], 0 offset:24 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v6, off, s[28:31], 0 offset:28 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v7, off, s[28:31], 0 offset:32 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v8, off, s[28:31], 0 offset:36 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v9, off, s[28:31], 0 offset:40 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v10, off, s[28:31], 0 offset:44 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v11, off, s[28:31], 0 offset:48 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v12, off, s[28:31], 0 offset:52 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v13, off, s[28:31], 0 offset:56 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:60 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:64 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 s[2:3], s[0:1] -; NOOPT-NEXT: v_writelane_b32 v0, s2, 7 -; NOOPT-NEXT: v_writelane_b32 v0, s3, 8 +; NOOPT-NEXT: v_writelane_b32 v32, s2, 7 +; NOOPT-NEXT: v_writelane_b32 v32, s3, 8 ; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1 -; 
NOOPT-NEXT: buffer_store_dword v0, off, s[28:31], 0 offset:64 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v32, off, s[28:31], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[26:27] ; NOOPT-NEXT: s_xor_b64 exec, exec, s[0:1] ; NOOPT-NEXT: s_cbranch_execnz .LBB17_1 ; NOOPT-NEXT: ; %bb.2: ; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1 ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:64 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[26:27] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 5 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 6 +; NOOPT-NEXT: v_readlane_b32 s0, v32, 5 +; NOOPT-NEXT: v_readlane_b32 s1, v32, 6 ; NOOPT-NEXT: s_mov_b64 exec, s[0:1] ; NOOPT-NEXT: ; %bb.3: ; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:88 ; 4-byte Folded Reload @@ -6018,16 +6006,16 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: buffer_load_dword v14, off, s[28:31], 0 offset:144 ; 4-byte Folded Reload ; NOOPT-NEXT: buffer_load_dword v15, off, s[28:31], 0 offset:148 ; 4-byte Folded Reload ; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1 -; NOOPT-NEXT: buffer_load_dword v16, off, s[28:31], 0 offset:64 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[26:27] -; NOOPT-NEXT: v_mov_b32_e32 v17, 63 -; NOOPT-NEXT: buffer_store_dword v17, off, s[28:31], 0 offset:216 ; 4-byte Folded Spill +; NOOPT-NEXT: v_mov_b32_e32 v16, 63 +; NOOPT-NEXT: buffer_store_dword v16, off, s[28:31], 0 offset:216 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 s[0:1], exec ; NOOPT-NEXT: s_waitcnt vmcnt(1) -; NOOPT-NEXT: v_writelane_b32 v16, s0, 9 -; NOOPT-NEXT: v_writelane_b32 v16, s1, 10 +; NOOPT-NEXT: v_writelane_b32 v32, s0, 9 +; NOOPT-NEXT: v_writelane_b32 v32, s1, 10 ; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1 
-; NOOPT-NEXT: buffer_store_dword v16, off, s[28:31], 0 offset:64 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v32, off, s[28:31], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[26:27] ; NOOPT-NEXT: buffer_store_dword v0, off, s[28:31], 0 offset:152 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:156 ; 4-byte Folded Spill @@ -6047,193 +6035,186 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:212 ; 4-byte Folded Spill ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: .LBB17_4: ; =>This Inner Loop Header: Depth=1 -; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1 -; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:64 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[26:27] -; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 11 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 12 -; NOOPT-NEXT: buffer_load_dword v1, off, s[28:31], 0 offset:152 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v2, off, s[28:31], 0 offset:156 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v3, off, s[28:31], 0 offset:160 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v4, off, s[28:31], 0 offset:164 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v5, off, s[28:31], 0 offset:168 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v6, off, s[28:31], 0 offset:172 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v7, off, s[28:31], 0 offset:176 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v8, off, s[28:31], 0 offset:180 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:152 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v1, off, s[28:31], 0 offset:156 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v2, off, s[28:31], 0 offset:160 ; 4-byte Folded Reload +; NOOPT-NEXT: 
buffer_load_dword v3, off, s[28:31], 0 offset:164 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v4, off, s[28:31], 0 offset:168 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v5, off, s[28:31], 0 offset:172 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v6, off, s[28:31], 0 offset:176 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v7, off, s[28:31], 0 offset:180 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v8, off, s[28:31], 0 offset:184 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(6) -; NOOPT-NEXT: buffer_load_dword v9, off, s[28:31], 0 offset:184 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v9, off, s[28:31], 0 offset:188 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(5) -; NOOPT-NEXT: buffer_load_dword v10, off, s[28:31], 0 offset:188 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v10, off, s[28:31], 0 offset:192 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(4) -; NOOPT-NEXT: buffer_load_dword v11, off, s[28:31], 0 offset:192 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v11, off, s[28:31], 0 offset:196 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(3) -; NOOPT-NEXT: buffer_load_dword v12, off, s[28:31], 0 offset:196 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v12, off, s[28:31], 0 offset:200 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(2) -; NOOPT-NEXT: buffer_load_dword v13, off, s[28:31], 0 offset:200 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v13, off, s[28:31], 0 offset:204 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(1) -; NOOPT-NEXT: buffer_load_dword v14, off, s[28:31], 0 offset:204 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v14, off, s[28:31], 0 offset:208 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v15, off, s[28:31], 0 offset:208 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v16, off, s[28:31], 0 offset:212 ; 4-byte Folded Reload -; 
NOOPT-NEXT: buffer_load_dword v17, off, s[28:31], 0 offset:216 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v18, off, s[28:31], 0 offset:76 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v15, off, s[28:31], 0 offset:212 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v16, off, s[28:31], 0 offset:216 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v17, off, s[28:31], 0 offset:76 ; 4-byte Folded Reload +; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1 +; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: s_mov_b64 exec, s[26:27] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readfirstlane_b32 s2, v18 -; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v18 +; NOOPT-NEXT: v_readlane_b32 s0, v32, 11 +; NOOPT-NEXT: v_readlane_b32 s1, v32, 12 +; NOOPT-NEXT: v_readfirstlane_b32 s2, v17 +; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17 ; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] ; NOOPT-NEXT: s_mov_b32 m0, s2 -; NOOPT-NEXT: v_movreld_b32_e32 v1, v17 -; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:220 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:224 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:228 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v4, off, s[28:31], 0 offset:232 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v5, off, s[28:31], 0 offset:236 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v6, off, s[28:31], 0 offset:240 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v7, off, s[28:31], 0 offset:244 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v8, off, s[28:31], 0 offset:248 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v9, off, s[28:31], 0 offset:252 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v10, off, s[28:31], 0 offset:256 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v11, off, s[28:31], 0 offset:260 ; 4-byte Folded Spill -; 
NOOPT-NEXT: buffer_store_dword v12, off, s[28:31], 0 offset:264 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v13, off, s[28:31], 0 offset:268 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:272 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:276 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v16, off, s[28:31], 0 offset:280 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:152 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:156 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:160 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v4, off, s[28:31], 0 offset:164 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v5, off, s[28:31], 0 offset:168 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v6, off, s[28:31], 0 offset:172 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v7, off, s[28:31], 0 offset:176 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v8, off, s[28:31], 0 offset:180 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v9, off, s[28:31], 0 offset:184 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v10, off, s[28:31], 0 offset:188 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v11, off, s[28:31], 0 offset:192 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v12, off, s[28:31], 0 offset:196 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v13, off, s[28:31], 0 offset:200 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:204 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:208 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v16, off, s[28:31], 0 offset:212 ; 4-byte Folded Spill +; NOOPT-NEXT: v_movreld_b32_e32 v0, v16 +; NOOPT-NEXT: buffer_store_dword v0, off, s[28:31], 0 offset:220 ; 4-byte Folded Spill +; 
NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:224 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:228 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:232 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v4, off, s[28:31], 0 offset:236 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v5, off, s[28:31], 0 offset:240 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v6, off, s[28:31], 0 offset:244 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v7, off, s[28:31], 0 offset:248 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v8, off, s[28:31], 0 offset:252 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v9, off, s[28:31], 0 offset:256 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v10, off, s[28:31], 0 offset:260 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v11, off, s[28:31], 0 offset:264 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v12, off, s[28:31], 0 offset:268 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v13, off, s[28:31], 0 offset:272 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:276 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:280 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v0, off, s[28:31], 0 offset:152 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v1, off, s[28:31], 0 offset:156 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v2, off, s[28:31], 0 offset:160 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v3, off, s[28:31], 0 offset:164 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v4, off, s[28:31], 0 offset:168 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v5, off, s[28:31], 0 offset:172 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v6, off, s[28:31], 0 offset:176 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v7, off, s[28:31], 0 
offset:180 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v8, off, s[28:31], 0 offset:184 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v9, off, s[28:31], 0 offset:188 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v10, off, s[28:31], 0 offset:192 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v11, off, s[28:31], 0 offset:196 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v12, off, s[28:31], 0 offset:200 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v13, off, s[28:31], 0 offset:204 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v14, off, s[28:31], 0 offset:208 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v15, off, s[28:31], 0 offset:212 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 s[2:3], s[0:1] -; NOOPT-NEXT: v_writelane_b32 v0, s2, 11 -; NOOPT-NEXT: v_writelane_b32 v0, s3, 12 +; NOOPT-NEXT: v_writelane_b32 v32, s2, 11 +; NOOPT-NEXT: v_writelane_b32 v32, s3, 12 ; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1 -; NOOPT-NEXT: buffer_store_dword v0, off, s[28:31], 0 offset:64 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v32, off, s[28:31], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[26:27] ; NOOPT-NEXT: s_xor_b64 exec, exec, s[0:1] ; NOOPT-NEXT: s_cbranch_execnz .LBB17_4 ; NOOPT-NEXT: ; %bb.5: ; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1 ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:64 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[26:27] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 9 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 10 +; NOOPT-NEXT: v_readlane_b32 s0, v32, 9 +; NOOPT-NEXT: v_readlane_b32 s1, v32, 10 ; NOOPT-NEXT: s_mov_b64 exec, s[0:1] ; NOOPT-NEXT: ; %bb.6: +; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:84 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v16, off, s[28:31], 0 offset:220 
; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v17, off, s[28:31], 0 offset:224 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v18, off, s[28:31], 0 offset:228 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v19, off, s[28:31], 0 offset:232 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v20, off, s[28:31], 0 offset:236 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v21, off, s[28:31], 0 offset:240 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v22, off, s[28:31], 0 offset:244 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v23, off, s[28:31], 0 offset:248 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v24, off, s[28:31], 0 offset:252 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v25, off, s[28:31], 0 offset:256 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v26, off, s[28:31], 0 offset:260 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v27, off, s[28:31], 0 offset:264 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v28, off, s[28:31], 0 offset:268 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v29, off, s[28:31], 0 offset:272 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v30, off, s[28:31], 0 offset:276 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v31, off, s[28:31], 0 offset:280 ; 4-byte Folded Reload ; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1 -; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:64 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[26:27] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 4 -; NOOPT-NEXT: v_readlane_b32 s4, v0, 0 -; NOOPT-NEXT: v_readlane_b32 s5, v0, 1 -; NOOPT-NEXT: v_readlane_b32 s6, v0, 2 -; NOOPT-NEXT: v_readlane_b32 s7, v0, 3 -; NOOPT-NEXT: buffer_load_dword v1, off, s[28:31], 0 offset:84 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v17, off, s[28:31], 0 offset:220 ; 
4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v18, off, s[28:31], 0 offset:224 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v19, off, s[28:31], 0 offset:228 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v20, off, s[28:31], 0 offset:232 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v21, off, s[28:31], 0 offset:236 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v22, off, s[28:31], 0 offset:240 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v23, off, s[28:31], 0 offset:244 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v24, off, s[28:31], 0 offset:248 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v25, off, s[28:31], 0 offset:252 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v26, off, s[28:31], 0 offset:256 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v27, off, s[28:31], 0 offset:260 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v28, off, s[28:31], 0 offset:264 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v29, off, s[28:31], 0 offset:268 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v30, off, s[28:31], 0 offset:272 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v31, off, s[28:31], 0 offset:276 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 offset:280 ; 4-byte Folded Reload -; NOOPT-NEXT: s_waitcnt vmcnt(12) -; NOOPT-NEXT: v_mov_b32_e32 v6, v20 -; NOOPT-NEXT: v_mov_b32_e32 v7, v19 -; NOOPT-NEXT: v_mov_b32_e32 v8, v18 -; NOOPT-NEXT: v_mov_b32_e32 v2, v17 -; NOOPT-NEXT: s_waitcnt vmcnt(8) -; NOOPT-NEXT: v_mov_b32_e32 v3, v24 -; NOOPT-NEXT: v_mov_b32_e32 v4, v23 -; NOOPT-NEXT: v_mov_b32_e32 v5, v22 -; NOOPT-NEXT: v_mov_b32_e32 v9, v21 -; NOOPT-NEXT: s_waitcnt vmcnt(4) -; NOOPT-NEXT: v_mov_b32_e32 v14, v28 -; NOOPT-NEXT: v_mov_b32_e32 v15, v27 -; NOOPT-NEXT: v_mov_b32_e32 v16, v26 -; NOOPT-NEXT: v_mov_b32_e32 v10, v25 -; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_mov_b32_e32 v11, v32 -; NOOPT-NEXT: 
v_mov_b32_e32 v12, v31 -; NOOPT-NEXT: v_mov_b32_e32 v13, v30 -; NOOPT-NEXT: v_mov_b32_e32 v17, v29 +; NOOPT-NEXT: v_readlane_b32 s0, v32, 4 +; NOOPT-NEXT: v_readlane_b32 s4, v32, 0 +; NOOPT-NEXT: v_readlane_b32 s5, v32, 1 +; NOOPT-NEXT: v_readlane_b32 s6, v32, 2 +; NOOPT-NEXT: v_readlane_b32 s7, v32, 3 +; NOOPT-NEXT: v_mov_b32_e32 v5, v19 +; NOOPT-NEXT: v_mov_b32_e32 v6, v18 +; NOOPT-NEXT: v_mov_b32_e32 v7, v17 +; NOOPT-NEXT: v_mov_b32_e32 v1, v16 +; NOOPT-NEXT: v_mov_b32_e32 v2, v23 +; NOOPT-NEXT: v_mov_b32_e32 v3, v22 +; NOOPT-NEXT: v_mov_b32_e32 v4, v21 +; NOOPT-NEXT: v_mov_b32_e32 v8, v20 +; NOOPT-NEXT: v_mov_b32_e32 v13, v27 +; NOOPT-NEXT: v_mov_b32_e32 v14, v26 +; NOOPT-NEXT: v_mov_b32_e32 v15, v25 +; NOOPT-NEXT: v_mov_b32_e32 v9, v24 +; NOOPT-NEXT: v_mov_b32_e32 v10, v31 +; NOOPT-NEXT: v_mov_b32_e32 v11, v30 +; NOOPT-NEXT: v_mov_b32_e32 v12, v29 +; NOOPT-NEXT: v_mov_b32_e32 v16, v28 ; NOOPT-NEXT: ; implicit-def: $sgpr1 ; NOOPT-NEXT: ; implicit-def: $sgpr1 ; NOOPT-NEXT: ; implicit-def: $sgpr1 ; NOOPT-NEXT: ; implicit-def: $sgpr1 -; NOOPT-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18_vgpr19_vgpr20 killed $exec -; NOOPT-NEXT: v_mov_b32_e32 v18, v13 -; NOOPT-NEXT: v_mov_b32_e32 v19, v12 -; NOOPT-NEXT: v_mov_b32_e32 v20, v11 -; NOOPT-NEXT: buffer_store_dwordx4 v[17:20], off, s[4:7], 0 offset:48 +; NOOPT-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17_vgpr18_vgpr19 killed $exec +; NOOPT-NEXT: v_mov_b32_e32 v17, v12 +; NOOPT-NEXT: v_mov_b32_e32 v18, v11 +; NOOPT-NEXT: v_mov_b32_e32 v19, v10 +; NOOPT-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 offset:48 ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: ; implicit-def: $sgpr1 ; NOOPT-NEXT: ; implicit-def: $sgpr1 ; NOOPT-NEXT: ; implicit-def: $sgpr1 ; NOOPT-NEXT: ; implicit-def: $sgpr1 -; NOOPT-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11_vgpr12_vgpr13 killed $exec -; NOOPT-NEXT: v_mov_b32_e32 v11, v16 -; NOOPT-NEXT: v_mov_b32_e32 v12, v15 -; NOOPT-NEXT: v_mov_b32_e32 v13, 
v14 -; NOOPT-NEXT: buffer_store_dwordx4 v[10:13], off, s[4:7], 0 offset:32 +; NOOPT-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10_vgpr11_vgpr12 killed $exec +; NOOPT-NEXT: v_mov_b32_e32 v10, v15 +; NOOPT-NEXT: v_mov_b32_e32 v11, v14 +; NOOPT-NEXT: v_mov_b32_e32 v12, v13 +; NOOPT-NEXT: buffer_store_dwordx4 v[9:12], off, s[4:7], 0 offset:32 ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: ; implicit-def: $sgpr1 ; NOOPT-NEXT: ; implicit-def: $sgpr1 ; NOOPT-NEXT: ; implicit-def: $sgpr1 ; NOOPT-NEXT: ; implicit-def: $sgpr1 -; NOOPT-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10_vgpr11_vgpr12 killed $exec +; NOOPT-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9_vgpr10_vgpr11 killed $exec ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: v_mov_b32_e32 v10, v5 -; NOOPT-NEXT: v_mov_b32_e32 v11, v4 -; NOOPT-NEXT: v_mov_b32_e32 v12, v3 -; NOOPT-NEXT: buffer_store_dwordx4 v[9:12], off, s[4:7], 0 offset:16 +; NOOPT-NEXT: v_mov_b32_e32 v9, v4 +; NOOPT-NEXT: v_mov_b32_e32 v10, v3 +; NOOPT-NEXT: v_mov_b32_e32 v11, v2 +; NOOPT-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:16 ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: ; implicit-def: $sgpr1 ; NOOPT-NEXT: ; implicit-def: $sgpr1 ; NOOPT-NEXT: ; implicit-def: $sgpr1 ; NOOPT-NEXT: ; implicit-def: $sgpr1 -; NOOPT-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec -; NOOPT-NEXT: v_mov_b32_e32 v3, v8 -; NOOPT-NEXT: v_mov_b32_e32 v4, v7 -; NOOPT-NEXT: v_mov_b32_e32 v5, v6 -; NOOPT-NEXT: buffer_store_dwordx4 v[2:5], off, s[4:7], 0 +; NOOPT-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec +; NOOPT-NEXT: v_mov_b32_e32 v2, v7 +; NOOPT-NEXT: v_mov_b32_e32 v3, v6 +; NOOPT-NEXT: v_mov_b32_e32 v4, v5 +; NOOPT-NEXT: buffer_store_dwordx4 v[1:4], off, s[4:7], 0 ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_cmp_eq_u32_e64 s[2:3], v1, s0 +; NOOPT-NEXT: v_cmp_eq_u32_e64 s[2:3], v0, s0 ; NOOPT-NEXT: s_mov_b64 s[0:1], exec -; NOOPT-NEXT: 
v_writelane_b32 v0, s0, 13 -; NOOPT-NEXT: v_writelane_b32 v0, s1, 14 +; NOOPT-NEXT: v_writelane_b32 v32, s0, 13 +; NOOPT-NEXT: v_writelane_b32 v32, s1, 14 ; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1 -; NOOPT-NEXT: buffer_store_dword v0, off, s[28:31], 0 offset:64 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v32, off, s[28:31], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[26:27] ; NOOPT-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] ; NOOPT-NEXT: s_mov_b64 exec, s[0:1] ; NOOPT-NEXT: s_cbranch_execz .LBB17_8 ; NOOPT-NEXT: ; %bb.7: ; %bb1 -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:68 ; 4-byte Folded Reload ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: s_mov_b32 s6, s1 @@ -6251,13 +6232,12 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: .LBB17_8: ; %bb2 ; NOOPT-NEXT: s_or_saveexec_b64 s[26:27], -1 ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:64 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v32, off, s[28:31], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[26:27] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 13 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 14 +; NOOPT-NEXT: v_readlane_b32 s0, v32, 13 +; NOOPT-NEXT: v_readlane_b32 s1, v32, 14 ; NOOPT-NEXT: s_or_b64 exec, exec, s[0:1] -; NOOPT-NEXT: ; kill: killed $vgpr0 ; NOOPT-NEXT: s_endpgm ; ; SI-MOVREL-LABEL: insert_vgpr_offset_multiple_in_block: @@ -7279,28 +7259,28 @@ define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) { ; NOOPT-NEXT: s_mov_b32 s15, 0xe8f000 ; NOOPT-NEXT: s_add_u32 s12, s12, s9 ; NOOPT-NEXT: s_addc_u32 s13, s13, 0 -; NOOPT-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane ; NOOPT-NEXT: s_load_dword s2, s[2:3], 0x9 ; NOOPT-NEXT: s_mov_b64 s[0:1], -1 ; NOOPT-NEXT: ; implicit-def: $sgpr3 ; NOOPT-NEXT: s_mov_b32 s3, 0 ; NOOPT-NEXT: s_waitcnt lgkmcnt(0) ; NOOPT-NEXT: 
s_cmp_lg_u32 s2, s3 -; NOOPT-NEXT: v_writelane_b32 v0, s0, 0 -; NOOPT-NEXT: v_writelane_b32 v0, s1, 1 +; NOOPT-NEXT: ; implicit-def: $vgpr4 : SGPR spill to VGPR lane +; NOOPT-NEXT: v_writelane_b32 v4, s0, 0 +; NOOPT-NEXT: v_writelane_b32 v4, s1, 1 ; NOOPT-NEXT: s_mov_b64 s[8:9], exec ; NOOPT-NEXT: s_mov_b64 exec, -1 -; NOOPT-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[8:9] ; NOOPT-NEXT: s_cbranch_scc1 .LBB19_3 ; NOOPT-NEXT: .LBB19_1: ; %Flow ; NOOPT-NEXT: s_or_saveexec_b64 s[8:9], -1 ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[8:9] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 0 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 1 +; NOOPT-NEXT: v_readlane_b32 s0, v4, 0 +; NOOPT-NEXT: v_readlane_b32 s1, v4, 1 ; NOOPT-NEXT: ; implicit-def: $sgpr2 ; NOOPT-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] ; NOOPT-NEXT: s_mov_b32 s0, 1 @@ -7330,7 +7310,7 @@ define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) { ; NOOPT-NEXT: .LBB19_3: ; %bb4 ; NOOPT-NEXT: s_or_saveexec_b64 s[8:9], -1 ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[8:9] ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: s_mov_b32 s6, s1 @@ -7342,24 +7322,21 @@ define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) { ; NOOPT-NEXT: s_mov_b32 s1, s6 ; NOOPT-NEXT: s_mov_b32 s2, s5 ; NOOPT-NEXT: s_mov_b32 s3, s4 -; NOOPT-NEXT: buffer_load_dwordx4 v[1:4], off, s[0:3], 0 glc +; NOOPT-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 glc ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: ; implicit-def: $sgpr0 ; 
NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: ;;#ASMSTART -; NOOPT-NEXT: ; reg use v[1:4] +; NOOPT-NEXT: ; reg use v[0:3] ; NOOPT-NEXT: ;;#ASMEND ; NOOPT-NEXT: s_mov_b64 s[0:1], 0 -; NOOPT-NEXT: v_writelane_b32 v0, s0, 0 -; NOOPT-NEXT: v_writelane_b32 v0, s1, 1 +; NOOPT-NEXT: v_writelane_b32 v4, s0, 0 +; NOOPT-NEXT: v_writelane_b32 v4, s1, 1 ; NOOPT-NEXT: s_or_saveexec_b64 s[8:9], -1 -; NOOPT-NEXT: buffer_store_dword v0, off, s[12:15], 0 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v4, off, s[12:15], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[8:9] ; NOOPT-NEXT: s_branch .LBB19_1 ; NOOPT-NEXT: .LBB19_4: ; %bb7 -; NOOPT-NEXT: s_or_saveexec_b64 s[8:9], -1 -; NOOPT-NEXT: buffer_load_dword v0, off, s[12:15], 0 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[8:9] ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: s_mov_b32 s7, s1 @@ -7371,10 +7348,9 @@ define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) { ; NOOPT-NEXT: s_mov_b32 s1, s7 ; NOOPT-NEXT: s_mov_b32 s2, s6 ; NOOPT-NEXT: s_mov_b32 s3, s5 -; NOOPT-NEXT: v_mov_b32_e32 v1, s4 -; NOOPT-NEXT: buffer_store_dword v1, off, s[0:3], 0 +; NOOPT-NEXT: v_mov_b32_e32 v0, s4 +; NOOPT-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: ; kill: killed $vgpr0 ; NOOPT-NEXT: s_endpgm ; ; SI-MOVREL-LABEL: extract_adjacent_blocks: @@ -7525,7 +7501,6 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) { ; NOOPT-NEXT: s_mov_b32 s19, 0xe8f000 ; NOOPT-NEXT: s_add_u32 s16, s16, s9 ; NOOPT-NEXT: s_addc_u32 s17, s17, 0 -; NOOPT-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane ; NOOPT-NEXT: s_mov_b64 s[0:1], s[2:3] ; NOOPT-NEXT: s_load_dword s2, s[0:1], 0x9 ; NOOPT-NEXT: s_load_dword s0, s[0:1], 0xa @@ -7534,21 +7509,22 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) { ; NOOPT-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7 ; NOOPT-NEXT: s_mov_b32 s3, 0 ; 
NOOPT-NEXT: s_cmp_lg_u32 s2, s3 -; NOOPT-NEXT: v_writelane_b32 v0, s0, 0 -; NOOPT-NEXT: v_writelane_b32 v0, s1, 1 +; NOOPT-NEXT: ; implicit-def: $vgpr4 : SGPR spill to VGPR lane +; NOOPT-NEXT: v_writelane_b32 v4, s0, 0 +; NOOPT-NEXT: v_writelane_b32 v4, s1, 1 ; NOOPT-NEXT: s_mov_b64 s[12:13], exec ; NOOPT-NEXT: s_mov_b64 exec, -1 -; NOOPT-NEXT: buffer_store_dword v0, off, s[16:19], 0 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v4, off, s[16:19], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[12:13] ; NOOPT-NEXT: s_cbranch_scc1 .LBB20_3 ; NOOPT-NEXT: .LBB20_1: ; %Flow ; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1 ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[16:19], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v4, off, s[16:19], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[12:13] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 0 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 1 +; NOOPT-NEXT: v_readlane_b32 s0, v4, 0 +; NOOPT-NEXT: v_readlane_b32 s1, v4, 1 ; NOOPT-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7 ; NOOPT-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] ; NOOPT-NEXT: s_mov_b32 s0, 1 @@ -7579,7 +7555,7 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) { ; NOOPT-NEXT: .LBB20_3: ; %bb4 ; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1 ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[16:19], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v4, off, s[16:19], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[12:13] ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: s_mov_b32 s6, s1 @@ -7591,25 +7567,22 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) { ; NOOPT-NEXT: s_mov_b32 s1, s6 ; NOOPT-NEXT: s_mov_b32 s2, s5 ; NOOPT-NEXT: s_mov_b32 s3, s4 -; NOOPT-NEXT: buffer_load_dwordx4 v[1:4], off, s[0:3], 0 glc +; NOOPT-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 glc ; NOOPT-NEXT: 
s_waitcnt vmcnt(0) ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1_sgpr2_sgpr3 -; NOOPT-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4 +; NOOPT-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: ;;#ASMSTART -; NOOPT-NEXT: ; reg use v[1:4] +; NOOPT-NEXT: ; reg use v[0:3] ; NOOPT-NEXT: ;;#ASMEND ; NOOPT-NEXT: s_mov_b64 s[0:1], 0 -; NOOPT-NEXT: v_writelane_b32 v0, s0, 0 -; NOOPT-NEXT: v_writelane_b32 v0, s1, 1 +; NOOPT-NEXT: v_writelane_b32 v4, s0, 0 +; NOOPT-NEXT: v_writelane_b32 v4, s1, 1 ; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1 -; NOOPT-NEXT: buffer_store_dword v0, off, s[16:19], 0 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v4, off, s[16:19], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[12:13] ; NOOPT-NEXT: s_branch .LBB20_1 ; NOOPT-NEXT: .LBB20_4: ; %bb7 -; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1 -; NOOPT-NEXT: buffer_load_dword v0, off, s[16:19], 0 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[12:13] ; NOOPT-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7 ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: s_mov_b32 s10, s1 @@ -7621,13 +7594,12 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) { ; NOOPT-NEXT: s_mov_b32 s1, s10 ; NOOPT-NEXT: s_mov_b32 s2, s9 ; NOOPT-NEXT: s_mov_b32 s3, s8 -; NOOPT-NEXT: v_mov_b32_e32 v1, s4 -; NOOPT-NEXT: v_mov_b32_e32 v2, s5 -; NOOPT-NEXT: v_mov_b32_e32 v3, s6 -; NOOPT-NEXT: v_mov_b32_e32 v4, s7 -; NOOPT-NEXT: buffer_store_dwordx4 v[1:4], off, s[0:3], 0 +; NOOPT-NEXT: v_mov_b32_e32 v0, s4 +; NOOPT-NEXT: v_mov_b32_e32 v1, s5 +; NOOPT-NEXT: v_mov_b32_e32 v2, s6 +; NOOPT-NEXT: v_mov_b32_e32 v3, s7 +; NOOPT-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: ; kill: killed $vgpr0 ; NOOPT-NEXT: s_endpgm ; ; SI-MOVREL-LABEL: insert_adjacent_blocks: @@ -9084,49 +9056,48 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) { ; NOOPT-NEXT: s_mov_b32 s27, 0xe8f000 ; 
NOOPT-NEXT: s_add_u32 s24, s24, s9 ; NOOPT-NEXT: s_addc_u32 s25, s25, 0 -; NOOPT-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane ; NOOPT-NEXT: s_load_dword s1, s[2:3], 0x9 ; NOOPT-NEXT: s_load_dword s0, s[2:3], 0xa +; NOOPT-NEXT: ; implicit-def: $vgpr18 : SGPR spill to VGPR lane ; NOOPT-NEXT: s_waitcnt lgkmcnt(0) -; NOOPT-NEXT: v_writelane_b32 v0, s1, 0 +; NOOPT-NEXT: v_writelane_b32 v18, s1, 0 ; NOOPT-NEXT: s_mov_b32 s1, 8 -; NOOPT-NEXT: v_writelane_b32 v0, s0, 1 +; NOOPT-NEXT: v_writelane_b32 v18, s0, 1 ; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1 -; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v18, off, s[24:27], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[20:21] -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: v_mov_b32_e32 v0, 8 -; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Spill ; NOOPT-NEXT: .LBB26_1: ; %bb2 ; NOOPT-NEXT: ; =>This Loop Header: Depth=1 ; NOOPT-NEXT: ; Child Loop BB26_3 Depth 2 -; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1 ; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload +; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1 +; NOOPT-NEXT: buffer_load_dword v18, off, s[24:27], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[20:21] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s2, v0, 0 -; NOOPT-NEXT: buffer_load_dword v1, off, s[24:27], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: v_readlane_b32 s2, v18, 0 ; NOOPT-NEXT: s_mov_b64 s[0:1], -1 ; NOOPT-NEXT: ; implicit-def: $sgpr4 -; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_cmp_ge_i32_e64 s[2:3], v1, s2 -; NOOPT-NEXT: v_mov_b32_e32 v1, s4 +; NOOPT-NEXT: v_cmp_ge_i32_e64 s[2:3], v0, s2 +; NOOPT-NEXT: v_mov_b32_e32 v0, s4 ; NOOPT-NEXT: s_and_b64 vcc, exec, s[2:3] -; NOOPT-NEXT: buffer_store_dword v1, off, 
s[24:27], 0 offset:8 ; 4-byte Folded Spill -; NOOPT-NEXT: v_writelane_b32 v0, s0, 2 -; NOOPT-NEXT: v_writelane_b32 v0, s1, 3 +; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:8 ; 4-byte Folded Spill +; NOOPT-NEXT: v_writelane_b32 v18, s0, 2 +; NOOPT-NEXT: v_writelane_b32 v18, s1, 3 ; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1 -; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v18, off, s[24:27], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[20:21] ; NOOPT-NEXT: s_cbranch_vccnz .LBB26_6 ; NOOPT-NEXT: ; %bb.2: ; %bb4 ; NOOPT-NEXT: ; in Loop: Header=BB26_1 Depth=1 ; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1 -; NOOPT-NEXT: buffer_load_dword v16, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload +; NOOPT-NEXT: s_waitcnt expcnt(0) +; NOOPT-NEXT: buffer_load_dword v18, off, s[24:27], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[20:21] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v16, 1 +; NOOPT-NEXT: v_readlane_b32 s0, v18, 1 ; NOOPT-NEXT: ; implicit-def: $sgpr2_sgpr3 ; NOOPT-NEXT: ; kill: def $sgpr3 killed $sgpr3 killed $sgpr2_sgpr3 ; NOOPT-NEXT: ; implicit-def: $sgpr4_sgpr5 @@ -9137,7 +9108,6 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) { ; NOOPT-NEXT: s_mov_b32 s5, s3 ; NOOPT-NEXT: s_mov_b32 s6, s2 ; NOOPT-NEXT: s_mov_b32 s7, s1 -; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:80 ; 4-byte Folded Spill @@ -9159,13 +9129,13 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) { ; NOOPT-NEXT: v_mov_b32_e32 v13, s17 ; NOOPT-NEXT: v_mov_b32_e32 v14, s18 ; NOOPT-NEXT: v_mov_b32_e32 v15, s19 -; NOOPT-NEXT: v_mov_b32_e32 v17, s0 -; NOOPT-NEXT: buffer_store_dword v17, off, s[24:27], 0 offset:76 ; 4-byte Folded Spill +; NOOPT-NEXT: v_mov_b32_e32 v16, s0 +; NOOPT-NEXT: buffer_store_dword v16, 
off, s[24:27], 0 offset:76 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 s[0:1], exec -; NOOPT-NEXT: v_writelane_b32 v16, s0, 4 -; NOOPT-NEXT: v_writelane_b32 v16, s1, 5 +; NOOPT-NEXT: v_writelane_b32 v18, s0, 4 +; NOOPT-NEXT: v_writelane_b32 v18, s1, 5 ; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1 -; NOOPT-NEXT: buffer_store_dword v16, off, s[24:27], 0 offset:4 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v18, off, s[24:27], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[20:21] ; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:12 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v1, off, s[24:27], 0 offset:16 ; 4-byte Folded Spill @@ -9186,146 +9156,139 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) { ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: .LBB26_3: ; Parent Loop BB26_1 Depth=1 ; NOOPT-NEXT: ; => This Inner Loop Header: Depth=2 -; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1 -; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[20:21] -; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 6 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 7 -; NOOPT-NEXT: buffer_load_dword v1, off, s[24:27], 0 offset:12 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v2, off, s[24:27], 0 offset:16 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v3, off, s[24:27], 0 offset:20 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v4, off, s[24:27], 0 offset:24 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v5, off, s[24:27], 0 offset:28 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v6, off, s[24:27], 0 offset:32 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v7, off, s[24:27], 0 offset:36 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v8, off, s[24:27], 0 offset:40 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:12 ; 
4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v1, off, s[24:27], 0 offset:16 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v2, off, s[24:27], 0 offset:20 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v3, off, s[24:27], 0 offset:24 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v4, off, s[24:27], 0 offset:28 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v5, off, s[24:27], 0 offset:32 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v6, off, s[24:27], 0 offset:36 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v7, off, s[24:27], 0 offset:40 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v8, off, s[24:27], 0 offset:44 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(6) -; NOOPT-NEXT: buffer_load_dword v9, off, s[24:27], 0 offset:44 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v9, off, s[24:27], 0 offset:48 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(5) -; NOOPT-NEXT: buffer_load_dword v10, off, s[24:27], 0 offset:48 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v10, off, s[24:27], 0 offset:52 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(4) -; NOOPT-NEXT: buffer_load_dword v11, off, s[24:27], 0 offset:52 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v11, off, s[24:27], 0 offset:56 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(3) -; NOOPT-NEXT: buffer_load_dword v12, off, s[24:27], 0 offset:56 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v12, off, s[24:27], 0 offset:60 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(2) -; NOOPT-NEXT: buffer_load_dword v13, off, s[24:27], 0 offset:60 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v13, off, s[24:27], 0 offset:64 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(1) -; NOOPT-NEXT: buffer_load_dword v14, off, s[24:27], 0 offset:64 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v14, off, s[24:27], 0 offset:68 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt 
expcnt(0) -; NOOPT-NEXT: buffer_load_dword v15, off, s[24:27], 0 offset:68 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v16, off, s[24:27], 0 offset:72 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v17, off, s[24:27], 0 offset:76 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v18, off, s[24:27], 0 offset:80 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v15, off, s[24:27], 0 offset:72 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v16, off, s[24:27], 0 offset:76 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v17, off, s[24:27], 0 offset:80 ; 4-byte Folded Reload +; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1 +; NOOPT-NEXT: buffer_load_dword v18, off, s[24:27], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: s_mov_b64 exec, s[20:21] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readfirstlane_b32 s2, v18 -; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v18 +; NOOPT-NEXT: v_readlane_b32 s0, v18, 6 +; NOOPT-NEXT: v_readlane_b32 s1, v18, 7 +; NOOPT-NEXT: v_readfirstlane_b32 s2, v17 +; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17 ; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] ; NOOPT-NEXT: s_mov_b32 m0, s2 -; NOOPT-NEXT: v_movreld_b32_e32 v1, v17 -; NOOPT-NEXT: buffer_store_dword v1, off, s[24:27], 0 offset:84 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v2, off, s[24:27], 0 offset:88 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v3, off, s[24:27], 0 offset:92 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v4, off, s[24:27], 0 offset:96 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v5, off, s[24:27], 0 offset:100 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v6, off, s[24:27], 0 offset:104 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v7, off, s[24:27], 0 offset:108 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v8, off, s[24:27], 0 offset:112 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v9, off, s[24:27], 0 offset:116 ; 4-byte Folded Spill -; 
NOOPT-NEXT: buffer_store_dword v10, off, s[24:27], 0 offset:120 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v11, off, s[24:27], 0 offset:124 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v12, off, s[24:27], 0 offset:128 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v13, off, s[24:27], 0 offset:132 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v14, off, s[24:27], 0 offset:136 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v15, off, s[24:27], 0 offset:140 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v16, off, s[24:27], 0 offset:144 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v1, off, s[24:27], 0 offset:12 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v2, off, s[24:27], 0 offset:16 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v3, off, s[24:27], 0 offset:20 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v4, off, s[24:27], 0 offset:24 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v5, off, s[24:27], 0 offset:28 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v6, off, s[24:27], 0 offset:32 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v7, off, s[24:27], 0 offset:36 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v8, off, s[24:27], 0 offset:40 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v9, off, s[24:27], 0 offset:44 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v10, off, s[24:27], 0 offset:48 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v11, off, s[24:27], 0 offset:52 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v12, off, s[24:27], 0 offset:56 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v13, off, s[24:27], 0 offset:60 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v14, off, s[24:27], 0 offset:64 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v15, off, s[24:27], 0 offset:68 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v16, off, s[24:27], 0 offset:72 ; 
4-byte Folded Spill +; NOOPT-NEXT: v_movreld_b32_e32 v0, v16 +; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:84 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v1, off, s[24:27], 0 offset:88 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v2, off, s[24:27], 0 offset:92 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v3, off, s[24:27], 0 offset:96 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v4, off, s[24:27], 0 offset:100 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v5, off, s[24:27], 0 offset:104 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v6, off, s[24:27], 0 offset:108 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v7, off, s[24:27], 0 offset:112 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v8, off, s[24:27], 0 offset:116 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v9, off, s[24:27], 0 offset:120 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v10, off, s[24:27], 0 offset:124 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v11, off, s[24:27], 0 offset:128 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v12, off, s[24:27], 0 offset:132 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v13, off, s[24:27], 0 offset:136 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v14, off, s[24:27], 0 offset:140 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v15, off, s[24:27], 0 offset:144 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:12 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v1, off, s[24:27], 0 offset:16 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v2, off, s[24:27], 0 offset:20 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v3, off, s[24:27], 0 offset:24 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v4, off, s[24:27], 0 offset:28 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v5, off, s[24:27], 0 offset:32 ; 4-byte Folded Spill +; 
NOOPT-NEXT: buffer_store_dword v6, off, s[24:27], 0 offset:36 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v7, off, s[24:27], 0 offset:40 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v8, off, s[24:27], 0 offset:44 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v9, off, s[24:27], 0 offset:48 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v10, off, s[24:27], 0 offset:52 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v11, off, s[24:27], 0 offset:56 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v12, off, s[24:27], 0 offset:60 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v13, off, s[24:27], 0 offset:64 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v14, off, s[24:27], 0 offset:68 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v15, off, s[24:27], 0 offset:72 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 s[2:3], s[0:1] -; NOOPT-NEXT: v_writelane_b32 v0, s2, 6 -; NOOPT-NEXT: v_writelane_b32 v0, s3, 7 +; NOOPT-NEXT: v_writelane_b32 v18, s2, 6 +; NOOPT-NEXT: v_writelane_b32 v18, s3, 7 ; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1 -; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v18, off, s[24:27], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[20:21] ; NOOPT-NEXT: s_xor_b64 exec, exec, s[0:1] ; NOOPT-NEXT: s_cbranch_execnz .LBB26_3 ; NOOPT-NEXT: ; %bb.4: ; in Loop: Header=BB26_1 Depth=1 ; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1 ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v18, off, s[24:27], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[20:21] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 4 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 5 +; NOOPT-NEXT: v_readlane_b32 s0, v18, 4 +; NOOPT-NEXT: v_readlane_b32 s1, v18, 5 ; NOOPT-NEXT: s_mov_b64 exec, s[0:1] ; NOOPT-NEXT: ; 
%bb.5: ; in Loop: Header=BB26_1 Depth=1 +; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:84 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v1, off, s[24:27], 0 offset:88 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v2, off, s[24:27], 0 offset:92 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v3, off, s[24:27], 0 offset:96 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v4, off, s[24:27], 0 offset:100 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v5, off, s[24:27], 0 offset:104 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v6, off, s[24:27], 0 offset:108 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v7, off, s[24:27], 0 offset:112 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v8, off, s[24:27], 0 offset:116 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v9, off, s[24:27], 0 offset:120 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v10, off, s[24:27], 0 offset:124 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v11, off, s[24:27], 0 offset:128 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v12, off, s[24:27], 0 offset:132 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v13, off, s[24:27], 0 offset:136 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v14, off, s[24:27], 0 offset:140 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v15, off, s[24:27], 0 offset:144 ; 4-byte Folded Reload ; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1 -; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v18, off, s[24:27], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[20:21] -; NOOPT-NEXT: buffer_load_dword v1, off, s[24:27], 0 offset:84 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v2, off, s[24:27], 0 offset:88 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v3, off, s[24:27], 0 offset:92 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v4, 
off, s[24:27], 0 offset:96 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v5, off, s[24:27], 0 offset:100 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v6, off, s[24:27], 0 offset:104 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v7, off, s[24:27], 0 offset:108 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v8, off, s[24:27], 0 offset:112 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v9, off, s[24:27], 0 offset:116 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v10, off, s[24:27], 0 offset:120 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v11, off, s[24:27], 0 offset:124 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v12, off, s[24:27], 0 offset:128 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v13, off, s[24:27], 0 offset:132 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v14, off, s[24:27], 0 offset:136 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v15, off, s[24:27], 0 offset:140 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v16, off, s[24:27], 0 offset:144 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 s[0:1], 0 ; NOOPT-NEXT: s_waitcnt vmcnt(14) -; NOOPT-NEXT: buffer_store_dword v1, off, s[24:27], 0 offset:8 ; 4-byte Folded Spill -; NOOPT-NEXT: v_writelane_b32 v0, s0, 2 -; NOOPT-NEXT: v_writelane_b32 v0, s1, 3 +; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:8 ; 4-byte Folded Spill +; NOOPT-NEXT: s_waitcnt vmcnt(1) +; NOOPT-NEXT: v_writelane_b32 v18, s0, 2 +; NOOPT-NEXT: v_writelane_b32 v18, s1, 3 ; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1 -; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v18, off, s[24:27], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[20:21] ; NOOPT-NEXT: .LBB26_6: ; %Flow ; NOOPT-NEXT: ; in Loop: Header=BB26_1 Depth=1 -; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1 ; NOOPT-NEXT: s_waitcnt expcnt(1) -; NOOPT-NEXT: buffer_load_dword v1, 
off, s[24:27], 0 offset:4 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:8 ; 4-byte Folded Reload +; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1 +; NOOPT-NEXT: s_waitcnt expcnt(0) +; NOOPT-NEXT: buffer_load_dword v18, off, s[24:27], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[20:21] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v1, 2 -; NOOPT-NEXT: v_readlane_b32 s1, v1, 3 -; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:8 ; 4-byte Folded Reload +; NOOPT-NEXT: v_readlane_b32 s0, v18, 2 +; NOOPT-NEXT: v_readlane_b32 s1, v18, 3 ; NOOPT-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] ; NOOPT-NEXT: s_mov_b32 s0, 1 ; NOOPT-NEXT: ; implicit-def: $sgpr1 ; NOOPT-NEXT: v_cmp_ne_u32_e64 s[0:1], v1, s0 ; NOOPT-NEXT: s_and_b64 vcc, exec, s[0:1] -; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Spill ; NOOPT-NEXT: s_cbranch_vccnz .LBB26_1 ; NOOPT-NEXT: ; %bb.7: ; %bb8 -; NOOPT-NEXT: s_or_saveexec_b64 s[20:21], -1 -; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:4 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[20:21] -; NOOPT-NEXT: ; kill: killed $vgpr0 ; NOOPT-NEXT: s_endpgm ; ; SI-MOVREL-LABEL: broken_phi_bb: @@ -9570,13 +9533,13 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace ; NOOPT-NEXT: s_mov_b32 s19, 0xe8f000 ; NOOPT-NEXT: s_add_u32 s16, s16, s5 ; NOOPT-NEXT: s_addc_u32 s17, s17, 0 -; NOOPT-NEXT: ; implicit-def: $vgpr16 : SGPR spill to VGPR lane -; NOOPT-NEXT: v_writelane_b32 v16, s4, 0 +; NOOPT-NEXT: ; implicit-def: $vgpr33 : SGPR spill to VGPR lane +; NOOPT-NEXT: v_writelane_b32 v33, s4, 0 ; NOOPT-NEXT: s_mov_b32 s4, s1 -; NOOPT-NEXT: v_readlane_b32 s1, v16, 0 -; NOOPT-NEXT: v_writelane_b32 v16, s4, 1 +; NOOPT-NEXT: v_readlane_b32 
s1, v33, 0 +; NOOPT-NEXT: v_writelane_b32 v33, s4, 1 ; NOOPT-NEXT: s_mov_b32 s4, s0 -; NOOPT-NEXT: v_readlane_b32 s0, v16, 1 +; NOOPT-NEXT: v_readlane_b32 s0, v33, 1 ; NOOPT-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:144 ; 4-byte Folded Spill ; NOOPT-NEXT: v_mov_b32_e32 v2, v1 ; NOOPT-NEXT: ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7 @@ -9591,17 +9554,17 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace ; NOOPT-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:140 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b32 s8, 0xf000 ; NOOPT-NEXT: s_mov_b32 s0, 0 -; NOOPT-NEXT: v_writelane_b32 v16, s0, 2 +; NOOPT-NEXT: v_writelane_b32 v33, s0, 2 ; NOOPT-NEXT: s_mov_b32 s2, s0 ; NOOPT-NEXT: s_mov_b32 s3, s8 ; NOOPT-NEXT: s_mov_b32 s8, s0 ; NOOPT-NEXT: s_mov_b32 s9, s0 ; NOOPT-NEXT: ; kill: def $sgpr8_sgpr9 killed $sgpr8_sgpr9 def $sgpr8_sgpr9_sgpr10_sgpr11 ; NOOPT-NEXT: s_mov_b64 s[10:11], s[2:3] -; NOOPT-NEXT: v_writelane_b32 v16, s8, 3 -; NOOPT-NEXT: v_writelane_b32 v16, s9, 4 -; NOOPT-NEXT: v_writelane_b32 v16, s10, 5 -; NOOPT-NEXT: v_writelane_b32 v16, s11, 6 +; NOOPT-NEXT: v_writelane_b32 v33, s8, 3 +; NOOPT-NEXT: v_writelane_b32 v33, s9, 4 +; NOOPT-NEXT: v_writelane_b32 v33, s10, 5 +; NOOPT-NEXT: v_writelane_b32 v33, s11, 6 ; NOOPT-NEXT: ; kill: def $sgpr8_sgpr9_sgpr10_sgpr11 killed $sgpr4_sgpr5_sgpr6_sgpr7 ; NOOPT-NEXT: ; implicit-def: $sgpr2_sgpr3 ; NOOPT-NEXT: s_waitcnt expcnt(1) @@ -9611,7 +9574,6 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace ; NOOPT-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:132 ; 4-byte Folded Spill ; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: v_mov_b32_e32 v0, s0 -; NOOPT-NEXT: v_mov_b32_e32 v31, s0 ; NOOPT-NEXT: v_mov_b32_e32 v30, s0 ; NOOPT-NEXT: v_mov_b32_e32 v29, s0 ; NOOPT-NEXT: v_mov_b32_e32 v28, s0 @@ -9626,22 +9588,23 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace ; NOOPT-NEXT: v_mov_b32_e32 
v19, s0 ; NOOPT-NEXT: v_mov_b32_e32 v18, s0 ; NOOPT-NEXT: v_mov_b32_e32 v17, s0 +; NOOPT-NEXT: v_mov_b32_e32 v16, s0 ; NOOPT-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 killed $exec -; NOOPT-NEXT: v_mov_b32_e32 v1, v31 -; NOOPT-NEXT: v_mov_b32_e32 v2, v30 -; NOOPT-NEXT: v_mov_b32_e32 v3, v29 -; NOOPT-NEXT: v_mov_b32_e32 v4, v28 -; NOOPT-NEXT: v_mov_b32_e32 v5, v27 -; NOOPT-NEXT: v_mov_b32_e32 v6, v26 -; NOOPT-NEXT: v_mov_b32_e32 v7, v25 -; NOOPT-NEXT: v_mov_b32_e32 v8, v24 -; NOOPT-NEXT: v_mov_b32_e32 v9, v23 -; NOOPT-NEXT: v_mov_b32_e32 v10, v22 -; NOOPT-NEXT: v_mov_b32_e32 v11, v21 -; NOOPT-NEXT: v_mov_b32_e32 v12, v20 -; NOOPT-NEXT: v_mov_b32_e32 v13, v19 -; NOOPT-NEXT: v_mov_b32_e32 v14, v18 -; NOOPT-NEXT: v_mov_b32_e32 v15, v17 +; NOOPT-NEXT: v_mov_b32_e32 v1, v30 +; NOOPT-NEXT: v_mov_b32_e32 v2, v29 +; NOOPT-NEXT: v_mov_b32_e32 v3, v28 +; NOOPT-NEXT: v_mov_b32_e32 v4, v27 +; NOOPT-NEXT: v_mov_b32_e32 v5, v26 +; NOOPT-NEXT: v_mov_b32_e32 v6, v25 +; NOOPT-NEXT: v_mov_b32_e32 v7, v24 +; NOOPT-NEXT: v_mov_b32_e32 v8, v23 +; NOOPT-NEXT: v_mov_b32_e32 v9, v22 +; NOOPT-NEXT: v_mov_b32_e32 v10, v21 +; NOOPT-NEXT: v_mov_b32_e32 v11, v20 +; NOOPT-NEXT: v_mov_b32_e32 v12, v19 +; NOOPT-NEXT: v_mov_b32_e32 v13, v18 +; NOOPT-NEXT: v_mov_b32_e32 v14, v17 +; NOOPT-NEXT: v_mov_b32_e32 v15, v16 ; NOOPT-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:68 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:72 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v2, off, s[16:19], 0 offset:76 ; 4-byte Folded Spill @@ -9659,207 +9622,200 @@ define amdgpu_cs void @insert_or_disj_index(ptr addrspace(1) %out, ptr addrspace ; NOOPT-NEXT: buffer_store_dword v14, off, s[16:19], 0 offset:124 ; 4-byte Folded Spill ; NOOPT-NEXT: buffer_store_dword v15, off, s[16:19], 0 offset:128 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 s[0:1], exec -; NOOPT-NEXT: 
v_writelane_b32 v16, s0, 7 -; NOOPT-NEXT: v_writelane_b32 v16, s1, 8 +; NOOPT-NEXT: v_writelane_b32 v33, s0, 7 +; NOOPT-NEXT: v_writelane_b32 v33, s1, 8 ; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1 -; NOOPT-NEXT: buffer_store_dword v16, off, s[16:19], 0 offset:64 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v33, off, s[16:19], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[12:13] -; NOOPT-NEXT: buffer_store_dword v0, off, s[16:19], 0 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v2, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:12 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:16 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v5, off, s[16:19], 0 offset:20 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v6, off, s[16:19], 0 offset:24 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v7, off, s[16:19], 0 offset:28 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v8, off, s[16:19], 0 offset:32 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v9, off, s[16:19], 0 offset:36 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v10, off, s[16:19], 0 offset:40 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v11, off, s[16:19], 0 offset:44 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v12, off, s[16:19], 0 offset:48 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v13, off, s[16:19], 0 offset:52 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v14, off, s[16:19], 0 offset:56 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v15, off, s[16:19], 0 offset:60 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v2, 
off, s[16:19], 0 offset:12 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:16 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:20 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v5, off, s[16:19], 0 offset:24 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v6, off, s[16:19], 0 offset:28 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v7, off, s[16:19], 0 offset:32 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v8, off, s[16:19], 0 offset:36 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v9, off, s[16:19], 0 offset:40 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v10, off, s[16:19], 0 offset:44 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v11, off, s[16:19], 0 offset:48 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v12, off, s[16:19], 0 offset:52 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v13, off, s[16:19], 0 offset:56 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v14, off, s[16:19], 0 offset:60 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v15, off, s[16:19], 0 offset:64 ; 4-byte Folded Spill ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: .LBB27_1: ; =>This Inner Loop Header: Depth=1 -; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1 -; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:64 ; 4-byte Folded Reload -; NOOPT-NEXT: s_mov_b64 exec, s[12:13] -; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 9 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 10 -; NOOPT-NEXT: buffer_load_dword v1, off, s[16:19], 0 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v2, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v4, off, s[16:19], 0 offset:12 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v5, off, s[16:19], 
0 offset:16 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v6, off, s[16:19], 0 offset:20 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v7, off, s[16:19], 0 offset:24 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v8, off, s[16:19], 0 offset:28 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v1, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v2, off, s[16:19], 0 offset:12 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v3, off, s[16:19], 0 offset:16 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v4, off, s[16:19], 0 offset:20 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v5, off, s[16:19], 0 offset:24 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v6, off, s[16:19], 0 offset:28 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v7, off, s[16:19], 0 offset:32 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v8, off, s[16:19], 0 offset:36 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(6) -; NOOPT-NEXT: buffer_load_dword v9, off, s[16:19], 0 offset:32 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v9, off, s[16:19], 0 offset:40 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(5) -; NOOPT-NEXT: buffer_load_dword v10, off, s[16:19], 0 offset:36 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v10, off, s[16:19], 0 offset:44 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(4) -; NOOPT-NEXT: buffer_load_dword v11, off, s[16:19], 0 offset:40 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v11, off, s[16:19], 0 offset:48 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(3) -; NOOPT-NEXT: buffer_load_dword v12, off, s[16:19], 0 offset:44 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v12, off, s[16:19], 0 offset:52 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(2) -; NOOPT-NEXT: buffer_load_dword v13, off, 
s[16:19], 0 offset:48 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v13, off, s[16:19], 0 offset:56 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(1) -; NOOPT-NEXT: buffer_load_dword v14, off, s[16:19], 0 offset:52 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v14, off, s[16:19], 0 offset:60 ; 4-byte Folded Reload ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v15, off, s[16:19], 0 offset:56 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v16, off, s[16:19], 0 offset:60 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v17, off, s[16:19], 0 offset:144 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v18, off, s[16:19], 0 offset:132 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v15, off, s[16:19], 0 offset:64 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v16, off, s[16:19], 0 offset:144 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v17, off, s[16:19], 0 offset:132 ; 4-byte Folded Reload +; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1 +; NOOPT-NEXT: buffer_load_dword v33, off, s[16:19], 0 ; 4-byte Folded Reload +; NOOPT-NEXT: s_mov_b64 exec, s[12:13] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readfirstlane_b32 s2, v18 -; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v18 +; NOOPT-NEXT: v_readlane_b32 s0, v33, 9 +; NOOPT-NEXT: v_readlane_b32 s1, v33, 10 +; NOOPT-NEXT: v_readfirstlane_b32 s2, v17 +; NOOPT-NEXT: v_cmp_eq_u32_e64 s[0:1], s2, v17 ; NOOPT-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] ; NOOPT-NEXT: s_mov_b32 m0, s2 -; NOOPT-NEXT: v_movreld_b32_e32 v2, v17 -; NOOPT-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:148 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v2, off, s[16:19], 0 offset:152 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:156 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:160 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v5, off, s[16:19], 0 offset:164 ; 
4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v6, off, s[16:19], 0 offset:168 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v7, off, s[16:19], 0 offset:172 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v8, off, s[16:19], 0 offset:176 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v9, off, s[16:19], 0 offset:180 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v10, off, s[16:19], 0 offset:184 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v11, off, s[16:19], 0 offset:188 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v12, off, s[16:19], 0 offset:192 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v13, off, s[16:19], 0 offset:196 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v14, off, s[16:19], 0 offset:200 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v15, off, s[16:19], 0 offset:204 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v16, off, s[16:19], 0 offset:208 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v1, off, s[16:19], 0 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v2, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:12 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v5, off, s[16:19], 0 offset:16 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v6, off, s[16:19], 0 offset:20 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v7, off, s[16:19], 0 offset:24 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v8, off, s[16:19], 0 offset:28 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v9, off, s[16:19], 0 offset:32 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v10, off, s[16:19], 0 offset:36 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v11, off, s[16:19], 0 offset:40 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v12, off, s[16:19], 0 
offset:44 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v13, off, s[16:19], 0 offset:48 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v14, off, s[16:19], 0 offset:52 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v15, off, s[16:19], 0 offset:56 ; 4-byte Folded Spill -; NOOPT-NEXT: buffer_store_dword v16, off, s[16:19], 0 offset:60 ; 4-byte Folded Spill +; NOOPT-NEXT: v_movreld_b32_e32 v1, v16 +; NOOPT-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:148 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:152 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v2, off, s[16:19], 0 offset:156 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:160 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:164 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v5, off, s[16:19], 0 offset:168 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v6, off, s[16:19], 0 offset:172 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v7, off, s[16:19], 0 offset:176 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v8, off, s[16:19], 0 offset:180 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v9, off, s[16:19], 0 offset:184 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v10, off, s[16:19], 0 offset:188 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v11, off, s[16:19], 0 offset:192 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v12, off, s[16:19], 0 offset:196 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v13, off, s[16:19], 0 offset:200 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v14, off, s[16:19], 0 offset:204 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v15, off, s[16:19], 0 offset:208 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v1, off, s[16:19], 0 offset:8 ; 4-byte 
Folded Spill +; NOOPT-NEXT: buffer_store_dword v2, off, s[16:19], 0 offset:12 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:16 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:20 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v5, off, s[16:19], 0 offset:24 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v6, off, s[16:19], 0 offset:28 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v7, off, s[16:19], 0 offset:32 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v8, off, s[16:19], 0 offset:36 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v9, off, s[16:19], 0 offset:40 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v10, off, s[16:19], 0 offset:44 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v11, off, s[16:19], 0 offset:48 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v12, off, s[16:19], 0 offset:52 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v13, off, s[16:19], 0 offset:56 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v14, off, s[16:19], 0 offset:60 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v15, off, s[16:19], 0 offset:64 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 s[2:3], s[0:1] -; NOOPT-NEXT: v_writelane_b32 v0, s2, 9 -; NOOPT-NEXT: v_writelane_b32 v0, s3, 10 +; NOOPT-NEXT: v_writelane_b32 v33, s2, 9 +; NOOPT-NEXT: v_writelane_b32 v33, s3, 10 ; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1 -; NOOPT-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:64 ; 4-byte Folded Spill +; NOOPT-NEXT: buffer_store_dword v33, off, s[16:19], 0 ; 4-byte Folded Spill ; NOOPT-NEXT: s_mov_b64 exec, s[12:13] ; NOOPT-NEXT: s_xor_b64 exec, exec, s[0:1] ; NOOPT-NEXT: s_cbranch_execnz .LBB27_1 ; NOOPT-NEXT: ; %bb.2: ; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1 ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:64 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword 
v33, off, s[16:19], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[12:13] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 7 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 8 +; NOOPT-NEXT: v_readlane_b32 s0, v33, 7 +; NOOPT-NEXT: v_readlane_b32 s1, v33, 8 ; NOOPT-NEXT: s_mov_b64 exec, s[0:1] ; NOOPT-NEXT: ; %bb.3: +; NOOPT-NEXT: buffer_load_dword v4, off, s[16:19], 0 offset:136 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v5, off, s[16:19], 0 offset:140 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v17, off, s[16:19], 0 offset:148 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v18, off, s[16:19], 0 offset:152 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v19, off, s[16:19], 0 offset:156 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v20, off, s[16:19], 0 offset:160 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v21, off, s[16:19], 0 offset:164 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v22, off, s[16:19], 0 offset:168 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v23, off, s[16:19], 0 offset:172 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v24, off, s[16:19], 0 offset:176 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v25, off, s[16:19], 0 offset:180 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v26, off, s[16:19], 0 offset:184 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v27, off, s[16:19], 0 offset:188 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v28, off, s[16:19], 0 offset:192 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v29, off, s[16:19], 0 offset:196 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v30, off, s[16:19], 0 offset:200 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v31, off, s[16:19], 0 offset:204 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v32, off, s[16:19], 0 offset:208 ; 4-byte Folded Reload ; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1 -; NOOPT-NEXT: 
buffer_load_dword v0, off, s[16:19], 0 offset:64 ; 4-byte Folded Reload +; NOOPT-NEXT: buffer_load_dword v33, off, s[16:19], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[12:13] ; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_readlane_b32 s0, v0, 3 -; NOOPT-NEXT: v_readlane_b32 s1, v0, 4 -; NOOPT-NEXT: v_readlane_b32 s2, v0, 5 -; NOOPT-NEXT: v_readlane_b32 s3, v0, 6 -; NOOPT-NEXT: buffer_load_dword v5, off, s[16:19], 0 offset:136 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v6, off, s[16:19], 0 offset:140 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v18, off, s[16:19], 0 offset:148 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v19, off, s[16:19], 0 offset:152 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v20, off, s[16:19], 0 offset:156 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v21, off, s[16:19], 0 offset:160 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v22, off, s[16:19], 0 offset:164 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v23, off, s[16:19], 0 offset:168 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v24, off, s[16:19], 0 offset:172 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v25, off, s[16:19], 0 offset:176 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v26, off, s[16:19], 0 offset:180 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v27, off, s[16:19], 0 offset:184 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v28, off, s[16:19], 0 offset:188 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v29, off, s[16:19], 0 offset:192 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v30, off, s[16:19], 0 offset:196 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v31, off, s[16:19], 0 offset:200 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v32, off, s[16:19], 0 offset:204 ; 4-byte Folded Reload -; NOOPT-NEXT: buffer_load_dword v33, off, s[16:19], 0 offset:208 ; 4-byte Folded Reload -; NOOPT-NEXT: 
s_waitcnt vmcnt(12) -; NOOPT-NEXT: v_mov_b32_e32 v7, v21 -; NOOPT-NEXT: v_mov_b32_e32 v8, v20 -; NOOPT-NEXT: v_mov_b32_e32 v9, v19 -; NOOPT-NEXT: v_mov_b32_e32 v1, v18 -; NOOPT-NEXT: s_waitcnt vmcnt(8) -; NOOPT-NEXT: v_mov_b32_e32 v2, v25 -; NOOPT-NEXT: v_mov_b32_e32 v3, v24 -; NOOPT-NEXT: v_mov_b32_e32 v4, v23 -; NOOPT-NEXT: v_mov_b32_e32 v10, v22 -; NOOPT-NEXT: s_waitcnt vmcnt(4) -; NOOPT-NEXT: v_mov_b32_e32 v15, v29 -; NOOPT-NEXT: v_mov_b32_e32 v16, v28 -; NOOPT-NEXT: v_mov_b32_e32 v17, v27 -; NOOPT-NEXT: v_mov_b32_e32 v11, v26 -; NOOPT-NEXT: s_waitcnt vmcnt(0) -; NOOPT-NEXT: v_mov_b32_e32 v12, v33 -; NOOPT-NEXT: v_mov_b32_e32 v13, v32 -; NOOPT-NEXT: v_mov_b32_e32 v14, v31 -; NOOPT-NEXT: v_mov_b32_e32 v18, v30 +; NOOPT-NEXT: v_readlane_b32 s0, v33, 3 +; NOOPT-NEXT: v_readlane_b32 s1, v33, 4 +; NOOPT-NEXT: v_readlane_b32 s2, v33, 5 +; NOOPT-NEXT: v_readlane_b32 s3, v33, 6 +; NOOPT-NEXT: v_mov_b32_e32 v6, v20 +; NOOPT-NEXT: v_mov_b32_e32 v7, v19 +; NOOPT-NEXT: v_mov_b32_e32 v8, v18 +; NOOPT-NEXT: v_mov_b32_e32 v0, v17 +; NOOPT-NEXT: v_mov_b32_e32 v1, v24 +; NOOPT-NEXT: v_mov_b32_e32 v2, v23 +; NOOPT-NEXT: v_mov_b32_e32 v3, v22 +; NOOPT-NEXT: v_mov_b32_e32 v9, v21 +; NOOPT-NEXT: v_mov_b32_e32 v14, v28 +; NOOPT-NEXT: v_mov_b32_e32 v15, v27 +; NOOPT-NEXT: v_mov_b32_e32 v16, v26 +; NOOPT-NEXT: v_mov_b32_e32 v10, v25 +; NOOPT-NEXT: v_mov_b32_e32 v11, v32 +; NOOPT-NEXT: v_mov_b32_e32 v12, v31 +; NOOPT-NEXT: v_mov_b32_e32 v13, v30 +; NOOPT-NEXT: v_mov_b32_e32 v17, v29 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 -; NOOPT-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19_vgpr20_vgpr21 killed $exec -; NOOPT-NEXT: v_mov_b32_e32 v19, v14 -; NOOPT-NEXT: v_mov_b32_e32 v20, v13 -; NOOPT-NEXT: v_mov_b32_e32 v21, v12 -; NOOPT-NEXT: v_mov_b32_e32 v13, v6 +; NOOPT-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18_vgpr19_vgpr20 killed $exec +; NOOPT-NEXT: 
v_mov_b32_e32 v18, v13 +; NOOPT-NEXT: v_mov_b32_e32 v19, v12 +; NOOPT-NEXT: v_mov_b32_e32 v20, v11 ; NOOPT-NEXT: v_mov_b32_e32 v12, v5 -; NOOPT-NEXT: buffer_store_dwordx4 v[18:21], v[12:13], s[0:3], 0 addr64 offset:48 +; NOOPT-NEXT: v_mov_b32_e32 v11, v4 +; NOOPT-NEXT: buffer_store_dwordx4 v[17:20], v[11:12], s[0:3], 0 addr64 offset:48 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 -; NOOPT-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12_vgpr13_vgpr14 killed $exec -; NOOPT-NEXT: v_mov_b32_e32 v12, v17 -; NOOPT-NEXT: v_mov_b32_e32 v13, v16 -; NOOPT-NEXT: v_mov_b32_e32 v14, v15 -; NOOPT-NEXT: v_mov_b32_e32 v16, v6 +; NOOPT-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11_vgpr12_vgpr13 killed $exec +; NOOPT-NEXT: v_mov_b32_e32 v11, v16 +; NOOPT-NEXT: v_mov_b32_e32 v12, v15 +; NOOPT-NEXT: v_mov_b32_e32 v13, v14 ; NOOPT-NEXT: v_mov_b32_e32 v15, v5 -; NOOPT-NEXT: buffer_store_dwordx4 v[11:14], v[15:16], s[0:3], 0 addr64 offset:32 +; NOOPT-NEXT: v_mov_b32_e32 v14, v4 +; NOOPT-NEXT: buffer_store_dwordx4 v[10:13], v[14:15], s[0:3], 0 addr64 offset:32 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 -; NOOPT-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11_vgpr12_vgpr13 killed $exec +; NOOPT-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10_vgpr11_vgpr12 killed $exec ; NOOPT-NEXT: s_waitcnt expcnt(0) -; NOOPT-NEXT: v_mov_b32_e32 v11, v4 -; NOOPT-NEXT: v_mov_b32_e32 v12, v3 -; NOOPT-NEXT: v_mov_b32_e32 v13, v2 +; NOOPT-NEXT: v_mov_b32_e32 v10, v3 +; NOOPT-NEXT: v_mov_b32_e32 v11, v2 +; NOOPT-NEXT: v_mov_b32_e32 v12, v1 +; NOOPT-NEXT: v_mov_b32_e32 v1, v4 ; NOOPT-NEXT: v_mov_b32_e32 v2, v5 -; NOOPT-NEXT: v_mov_b32_e32 v3, v6 -; NOOPT-NEXT: buffer_store_dwordx4 v[10:13], v[2:3], s[0:3], 0 addr64 offset:16 +; NOOPT-NEXT: buffer_store_dwordx4 
v[9:12], v[1:2], s[0:3], 0 addr64 offset:16 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 ; NOOPT-NEXT: ; implicit-def: $sgpr4 -; NOOPT-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec -; NOOPT-NEXT: v_mov_b32_e32 v2, v9 -; NOOPT-NEXT: v_mov_b32_e32 v3, v8 -; NOOPT-NEXT: v_mov_b32_e32 v4, v7 -; NOOPT-NEXT: buffer_store_dwordx4 v[1:4], v[5:6], s[0:3], 0 addr64 -; NOOPT-NEXT: ; kill: killed $vgpr0 +; NOOPT-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec +; NOOPT-NEXT: v_mov_b32_e32 v1, v8 +; NOOPT-NEXT: v_mov_b32_e32 v2, v7 +; NOOPT-NEXT: v_mov_b32_e32 v3, v6 +; NOOPT-NEXT: buffer_store_dwordx4 v[0:3], v[4:5], s[0:3], 0 addr64 ; NOOPT-NEXT: s_endpgm ; ; SI-MOVREL-LABEL: insert_or_disj_index: diff --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll index 9e336a714ca67..eef51acc4e12e 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll @@ -7,13 +7,13 @@ define fastcc i32 @foo() { ; CHECK-LABEL: name: foo ; CHECK: bb.0 (%ir-block.0): ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $vgpr31, $vgpr40, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr30, $sgpr31, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: S_WAITCNT 0 ; CHECK-NEXT: $sgpr16 = S_MOV_B32 $sgpr33 ; CHECK-NEXT: $sgpr33 = S_MOV_B32 $sgpr32 ; CHECK-NEXT: $sgpr17 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; CHECK-NEXT: $exec_lo = S_MOV_B32 killed $sgpr17 ; CHECK-NEXT: $sgpr32 = frame-setup S_ADDK_I32 $sgpr32, 512, implicit-def dead $scc ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr16, 2, undef $vgpr40 @@ -26,24 +26,22 @@ define fastcc i32 @foo() { ; CHECK-NEXT: BUFFER_GL1_INV implicit $exec ; CHECK-NEXT: BUFFER_GL0_INV implicit $exec ; CHECK-NEXT: renamable $sgpr16_sgpr17 = S_LOAD_DWORDX2_IMM killed renamable $sgpr16_sgpr17, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) - ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr30, 0, killed $vgpr40 - ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr31, 1, killed $vgpr40 + ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr40 + ; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr40 ; CHECK-NEXT: S_WAITCNT 49279 ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @bar, csr_amdgpu, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit killed $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $vcc_lo = S_MOV_B32 $exec_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1 (%ir-block.1): ; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) - ; CHECK-NEXT: liveins: $vcc_lo, $vgpr40 + ; CHECK-NEXT: liveins: $vcc_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.DummyReturnBlock: - ; CHECK-NEXT: liveins: $vgpr40 - ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $sgpr31 = V_READLANE_B32 $vgpr40, 1 ; CHECK-NEXT: $sgpr30 = V_READLANE_B32 $vgpr40, 0 - ; CHECK-NEXT: $sgpr4 = V_READLANE_B32 killed $vgpr40, 2 + ; CHECK-NEXT: $sgpr4 = V_READLANE_B32 $vgpr40, 2 ; CHECK-NEXT: $sgpr5 = S_OR_SAVEEXEC_B32 -1, implicit-def 
$exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) ; CHECK-NEXT: $exec_lo = S_MOV_B32 killed $sgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll index f771536463778..ea18e0d9eeefb 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_dynelt.ll @@ -1010,73 +1010,73 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec, ; GCN-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x34 ; GCN-NEXT: s_load_dword s8, s[2:3], 0x44 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24 -; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane +; GCN-NEXT: ; implicit-def: $vgpr6 : SGPR spill to VGPR lane ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_bfe_u32 s9, s4, 0xf0001 ; GCN-NEXT: s_lshr_b32 s42, s5, 16 -; GCN-NEXT: v_writelane_b32 v0, s0, 0 -; GCN-NEXT: v_writelane_b32 v0, s1, 1 +; GCN-NEXT: v_writelane_b32 v6, s0, 0 +; GCN-NEXT: v_writelane_b32 v6, s1, 1 ; GCN-NEXT: s_lshr_b32 s0, s4, 16 -; GCN-NEXT: v_writelane_b32 v0, s0, 2 +; GCN-NEXT: v_writelane_b32 v6, s0, 2 ; GCN-NEXT: s_lshr_b32 s0, s4, 17 -; GCN-NEXT: v_writelane_b32 v0, s0, 3 +; GCN-NEXT: v_writelane_b32 v6, s0, 3 ; GCN-NEXT: s_lshr_b32 s0, s4, 18 -; GCN-NEXT: v_writelane_b32 v0, s0, 4 +; GCN-NEXT: v_writelane_b32 v6, s0, 4 ; GCN-NEXT: s_lshr_b32 s0, s4, 19 -; GCN-NEXT: v_writelane_b32 v0, s0, 5 +; GCN-NEXT: v_writelane_b32 v6, s0, 5 ; GCN-NEXT: s_lshr_b32 s0, s4, 20 -; GCN-NEXT: v_writelane_b32 v0, s0, 6 +; GCN-NEXT: v_writelane_b32 v6, s0, 6 ; GCN-NEXT: s_lshr_b32 s0, s4, 21 -; GCN-NEXT: v_writelane_b32 v0, s0, 7 +; GCN-NEXT: v_writelane_b32 v6, s0, 7 ; GCN-NEXT: s_lshr_b32 s0, s4, 22 -; GCN-NEXT: v_writelane_b32 v0, s0, 8 +; GCN-NEXT: v_writelane_b32 v6, s0, 8 ; GCN-NEXT: s_lshr_b32 s0, s4, 23 -; GCN-NEXT: v_writelane_b32 v0, s0, 9 +; 
GCN-NEXT: v_writelane_b32 v6, s0, 9 ; GCN-NEXT: s_lshr_b32 s0, s4, 24 -; GCN-NEXT: v_writelane_b32 v0, s0, 10 +; GCN-NEXT: v_writelane_b32 v6, s0, 10 ; GCN-NEXT: s_lshr_b32 s0, s4, 25 -; GCN-NEXT: v_writelane_b32 v0, s0, 11 +; GCN-NEXT: v_writelane_b32 v6, s0, 11 ; GCN-NEXT: s_lshr_b32 s0, s4, 26 -; GCN-NEXT: v_writelane_b32 v0, s0, 12 +; GCN-NEXT: v_writelane_b32 v6, s0, 12 ; GCN-NEXT: s_lshr_b32 s0, s4, 27 -; GCN-NEXT: v_writelane_b32 v0, s0, 13 +; GCN-NEXT: v_writelane_b32 v6, s0, 13 ; GCN-NEXT: s_lshr_b32 s0, s4, 28 -; GCN-NEXT: v_writelane_b32 v0, s0, 14 +; GCN-NEXT: v_writelane_b32 v6, s0, 14 ; GCN-NEXT: s_lshr_b32 s0, s4, 29 -; GCN-NEXT: v_writelane_b32 v0, s0, 15 +; GCN-NEXT: v_writelane_b32 v6, s0, 15 ; GCN-NEXT: s_lshr_b32 s0, s4, 30 -; GCN-NEXT: v_writelane_b32 v0, s0, 16 +; GCN-NEXT: v_writelane_b32 v6, s0, 16 ; GCN-NEXT: s_lshr_b32 s0, s4, 31 -; GCN-NEXT: v_writelane_b32 v0, s0, 17 -; GCN-NEXT: v_writelane_b32 v0, s9, 18 +; GCN-NEXT: v_writelane_b32 v6, s0, 17 +; GCN-NEXT: v_writelane_b32 v6, s9, 18 ; GCN-NEXT: s_bfe_u32 s9, s4, 0xe0002 -; GCN-NEXT: v_writelane_b32 v0, s9, 19 +; GCN-NEXT: v_writelane_b32 v6, s9, 19 ; GCN-NEXT: s_bfe_u32 s9, s4, 0xd0003 -; GCN-NEXT: v_writelane_b32 v0, s9, 20 +; GCN-NEXT: v_writelane_b32 v6, s9, 20 ; GCN-NEXT: s_bfe_u32 s9, s4, 0xc0004 -; GCN-NEXT: v_writelane_b32 v0, s9, 21 +; GCN-NEXT: v_writelane_b32 v6, s9, 21 ; GCN-NEXT: s_bfe_u32 s9, s4, 0xb0005 -; GCN-NEXT: v_writelane_b32 v0, s9, 22 +; GCN-NEXT: v_writelane_b32 v6, s9, 22 ; GCN-NEXT: s_bfe_u32 s9, s4, 0xa0006 -; GCN-NEXT: v_writelane_b32 v0, s9, 23 +; GCN-NEXT: v_writelane_b32 v6, s9, 23 ; GCN-NEXT: s_bfe_u32 s9, s4, 0x90007 -; GCN-NEXT: v_writelane_b32 v0, s9, 24 +; GCN-NEXT: v_writelane_b32 v6, s9, 24 ; GCN-NEXT: s_bfe_u32 s9, s4, 0x80008 -; GCN-NEXT: v_writelane_b32 v0, s9, 25 +; GCN-NEXT: v_writelane_b32 v6, s9, 25 ; GCN-NEXT: s_bfe_u32 s9, s4, 0x70009 -; GCN-NEXT: v_writelane_b32 v0, s9, 26 +; GCN-NEXT: v_writelane_b32 v6, s9, 26 ; GCN-NEXT: s_bfe_u32 s9, 
s4, 0x6000a -; GCN-NEXT: v_writelane_b32 v0, s9, 27 +; GCN-NEXT: v_writelane_b32 v6, s9, 27 ; GCN-NEXT: s_bfe_u32 s9, s4, 0x5000b -; GCN-NEXT: v_writelane_b32 v0, s9, 28 +; GCN-NEXT: v_writelane_b32 v6, s9, 28 ; GCN-NEXT: s_bfe_u32 s9, s4, 0x4000c -; GCN-NEXT: v_writelane_b32 v0, s9, 29 +; GCN-NEXT: v_writelane_b32 v6, s9, 29 ; GCN-NEXT: s_bfe_u32 s9, s4, 0x3000d -; GCN-NEXT: v_writelane_b32 v0, s9, 30 +; GCN-NEXT: v_writelane_b32 v6, s9, 30 ; GCN-NEXT: s_bfe_u32 s9, s4, 0x2000e -; GCN-NEXT: v_writelane_b32 v0, s9, 31 +; GCN-NEXT: v_writelane_b32 v6, s9, 31 ; GCN-NEXT: s_bfe_u32 s9, s4, 0x1000f -; GCN-NEXT: v_writelane_b32 v0, s9, 32 +; GCN-NEXT: v_writelane_b32 v6, s9, 32 ; GCN-NEXT: s_bfe_u32 s9, s5, 0xf0001 ; GCN-NEXT: s_lshr_b32 s43, s5, 17 ; GCN-NEXT: s_lshr_b32 s45, s5, 18 @@ -1125,7 +1125,7 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec, ; GCN-NEXT: s_lshr_b32 s2, s7, 29 ; GCN-NEXT: s_lshr_b32 s1, s7, 30 ; GCN-NEXT: s_lshr_b32 s0, s7, 31 -; GCN-NEXT: v_writelane_b32 v0, s9, 33 +; GCN-NEXT: v_writelane_b32 v6, s9, 33 ; GCN-NEXT: s_bfe_u32 s40, s5, 0xe0002 ; GCN-NEXT: s_bfe_u32 s41, s5, 0xd0003 ; GCN-NEXT: s_bfe_u32 s44, s5, 0xc0004 @@ -1630,7 +1630,7 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec, ; GCN-NEXT: s_cselect_b32 s5, s5, 1 ; GCN-NEXT: s_and_b32 s5, s5, 1 ; GCN-NEXT: s_cmp_lg_u32 s8, 33 -; GCN-NEXT: v_readlane_b32 s9, v0, 33 +; GCN-NEXT: v_readlane_b32 s9, v6, 33 ; GCN-NEXT: s_cselect_b32 s9, s9, 1 ; GCN-NEXT: s_lshl_b32 s9, s9, 1 ; GCN-NEXT: s_or_b32 s5, s5, s9 @@ -1643,21 +1643,21 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec, ; GCN-NEXT: s_and_b32 s1, s1, 0xffff ; GCN-NEXT: s_or_b32 s0, s1, s0 ; GCN-NEXT: s_cmp_lg_u32 s8, 31 -; GCN-NEXT: v_readlane_b32 s1, v0, 17 +; GCN-NEXT: v_readlane_b32 s1, v6, 17 ; GCN-NEXT: s_cselect_b32 s1, s1, 1 ; GCN-NEXT: s_lshl_b32 s1, s1, 3 ; GCN-NEXT: s_cmp_lg_u32 s8, 30 -; GCN-NEXT: v_readlane_b32 s2, v0, 16 
+; GCN-NEXT: v_readlane_b32 s2, v6, 16 ; GCN-NEXT: s_cselect_b32 s2, s2, 1 ; GCN-NEXT: s_and_b32 s2, s2, 1 ; GCN-NEXT: s_lshl_b32 s2, s2, 2 ; GCN-NEXT: s_or_b32 s1, s1, s2 ; GCN-NEXT: s_cmp_lg_u32 s8, 29 -; GCN-NEXT: v_readlane_b32 s2, v0, 15 +; GCN-NEXT: v_readlane_b32 s2, v6, 15 ; GCN-NEXT: s_cselect_b32 s2, s2, 1 ; GCN-NEXT: s_lshl_b32 s2, s2, 1 ; GCN-NEXT: s_cmp_lg_u32 s8, 28 -; GCN-NEXT: v_readlane_b32 s3, v0, 14 +; GCN-NEXT: v_readlane_b32 s3, v6, 14 ; GCN-NEXT: s_cselect_b32 s3, s3, 1 ; GCN-NEXT: s_and_b32 s3, s3, 1 ; GCN-NEXT: s_or_b32 s2, s3, s2 @@ -1665,21 +1665,21 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec, ; GCN-NEXT: s_or_b32 s1, s2, s1 ; GCN-NEXT: s_lshl_b32 s1, s1, 12 ; GCN-NEXT: s_cmp_lg_u32 s8, 27 -; GCN-NEXT: v_readlane_b32 s2, v0, 13 +; GCN-NEXT: v_readlane_b32 s2, v6, 13 ; GCN-NEXT: s_cselect_b32 s2, s2, 1 ; GCN-NEXT: s_lshl_b32 s2, s2, 3 ; GCN-NEXT: s_cmp_lg_u32 s8, 26 -; GCN-NEXT: v_readlane_b32 s3, v0, 12 +; GCN-NEXT: v_readlane_b32 s3, v6, 12 ; GCN-NEXT: s_cselect_b32 s3, s3, 1 ; GCN-NEXT: s_and_b32 s3, s3, 1 ; GCN-NEXT: s_lshl_b32 s3, s3, 2 ; GCN-NEXT: s_or_b32 s2, s2, s3 ; GCN-NEXT: s_cmp_lg_u32 s8, 25 -; GCN-NEXT: v_readlane_b32 s3, v0, 11 +; GCN-NEXT: v_readlane_b32 s3, v6, 11 ; GCN-NEXT: s_cselect_b32 s3, s3, 1 ; GCN-NEXT: s_lshl_b32 s3, s3, 1 ; GCN-NEXT: s_cmp_lg_u32 s8, 24 -; GCN-NEXT: v_readlane_b32 s5, v0, 10 +; GCN-NEXT: v_readlane_b32 s5, v6, 10 ; GCN-NEXT: s_cselect_b32 s5, s5, 1 ; GCN-NEXT: s_and_b32 s5, s5, 1 ; GCN-NEXT: s_or_b32 s3, s5, s3 @@ -1689,21 +1689,21 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec, ; GCN-NEXT: s_lshl_b32 s2, s2, 8 ; GCN-NEXT: s_or_b32 s1, s1, s2 ; GCN-NEXT: s_cmp_lg_u32 s8, 23 -; GCN-NEXT: v_readlane_b32 s2, v0, 9 +; GCN-NEXT: v_readlane_b32 s2, v6, 9 ; GCN-NEXT: s_cselect_b32 s2, s2, 1 ; GCN-NEXT: s_lshl_b32 s2, s2, 3 ; GCN-NEXT: s_cmp_lg_u32 s8, 22 -; GCN-NEXT: v_readlane_b32 s3, v0, 8 +; GCN-NEXT: v_readlane_b32 s3, v6, 8 ; 
GCN-NEXT: s_cselect_b32 s3, s3, 1 ; GCN-NEXT: s_and_b32 s3, s3, 1 ; GCN-NEXT: s_lshl_b32 s3, s3, 2 ; GCN-NEXT: s_or_b32 s2, s2, s3 ; GCN-NEXT: s_cmp_lg_u32 s8, 21 -; GCN-NEXT: v_readlane_b32 s3, v0, 7 +; GCN-NEXT: v_readlane_b32 s3, v6, 7 ; GCN-NEXT: s_cselect_b32 s3, s3, 1 ; GCN-NEXT: s_lshl_b32 s3, s3, 1 ; GCN-NEXT: s_cmp_lg_u32 s8, 20 -; GCN-NEXT: v_readlane_b32 s5, v0, 6 +; GCN-NEXT: v_readlane_b32 s5, v6, 6 ; GCN-NEXT: s_cselect_b32 s5, s5, 1 ; GCN-NEXT: s_and_b32 s5, s5, 1 ; GCN-NEXT: s_or_b32 s3, s5, s3 @@ -1711,21 +1711,21 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec, ; GCN-NEXT: s_or_b32 s2, s3, s2 ; GCN-NEXT: s_lshl_b32 s2, s2, 4 ; GCN-NEXT: s_cmp_lg_u32 s8, 19 -; GCN-NEXT: v_readlane_b32 s3, v0, 5 +; GCN-NEXT: v_readlane_b32 s3, v6, 5 ; GCN-NEXT: s_cselect_b32 s3, s3, 1 ; GCN-NEXT: s_lshl_b32 s3, s3, 3 ; GCN-NEXT: s_cmp_lg_u32 s8, 18 -; GCN-NEXT: v_readlane_b32 s5, v0, 4 +; GCN-NEXT: v_readlane_b32 s5, v6, 4 ; GCN-NEXT: s_cselect_b32 s5, s5, 1 ; GCN-NEXT: s_and_b32 s5, s5, 1 ; GCN-NEXT: s_lshl_b32 s5, s5, 2 ; GCN-NEXT: s_or_b32 s3, s3, s5 ; GCN-NEXT: s_cmp_lg_u32 s8, 17 -; GCN-NEXT: v_readlane_b32 s5, v0, 3 +; GCN-NEXT: v_readlane_b32 s5, v6, 3 ; GCN-NEXT: s_cselect_b32 s5, s5, 1 ; GCN-NEXT: s_lshl_b32 s5, s5, 1 ; GCN-NEXT: s_cmp_lg_u32 s8, 16 -; GCN-NEXT: v_readlane_b32 s9, v0, 2 +; GCN-NEXT: v_readlane_b32 s9, v6, 2 ; GCN-NEXT: s_cselect_b32 s9, s9, 1 ; GCN-NEXT: s_and_b32 s9, s9, 1 ; GCN-NEXT: s_or_b32 s5, s9, s5 @@ -1737,21 +1737,21 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec, ; GCN-NEXT: s_or_b32 s1, s2, s1 ; GCN-NEXT: s_lshl_b32 s1, s1, 16 ; GCN-NEXT: s_cmp_lg_u32 s8, 15 -; GCN-NEXT: v_readlane_b32 s2, v0, 32 +; GCN-NEXT: v_readlane_b32 s2, v6, 32 ; GCN-NEXT: s_cselect_b32 s2, s2, 1 ; GCN-NEXT: s_lshl_b32 s2, s2, 3 ; GCN-NEXT: s_cmp_lg_u32 s8, 14 -; GCN-NEXT: v_readlane_b32 s3, v0, 31 +; GCN-NEXT: v_readlane_b32 s3, v6, 31 ; GCN-NEXT: s_cselect_b32 s3, s3, 1 ; GCN-NEXT: 
s_and_b32 s3, s3, 1 ; GCN-NEXT: s_lshl_b32 s3, s3, 2 ; GCN-NEXT: s_or_b32 s2, s2, s3 ; GCN-NEXT: s_cmp_lg_u32 s8, 13 -; GCN-NEXT: v_readlane_b32 s3, v0, 30 +; GCN-NEXT: v_readlane_b32 s3, v6, 30 ; GCN-NEXT: s_cselect_b32 s3, s3, 1 ; GCN-NEXT: s_lshl_b32 s3, s3, 1 ; GCN-NEXT: s_cmp_lg_u32 s8, 12 -; GCN-NEXT: v_readlane_b32 s5, v0, 29 +; GCN-NEXT: v_readlane_b32 s5, v6, 29 ; GCN-NEXT: s_cselect_b32 s5, s5, 1 ; GCN-NEXT: s_and_b32 s5, s5, 1 ; GCN-NEXT: s_or_b32 s3, s5, s3 @@ -1759,21 +1759,21 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec, ; GCN-NEXT: s_or_b32 s2, s3, s2 ; GCN-NEXT: s_lshl_b32 s2, s2, 12 ; GCN-NEXT: s_cmp_lg_u32 s8, 11 -; GCN-NEXT: v_readlane_b32 s3, v0, 28 +; GCN-NEXT: v_readlane_b32 s3, v6, 28 ; GCN-NEXT: s_cselect_b32 s3, s3, 1 ; GCN-NEXT: s_lshl_b32 s3, s3, 3 ; GCN-NEXT: s_cmp_lg_u32 s8, 10 -; GCN-NEXT: v_readlane_b32 s5, v0, 27 +; GCN-NEXT: v_readlane_b32 s5, v6, 27 ; GCN-NEXT: s_cselect_b32 s5, s5, 1 ; GCN-NEXT: s_and_b32 s5, s5, 1 ; GCN-NEXT: s_lshl_b32 s5, s5, 2 ; GCN-NEXT: s_or_b32 s3, s3, s5 ; GCN-NEXT: s_cmp_lg_u32 s8, 9 -; GCN-NEXT: v_readlane_b32 s5, v0, 26 +; GCN-NEXT: v_readlane_b32 s5, v6, 26 ; GCN-NEXT: s_cselect_b32 s5, s5, 1 ; GCN-NEXT: s_lshl_b32 s5, s5, 1 ; GCN-NEXT: s_cmp_lg_u32 s8, 8 -; GCN-NEXT: v_readlane_b32 s9, v0, 25 +; GCN-NEXT: v_readlane_b32 s9, v6, 25 ; GCN-NEXT: s_cselect_b32 s9, s9, 1 ; GCN-NEXT: s_and_b32 s9, s9, 1 ; GCN-NEXT: s_or_b32 s5, s9, s5 @@ -1783,21 +1783,21 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec, ; GCN-NEXT: s_lshl_b32 s3, s3, 8 ; GCN-NEXT: s_or_b32 s2, s2, s3 ; GCN-NEXT: s_cmp_lg_u32 s8, 7 -; GCN-NEXT: v_readlane_b32 s3, v0, 24 +; GCN-NEXT: v_readlane_b32 s3, v6, 24 ; GCN-NEXT: s_cselect_b32 s3, s3, 1 ; GCN-NEXT: s_lshl_b32 s3, s3, 3 ; GCN-NEXT: s_cmp_lg_u32 s8, 6 -; GCN-NEXT: v_readlane_b32 s5, v0, 23 +; GCN-NEXT: v_readlane_b32 s5, v6, 23 ; GCN-NEXT: s_cselect_b32 s5, s5, 1 ; GCN-NEXT: s_and_b32 s5, s5, 1 ; GCN-NEXT: 
s_lshl_b32 s5, s5, 2 ; GCN-NEXT: s_or_b32 s3, s3, s5 ; GCN-NEXT: s_cmp_lg_u32 s8, 5 -; GCN-NEXT: v_readlane_b32 s5, v0, 22 +; GCN-NEXT: v_readlane_b32 s5, v6, 22 ; GCN-NEXT: s_cselect_b32 s5, s5, 1 ; GCN-NEXT: s_lshl_b32 s5, s5, 1 ; GCN-NEXT: s_cmp_lg_u32 s8, 4 -; GCN-NEXT: v_readlane_b32 s9, v0, 21 +; GCN-NEXT: v_readlane_b32 s9, v6, 21 ; GCN-NEXT: s_cselect_b32 s9, s9, 1 ; GCN-NEXT: s_and_b32 s9, s9, 1 ; GCN-NEXT: s_or_b32 s5, s9, s5 @@ -1805,11 +1805,11 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec, ; GCN-NEXT: s_or_b32 s3, s5, s3 ; GCN-NEXT: s_lshl_b32 s3, s3, 4 ; GCN-NEXT: s_cmp_lg_u32 s8, 3 -; GCN-NEXT: v_readlane_b32 s5, v0, 20 +; GCN-NEXT: v_readlane_b32 s5, v6, 20 ; GCN-NEXT: s_cselect_b32 s5, s5, 1 ; GCN-NEXT: s_lshl_b32 s5, s5, 3 ; GCN-NEXT: s_cmp_lg_u32 s8, 2 -; GCN-NEXT: v_readlane_b32 s9, v0, 19 +; GCN-NEXT: v_readlane_b32 s9, v6, 19 ; GCN-NEXT: s_cselect_b32 s9, s9, 1 ; GCN-NEXT: s_and_b32 s9, s9, 1 ; GCN-NEXT: s_lshl_b32 s9, s9, 2 @@ -1818,7 +1818,7 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec, ; GCN-NEXT: s_cselect_b32 s4, s4, 1 ; GCN-NEXT: s_and_b32 s4, s4, 1 ; GCN-NEXT: s_cmp_lg_u32 s8, 1 -; GCN-NEXT: v_readlane_b32 s8, v0, 18 +; GCN-NEXT: v_readlane_b32 s8, v6, 18 ; GCN-NEXT: s_cselect_b32 s8, s8, 1 ; GCN-NEXT: s_lshl_b32 s8, s8, 1 ; GCN-NEXT: s_or_b32 s4, s4, s8 @@ -1830,16 +1830,15 @@ define amdgpu_kernel void @bit128_inselt(ptr addrspace(1) %out, <128 x i1> %vec, ; GCN-NEXT: s_or_b32 s2, s3, s2 ; GCN-NEXT: s_and_b32 s2, s2, 0xffff ; GCN-NEXT: s_or_b32 s1, s2, s1 -; GCN-NEXT: v_mov_b32_e32 v1, s1 -; GCN-NEXT: v_mov_b32_e32 v2, s0 -; GCN-NEXT: v_readlane_b32 s0, v0, 0 -; GCN-NEXT: v_readlane_b32 s1, v0, 1 -; GCN-NEXT: v_mov_b32_e32 v6, s1 -; GCN-NEXT: v_mov_b32_e32 v3, s6 -; GCN-NEXT: v_mov_b32_e32 v4, s7 -; GCN-NEXT: v_mov_b32_e32 v5, s0 -; GCN-NEXT: flat_store_dwordx4 v[5:6], v[1:4] -; GCN-NEXT: ; kill: killed $vgpr0 +; GCN-NEXT: v_mov_b32_e32 v0, s1 +; GCN-NEXT: 
v_mov_b32_e32 v1, s0 +; GCN-NEXT: v_readlane_b32 s0, v6, 0 +; GCN-NEXT: v_readlane_b32 s1, v6, 1 +; GCN-NEXT: v_mov_b32_e32 v5, s1 +; GCN-NEXT: v_mov_b32_e32 v2, s6 +; GCN-NEXT: v_mov_b32_e32 v3, s7 +; GCN-NEXT: v_mov_b32_e32 v4, s0 +; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_endpgm entry: %v = insertelement <128 x i1> %vec, i1 1, i32 %sel diff --git a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll index ec446f1f3bf27..7b195f8e86220 100644 --- a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll +++ b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll @@ -13,22 +13,14 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 { ; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 ; CHECK-NEXT: s_add_u32 s0, s0, s15 ; CHECK-NEXT: s_addc_u32 s1, s1, 0 -; CHECK-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane ; CHECK-NEXT: s_mov_b64 s[10:11], s[8:9] ; CHECK-NEXT: v_mov_b32_e32 v3, v2 ; CHECK-NEXT: v_mov_b32_e32 v2, v1 ; CHECK-NEXT: v_mov_b32_e32 v1, v0 -; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1 -; CHECK-NEXT: s_add_i32 s8, s33, 0x100200 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s8 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b64 exec, s[34:35] ; CHECK-NEXT: s_load_dword s8, s[6:7], 0x0 -; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_writelane_b32 v0, s8, 0 -; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1 -; CHECK-NEXT: s_add_i32 s8, s33, 0x100200 -; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s8 ; 4-byte Folded Spill -; CHECK-NEXT: s_mov_b64 exec, s[34:35] +; CHECK-NEXT: ; implicit-def: $vgpr40 : SGPR spill to VGPR lane +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_writelane_b32 v40, s8, 0 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; def vgpr10 ; CHECK-NEXT: ;;#ASMEND @@ -62,14 +54,9 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 { ; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23] ; CHECK-NEXT: s_waitcnt lgkmcnt(0) 
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] -; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1 -; CHECK-NEXT: s_add_i32 s4, s33, 0x100200 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b64 exec, s[34:35] ; CHECK-NEXT: s_add_i32 s4, s33, 0x100100 ; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s4 ; 4-byte Folded Reload -; CHECK-NEXT: s_waitcnt vmcnt(1) -; CHECK-NEXT: v_readlane_b32 s4, v0, 0 +; CHECK-NEXT: v_readlane_b32 s4, v40, 0 ; CHECK-NEXT: s_mov_b32 s5, 0 ; CHECK-NEXT: s_cmp_eq_u32 s4, s5 ; CHECK-NEXT: v_mov_b32_e32 v0, 0x4000 @@ -77,24 +64,14 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 { ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], s33 offen ; 4-byte Folded Spill ; CHECK-NEXT: s_cbranch_scc1 .LBB0_2 ; CHECK-NEXT: ; %bb.1: ; %store -; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1 -; CHECK-NEXT: s_add_i32 s4, s33, 0x100200 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b64 exec, s[34:35] ; CHECK-NEXT: s_add_i32 s4, s33, 0x100000 -; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s4 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s4 ; 4-byte Folded Reload ; CHECK-NEXT: ; implicit-def: $sgpr4 -; CHECK-NEXT: v_mov_b32_e32 v1, s4 +; CHECK-NEXT: v_mov_b32_e32 v0, s4 ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: ds_write_b32 v1, v2 -; CHECK-NEXT: ; kill: killed $vgpr0 +; CHECK-NEXT: ds_write_b32 v0, v1 ; CHECK-NEXT: s_endpgm ; CHECK-NEXT: .LBB0_2: ; %end -; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1 -; CHECK-NEXT: s_add_i32 s4, s33, 0x100200 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b64 exec, s[34:35] -; CHECK-NEXT: ; kill: killed $vgpr0 ; CHECK-NEXT: s_endpgm %arr = alloca < 1339 x i32>, align 8192, addrspace(5) %cmp = icmp ne i32 %val, 0 diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index c9e24b721c41e..b192fdec15739 100644 
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -123,6 +123,8 @@ ; GCN-O0-NEXT: SI Pre-allocate WWM Registers ; GCN-O0-NEXT: Fast Register Allocator ; GCN-O0-NEXT: SI Lower WWM Copies +; GCN-O0-NEXT: AMDGPU Reserve WWM Registers +; GCN-O0-NEXT: Fast Register Allocator ; GCN-O0-NEXT: SI Fix VGPR copies ; GCN-O0-NEXT: Remove Redundant DEBUG_VALUE analysis ; GCN-O0-NEXT: Fixup Statepoint Caller Saved @@ -370,6 +372,11 @@ ; GCN-O1-NEXT: SI Pre-allocate WWM Registers ; GCN-O1-NEXT: Greedy Register Allocator ; GCN-O1-NEXT: SI Lower WWM Copies +; GCN-O1-NEXT: Virtual Register Rewriter +; GCN-O1-NEXT: AMDGPU Reserve WWM Registers +; GCN-O1-NEXT: Virtual Register Map +; GCN-O1-NEXT: Live Register Matrix +; GCN-O1-NEXT: Greedy Register Allocator ; GCN-O1-NEXT: GCN NSA Reassign ; GCN-O1-NEXT: Virtual Register Rewriter ; GCN-O1-NEXT: AMDGPU Mark Last Scratch Load @@ -673,6 +680,11 @@ ; GCN-O1-OPTS-NEXT: SI Pre-allocate WWM Registers ; GCN-O1-OPTS-NEXT: Greedy Register Allocator ; GCN-O1-OPTS-NEXT: SI Lower WWM Copies +; GCN-O1-OPTS-NEXT: Virtual Register Rewriter +; GCN-O1-OPTS-NEXT: AMDGPU Reserve WWM Registers +; GCN-O1-OPTS-NEXT: Virtual Register Map +; GCN-O1-OPTS-NEXT: Live Register Matrix +; GCN-O1-OPTS-NEXT: Greedy Register Allocator ; GCN-O1-OPTS-NEXT: GCN NSA Reassign ; GCN-O1-OPTS-NEXT: Virtual Register Rewriter ; GCN-O1-OPTS-NEXT: AMDGPU Mark Last Scratch Load @@ -982,6 +994,11 @@ ; GCN-O2-NEXT: SI Pre-allocate WWM Registers ; GCN-O2-NEXT: Greedy Register Allocator ; GCN-O2-NEXT: SI Lower WWM Copies +; GCN-O2-NEXT: Virtual Register Rewriter +; GCN-O2-NEXT: AMDGPU Reserve WWM Registers +; GCN-O2-NEXT: Virtual Register Map +; GCN-O2-NEXT: Live Register Matrix +; GCN-O2-NEXT: Greedy Register Allocator ; GCN-O2-NEXT: GCN NSA Reassign ; GCN-O2-NEXT: Virtual Register Rewriter ; GCN-O2-NEXT: AMDGPU Mark Last Scratch Load @@ -1303,6 +1320,11 @@ ; GCN-O3-NEXT: SI Pre-allocate WWM Registers ; GCN-O3-NEXT: Greedy Register 
Allocator ; GCN-O3-NEXT: SI Lower WWM Copies +; GCN-O3-NEXT: Virtual Register Rewriter +; GCN-O3-NEXT: AMDGPU Reserve WWM Registers +; GCN-O3-NEXT: Virtual Register Map +; GCN-O3-NEXT: Live Register Matrix +; GCN-O3-NEXT: Greedy Register Allocator ; GCN-O3-NEXT: GCN NSA Reassign ; GCN-O3-NEXT: Virtual Register Rewriter ; GCN-O3-NEXT: AMDGPU Mark Last Scratch Load diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll index db88ddf1807f3..32abe50ff04d8 100644 --- a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll @@ -8759,11 +8759,11 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o ; GFX8-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0 ; GFX8-NEXT: s_add_u32 s88, s88, s9 ; GFX8-NEXT: s_addc_u32 s89, s89, 0 -; GFX8-NEXT: ; implicit-def: $vgpr44 : SGPR spill to VGPR lane +; GFX8-NEXT: ; implicit-def: $vgpr62 : SGPR spill to VGPR lane ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_lshr_b32 s0, s3, 8 -; GFX8-NEXT: v_writelane_b32 v44, s0, 0 -; GFX8-NEXT: v_writelane_b32 v44, s1, 1 +; GFX8-NEXT: v_writelane_b32 v62, s0, 0 +; GFX8-NEXT: v_writelane_b32 v62, s1, 1 ; GFX8-NEXT: s_lshr_b32 s0, s2, 1 ; GFX8-NEXT: s_lshr_b32 s36, s3, 21 ; GFX8-NEXT: s_lshr_b32 s30, s3, 19 @@ -8789,7 +8789,7 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o ; GFX8-NEXT: s_lshr_b32 s54, s3, 10 ; GFX8-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x10000 ; GFX8-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x10000 -; GFX8-NEXT: v_writelane_b32 v44, s0, 2 +; GFX8-NEXT: v_writelane_b32 v62, s0, 2 ; GFX8-NEXT: s_lshr_b32 s52, s3, 11 ; GFX8-NEXT: s_bfe_i64 s[74:75], s[74:75], 0x10000 ; GFX8-NEXT: s_bfe_i64 s[72:73], s[72:73], 0x10000 @@ -8814,7 +8814,7 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o ; GFX8-NEXT: s_bfe_i64 s[30:31], s[44:45], 0x10000 ; GFX8-NEXT: s_bfe_i64 s[36:37], s[38:39], 0x10000 ; GFX8-NEXT: s_bfe_i64 
s[18:19], s[18:19], 0x10000 -; GFX8-NEXT: v_writelane_b32 v44, s1, 3 +; GFX8-NEXT: v_writelane_b32 v62, s1, 3 ; GFX8-NEXT: s_lshr_b32 s6, s3, 9 ; GFX8-NEXT: s_lshr_b32 s8, s3, 6 ; GFX8-NEXT: s_lshr_b32 s10, s3, 7 @@ -8830,7 +8830,7 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o ; GFX8-NEXT: v_mov_b32_e32 v4, s74 ; GFX8-NEXT: v_mov_b32_e32 v8, s72 ; GFX8-NEXT: v_mov_b32_e32 v0, s70 -; GFX8-NEXT: v_mov_b32_e32 v55, s68 +; GFX8-NEXT: v_mov_b32_e32 v54, s68 ; GFX8-NEXT: v_mov_b32_e32 v20, s66 ; GFX8-NEXT: v_mov_b32_e32 v16, s64 ; GFX8-NEXT: v_mov_b32_e32 v24, s62 @@ -8851,7 +8851,7 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o ; GFX8-NEXT: v_mov_b32_e32 v2, s46 ; GFX8-NEXT: s_lshr_b32 s70, s2, 21 ; GFX8-NEXT: s_lshr_b32 s68, s2, 18 -; GFX8-NEXT: v_mov_b32_e32 v57, s42 +; GFX8-NEXT: v_mov_b32_e32 v56, s42 ; GFX8-NEXT: s_lshr_b32 s66, s2, 19 ; GFX8-NEXT: s_lshr_b32 s64, s2, 16 ; GFX8-NEXT: v_mov_b32_e32 v22, s40 @@ -8876,16 +8876,16 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o ; GFX8-NEXT: s_lshr_b32 s36, s2, 2 ; GFX8-NEXT: s_lshr_b32 s30, s2, 3 ; GFX8-NEXT: s_bfe_i64 s[18:19], s[2:3], 0x10000 -; GFX8-NEXT: v_readlane_b32 s2, v44, 0 -; GFX8-NEXT: v_readlane_b32 s3, v44, 1 +; GFX8-NEXT: v_readlane_b32 s2, v62, 0 +; GFX8-NEXT: v_readlane_b32 s3, v62, 1 ; GFX8-NEXT: v_mov_b32_e32 v5, s75 ; GFX8-NEXT: v_mov_b32_e32 v7, s51 ; GFX8-NEXT: v_mov_b32_e32 v9, s73 ; GFX8-NEXT: v_mov_b32_e32 v11, s49 ; GFX8-NEXT: v_mov_b32_e32 v1, s71 ; GFX8-NEXT: v_mov_b32_e32 v3, s47 -; GFX8-NEXT: v_mov_b32_e32 v56, s69 -; GFX8-NEXT: v_mov_b32_e32 v58, s43 +; GFX8-NEXT: v_mov_b32_e32 v55, s69 +; GFX8-NEXT: v_mov_b32_e32 v57, s43 ; GFX8-NEXT: v_mov_b32_e32 v21, s67 ; GFX8-NEXT: v_mov_b32_e32 v23, s41 ; GFX8-NEXT: v_mov_b32_e32 v17, s65 @@ -8942,24 +8942,24 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o ; GFX8-NEXT: v_mov_b32_e32 v42, s2 ; GFX8-NEXT: 
s_add_u32 s2, s4, 0x1e0 ; GFX8-NEXT: s_addc_u32 s3, s5, 0 -; GFX8-NEXT: v_mov_b32_e32 v46, s3 -; GFX8-NEXT: v_mov_b32_e32 v45, s2 +; GFX8-NEXT: v_mov_b32_e32 v45, s3 +; GFX8-NEXT: v_mov_b32_e32 v44, s2 ; GFX8-NEXT: s_add_u32 s2, s4, 0x1d0 ; GFX8-NEXT: s_addc_u32 s3, s5, 0 -; GFX8-NEXT: v_mov_b32_e32 v48, s3 -; GFX8-NEXT: v_mov_b32_e32 v47, s2 +; GFX8-NEXT: v_mov_b32_e32 v47, s3 +; GFX8-NEXT: v_mov_b32_e32 v46, s2 ; GFX8-NEXT: s_add_u32 s2, s4, 0x1c0 ; GFX8-NEXT: s_addc_u32 s3, s5, 0 -; GFX8-NEXT: v_mov_b32_e32 v50, s3 -; GFX8-NEXT: v_mov_b32_e32 v49, s2 +; GFX8-NEXT: v_mov_b32_e32 v49, s3 +; GFX8-NEXT: v_mov_b32_e32 v48, s2 ; GFX8-NEXT: s_add_u32 s2, s4, 0x1b0 ; GFX8-NEXT: s_addc_u32 s3, s5, 0 -; GFX8-NEXT: v_mov_b32_e32 v52, s3 -; GFX8-NEXT: v_mov_b32_e32 v51, s2 +; GFX8-NEXT: v_mov_b32_e32 v51, s3 +; GFX8-NEXT: v_mov_b32_e32 v50, s2 ; GFX8-NEXT: s_add_u32 s2, s4, 0x1a0 ; GFX8-NEXT: s_addc_u32 s3, s5, 0 -; GFX8-NEXT: v_mov_b32_e32 v54, s3 -; GFX8-NEXT: v_mov_b32_e32 v53, s2 +; GFX8-NEXT: v_mov_b32_e32 v53, s3 +; GFX8-NEXT: v_mov_b32_e32 v52, s2 ; GFX8-NEXT: s_add_u32 s2, s4, 0x190 ; GFX8-NEXT: s_addc_u32 s3, s5, 0 ; GFX8-NEXT: v_mov_b32_e32 v15, s3 @@ -8971,26 +8971,26 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o ; GFX8-NEXT: buffer_store_dword v12, off, s[88:91], 0 ; 4-byte Folded Spill ; GFX8-NEXT: buffer_store_dword v13, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill ; GFX8-NEXT: flat_store_dwordx4 v[42:43], v[4:7] -; GFX8-NEXT: flat_store_dwordx4 v[45:46], v[8:11] -; GFX8-NEXT: flat_store_dwordx4 v[47:48], v[0:3] -; GFX8-NEXT: flat_store_dwordx4 v[49:50], v[55:58] -; GFX8-NEXT: flat_store_dwordx4 v[51:52], v[20:23] -; GFX8-NEXT: flat_store_dwordx4 v[53:54], v[16:19] +; GFX8-NEXT: flat_store_dwordx4 v[44:45], v[8:11] +; GFX8-NEXT: flat_store_dwordx4 v[46:47], v[0:3] +; GFX8-NEXT: flat_store_dwordx4 v[48:49], v[54:57] +; GFX8-NEXT: flat_store_dwordx4 v[50:51], v[20:23] +; GFX8-NEXT: flat_store_dwordx4 v[52:53], v[16:19] ; 
GFX8-NEXT: flat_store_dwordx4 v[14:15], v[24:27] ; GFX8-NEXT: buffer_load_dword v18, off, s[88:91], 0 ; 4-byte Folded Reload ; GFX8-NEXT: buffer_load_dword v19, off, s[88:91], 0 offset:4 ; 4-byte Folded Reload ; GFX8-NEXT: s_add_u32 s2, s4, 0x170 ; GFX8-NEXT: s_addc_u32 s3, s5, 0 -; GFX8-NEXT: v_mov_b32_e32 v60, s3 -; GFX8-NEXT: v_mov_b32_e32 v59, s2 +; GFX8-NEXT: v_mov_b32_e32 v59, s3 +; GFX8-NEXT: v_mov_b32_e32 v58, s2 ; GFX8-NEXT: s_add_u32 s2, s4, 0x160 ; GFX8-NEXT: s_addc_u32 s3, s5, 0 -; GFX8-NEXT: v_mov_b32_e32 v62, s3 -; GFX8-NEXT: v_mov_b32_e32 v61, s2 +; GFX8-NEXT: v_mov_b32_e32 v61, s3 +; GFX8-NEXT: v_mov_b32_e32 v60, s2 ; GFX8-NEXT: s_add_u32 s2, s4, 0x150 ; GFX8-NEXT: s_addc_u32 s3, s5, 0 -; GFX8-NEXT: v_mov_b32_e32 v46, s3 -; GFX8-NEXT: v_mov_b32_e32 v45, s2 +; GFX8-NEXT: v_mov_b32_e32 v45, s3 +; GFX8-NEXT: v_mov_b32_e32 v44, s2 ; GFX8-NEXT: s_add_u32 s2, s4, 0x140 ; GFX8-NEXT: s_addc_u32 s3, s5, 0 ; GFX8-NEXT: v_mov_b32_e32 v6, s0 @@ -9021,9 +9021,9 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o ; GFX8-NEXT: v_mov_b32_e32 v11, s15 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: flat_store_dwordx4 v[18:19], v[28:31] -; GFX8-NEXT: flat_store_dwordx4 v[59:60], v[32:35] -; GFX8-NEXT: flat_store_dwordx4 v[61:62], v[36:39] -; GFX8-NEXT: flat_store_dwordx4 v[45:46], v[40:43] +; GFX8-NEXT: flat_store_dwordx4 v[58:59], v[32:35] +; GFX8-NEXT: flat_store_dwordx4 v[60:61], v[36:39] +; GFX8-NEXT: flat_store_dwordx4 v[44:45], v[40:43] ; GFX8-NEXT: flat_store_dwordx4 v[12:13], v[4:7] ; GFX8-NEXT: flat_store_dwordx4 v[16:17], v[0:3] ; GFX8-NEXT: flat_store_dwordx4 v[14:15], v[8:11] @@ -9177,9 +9177,9 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o ; GFX8-NEXT: v_mov_b32_e32 v2, s30 ; GFX8-NEXT: v_mov_b32_e32 v3, s31 ; GFX8-NEXT: v_mov_b32_e32 v4, s0 -; GFX8-NEXT: v_readlane_b32 s0, v44, 2 +; GFX8-NEXT: v_readlane_b32 s0, v62, 2 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] -; GFX8-NEXT: 
v_readlane_b32 s1, v44, 3 +; GFX8-NEXT: v_readlane_b32 s1, v62, 3 ; GFX8-NEXT: v_mov_b32_e32 v4, s4 ; GFX8-NEXT: v_mov_b32_e32 v0, s18 ; GFX8-NEXT: v_mov_b32_e32 v1, s19 @@ -9187,7 +9187,6 @@ define amdgpu_kernel void @constant_sextload_v64i1_to_v64i64(ptr addrspace(1) %o ; GFX8-NEXT: v_mov_b32_e32 v3, s1 ; GFX8-NEXT: v_mov_b32_e32 v5, s5 ; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] -; GFX8-NEXT: ; kill: killed $vgpr44 ; GFX8-NEXT: s_endpgm ; ; EG-LABEL: constant_sextload_v64i1_to_v64i64: diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll index 9829b7e787d47..e9cd94620a6b9 100644 --- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll +++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll @@ -1520,9 +1520,9 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX7-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX7-NEXT: s_add_i32 s6, s32, 0x202000 ; GFX7-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill -; GFX7-NEXT: s_add_i32 s6, s32, 0x201200 +; GFX7-NEXT: s_add_i32 s6, s32, 0x202100 ; GFX7-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[4:5] ; GFX7-NEXT: v_writelane_b32 v23, s28, 28 @@ -1562,36 +1562,57 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX7-NEXT: ;;#ASMEND -; GFX7-NEXT: ; implicit-def: $vgpr22 -; GFX7-NEXT: v_writelane_b32 v23, s59, 27 +; GFX7-NEXT: buffer_store_dword v16, off, s[0:3], s32 +; GFX7-NEXT: v_mov_b32_e32 v16, 0x8040 +; GFX7-NEXT: buffer_store_dword v0, v16, s[0:3], s32 offen ; 4-byte Folded Spill +; GFX7-NEXT: buffer_store_dword v1, v16, s[0:3], s32 offen offset:4 ; 4-byte Folded Spill 
+; GFX7-NEXT: buffer_store_dword v2, v16, s[0:3], s32 offen offset:8 ; 4-byte Folded Spill +; GFX7-NEXT: buffer_store_dword v3, v16, s[0:3], s32 offen offset:12 ; 4-byte Folded Spill +; GFX7-NEXT: buffer_store_dword v4, v16, s[0:3], s32 offen offset:16 ; 4-byte Folded Spill +; GFX7-NEXT: buffer_store_dword v5, v16, s[0:3], s32 offen offset:20 ; 4-byte Folded Spill +; GFX7-NEXT: buffer_store_dword v6, v16, s[0:3], s32 offen offset:24 ; 4-byte Folded Spill +; GFX7-NEXT: buffer_store_dword v7, v16, s[0:3], s32 offen offset:28 ; 4-byte Folded Spill +; GFX7-NEXT: buffer_store_dword v8, v16, s[0:3], s32 offen offset:32 ; 4-byte Folded Spill +; GFX7-NEXT: buffer_store_dword v9, v16, s[0:3], s32 offen offset:36 ; 4-byte Folded Spill +; GFX7-NEXT: buffer_store_dword v10, v16, s[0:3], s32 offen offset:40 ; 4-byte Folded Spill +; GFX7-NEXT: buffer_store_dword v11, v16, s[0:3], s32 offen offset:44 ; 4-byte Folded Spill +; GFX7-NEXT: buffer_store_dword v12, v16, s[0:3], s32 offen offset:48 ; 4-byte Folded Spill +; GFX7-NEXT: buffer_store_dword v13, v16, s[0:3], s32 offen offset:52 ; 4-byte Folded Spill +; GFX7-NEXT: buffer_store_dword v14, v16, s[0:3], s32 offen offset:56 ; 4-byte Folded Spill +; GFX7-NEXT: buffer_store_dword v15, v16, s[0:3], s32 offen offset:60 ; 4-byte Folded Spill +; GFX7-NEXT: buffer_load_dword v16, off, s[0:3], s32 +; GFX7-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane +; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6 ; GFX7-NEXT: v_writelane_b32 v22, vcc_lo, 0 ; GFX7-NEXT: v_writelane_b32 v22, vcc_hi, 1 -; GFX7-NEXT: s_or_saveexec_b64 s[28:29], -1 -; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s32 -; GFX7-NEXT: v_mov_b32_e32 v0, 0x8044 -; GFX7-NEXT: buffer_store_dword v22, v0, s[0:3], s32 offen ; 4-byte Folded Spill -; GFX7-NEXT: s_mov_b64 exec, s[28:29] -; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s32 -; GFX7-NEXT: v_lshr_b32_e64 v22, s32, 6 ; GFX7-NEXT: s_movk_i32 vcc_lo, 0x4040 -; GFX7-NEXT: v_add_i32_e32 v22, vcc, vcc_lo, v22 -; GFX7-NEXT: 
v_add_i32_e32 v22, vcc, 0x200, v22 -; GFX7-NEXT: v_readfirstlane_b32 s59, v22 +; GFX7-NEXT: v_add_i32_e32 v0, vcc, vcc_lo, v0 +; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0x200, v0 +; GFX7-NEXT: v_writelane_b32 v23, s59, 27 +; GFX7-NEXT: v_readfirstlane_b32 s59, v0 ; GFX7-NEXT: s_and_b64 vcc, 0, exec -; GFX7-NEXT: s_mov_b64 s[28:29], exec -; GFX7-NEXT: s_mov_b64 exec, -1 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s32 -; GFX7-NEXT: v_mov_b32_e32 v0, 0x8044 -; GFX7-NEXT: buffer_load_dword v22, v0, s[0:3], s32 offen ; 4-byte Folded Reload -; GFX7-NEXT: s_mov_b64 exec, s[28:29] -; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s32 -; GFX7-NEXT: s_waitcnt vmcnt(1) ; GFX7-NEXT: v_readlane_b32 vcc_lo, v22, 0 ; GFX7-NEXT: v_readlane_b32 vcc_hi, v22, 1 -; GFX7-NEXT: s_mov_b64 s[28:29], exec -; GFX7-NEXT: s_mov_b64 exec, -1 -; GFX7-NEXT: s_mov_b64 exec, s[28:29] +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: buffer_store_dword v16, off, s[0:3], s32 +; GFX7-NEXT: v_mov_b32_e32 v16, 0x8040 +; GFX7-NEXT: buffer_load_dword v0, v16, s[0:3], s32 offen ; 4-byte Folded Reload +; GFX7-NEXT: buffer_load_dword v1, v16, s[0:3], s32 offen offset:4 ; 4-byte Folded Reload +; GFX7-NEXT: buffer_load_dword v2, v16, s[0:3], s32 offen offset:8 ; 4-byte Folded Reload +; GFX7-NEXT: buffer_load_dword v3, v16, s[0:3], s32 offen offset:12 ; 4-byte Folded Reload +; GFX7-NEXT: buffer_load_dword v4, v16, s[0:3], s32 offen offset:16 ; 4-byte Folded Reload +; GFX7-NEXT: buffer_load_dword v5, v16, s[0:3], s32 offen offset:20 ; 4-byte Folded Reload +; GFX7-NEXT: buffer_load_dword v6, v16, s[0:3], s32 offen offset:24 ; 4-byte Folded Reload +; GFX7-NEXT: buffer_load_dword v7, v16, s[0:3], s32 offen offset:28 ; 4-byte Folded Reload +; GFX7-NEXT: buffer_load_dword v8, v16, s[0:3], s32 offen offset:32 ; 4-byte Folded Reload +; GFX7-NEXT: buffer_load_dword v9, v16, s[0:3], s32 offen offset:36 ; 4-byte Folded Reload +; GFX7-NEXT: buffer_load_dword v10, v16, s[0:3], s32 offen 
offset:40 ; 4-byte Folded Reload +; GFX7-NEXT: buffer_load_dword v11, v16, s[0:3], s32 offen offset:44 ; 4-byte Folded Reload +; GFX7-NEXT: buffer_load_dword v12, v16, s[0:3], s32 offen offset:48 ; 4-byte Folded Reload +; GFX7-NEXT: buffer_load_dword v13, v16, s[0:3], s32 offen offset:52 ; 4-byte Folded Reload +; GFX7-NEXT: buffer_load_dword v14, v16, s[0:3], s32 offen offset:56 ; 4-byte Folded Reload +; GFX7-NEXT: buffer_load_dword v15, v16, s[0:3], s32 offen offset:60 ; 4-byte Folded Reload +; GFX7-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc @@ -1624,13 +1645,12 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX7-NEXT: v_readlane_b32 s33, v23, 2 ; GFX7-NEXT: v_readlane_b32 s31, v23, 1 ; GFX7-NEXT: v_readlane_b32 s30, v23, 0 -; GFX7-NEXT: ; kill: killed $vgpr22 ; GFX7-NEXT: v_readlane_b32 s28, v23, 28 ; GFX7-NEXT: v_readlane_b32 s29, v23, 29 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX7-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX7-NEXT: s_add_i32 s6, s32, 0x202000 ; GFX7-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload -; GFX7-NEXT: s_add_i32 s6, s32, 0x201200 +; GFX7-NEXT: s_add_i32 s6, s32, 0x202100 ; GFX7-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload ; GFX7-NEXT: s_mov_b64 exec, s[4:5] ; GFX7-NEXT: s_waitcnt vmcnt(0) @@ -1640,9 +1660,9 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX8-NEXT: s_add_i32 s6, s32, 0x202000 ; GFX8-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill -; GFX8-NEXT: s_add_i32 s6, s32, 0x201200 +; GFX8-NEXT: s_add_i32 s6, s32, 0x202100 ; GFX8-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 
4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: v_writelane_b32 v23, s58, 28 @@ -1682,36 +1702,60 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX8-NEXT: ;;#ASMEND -; GFX8-NEXT: ; implicit-def: $vgpr22 -; GFX8-NEXT: v_writelane_b32 v23, s59, 27 +; GFX8-NEXT: buffer_store_dword v16, off, s[0:3], s32 +; GFX8-NEXT: v_mov_b32_e32 v16, 0x8040 +; GFX8-NEXT: buffer_store_dword v0, v16, s[0:3], s32 offen ; 4-byte Folded Spill +; GFX8-NEXT: s_nop 0 +; GFX8-NEXT: buffer_store_dword v1, v16, s[0:3], s32 offen offset:4 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v2, v16, s[0:3], s32 offen offset:8 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v3, v16, s[0:3], s32 offen offset:12 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v4, v16, s[0:3], s32 offen offset:16 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v5, v16, s[0:3], s32 offen offset:20 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v6, v16, s[0:3], s32 offen offset:24 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v7, v16, s[0:3], s32 offen offset:28 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v8, v16, s[0:3], s32 offen offset:32 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v9, v16, s[0:3], s32 offen offset:36 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v10, v16, s[0:3], s32 offen offset:40 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v11, v16, s[0:3], s32 offen offset:44 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v12, v16, s[0:3], s32 offen offset:48 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v13, v16, s[0:3], s32 offen offset:52 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v14, v16, s[0:3], s32 offen offset:56 ; 4-byte Folded Spill +; GFX8-NEXT: buffer_store_dword v15, v16, s[0:3], s32 offen offset:60 ; 4-byte Folded 
Spill +; GFX8-NEXT: buffer_load_dword v16, off, s[0:3], s32 +; GFX8-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane +; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX8-NEXT: v_writelane_b32 v22, vcc_lo, 0 ; GFX8-NEXT: v_writelane_b32 v22, vcc_hi, 1 -; GFX8-NEXT: s_or_saveexec_b64 s[58:59], -1 -; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s32 -; GFX8-NEXT: v_mov_b32_e32 v0, 0x8044 -; GFX8-NEXT: buffer_store_dword v22, v0, s[0:3], s32 offen ; 4-byte Folded Spill -; GFX8-NEXT: s_mov_b64 exec, s[58:59] -; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s32 -; GFX8-NEXT: v_lshrrev_b32_e64 v22, 6, s32 ; GFX8-NEXT: s_movk_i32 vcc_lo, 0x4040 -; GFX8-NEXT: v_add_u32_e32 v22, vcc, vcc_lo, v22 -; GFX8-NEXT: v_add_u32_e32 v22, vcc, 0x200, v22 -; GFX8-NEXT: v_readfirstlane_b32 s59, v22 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, vcc_lo, v0 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x200, v0 +; GFX8-NEXT: v_writelane_b32 v23, s59, 27 +; GFX8-NEXT: v_readfirstlane_b32 s59, v0 ; GFX8-NEXT: s_and_b64 vcc, 0, exec -; GFX8-NEXT: s_mov_b64 s[58:59], exec -; GFX8-NEXT: s_mov_b64 exec, -1 -; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s32 -; GFX8-NEXT: v_mov_b32_e32 v0, 0x8044 -; GFX8-NEXT: buffer_load_dword v22, v0, s[0:3], s32 offen ; 4-byte Folded Reload -; GFX8-NEXT: s_mov_b64 exec, s[58:59] -; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s32 -; GFX8-NEXT: s_waitcnt vmcnt(1) ; GFX8-NEXT: v_readlane_b32 vcc_lo, v22, 0 ; GFX8-NEXT: v_readlane_b32 vcc_hi, v22, 1 -; GFX8-NEXT: s_mov_b64 s[58:59], exec -; GFX8-NEXT: s_mov_b64 exec, -1 -; GFX8-NEXT: s_mov_b64 exec, s[58:59] +; GFX8-NEXT: v_readlane_b32 s58, v23, 28 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: buffer_store_dword v16, off, s[0:3], s32 +; GFX8-NEXT: v_mov_b32_e32 v16, 0x8040 +; GFX8-NEXT: buffer_load_dword v0, v16, s[0:3], s32 offen ; 4-byte Folded Reload +; GFX8-NEXT: buffer_load_dword v1, v16, s[0:3], s32 offen offset:4 ; 4-byte Folded Reload +; GFX8-NEXT: buffer_load_dword v2, v16, 
s[0:3], s32 offen offset:8 ; 4-byte Folded Reload +; GFX8-NEXT: buffer_load_dword v3, v16, s[0:3], s32 offen offset:12 ; 4-byte Folded Reload +; GFX8-NEXT: buffer_load_dword v4, v16, s[0:3], s32 offen offset:16 ; 4-byte Folded Reload +; GFX8-NEXT: buffer_load_dword v5, v16, s[0:3], s32 offen offset:20 ; 4-byte Folded Reload +; GFX8-NEXT: buffer_load_dword v6, v16, s[0:3], s32 offen offset:24 ; 4-byte Folded Reload +; GFX8-NEXT: buffer_load_dword v7, v16, s[0:3], s32 offen offset:28 ; 4-byte Folded Reload +; GFX8-NEXT: buffer_load_dword v8, v16, s[0:3], s32 offen offset:32 ; 4-byte Folded Reload +; GFX8-NEXT: buffer_load_dword v9, v16, s[0:3], s32 offen offset:36 ; 4-byte Folded Reload +; GFX8-NEXT: buffer_load_dword v10, v16, s[0:3], s32 offen offset:40 ; 4-byte Folded Reload +; GFX8-NEXT: buffer_load_dword v11, v16, s[0:3], s32 offen offset:44 ; 4-byte Folded Reload +; GFX8-NEXT: buffer_load_dword v12, v16, s[0:3], s32 offen offset:48 ; 4-byte Folded Reload +; GFX8-NEXT: buffer_load_dword v13, v16, s[0:3], s32 offen offset:52 ; 4-byte Folded Reload +; GFX8-NEXT: buffer_load_dword v14, v16, s[0:3], s32 offen offset:56 ; 4-byte Folded Reload +; GFX8-NEXT: buffer_load_dword v15, v16, s[0:3], s32 offen offset:60 ; 4-byte Folded Reload +; GFX8-NEXT: s_nop 0 +; GFX8-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc @@ -1744,13 +1788,11 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX8-NEXT: v_readlane_b32 s33, v23, 2 ; GFX8-NEXT: v_readlane_b32 s31, v23, 1 ; GFX8-NEXT: v_readlane_b32 s30, v23, 0 -; GFX8-NEXT: ; kill: killed $vgpr22 -; GFX8-NEXT: v_readlane_b32 s58, v23, 28 ; GFX8-NEXT: v_readlane_b32 s59, v23, 29 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX8-NEXT: s_add_i32 s6, s32, 0x202000 ; GFX8-NEXT: buffer_load_dword v23, off, 
s[0:3], s6 ; 4-byte Folded Reload -; GFX8-NEXT: s_add_i32 s6, s32, 0x201200 +; GFX8-NEXT: s_add_i32 s6, s32, 0x202100 ; GFX8-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload ; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll index c302233e748fd..76a31a7fac8c1 100644 --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll @@ -141,112 +141,103 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 { ; W64-O0: ; %bb.0: ; W64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] -; W64-O0-NEXT: ; implicit-def: $vgpr5 : SGPR spill to VGPR lane ; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; W64-O0-NEXT: v_mov_b32_e32 v5, v3 -; W64-O0-NEXT: v_mov_b32_e32 v6, v2 -; W64-O0-NEXT: v_mov_b32_e32 v7, v1 -; W64-O0-NEXT: v_mov_b32_e32 v1, v0 -; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: v_mov_b32_e32 v4, v3 +; W64-O0-NEXT: v_mov_b32_e32 v5, v2 +; W64-O0-NEXT: v_mov_b32_e32 v6, v1 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 -; W64-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec -; W64-O0-NEXT: v_mov_b32_e32 v2, v7 -; W64-O0-NEXT: v_mov_b32_e32 
v3, v6 -; W64-O0-NEXT: v_mov_b32_e32 v4, v5 -; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; W64-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v1, v6 +; W64-O0-NEXT: v_mov_b32_e32 v2, v5 +; W64-O0-NEXT: v_mov_b32_e32 v3, v4 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; W64-O0-NEXT: s_nop 0 -; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7 ; W64-O0-NEXT: s_mov_b32 s4, 0 -; W64-O0-NEXT: s_waitcnt vmcnt(4) -; W64-O0-NEXT: v_writelane_b32 v0, s4, 0 +; W64-O0-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane +; W64-O0-NEXT: v_writelane_b32 v7, s4, 0 ; W64-O0-NEXT: s_mov_b64 s[4:5], exec -; W64-O0-NEXT: v_writelane_b32 v0, s4, 1 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 2 +; W64-O0-NEXT: v_writelane_b32 v7, s4, 1 +; W64-O0-NEXT: v_writelane_b32 v7, s5, 2 ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte 
Folded Reload +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(4) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 ; W64-O0-NEXT: s_waitcnt vmcnt(3) -; W64-O0-NEXT: v_readfirstlane_b32 s8, v1 -; W64-O0-NEXT: s_waitcnt vmcnt(2) -; W64-O0-NEXT: v_readfirstlane_b32 s12, v2 +; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 ; W64-O0-NEXT: s_mov_b32 s4, s8 ; W64-O0-NEXT: s_mov_b32 s5, s12 -; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2] +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] +; W64-O0-NEXT: s_waitcnt vmcnt(2) +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 ; W64-O0-NEXT: s_waitcnt vmcnt(1) -; W64-O0-NEXT: v_readfirstlane_b32 s7, v3 -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readfirstlane_b32 s6, v4 +; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4] +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3] ; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11] ; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 ; W64-O0-NEXT: s_mov_b32 s9, s12 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: v_writelane_b32 v0, s8, 3 -; W64-O0-NEXT: v_writelane_b32 v0, s9, 4 -; W64-O0-NEXT: v_writelane_b32 v0, s10, 5 -; W64-O0-NEXT: v_writelane_b32 
v0, s11, 6 +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_writelane_b32 v7, s8, 3 +; W64-O0-NEXT: v_writelane_b32 v7, s9, 4 +; W64-O0-NEXT: v_writelane_b32 v7, s10, 5 +; W64-O0-NEXT: v_writelane_b32 v7, s11, 6 ; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; W64-O0-NEXT: v_writelane_b32 v0, s4, 7 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 8 +; W64-O0-NEXT: v_writelane_b32 v7, s4, 7 +; W64-O0-NEXT: v_writelane_b32 v7, s5, 8 ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readlane_b32 s4, v1, 7 -; W64-O0-NEXT: v_readlane_b32 s5, v1, 8 -; W64-O0-NEXT: v_readlane_b32 s8, v1, 3 -; W64-O0-NEXT: v_readlane_b32 s9, v1, 4 -; W64-O0-NEXT: v_readlane_b32 s10, v1, 5 -; W64-O0-NEXT: v_readlane_b32 s11, v1, 6 -; W64-O0-NEXT: v_readlane_b32 s6, v1, 0 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: s_nop 2 +; W64-O0-NEXT: v_readlane_b32 s4, v7, 7 +; W64-O0-NEXT: v_readlane_b32 s5, v7, 8 +; W64-O0-NEXT: v_readlane_b32 s8, v7, 3 +; W64-O0-NEXT: v_readlane_b32 s9, v7, 4 +; W64-O0-NEXT: v_readlane_b32 s10, v7, 5 +; W64-O0-NEXT: v_readlane_b32 s11, v7, 6 +; W64-O0-NEXT: v_readlane_b32 s6, v7, 0 +; W64-O0-NEXT: s_nop 4 ; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 
offset:24 ; 4-byte Folded Spill ; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5] ; W64-O0-NEXT: s_cbranch_execnz .LBB0_1 ; W64-O0-NEXT: ; %bb.3: +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readlane_b32 s4, v1, 1 -; W64-O0-NEXT: v_readlane_b32 s5, v1, 2 +; W64-O0-NEXT: v_readlane_b32 s4, v7, 1 +; W64-O0-NEXT: v_readlane_b32 s5, v7, 2 ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; W64-O0-NEXT: ; kill: killed $vgpr1 ; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; W64-O0-NEXT: s_nop 0 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: s_setpc_b64 s[30:31] @@ -498,34 +489,32 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt ; W64-O0: ; %bb.0: ; %entry ; W64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] -; W64-O0-NEXT: ; implicit-def: $vgpr13 : SGPR spill to VGPR lane ; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; W64-O0-NEXT: buffer_store_dword v9, 
off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill ; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill ; W64-O0-NEXT: v_mov_b32_e32 v13, v4 -; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; W64-O0-NEXT: v_mov_b32_e32 v7, v3 -; W64-O0-NEXT: v_mov_b32_e32 v8, v2 -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; W64-O0-NEXT: v_mov_b32_e32 v9, v1 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; W64-O0-NEXT: v_mov_b32_e32 v3, v0 -; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload +; W64-O0-NEXT: v_mov_b32_e32 v6, v3 +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload +; W64-O0-NEXT: v_mov_b32_e32 v7, v2 +; W64-O0-NEXT: v_mov_b32_e32 v8, v1 +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload +; W64-O0-NEXT: v_mov_b32_e32 v2, v0 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14_vgpr15_vgpr16 killed $exec ; W64-O0-NEXT: v_mov_b32_e32 v14, v5 -; W64-O0-NEXT: v_mov_b32_e32 v15, v6 ; W64-O0-NEXT: s_waitcnt vmcnt(3) -; W64-O0-NEXT: v_mov_b32_e32 v16, v4 +; W64-O0-NEXT: v_mov_b32_e32 v15, v4 +; W64-O0-NEXT: s_waitcnt vmcnt(2) +; W64-O0-NEXT: 
v_mov_b32_e32 v16, v3 ; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill ; W64-O0-NEXT: s_nop 0 ; W64-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill @@ -535,195 +524,192 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 -; W64-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4_vgpr5_vgpr6 killed $exec -; W64-O0-NEXT: v_mov_b32_e32 v4, v9 -; W64-O0-NEXT: v_mov_b32_e32 v5, v8 -; W64-O0-NEXT: v_mov_b32_e32 v6, v7 -; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; W64-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v3, v8 +; W64-O0-NEXT: v_mov_b32_e32 v4, v7 +; W64-O0-NEXT: v_mov_b32_e32 v5, v6 +; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; W64-O0-NEXT: s_nop 0 -; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 -; W64-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; W64-O0-NEXT: v_mov_b32_e32 v3, v12 -; W64-O0-NEXT: s_waitcnt vmcnt(10) -; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; W64-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 
v2, v12 +; W64-O0-NEXT: s_waitcnt vmcnt(9) +; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; W64-O0-NEXT: s_nop 0 -; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 -; W64-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; W64-O0-NEXT: v_mov_b32_e32 v2, v10 -; W64-O0-NEXT: s_waitcnt vmcnt(11) -; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; W64-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v1, v10 +; W64-O0-NEXT: s_waitcnt vmcnt(10) +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; W64-O0-NEXT: s_nop 0 -; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7 ; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7 ; W64-O0-NEXT: s_mov_b32 s4, 0 -; W64-O0-NEXT: s_waitcnt vmcnt(12) -; W64-O0-NEXT: v_writelane_b32 v0, s4, 0 +; W64-O0-NEXT: ; implicit-def: $vgpr17 : SGPR spill to VGPR lane +; W64-O0-NEXT: v_writelane_b32 v17, s4, 0 ; W64-O0-NEXT: s_mov_b64 s[4:5], exec -; W64-O0-NEXT: v_writelane_b32 v0, s4, 1 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 2 +; W64-O0-NEXT: v_writelane_b32 v17, s4, 1 +; W64-O0-NEXT: v_writelane_b32 v17, s5, 2 ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: .LBB1_1: ; =>This Inner Loop 
Header: Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(4) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 ; W64-O0-NEXT: s_waitcnt vmcnt(3) -; W64-O0-NEXT: v_readfirstlane_b32 s8, v1 -; W64-O0-NEXT: s_waitcnt vmcnt(2) -; W64-O0-NEXT: v_readfirstlane_b32 s12, v2 +; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 ; W64-O0-NEXT: s_mov_b32 s4, s8 ; W64-O0-NEXT: s_mov_b32 s5, s12 -; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2] +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] +; W64-O0-NEXT: s_waitcnt vmcnt(2) +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 ; W64-O0-NEXT: s_waitcnt vmcnt(1) -; W64-O0-NEXT: v_readfirstlane_b32 s7, v3 -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readfirstlane_b32 s6, v4 +; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4] +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3] ; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11] ; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def 
$sgpr8_sgpr9_sgpr10_sgpr11 ; W64-O0-NEXT: s_mov_b32 s9, s12 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: v_writelane_b32 v0, s8, 3 -; W64-O0-NEXT: v_writelane_b32 v0, s9, 4 -; W64-O0-NEXT: v_writelane_b32 v0, s10, 5 -; W64-O0-NEXT: v_writelane_b32 v0, s11, 6 +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_writelane_b32 v17, s8, 3 +; W64-O0-NEXT: v_writelane_b32 v17, s9, 4 +; W64-O0-NEXT: v_writelane_b32 v17, s10, 5 +; W64-O0-NEXT: v_writelane_b32 v17, s11, 6 ; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; W64-O0-NEXT: v_writelane_b32 v0, s4, 7 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 8 +; W64-O0-NEXT: v_writelane_b32 v17, s4, 7 +; W64-O0-NEXT: v_writelane_b32 v17, s5, 8 ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: ; %bb.2: ; in Loop: Header=BB1_1 Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readlane_b32 s4, v1, 7 -; W64-O0-NEXT: v_readlane_b32 s5, v1, 8 -; W64-O0-NEXT: v_readlane_b32 s8, v1, 3 -; W64-O0-NEXT: v_readlane_b32 s9, v1, 4 -; W64-O0-NEXT: v_readlane_b32 s10, v1, 5 -; W64-O0-NEXT: v_readlane_b32 s11, v1, 6 -; W64-O0-NEXT: v_readlane_b32 s6, v1, 0 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: s_nop 2 +; W64-O0-NEXT: v_readlane_b32 s4, v17, 7 +; W64-O0-NEXT: v_readlane_b32 s5, v17, 8 +; W64-O0-NEXT: v_readlane_b32 s8, v17, 3 +; W64-O0-NEXT: v_readlane_b32 s9, v17, 4 +; 
W64-O0-NEXT: v_readlane_b32 s10, v17, 5 +; W64-O0-NEXT: v_readlane_b32 s11, v17, 6 +; W64-O0-NEXT: v_readlane_b32 s6, v17, 0 +; W64-O0-NEXT: s_nop 4 ; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill ; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5] ; W64-O0-NEXT: s_cbranch_execnz .LBB1_1 ; W64-O0-NEXT: ; %bb.3: ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readlane_b32 s4, v0, 1 -; W64-O0-NEXT: v_readlane_b32 s5, v0, 2 +; W64-O0-NEXT: v_readlane_b32 s4, v17, 1 +; W64-O0-NEXT: v_readlane_b32 s5, v17, 2 ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] ; W64-O0-NEXT: s_mov_b64 s[4:5], exec -; W64-O0-NEXT: v_writelane_b32 v0, s4, 9 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 10 +; W64-O0-NEXT: v_writelane_b32 v17, s4, 9 +; W64-O0-NEXT: v_writelane_b32 v17, s5, 10 ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: .LBB1_4: ; =>This Inner Loop Header: Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload 
+; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(4) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 ; W64-O0-NEXT: s_waitcnt vmcnt(3) -; W64-O0-NEXT: v_readfirstlane_b32 s8, v1 -; W64-O0-NEXT: s_waitcnt vmcnt(2) -; W64-O0-NEXT: v_readfirstlane_b32 s12, v2 +; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 ; W64-O0-NEXT: s_mov_b32 s4, s8 ; W64-O0-NEXT: s_mov_b32 s5, s12 -; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2] +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] +; W64-O0-NEXT: s_waitcnt vmcnt(2) +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 ; W64-O0-NEXT: s_waitcnt vmcnt(1) -; W64-O0-NEXT: v_readfirstlane_b32 s7, v3 -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readfirstlane_b32 s6, v4 +; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4] +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3] ; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11] ; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 ; W64-O0-NEXT: s_mov_b32 s9, s12 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: v_writelane_b32 v0, s8, 11 -; W64-O0-NEXT: v_writelane_b32 v0, s9, 12 -; W64-O0-NEXT: v_writelane_b32 v0, s10, 13 -; W64-O0-NEXT: v_writelane_b32 v0, s11, 14 +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_writelane_b32 v17, s8, 11 +; W64-O0-NEXT: v_writelane_b32 v17, s9, 12 +; W64-O0-NEXT: v_writelane_b32 v17, s10, 13 +; W64-O0-NEXT: v_writelane_b32 v17, s11, 
14 ; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; W64-O0-NEXT: v_writelane_b32 v0, s4, 15 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 16 +; W64-O0-NEXT: v_writelane_b32 v17, s4, 15 +; W64-O0-NEXT: v_writelane_b32 v17, s5, 16 ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: ; %bb.5: ; in Loop: Header=BB1_4 Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readlane_b32 s4, v1, 15 -; W64-O0-NEXT: v_readlane_b32 s5, v1, 16 -; W64-O0-NEXT: v_readlane_b32 s8, v1, 11 -; W64-O0-NEXT: v_readlane_b32 s9, v1, 12 -; W64-O0-NEXT: v_readlane_b32 s10, v1, 13 -; W64-O0-NEXT: v_readlane_b32 s11, v1, 14 -; W64-O0-NEXT: v_readlane_b32 s6, v1, 0 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: s_nop 2 +; W64-O0-NEXT: v_readlane_b32 s4, v17, 15 +; W64-O0-NEXT: v_readlane_b32 s5, v17, 16 +; W64-O0-NEXT: v_readlane_b32 s8, v17, 11 +; W64-O0-NEXT: v_readlane_b32 s9, v17, 12 +; W64-O0-NEXT: v_readlane_b32 s10, v17, 13 +; W64-O0-NEXT: v_readlane_b32 s11, v17, 14 +; W64-O0-NEXT: v_readlane_b32 s6, v17, 0 +; W64-O0-NEXT: s_nop 4 ; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5] ; W64-O0-NEXT: 
s_cbranch_execnz .LBB1_4 ; W64-O0-NEXT: ; %bb.6: +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readlane_b32 s4, v0, 9 -; W64-O0-NEXT: v_readlane_b32 s5, v0, 10 +; W64-O0-NEXT: v_readlane_b32 s4, v17, 9 +; W64-O0-NEXT: v_readlane_b32 s5, v17, 10 ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] -; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: global_store_dword v[4:5], v6, off +; W64-O0-NEXT: global_store_dword v[3:4], v5, off ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: global_store_dword v[1:2], v3, off +; W64-O0-NEXT: global_store_dword v[0:1], v2, off ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: ; kill: killed $vgpr0 ; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; W64-O0-NEXT: 
buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: s_setpc_b64 s[30:31] @@ -1031,262 +1017,253 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad ; W64-O0: ; %bb.0: ; %entry ; W64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] -; W64-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane ; W64-O0-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; W64-O0-NEXT: v_mov_b32_e32 v8, v6 -; W64-O0-NEXT: v_mov_b32_e32 v9, v5 -; W64-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload +; W64-O0-NEXT: v_mov_b32_e32 v8, v5 +; W64-O0-NEXT: v_mov_b32_e32 v5, v4 +; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload ; W64-O0-NEXT: s_nop 0 -; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; W64-O0-NEXT: v_mov_b32_e32 v10, v3 -; W64-O0-NEXT: v_mov_b32_e32 v11, v2 -; W64-O0-NEXT: v_mov_b32_e32 v13, v1 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; W64-O0-NEXT: v_mov_b32_e32 v6, v0 -; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: 
buffer_store_dword v5, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; W64-O0-NEXT: v_mov_b32_e32 v9, v3 +; W64-O0-NEXT: v_mov_b32_e32 v10, v2 +; W64-O0-NEXT: v_mov_b32_e32 v11, v1 +; W64-O0-NEXT: v_mov_b32_e32 v5, v0 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 -; W64-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec -; W64-O0-NEXT: v_mov_b32_e32 v2, v9 -; W64-O0-NEXT: v_mov_b32_e32 v3, v8 -; W64-O0-NEXT: v_mov_b32_e32 v4, v7 +; W64-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v1, v8 +; W64-O0-NEXT: v_mov_b32_e32 v2, v6 +; W64-O0-NEXT: v_mov_b32_e32 v3, v7 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 -; W64-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7_vgpr8_vgpr9 killed $exec -; W64-O0-NEXT: v_mov_b32_e32 v7, v13 -; W64-O0-NEXT: v_mov_b32_e32 v8, v11 -; W64-O0-NEXT: v_mov_b32_e32 v9, v10 -; W64-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; W64-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6_vgpr7_vgpr8 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v6, v11 +; W64-O0-NEXT: v_mov_b32_e32 v7, v10 +; W64-O0-NEXT: v_mov_b32_e32 v8, v9 +; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_nop 0 -; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; W64-O0-NEXT: 
buffer_store_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 -; W64-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; W64-O0-NEXT: v_mov_b32_e32 v6, v12 -; W64-O0-NEXT: s_waitcnt vmcnt(7) -; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; W64-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v5, v12 +; W64-O0-NEXT: s_waitcnt vmcnt(6) +; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; W64-O0-NEXT: s_nop 0 -; W64-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; W64-O0-NEXT: s_waitcnt vmcnt(7) -; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; W64-O0-NEXT: s_waitcnt vmcnt(6) +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; W64-O0-NEXT: s_nop 0 -; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7 ; W64-O0-NEXT: ;;#ASMSTART ; W64-O0-NEXT: s_mov_b32 s4, 17 ; W64-O0-NEXT: ;;#ASMEND ; W64-O0-NEXT: s_mov_b32 s5, s4 -; W64-O0-NEXT: s_waitcnt vmcnt(10) -; W64-O0-NEXT: v_writelane_b32 v0, s5, 0 +; W64-O0-NEXT: ; implicit-def: $vgpr13 
: SGPR spill to VGPR lane +; W64-O0-NEXT: v_writelane_b32 v13, s5, 0 ; W64-O0-NEXT: s_mov_b32 s5, 0 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 1 -; W64-O0-NEXT: v_mov_b32_e32 v1, s4 -; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; W64-O0-NEXT: v_writelane_b32 v13, s5, 1 +; W64-O0-NEXT: v_mov_b32_e32 v0, s4 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 s[4:5], exec -; W64-O0-NEXT: v_writelane_b32 v0, s4, 2 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 3 +; W64-O0-NEXT: v_writelane_b32 v13, s4, 2 +; W64-O0-NEXT: v_writelane_b32 v13, s5, 3 ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(4) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 ; W64-O0-NEXT: s_waitcnt 
vmcnt(3) -; W64-O0-NEXT: v_readfirstlane_b32 s8, v1 -; W64-O0-NEXT: s_waitcnt vmcnt(2) -; W64-O0-NEXT: v_readfirstlane_b32 s12, v2 +; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 ; W64-O0-NEXT: s_mov_b32 s4, s8 ; W64-O0-NEXT: s_mov_b32 s5, s12 -; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2] +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] +; W64-O0-NEXT: s_waitcnt vmcnt(2) +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 ; W64-O0-NEXT: s_waitcnt vmcnt(1) -; W64-O0-NEXT: v_readfirstlane_b32 s7, v3 -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readfirstlane_b32 s6, v4 +; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4] +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3] ; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11] ; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 ; W64-O0-NEXT: s_mov_b32 s9, s12 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: v_writelane_b32 v0, s8, 4 -; W64-O0-NEXT: v_writelane_b32 v0, s9, 5 -; W64-O0-NEXT: v_writelane_b32 v0, s10, 6 -; W64-O0-NEXT: v_writelane_b32 v0, s11, 7 +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_writelane_b32 v13, s8, 4 +; W64-O0-NEXT: v_writelane_b32 v13, s9, 5 +; W64-O0-NEXT: v_writelane_b32 v13, s10, 6 +; W64-O0-NEXT: v_writelane_b32 v13, s11, 7 ; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; W64-O0-NEXT: v_writelane_b32 v0, s4, 8 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 9 +; W64-O0-NEXT: v_writelane_b32 v13, s4, 8 +; W64-O0-NEXT: v_writelane_b32 v13, s5, 9 ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: ; %bb.2: ; in Loop: Header=BB2_1 Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 
offset:4 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readlane_b32 s4, v1, 8 -; W64-O0-NEXT: v_readlane_b32 s5, v1, 9 -; W64-O0-NEXT: v_readlane_b32 s8, v1, 4 -; W64-O0-NEXT: v_readlane_b32 s9, v1, 5 -; W64-O0-NEXT: v_readlane_b32 s10, v1, 6 -; W64-O0-NEXT: v_readlane_b32 s11, v1, 7 -; W64-O0-NEXT: v_readlane_b32 s6, v1, 1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: s_nop 2 +; W64-O0-NEXT: v_readlane_b32 s4, v13, 8 +; W64-O0-NEXT: v_readlane_b32 s5, v13, 9 +; W64-O0-NEXT: v_readlane_b32 s8, v13, 4 +; W64-O0-NEXT: v_readlane_b32 s9, v13, 5 +; W64-O0-NEXT: v_readlane_b32 s10, v13, 6 +; W64-O0-NEXT: v_readlane_b32 s11, v13, 7 +; W64-O0-NEXT: v_readlane_b32 s6, v13, 1 +; W64-O0-NEXT: s_nop 4 ; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5] ; W64-O0-NEXT: s_cbranch_execnz .LBB2_1 ; W64-O0-NEXT: ; %bb.3: +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readlane_b32 s6, v0, 2 -; W64-O0-NEXT: v_readlane_b32 s7, v0, 3 +; W64-O0-NEXT: v_readlane_b32 s6, v13, 2 +; W64-O0-NEXT: v_readlane_b32 s7, v13, 3 ; W64-O0-NEXT: s_mov_b64 
exec, s[6:7] -; W64-O0-NEXT: v_readlane_b32 s4, v0, 1 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload +; W64-O0-NEXT: v_readlane_b32 s4, v13, 1 ; W64-O0-NEXT: s_mov_b32 s5, 0x3ff -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_and_b32_e64 v2, v2, s5 -; W64-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v2, s4 -; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; W64-O0-NEXT: v_and_b32_e64 v1, v1, s5 +; W64-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v1, s4 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 s[4:5], exec -; W64-O0-NEXT: v_writelane_b32 v0, s4, 10 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 11 +; W64-O0-NEXT: v_writelane_b32 v13, s4, 10 +; W64-O0-NEXT: v_writelane_b32 v13, s5, 11 ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] ; W64-O0-NEXT: s_cbranch_execz .LBB2_8 ; W64-O0-NEXT: ; %bb.4: ; %bb1 ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readlane_b32 s4, v0, 0 +; W64-O0-NEXT: v_readlane_b32 s4, v13, 0 ; W64-O0-NEXT: s_mov_b32 s5, 0 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 12 -; W64-O0-NEXT: v_mov_b32_e32 v1, s4 -; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill +; W64-O0-NEXT: v_writelane_b32 v13, s5, 12 +; W64-O0-NEXT: v_mov_b32_e32 v0, s4 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], 
s32 offset:68 ; 4-byte Folded Spill ; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7 ; W64-O0-NEXT: s_mov_b64 s[4:5], exec -; W64-O0-NEXT: v_writelane_b32 v0, s4, 13 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 14 +; W64-O0-NEXT: v_writelane_b32 v13, s4, 13 +; W64-O0-NEXT: v_writelane_b32 v13, s5, 14 ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: .LBB2_5: ; =>This Inner Loop Header: Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(4) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 ; W64-O0-NEXT: s_waitcnt vmcnt(3) -; W64-O0-NEXT: v_readfirstlane_b32 s8, v1 -; W64-O0-NEXT: s_waitcnt vmcnt(2) -; W64-O0-NEXT: v_readfirstlane_b32 s12, v2 +; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 ; W64-O0-NEXT: s_mov_b32 s4, s8 ; W64-O0-NEXT: s_mov_b32 s5, s12 -; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2] +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], 
v[0:1] +; W64-O0-NEXT: s_waitcnt vmcnt(2) +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 ; W64-O0-NEXT: s_waitcnt vmcnt(1) -; W64-O0-NEXT: v_readfirstlane_b32 s7, v3 -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readfirstlane_b32 s6, v4 +; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4] +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3] ; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11] ; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 ; W64-O0-NEXT: s_mov_b32 s9, s12 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: v_writelane_b32 v0, s8, 15 -; W64-O0-NEXT: v_writelane_b32 v0, s9, 16 -; W64-O0-NEXT: v_writelane_b32 v0, s10, 17 -; W64-O0-NEXT: v_writelane_b32 v0, s11, 18 +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_writelane_b32 v13, s8, 15 +; W64-O0-NEXT: v_writelane_b32 v13, s9, 16 +; W64-O0-NEXT: v_writelane_b32 v13, s10, 17 +; W64-O0-NEXT: v_writelane_b32 v13, s11, 18 ; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; W64-O0-NEXT: v_writelane_b32 v0, s4, 19 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 20 +; W64-O0-NEXT: v_writelane_b32 v13, s4, 19 +; W64-O0-NEXT: v_writelane_b32 v13, s5, 20 ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: ; %bb.6: ; in Loop: Header=BB2_5 Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: 
v_readlane_b32 s4, v1, 19 -; W64-O0-NEXT: v_readlane_b32 s5, v1, 20 -; W64-O0-NEXT: v_readlane_b32 s8, v1, 15 -; W64-O0-NEXT: v_readlane_b32 s9, v1, 16 -; W64-O0-NEXT: v_readlane_b32 s10, v1, 17 -; W64-O0-NEXT: v_readlane_b32 s11, v1, 18 -; W64-O0-NEXT: v_readlane_b32 s6, v1, 12 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: s_nop 2 +; W64-O0-NEXT: v_readlane_b32 s4, v13, 19 +; W64-O0-NEXT: v_readlane_b32 s5, v13, 20 +; W64-O0-NEXT: v_readlane_b32 s8, v13, 15 +; W64-O0-NEXT: v_readlane_b32 s9, v13, 16 +; W64-O0-NEXT: v_readlane_b32 s10, v13, 17 +; W64-O0-NEXT: v_readlane_b32 s11, v13, 18 +; W64-O0-NEXT: v_readlane_b32 s6, v13, 12 +; W64-O0-NEXT: s_nop 4 ; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill ; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5] ; W64-O0-NEXT: s_cbranch_execnz .LBB2_5 ; W64-O0-NEXT: ; %bb.7: +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readlane_b32 s4, v1, 13 -; W64-O0-NEXT: v_readlane_b32 s5, v1, 14 +; W64-O0-NEXT: v_readlane_b32 s4, v13, 13 +; W64-O0-NEXT: v_readlane_b32 s5, v13, 14 ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill ; W64-O0-NEXT: .LBB2_8: ; %bb2 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: 
buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readlane_b32 s4, v0, 10 -; W64-O0-NEXT: v_readlane_b32 s5, v0, 11 +; W64-O0-NEXT: v_readlane_b32 s4, v13, 10 +; W64-O0-NEXT: v_readlane_b32 s5, v13, 11 ; W64-O0-NEXT: s_or_b64 exec, exec, s[4:5] -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: global_store_dword v[1:2], v3, off +; W64-O0-NEXT: global_store_dword v[0:1], v2, off ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: ; kill: killed $vgpr0 ; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll index dd6fd5aa384f6..59ceecbf43b78 100644 --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll @@ -140,127 +140,115 @@ define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 { ; W64-O0: ; %bb.0: ; W64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; W64-O0-NEXT: 
s_xor_saveexec_b64 s[4:5], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] -; W64-O0-NEXT: ; implicit-def: $vgpr5 : SGPR spill to VGPR lane -; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; W64-O0-NEXT: v_mov_b32_e32 v6, v2 -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; W64-O0-NEXT: v_mov_b32_e32 v3, v1 -; W64-O0-NEXT: v_mov_b32_e32 v1, v0 -; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; W64-O0-NEXT: v_mov_b32_e32 v5, v2 +; W64-O0-NEXT: v_mov_b32_e32 v2, v1 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 -; W64-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec -; W64-O0-NEXT: s_waitcnt vmcnt(1) -; W64-O0-NEXT: v_mov_b32_e32 v7, v2 -; W64-O0-NEXT: v_mov_b32_e32 v5, v7 -; W64-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec +; W64-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v6, v3 +; W64-O0-NEXT: v_mov_b32_e32 v4, v6 +; W64-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 -; W64-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; W64-O0-NEXT: v_mov_b32_e32 v2, v3 -; W64-O0-NEXT: v_mov_b32_e32 v7, v2 -; W64-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr1_vgpr2 killed $exec +; 
W64-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v1, v2 +; W64-O0-NEXT: v_mov_b32_e32 v6, v1 +; W64-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 -; W64-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec -; W64-O0-NEXT: v_mov_b32_e32 v2, v7 -; W64-O0-NEXT: v_mov_b32_e32 v3, v6 -; W64-O0-NEXT: v_mov_b32_e32 v4, v5 -; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; W64-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v1, v6 +; W64-O0-NEXT: v_mov_b32_e32 v2, v5 +; W64-O0-NEXT: v_mov_b32_e32 v3, v4 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; W64-O0-NEXT: s_nop 0 -; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; W64-O0-NEXT: s_mov_b32 s4, 0 -; W64-O0-NEXT: s_waitcnt vmcnt(4) -; W64-O0-NEXT: v_writelane_b32 v0, s4, 0 +; W64-O0-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane +; W64-O0-NEXT: v_writelane_b32 v7, s4, 0 ; W64-O0-NEXT: s_mov_b64 s[4:5], exec -; W64-O0-NEXT: v_writelane_b32 v0, s4, 1 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 2 +; W64-O0-NEXT: v_writelane_b32 v7, s4, 1 +; W64-O0-NEXT: 
v_writelane_b32 v7, s5, 2 ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(4) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 ; W64-O0-NEXT: s_waitcnt vmcnt(3) -; W64-O0-NEXT: v_readfirstlane_b32 s8, v1 -; W64-O0-NEXT: s_waitcnt vmcnt(2) -; W64-O0-NEXT: v_readfirstlane_b32 s12, v2 +; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 ; W64-O0-NEXT: s_mov_b32 s4, s8 ; W64-O0-NEXT: s_mov_b32 s5, s12 -; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2] +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] +; W64-O0-NEXT: s_waitcnt vmcnt(2) +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 ; W64-O0-NEXT: s_waitcnt vmcnt(1) -; W64-O0-NEXT: v_readfirstlane_b32 s7, v3 -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readfirstlane_b32 s6, v4 +; W64-O0-NEXT: v_readfirstlane_b32 s6, 
v3 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4] +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3] ; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11] ; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 ; W64-O0-NEXT: s_mov_b32 s9, s12 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: v_writelane_b32 v0, s8, 3 -; W64-O0-NEXT: v_writelane_b32 v0, s9, 4 -; W64-O0-NEXT: v_writelane_b32 v0, s10, 5 -; W64-O0-NEXT: v_writelane_b32 v0, s11, 6 +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_writelane_b32 v7, s8, 3 +; W64-O0-NEXT: v_writelane_b32 v7, s9, 4 +; W64-O0-NEXT: v_writelane_b32 v7, s10, 5 +; W64-O0-NEXT: v_writelane_b32 v7, s11, 6 ; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; W64-O0-NEXT: v_writelane_b32 v0, s4, 7 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 8 +; W64-O0-NEXT: v_writelane_b32 v7, s4, 7 +; W64-O0-NEXT: v_writelane_b32 v7, s5, 8 ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readlane_b32 s4, v1, 7 -; W64-O0-NEXT: v_readlane_b32 s5, v1, 8 -; W64-O0-NEXT: v_readlane_b32 s8, v1, 3 -; W64-O0-NEXT: v_readlane_b32 s9, v1, 4 -; W64-O0-NEXT: v_readlane_b32 s10, v1, 5 -; W64-O0-NEXT: v_readlane_b32 s11, v1, 6 -; W64-O0-NEXT: v_readlane_b32 s6, v1, 0 -; W64-O0-NEXT: buffer_load_dword v0, 
off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: s_nop 2 +; W64-O0-NEXT: v_readlane_b32 s4, v7, 7 +; W64-O0-NEXT: v_readlane_b32 s5, v7, 8 +; W64-O0-NEXT: v_readlane_b32 s8, v7, 3 +; W64-O0-NEXT: v_readlane_b32 s9, v7, 4 +; W64-O0-NEXT: v_readlane_b32 s10, v7, 5 +; W64-O0-NEXT: v_readlane_b32 s11, v7, 6 +; W64-O0-NEXT: v_readlane_b32 s6, v7, 0 +; W64-O0-NEXT: s_nop 4 ; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5] ; W64-O0-NEXT: s_cbranch_execnz .LBB0_1 ; W64-O0-NEXT: ; %bb.3: +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readlane_b32 s4, v1, 1 -; W64-O0-NEXT: v_readlane_b32 s5, v1, 2 +; W64-O0-NEXT: v_readlane_b32 s4, v7, 1 +; W64-O0-NEXT: v_readlane_b32 s5, v7, 2 ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; W64-O0-NEXT: ; kill: killed $vgpr1 ; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; W64-O0-NEXT: s_nop 0 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: s_setpc_b64 s[30:31] @@ -512,45 +500,42 @@ define void 
@mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8) ; W64-O0: ; %bb.0: ; %entry ; W64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] -; W64-O0-NEXT: ; implicit-def: $vgpr13 : SGPR spill to VGPR lane ; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; W64-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; W64-O0-NEXT: v_mov_b32_e32 v14, v6 -; W64-O0-NEXT: v_mov_b32_e32 v9, v5 +; W64-O0-NEXT: v_mov_b32_e32 v8, v5 +; W64-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload ; W64-O0-NEXT: v_mov_b32_e32 v13, v4 -; W64-O0-NEXT: v_mov_b32_e32 v4, v3 -; W64-O0-NEXT: v_mov_b32_e32 v8, v2 -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; W64-O0-NEXT: v_mov_b32_e32 v5, v1 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; W64-O0-NEXT: v_mov_b32_e32 v3, v0 -; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: v_mov_b32_e32 v7, v2 +; W64-O0-NEXT: v_mov_b32_e32 v4, v1 +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload +; W64-O0-NEXT: v_mov_b32_e32 v2, v0 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:52 ; 
4-byte Folded Reload ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec -; W64-O0-NEXT: v_mov_b32_e32 v15, v7 -; W64-O0-NEXT: v_mov_b32_e32 v6, v15 -; W64-O0-NEXT: v_mov_b32_e32 v7, v14 +; W64-O0-NEXT: s_waitcnt vmcnt(2) +; W64-O0-NEXT: v_mov_b32_e32 v15, v5 +; W64-O0-NEXT: v_mov_b32_e32 v5, v15 +; W64-O0-NEXT: v_mov_b32_e32 v6, v14 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec -; W64-O0-NEXT: v_mov_b32_e32 v14, v9 -; W64-O0-NEXT: v_mov_b32_e32 v9, v14 +; W64-O0-NEXT: v_mov_b32_e32 v14, v8 +; W64-O0-NEXT: v_mov_b32_e32 v8, v14 ; W64-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 killed $vgpr13_vgpr14 killed $exec ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14_vgpr15_vgpr16 killed $exec -; W64-O0-NEXT: v_mov_b32_e32 v14, v9 -; W64-O0-NEXT: v_mov_b32_e32 v15, v7 -; W64-O0-NEXT: v_mov_b32_e32 v16, v6 +; W64-O0-NEXT: v_mov_b32_e32 v14, v8 +; W64-O0-NEXT: v_mov_b32_e32 v15, v6 +; W64-O0-NEXT: v_mov_b32_e32 v16, v5 ; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill ; W64-O0-NEXT: s_nop 0 ; W64-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill @@ -558,45 +543,45 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8) ; W64-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 -; W64-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec -; W64-O0-NEXT: v_mov_b32_e32 v9, v4 -; W64-O0-NEXT: v_mov_b32_e32 v7, v9 -; W64-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $vgpr8_vgpr9 
killed $exec +; W64-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v8, v3 +; W64-O0-NEXT: v_mov_b32_e32 v6, v8 +; W64-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr7_vgpr8 killed $exec ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 -; W64-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec -; W64-O0-NEXT: v_mov_b32_e32 v4, v5 -; W64-O0-NEXT: v_mov_b32_e32 v9, v4 -; W64-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec +; W64-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v3, v4 +; W64-O0-NEXT: v_mov_b32_e32 v8, v3 +; W64-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $vgpr2_vgpr3 killed $exec ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 -; W64-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4_vgpr5_vgpr6 killed $exec -; W64-O0-NEXT: v_mov_b32_e32 v4, v9 -; W64-O0-NEXT: v_mov_b32_e32 v5, v8 -; W64-O0-NEXT: v_mov_b32_e32 v6, v7 -; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; W64-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3_vgpr4_vgpr5 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v3, v8 +; W64-O0-NEXT: v_mov_b32_e32 v4, v7 +; W64-O0-NEXT: v_mov_b32_e32 v5, v6 +; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; W64-O0-NEXT: s_nop 0 -; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; 
W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 -; W64-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; W64-O0-NEXT: v_mov_b32_e32 v3, v12 -; W64-O0-NEXT: s_waitcnt vmcnt(10) -; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; W64-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v2, v12 +; W64-O0-NEXT: s_waitcnt vmcnt(9) +; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; W64-O0-NEXT: s_nop 0 -; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 -; W64-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; W64-O0-NEXT: v_mov_b32_e32 v2, v10 -; W64-O0-NEXT: s_waitcnt vmcnt(11) -; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; W64-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v1, v10 +; W64-O0-NEXT: s_waitcnt vmcnt(10) +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; W64-O0-NEXT: s_nop 0 -; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 @@ -604,165 +589,162 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8) ; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; W64-O0-NEXT: s_mov_b32 s4, 0 -; W64-O0-NEXT: s_waitcnt vmcnt(12) -; 
W64-O0-NEXT: v_writelane_b32 v0, s4, 0 +; W64-O0-NEXT: ; implicit-def: $vgpr17 : SGPR spill to VGPR lane +; W64-O0-NEXT: v_writelane_b32 v17, s4, 0 ; W64-O0-NEXT: s_mov_b64 s[4:5], exec -; W64-O0-NEXT: v_writelane_b32 v0, s4, 1 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 2 +; W64-O0-NEXT: v_writelane_b32 v17, s4, 1 +; W64-O0-NEXT: v_writelane_b32 v17, s5, 2 ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(4) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 ; W64-O0-NEXT: s_waitcnt vmcnt(3) -; W64-O0-NEXT: v_readfirstlane_b32 s8, v1 -; W64-O0-NEXT: s_waitcnt vmcnt(2) -; W64-O0-NEXT: v_readfirstlane_b32 s12, v2 +; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 ; W64-O0-NEXT: s_mov_b32 s4, s8 ; W64-O0-NEXT: s_mov_b32 s5, s12 -; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], 
v[1:2] +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] +; W64-O0-NEXT: s_waitcnt vmcnt(2) +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 ; W64-O0-NEXT: s_waitcnt vmcnt(1) -; W64-O0-NEXT: v_readfirstlane_b32 s7, v3 -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readfirstlane_b32 s6, v4 +; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4] +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3] ; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11] ; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 ; W64-O0-NEXT: s_mov_b32 s9, s12 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: v_writelane_b32 v0, s8, 3 -; W64-O0-NEXT: v_writelane_b32 v0, s9, 4 -; W64-O0-NEXT: v_writelane_b32 v0, s10, 5 -; W64-O0-NEXT: v_writelane_b32 v0, s11, 6 +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_writelane_b32 v17, s8, 3 +; W64-O0-NEXT: v_writelane_b32 v17, s9, 4 +; W64-O0-NEXT: v_writelane_b32 v17, s10, 5 +; W64-O0-NEXT: v_writelane_b32 v17, s11, 6 ; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; W64-O0-NEXT: v_writelane_b32 v0, s4, 7 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 8 +; W64-O0-NEXT: v_writelane_b32 v17, s4, 7 +; W64-O0-NEXT: v_writelane_b32 v17, s5, 8 ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: ; %bb.2: ; in Loop: Header=BB1_1 Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: 
s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readlane_b32 s4, v1, 7 -; W64-O0-NEXT: v_readlane_b32 s5, v1, 8 -; W64-O0-NEXT: v_readlane_b32 s8, v1, 3 -; W64-O0-NEXT: v_readlane_b32 s9, v1, 4 -; W64-O0-NEXT: v_readlane_b32 s10, v1, 5 -; W64-O0-NEXT: v_readlane_b32 s11, v1, 6 -; W64-O0-NEXT: v_readlane_b32 s6, v1, 0 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: s_nop 2 +; W64-O0-NEXT: v_readlane_b32 s4, v17, 7 +; W64-O0-NEXT: v_readlane_b32 s5, v17, 8 +; W64-O0-NEXT: v_readlane_b32 s8, v17, 3 +; W64-O0-NEXT: v_readlane_b32 s9, v17, 4 +; W64-O0-NEXT: v_readlane_b32 s10, v17, 5 +; W64-O0-NEXT: v_readlane_b32 s11, v17, 6 +; W64-O0-NEXT: v_readlane_b32 s6, v17, 0 +; W64-O0-NEXT: s_nop 4 ; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill ; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5] ; W64-O0-NEXT: s_cbranch_execnz .LBB1_1 ; W64-O0-NEXT: ; %bb.3: ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readlane_b32 s4, v0, 1 -; W64-O0-NEXT: v_readlane_b32 s5, v0, 2 +; W64-O0-NEXT: v_readlane_b32 s4, v17, 1 +; W64-O0-NEXT: v_readlane_b32 s5, v17, 2 ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] ; W64-O0-NEXT: s_mov_b64 s[4:5], exec -; W64-O0-NEXT: v_writelane_b32 v0, s4, 9 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 10 +; W64-O0-NEXT: v_writelane_b32 v17, s4, 9 +; W64-O0-NEXT: v_writelane_b32 v17, s5, 10 ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: 
buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: .LBB1_4: ; =>This Inner Loop Header: Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(4) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 ; W64-O0-NEXT: s_waitcnt vmcnt(3) -; W64-O0-NEXT: v_readfirstlane_b32 s8, v1 -; W64-O0-NEXT: s_waitcnt vmcnt(2) -; W64-O0-NEXT: v_readfirstlane_b32 s12, v2 +; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 ; W64-O0-NEXT: s_mov_b32 s4, s8 ; W64-O0-NEXT: s_mov_b32 s5, s12 -; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2] +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] +; W64-O0-NEXT: s_waitcnt vmcnt(2) +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 ; W64-O0-NEXT: s_waitcnt vmcnt(1) -; W64-O0-NEXT: v_readfirstlane_b32 s7, v3 -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readfirstlane_b32 s6, v4 +; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4] +; W64-O0-NEXT: v_cmp_eq_u64_e64 
s[10:11], s[10:11], v[2:3] ; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11] ; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 ; W64-O0-NEXT: s_mov_b32 s9, s12 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: v_writelane_b32 v0, s8, 11 -; W64-O0-NEXT: v_writelane_b32 v0, s9, 12 -; W64-O0-NEXT: v_writelane_b32 v0, s10, 13 -; W64-O0-NEXT: v_writelane_b32 v0, s11, 14 +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_writelane_b32 v17, s8, 11 +; W64-O0-NEXT: v_writelane_b32 v17, s9, 12 +; W64-O0-NEXT: v_writelane_b32 v17, s10, 13 +; W64-O0-NEXT: v_writelane_b32 v17, s11, 14 ; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; W64-O0-NEXT: v_writelane_b32 v0, s4, 15 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 16 +; W64-O0-NEXT: v_writelane_b32 v17, s4, 15 +; W64-O0-NEXT: v_writelane_b32 v17, s5, 16 ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: ; %bb.5: ; in Loop: Header=BB1_4 Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readlane_b32 s4, v1, 15 -; W64-O0-NEXT: v_readlane_b32 s5, v1, 16 -; W64-O0-NEXT: v_readlane_b32 s8, v1, 11 -; W64-O0-NEXT: v_readlane_b32 s9, v1, 12 -; W64-O0-NEXT: v_readlane_b32 s10, v1, 13 -; W64-O0-NEXT: v_readlane_b32 s11, v1, 14 -; W64-O0-NEXT: v_readlane_b32 s6, v1, 0 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: s_nop 2 +; W64-O0-NEXT: 
v_readlane_b32 s4, v17, 15 +; W64-O0-NEXT: v_readlane_b32 s5, v17, 16 +; W64-O0-NEXT: v_readlane_b32 s8, v17, 11 +; W64-O0-NEXT: v_readlane_b32 s9, v17, 12 +; W64-O0-NEXT: v_readlane_b32 s10, v17, 13 +; W64-O0-NEXT: v_readlane_b32 s11, v17, 14 +; W64-O0-NEXT: v_readlane_b32 s6, v17, 0 +; W64-O0-NEXT: s_nop 4 ; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill ; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5] ; W64-O0-NEXT: s_cbranch_execnz .LBB1_4 ; W64-O0-NEXT: ; %bb.6: +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readlane_b32 s4, v0, 9 -; W64-O0-NEXT: v_readlane_b32 s5, v0, 10 +; W64-O0-NEXT: v_readlane_b32 s4, v17, 9 +; W64-O0-NEXT: v_readlane_b32 s5, v17, 10 ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] -; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword 
v6, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; W64-O0-NEXT: global_store_dword v[3:4], v5, off ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: global_store_dword v[4:5], v6, off +; W64-O0-NEXT: global_store_dword v[0:1], v2, off ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: global_store_dword v[1:2], v3, off -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: ; kill: killed $vgpr0 ; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: s_setpc_b64 s[30:31] @@ -1070,48 +1052,42 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j, ; W64-O0: ; %bb.0: ; %entry ; W64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] -; W64-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane ; W64-O0-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill ; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; W64-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; W64-O0-NEXT: v_mov_b32_e32 v6, v5 -; W64-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload +; W64-O0-NEXT: 
buffer_store_dword v5, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; W64-O0-NEXT: v_mov_b32_e32 v5, v4 +; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload ; W64-O0-NEXT: s_nop 0 -; W64-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; W64-O0-NEXT: v_mov_b32_e32 v4, v3 -; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; W64-O0-NEXT: v_mov_b32_e32 v13, v2 -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; W64-O0-NEXT: v_mov_b32_e32 v10, v1 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; W64-O0-NEXT: v_mov_b32_e32 v9, v2 +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload +; W64-O0-NEXT: v_mov_b32_e32 v6, v1 +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload ; W64-O0-NEXT: v_mov_b32_e32 v8, v0 -; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; W64-O0-NEXT: s_mov_b64 exec, s[16:17] +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 -; W64-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec -; W64-O0-NEXT: v_mov_b32_e32 v14, v4 -; W64-O0-NEXT: v_mov_b32_e32 v4, v14 -; W64-O0-NEXT: v_mov_b32_e32 v6, v13 +; W64-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v10, v3 +; W64-O0-NEXT: v_mov_b32_e32 v3, v10 +; W64-O0-NEXT: v_mov_b32_e32 v5, v9 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def 
$vgpr8_vgpr9 killed $exec -; W64-O0-NEXT: v_mov_b32_e32 v9, v10 -; W64-O0-NEXT: v_mov_b32_e32 v13, v9 +; W64-O0-NEXT: v_mov_b32_e32 v9, v6 +; W64-O0-NEXT: v_mov_b32_e32 v6, v9 ; W64-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $vgpr8_vgpr9 killed $exec ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9_vgpr10_vgpr11 killed $exec -; W64-O0-NEXT: v_mov_b32_e32 v9, v13 -; W64-O0-NEXT: v_mov_b32_e32 v10, v6 -; W64-O0-NEXT: v_mov_b32_e32 v11, v4 +; W64-O0-NEXT: v_mov_b32_e32 v9, v6 +; W64-O0-NEXT: v_mov_b32_e32 v10, v5 +; W64-O0-NEXT: v_mov_b32_e32 v11, v3 ; W64-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_nop 0 ; W64-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill @@ -1119,251 +1095,246 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j, ; W64-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 -; W64-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; W64-O0-NEXT: v_mov_b32_e32 v6, v7 +; W64-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v5, v7 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 -; W64-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec -; W64-O0-NEXT: s_waitcnt vmcnt(6) -; W64-O0-NEXT: v_mov_b32_e32 v4, v2 +; W64-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; W64-O0-NEXT: s_waitcnt vmcnt(5) +; W64-O0-NEXT: v_mov_b32_e32 v3, v1 ; W64-O0-NEXT: ; implicit-def: $sgpr4 ; W64-O0-NEXT: ; implicit-def: $sgpr4 -; W64-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; W64-O0-NEXT: v_mov_b32_e32 v2, v12 
-; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; W64-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v1, v12 +; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; W64-O0-NEXT: s_nop 0 -; W64-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; W64-O0-NEXT: s_nop 0 -; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; W64-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 -; W64-O0-NEXT: s_waitcnt vmcnt(9) -; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; W64-O0-NEXT: s_waitcnt vmcnt(8) +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; W64-O0-NEXT: s_nop 0 -; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; W64-O0-NEXT: ;;#ASMSTART ; W64-O0-NEXT: s_mov_b32 s4, 17 ; W64-O0-NEXT: ;;#ASMEND ; W64-O0-NEXT: s_mov_b32 s5, s4 -; W64-O0-NEXT: s_waitcnt vmcnt(10) -; W64-O0-NEXT: v_writelane_b32 v0, s5, 0 +; W64-O0-NEXT: ; implicit-def: $vgpr13 : SGPR spill to VGPR lane +; W64-O0-NEXT: v_writelane_b32 v13, s5, 0 ; W64-O0-NEXT: s_mov_b32 s5, 0 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 1 -; W64-O0-NEXT: v_mov_b32_e32 v1, s4 -; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; W64-O0-NEXT: v_writelane_b32 v13, s5, 1 +; W64-O0-NEXT: v_mov_b32_e32 v0, s4 +; W64-O0-NEXT: 
buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 s[4:5], exec -; W64-O0-NEXT: v_writelane_b32 v0, s4, 2 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 3 +; W64-O0-NEXT: v_writelane_b32 v13, s4, 2 +; W64-O0-NEXT: v_writelane_b32 v13, s5, 3 ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(4) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 ; W64-O0-NEXT: s_waitcnt vmcnt(3) -; W64-O0-NEXT: v_readfirstlane_b32 s8, v1 -; W64-O0-NEXT: s_waitcnt vmcnt(2) -; W64-O0-NEXT: v_readfirstlane_b32 s12, v2 +; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 ; W64-O0-NEXT: s_mov_b32 s4, s8 ; W64-O0-NEXT: s_mov_b32 s5, s12 -; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2] +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] +; W64-O0-NEXT: 
s_waitcnt vmcnt(2) +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 ; W64-O0-NEXT: s_waitcnt vmcnt(1) -; W64-O0-NEXT: v_readfirstlane_b32 s7, v3 -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readfirstlane_b32 s6, v4 +; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4] +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3] ; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11] ; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 ; W64-O0-NEXT: s_mov_b32 s9, s12 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: v_writelane_b32 v0, s8, 4 -; W64-O0-NEXT: v_writelane_b32 v0, s9, 5 -; W64-O0-NEXT: v_writelane_b32 v0, s10, 6 -; W64-O0-NEXT: v_writelane_b32 v0, s11, 7 +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_writelane_b32 v13, s8, 4 +; W64-O0-NEXT: v_writelane_b32 v13, s9, 5 +; W64-O0-NEXT: v_writelane_b32 v13, s10, 6 +; W64-O0-NEXT: v_writelane_b32 v13, s11, 7 ; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; W64-O0-NEXT: v_writelane_b32 v0, s4, 8 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 9 +; W64-O0-NEXT: v_writelane_b32 v13, s4, 8 +; W64-O0-NEXT: v_writelane_b32 v13, s5, 9 ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: ; %bb.2: ; in Loop: Header=BB2_1 Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readlane_b32 s4, v1, 8 -; W64-O0-NEXT: 
v_readlane_b32 s5, v1, 9 -; W64-O0-NEXT: v_readlane_b32 s8, v1, 4 -; W64-O0-NEXT: v_readlane_b32 s9, v1, 5 -; W64-O0-NEXT: v_readlane_b32 s10, v1, 6 -; W64-O0-NEXT: v_readlane_b32 s11, v1, 7 -; W64-O0-NEXT: v_readlane_b32 s6, v1, 1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: s_nop 2 +; W64-O0-NEXT: v_readlane_b32 s4, v13, 8 +; W64-O0-NEXT: v_readlane_b32 s5, v13, 9 +; W64-O0-NEXT: v_readlane_b32 s8, v13, 4 +; W64-O0-NEXT: v_readlane_b32 s9, v13, 5 +; W64-O0-NEXT: v_readlane_b32 s10, v13, 6 +; W64-O0-NEXT: v_readlane_b32 s11, v13, 7 +; W64-O0-NEXT: v_readlane_b32 s6, v13, 1 +; W64-O0-NEXT: s_nop 4 ; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill ; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5] ; W64-O0-NEXT: s_cbranch_execnz .LBB2_1 ; W64-O0-NEXT: ; %bb.3: +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readlane_b32 s6, v0, 2 -; W64-O0-NEXT: v_readlane_b32 s7, v0, 3 +; W64-O0-NEXT: v_readlane_b32 s6, v13, 2 +; W64-O0-NEXT: v_readlane_b32 s7, v13, 3 ; W64-O0-NEXT: s_mov_b64 exec, s[6:7] -; W64-O0-NEXT: v_readlane_b32 s4, v0, 1 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload +; W64-O0-NEXT: v_readlane_b32 s4, v13, 1 ; W64-O0-NEXT: s_mov_b32 s5, 0x3ff -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: 
v_and_b32_e64 v2, v2, s5 -; W64-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v2, s4 -; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; W64-O0-NEXT: v_and_b32_e64 v1, v1, s5 +; W64-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v1, s4 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 s[4:5], exec -; W64-O0-NEXT: v_writelane_b32 v0, s4, 10 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 11 +; W64-O0-NEXT: v_writelane_b32 v13, s4, 10 +; W64-O0-NEXT: v_writelane_b32 v13, s5, 11 ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] ; W64-O0-NEXT: s_cbranch_execz .LBB2_8 ; W64-O0-NEXT: ; %bb.4: ; %bb1 +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readlane_b32 s4, v0, 0 -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; W64-O0-NEXT: s_waitcnt 
vmcnt(0) -; W64-O0-NEXT: v_mov_b32_e32 v7, v5 -; W64-O0-NEXT: v_mov_b32_e32 v1, v4 -; W64-O0-NEXT: v_mov_b32_e32 v5, v3 -; W64-O0-NEXT: v_mov_b32_e32 v6, v2 +; W64-O0-NEXT: v_readlane_b32 s4, v13, 0 +; W64-O0-NEXT: v_mov_b32_e32 v6, v4 +; W64-O0-NEXT: v_mov_b32_e32 v0, v3 +; W64-O0-NEXT: v_mov_b32_e32 v4, v2 +; W64-O0-NEXT: v_mov_b32_e32 v5, v1 ; W64-O0-NEXT: ; implicit-def: $sgpr5 ; W64-O0-NEXT: ; implicit-def: $sgpr5 ; W64-O0-NEXT: ; implicit-def: $sgpr5 ; W64-O0-NEXT: ; implicit-def: $sgpr5 -; W64-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec -; W64-O0-NEXT: v_mov_b32_e32 v2, v7 -; W64-O0-NEXT: v_mov_b32_e32 v3, v6 -; W64-O0-NEXT: v_mov_b32_e32 v4, v5 -; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill +; W64-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec +; W64-O0-NEXT: v_mov_b32_e32 v1, v6 +; W64-O0-NEXT: v_mov_b32_e32 v2, v5 +; W64-O0-NEXT: v_mov_b32_e32 v3, v4 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill ; W64-O0-NEXT: s_nop 0 -; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; W64-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b32 s5, 0 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 12 -; W64-O0-NEXT: v_mov_b32_e32 v1, s4 -; W64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill +; W64-O0-NEXT: v_writelane_b32 v13, s5, 12 +; W64-O0-NEXT: v_mov_b32_e32 v0, s4 +; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; 
W64-O0-NEXT: s_mov_b64 s[4:5], exec -; W64-O0-NEXT: v_writelane_b32 v0, s4, 13 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 14 +; W64-O0-NEXT: v_writelane_b32 v13, s4, 13 +; W64-O0-NEXT: v_writelane_b32 v13, s5, 14 ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: .LBB2_5: ; =>This Inner Loop Header: Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload +; W64-O0-NEXT: s_waitcnt vmcnt(4) +; W64-O0-NEXT: v_readfirstlane_b32 s8, v0 ; W64-O0-NEXT: s_waitcnt vmcnt(3) -; W64-O0-NEXT: v_readfirstlane_b32 s8, v1 -; W64-O0-NEXT: s_waitcnt vmcnt(2) -; W64-O0-NEXT: v_readfirstlane_b32 s12, v2 +; W64-O0-NEXT: v_readfirstlane_b32 s12, v1 ; W64-O0-NEXT: s_mov_b32 s4, s8 ; W64-O0-NEXT: s_mov_b32 s5, s12 -; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[1:2] +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], v[0:1] +; W64-O0-NEXT: s_waitcnt vmcnt(2) +; W64-O0-NEXT: v_readfirstlane_b32 s7, v2 ; 
W64-O0-NEXT: s_waitcnt vmcnt(1) -; W64-O0-NEXT: v_readfirstlane_b32 s7, v3 -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readfirstlane_b32 s6, v4 +; W64-O0-NEXT: v_readfirstlane_b32 s6, v3 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[3:4] +; W64-O0-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], v[2:3] ; W64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[10:11] ; W64-O0-NEXT: ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11 ; W64-O0-NEXT: s_mov_b32 s9, s12 ; W64-O0-NEXT: s_mov_b32 s10, s7 ; W64-O0-NEXT: s_mov_b32 s11, s6 -; W64-O0-NEXT: v_writelane_b32 v0, s8, 15 -; W64-O0-NEXT: v_writelane_b32 v0, s9, 16 -; W64-O0-NEXT: v_writelane_b32 v0, s10, 17 -; W64-O0-NEXT: v_writelane_b32 v0, s11, 18 +; W64-O0-NEXT: s_waitcnt vmcnt(0) +; W64-O0-NEXT: v_writelane_b32 v13, s8, 15 +; W64-O0-NEXT: v_writelane_b32 v13, s9, 16 +; W64-O0-NEXT: v_writelane_b32 v13, s10, 17 +; W64-O0-NEXT: v_writelane_b32 v13, s11, 18 ; W64-O0-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; W64-O0-NEXT: v_writelane_b32 v0, s4, 19 -; W64-O0-NEXT: v_writelane_b32 v0, s5, 20 +; W64-O0-NEXT: v_writelane_b32 v13, s4, 19 +; W64-O0-NEXT: v_writelane_b32 v13, s5, 20 ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; W64-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 ; 4-byte Folded Spill ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: ; %bb.6: ; in Loop: Header=BB2_5 Depth=1 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readlane_b32 s4, v1, 19 -; W64-O0-NEXT: v_readlane_b32 s5, v1, 20 -; W64-O0-NEXT: v_readlane_b32 
s8, v1, 15 -; W64-O0-NEXT: v_readlane_b32 s9, v1, 16 -; W64-O0-NEXT: v_readlane_b32 s10, v1, 17 -; W64-O0-NEXT: v_readlane_b32 s11, v1, 18 -; W64-O0-NEXT: v_readlane_b32 s6, v1, 12 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: s_nop 2 +; W64-O0-NEXT: v_readlane_b32 s4, v13, 19 +; W64-O0-NEXT: v_readlane_b32 s5, v13, 20 +; W64-O0-NEXT: v_readlane_b32 s8, v13, 15 +; W64-O0-NEXT: v_readlane_b32 s9, v13, 16 +; W64-O0-NEXT: v_readlane_b32 s10, v13, 17 +; W64-O0-NEXT: v_readlane_b32 s11, v13, 18 +; W64-O0-NEXT: v_readlane_b32 s6, v13, 12 +; W64-O0-NEXT: s_nop 4 ; W64-O0-NEXT: buffer_load_format_x v0, v0, s[8:11], s6 idxen ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill ; W64-O0-NEXT: s_xor_b64 exec, exec, s[4:5] ; W64-O0-NEXT: s_cbranch_execnz .LBB2_5 ; W64-O0-NEXT: ; %bb.7: +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readlane_b32 s4, v1, 13 -; W64-O0-NEXT: v_readlane_b32 s5, v1, 14 +; W64-O0-NEXT: v_readlane_b32 s4, v13, 13 +; W64-O0-NEXT: v_readlane_b32 s5, v13, 14 ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload -; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill ; W64-O0-NEXT: .LBB2_8: ; %bb2 +; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; W64-O0-NEXT: s_nop 0 +; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v2, 
off, s[0:3], s32 offset:72 ; 4-byte Folded Reload ; W64-O0-NEXT: s_or_saveexec_b64 s[16:17], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[16:17] ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: v_readlane_b32 s4, v0, 10 -; W64-O0-NEXT: v_readlane_b32 s5, v0, 11 +; W64-O0-NEXT: v_readlane_b32 s4, v13, 10 +; W64-O0-NEXT: v_readlane_b32 s5, v13, 11 ; W64-O0-NEXT: s_or_b64 exec, exec, s[4:5] -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: global_store_dword v[1:2], v3, off +; W64-O0-NEXT: global_store_dword v[0:1], v2, off ; W64-O0-NEXT: s_waitcnt vmcnt(0) -; W64-O0-NEXT: ; kill: killed $vgpr0 ; W64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; W64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload -; W64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload +; W64-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload ; W64-O0-NEXT: s_mov_b64 exec, s[4:5] ; W64-O0-NEXT: s_waitcnt vmcnt(0) ; W64-O0-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll index 72aafcaca3ff8..37d0309caac0a 100644 --- a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll @@ -11,21 +11,17 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 { ; REGALLOC-GFX908-NEXT: liveins: $sgpr4_sgpr5 ; REGALLOC-GFX908-NEXT: {{ $}} ; REGALLOC-GFX908-NEXT: INLINEASM &"; use $0", 
1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef %5:agpr_32 - ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:VReg_128 */, def %26 - ; REGALLOC-GFX908-NEXT: [[COPY:%[0-9]+]]:av_128 = COPY %26 - ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64 */, def %23 - ; REGALLOC-GFX908-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) - ; REGALLOC-GFX908-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[COPY]] - ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1) + ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6291466 /* regdef:VReg_128 */, def %6 + ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64 */, def %7 + ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64, %6, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1) ; REGALLOC-GFX908-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4) - ; REGALLOC-GFX908-NEXT: [[COPY2:%[0-9]+]]:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3 + ; REGALLOC-GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3 ; REGALLOC-GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; REGALLOC-GFX908-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec - ; REGALLOC-GFX908-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec - ; REGALLOC-GFX908-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = 
SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) - ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX2 undef %16:vreg_64, [[SI_SPILL_V64_RESTORE]], 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) - ; REGALLOC-GFX908-NEXT: [[COPY3:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]] - ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %18:vreg_64, [[COPY3]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1) + ; REGALLOC-GFX908-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY]], 0, 0, 0, implicit $mode, implicit $exec + ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX2 undef %16:vreg_64, %7, 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) + ; REGALLOC-GFX908-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]] + ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %18:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1) ; REGALLOC-GFX908-NEXT: S_ENDPGM 0 ; ; PEI-GFX908-LABEL: name: partial_copy @@ -60,18 +56,15 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 { ; REGALLOC-GFX90A-NEXT: liveins: $sgpr4_sgpr5 ; REGALLOC-GFX90A-NEXT: {{ $}} ; REGALLOC-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef %5:agpr_32 - ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6619146 /* regdef:VReg_128_Align2 */, def %25 - ; REGALLOC-GFX90A-NEXT: [[COPY:%[0-9]+]]:av_128_align2 = COPY %25 - ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def %23 - ; REGALLOC-GFX90A-NEXT: SI_SPILL_V64_SAVE %23, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) - ; REGALLOC-GFX90A-NEXT: 
GLOBAL_STORE_DWORDX4 undef %14:vreg_64_align2, [[COPY]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1) + ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6619146 /* regdef:VReg_128_Align2 */, def %6 + ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def %7 + ; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %14:vreg_64_align2, %6, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1) ; REGALLOC-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4) - ; REGALLOC-GFX90A-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3 + ; REGALLOC-GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3 ; REGALLOC-GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; REGALLOC-GFX90A-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec - ; REGALLOC-GFX90A-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec - ; REGALLOC-GFX90A-NEXT: [[SI_SPILL_AV64_RESTORE:%[0-9]+]]:av_64_align2 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) - ; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX2 undef %16:vreg_64_align2, [[SI_SPILL_AV64_RESTORE]], 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) + ; REGALLOC-GFX90A-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY]], 0, 0, 0, implicit $mode, implicit $exec + ; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX2 undef 
%16:vreg_64_align2, %7, 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %18:vreg_64_align2, [[V_MFMA_I32_4X4X4I8_e64_]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1) ; REGALLOC-GFX90A-NEXT: S_ENDPGM 0 ; diff --git a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll index 5b0354e63c236..078b133a93d6f 100644 --- a/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll @@ -17,13 +17,11 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out, ; GCN-NEXT: s_mov_b32 s95, 0xe8f000 ; GCN-NEXT: s_add_u32 s92, s92, s9 ; GCN-NEXT: s_addc_u32 s93, s93, 0 -; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane -; GCN-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane -; GCN-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane ; GCN-NEXT: s_load_dword s0, s[2:3], 0xb ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane ; GCN-NEXT: v_writelane_b32 v2, s4, 0 ; GCN-NEXT: v_writelane_b32 v2, s5, 1 ; GCN-NEXT: v_writelane_b32 v2, s6, 2 @@ -115,107 +113,109 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out, ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v1, s4, 0 -; GCN-NEXT: v_writelane_b32 v1, s5, 1 -; GCN-NEXT: v_writelane_b32 v1, s6, 2 -; GCN-NEXT: v_writelane_b32 v1, s7, 3 -; GCN-NEXT: v_writelane_b32 v1, s8, 4 -; GCN-NEXT: v_writelane_b32 v1, s9, 5 -; GCN-NEXT: v_writelane_b32 v1, s10, 6 -; GCN-NEXT: v_writelane_b32 v1, s11, 7 +; GCN-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane +; GCN-NEXT: v_writelane_b32 v2, s4, 0 +; GCN-NEXT: v_writelane_b32 v2, s5, 1 +; GCN-NEXT: v_writelane_b32 v2, s6, 2 +; GCN-NEXT: 
v_writelane_b32 v2, s7, 3 +; GCN-NEXT: v_writelane_b32 v2, s8, 4 +; GCN-NEXT: v_writelane_b32 v2, s9, 5 +; GCN-NEXT: v_writelane_b32 v2, s10, 6 +; GCN-NEXT: v_writelane_b32 v2, s11, 7 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v1, s4, 8 -; GCN-NEXT: v_writelane_b32 v1, s5, 9 -; GCN-NEXT: v_writelane_b32 v1, s6, 10 -; GCN-NEXT: v_writelane_b32 v1, s7, 11 -; GCN-NEXT: v_writelane_b32 v1, s8, 12 -; GCN-NEXT: v_writelane_b32 v1, s9, 13 -; GCN-NEXT: v_writelane_b32 v1, s10, 14 -; GCN-NEXT: v_writelane_b32 v1, s11, 15 +; GCN-NEXT: v_writelane_b32 v2, s4, 8 +; GCN-NEXT: v_writelane_b32 v2, s5, 9 +; GCN-NEXT: v_writelane_b32 v2, s6, 10 +; GCN-NEXT: v_writelane_b32 v2, s7, 11 +; GCN-NEXT: v_writelane_b32 v2, s8, 12 +; GCN-NEXT: v_writelane_b32 v2, s9, 13 +; GCN-NEXT: v_writelane_b32 v2, s10, 14 +; GCN-NEXT: v_writelane_b32 v2, s11, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v1, s4, 16 -; GCN-NEXT: v_writelane_b32 v1, s5, 17 -; GCN-NEXT: v_writelane_b32 v1, s6, 18 -; GCN-NEXT: v_writelane_b32 v1, s7, 19 -; GCN-NEXT: v_writelane_b32 v1, s8, 20 -; GCN-NEXT: v_writelane_b32 v1, s9, 21 -; GCN-NEXT: v_writelane_b32 v1, s10, 22 -; GCN-NEXT: v_writelane_b32 v1, s11, 23 +; GCN-NEXT: v_writelane_b32 v2, s4, 16 +; GCN-NEXT: v_writelane_b32 v2, s5, 17 +; GCN-NEXT: v_writelane_b32 v2, s6, 18 +; GCN-NEXT: v_writelane_b32 v2, s7, 19 +; GCN-NEXT: v_writelane_b32 v2, s8, 20 +; GCN-NEXT: v_writelane_b32 v2, s9, 21 +; GCN-NEXT: v_writelane_b32 v2, s10, 22 +; GCN-NEXT: v_writelane_b32 v2, s11, 23 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v1, s4, 24 -; GCN-NEXT: v_writelane_b32 v1, s5, 25 -; GCN-NEXT: v_writelane_b32 v1, s6, 26 -; GCN-NEXT: v_writelane_b32 v1, s7, 27 -; GCN-NEXT: v_writelane_b32 v1, s8, 28 -; GCN-NEXT: v_writelane_b32 v1, s9, 29 -; GCN-NEXT: v_writelane_b32 v1, s10, 30 -; GCN-NEXT: v_writelane_b32 v1, 
s11, 31 +; GCN-NEXT: v_writelane_b32 v2, s4, 24 +; GCN-NEXT: v_writelane_b32 v2, s5, 25 +; GCN-NEXT: v_writelane_b32 v2, s6, 26 +; GCN-NEXT: v_writelane_b32 v2, s7, 27 +; GCN-NEXT: v_writelane_b32 v2, s8, 28 +; GCN-NEXT: v_writelane_b32 v2, s9, 29 +; GCN-NEXT: v_writelane_b32 v2, s10, 30 +; GCN-NEXT: v_writelane_b32 v2, s11, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v1, s4, 32 -; GCN-NEXT: v_writelane_b32 v1, s5, 33 -; GCN-NEXT: v_writelane_b32 v1, s6, 34 -; GCN-NEXT: v_writelane_b32 v1, s7, 35 -; GCN-NEXT: v_writelane_b32 v1, s8, 36 -; GCN-NEXT: v_writelane_b32 v1, s9, 37 -; GCN-NEXT: v_writelane_b32 v1, s10, 38 -; GCN-NEXT: v_writelane_b32 v1, s11, 39 +; GCN-NEXT: v_writelane_b32 v2, s4, 32 +; GCN-NEXT: v_writelane_b32 v2, s5, 33 +; GCN-NEXT: v_writelane_b32 v2, s6, 34 +; GCN-NEXT: v_writelane_b32 v2, s7, 35 +; GCN-NEXT: v_writelane_b32 v2, s8, 36 +; GCN-NEXT: v_writelane_b32 v2, s9, 37 +; GCN-NEXT: v_writelane_b32 v2, s10, 38 +; GCN-NEXT: v_writelane_b32 v2, s11, 39 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v1, s4, 40 -; GCN-NEXT: v_writelane_b32 v1, s5, 41 -; GCN-NEXT: v_writelane_b32 v1, s6, 42 -; GCN-NEXT: v_writelane_b32 v1, s7, 43 -; GCN-NEXT: v_writelane_b32 v1, s8, 44 -; GCN-NEXT: v_writelane_b32 v1, s9, 45 -; GCN-NEXT: v_writelane_b32 v1, s10, 46 -; GCN-NEXT: v_writelane_b32 v1, s11, 47 +; GCN-NEXT: v_writelane_b32 v2, s4, 40 +; GCN-NEXT: v_writelane_b32 v2, s5, 41 +; GCN-NEXT: v_writelane_b32 v2, s6, 42 +; GCN-NEXT: v_writelane_b32 v2, s7, 43 +; GCN-NEXT: v_writelane_b32 v2, s8, 44 +; GCN-NEXT: v_writelane_b32 v2, s9, 45 +; GCN-NEXT: v_writelane_b32 v2, s10, 46 +; GCN-NEXT: v_writelane_b32 v2, s11, 47 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v1, s4, 48 -; GCN-NEXT: v_writelane_b32 v1, s5, 49 -; GCN-NEXT: v_writelane_b32 v1, s6, 50 -; GCN-NEXT: v_writelane_b32 v1, s7, 51 -; 
GCN-NEXT: v_writelane_b32 v1, s8, 52 -; GCN-NEXT: v_writelane_b32 v1, s9, 53 -; GCN-NEXT: v_writelane_b32 v1, s10, 54 -; GCN-NEXT: v_writelane_b32 v1, s11, 55 +; GCN-NEXT: v_writelane_b32 v2, s4, 48 +; GCN-NEXT: v_writelane_b32 v2, s5, 49 +; GCN-NEXT: v_writelane_b32 v2, s6, 50 +; GCN-NEXT: v_writelane_b32 v2, s7, 51 +; GCN-NEXT: v_writelane_b32 v2, s8, 52 +; GCN-NEXT: v_writelane_b32 v2, s9, 53 +; GCN-NEXT: v_writelane_b32 v2, s10, 54 +; GCN-NEXT: v_writelane_b32 v2, s11, 55 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v1, s4, 56 -; GCN-NEXT: v_writelane_b32 v1, s5, 57 -; GCN-NEXT: v_writelane_b32 v1, s6, 58 -; GCN-NEXT: v_writelane_b32 v1, s7, 59 -; GCN-NEXT: v_writelane_b32 v1, s8, 60 -; GCN-NEXT: v_writelane_b32 v1, s9, 61 -; GCN-NEXT: v_writelane_b32 v1, s10, 62 -; GCN-NEXT: v_writelane_b32 v1, s11, 63 +; GCN-NEXT: v_writelane_b32 v2, s4, 56 +; GCN-NEXT: v_writelane_b32 v2, s5, 57 +; GCN-NEXT: v_writelane_b32 v2, s6, 58 +; GCN-NEXT: v_writelane_b32 v2, s7, 59 +; GCN-NEXT: v_writelane_b32 v2, s8, 60 +; GCN-NEXT: v_writelane_b32 v2, s9, 61 +; GCN-NEXT: v_writelane_b32 v2, s10, 62 +; GCN-NEXT: v_writelane_b32 v2, s11, 63 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_store_dword v1, off, s[92:95], 0 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v2, off, s[92:95], 0 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 0 -; GCN-NEXT: v_writelane_b32 v0, s5, 1 -; GCN-NEXT: v_writelane_b32 v0, s6, 2 -; GCN-NEXT: v_writelane_b32 v0, s7, 3 -; GCN-NEXT: v_writelane_b32 v0, s8, 4 -; GCN-NEXT: v_writelane_b32 v0, s9, 5 -; GCN-NEXT: v_writelane_b32 v0, s10, 6 -; GCN-NEXT: v_writelane_b32 v0, s11, 7 +; GCN-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane +; GCN-NEXT: v_writelane_b32 v2, s4, 0 +; GCN-NEXT: v_writelane_b32 v2, s5, 1 +; GCN-NEXT: v_writelane_b32 
v2, s6, 2 +; GCN-NEXT: v_writelane_b32 v2, s7, 3 +; GCN-NEXT: v_writelane_b32 v2, s8, 4 +; GCN-NEXT: v_writelane_b32 v2, s9, 5 +; GCN-NEXT: v_writelane_b32 v2, s10, 6 +; GCN-NEXT: v_writelane_b32 v2, s11, 7 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_store_dword v0, off, s[92:95], 0 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v2, off, s[92:95], 0 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: s_mov_b32 s1, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) @@ -223,76 +223,76 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out, ; GCN-NEXT: s_cbranch_scc1 .LBB0_2 ; GCN-NEXT: ; %bb.1: ; %bb0 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_load_dword v2, off, s[92:95], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v0, off, s[92:95], 0 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GCN-NEXT: buffer_load_dword v1, off, s[92:95], 0 offset:8 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: s_waitcnt vmcnt(1) -; GCN-NEXT: v_readlane_b32 s8, v2, 56 -; GCN-NEXT: v_readlane_b32 s9, v2, 57 -; GCN-NEXT: v_readlane_b32 s10, v2, 58 -; GCN-NEXT: v_readlane_b32 s11, v2, 59 -; GCN-NEXT: v_readlane_b32 s12, v2, 60 -; GCN-NEXT: v_readlane_b32 s13, v2, 61 -; GCN-NEXT: v_readlane_b32 s14, v2, 62 -; GCN-NEXT: v_readlane_b32 s15, v2, 63 -; GCN-NEXT: v_readlane_b32 s16, v2, 48 -; GCN-NEXT: v_readlane_b32 s17, v2, 49 -; GCN-NEXT: v_readlane_b32 s18, v2, 50 -; GCN-NEXT: v_readlane_b32 s19, v2, 51 -; GCN-NEXT: v_readlane_b32 s20, v2, 52 -; GCN-NEXT: v_readlane_b32 s21, v2, 53 -; GCN-NEXT: v_readlane_b32 s22, v2, 54 -; GCN-NEXT: v_readlane_b32 s23, v2, 55 -; GCN-NEXT: v_readlane_b32 s24, v2, 40 -; GCN-NEXT: v_readlane_b32 s25, v2, 41 -; GCN-NEXT: v_readlane_b32 s26, v2, 42 -; GCN-NEXT: v_readlane_b32 s27, v2, 43 -; GCN-NEXT: v_readlane_b32 s28, v2, 44 -; GCN-NEXT: v_readlane_b32 s29, v2, 45 -; GCN-NEXT: 
v_readlane_b32 s30, v2, 46 -; GCN-NEXT: v_readlane_b32 s31, v2, 47 -; GCN-NEXT: v_readlane_b32 s36, v2, 32 -; GCN-NEXT: v_readlane_b32 s37, v2, 33 -; GCN-NEXT: v_readlane_b32 s38, v2, 34 -; GCN-NEXT: v_readlane_b32 s39, v2, 35 -; GCN-NEXT: v_readlane_b32 s40, v2, 36 -; GCN-NEXT: v_readlane_b32 s41, v2, 37 -; GCN-NEXT: v_readlane_b32 s42, v2, 38 -; GCN-NEXT: v_readlane_b32 s43, v2, 39 -; GCN-NEXT: v_readlane_b32 s44, v2, 24 -; GCN-NEXT: v_readlane_b32 s45, v2, 25 -; GCN-NEXT: v_readlane_b32 s46, v2, 26 -; GCN-NEXT: v_readlane_b32 s47, v2, 27 -; GCN-NEXT: v_readlane_b32 s48, v2, 28 -; GCN-NEXT: v_readlane_b32 s49, v2, 29 -; GCN-NEXT: v_readlane_b32 s50, v2, 30 -; GCN-NEXT: v_readlane_b32 s51, v2, 31 -; GCN-NEXT: v_readlane_b32 s52, v2, 16 -; GCN-NEXT: v_readlane_b32 s53, v2, 17 -; GCN-NEXT: v_readlane_b32 s54, v2, 18 -; GCN-NEXT: v_readlane_b32 s55, v2, 19 -; GCN-NEXT: v_readlane_b32 s56, v2, 20 -; GCN-NEXT: v_readlane_b32 s57, v2, 21 -; GCN-NEXT: v_readlane_b32 s58, v2, 22 -; GCN-NEXT: v_readlane_b32 s59, v2, 23 -; GCN-NEXT: v_readlane_b32 s60, v2, 8 -; GCN-NEXT: v_readlane_b32 s61, v2, 9 -; GCN-NEXT: v_readlane_b32 s62, v2, 10 -; GCN-NEXT: v_readlane_b32 s63, v2, 11 -; GCN-NEXT: v_readlane_b32 s64, v2, 12 -; GCN-NEXT: v_readlane_b32 s65, v2, 13 -; GCN-NEXT: v_readlane_b32 s66, v2, 14 -; GCN-NEXT: v_readlane_b32 s67, v2, 15 -; GCN-NEXT: v_readlane_b32 s68, v2, 0 -; GCN-NEXT: v_readlane_b32 s69, v2, 1 -; GCN-NEXT: v_readlane_b32 s70, v2, 2 -; GCN-NEXT: v_readlane_b32 s71, v2, 3 -; GCN-NEXT: v_readlane_b32 s72, v2, 4 -; GCN-NEXT: v_readlane_b32 s73, v2, 5 -; GCN-NEXT: v_readlane_b32 s74, v2, 6 -; GCN-NEXT: v_readlane_b32 s75, v2, 7 +; GCN-NEXT: v_readlane_b32 s8, v0, 56 +; GCN-NEXT: v_readlane_b32 s9, v0, 57 +; GCN-NEXT: v_readlane_b32 s10, v0, 58 +; GCN-NEXT: v_readlane_b32 s11, v0, 59 +; GCN-NEXT: v_readlane_b32 s12, v0, 60 +; GCN-NEXT: v_readlane_b32 s13, v0, 61 +; GCN-NEXT: v_readlane_b32 s14, v0, 62 +; GCN-NEXT: v_readlane_b32 s15, v0, 63 +; GCN-NEXT: 
v_readlane_b32 s16, v0, 48 +; GCN-NEXT: v_readlane_b32 s17, v0, 49 +; GCN-NEXT: v_readlane_b32 s18, v0, 50 +; GCN-NEXT: v_readlane_b32 s19, v0, 51 +; GCN-NEXT: v_readlane_b32 s20, v0, 52 +; GCN-NEXT: v_readlane_b32 s21, v0, 53 +; GCN-NEXT: v_readlane_b32 s22, v0, 54 +; GCN-NEXT: v_readlane_b32 s23, v0, 55 +; GCN-NEXT: v_readlane_b32 s24, v0, 40 +; GCN-NEXT: v_readlane_b32 s25, v0, 41 +; GCN-NEXT: v_readlane_b32 s26, v0, 42 +; GCN-NEXT: v_readlane_b32 s27, v0, 43 +; GCN-NEXT: v_readlane_b32 s28, v0, 44 +; GCN-NEXT: v_readlane_b32 s29, v0, 45 +; GCN-NEXT: v_readlane_b32 s30, v0, 46 +; GCN-NEXT: v_readlane_b32 s31, v0, 47 +; GCN-NEXT: v_readlane_b32 s36, v0, 32 +; GCN-NEXT: v_readlane_b32 s37, v0, 33 +; GCN-NEXT: v_readlane_b32 s38, v0, 34 +; GCN-NEXT: v_readlane_b32 s39, v0, 35 +; GCN-NEXT: v_readlane_b32 s40, v0, 36 +; GCN-NEXT: v_readlane_b32 s41, v0, 37 +; GCN-NEXT: v_readlane_b32 s42, v0, 38 +; GCN-NEXT: v_readlane_b32 s43, v0, 39 +; GCN-NEXT: v_readlane_b32 s44, v0, 24 +; GCN-NEXT: v_readlane_b32 s45, v0, 25 +; GCN-NEXT: v_readlane_b32 s46, v0, 26 +; GCN-NEXT: v_readlane_b32 s47, v0, 27 +; GCN-NEXT: v_readlane_b32 s48, v0, 28 +; GCN-NEXT: v_readlane_b32 s49, v0, 29 +; GCN-NEXT: v_readlane_b32 s50, v0, 30 +; GCN-NEXT: v_readlane_b32 s51, v0, 31 +; GCN-NEXT: v_readlane_b32 s52, v0, 16 +; GCN-NEXT: v_readlane_b32 s53, v0, 17 +; GCN-NEXT: v_readlane_b32 s54, v0, 18 +; GCN-NEXT: v_readlane_b32 s55, v0, 19 +; GCN-NEXT: v_readlane_b32 s56, v0, 20 +; GCN-NEXT: v_readlane_b32 s57, v0, 21 +; GCN-NEXT: v_readlane_b32 s58, v0, 22 +; GCN-NEXT: v_readlane_b32 s59, v0, 23 +; GCN-NEXT: v_readlane_b32 s60, v0, 8 +; GCN-NEXT: v_readlane_b32 s61, v0, 9 +; GCN-NEXT: v_readlane_b32 s62, v0, 10 +; GCN-NEXT: v_readlane_b32 s63, v0, 11 +; GCN-NEXT: v_readlane_b32 s64, v0, 12 +; GCN-NEXT: v_readlane_b32 s65, v0, 13 +; GCN-NEXT: v_readlane_b32 s66, v0, 14 +; GCN-NEXT: v_readlane_b32 s67, v0, 15 +; GCN-NEXT: v_readlane_b32 s68, v0, 0 +; GCN-NEXT: v_readlane_b32 s69, v0, 1 +; GCN-NEXT: 
v_readlane_b32 s70, v0, 2 +; GCN-NEXT: v_readlane_b32 s71, v0, 3 +; GCN-NEXT: v_readlane_b32 s72, v0, 4 +; GCN-NEXT: v_readlane_b32 s73, v0, 5 +; GCN-NEXT: v_readlane_b32 s74, v0, 6 +; GCN-NEXT: v_readlane_b32 s75, v0, 7 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_readlane_b32 s76, v1, 56 ; GCN-NEXT: v_readlane_b32 s77, v1, 57 @@ -319,7 +319,7 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out, ; GCN-NEXT: v_readlane_b32 s6, v1, 6 ; GCN-NEXT: v_readlane_b32 s7, v1, 7 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_load_dword v0, off, s[92:95], 0 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v2, off, s[92:95], 0 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] @@ -380,14 +380,14 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out, ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s0, v0, 0 -; GCN-NEXT: v_readlane_b32 s1, v0, 1 -; GCN-NEXT: v_readlane_b32 s2, v0, 2 -; GCN-NEXT: v_readlane_b32 s3, v0, 3 -; GCN-NEXT: v_readlane_b32 s4, v0, 4 -; GCN-NEXT: v_readlane_b32 s5, v0, 5 -; GCN-NEXT: v_readlane_b32 s6, v0, 6 -; GCN-NEXT: v_readlane_b32 s7, v0, 7 +; GCN-NEXT: v_readlane_b32 s0, v2, 0 +; GCN-NEXT: v_readlane_b32 s1, v2, 1 +; GCN-NEXT: v_readlane_b32 s2, v2, 2 +; GCN-NEXT: v_readlane_b32 s3, v2, 3 +; GCN-NEXT: v_readlane_b32 s4, v2, 4 +; GCN-NEXT: v_readlane_b32 s5, v2, 5 +; GCN-NEXT: v_readlane_b32 s6, v2, 6 +; GCN-NEXT: v_readlane_b32 s7, v2, 7 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[84:91] ; GCN-NEXT: ;;#ASMEND @@ -422,18 +422,6 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(ptr addrspace(1) %out, ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: .LBB0_2: ; %ret -; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_load_dword v0, off, s[92:95], 0 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[34:35] -; GCN-NEXT: 
s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_load_dword v1, off, s[92:95], 0 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[34:35] -; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_load_dword v2, off, s[92:95], 0 offset:8 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[34:35] -; GCN-NEXT: ; kill: killed $vgpr2 -; GCN-NEXT: ; kill: killed $vgpr1 -; GCN-NEXT: ; kill: killed $vgpr0 ; GCN-NEXT: s_endpgm %wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 %wide.sgpr1 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 @@ -490,12 +478,11 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 % ; GCN-NEXT: s_mov_b32 s55, 0xe8f000 ; GCN-NEXT: s_add_u32 s52, s52, s9 ; GCN-NEXT: s_addc_u32 s53, s53, 0 -; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane -; GCN-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane ; GCN-NEXT: s_load_dword s0, s[2:3], 0xb ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane ; GCN-NEXT: v_writelane_b32 v1, s4, 0 ; GCN-NEXT: v_writelane_b32 v1, s5, 1 ; GCN-NEXT: v_writelane_b32 v1, s6, 2 @@ -575,21 +562,22 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 % ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 0 -; GCN-NEXT: v_writelane_b32 v0, s5, 1 -; GCN-NEXT: v_writelane_b32 v0, s6, 2 -; GCN-NEXT: v_writelane_b32 v0, s7, 3 -; GCN-NEXT: v_writelane_b32 v0, s8, 4 -; GCN-NEXT: v_writelane_b32 v0, s9, 5 -; GCN-NEXT: v_writelane_b32 v0, s10, 6 -; GCN-NEXT: v_writelane_b32 v0, s11, 7 +; GCN-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane +; GCN-NEXT: v_writelane_b32 v1, s4, 0 +; GCN-NEXT: v_writelane_b32 v1, s5, 1 +; GCN-NEXT: v_writelane_b32 v1, s6, 2 +; GCN-NEXT: v_writelane_b32 v1, s7, 3 +; GCN-NEXT: v_writelane_b32 v1, s8, 4 +; GCN-NEXT: v_writelane_b32 v1, s9, 5 +; GCN-NEXT: 
v_writelane_b32 v1, s10, 6 +; GCN-NEXT: v_writelane_b32 v1, s11, 7 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[2:3] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s2, 8 -; GCN-NEXT: v_writelane_b32 v0, s3, 9 +; GCN-NEXT: v_writelane_b32 v1, s2, 8 +; GCN-NEXT: v_writelane_b32 v1, s3, 9 ; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1 -; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v1, off, s[52:55], 0 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[28:29] ; GCN-NEXT: s_mov_b32 s1, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) @@ -597,93 +585,93 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 % ; GCN-NEXT: s_cbranch_scc1 .LBB1_2 ; GCN-NEXT: ; %bb.1: ; %bb0 ; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1 -; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[28:29] ; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1 -; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[28:29] ; GCN-NEXT: s_waitcnt vmcnt(1) -; GCN-NEXT: v_readlane_b32 s16, v1, 8 -; GCN-NEXT: v_readlane_b32 s17, v1, 9 -; GCN-NEXT: v_readlane_b32 s20, v1, 0 -; GCN-NEXT: v_readlane_b32 s21, v1, 1 -; GCN-NEXT: v_readlane_b32 s22, v1, 2 -; GCN-NEXT: v_readlane_b32 s23, v1, 3 -; GCN-NEXT: v_readlane_b32 s24, v1, 4 -; GCN-NEXT: v_readlane_b32 s25, v1, 5 -; GCN-NEXT: v_readlane_b32 s26, v1, 6 -; GCN-NEXT: v_readlane_b32 s27, v1, 7 +; GCN-NEXT: v_readlane_b32 s16, v0, 8 +; GCN-NEXT: v_readlane_b32 s17, v0, 9 +; GCN-NEXT: v_readlane_b32 s20, v0, 0 +; GCN-NEXT: v_readlane_b32 s21, v0, 1 +; GCN-NEXT: v_readlane_b32 s22, v0, 2 +; GCN-NEXT: v_readlane_b32 s23, v0, 3 +; GCN-NEXT: v_readlane_b32 s24, v0, 4 +; GCN-NEXT: v_readlane_b32 s25, v0, 5 +; GCN-NEXT: v_readlane_b32 s26, v0, 
6 +; GCN-NEXT: v_readlane_b32 s27, v0, 7 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s36, v0, 32 -; GCN-NEXT: v_readlane_b32 s37, v0, 33 -; GCN-NEXT: v_readlane_b32 s38, v0, 34 -; GCN-NEXT: v_readlane_b32 s39, v0, 35 -; GCN-NEXT: v_readlane_b32 s40, v0, 36 -; GCN-NEXT: v_readlane_b32 s41, v0, 37 -; GCN-NEXT: v_readlane_b32 s42, v0, 38 -; GCN-NEXT: v_readlane_b32 s43, v0, 39 -; GCN-NEXT: v_readlane_b32 s44, v0, 40 -; GCN-NEXT: v_readlane_b32 s45, v0, 41 -; GCN-NEXT: v_readlane_b32 s46, v0, 42 -; GCN-NEXT: v_readlane_b32 s47, v0, 43 -; GCN-NEXT: v_readlane_b32 s48, v0, 44 -; GCN-NEXT: v_readlane_b32 s49, v0, 45 -; GCN-NEXT: v_readlane_b32 s50, v0, 46 -; GCN-NEXT: v_readlane_b32 s51, v0, 47 -; GCN-NEXT: v_readlane_b32 s0, v0, 0 -; GCN-NEXT: v_readlane_b32 s1, v0, 1 -; GCN-NEXT: v_readlane_b32 s2, v0, 2 -; GCN-NEXT: v_readlane_b32 s3, v0, 3 -; GCN-NEXT: v_readlane_b32 s4, v0, 4 -; GCN-NEXT: v_readlane_b32 s5, v0, 5 -; GCN-NEXT: v_readlane_b32 s6, v0, 6 -; GCN-NEXT: v_readlane_b32 s7, v0, 7 -; GCN-NEXT: v_readlane_b32 s8, v0, 8 -; GCN-NEXT: v_readlane_b32 s9, v0, 9 -; GCN-NEXT: v_readlane_b32 s10, v0, 10 -; GCN-NEXT: v_readlane_b32 s11, v0, 11 -; GCN-NEXT: v_readlane_b32 s12, v0, 12 -; GCN-NEXT: v_readlane_b32 s13, v0, 13 -; GCN-NEXT: v_readlane_b32 s14, v0, 14 -; GCN-NEXT: v_readlane_b32 s15, v0, 15 +; GCN-NEXT: v_readlane_b32 s36, v1, 32 +; GCN-NEXT: v_readlane_b32 s37, v1, 33 +; GCN-NEXT: v_readlane_b32 s38, v1, 34 +; GCN-NEXT: v_readlane_b32 s39, v1, 35 +; GCN-NEXT: v_readlane_b32 s40, v1, 36 +; GCN-NEXT: v_readlane_b32 s41, v1, 37 +; GCN-NEXT: v_readlane_b32 s42, v1, 38 +; GCN-NEXT: v_readlane_b32 s43, v1, 39 +; GCN-NEXT: v_readlane_b32 s44, v1, 40 +; GCN-NEXT: v_readlane_b32 s45, v1, 41 +; GCN-NEXT: v_readlane_b32 s46, v1, 42 +; GCN-NEXT: v_readlane_b32 s47, v1, 43 +; GCN-NEXT: v_readlane_b32 s48, v1, 44 +; GCN-NEXT: v_readlane_b32 s49, v1, 45 +; GCN-NEXT: v_readlane_b32 s50, v1, 46 +; GCN-NEXT: v_readlane_b32 s51, v1, 47 +; GCN-NEXT: v_readlane_b32 
s0, v1, 0 +; GCN-NEXT: v_readlane_b32 s1, v1, 1 +; GCN-NEXT: v_readlane_b32 s2, v1, 2 +; GCN-NEXT: v_readlane_b32 s3, v1, 3 +; GCN-NEXT: v_readlane_b32 s4, v1, 4 +; GCN-NEXT: v_readlane_b32 s5, v1, 5 +; GCN-NEXT: v_readlane_b32 s6, v1, 6 +; GCN-NEXT: v_readlane_b32 s7, v1, 7 +; GCN-NEXT: v_readlane_b32 s8, v1, 8 +; GCN-NEXT: v_readlane_b32 s9, v1, 9 +; GCN-NEXT: v_readlane_b32 s10, v1, 10 +; GCN-NEXT: v_readlane_b32 s11, v1, 11 +; GCN-NEXT: v_readlane_b32 s12, v1, 12 +; GCN-NEXT: v_readlane_b32 s13, v1, 13 +; GCN-NEXT: v_readlane_b32 s14, v1, 14 +; GCN-NEXT: v_readlane_b32 s15, v1, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 16 -; GCN-NEXT: v_readlane_b32 s1, v0, 17 -; GCN-NEXT: v_readlane_b32 s2, v0, 18 -; GCN-NEXT: v_readlane_b32 s3, v0, 19 -; GCN-NEXT: v_readlane_b32 s4, v0, 20 -; GCN-NEXT: v_readlane_b32 s5, v0, 21 -; GCN-NEXT: v_readlane_b32 s6, v0, 22 -; GCN-NEXT: v_readlane_b32 s7, v0, 23 -; GCN-NEXT: v_readlane_b32 s8, v0, 24 -; GCN-NEXT: v_readlane_b32 s9, v0, 25 -; GCN-NEXT: v_readlane_b32 s10, v0, 26 -; GCN-NEXT: v_readlane_b32 s11, v0, 27 -; GCN-NEXT: v_readlane_b32 s12, v0, 28 -; GCN-NEXT: v_readlane_b32 s13, v0, 29 -; GCN-NEXT: v_readlane_b32 s14, v0, 30 -; GCN-NEXT: v_readlane_b32 s15, v0, 31 +; GCN-NEXT: v_readlane_b32 s0, v1, 16 +; GCN-NEXT: v_readlane_b32 s1, v1, 17 +; GCN-NEXT: v_readlane_b32 s2, v1, 18 +; GCN-NEXT: v_readlane_b32 s3, v1, 19 +; GCN-NEXT: v_readlane_b32 s4, v1, 20 +; GCN-NEXT: v_readlane_b32 s5, v1, 21 +; GCN-NEXT: v_readlane_b32 s6, v1, 22 +; GCN-NEXT: v_readlane_b32 s7, v1, 23 +; GCN-NEXT: v_readlane_b32 s8, v1, 24 +; GCN-NEXT: v_readlane_b32 s9, v1, 25 +; GCN-NEXT: v_readlane_b32 s10, v1, 26 +; GCN-NEXT: v_readlane_b32 s11, v1, 27 +; GCN-NEXT: v_readlane_b32 s12, v1, 28 +; GCN-NEXT: v_readlane_b32 s13, v1, 29 +; GCN-NEXT: v_readlane_b32 s14, v1, 30 +; GCN-NEXT: v_readlane_b32 s15, v1, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; 
GCN-NEXT: v_readlane_b32 s0, v0, 48 -; GCN-NEXT: v_readlane_b32 s1, v0, 49 -; GCN-NEXT: v_readlane_b32 s2, v0, 50 -; GCN-NEXT: v_readlane_b32 s3, v0, 51 -; GCN-NEXT: v_readlane_b32 s4, v0, 52 -; GCN-NEXT: v_readlane_b32 s5, v0, 53 -; GCN-NEXT: v_readlane_b32 s6, v0, 54 -; GCN-NEXT: v_readlane_b32 s7, v0, 55 -; GCN-NEXT: v_readlane_b32 s8, v0, 56 -; GCN-NEXT: v_readlane_b32 s9, v0, 57 -; GCN-NEXT: v_readlane_b32 s10, v0, 58 -; GCN-NEXT: v_readlane_b32 s11, v0, 59 -; GCN-NEXT: v_readlane_b32 s12, v0, 60 -; GCN-NEXT: v_readlane_b32 s13, v0, 61 -; GCN-NEXT: v_readlane_b32 s14, v0, 62 -; GCN-NEXT: v_readlane_b32 s15, v0, 63 +; GCN-NEXT: v_readlane_b32 s0, v1, 48 +; GCN-NEXT: v_readlane_b32 s1, v1, 49 +; GCN-NEXT: v_readlane_b32 s2, v1, 50 +; GCN-NEXT: v_readlane_b32 s3, v1, 51 +; GCN-NEXT: v_readlane_b32 s4, v1, 52 +; GCN-NEXT: v_readlane_b32 s5, v1, 53 +; GCN-NEXT: v_readlane_b32 s6, v1, 54 +; GCN-NEXT: v_readlane_b32 s7, v1, 55 +; GCN-NEXT: v_readlane_b32 s8, v1, 56 +; GCN-NEXT: v_readlane_b32 s9, v1, 57 +; GCN-NEXT: v_readlane_b32 s10, v1, 58 +; GCN-NEXT: v_readlane_b32 s11, v1, 59 +; GCN-NEXT: v_readlane_b32 s12, v1, 60 +; GCN-NEXT: v_readlane_b32 s13, v1, 61 +; GCN-NEXT: v_readlane_b32 s14, v1, 62 +; GCN-NEXT: v_readlane_b32 s15, v1, 63 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[36:51] ; GCN-NEXT: ;;#ASMEND @@ -697,14 +685,6 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(ptr addrspace(1) %out, i32 % ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: .LBB1_2: ; %ret -; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1 -; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[28:29] -; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1 -; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[28:29] -; GCN-NEXT: ; kill: killed $vgpr1 -; GCN-NEXT: ; kill: killed $vgpr0 ; GCN-NEXT: s_endpgm %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 
%wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 @@ -741,17 +721,9 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 % ; GCN-NEXT: s_mov_b32 s55, 0xe8f000 ; GCN-NEXT: s_add_u32 s52, s52, s9 ; GCN-NEXT: s_addc_u32 s53, s53, 0 -; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane -; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane ; GCN-NEXT: s_load_dword s0, s[2:3], 0xb ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[34:35] -; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: ;;#ASMSTART @@ -765,91 +737,91 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 % ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_waitcnt vmcnt(1) -; GCN-NEXT: v_writelane_b32 v1, s4, 0 -; GCN-NEXT: v_writelane_b32 v1, s5, 1 -; GCN-NEXT: v_writelane_b32 v1, s6, 2 -; GCN-NEXT: v_writelane_b32 v1, s7, 3 -; GCN-NEXT: v_writelane_b32 v1, s8, 4 -; GCN-NEXT: v_writelane_b32 v1, s9, 5 -; GCN-NEXT: v_writelane_b32 v1, s10, 6 -; GCN-NEXT: v_writelane_b32 v1, s11, 7 -; GCN-NEXT: v_writelane_b32 v1, s12, 8 -; GCN-NEXT: v_writelane_b32 v1, s13, 9 -; GCN-NEXT: v_writelane_b32 v1, s14, 10 -; GCN-NEXT: v_writelane_b32 v1, s15, 11 -; GCN-NEXT: v_writelane_b32 v1, s16, 12 -; GCN-NEXT: v_writelane_b32 v1, s17, 13 -; GCN-NEXT: v_writelane_b32 v1, s18, 14 -; GCN-NEXT: v_writelane_b32 v1, s19, 15 +; GCN-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane +; GCN-NEXT: v_writelane_b32 v32, s4, 0 +; GCN-NEXT: v_writelane_b32 v32, s5, 1 +; GCN-NEXT: v_writelane_b32 v32, s6, 2 +; GCN-NEXT: v_writelane_b32 v32, s7, 3 +; GCN-NEXT: v_writelane_b32 v32, s8, 4 +; 
GCN-NEXT: v_writelane_b32 v32, s9, 5 +; GCN-NEXT: v_writelane_b32 v32, s10, 6 +; GCN-NEXT: v_writelane_b32 v32, s11, 7 +; GCN-NEXT: v_writelane_b32 v32, s12, 8 +; GCN-NEXT: v_writelane_b32 v32, s13, 9 +; GCN-NEXT: v_writelane_b32 v32, s14, 10 +; GCN-NEXT: v_writelane_b32 v32, s15, 11 +; GCN-NEXT: v_writelane_b32 v32, s16, 12 +; GCN-NEXT: v_writelane_b32 v32, s17, 13 +; GCN-NEXT: v_writelane_b32 v32, s18, 14 +; GCN-NEXT: v_writelane_b32 v32, s19, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v1, s4, 16 -; GCN-NEXT: v_writelane_b32 v1, s5, 17 -; GCN-NEXT: v_writelane_b32 v1, s6, 18 -; GCN-NEXT: v_writelane_b32 v1, s7, 19 -; GCN-NEXT: v_writelane_b32 v1, s8, 20 -; GCN-NEXT: v_writelane_b32 v1, s9, 21 -; GCN-NEXT: v_writelane_b32 v1, s10, 22 -; GCN-NEXT: v_writelane_b32 v1, s11, 23 -; GCN-NEXT: v_writelane_b32 v1, s12, 24 -; GCN-NEXT: v_writelane_b32 v1, s13, 25 -; GCN-NEXT: v_writelane_b32 v1, s14, 26 -; GCN-NEXT: v_writelane_b32 v1, s15, 27 -; GCN-NEXT: v_writelane_b32 v1, s16, 28 -; GCN-NEXT: v_writelane_b32 v1, s17, 29 -; GCN-NEXT: v_writelane_b32 v1, s18, 30 -; GCN-NEXT: v_writelane_b32 v1, s19, 31 +; GCN-NEXT: v_writelane_b32 v32, s4, 16 +; GCN-NEXT: v_writelane_b32 v32, s5, 17 +; GCN-NEXT: v_writelane_b32 v32, s6, 18 +; GCN-NEXT: v_writelane_b32 v32, s7, 19 +; GCN-NEXT: v_writelane_b32 v32, s8, 20 +; GCN-NEXT: v_writelane_b32 v32, s9, 21 +; GCN-NEXT: v_writelane_b32 v32, s10, 22 +; GCN-NEXT: v_writelane_b32 v32, s11, 23 +; GCN-NEXT: v_writelane_b32 v32, s12, 24 +; GCN-NEXT: v_writelane_b32 v32, s13, 25 +; GCN-NEXT: v_writelane_b32 v32, s14, 26 +; GCN-NEXT: v_writelane_b32 v32, s15, 27 +; GCN-NEXT: v_writelane_b32 v32, s16, 28 +; GCN-NEXT: v_writelane_b32 v32, s17, 29 +; GCN-NEXT: v_writelane_b32 v32, s18, 30 +; GCN-NEXT: v_writelane_b32 v32, s19, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v1, s4, 32 -; GCN-NEXT: v_writelane_b32 v1, s5, 33 -; 
GCN-NEXT: v_writelane_b32 v1, s6, 34 -; GCN-NEXT: v_writelane_b32 v1, s7, 35 -; GCN-NEXT: v_writelane_b32 v1, s8, 36 -; GCN-NEXT: v_writelane_b32 v1, s9, 37 -; GCN-NEXT: v_writelane_b32 v1, s10, 38 -; GCN-NEXT: v_writelane_b32 v1, s11, 39 -; GCN-NEXT: v_writelane_b32 v1, s12, 40 -; GCN-NEXT: v_writelane_b32 v1, s13, 41 -; GCN-NEXT: v_writelane_b32 v1, s14, 42 -; GCN-NEXT: v_writelane_b32 v1, s15, 43 -; GCN-NEXT: v_writelane_b32 v1, s16, 44 -; GCN-NEXT: v_writelane_b32 v1, s17, 45 -; GCN-NEXT: v_writelane_b32 v1, s18, 46 -; GCN-NEXT: v_writelane_b32 v1, s19, 47 +; GCN-NEXT: v_writelane_b32 v32, s4, 32 +; GCN-NEXT: v_writelane_b32 v32, s5, 33 +; GCN-NEXT: v_writelane_b32 v32, s6, 34 +; GCN-NEXT: v_writelane_b32 v32, s7, 35 +; GCN-NEXT: v_writelane_b32 v32, s8, 36 +; GCN-NEXT: v_writelane_b32 v32, s9, 37 +; GCN-NEXT: v_writelane_b32 v32, s10, 38 +; GCN-NEXT: v_writelane_b32 v32, s11, 39 +; GCN-NEXT: v_writelane_b32 v32, s12, 40 +; GCN-NEXT: v_writelane_b32 v32, s13, 41 +; GCN-NEXT: v_writelane_b32 v32, s14, 42 +; GCN-NEXT: v_writelane_b32 v32, s15, 43 +; GCN-NEXT: v_writelane_b32 v32, s16, 44 +; GCN-NEXT: v_writelane_b32 v32, s17, 45 +; GCN-NEXT: v_writelane_b32 v32, s18, 46 +; GCN-NEXT: v_writelane_b32 v32, s19, 47 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v1, s4, 48 -; GCN-NEXT: v_writelane_b32 v1, s5, 49 -; GCN-NEXT: v_writelane_b32 v1, s6, 50 -; GCN-NEXT: v_writelane_b32 v1, s7, 51 -; GCN-NEXT: v_writelane_b32 v1, s8, 52 -; GCN-NEXT: v_writelane_b32 v1, s9, 53 -; GCN-NEXT: v_writelane_b32 v1, s10, 54 -; GCN-NEXT: v_writelane_b32 v1, s11, 55 -; GCN-NEXT: v_writelane_b32 v1, s12, 56 -; GCN-NEXT: v_writelane_b32 v1, s13, 57 -; GCN-NEXT: v_writelane_b32 v1, s14, 58 -; GCN-NEXT: v_writelane_b32 v1, s15, 59 -; GCN-NEXT: v_writelane_b32 v1, s16, 60 -; GCN-NEXT: v_writelane_b32 v1, s17, 61 -; GCN-NEXT: v_writelane_b32 v1, s18, 62 -; GCN-NEXT: v_writelane_b32 v1, s19, 63 +; GCN-NEXT: v_writelane_b32 v32, s4, 48 +; 
GCN-NEXT: v_writelane_b32 v32, s5, 49 +; GCN-NEXT: v_writelane_b32 v32, s6, 50 +; GCN-NEXT: v_writelane_b32 v32, s7, 51 +; GCN-NEXT: v_writelane_b32 v32, s8, 52 +; GCN-NEXT: v_writelane_b32 v32, s9, 53 +; GCN-NEXT: v_writelane_b32 v32, s10, 54 +; GCN-NEXT: v_writelane_b32 v32, s11, 55 +; GCN-NEXT: v_writelane_b32 v32, s12, 56 +; GCN-NEXT: v_writelane_b32 v32, s13, 57 +; GCN-NEXT: v_writelane_b32 v32, s14, 58 +; GCN-NEXT: v_writelane_b32 v32, s15, 59 +; GCN-NEXT: v_writelane_b32 v32, s16, 60 +; GCN-NEXT: v_writelane_b32 v32, s17, 61 +; GCN-NEXT: v_writelane_b32 v32, s18, 62 +; GCN-NEXT: v_writelane_b32 v32, s19, 63 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_store_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v32, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[2:3] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_waitcnt vmcnt(1) -; GCN-NEXT: v_writelane_b32 v0, s2, 0 -; GCN-NEXT: v_writelane_b32 v0, s3, 1 +; GCN-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane +; GCN-NEXT: v_writelane_b32 v32, s2, 0 +; GCN-NEXT: v_writelane_b32 v32, s3, 1 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v32, off, s[52:55], 0 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: s_mov_b32 s1, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) @@ -857,59 +829,59 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 % ; GCN-NEXT: s_cbranch_scc1 .LBB2_2 ; GCN-NEXT: ; %bb.1: ; %bb0 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v31, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s36, v1, 32 -; GCN-NEXT: 
v_readlane_b32 s37, v1, 33 -; GCN-NEXT: v_readlane_b32 s38, v1, 34 -; GCN-NEXT: v_readlane_b32 s39, v1, 35 -; GCN-NEXT: v_readlane_b32 s40, v1, 36 -; GCN-NEXT: v_readlane_b32 s41, v1, 37 -; GCN-NEXT: v_readlane_b32 s42, v1, 38 -; GCN-NEXT: v_readlane_b32 s43, v1, 39 -; GCN-NEXT: v_readlane_b32 s44, v1, 40 -; GCN-NEXT: v_readlane_b32 s45, v1, 41 -; GCN-NEXT: v_readlane_b32 s46, v1, 42 -; GCN-NEXT: v_readlane_b32 s47, v1, 43 -; GCN-NEXT: v_readlane_b32 s48, v1, 44 -; GCN-NEXT: v_readlane_b32 s49, v1, 45 -; GCN-NEXT: v_readlane_b32 s50, v1, 46 -; GCN-NEXT: v_readlane_b32 s51, v1, 47 -; GCN-NEXT: v_readlane_b32 s0, v1, 16 -; GCN-NEXT: v_readlane_b32 s1, v1, 17 -; GCN-NEXT: v_readlane_b32 s2, v1, 18 -; GCN-NEXT: v_readlane_b32 s3, v1, 19 -; GCN-NEXT: v_readlane_b32 s4, v1, 20 -; GCN-NEXT: v_readlane_b32 s5, v1, 21 -; GCN-NEXT: v_readlane_b32 s6, v1, 22 -; GCN-NEXT: v_readlane_b32 s7, v1, 23 -; GCN-NEXT: v_readlane_b32 s8, v1, 24 -; GCN-NEXT: v_readlane_b32 s9, v1, 25 -; GCN-NEXT: v_readlane_b32 s10, v1, 26 -; GCN-NEXT: v_readlane_b32 s11, v1, 27 -; GCN-NEXT: v_readlane_b32 s12, v1, 28 -; GCN-NEXT: v_readlane_b32 s13, v1, 29 -; GCN-NEXT: v_readlane_b32 s14, v1, 30 -; GCN-NEXT: v_readlane_b32 s15, v1, 31 -; GCN-NEXT: v_readlane_b32 s16, v1, 0 -; GCN-NEXT: v_readlane_b32 s17, v1, 1 -; GCN-NEXT: v_readlane_b32 s18, v1, 2 -; GCN-NEXT: v_readlane_b32 s19, v1, 3 -; GCN-NEXT: v_readlane_b32 s20, v1, 4 -; GCN-NEXT: v_readlane_b32 s21, v1, 5 -; GCN-NEXT: v_readlane_b32 s22, v1, 6 -; GCN-NEXT: v_readlane_b32 s23, v1, 7 -; GCN-NEXT: v_readlane_b32 s24, v1, 8 -; GCN-NEXT: v_readlane_b32 s25, v1, 9 -; GCN-NEXT: v_readlane_b32 s26, v1, 10 -; GCN-NEXT: v_readlane_b32 s27, v1, 11 -; GCN-NEXT: v_readlane_b32 s28, v1, 12 -; GCN-NEXT: v_readlane_b32 s29, v1, 13 -; GCN-NEXT: v_readlane_b32 s30, v1, 14 -; GCN-NEXT: v_readlane_b32 s31, v1, 15 +; GCN-NEXT: v_readlane_b32 s36, v31, 32 +; GCN-NEXT: v_readlane_b32 s37, v31, 33 +; GCN-NEXT: v_readlane_b32 s38, v31, 34 +; GCN-NEXT: v_readlane_b32 
s39, v31, 35 +; GCN-NEXT: v_readlane_b32 s40, v31, 36 +; GCN-NEXT: v_readlane_b32 s41, v31, 37 +; GCN-NEXT: v_readlane_b32 s42, v31, 38 +; GCN-NEXT: v_readlane_b32 s43, v31, 39 +; GCN-NEXT: v_readlane_b32 s44, v31, 40 +; GCN-NEXT: v_readlane_b32 s45, v31, 41 +; GCN-NEXT: v_readlane_b32 s46, v31, 42 +; GCN-NEXT: v_readlane_b32 s47, v31, 43 +; GCN-NEXT: v_readlane_b32 s48, v31, 44 +; GCN-NEXT: v_readlane_b32 s49, v31, 45 +; GCN-NEXT: v_readlane_b32 s50, v31, 46 +; GCN-NEXT: v_readlane_b32 s51, v31, 47 +; GCN-NEXT: v_readlane_b32 s0, v31, 16 +; GCN-NEXT: v_readlane_b32 s1, v31, 17 +; GCN-NEXT: v_readlane_b32 s2, v31, 18 +; GCN-NEXT: v_readlane_b32 s3, v31, 19 +; GCN-NEXT: v_readlane_b32 s4, v31, 20 +; GCN-NEXT: v_readlane_b32 s5, v31, 21 +; GCN-NEXT: v_readlane_b32 s6, v31, 22 +; GCN-NEXT: v_readlane_b32 s7, v31, 23 +; GCN-NEXT: v_readlane_b32 s8, v31, 24 +; GCN-NEXT: v_readlane_b32 s9, v31, 25 +; GCN-NEXT: v_readlane_b32 s10, v31, 26 +; GCN-NEXT: v_readlane_b32 s11, v31, 27 +; GCN-NEXT: v_readlane_b32 s12, v31, 28 +; GCN-NEXT: v_readlane_b32 s13, v31, 29 +; GCN-NEXT: v_readlane_b32 s14, v31, 30 +; GCN-NEXT: v_readlane_b32 s15, v31, 31 +; GCN-NEXT: v_readlane_b32 s16, v31, 0 +; GCN-NEXT: v_readlane_b32 s17, v31, 1 +; GCN-NEXT: v_readlane_b32 s18, v31, 2 +; GCN-NEXT: v_readlane_b32 s19, v31, 3 +; GCN-NEXT: v_readlane_b32 s20, v31, 4 +; GCN-NEXT: v_readlane_b32 s21, v31, 5 +; GCN-NEXT: v_readlane_b32 s22, v31, 6 +; GCN-NEXT: v_readlane_b32 s23, v31, 7 +; GCN-NEXT: v_readlane_b32 s24, v31, 8 +; GCN-NEXT: v_readlane_b32 s25, v31, 9 +; GCN-NEXT: v_readlane_b32 s26, v31, 10 +; GCN-NEXT: v_readlane_b32 s27, v31, 11 +; GCN-NEXT: v_readlane_b32 s28, v31, 12 +; GCN-NEXT: v_readlane_b32 s29, v31, 13 +; GCN-NEXT: v_readlane_b32 s30, v31, 14 +; GCN-NEXT: v_readlane_b32 s31, v31, 15 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v32, off, s[52:55], 0 ; 4-byte Folded Reload ; 
GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[16:31] @@ -917,25 +889,25 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 % ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s4, v1, 48 -; GCN-NEXT: v_readlane_b32 s5, v1, 49 -; GCN-NEXT: v_readlane_b32 s6, v1, 50 -; GCN-NEXT: v_readlane_b32 s7, v1, 51 -; GCN-NEXT: v_readlane_b32 s8, v1, 52 -; GCN-NEXT: v_readlane_b32 s9, v1, 53 -; GCN-NEXT: v_readlane_b32 s10, v1, 54 -; GCN-NEXT: v_readlane_b32 s11, v1, 55 -; GCN-NEXT: v_readlane_b32 s12, v1, 56 -; GCN-NEXT: v_readlane_b32 s13, v1, 57 -; GCN-NEXT: v_readlane_b32 s14, v1, 58 -; GCN-NEXT: v_readlane_b32 s15, v1, 59 -; GCN-NEXT: v_readlane_b32 s16, v1, 60 -; GCN-NEXT: v_readlane_b32 s17, v1, 61 -; GCN-NEXT: v_readlane_b32 s18, v1, 62 -; GCN-NEXT: v_readlane_b32 s19, v1, 63 +; GCN-NEXT: v_readlane_b32 s4, v31, 48 +; GCN-NEXT: v_readlane_b32 s5, v31, 49 +; GCN-NEXT: v_readlane_b32 s6, v31, 50 +; GCN-NEXT: v_readlane_b32 s7, v31, 51 +; GCN-NEXT: v_readlane_b32 s8, v31, 52 +; GCN-NEXT: v_readlane_b32 s9, v31, 53 +; GCN-NEXT: v_readlane_b32 s10, v31, 54 +; GCN-NEXT: v_readlane_b32 s11, v31, 55 +; GCN-NEXT: v_readlane_b32 s12, v31, 56 +; GCN-NEXT: v_readlane_b32 s13, v31, 57 +; GCN-NEXT: v_readlane_b32 s14, v31, 58 +; GCN-NEXT: v_readlane_b32 s15, v31, 59 +; GCN-NEXT: v_readlane_b32 s16, v31, 60 +; GCN-NEXT: v_readlane_b32 s17, v31, 61 +; GCN-NEXT: v_readlane_b32 s18, v31, 62 +; GCN-NEXT: v_readlane_b32 s19, v31, 63 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s0, v0, 0 -; GCN-NEXT: v_readlane_b32 s1, v0, 1 +; GCN-NEXT: v_readlane_b32 s0, v32, 0 +; GCN-NEXT: v_readlane_b32 s1, v32, 1 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[36:51] ; GCN-NEXT: ;;#ASMEND @@ -946,14 +918,6 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 % ; GCN-NEXT: ; use s[0:1] ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: .LBB2_2: ; %ret -; GCN-NEXT: 
s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[34:35] -; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[34:35] -; GCN-NEXT: ; kill: killed $vgpr1 -; GCN-NEXT: ; kill: killed $vgpr0 ; GCN-NEXT: s_endpgm call void asm sideeffect "", "~{v[0:7]}" () #0 call void asm sideeffect "", "~{v[8:15]}" () #0 @@ -993,17 +957,9 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 { ; GCN-NEXT: s_mov_b32 s55, 0xe8f000 ; GCN-NEXT: s_add_u32 s52, s52, s9 ; GCN-NEXT: s_addc_u32 s53, s53, 0 -; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane -; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane ; GCN-NEXT: s_load_dword s0, s[2:3], 0x9 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[34:35] -; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: ;;#ASMSTART @@ -1017,91 +973,91 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 { ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_waitcnt vmcnt(1) -; GCN-NEXT: v_writelane_b32 v1, s4, 0 -; GCN-NEXT: v_writelane_b32 v1, s5, 1 -; GCN-NEXT: v_writelane_b32 v1, s6, 2 -; GCN-NEXT: v_writelane_b32 v1, s7, 3 -; GCN-NEXT: v_writelane_b32 v1, s8, 4 -; GCN-NEXT: v_writelane_b32 v1, s9, 5 -; GCN-NEXT: v_writelane_b32 v1, s10, 6 -; GCN-NEXT: v_writelane_b32 v1, s11, 7 -; GCN-NEXT: v_writelane_b32 v1, s12, 8 -; GCN-NEXT: v_writelane_b32 v1, s13, 9 -; GCN-NEXT: v_writelane_b32 v1, s14, 10 -; GCN-NEXT: v_writelane_b32 v1, s15, 11 -; GCN-NEXT: 
v_writelane_b32 v1, s16, 12 -; GCN-NEXT: v_writelane_b32 v1, s17, 13 -; GCN-NEXT: v_writelane_b32 v1, s18, 14 -; GCN-NEXT: v_writelane_b32 v1, s19, 15 +; GCN-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane +; GCN-NEXT: v_writelane_b32 v32, s4, 0 +; GCN-NEXT: v_writelane_b32 v32, s5, 1 +; GCN-NEXT: v_writelane_b32 v32, s6, 2 +; GCN-NEXT: v_writelane_b32 v32, s7, 3 +; GCN-NEXT: v_writelane_b32 v32, s8, 4 +; GCN-NEXT: v_writelane_b32 v32, s9, 5 +; GCN-NEXT: v_writelane_b32 v32, s10, 6 +; GCN-NEXT: v_writelane_b32 v32, s11, 7 +; GCN-NEXT: v_writelane_b32 v32, s12, 8 +; GCN-NEXT: v_writelane_b32 v32, s13, 9 +; GCN-NEXT: v_writelane_b32 v32, s14, 10 +; GCN-NEXT: v_writelane_b32 v32, s15, 11 +; GCN-NEXT: v_writelane_b32 v32, s16, 12 +; GCN-NEXT: v_writelane_b32 v32, s17, 13 +; GCN-NEXT: v_writelane_b32 v32, s18, 14 +; GCN-NEXT: v_writelane_b32 v32, s19, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v1, s4, 16 -; GCN-NEXT: v_writelane_b32 v1, s5, 17 -; GCN-NEXT: v_writelane_b32 v1, s6, 18 -; GCN-NEXT: v_writelane_b32 v1, s7, 19 -; GCN-NEXT: v_writelane_b32 v1, s8, 20 -; GCN-NEXT: v_writelane_b32 v1, s9, 21 -; GCN-NEXT: v_writelane_b32 v1, s10, 22 -; GCN-NEXT: v_writelane_b32 v1, s11, 23 -; GCN-NEXT: v_writelane_b32 v1, s12, 24 -; GCN-NEXT: v_writelane_b32 v1, s13, 25 -; GCN-NEXT: v_writelane_b32 v1, s14, 26 -; GCN-NEXT: v_writelane_b32 v1, s15, 27 -; GCN-NEXT: v_writelane_b32 v1, s16, 28 -; GCN-NEXT: v_writelane_b32 v1, s17, 29 -; GCN-NEXT: v_writelane_b32 v1, s18, 30 -; GCN-NEXT: v_writelane_b32 v1, s19, 31 +; GCN-NEXT: v_writelane_b32 v32, s4, 16 +; GCN-NEXT: v_writelane_b32 v32, s5, 17 +; GCN-NEXT: v_writelane_b32 v32, s6, 18 +; GCN-NEXT: v_writelane_b32 v32, s7, 19 +; GCN-NEXT: v_writelane_b32 v32, s8, 20 +; GCN-NEXT: v_writelane_b32 v32, s9, 21 +; GCN-NEXT: v_writelane_b32 v32, s10, 22 +; GCN-NEXT: v_writelane_b32 v32, s11, 23 +; GCN-NEXT: v_writelane_b32 v32, s12, 24 +; GCN-NEXT: v_writelane_b32 v32, 
s13, 25 +; GCN-NEXT: v_writelane_b32 v32, s14, 26 +; GCN-NEXT: v_writelane_b32 v32, s15, 27 +; GCN-NEXT: v_writelane_b32 v32, s16, 28 +; GCN-NEXT: v_writelane_b32 v32, s17, 29 +; GCN-NEXT: v_writelane_b32 v32, s18, 30 +; GCN-NEXT: v_writelane_b32 v32, s19, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v1, s4, 32 -; GCN-NEXT: v_writelane_b32 v1, s5, 33 -; GCN-NEXT: v_writelane_b32 v1, s6, 34 -; GCN-NEXT: v_writelane_b32 v1, s7, 35 -; GCN-NEXT: v_writelane_b32 v1, s8, 36 -; GCN-NEXT: v_writelane_b32 v1, s9, 37 -; GCN-NEXT: v_writelane_b32 v1, s10, 38 -; GCN-NEXT: v_writelane_b32 v1, s11, 39 -; GCN-NEXT: v_writelane_b32 v1, s12, 40 -; GCN-NEXT: v_writelane_b32 v1, s13, 41 -; GCN-NEXT: v_writelane_b32 v1, s14, 42 -; GCN-NEXT: v_writelane_b32 v1, s15, 43 -; GCN-NEXT: v_writelane_b32 v1, s16, 44 -; GCN-NEXT: v_writelane_b32 v1, s17, 45 -; GCN-NEXT: v_writelane_b32 v1, s18, 46 -; GCN-NEXT: v_writelane_b32 v1, s19, 47 +; GCN-NEXT: v_writelane_b32 v32, s4, 32 +; GCN-NEXT: v_writelane_b32 v32, s5, 33 +; GCN-NEXT: v_writelane_b32 v32, s6, 34 +; GCN-NEXT: v_writelane_b32 v32, s7, 35 +; GCN-NEXT: v_writelane_b32 v32, s8, 36 +; GCN-NEXT: v_writelane_b32 v32, s9, 37 +; GCN-NEXT: v_writelane_b32 v32, s10, 38 +; GCN-NEXT: v_writelane_b32 v32, s11, 39 +; GCN-NEXT: v_writelane_b32 v32, s12, 40 +; GCN-NEXT: v_writelane_b32 v32, s13, 41 +; GCN-NEXT: v_writelane_b32 v32, s14, 42 +; GCN-NEXT: v_writelane_b32 v32, s15, 43 +; GCN-NEXT: v_writelane_b32 v32, s16, 44 +; GCN-NEXT: v_writelane_b32 v32, s17, 45 +; GCN-NEXT: v_writelane_b32 v32, s18, 46 +; GCN-NEXT: v_writelane_b32 v32, s19, 47 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v1, s4, 48 -; GCN-NEXT: v_writelane_b32 v1, s5, 49 -; GCN-NEXT: v_writelane_b32 v1, s6, 50 -; GCN-NEXT: v_writelane_b32 v1, s7, 51 -; GCN-NEXT: v_writelane_b32 v1, s8, 52 -; GCN-NEXT: v_writelane_b32 v1, s9, 53 -; GCN-NEXT: v_writelane_b32 v1, s10, 
54 -; GCN-NEXT: v_writelane_b32 v1, s11, 55 -; GCN-NEXT: v_writelane_b32 v1, s12, 56 -; GCN-NEXT: v_writelane_b32 v1, s13, 57 -; GCN-NEXT: v_writelane_b32 v1, s14, 58 -; GCN-NEXT: v_writelane_b32 v1, s15, 59 -; GCN-NEXT: v_writelane_b32 v1, s16, 60 -; GCN-NEXT: v_writelane_b32 v1, s17, 61 -; GCN-NEXT: v_writelane_b32 v1, s18, 62 -; GCN-NEXT: v_writelane_b32 v1, s19, 63 +; GCN-NEXT: v_writelane_b32 v32, s4, 48 +; GCN-NEXT: v_writelane_b32 v32, s5, 49 +; GCN-NEXT: v_writelane_b32 v32, s6, 50 +; GCN-NEXT: v_writelane_b32 v32, s7, 51 +; GCN-NEXT: v_writelane_b32 v32, s8, 52 +; GCN-NEXT: v_writelane_b32 v32, s9, 53 +; GCN-NEXT: v_writelane_b32 v32, s10, 54 +; GCN-NEXT: v_writelane_b32 v32, s11, 55 +; GCN-NEXT: v_writelane_b32 v32, s12, 56 +; GCN-NEXT: v_writelane_b32 v32, s13, 57 +; GCN-NEXT: v_writelane_b32 v32, s14, 58 +; GCN-NEXT: v_writelane_b32 v32, s15, 59 +; GCN-NEXT: v_writelane_b32 v32, s16, 60 +; GCN-NEXT: v_writelane_b32 v32, s17, 61 +; GCN-NEXT: v_writelane_b32 v32, s18, 62 +; GCN-NEXT: v_writelane_b32 v32, s19, 63 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_store_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v32, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[2:3] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_waitcnt vmcnt(1) -; GCN-NEXT: v_writelane_b32 v0, s2, 0 -; GCN-NEXT: v_writelane_b32 v0, s3, 1 +; GCN-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane +; GCN-NEXT: v_writelane_b32 v32, s2, 0 +; GCN-NEXT: v_writelane_b32 v32, s3, 1 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v32, off, s[52:55], 0 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: s_mov_b32 s1, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) @@ -1109,59 +1065,59 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 
{ ; GCN-NEXT: s_cbranch_scc1 .LBB3_2 ; GCN-NEXT: ; %bb.1: ; %bb0 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_load_dword v2, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v31, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s36, v2, 32 -; GCN-NEXT: v_readlane_b32 s37, v2, 33 -; GCN-NEXT: v_readlane_b32 s38, v2, 34 -; GCN-NEXT: v_readlane_b32 s39, v2, 35 -; GCN-NEXT: v_readlane_b32 s40, v2, 36 -; GCN-NEXT: v_readlane_b32 s41, v2, 37 -; GCN-NEXT: v_readlane_b32 s42, v2, 38 -; GCN-NEXT: v_readlane_b32 s43, v2, 39 -; GCN-NEXT: v_readlane_b32 s44, v2, 40 -; GCN-NEXT: v_readlane_b32 s45, v2, 41 -; GCN-NEXT: v_readlane_b32 s46, v2, 42 -; GCN-NEXT: v_readlane_b32 s47, v2, 43 -; GCN-NEXT: v_readlane_b32 s48, v2, 44 -; GCN-NEXT: v_readlane_b32 s49, v2, 45 -; GCN-NEXT: v_readlane_b32 s50, v2, 46 -; GCN-NEXT: v_readlane_b32 s51, v2, 47 -; GCN-NEXT: v_readlane_b32 s0, v2, 16 -; GCN-NEXT: v_readlane_b32 s1, v2, 17 -; GCN-NEXT: v_readlane_b32 s2, v2, 18 -; GCN-NEXT: v_readlane_b32 s3, v2, 19 -; GCN-NEXT: v_readlane_b32 s4, v2, 20 -; GCN-NEXT: v_readlane_b32 s5, v2, 21 -; GCN-NEXT: v_readlane_b32 s6, v2, 22 -; GCN-NEXT: v_readlane_b32 s7, v2, 23 -; GCN-NEXT: v_readlane_b32 s8, v2, 24 -; GCN-NEXT: v_readlane_b32 s9, v2, 25 -; GCN-NEXT: v_readlane_b32 s10, v2, 26 -; GCN-NEXT: v_readlane_b32 s11, v2, 27 -; GCN-NEXT: v_readlane_b32 s12, v2, 28 -; GCN-NEXT: v_readlane_b32 s13, v2, 29 -; GCN-NEXT: v_readlane_b32 s14, v2, 30 -; GCN-NEXT: v_readlane_b32 s15, v2, 31 -; GCN-NEXT: v_readlane_b32 s16, v2, 0 -; GCN-NEXT: v_readlane_b32 s17, v2, 1 -; GCN-NEXT: v_readlane_b32 s18, v2, 2 -; GCN-NEXT: v_readlane_b32 s19, v2, 3 -; GCN-NEXT: v_readlane_b32 s20, v2, 4 -; GCN-NEXT: v_readlane_b32 s21, v2, 5 -; GCN-NEXT: v_readlane_b32 s22, v2, 6 -; GCN-NEXT: v_readlane_b32 s23, v2, 7 -; GCN-NEXT: v_readlane_b32 s24, v2, 8 -; GCN-NEXT: v_readlane_b32 s25, 
v2, 9 -; GCN-NEXT: v_readlane_b32 s26, v2, 10 -; GCN-NEXT: v_readlane_b32 s27, v2, 11 -; GCN-NEXT: v_readlane_b32 s28, v2, 12 -; GCN-NEXT: v_readlane_b32 s29, v2, 13 -; GCN-NEXT: v_readlane_b32 s30, v2, 14 -; GCN-NEXT: v_readlane_b32 s31, v2, 15 +; GCN-NEXT: v_readlane_b32 s36, v31, 32 +; GCN-NEXT: v_readlane_b32 s37, v31, 33 +; GCN-NEXT: v_readlane_b32 s38, v31, 34 +; GCN-NEXT: v_readlane_b32 s39, v31, 35 +; GCN-NEXT: v_readlane_b32 s40, v31, 36 +; GCN-NEXT: v_readlane_b32 s41, v31, 37 +; GCN-NEXT: v_readlane_b32 s42, v31, 38 +; GCN-NEXT: v_readlane_b32 s43, v31, 39 +; GCN-NEXT: v_readlane_b32 s44, v31, 40 +; GCN-NEXT: v_readlane_b32 s45, v31, 41 +; GCN-NEXT: v_readlane_b32 s46, v31, 42 +; GCN-NEXT: v_readlane_b32 s47, v31, 43 +; GCN-NEXT: v_readlane_b32 s48, v31, 44 +; GCN-NEXT: v_readlane_b32 s49, v31, 45 +; GCN-NEXT: v_readlane_b32 s50, v31, 46 +; GCN-NEXT: v_readlane_b32 s51, v31, 47 +; GCN-NEXT: v_readlane_b32 s0, v31, 16 +; GCN-NEXT: v_readlane_b32 s1, v31, 17 +; GCN-NEXT: v_readlane_b32 s2, v31, 18 +; GCN-NEXT: v_readlane_b32 s3, v31, 19 +; GCN-NEXT: v_readlane_b32 s4, v31, 20 +; GCN-NEXT: v_readlane_b32 s5, v31, 21 +; GCN-NEXT: v_readlane_b32 s6, v31, 22 +; GCN-NEXT: v_readlane_b32 s7, v31, 23 +; GCN-NEXT: v_readlane_b32 s8, v31, 24 +; GCN-NEXT: v_readlane_b32 s9, v31, 25 +; GCN-NEXT: v_readlane_b32 s10, v31, 26 +; GCN-NEXT: v_readlane_b32 s11, v31, 27 +; GCN-NEXT: v_readlane_b32 s12, v31, 28 +; GCN-NEXT: v_readlane_b32 s13, v31, 29 +; GCN-NEXT: v_readlane_b32 s14, v31, 30 +; GCN-NEXT: v_readlane_b32 s15, v31, 31 +; GCN-NEXT: v_readlane_b32 s16, v31, 0 +; GCN-NEXT: v_readlane_b32 s17, v31, 1 +; GCN-NEXT: v_readlane_b32 s18, v31, 2 +; GCN-NEXT: v_readlane_b32 s19, v31, 3 +; GCN-NEXT: v_readlane_b32 s20, v31, 4 +; GCN-NEXT: v_readlane_b32 s21, v31, 5 +; GCN-NEXT: v_readlane_b32 s22, v31, 6 +; GCN-NEXT: v_readlane_b32 s23, v31, 7 +; GCN-NEXT: v_readlane_b32 s24, v31, 8 +; GCN-NEXT: v_readlane_b32 s25, v31, 9 +; GCN-NEXT: v_readlane_b32 s26, v31, 10 +; 
GCN-NEXT: v_readlane_b32 s27, v31, 11 +; GCN-NEXT: v_readlane_b32 s28, v31, 12 +; GCN-NEXT: v_readlane_b32 s29, v31, 13 +; GCN-NEXT: v_readlane_b32 s30, v31, 14 +; GCN-NEXT: v_readlane_b32 s31, v31, 15 ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v32, off, s[52:55], 0 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def v0 @@ -1172,25 +1128,25 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 { ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s4, v2, 48 -; GCN-NEXT: v_readlane_b32 s5, v2, 49 -; GCN-NEXT: v_readlane_b32 s6, v2, 50 -; GCN-NEXT: v_readlane_b32 s7, v2, 51 -; GCN-NEXT: v_readlane_b32 s8, v2, 52 -; GCN-NEXT: v_readlane_b32 s9, v2, 53 -; GCN-NEXT: v_readlane_b32 s10, v2, 54 -; GCN-NEXT: v_readlane_b32 s11, v2, 55 -; GCN-NEXT: v_readlane_b32 s12, v2, 56 -; GCN-NEXT: v_readlane_b32 s13, v2, 57 -; GCN-NEXT: v_readlane_b32 s14, v2, 58 -; GCN-NEXT: v_readlane_b32 s15, v2, 59 -; GCN-NEXT: v_readlane_b32 s16, v2, 60 -; GCN-NEXT: v_readlane_b32 s17, v2, 61 -; GCN-NEXT: v_readlane_b32 s18, v2, 62 -; GCN-NEXT: v_readlane_b32 s19, v2, 63 +; GCN-NEXT: v_readlane_b32 s4, v31, 48 +; GCN-NEXT: v_readlane_b32 s5, v31, 49 +; GCN-NEXT: v_readlane_b32 s6, v31, 50 +; GCN-NEXT: v_readlane_b32 s7, v31, 51 +; GCN-NEXT: v_readlane_b32 s8, v31, 52 +; GCN-NEXT: v_readlane_b32 s9, v31, 53 +; GCN-NEXT: v_readlane_b32 s10, v31, 54 +; GCN-NEXT: v_readlane_b32 s11, v31, 55 +; GCN-NEXT: v_readlane_b32 s12, v31, 56 +; GCN-NEXT: v_readlane_b32 s13, v31, 57 +; GCN-NEXT: v_readlane_b32 s14, v31, 58 +; GCN-NEXT: v_readlane_b32 s15, v31, 59 +; GCN-NEXT: v_readlane_b32 s16, v31, 60 +; GCN-NEXT: v_readlane_b32 s17, v31, 61 +; GCN-NEXT: v_readlane_b32 s18, v31, 62 +; GCN-NEXT: v_readlane_b32 s19, v31, 63 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s0, v1, 
0 -; GCN-NEXT: v_readlane_b32 s1, v1, 1 +; GCN-NEXT: v_readlane_b32 s0, v32, 0 +; GCN-NEXT: v_readlane_b32 s1, v32, 1 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[36:51] ; GCN-NEXT: ;;#ASMEND @@ -1204,14 +1160,6 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill_live_v0(i32 %in) #1 { ; GCN-NEXT: ; use v0 ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: .LBB3_2: ; %ret -; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[34:35] -; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GCN-NEXT: buffer_load_dword v1, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[34:35] -; GCN-NEXT: ; kill: killed $vgpr1 -; GCN-NEXT: ; kill: killed $vgpr0 ; GCN-NEXT: s_endpgm call void asm sideeffect "", "~{v[0:7]}" () #0 call void asm sideeffect "", "~{v[8:15]}" () #0 @@ -1243,7 +1191,7 @@ ret: } attributes #0 = { nounwind } -attributes #1 = { nounwind "amdgpu-waves-per-eu"="8,8" } +attributes #1 = { nounwind "amdgpu-waves-per-eu"="7,7" } !llvm.module.flags = !{!0} !0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir index 8e2a56b463c40..fa62048fd31ad 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir @@ -61,35 +61,27 @@ machineFunctionInfo: isChainFunction: true returnsVoid: true wwmReservedRegs: - - '$vgpr11' + - '$vgpr10' body: | bb.0: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10 ; GCN-LABEL: name: preserve_all_lanes_wwm_above_args - ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10 + ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr10, 0, 0, implicit $exec, implicit $flat_scr :: (store 
(s32) into %stack.0, addrspace 5) - ; GCN-NEXT: SCRATCH_STORE_DWORD_ST killed $vgpr11, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 - ; GCN-NEXT: renamable $vgpr10 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr10 + ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 - ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr10, 0 - ; GCN-NEXT: renamable $vgpr10 = V_MOV_B32_e32 10, implicit $exec - ; GCN-NEXT: $vgpr8 = COPY killed renamable $vgpr10 + ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0 + ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 10, implicit $exec + ; GCN-NEXT: $vgpr8 = COPY killed $vgpr0 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) - ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: $vgpr10 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) - ; GCN-NEXT: $vgpr11 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5) - ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9 - renamable $vgpr10 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr10 + $vgpr10 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr10 $sgpr35 = S_MOV_B32 5 $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr10, 0 - renamable $vgpr10 = V_MOV_B32_e32 10, implicit $exec - $vgpr8 = COPY renamable killed $vgpr10 + $vgpr10 = V_MOV_B32_e32 10, implicit $exec + 
$vgpr8 = COPY killed $vgpr10 renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9 @@ -139,23 +131,15 @@ body: | liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10 ; GCN-LABEL: name: preserve_inactive_lanes_wwm_args - ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10 + ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr10 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 - ; GCN-NEXT: renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8 + ; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0 ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) - ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: $vgpr8 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit 
$vgpr8(tied-def 0) :: (load (s32) from %stack.0, addrspace 5) - ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $vgpr9(tied-def 0) :: (load (s32) from %stack.1, addrspace 5) - ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 - ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9 + ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr0 renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8 $sgpr35 = S_MOV_B32 5 $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0 @@ -184,7 +168,7 @@ body: | ; GCN-LABEL: name: dont_preserve_if_no_chain_calls ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8, $vgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8 + ; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0 ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec @@ -218,7 +202,7 @@ body: | ; GCN-LABEL: name: dont_preserve_v0_v7 ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr8 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0 + ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0 ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 10, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir index 4b8b71a740085..49001a2cfd7a6 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir @@ -36,19 +36,11 @@ body: | liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9 ; GCN-LABEL: name: preserve_inactive_wwm - ; GCN: liveins: 
$sgpr0, $sgpr35, $vgpr8, $vgpr9 + ; GCN: liveins: $sgpr0, $sgpr35 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) - ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: $vgpr8 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr8(tied-def 0) :: (load (s32) from %stack.0, addrspace 5) - ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5) - ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 - ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8 + ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr1 renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8 @@ -72,24 +64,16 @@ 
body: | ; GCN-LABEL: name: preserve_inactive_detected_wwm ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 - ; GCN-NEXT: renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8 + ; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0 ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec - ; GCN-NEXT: renamable $vgpr9 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr9 + ; GCN-NEXT: $vgpr9 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr9 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr9, 0 ; GCN-NEXT: renamable $vgpr9 = V_MOV_B32_e32 10, implicit $exec ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) - ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: $vgpr8 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr8(tied-def 0) :: (load (s32) from %stack.0, addrspace 5) - ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5) - ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable 
$sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8 renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8 $sgpr35 = S_MOV_B32 5 @@ -122,7 +106,7 @@ body: | ; GCN-LABEL: name: dont_preserve_wwm_if_no_chain_calls ; GCN: liveins: $sgpr35, $vgpr8 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8 + ; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr8, 0 ; GCN-NEXT: renamable $vgpr8 = V_MOV_B32_e32 10, implicit $exec @@ -151,11 +135,11 @@ body: | liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9 ; GCN-LABEL: name: dont_preserve_wwm_if_init_whole_wave - ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9 + ; GCN: liveins: $sgpr0, $sgpr35 ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) - ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8 + ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr1 renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4) SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8 @@ -209,7 +193,7 @@ body: | ; GCN-LABEL: name: dont_preserve_v0_v7 ; GCN: liveins: $sgpr0, $sgpr35, $vgpr0, $vgpr7, $vgpr8, $vgpr9 ; GCN-NEXT: {{ 
$}} - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0 + ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0 ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 10, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/pr51516.mir b/llvm/test/CodeGen/AMDGPU/pr51516.mir index b21285e83dc21..4be102f7860ea 100644 --- a/llvm/test/CodeGen/AMDGPU/pr51516.mir +++ b/llvm/test/CodeGen/AMDGPU/pr51516.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -amdgpu-disable-unclustered-high-rp-reschedule -verify-machineinstrs -start-before=machine-scheduler -stop-after=virtregrewriter,1 -o - %s | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -amdgpu-disable-unclustered-high-rp-reschedule -verify-machineinstrs -start-before=machine-scheduler -stop-after=virtregrewriter,2 -o - %s | FileCheck -check-prefix=GCN %s # Check that %3 was not rematerialized before the last store since its operand %1 # is killed by that store. 
diff --git a/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir b/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir index 4571e792c7cb5..168d63d3a95b9 100644 --- a/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir +++ b/llvm/test/CodeGen/AMDGPU/preserve-only-inactive-lane.mir @@ -20,16 +20,10 @@ body: | ; GCN-LABEL: name: preserve_scratch_vgpr_inactive_lanes ; GCN: liveins: $sgpr35, $vgpr0, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0 + ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0 ; GCN-NEXT: $sgpr35 = S_MOV_B32 5 ; GCN-NEXT: $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0 ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 10, implicit $exec - ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5) - ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GCN-NEXT: S_SETPC_B64_return killed renamable $sgpr30_sgpr31, implicit $vgpr0 renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr0 $sgpr35 = S_MOV_B32 5 diff --git a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll index bbeb2e1884a9f..924340ec8a2a6 100644 --- a/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll +++ b/llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll @@ -13,333 +13,333 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 { ; GFX906-NEXT: s_mov_b32 s16, s33 ; 
GFX906-NEXT: s_mov_b32 s33, s32 ; GFX906-NEXT: s_xor_saveexec_b64 s[18:19], -1 -; GFX906-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX906-NEXT: s_mov_b64 exec, -1 -; GFX906-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill ; GFX906-NEXT: s_mov_b64 exec, s[18:19] -; GFX906-NEXT: ; implicit-def: $vgpr2 +; GFX906-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane ; GFX906-NEXT: s_mov_b32 s21, s15 -; GFX906-NEXT: v_writelane_b32 v2, s6, 0 -; GFX906-NEXT: v_writelane_b32 v2, s7, 1 -; GFX906-NEXT: v_writelane_b32 v2, s21, 2 +; GFX906-NEXT: v_writelane_b32 v39, s6, 0 +; GFX906-NEXT: v_writelane_b32 v39, s7, 1 +; GFX906-NEXT: v_writelane_b32 v39, s21, 2 ; GFX906-NEXT: s_mov_b32 s22, s14 -; GFX906-NEXT: v_writelane_b32 v2, s22, 3 +; GFX906-NEXT: v_writelane_b32 v39, s22, 3 ; GFX906-NEXT: s_mov_b32 s23, s13 -; GFX906-NEXT: v_writelane_b32 v2, s23, 4 +; GFX906-NEXT: v_writelane_b32 v39, s23, 4 ; GFX906-NEXT: s_mov_b32 s24, s12 -; GFX906-NEXT: v_writelane_b32 v2, s24, 5 +; GFX906-NEXT: v_writelane_b32 v39, s24, 5 ; GFX906-NEXT: s_mov_b64 s[26:27], s[10:11] -; GFX906-NEXT: v_writelane_b32 v2, s26, 6 +; GFX906-NEXT: v_writelane_b32 v39, s26, 6 ; GFX906-NEXT: v_writelane_b32 v41, s16, 4 -; GFX906-NEXT: v_writelane_b32 v2, s27, 7 +; GFX906-NEXT: v_writelane_b32 v39, s27, 7 ; GFX906-NEXT: v_writelane_b32 v41, s34, 2 -; GFX906-NEXT: v_writelane_b32 v2, s8, 8 +; GFX906-NEXT: v_writelane_b32 v39, s8, 8 ; GFX906-NEXT: v_writelane_b32 v41, s35, 3 -; GFX906-NEXT: v_writelane_b32 v2, s9, 9 +; GFX906-NEXT: v_writelane_b32 v39, s9, 9 ; GFX906-NEXT: v_writelane_b32 v41, s30, 0 
-; GFX906-NEXT: v_writelane_b32 v2, s4, 10 +; GFX906-NEXT: v_writelane_b32 v39, s4, 10 ; GFX906-NEXT: s_addk_i32 s32, 0x2800 +; GFX906-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX906-NEXT: v_writelane_b32 v41, s31, 1 ; GFX906-NEXT: v_mov_b32_e32 v32, v31 -; GFX906-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX906-NEXT: s_nop 0 -; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill -; GFX906-NEXT: v_writelane_b32 v2, s5, 11 +; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GFX906-NEXT: v_writelane_b32 v39, s5, 11 ; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX906-NEXT: v_mov_b32_e32 v33, v2 ; GFX906-NEXT: s_mov_b64 exec, s[34:35] ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def v[0:31] ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill ; GFX906-NEXT: s_nop 0 -; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v10, off, s[0:3], s33 
offset:56 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill -; GFX906-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill 
+; GFX906-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v24, off, 
s[0:3], s33 offset:116 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill +; GFX906-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def v40 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s11 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX906-NEXT: v_mov_b32_e32 v40, v33 -; GFX906-NEXT: s_mov_b64 exec, s[34:35] -; GFX906-NEXT: v_writelane_b32 v40, s11, 12 +; GFX906-NEXT: v_writelane_b32 v39, s11, 12 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s12 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_writelane_b32 v40, s12, 13 +; GFX906-NEXT: v_writelane_b32 v39, s12, 13 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s13 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_writelane_b32 v40, s13, 14 +; GFX906-NEXT: v_writelane_b32 v39, s13, 14 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s14 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_writelane_b32 v40, s14, 15 +; GFX906-NEXT: v_writelane_b32 v39, s14, 15 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s15 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_writelane_b32 v40, s15, 16 +; GFX906-NEXT: v_writelane_b32 v39, s15, 16 ; GFX906-NEXT: s_getpc_b64 s[10:11] ; GFX906-NEXT: s_add_u32 s10, s10, foo@gotpcrel32@lo+4 ; GFX906-NEXT: s_addc_u32 s11, s11, foo@gotpcrel32@hi+12 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s16 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_writelane_b32 v40, s16, 17 +; GFX906-NEXT: 
v_writelane_b32 v39, s16, 17 ; GFX906-NEXT: s_load_dwordx2 s[10:11], s[10:11], 0x0 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s17 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_writelane_b32 v40, s17, 18 +; GFX906-NEXT: v_writelane_b32 v39, s17, 18 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s18 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_writelane_b32 v40, s18, 19 +; GFX906-NEXT: v_writelane_b32 v39, s18, 19 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s19 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_writelane_b32 v40, s19, 20 +; GFX906-NEXT: v_writelane_b32 v39, s19, 20 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s20 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_writelane_b32 v40, s20, 21 +; GFX906-NEXT: v_writelane_b32 v39, s20, 21 ; GFX906-NEXT: s_waitcnt lgkmcnt(0) -; GFX906-NEXT: v_writelane_b32 v40, s10, 22 -; GFX906-NEXT: v_writelane_b32 v40, s11, 23 +; GFX906-NEXT: v_writelane_b32 v39, s10, 22 +; GFX906-NEXT: v_writelane_b32 v39, s11, 23 +; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX906-NEXT: s_mov_b64 exec, s[34:35] ; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX906-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX906-NEXT: s_mov_b64 exec, s[34:35] -; GFX906-NEXT: v_readlane_b32 s16, v40, 22 +; GFX906-NEXT: v_readlane_b32 s16, v39, 22 ; GFX906-NEXT: s_mov_b32 s12, s24 ; GFX906-NEXT: s_mov_b32 s13, s23 ; GFX906-NEXT: s_mov_b32 s14, s22 ; GFX906-NEXT: v_mov_b32_e32 v31, v32 ; GFX906-NEXT: s_mov_b32 s15, s21 ; GFX906-NEXT: s_mov_b64 s[10:11], s[26:27] -; GFX906-NEXT: v_readlane_b32 s17, v40, 23 -; GFX906-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX906-NEXT: v_readlane_b32 s17, v39, 23 +; GFX906-NEXT: v_mov_b32_e32 v40, v32 ; GFX906-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX906-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX906-NEXT: s_mov_b64 exec, s[34:35] -; GFX906-NEXT: 
v_readlane_b32 s11, v40, 12 +; GFX906-NEXT: s_waitcnt vmcnt(0) +; GFX906-NEXT: v_readlane_b32 s11, v39, 12 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s11 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_readlane_b32 s12, v40, 13 +; GFX906-NEXT: v_readlane_b32 s12, v39, 13 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s12 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_readlane_b32 s13, v40, 14 +; GFX906-NEXT: v_readlane_b32 s13, v39, 14 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s13 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_readlane_b32 s14, v40, 15 +; GFX906-NEXT: v_readlane_b32 s14, v39, 15 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s14 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_readlane_b32 s15, v40, 16 +; GFX906-NEXT: v_readlane_b32 s15, v39, 16 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s15 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_readlane_b32 s16, v40, 17 +; GFX906-NEXT: v_readlane_b32 s16, v39, 17 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s16 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_readlane_b32 s17, v40, 18 +; GFX906-NEXT: v_readlane_b32 s17, v39, 18 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s17 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_readlane_b32 s18, v40, 19 +; GFX906-NEXT: v_readlane_b32 s18, v39, 19 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s18 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_readlane_b32 s19, v40, 20 +; GFX906-NEXT: v_readlane_b32 s19, v39, 20 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s19 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_readlane_b32 s20, v40, 21 +; GFX906-NEXT: v_readlane_b32 s20, v39, 21 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s20 ; GFX906-NEXT: ;;#ASMEND ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s21 ; GFX906-NEXT: ;;#ASMEND +; GFX906-NEXT: v_writelane_b32 v39, s21, 24 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s22 ; GFX906-NEXT: ;;#ASMEND +; GFX906-NEXT: v_writelane_b32 v39, s22, 25 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s23 ; GFX906-NEXT: ;;#ASMEND +; 
GFX906-NEXT: v_writelane_b32 v39, s23, 26 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s24 ; GFX906-NEXT: ;;#ASMEND +; GFX906-NEXT: v_writelane_b32 v39, s24, 27 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s25 ; GFX906-NEXT: ;;#ASMEND +; GFX906-NEXT: v_writelane_b32 v39, s25, 28 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s26 ; GFX906-NEXT: ;;#ASMEND +; GFX906-NEXT: v_writelane_b32 v39, s26, 29 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s27 ; GFX906-NEXT: ;;#ASMEND +; GFX906-NEXT: v_writelane_b32 v39, s27, 30 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s28 ; GFX906-NEXT: ;;#ASMEND +; GFX906-NEXT: v_writelane_b32 v39, s28, 31 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; def s29 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX906-NEXT: v_writelane_b32 v40, s21, 24 -; GFX906-NEXT: v_writelane_b32 v40, s22, 25 -; GFX906-NEXT: v_writelane_b32 v40, s23, 26 -; GFX906-NEXT: v_writelane_b32 v40, s24, 27 -; GFX906-NEXT: v_writelane_b32 v40, s25, 28 -; GFX906-NEXT: v_writelane_b32 v40, s26, 29 -; GFX906-NEXT: v_writelane_b32 v40, s27, 30 -; GFX906-NEXT: v_writelane_b32 v40, s28, 31 -; GFX906-NEXT: v_writelane_b32 v40, s29, 32 -; GFX906-NEXT: v_readlane_b32 s4, v40, 10 -; GFX906-NEXT: v_readlane_b32 s6, v40, 0 -; GFX906-NEXT: v_readlane_b32 s8, v40, 8 -; GFX906-NEXT: v_readlane_b32 s10, v40, 6 -; GFX906-NEXT: v_readlane_b32 s16, v40, 22 -; GFX906-NEXT: v_readlane_b32 s12, v40, 5 -; GFX906-NEXT: v_readlane_b32 s13, v40, 4 -; GFX906-NEXT: v_readlane_b32 s14, v40, 3 -; GFX906-NEXT: v_readlane_b32 s15, v40, 2 -; GFX906-NEXT: v_readlane_b32 s5, v40, 11 -; GFX906-NEXT: v_readlane_b32 s7, v40, 1 -; GFX906-NEXT: v_readlane_b32 s9, v40, 9 -; GFX906-NEXT: v_readlane_b32 s11, v40, 7 -; GFX906-NEXT: v_readlane_b32 s17, v40, 23 +; GFX906-NEXT: v_writelane_b32 v39, s29, 32 ; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX906-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded 
Spill ; GFX906-NEXT: s_mov_b64 exec, s[34:35] +; GFX906-NEXT: v_readlane_b32 s4, v39, 10 +; GFX906-NEXT: v_readlane_b32 s6, v39, 0 +; GFX906-NEXT: v_readlane_b32 s8, v39, 8 +; GFX906-NEXT: v_readlane_b32 s10, v39, 6 +; GFX906-NEXT: v_readlane_b32 s16, v39, 22 +; GFX906-NEXT: v_readlane_b32 s12, v39, 5 +; GFX906-NEXT: v_mov_b32_e32 v31, v40 +; GFX906-NEXT: v_readlane_b32 s13, v39, 4 +; GFX906-NEXT: v_readlane_b32 s14, v39, 3 +; GFX906-NEXT: v_readlane_b32 s15, v39, 2 +; GFX906-NEXT: v_readlane_b32 s5, v39, 11 +; GFX906-NEXT: v_readlane_b32 s7, v39, 1 +; GFX906-NEXT: v_readlane_b32 s9, v39, 9 +; GFX906-NEXT: v_readlane_b32 s11, v39, 7 +; GFX906-NEXT: v_readlane_b32 s17, v39, 23 ; GFX906-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX906-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX906-NEXT: s_mov_b64 exec, s[34:35] -; GFX906-NEXT: v_readlane_b32 s21, v40, 24 +; GFX906-NEXT: s_waitcnt vmcnt(0) +; GFX906-NEXT: v_readlane_b32 s4, v39, 10 +; GFX906-NEXT: v_readlane_b32 s6, v39, 0 +; GFX906-NEXT: v_readlane_b32 s8, v39, 8 +; GFX906-NEXT: v_readlane_b32 s10, v39, 6 +; GFX906-NEXT: v_readlane_b32 s16, v39, 22 +; GFX906-NEXT: v_readlane_b32 s5, v39, 11 +; GFX906-NEXT: v_readlane_b32 s7, v39, 1 +; GFX906-NEXT: v_readlane_b32 s9, v39, 9 +; GFX906-NEXT: v_readlane_b32 s11, v39, 7 +; GFX906-NEXT: v_readlane_b32 s12, v39, 5 +; GFX906-NEXT: v_readlane_b32 s13, v39, 4 +; GFX906-NEXT: v_readlane_b32 s14, v39, 3 +; GFX906-NEXT: v_readlane_b32 s15, v39, 2 +; GFX906-NEXT: v_mov_b32_e32 v31, v40 +; GFX906-NEXT: v_readlane_b32 s17, v39, 23 +; GFX906-NEXT: v_readlane_b32 s21, v39, 24 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s21 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_readlane_b32 s22, v40, 25 +; GFX906-NEXT: v_readlane_b32 s22, v39, 25 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s22 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_readlane_b32 s23, v40, 26 +; GFX906-NEXT: v_readlane_b32 s23, v39, 26 ; 
GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s23 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_readlane_b32 s24, v40, 27 +; GFX906-NEXT: v_readlane_b32 s24, v39, 27 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s24 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_readlane_b32 s25, v40, 28 +; GFX906-NEXT: v_readlane_b32 s25, v39, 28 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s25 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_readlane_b32 s26, v40, 29 +; GFX906-NEXT: v_readlane_b32 s26, v39, 29 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s26 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_readlane_b32 s27, v40, 30 +; GFX906-NEXT: v_readlane_b32 s27, v39, 30 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s27 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_readlane_b32 s28, v40, 31 +; GFX906-NEXT: v_readlane_b32 s28, v39, 31 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s28 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: v_readlane_b32 s29, v40, 32 +; GFX906-NEXT: v_readlane_b32 s29, v39, 32 ; GFX906-NEXT: ;;#ASMSTART ; GFX906-NEXT: ; use s29 ; GFX906-NEXT: ;;#ASMEND -; GFX906-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX906-NEXT: v_readlane_b32 s4, v40, 10 -; GFX906-NEXT: v_readlane_b32 s6, v40, 0 -; GFX906-NEXT: v_readlane_b32 s8, v40, 8 -; GFX906-NEXT: v_readlane_b32 s10, v40, 6 -; GFX906-NEXT: v_readlane_b32 s16, v40, 22 -; GFX906-NEXT: v_readlane_b32 s5, v40, 11 -; GFX906-NEXT: v_readlane_b32 s7, v40, 1 -; GFX906-NEXT: v_readlane_b32 s9, v40, 9 -; GFX906-NEXT: v_readlane_b32 s11, v40, 7 -; GFX906-NEXT: v_readlane_b32 s12, v40, 5 -; GFX906-NEXT: v_readlane_b32 s13, v40, 4 -; GFX906-NEXT: v_readlane_b32 s14, v40, 3 -; GFX906-NEXT: v_readlane_b32 s15, v40, 2 -; GFX906-NEXT: v_readlane_b32 s17, v40, 23 -; GFX906-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX906-NEXT: s_mov_b64 exec, s[34:35] ; GFX906-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX906-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GFX906-NEXT: 
buffer_load_dword v1, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:100 
; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v24, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v25, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v26, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v27, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v28, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v29, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload +; GFX906-NEXT: 
buffer_load_dword v12, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v24, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v25, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v26, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v27, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v28, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v29, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload ; GFX906-NEXT: v_readlane_b32 s31, v41, 1 ; 
GFX906-NEXT: v_readlane_b32 s30, v41, 0 -; GFX906-NEXT: ; kill: killed $vgpr40 ; GFX906-NEXT: v_readlane_b32 s4, v41, 4 ; GFX906-NEXT: v_readlane_b32 s34, v41, 2 ; GFX906-NEXT: v_readlane_b32 s35, v41, 3 @@ -360,12 +360,11 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 { ; GFX906-NEXT: s_waitcnt vmcnt(0) ; GFX906-NEXT: flat_store_dwordx4 v[0:1], v[2:5] ; GFX906-NEXT: s_waitcnt vmcnt(0) +; GFX906-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX906-NEXT: s_xor_saveexec_b64 s[6:7], -1 -; GFX906-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX906-NEXT: s_mov_b64 exec, -1 -; GFX906-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX906-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload +; GFX906-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload ; GFX906-NEXT: s_mov_b64 exec, s[6:7] ; GFX906-NEXT: s_addk_i32 s32, 0xd800 ; GFX906-NEXT: s_mov_b32 s33, s4 @@ -378,346 +377,346 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 { ; GFX908-NEXT: s_mov_b32 s16, s33 ; GFX908-NEXT: s_mov_b32 s33, s32 ; GFX908-NEXT: s_xor_saveexec_b64 s[18:19], -1 -; GFX908-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill -; GFX908-NEXT: s_mov_b64 exec, -1 -; GFX908-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX908-NEXT: s_mov_b64 exec, s[18:19] -; GFX908-NEXT: v_mov_b32_e32 v3, s16 -; GFX908-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill -; 
GFX908-NEXT: v_mov_b32_e32 v3, s34 -; GFX908-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill -; GFX908-NEXT: v_mov_b32_e32 v3, s35 -; GFX908-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill +; GFX908-NEXT: v_mov_b32_e32 v2, s16 +; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill +; GFX908-NEXT: v_mov_b32_e32 v2, s34 +; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill +; GFX908-NEXT: v_mov_b32_e32 v2, s35 +; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill ; GFX908-NEXT: s_addk_i32 s32, 0x2c00 +; GFX908-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX908-NEXT: s_mov_b64 s[16:17], exec ; GFX908-NEXT: s_mov_b64 exec, 1 -; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:172 +; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:168 ; GFX908-NEXT: v_writelane_b32 v2, s30, 0 -; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:172 +; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:168 ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: s_mov_b64 exec, s[16:17] ; GFX908-NEXT: s_mov_b64 s[16:17], exec ; GFX908-NEXT: s_mov_b64 exec, 1 -; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:172 +; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:168 ; GFX908-NEXT: v_writelane_b32 v2, s31, 0 -; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:172 +; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:168 ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: s_mov_b64 exec, s[16:17] 
-; GFX908-NEXT: ; implicit-def: $vgpr2 +; GFX908-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane ; GFX908-NEXT: s_mov_b32 s21, s15 -; GFX908-NEXT: v_writelane_b32 v2, s6, 0 -; GFX908-NEXT: v_writelane_b32 v2, s7, 1 -; GFX908-NEXT: v_writelane_b32 v2, s21, 2 +; GFX908-NEXT: v_writelane_b32 v39, s6, 0 +; GFX908-NEXT: v_writelane_b32 v39, s7, 1 +; GFX908-NEXT: v_writelane_b32 v39, s21, 2 ; GFX908-NEXT: s_mov_b32 s22, s14 -; GFX908-NEXT: v_writelane_b32 v2, s22, 3 +; GFX908-NEXT: v_writelane_b32 v39, s22, 3 ; GFX908-NEXT: s_mov_b32 s23, s13 -; GFX908-NEXT: v_writelane_b32 v2, s23, 4 +; GFX908-NEXT: v_writelane_b32 v39, s23, 4 ; GFX908-NEXT: s_mov_b32 s24, s12 -; GFX908-NEXT: v_writelane_b32 v2, s24, 5 +; GFX908-NEXT: v_writelane_b32 v39, s24, 5 ; GFX908-NEXT: s_mov_b64 s[26:27], s[10:11] -; GFX908-NEXT: v_writelane_b32 v2, s26, 6 -; GFX908-NEXT: v_writelane_b32 v2, s27, 7 -; GFX908-NEXT: v_writelane_b32 v2, s8, 8 -; GFX908-NEXT: v_writelane_b32 v2, s9, 9 -; GFX908-NEXT: v_writelane_b32 v2, s4, 10 +; GFX908-NEXT: v_writelane_b32 v39, s26, 6 +; GFX908-NEXT: v_writelane_b32 v39, s27, 7 +; GFX908-NEXT: v_writelane_b32 v39, s8, 8 +; GFX908-NEXT: v_writelane_b32 v39, s9, 9 +; GFX908-NEXT: v_writelane_b32 v39, s4, 10 ; GFX908-NEXT: v_mov_b32_e32 v32, v31 -; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill ; GFX908-NEXT: s_nop 0 -; GFX908-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill -; GFX908-NEXT: v_writelane_b32 v2, s5, 11 +; GFX908-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GFX908-NEXT: v_writelane_b32 v39, s5, 11 ; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX908-NEXT: v_mov_b32_e32 v33, v2 ; GFX908-NEXT: s_mov_b64 exec, s[34:35] ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def v[0:31] ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 
offset:20 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill ; GFX908-NEXT: s_nop 0 -; GFX908-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:104 ; 4-byte Folded 
Spill -; GFX908-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill -; GFX908-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword 
v13, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill +; GFX908-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def v40 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s11 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX908-NEXT: v_mov_b32_e32 v40, v33 -; GFX908-NEXT: s_mov_b64 exec, s[34:35] -; GFX908-NEXT: v_writelane_b32 v40, s11, 12 +; 
GFX908-NEXT: v_writelane_b32 v39, s11, 12 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s12 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_writelane_b32 v40, s12, 13 +; GFX908-NEXT: v_writelane_b32 v39, s12, 13 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s13 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_writelane_b32 v40, s13, 14 +; GFX908-NEXT: v_writelane_b32 v39, s13, 14 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s14 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_writelane_b32 v40, s14, 15 +; GFX908-NEXT: v_writelane_b32 v39, s14, 15 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s15 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_writelane_b32 v40, s15, 16 +; GFX908-NEXT: v_writelane_b32 v39, s15, 16 ; GFX908-NEXT: s_getpc_b64 s[10:11] ; GFX908-NEXT: s_add_u32 s10, s10, foo@gotpcrel32@lo+4 ; GFX908-NEXT: s_addc_u32 s11, s11, foo@gotpcrel32@hi+12 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s16 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_writelane_b32 v40, s16, 17 +; GFX908-NEXT: v_writelane_b32 v39, s16, 17 ; GFX908-NEXT: s_load_dwordx2 s[10:11], s[10:11], 0x0 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s17 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_writelane_b32 v40, s17, 18 +; GFX908-NEXT: v_writelane_b32 v39, s17, 18 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s18 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_writelane_b32 v40, s18, 19 +; GFX908-NEXT: v_writelane_b32 v39, s18, 19 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s19 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_writelane_b32 v40, s19, 20 +; GFX908-NEXT: v_writelane_b32 v39, s19, 20 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s20 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_writelane_b32 v40, s20, 21 +; GFX908-NEXT: v_writelane_b32 v39, s20, 21 ; GFX908-NEXT: s_waitcnt lgkmcnt(0) -; GFX908-NEXT: v_writelane_b32 v40, s10, 22 -; GFX908-NEXT: v_writelane_b32 v40, s11, 23 +; GFX908-NEXT: v_writelane_b32 v39, s10, 22 +; GFX908-NEXT: v_writelane_b32 v39, s11, 23 ; GFX908-NEXT: s_or_saveexec_b64 
s[34:35], -1 ; GFX908-NEXT: s_mov_b64 exec, s[34:35] -; GFX908-NEXT: v_readlane_b32 s16, v40, 22 +; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX908-NEXT: s_mov_b64 exec, s[34:35] +; GFX908-NEXT: v_readlane_b32 s16, v39, 22 ; GFX908-NEXT: s_mov_b32 s12, s24 ; GFX908-NEXT: s_mov_b32 s13, s23 ; GFX908-NEXT: s_mov_b32 s14, s22 ; GFX908-NEXT: v_mov_b32_e32 v31, v32 ; GFX908-NEXT: s_mov_b32 s15, s21 ; GFX908-NEXT: s_mov_b64 s[10:11], s[26:27] -; GFX908-NEXT: v_readlane_b32 s17, v40, 23 -; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX908-NEXT: v_readlane_b32 s17, v39, 23 +; GFX908-NEXT: v_mov_b32_e32 v40, v32 ; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload ; GFX908-NEXT: s_mov_b64 exec, s[34:35] -; GFX908-NEXT: v_readlane_b32 s11, v40, 12 +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: v_readlane_b32 s11, v39, 12 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s11 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_readlane_b32 s12, v40, 13 +; GFX908-NEXT: v_readlane_b32 s12, v39, 13 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s12 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_readlane_b32 s13, v40, 14 +; GFX908-NEXT: v_readlane_b32 s13, v39, 14 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s13 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_readlane_b32 s14, v40, 15 +; GFX908-NEXT: v_readlane_b32 s14, v39, 15 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s14 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_readlane_b32 s15, v40, 16 +; GFX908-NEXT: v_readlane_b32 s15, v39, 16 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s15 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_readlane_b32 s16, v40, 17 +; GFX908-NEXT: v_readlane_b32 s16, v39, 17 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s16 ; GFX908-NEXT: ;;#ASMEND 
-; GFX908-NEXT: v_readlane_b32 s17, v40, 18 +; GFX908-NEXT: v_readlane_b32 s17, v39, 18 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s17 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_readlane_b32 s18, v40, 19 +; GFX908-NEXT: v_readlane_b32 s18, v39, 19 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s18 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_readlane_b32 s19, v40, 20 +; GFX908-NEXT: v_readlane_b32 s19, v39, 20 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s19 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_readlane_b32 s20, v40, 21 +; GFX908-NEXT: v_readlane_b32 s20, v39, 21 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s20 ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s21 ; GFX908-NEXT: ;;#ASMEND +; GFX908-NEXT: v_writelane_b32 v39, s21, 24 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s22 ; GFX908-NEXT: ;;#ASMEND +; GFX908-NEXT: v_writelane_b32 v39, s22, 25 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s23 ; GFX908-NEXT: ;;#ASMEND +; GFX908-NEXT: v_writelane_b32 v39, s23, 26 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s24 ; GFX908-NEXT: ;;#ASMEND +; GFX908-NEXT: v_writelane_b32 v39, s24, 27 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s25 ; GFX908-NEXT: ;;#ASMEND +; GFX908-NEXT: v_writelane_b32 v39, s25, 28 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s26 ; GFX908-NEXT: ;;#ASMEND +; GFX908-NEXT: v_writelane_b32 v39, s26, 29 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s27 ; GFX908-NEXT: ;;#ASMEND +; GFX908-NEXT: v_writelane_b32 v39, s27, 30 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s28 ; GFX908-NEXT: ;;#ASMEND +; GFX908-NEXT: v_writelane_b32 v39, s28, 31 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; def s29 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GFX908-NEXT: v_writelane_b32 v40, s21, 24 -; GFX908-NEXT: v_writelane_b32 v40, s22, 25 -; GFX908-NEXT: v_writelane_b32 v40, s23, 26 -; GFX908-NEXT: v_writelane_b32 v40, s24, 27 -; GFX908-NEXT: 
v_writelane_b32 v40, s25, 28 -; GFX908-NEXT: v_writelane_b32 v40, s26, 29 -; GFX908-NEXT: v_writelane_b32 v40, s27, 30 -; GFX908-NEXT: v_writelane_b32 v40, s28, 31 -; GFX908-NEXT: v_writelane_b32 v40, s29, 32 -; GFX908-NEXT: v_readlane_b32 s4, v40, 10 -; GFX908-NEXT: v_readlane_b32 s6, v40, 0 -; GFX908-NEXT: v_readlane_b32 s8, v40, 8 -; GFX908-NEXT: v_readlane_b32 s10, v40, 6 -; GFX908-NEXT: v_readlane_b32 s16, v40, 22 -; GFX908-NEXT: v_readlane_b32 s12, v40, 5 -; GFX908-NEXT: v_readlane_b32 s13, v40, 4 -; GFX908-NEXT: v_readlane_b32 s14, v40, 3 -; GFX908-NEXT: v_readlane_b32 s15, v40, 2 -; GFX908-NEXT: v_readlane_b32 s5, v40, 11 -; GFX908-NEXT: v_readlane_b32 s7, v40, 1 -; GFX908-NEXT: v_readlane_b32 s9, v40, 9 -; GFX908-NEXT: v_readlane_b32 s11, v40, 7 -; GFX908-NEXT: v_readlane_b32 s17, v40, 23 +; GFX908-NEXT: v_writelane_b32 v39, s29, 32 ; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX908-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX908-NEXT: s_mov_b64 exec, s[34:35] +; GFX908-NEXT: v_readlane_b32 s4, v39, 10 +; GFX908-NEXT: v_readlane_b32 s6, v39, 0 +; GFX908-NEXT: v_readlane_b32 s8, v39, 8 +; GFX908-NEXT: v_readlane_b32 s10, v39, 6 +; GFX908-NEXT: v_readlane_b32 s16, v39, 22 +; GFX908-NEXT: v_readlane_b32 s12, v39, 5 +; GFX908-NEXT: v_mov_b32_e32 v31, v40 +; GFX908-NEXT: v_readlane_b32 s13, v39, 4 +; GFX908-NEXT: v_readlane_b32 s14, v39, 3 +; GFX908-NEXT: v_readlane_b32 s15, v39, 2 +; GFX908-NEXT: v_readlane_b32 s5, v39, 11 +; GFX908-NEXT: v_readlane_b32 s7, v39, 1 +; GFX908-NEXT: v_readlane_b32 s9, v39, 9 +; GFX908-NEXT: v_readlane_b32 s11, v39, 7 +; GFX908-NEXT: v_readlane_b32 s17, v39, 23 ; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload ; GFX908-NEXT: s_mov_b64 exec, s[34:35] -; GFX908-NEXT: v_readlane_b32 s21, v40, 24 +; GFX908-NEXT: s_waitcnt vmcnt(0) +; GFX908-NEXT: v_readlane_b32 
s4, v39, 10 +; GFX908-NEXT: v_readlane_b32 s6, v39, 0 +; GFX908-NEXT: v_readlane_b32 s8, v39, 8 +; GFX908-NEXT: v_readlane_b32 s10, v39, 6 +; GFX908-NEXT: v_readlane_b32 s16, v39, 22 +; GFX908-NEXT: v_readlane_b32 s5, v39, 11 +; GFX908-NEXT: v_readlane_b32 s7, v39, 1 +; GFX908-NEXT: v_readlane_b32 s9, v39, 9 +; GFX908-NEXT: v_readlane_b32 s11, v39, 7 +; GFX908-NEXT: v_readlane_b32 s12, v39, 5 +; GFX908-NEXT: v_readlane_b32 s13, v39, 4 +; GFX908-NEXT: v_readlane_b32 s14, v39, 3 +; GFX908-NEXT: v_readlane_b32 s15, v39, 2 +; GFX908-NEXT: v_mov_b32_e32 v31, v40 +; GFX908-NEXT: v_readlane_b32 s17, v39, 23 +; GFX908-NEXT: v_readlane_b32 s21, v39, 24 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s21 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_readlane_b32 s22, v40, 25 +; GFX908-NEXT: v_readlane_b32 s22, v39, 25 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s22 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_readlane_b32 s23, v40, 26 +; GFX908-NEXT: v_readlane_b32 s23, v39, 26 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s23 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_readlane_b32 s24, v40, 27 +; GFX908-NEXT: v_readlane_b32 s24, v39, 27 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s24 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_readlane_b32 s25, v40, 28 +; GFX908-NEXT: v_readlane_b32 s25, v39, 28 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s25 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_readlane_b32 s26, v40, 29 +; GFX908-NEXT: v_readlane_b32 s26, v39, 29 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s26 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_readlane_b32 s27, v40, 30 +; GFX908-NEXT: v_readlane_b32 s27, v39, 30 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s27 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_readlane_b32 s28, v40, 31 +; GFX908-NEXT: v_readlane_b32 s28, v39, 31 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ; use s28 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_readlane_b32 s29, v40, 32 +; GFX908-NEXT: v_readlane_b32 s29, v39, 32 ; GFX908-NEXT: ;;#ASMSTART 
; GFX908-NEXT: ; use s29 ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GFX908-NEXT: v_readlane_b32 s4, v40, 10 -; GFX908-NEXT: v_readlane_b32 s6, v40, 0 -; GFX908-NEXT: v_readlane_b32 s8, v40, 8 -; GFX908-NEXT: v_readlane_b32 s10, v40, 6 -; GFX908-NEXT: v_readlane_b32 s16, v40, 22 -; GFX908-NEXT: v_readlane_b32 s5, v40, 11 -; GFX908-NEXT: v_readlane_b32 s7, v40, 1 -; GFX908-NEXT: v_readlane_b32 s9, v40, 9 -; GFX908-NEXT: v_readlane_b32 s11, v40, 7 -; GFX908-NEXT: v_readlane_b32 s12, v40, 5 -; GFX908-NEXT: v_readlane_b32 s13, v40, 4 -; GFX908-NEXT: v_readlane_b32 s14, v40, 3 -; GFX908-NEXT: v_readlane_b32 s15, v40, 2 -; GFX908-NEXT: v_readlane_b32 s17, v40, 23 -; GFX908-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX908-NEXT: s_mov_b64 exec, s[34:35] ; GFX908-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:60 ; 
4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v24, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v25, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v26, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v27, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v28, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v29, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; 
GFX908-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v23, off, 
s[0:3], s33 offset:112 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v24, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v25, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v26, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v27, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v28, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v29, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload ; GFX908-NEXT: s_mov_b64 s[4:5], exec ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[30:33] offset:112 @@ -737,37 +736,34 @@ define void @preserve_wwm_copy_dstreg(ptr %parg0, ptr %parg1, ptr %parg2) #0 { ; GFX908-NEXT: flat_store_dwordx4 v[0:1], v[2:5] ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: s_mov_b64 exec, 1 -; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:172 -; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:168 +; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: v_readlane_b32 s31, v0, 0 -; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:172 +; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:168 ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: s_mov_b64 exec, s[4:5] ; GFX908-NEXT: s_mov_b64 s[4:5], exec ; GFX908-NEXT: s_mov_b64 exec, 1 -; GFX908-NEXT: buffer_store_dword v0, off, 
s[0:3], s33 offset:172 -; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:168 +; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: v_readlane_b32 s30, v0, 0 -; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:172 +; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:168 ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: s_mov_b64 exec, s[4:5] -; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload -; GFX908-NEXT: ; kill: killed $vgpr40 +; GFX908-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: v_readfirstlane_b32 s4, v0 -; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: v_readfirstlane_b32 s34, v0 -; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload ; GFX908-NEXT: s_waitcnt vmcnt(0) ; GFX908-NEXT: v_readfirstlane_b32 s35, v0 ; GFX908-NEXT: s_xor_saveexec_b64 s[6:7], -1 -; GFX908-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload -; GFX908-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload -; GFX908-NEXT: s_mov_b64 exec, -1 -; GFX908-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload +; GFX908-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload ; GFX908-NEXT: s_mov_b64 exec, s[6:7] ; GFX908-NEXT: s_addk_i32 s32, 0xd400 ; GFX908-NEXT: s_mov_b32 s33, s4 diff --git 
a/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir b/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir index 447a8bf9956f3..fe01728c00563 100644 --- a/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir +++ b/llvm/test/CodeGen/AMDGPU/regalloc-failure-overlapping-insert-assert.mir @@ -1,5 +1,5 @@ -# RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs=0 -start-before=greedy,1 -stop-after=virtregrewriter,1 %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s -# RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=greedy,1 -stop-after=virtregrewriter,1 %s -o /dev/null 2>&1 | FileCheck -check-prefixes=ERR,VERIFIER %s +# RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs=0 -start-before=greedy,1 -stop-after=virtregrewriter,2 %s -o /dev/null 2>&1 | FileCheck -check-prefix=ERR %s +# RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -start-before=greedy,1 -stop-after=virtregrewriter,2 %s -o /dev/null 2>&1 | FileCheck -check-prefixes=ERR,VERIFIER %s # FIXME: We should not produce a verifier error after erroring diff --git a/llvm/test/CodeGen/AMDGPU/rem_i128.ll b/llvm/test/CodeGen/AMDGPU/rem_i128.ll index 19cc60963e900..f7f5bd56fa6f1 100644 --- a/llvm/test/CodeGen/AMDGPU/rem_i128.ll +++ b/llvm/test/CodeGen/AMDGPU/rem_i128.ll @@ -243,350 +243,345 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0: ; %bb.0: ; %_udiv-special-cases ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword 
v30, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v4 -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] +; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1 +; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v19, v7 +; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_mov_b32_e32 v18, v1 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v9, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v4 +; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed 
$exec -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v3 +; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v3 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX9-O0-NEXT: s_mov_b32 s4, 63 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v11 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v12 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v13 -; GFX9-O0-NEXT: v_ashrrev_i64 v[3:4], s4, v[3:4] -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_ashrrev_i64 v[2:3], s4, v[2:3] +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v11 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v12 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v13 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 -; GFX9-O0-NEXT: s_waitcnt vmcnt(4) -; GFX9-O0-NEXT: v_writelane_b32 v0, s6, 0 -; GFX9-O0-NEXT: v_writelane_b32 v0, s7, 1 +; GFX9-O0-NEXT: ; implicit-def: $vgpr30 : SGPR spill to VGPR lane +; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 0 +; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 1 ; GFX9-O0-NEXT: s_mov_b32 s10, s6 -; GFX9-O0-NEXT: v_writelane_b32 v0, s10, 2 +; 
GFX9-O0-NEXT: v_writelane_b32 v30, s10, 2 ; GFX9-O0-NEXT: s_mov_b32 s11, s7 -; GFX9-O0-NEXT: v_writelane_b32 v0, s11, 3 -; GFX9-O0-NEXT: v_sub_co_u32_e32 v10, vcc, s10, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v5, v4, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s10 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v5, v1, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v7, s11 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v2, vcc +; GFX9-O0-NEXT: v_writelane_b32 v30, s11, 3 +; GFX9-O0-NEXT: v_sub_co_u32_e32 v9, vcc, s10, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v4, v3, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s10 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v0, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v6, s11 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v6, v1, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11 +; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 ; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[6:7] -; GFX9-O0-NEXT: v_cmp_lt_i64_e64 s[4:5], v[12:13], s[4:5] -; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[4:5] -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[4:5] +; GFX9-O0-NEXT: v_cmp_lt_i64_e64 s[4:5], v[11:12], s[4:5] +; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[4:5] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5] ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v2 ; GFX9-O0-NEXT: v_mov_b32_e32 v16, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, v4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: ; kill: def $vgpr5 
killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v7, s[4:5] -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5] +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5] +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0 ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v8 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v9 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v17 ; GFX9-O0-NEXT: v_mov_b32_e32 v8, v18 -; GFX9-O0-NEXT: v_mov_b32_e32 v9, v19 -; GFX9-O0-NEXT: v_sub_co_u32_e32 v14, vcc, s10, v7 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, s11 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v12, vcc, v11, v10, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v11, s10 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v11, v8, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v13, s11 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v13, vcc, v13, v9, vcc +; GFX9-O0-NEXT: v_sub_co_u32_e32 v13, vcc, s10, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, s11 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v10, v9, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v10, s10 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v7, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v12, s11 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v12, vcc, v12, v8, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec -; GFX9-O0-NEXT: 
v_mov_b32_e32 v15, v12 -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v15 +; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v11 +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v14 ; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[6:7] -; GFX9-O0-NEXT: v_cmp_lt_i64_e64 s[4:5], v[18:19], s[4:5] -; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, v10, v12, s[4:5] -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v14 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v12, s[4:5] +; GFX9-O0-NEXT: v_cmp_lt_i64_e64 s[4:5], v[17:18], s[4:5] +; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, v9, v11, s[4:5] +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v13 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v11, s[4:5] ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v7 -; GFX9-O0-NEXT: v_mov_b32_e32 v19, v10 +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v18, v9 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v13 -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v12 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, v9, v13, s[4:5] -; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 killed $vgpr11_vgpr12 killed $exec -; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v8, v11, s[4:5] +; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v12 +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v11 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v8, v12, s[4:5] +; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 killed $vgpr10_vgpr11 killed $exec +; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[4:5] ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v7 ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v8 -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v9 -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v6 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, v5 -; 
GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v4 +; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v15 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16 -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v17 -; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v12 +; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill ; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11 -; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v10 +; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v17 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, v18 -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v19 -; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v12 +; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill ; GFX9-O0-NEXT: v_mov_b32_e32 
v13, v11 -; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v10 +; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v17 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, v18 -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v19 -; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v6 +; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-O0-NEXT: v_mov_b32_e32 v13, v5 -; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v4 +; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v15 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16 -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v17 -; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v12 -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v19 -; GFX9-O0-NEXT: v_or_b32_e64 v15, v13, v14 -; GFX9-O0-NEXT: 
v_mov_b32_e32 v14, v11 -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v18 -; GFX9-O0-NEXT: v_or_b32_e64 v13, v13, v14 -; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15 -; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[13:14], s[6:7] -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17 -; GFX9-O0-NEXT: v_or_b32_e64 v15, v13, v14 -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16 -; GFX9-O0-NEXT: v_or_b32_e64 v13, v13, v14 -; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15 -; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[13:14], s[6:7] +; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11 +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v18 +; GFX9-O0-NEXT: v_or_b32_e64 v14, v12, v13 +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v10 +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v17 +; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v13 +; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v14 +; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[12:13], s[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v16 +; GFX9-O0-NEXT: v_or_b32_e64 v14, v12, v13 +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v15 +; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v13 +; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v14 +; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[12:13], s[6:7] ; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[11:12], s[8:9] -; GFX9-O0-NEXT: v_ffbh_u32_e64 v8, v8 +; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[10:11], s[8:9] +; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7 ; GFX9-O0-NEXT: s_mov_b32 s13, 32 -; 
GFX9-O0-NEXT: v_add_u32_e64 v8, v8, s13 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v9, v9 -; GFX9-O0-NEXT: v_min_u32_e64 v8, v8, v9 +; GFX9-O0-NEXT: v_add_u32_e64 v7, v7, s13 +; GFX9-O0-NEXT: v_ffbh_u32_e64 v8, v8 +; GFX9-O0-NEXT: v_min_u32_e64 v7, v7, v8 ; GFX9-O0-NEXT: s_mov_b32 s12, 0 ; GFX9-O0-NEXT: ; implicit-def: $sgpr14 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, s12 -; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v9, v11 -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v9 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7 -; GFX9-O0-NEXT: v_add_u32_e64 v7, v7, s13 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v10, v10 -; GFX9-O0-NEXT: v_min_u32_e64 v13, v7, v10 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, s12 +; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v10 +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v8 +; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v6 +; GFX9-O0-NEXT: v_add_u32_e64 v6, v6, s13 +; GFX9-O0-NEXT: v_ffbh_u32_e64 v9, v9 +; GFX9-O0-NEXT: v_min_u32_e64 v12, v6, v9 ; GFX9-O0-NEXT: ; implicit-def: $sgpr14 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, s12 -; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, s12 +; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v6 ; GFX9-O0-NEXT: s_mov_b64 s[14:15], 64 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v13 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v12 ; GFX9-O0-NEXT: s_mov_b32 s16, s14 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v13 ; GFX9-O0-NEXT: s_mov_b32 s18, s15 -; GFX9-O0-NEXT: v_add_co_u32_e64 v10, s[16:17], v10, s16 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, s18 -; GFX9-O0-NEXT: v_addc_co_u32_e64 v7, s[16:17], v7, v11, s[16:17] -; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v7 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, 
v11 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v12, s[8:9] -; GFX9-O0-NEXT: v_mov_b32_e32 v9, v8 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v10 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, v8, v9, s[8:9] +; GFX9-O0-NEXT: v_add_co_u32_e64 v9, s[16:17], v9, s16 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, s18 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v6, s[16:17], v6, v10, s[16:17] +; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v11, s[8:9] +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v9 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v7, v8, s[8:9] ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v7 +; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v6 ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[5:6], s[8:9] +; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[4:5], s[8:9] +; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v0 +; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s13 ; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v1 -; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s13 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v2 -; GFX9-O0-NEXT: v_min_u32_e64 v6, v5, v6 +; GFX9-O0-NEXT: v_min_u32_e64 v5, v4, v5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr16 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s12 -; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v3 -; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s13 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v11, v4 -; GFX9-O0-NEXT: v_min_u32_e64 v12, v5, v11 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s12 +; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec +; GFX9-O0-NEXT: 
v_mov_b32_e32 v6, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 +; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v2 +; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s13 +; GFX9-O0-NEXT: v_ffbh_u32_e64 v10, v3 +; GFX9-O0-NEXT: v_min_u32_e64 v11, v4, v10 ; GFX9-O0-NEXT: ; implicit-def: $sgpr13 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s12 -; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v12 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s12 +; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v11 ; GFX9-O0-NEXT: s_mov_b32 s12, s14 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v13 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12 ; GFX9-O0-NEXT: s_mov_b32 s14, s15 -; GFX9-O0-NEXT: v_add_co_u32_e64 v11, s[12:13], v11, s12 -; GFX9-O0-NEXT: v_mov_b32_e32 v12, s14 -; GFX9-O0-NEXT: v_addc_co_u32_e64 v5, s[12:13], v5, v12, s[12:13] -; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[8:9] -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[8:9] +; GFX9-O0-NEXT: v_add_co_u32_e64 v10, s[12:13], v10, s12 +; GFX9-O0-NEXT: v_mov_b32_e32 v11, s14 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[12:13], v4, v11, s[12:13] +; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v11 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[8:9] +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[8:9] ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec 
+; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 +; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 -; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10 -; GFX9-O0-NEXT: v_sub_co_u32_e32 v5, vcc, v5, v8 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v9, vcc, v6, v7, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v7, s10 +; GFX9-O0-NEXT: v_sub_co_u32_e32 v4, vcc, v4, v7 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v5, v6, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v6, s10 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v6, v7, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v7, s11 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, s10 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v6, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v6, s11 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v6, v7, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v5, v6, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9 -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8 +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec -; 
GFX9-O0-NEXT: v_mov_b32_e32 v9, v7 -; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6 +; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7] +; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7] ; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f -; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13] -; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[14:15] -; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7] -; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[14:15] -; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[8:9] -; GFX9-O0-NEXT: v_and_b32_e64 v7, 1, v7 -; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v7, 1 +; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13] +; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15] +; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7] +; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15] +; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9] +; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6 +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v6, 1 ; GFX9-O0-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9] ; GFX9-O0-NEXT: s_mov_b64 s[4:5], -1 ; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5] -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 ; GFX9-O0-NEXT: s_mov_b32 s14, s13 -; GFX9-O0-NEXT: v_xor_b32_e64 v7, v7, s14 +; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14 ; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13 -; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, s12 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7 -; 
GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v9 -; GFX9-O0-NEXT: v_or_b32_e64 v7, v7, v10 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8 -; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v6 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7 -; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[5:6], s[6:7] -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v2, v5, s[8:9] -; GFX9-O0-NEXT: v_mov_b32_e32 v2, s10 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[8:9] +; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12 +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v8 +; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v9 +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 +; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6 +; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[8:9] +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[8:9] ; GFX9-O0-NEXT: ; implicit-def: $sgpr12 ; GFX9-O0-NEXT: ; implicit-def: $sgpr12 -; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v4, v5, s[8:9] -; GFX9-O0-NEXT: v_mov_b32_e32 v4, s10 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[8:9] +; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11 +; GFX9-O0-NEXT: 
v_cndmask_b32_e64 v4, v3, v4, s[8:9] +; GFX9-O0-NEXT: v_mov_b32_e32 v3, s10 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[8:9] ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4 ; GFX9-O0-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7] -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 s[4:5], exec -; GFX9-O0-NEXT: v_writelane_b32 v0, s4, 4 -; GFX9-O0-NEXT: v_writelane_b32 v0, s5, 5 +; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 4 +; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 5 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] @@ -594,67 +589,66 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_branch .LBB0_8 ; GFX9-O0-NEXT: .LBB0_1: ; %Flow ; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; 
GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s4, v0, 6 -; GFX9-O0-NEXT: v_readlane_b32 s5, v0, 7 +; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 6 +; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 7 ; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-O0-NEXT: ; %bb.2: ; %Flow -; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(7) -; GFX9-O0-NEXT: buffer_store_dword v6, off, 
s[0:3], s32 offset:140 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_waitcnt vmcnt(7) -; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_waitcnt vmcnt(7) -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_waitcnt vmcnt(7) -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_waitcnt vmcnt(7) -; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_waitcnt vmcnt(7) -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_waitcnt vmcnt(7) -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_waitcnt vmcnt(7) -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB0_5 ; GFX9-O0-NEXT: .LBB0_3: ; %Flow2 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s4, v4, 4 -; GFX9-O0-NEXT: v_readlane_b32 s5, v4, 5 -; 
GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) -; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 4 +; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 5 +; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB0_9 ; GFX9-O0-NEXT: .LBB0_4: ; %udiv-loop-exit -; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:208 ; 4-byte Folded 
Reload -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b32 s4, 1 ; GFX9-O0-NEXT: s_waitcnt vmcnt(2) ; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s4, v[0:1] @@ -687,123 +681,117 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB0_3 ; GFX9-O0-NEXT: .LBB0_5: ; %Flow1 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload +; GFX9-O0-NEXT: 
buffer_load_dword v6, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s4, v8, 8 -; GFX9-O0-NEXT: v_readlane_b32 s5, v8, 9 +; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 8 +; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 9 ; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) -; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:220 ; 
4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB0_4 ; GFX9-O0-NEXT: .LBB0_6: ; %udiv-do-while ; GFX9-O0-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 
offset:268 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s6, v16, 10 -; GFX9-O0-NEXT: v_readlane_b32 s7, v16, 11 -; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 
offset:240 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload +; GFX9-O0-NEXT: v_readlane_b32 s6, v30, 10 +; GFX9-O0-NEXT: v_readlane_b32 s7, v30, 11 ; GFX9-O0-NEXT: s_mov_b32 s4, 63 -; GFX9-O0-NEXT: s_waitcnt 
vmcnt(16) -; GFX9-O0-NEXT: v_lshrrev_b64 v[29:30], s4, v[2:3] -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v30 +; GFX9-O0-NEXT: v_lshrrev_b64 v[28:29], s4, v[2:3] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v29 ; GFX9-O0-NEXT: s_mov_b32 s5, 1 -; GFX9-O0-NEXT: v_lshlrev_b64 v[23:24], s5, v[23:24] -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v24 +; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], s5, v[22:23] +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v23 ; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v29 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 -; GFX9-O0-NEXT: v_or_b32_e64 v23, v5, v10 -; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v24, v4 -; GFX9-O0-NEXT: v_lshlrev_b64 v[29:30], s5, v[2:3] +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v28 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v22 +; GFX9-O0-NEXT: v_or_b32_e64 v22, v5, v10 +; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v23, v4 +; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[2:3] ; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], s4, v[6:7] -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v30 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v29 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5 ; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v29 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v28 ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec ; GFX9-O0-NEXT: v_or_b32_e64 v4, v3, v4 ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2 ; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s5, v[0:1] -; GFX9-O0-NEXT: v_lshlrev_b64 v[29:30], s5, v[6:7] +; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[6:7] ; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v30 -; GFX9-O0-NEXT: s_waitcnt vmcnt(10) -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v28 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v29 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v27 ; 
GFX9-O0-NEXT: v_or3_b32 v6, v6, v7, v10 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, v29 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v27 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v28 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v26 ; GFX9-O0-NEXT: v_or3_b32 v0, v0, v1, v7 ; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v3 -; GFX9-O0-NEXT: s_waitcnt vmcnt(8) -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v26 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v25 ; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v25 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v24 ; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 ; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6 ; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v23 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v24 -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v22 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11 ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v14 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v15 ; GFX9-O0-NEXT: v_sub_co_u32_e32 v13, vcc, v13, v6 -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_subb_co_u32_e32 v12, vcc, v12, v10, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v11, v4, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v5, vcc @@ -823,22 +811,22 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v15, v12 ; GFX9-O0-NEXT: v_mov_b32_e32 v12, 0 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, 0 -; GFX9-O0-NEXT: v_mov_b32_e32 v23, v22 -; GFX9-O0-NEXT: v_and_b32_e64 v23, v7, v23 -; GFX9-O0-NEXT: v_and_b32_e64 v21, v11, v21 -; GFX9-O0-NEXT: ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v22, v23 -; GFX9-O0-NEXT: v_mov_b32_e32 v23, v20 -; GFX9-O0-NEXT: 
v_and_b32_e64 v7, v7, v23 -; GFX9-O0-NEXT: v_and_b32_e64 v23, v11, v19 -; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v24, v7 -; GFX9-O0-NEXT: v_mov_b32_e32 v20, v23 -; GFX9-O0-NEXT: v_mov_b32_e32 v19, v24 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v21 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v22 -; GFX9-O0-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v20 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v19, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v22, v21 +; GFX9-O0-NEXT: v_and_b32_e64 v22, v7, v22 +; GFX9-O0-NEXT: v_and_b32_e64 v20, v11, v20 +; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v21, v22 +; GFX9-O0-NEXT: v_mov_b32_e32 v22, v19 +; GFX9-O0-NEXT: v_and_b32_e64 v7, v7, v22 +; GFX9-O0-NEXT: v_and_b32_e64 v22, v11, v18 +; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v23, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v19, v22 +; GFX9-O0-NEXT: v_mov_b32_e32 v18, v23 +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v21 +; GFX9-O0-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v19 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v18, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v11, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v7, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 @@ -854,149 +842,149 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_mov_b64 s[8:9], -1 ; GFX9-O0-NEXT: s_mov_b32 s5, s8 ; GFX9-O0-NEXT: s_mov_b32 s4, s9 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v18 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, s5 -; GFX9-O0-NEXT: v_add_co_u32_e32 v20, vcc, v11, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5 +; GFX9-O0-NEXT: v_add_co_u32_e32 v19, vcc, v11, v16 ; GFX9-O0-NEXT: v_mov_b32_e32 v11, s4 ; GFX9-O0-NEXT: v_addc_co_u32_e32 v9, vcc, v9, v11, 
vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5 -; GFX9-O0-NEXT: v_addc_co_u32_e32 v17, vcc, v10, v11, vcc +; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v10, v11, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v10, s4 ; GFX9-O0-NEXT: v_addc_co_u32_e32 v8, vcc, v8, v10, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v21, v9 +; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v20, v9 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v8 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v20 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v21 -; GFX9-O0-NEXT: v_mov_b32_e32 v22, v18 -; GFX9-O0-NEXT: v_mov_b32_e32 v19, v21 -; GFX9-O0-NEXT: v_or_b32_e64 v19, v19, v22 -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, v20 -; GFX9-O0-NEXT: v_or_b32_e64 v17, v17, v18 -; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v19 -; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[17:18], v[12:13] +; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v19 +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20 +; GFX9-O0-NEXT: v_mov_b32_e32 v21, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v18, v20 +; GFX9-O0-NEXT: v_or_b32_e64 v18, v18, v21 +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, v19 +; GFX9-O0-NEXT: v_or_b32_e64 v16, v16, v17 +; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec +; GFX9-O0-NEXT: 
v_mov_b32_e32 v17, v18 +; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[16:17], v[12:13] ; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, v2 -; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, v2 +; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, v0 ; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, v0 +; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v15 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, v14 ; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v15 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, v14 +; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v13 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, v12 ; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v13 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, v12 +; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v17, off, 
s[0:3], s32 offset:180 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX9-O0-NEXT: v_writelane_b32 v16, s6, 6 -; GFX9-O0-NEXT: v_writelane_b32 v16, s7, 7 +; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 6 +; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 7 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX9-O0-NEXT: v_writelane_b32 v16, s6, 10 -; GFX9-O0-NEXT: v_writelane_b32 v16, s7, 11 +; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 10 +; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 11 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] -; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill +; 
GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_andn2_b64 exec, exec, s[4:5] ; GFX9-O0-NEXT: s_cbranch_execnz .LBB0_6 ; GFX9-O0-NEXT: s_branch .LBB0_1 ; GFX9-O0-NEXT: .LBB0_7: ; %udiv-preheader -; 
GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] -; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; GFX9-O0-NEXT: 
buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-O0-NEXT: s_waitcnt vmcnt(10) ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10 -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[21:22] +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[20:21] ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 ; GFX9-O0-NEXT: s_mov_b32 s6, 64 ; GFX9-O0-NEXT: v_sub_u32_e64 v12, s6, v4 -; GFX9-O0-NEXT: v_lshlrev_b64 v[23:24], v12, v[19:20] -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v24 +; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], v12, v[18:19] +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v23 ; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v12 ; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v23 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v22 ; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7 ; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 ; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7 
; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v4, s6 ; GFX9-O0-NEXT: v_sub_u32_e64 v5, v4, s6 -; GFX9-O0-NEXT: v_lshrrev_b64 v[23:24], v5, v[19:20] -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v24 +; GFX9-O0-NEXT: v_lshrrev_b64 v[22:23], v5, v[18:19] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 ; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[4:5] ; GFX9-O0-NEXT: s_mov_b32 s6, 0 ; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v4, s6 -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v22 +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v21 ; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[6:7] ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v23 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v22 ; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5] -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v21 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v20 ; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[6:7] ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 -; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], v4, v[19:20] +; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], v4, v[18:19] ; GFX9-O0-NEXT: v_mov_b32_e32 v15, v5 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 ; GFX9-O0-NEXT: s_mov_b32 s8, s7 @@ -1015,12 +1003,12 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_mov_b64 s[8:9], -1 ; GFX9-O0-NEXT: s_mov_b32 s5, s8 ; GFX9-O0-NEXT: s_mov_b32 s4, s9 -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v18 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, s5 -; GFX9-O0-NEXT: v_add_co_u32_e32 v12, vcc, v12, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, s4 -; GFX9-O0-NEXT: v_addc_co_u32_e32 v17, vcc, v15, v17, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5 +; GFX9-O0-NEXT: v_add_co_u32_e32 v12, vcc, v12, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, s4 +; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v15, v16, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 
v15, s5 ; GFX9-O0-NEXT: v_addc_co_u32_e32 v14, vcc, v14, v15, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v15, s4 @@ -1032,429 +1020,428 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16 ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[6:7] ; GFX9-O0-NEXT: v_mov_b32_e32 v15, s9 ; GFX9-O0-NEXT: v_mov_b32_e32 v14, s8 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, s7 ; GFX9-O0-NEXT: v_mov_b32_e32 v12, s6 -; GFX9-O0-NEXT: v_writelane_b32 v16, s4, 10 -; GFX9-O0-NEXT: v_writelane_b32 v16, s5, 11 +; GFX9-O0-NEXT: s_waitcnt vmcnt(4) +; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 10 +; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 11 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] -; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill +; 
GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v15, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:248 ; 
4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB0_6 ; GFX9-O0-NEXT: .LBB0_8: ; %udiv-bb1 +; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: 
s_mov_b64 exec, s[22:23] -; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1 ; GFX9-O0-NEXT: s_mov_b32 s5, s6 -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 +; GFX9-O0-NEXT: s_waitcnt vmcnt(2) +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-O0-NEXT: s_mov_b32 s4, s7 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 ; GFX9-O0-NEXT: s_mov_b32 s8, s6 ; GFX9-O0-NEXT: s_mov_b32 s9, s7 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5 -; GFX9-O0-NEXT: v_add_co_u32_e32 v9, vcc, v4, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s4 -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v5, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s8 -; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v5, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s9 -; GFX9-O0-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5 +; GFX9-O0-NEXT: v_add_co_u32_e32 v8, vcc, v3, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s4 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v4, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 +; GFX9-O0-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v4, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s9 
+; GFX9-O0-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v4, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v2 +; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v1 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2 +; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1 -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10 ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b32 s4, 0x7f -; GFX9-O0-NEXT: v_sub_u32_e64 v3, s4, v4 -; GFX9-O0-NEXT: v_lshlrev_b64 v[5:6], v3, v[11:12] -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v6 +; GFX9-O0-NEXT: v_sub_u32_e64 v2, s4, v3 +; GFX9-O0-NEXT: v_lshlrev_b64 v[4:5], v2, v[10:11] +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5 ; GFX9-O0-NEXT: s_mov_b32 s4, 64 -; GFX9-O0-NEXT: v_sub_u32_e64 v14, 
s4, v3 -; GFX9-O0-NEXT: v_lshrrev_b64 v[14:15], v14, v[7:8] -; GFX9-O0-NEXT: v_mov_b32_e32 v16, v15 -; GFX9-O0-NEXT: v_or_b32_e64 v13, v13, v16 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v14 -; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v6 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v13 -; GFX9-O0-NEXT: v_mov_b32_e32 v15, v6 -; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v3, s4 +; GFX9-O0-NEXT: v_sub_u32_e64 v13, s4, v2 +; GFX9-O0-NEXT: v_lshrrev_b64 v[13:14], v13, v[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v14 +; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v15 +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v13 +; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12 +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v5 +; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v2, s4 ; GFX9-O0-NEXT: s_mov_b32 s10, 63 -; GFX9-O0-NEXT: v_sub_u32_e64 v4, s10, v4 -; GFX9-O0-NEXT: v_lshlrev_b64 v[13:14], v4, v[7:8] -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v15, s[4:5] +; GFX9-O0-NEXT: v_sub_u32_e64 v3, s10, v3 +; GFX9-O0-NEXT: v_lshlrev_b64 v[12:13], v3, v[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[4:5] ; GFX9-O0-NEXT: s_mov_b32 s10, 0 -; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[10:11], v3, s10 -; GFX9-O0-NEXT: v_mov_b32_e32 v15, v12 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v15, s[10:11] -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v13 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[10:11] +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[10:11], v2, s10 +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v11 +; GFX9-O0-NEXT: 
v_cndmask_b32_e64 v3, v3, v14, s[10:11] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[10:11] ; GFX9-O0-NEXT: ; implicit-def: $sgpr10 ; GFX9-O0-NEXT: ; implicit-def: $sgpr10 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 -; GFX9-O0-NEXT: v_lshlrev_b64 v[7:8], v3, v[7:8] -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, s9 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5] -; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr7_vgpr8 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v4, v7, s[4:5] +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v3 +; GFX9-O0-NEXT: v_lshlrev_b64 v[6:7], v2, v[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s9 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5] +; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v3, s8 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v3, v6, s[4:5] ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v3 -; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill +; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v2 +; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill +; 
GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10 -; GFX9-O0-NEXT: v_or_b32_e64 v3, v3, v4 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v9 -; GFX9-O0-NEXT: v_or_b32_e64 v1, v1, v2 -; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 -; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[1:2], s[6:7] +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9 +; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v8 +; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v1 +; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 +; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[0:1], s[6:7] ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX9-O0-NEXT: v_mov_b32_e32 v3, s8 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, s9 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, s7 -; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s8 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, s9 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s7 +; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:148 ; 
4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 s[6:7], exec ; GFX9-O0-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] ; GFX9-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] -; GFX9-O0-NEXT: v_writelane_b32 v0, s6, 8 -; GFX9-O0-NEXT: v_writelane_b32 v0, s7, 9 +; GFX9-O0-NEXT: s_waitcnt vmcnt(16) +; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 8 +; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 9 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] ; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-O0-NEXT: s_cbranch_execz .LBB0_5 ; GFX9-O0-NEXT: s_branch .LBB0_7 ; GFX9-O0-NEXT: .LBB0_9: ; %udiv-end -; GFX9-O0-NEXT: s_or_saveexec_b64 s[22:23], -1 -; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[22:23] -; 
GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload +; GFX9-O0-NEXT: 
buffer_load_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b32 s4, 32 ; GFX9-O0-NEXT: s_waitcnt vmcnt(2) -; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[5:6] +; GFX9-O0-NEXT: v_lshrrev_b64 v[0:1], s4, v[16:17] ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-O0-NEXT: s_waitcnt vmcnt(1) -; GFX9-O0-NEXT: v_mov_b32_e32 v0, v17 -; GFX9-O0-NEXT: v_mul_lo_u32 v3, v1, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v20 +; GFX9-O0-NEXT: v_mul_lo_u32 v8, v1, v0 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_lshrrev_b64 v[17:18], s4, v[17:18] -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v17 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mul_lo_u32 v2, v5, v2 -; GFX9-O0-NEXT: v_mad_u64_u32 v[17:18], s[6:7], v5, v0, 0 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, v18 -; GFX9-O0-NEXT: v_add3_u32 v2, v0, v2, v3 +; GFX9-O0-NEXT: v_lshrrev_b64 v[20:21], s4, v[20:21] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v20 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v16 +; GFX9-O0-NEXT: v_mul_lo_u32 v5, v2, v5 +; GFX9-O0-NEXT: v_mad_u64_u32 v[16:17], s[6:7], v2, v0, 0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v17 +; GFX9-O0-NEXT: v_add3_u32 v8, v0, v5, v8 ; 
GFX9-O0-NEXT: ; implicit-def: $sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, s5 -; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s4, v[2:3] -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v3 -; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 killed $vgpr17_vgpr18 killed $exec +; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v0 +; GFX9-O0-NEXT: v_lshlrev_b64 v[8:9], s4, v[8:9] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 +; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 killed $vgpr16_vgpr17 killed $exec ; GFX9-O0-NEXT: s_mov_b32 s5, 0 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, s5 -; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v0 +; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v17 +; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $vgpr8_vgpr9 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v16 +; GFX9-O0-NEXT: v_or_b32_e64 v16, v5, v8 +; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v0 +; GFX9-O0-NEXT: v_lshrrev_b64 v[8:9], s4, v[18:19] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v14 +; GFX9-O0-NEXT: v_mul_lo_u32 v9, v8, v5 +; GFX9-O0-NEXT: v_lshrrev_b64 v[14:15], s4, v[14:15] +; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v18 -; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v17 -; GFX9-O0-NEXT: v_or_b32_e64 v17, v2, v3 -; GFX9-O0-NEXT: ; kill: def 
$vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v0 -; GFX9-O0-NEXT: v_lshrrev_b64 v[2:3], s4, v[19:20] -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v11 -; GFX9-O0-NEXT: v_mul_lo_u32 v3, v2, v6 -; GFX9-O0-NEXT: v_lshrrev_b64 v[11:12], s4, v[11:12] -; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 killed $vgpr11_vgpr12 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v0, v19 -; GFX9-O0-NEXT: v_mul_lo_u32 v11, v11, v0 -; GFX9-O0-NEXT: v_mad_u64_u32 v[19:20], s[6:7], v2, v0, 0 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v20 -; GFX9-O0-NEXT: v_add3_u32 v2, v2, v3, v11 +; GFX9-O0-NEXT: v_mul_lo_u32 v14, v14, v0 +; GFX9-O0-NEXT: v_mad_u64_u32 v[18:19], s[6:7], v8, v0, 0 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v19 +; GFX9-O0-NEXT: v_add3_u32 v8, v8, v9, v14 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: ; implicit-def: $sgpr7 ; GFX9-O0-NEXT: ; implicit-def: $sgpr7 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, s6 -; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11 -; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s4, v[2:3] -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v3 -; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 killed $vgpr19_vgpr20 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v14, s6 +; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v14 +; GFX9-O0-NEXT: v_lshlrev_b64 v[8:9], s4, v[8:9] +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v9 +; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 killed $vgpr18_vgpr19 killed $exec ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5 -; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v20, v11 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20 -; GFX9-O0-NEXT: v_or_b32_e64 v11, v11, v12 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v19 -; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, 
v3 -; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v18 -; GFX9-O0-NEXT: v_add_co_u32_e64 v17, s[6:7], v11, v12 -; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v2, v3, s[6:7] -; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v2 -; GFX9-O0-NEXT: v_mad_u64_u32 v[19:20], s[6:7], v6, v1, 0 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v19 +; GFX9-O0-NEXT: v_mov_b32_e32 v14, s5 +; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v19, v14 +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v19 +; GFX9-O0-NEXT: v_or_b32_e64 v14, v14, v15 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v18 +; GFX9-O0-NEXT: v_or_b32_e64 v8, v8, v9 +; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v14 +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v9 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v17 +; GFX9-O0-NEXT: v_add_co_u32_e64 v16, s[6:7], v14, v15 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v8, s[6:7], v8, v9, s[6:7] +; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v8 +; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v5, v1, 0 +; GFX9-O0-NEXT: v_mov_b32_e32 v18, v14 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5 -; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v19, v20 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, s5 +; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec +; 
GFX9-O0-NEXT: v_mov_b32_e32 v19, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v19 +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: ; implicit-def: $sgpr7 ; GFX9-O0-NEXT: ; implicit-def: $sgpr7 -; GFX9-O0-NEXT: v_mov_b32_e32 v12, s6 -; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v20, v12 -; GFX9-O0-NEXT: v_lshlrev_b64 v[19:20], s4, v[19:20] -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v20 -; GFX9-O0-NEXT: v_or_b32_e64 v11, v11, v12 -; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $vgpr2_vgpr3 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v19 -; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 -; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11 -; GFX9-O0-NEXT: v_mad_u64_u32 v[19:20], s[6:7], v6, v5, 0 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v19 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, s6 +; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v9 +; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15] +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v15 +; GFX9-O0-NEXT: v_or_b32_e64 v8, v8, v9 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18 +; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec +; GFX9-O0-NEXT: v_or_b32_e64 v20, v9, v14 +; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v21, v8 +; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v5, v2, 0 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v14 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, s5 -; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v12 -; GFX9-O0-NEXT: v_mov_b32_e32 v19, v20 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5 +; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; 
GFX9-O0-NEXT: v_mov_b32_e32 v9, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: ; implicit-def: $sgpr7 ; GFX9-O0-NEXT: ; implicit-def: $sgpr7 -; GFX9-O0-NEXT: v_mov_b32_e32 v21, s6 -; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v20, v21 -; GFX9-O0-NEXT: v_lshlrev_b64 v[19:20], s4, v[19:20] -; GFX9-O0-NEXT: v_mov_b32_e32 v21, v20 -; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v21 -; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 killed $vgpr11_vgpr12 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v19 -; GFX9-O0-NEXT: v_or_b32_e64 v23, v11, v12 -; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v24, v6 -; GFX9-O0-NEXT: v_mad_u64_u32 v[11:12], s[6:7], v0, v5, 0 -; GFX9-O0-NEXT: v_mov_b32_e32 v21, v12 +; GFX9-O0-NEXT: v_mov_b32_e32 v18, s6 +; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v18 +; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15] +; GFX9-O0-NEXT: v_mov_b32_e32 v18, v15 +; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v18 +; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $vgpr8_vgpr9 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v14 +; GFX9-O0-NEXT: v_or_b32_e64 v22, v8, v9 +; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v23, v5 +; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v0, v2, 0 +; GFX9-O0-NEXT: v_mov_b32_e32 v18, v15 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5 -; GFX9-O0-NEXT: ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v22, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 -; GFX9-O0-NEXT: v_mov_b32_e32 v20, v21 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v24 -; GFX9-O0-NEXT: v_mov_b32_e32 v19, v22 -; GFX9-O0-NEXT: v_add_co_u32_e64 
v5, s[6:7], v5, v20 -; GFX9-O0-NEXT: v_addc_co_u32_e64 v19, s[6:7], v6, v19, s[6:7] -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v19 -; GFX9-O0-NEXT: v_mov_b32_e32 v19, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s5 +; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v19, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v22 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v23 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v19 +; GFX9-O0-NEXT: v_add_co_u32_e64 v8, s[6:7], v8, v9 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v2, v5, s[6:7] +; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0xffffffff ; GFX9-O0-NEXT: s_mov_b32 s8, s7 -; GFX9-O0-NEXT: v_and_b32_e64 v19, v19, s8 -; GFX9-O0-NEXT: v_mov_b32_e32 v20, v5 +; GFX9-O0-NEXT: v_and_b32_e64 v2, v2, s8 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8 ; GFX9-O0-NEXT: ; kill: def $sgpr6 killed $sgpr6 killed $sgpr6_sgpr7 -; GFX9-O0-NEXT: v_and_b32_e64 v21, v20, s6 -; GFX9-O0-NEXT: ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v22, v19 -; GFX9-O0-NEXT: v_mad_u64_u32 v[19:20], s[6:7], v0, v1, 0 -; GFX9-O0-NEXT: v_mov_b32_e32 v23, v19 +; GFX9-O0-NEXT: v_and_b32_e64 v18, v5, s6 +; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v19, v2 +; GFX9-O0-NEXT: v_mad_u64_u32 v[22:23], s[6:7], v0, v1, 0 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v22 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, s5 -; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v24, v0 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, v24 -; GFX9-O0-NEXT: v_mov_b32_e32 v19, v20 +; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 
def $vgpr1_vgpr2 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v22, v23 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: ; implicit-def: $sgpr7 ; GFX9-O0-NEXT: ; implicit-def: $sgpr7 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v20, v1 -; GFX9-O0-NEXT: v_lshlrev_b64 v[19:20], s4, v[19:20] -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v20 -; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v23 -; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 killed $vgpr19_vgpr20 killed $exec -; GFX9-O0-NEXT: v_or_b32_e64 v23, v1, v19 -; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v24, v0 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, v23 -; GFX9-O0-NEXT: v_mov_b32_e32 v20, v21 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v24 -; GFX9-O0-NEXT: v_mov_b32_e32 v19, v22 -; GFX9-O0-NEXT: v_add_co_u32_e64 v0, s[6:7], v0, v20 -; GFX9-O0-NEXT: v_addc_co_u32_e64 v19, s[6:7], v1, v19, s[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, s6 +; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v23, v5 +; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], s4, v[22:23] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 +; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 killed $vgpr1_vgpr2 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v22 +; GFX9-O0-NEXT: v_or_b32_e64 v1, v1, v2 +; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v18 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v19 +; GFX9-O0-NEXT: v_add_co_u32_e64 v0, s[6:7], v0, v5 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v1, v2, s[6:7] ; GFX9-O0-NEXT: ; kill: def 
$vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v19 -; GFX9-O0-NEXT: v_lshrrev_b64 v[21:22], s4, v[0:1] -; GFX9-O0-NEXT: v_lshrrev_b64 v[5:6], s4, v[5:6] -; GFX9-O0-NEXT: v_mov_b32_e32 v19, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v20, v21 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v22 -; GFX9-O0-NEXT: v_add_co_u32_e64 v19, s[6:7], v19, v20 -; GFX9-O0-NEXT: v_addc_co_u32_e64 v5, s[6:7], v5, v6, s[6:7] -; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v20, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v19 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v20 -; GFX9-O0-NEXT: v_add_co_u32_e64 v19, s[6:7], v5, v6 -; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v2, v3, s[6:7] -; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v20, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 +; GFX9-O0-NEXT: v_lshrrev_b64 v[18:19], s4, v[0:1] +; GFX9-O0-NEXT: v_lshrrev_b64 v[22:23], s4, v[8:9] +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v22 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v23 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v19 +; GFX9-O0-NEXT: v_add_co_u32_e64 v18, s[6:7], v8, v9 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v2, v5, s[6:7] +; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v19, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v20 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v21 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v19 +; GFX9-O0-NEXT: v_add_co_u32_e64 v18, s[6:7], v8, v9 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v2, v5, s[6:7] +; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v19, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v18 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v16 ; 
GFX9-O0-NEXT: v_mov_b32_e32 v2, v19 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v20 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v18 -; GFX9-O0-NEXT: v_add_co_u32_e64 v2, s[6:7], v2, v6 -; GFX9-O0-NEXT: v_addc_co_u32_e64 v5, s[6:7], v3, v5, s[6:7] -; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v17 +; GFX9-O0-NEXT: v_add_co_u32_e64 v8, s[6:7], v8, v9 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v2, v5, s[6:7] +; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v2 ; GFX9-O0-NEXT: v_lshlrev_b64 v[0:1], s4, v[0:1] -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v1 -; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 killed $vgpr11_vgpr12 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1 +; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5 -; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12 -; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s5 +; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v15 +; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v5 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, v11 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v14 ; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v1 ; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v0 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, v15 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v16 -; GFX9-O0-NEXT: 
v_mov_b32_e32 v5, v13 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v14 -; GFX9-O0-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v12 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v11, vcc -; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v5, v6, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v13 +; GFX9-O0-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v9 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v8, vcc +; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v5, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v2, vcc, v1, v2, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 @@ -1462,53 +1449,48 @@ define i128 @v_srem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v10 +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v7 ; GFX9-O0-NEXT: v_xor_b32_e64 v3, v3, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 -; GFX9-O0-NEXT: v_xor_b32_e64 v9, v6, v5 -; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v6 +; GFX9-O0-NEXT: v_xor_b32_e64 v8, v5, v4 +; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v3 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 -; 
GFX9-O0-NEXT: v_mov_b32_e32 v6, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11 ; GFX9-O0-NEXT: v_xor_b32_e64 v3, v3, v6 ; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7 -; GFX9-O0-NEXT: v_xor_b32_e64 v0, v0, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v10 +; GFX9-O0-NEXT: v_xor_b32_e64 v0, v0, v7 ; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v10 -; GFX9-O0-NEXT: v_sub_co_u32_e32 v7, vcc, v7, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v9 +; GFX9-O0-NEXT: v_sub_co_u32_e32 v5, vcc, v5, v7 ; GFX9-O0-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v6, vcc -; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v3, v5, vcc +; GFX9-O0-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v4, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v1 +; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 -; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v0 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, v7 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 -; GFX9-O0-NEXT: v_lshrrev_b64 v[7:8], s4, v[7:8] -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v7 +; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 ; GFX9-O0-NEXT: v_lshrrev_b64 v[5:6], s4, v[5:6] -; GFX9-O0-NEXT: 
v_mov_b32_e32 v3, v5 -; GFX9-O0-NEXT: ; kill: killed $vgpr4 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5 +; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s4, v[3:4] +; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec ; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: s_setpc_b64 s[30:31] @@ -1725,266 +1707,258 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0: ; %bb.0: ; %_udiv-special-cases ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v6 ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1 -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload -; GFX9-O0-NEXT: 
v_mov_b32_e32 v9, v0 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v0 +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v2 +; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v1 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v3 +; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v3 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7 +; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v7 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4_sgpr5 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v12 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:92 ; 
4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v10 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(7) -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2 +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill +; GFX9-O0-NEXT: s_waitcnt vmcnt(6) ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v10 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12 -; GFX9-O0-NEXT: 
buffer_store_dword v3, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2 +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v12 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v12 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v2 -; GFX9-O0-NEXT: v_or_b32_e64 v3, v8, v7 +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill ; GFX9-O0-NEXT: v_mov_b32_e32 v6, 
v11 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1 -; GFX9-O0-NEXT: v_or_b32_e64 v1, v5, v6 -; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v1 +; GFX9-O0-NEXT: v_or_b32_e64 v2, v7, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-O0-NEXT: v_or_b32_e64 v0, v4, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 -; GFX9-O0-NEXT: s_waitcnt vmcnt(16) -; GFX9-O0-NEXT: v_writelane_b32 v0, s6, 0 -; GFX9-O0-NEXT: v_writelane_b32 v0, s7, 1 -; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[1:2], s[6:7] -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v14 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10 -; GFX9-O0-NEXT: v_or_b32_e64 v15, v4, v2 +; GFX9-O0-NEXT: ; implicit-def: $vgpr30 : SGPR spill to VGPR lane +; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 0 +; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 1 +; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[0:1], s[6:7] ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v13 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9 -; GFX9-O0-NEXT: v_or_b32_e64 v9, v3, v1 -; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v15 -; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[9:10], s[6:7] +; GFX9-O0-NEXT: v_or_b32_e64 v14, v3, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v12 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8 +; GFX9-O0-NEXT: v_or_b32_e64 v8, v2, v0 +; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v14 +; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7] ; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] -; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v6 +; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v5 ; GFX9-O0-NEXT: s_mov_b32 s9, 32 -; GFX9-O0-NEXT: v_add_u32_e64 v6, v6, s9 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7 -; GFX9-O0-NEXT: v_min_u32_e64 v6, v6, v7 +; GFX9-O0-NEXT: 
v_add_u32_e64 v5, v5, s9 +; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v6 +; GFX9-O0-NEXT: v_min_u32_e64 v5, v5, v6 ; GFX9-O0-NEXT: s_mov_b32 s8, 0 ; GFX9-O0-NEXT: ; implicit-def: $sgpr10 -; GFX9-O0-NEXT: v_mov_b32_e32 v9, s8 -; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v9 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v7 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v5 -; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s9 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v8, v8 -; GFX9-O0-NEXT: v_min_u32_e64 v15, v5, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, s8 +; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v6 +; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v4 +; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9 +; GFX9-O0-NEXT: v_ffbh_u32_e64 v7, v7 +; GFX9-O0-NEXT: v_min_u32_e64 v14, v4, v7 ; GFX9-O0-NEXT: ; implicit-def: $sgpr10 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s8 -; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v16, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 +; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v4 ; GFX9-O0-NEXT: s_mov_b64 s[10:11], 64 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v15 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14 ; GFX9-O0-NEXT: s_mov_b32 s12, s10 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v15 ; GFX9-O0-NEXT: s_mov_b32 s14, s11 -; GFX9-O0-NEXT: v_add_co_u32_e64 v8, s[12:13], v8, s12 -; GFX9-O0-NEXT: v_mov_b32_e32 v9, s14 -; GFX9-O0-NEXT: v_addc_co_u32_e64 v5, s[12:13], v5, v9, s[12:13] -; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v9, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 +; GFX9-O0-NEXT: v_add_co_u32_e64 v7, s[12:13], v7, s12 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, s14 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[12:13], v4, v8, s[12:13] 
+; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 ; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[6:7] -; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[11:12], s[12:13] -; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[12:13] -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, v6, v7, s[12:13] +; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[10:11], s[12:13] +; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v9, s[12:13] +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v8, v5, v6, s[12:13] ; GFX9-O0-NEXT: ; implicit-def: $sgpr12 ; GFX9-O0-NEXT: ; implicit-def: $sgpr12 -; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4 +; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v0 +; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9 ; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v1 -; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s9 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v6, v2 -; GFX9-O0-NEXT: v_min_u32_e64 v6, v5, v6 +; GFX9-O0-NEXT: v_min_u32_e64 v5, v4, v5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr12 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s8 -; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v5, v3 -; GFX9-O0-NEXT: v_add_u32_e64 v5, v5, s9 -; GFX9-O0-NEXT: v_ffbh_u32_e64 v11, v4 -; GFX9-O0-NEXT: v_min_u32_e64 v15, v5, v11 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 +; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 +; GFX9-O0-NEXT: v_ffbh_u32_e64 v4, v2 +; GFX9-O0-NEXT: v_add_u32_e64 v4, v4, s9 +; GFX9-O0-NEXT: 
v_ffbh_u32_e64 v10, v3 +; GFX9-O0-NEXT: v_min_u32_e64 v14, v4, v10 ; GFX9-O0-NEXT: ; implicit-def: $sgpr9 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s8 -; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v16, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v15 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 +; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v14 ; GFX9-O0-NEXT: s_mov_b32 s8, s10 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v15 ; GFX9-O0-NEXT: s_mov_b32 s10, s11 -; GFX9-O0-NEXT: v_add_co_u32_e64 v11, s[8:9], v11, s8 -; GFX9-O0-NEXT: v_mov_b32_e32 v12, s10 -; GFX9-O0-NEXT: v_addc_co_u32_e64 v5, s[8:9], v5, v12, s[8:9] -; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12 +; GFX9-O0-NEXT: v_add_co_u32_e64 v10, s[8:9], v10, s8 +; GFX9-O0-NEXT: v_mov_b32_e32 v11, s10 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[8:9], v4, v11, s[8:9] +; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v11 ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[13:14], s[8:9] -; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[8:9] -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[8:9] +; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[12:13], s[8:9] +; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[8:9] +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[8:9] ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec +; GFX9-O0-NEXT: 
; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 +; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 -; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10 ; GFX9-O0-NEXT: s_mov_b32 s10, s6 ; GFX9-O0-NEXT: s_mov_b32 s11, s7 -; GFX9-O0-NEXT: v_sub_co_u32_e32 v5, vcc, v5, v8 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v9, vcc, v6, v7, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v7, s10 +; GFX9-O0-NEXT: v_sub_co_u32_e32 v4, vcc, v4, v7 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v5, v6, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v6, s10 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v6, v7, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v7, s11 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, s10 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v6, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v6, s11 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v6, v7, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v6, vcc, v5, v6, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9 -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8 +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: ; kill: def $vgpr8 
killed $vgpr8 def $vgpr8_vgpr9 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7 -; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6 +; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[8:9], s[6:7] +; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7] ; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f -; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[5:6], s[12:13] -; GFX9-O0-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[14:15] -; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[8:9], s[6:7] -; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[14:15] -; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v10, s[8:9] -; GFX9-O0-NEXT: v_and_b32_e64 v7, 1, v7 -; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v7, 1 +; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13] +; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15] +; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7] +; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15] +; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9] +; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6 +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[8:9], v6, 1 ; GFX9-O0-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9] ; GFX9-O0-NEXT: s_mov_b64 s[4:5], -1 ; GFX9-O0-NEXT: s_xor_b64 s[4:5], s[8:9], s[4:5] -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 ; GFX9-O0-NEXT: s_mov_b32 s14, s13 -; GFX9-O0-NEXT: v_xor_b32_e64 v7, v7, s14 +; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14 ; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13 -; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, s12 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed 
$exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v9 -; GFX9-O0-NEXT: v_or_b32_e64 v7, v7, v10 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8 -; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v6 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7 -; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[5:6], s[6:7] -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v2, v5, s[8:9] -; GFX9-O0-NEXT: v_mov_b32_e32 v2, s10 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[8:9] +; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12 +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v8 +; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v9 +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 +; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v6 +; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[6:7], v[4:5], s[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s11 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v1, v4, s[8:9] +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[8:9] ; GFX9-O0-NEXT: ; implicit-def: $sgpr12 ; GFX9-O0-NEXT: ; implicit-def: $sgpr12 -; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s11 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v4, v5, s[8:9] -; GFX9-O0-NEXT: v_mov_b32_e32 v4, s10 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[8:9] +; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4 +; 
GFX9-O0-NEXT: v_mov_b32_e32 v4, s11 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[8:9] +; GFX9-O0-NEXT: v_mov_b32_e32 v3, s10 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[8:9] ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8 -; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4 ; GFX9-O0-NEXT: s_and_b64 s[6:7], s[4:5], s[6:7] -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 s[4:5], exec -; GFX9-O0-NEXT: v_writelane_b32 v0, s4, 2 -; GFX9-O0-NEXT: v_writelane_b32 v0, s5, 3 +; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 2 +; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 3 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] @@ -1992,11 +1966,11 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_branch .LBB1_8 ; GFX9-O0-NEXT: .LBB1_1: ; %Flow ; 
GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s4, v0, 4 -; GFX9-O0-NEXT: v_readlane_b32 s5, v0, 5 +; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 4 +; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 5 ; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-O0-NEXT: ; %bb.2: ; %Flow ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload @@ -2025,20 +1999,19 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB1_5 ; GFX9-O0-NEXT: .LBB1_3: ; %Flow2 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s4, v4, 2 -; GFX9-O0-NEXT: v_readlane_b32 s5, v4, 3 -; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 2 +; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 3 +; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill -; 
GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: s_nop 0 ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -2085,13 +2058,6 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB1_3 ; GFX9-O0-NEXT: .LBB1_5: ; %Flow1 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s4, v8, 6 -; GFX9-O0-NEXT: v_readlane_b32 s5, v8, 7 -; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload @@ -2100,9 +2066,15 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s4, v30, 6 +; GFX9-O0-NEXT: v_readlane_b32 s5, v30, 7 +; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: s_nop 0 ; GFX9-O0-NEXT: buffer_store_dword v7, off, 
s[0:3], s32 offset:196 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -2116,92 +2088,87 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_branch .LBB1_4 ; GFX9-O0-NEXT: .LBB1_6: ; %udiv-do-while ; GFX9-O0-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s6, v16, 8 -; GFX9-O0-NEXT: v_readlane_b32 s7, v16, 9 ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:256 ; 4-byte Folded 
Reload ; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v24, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, 
s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] +; GFX9-O0-NEXT: s_waitcnt vmcnt(0) +; GFX9-O0-NEXT: v_readlane_b32 s6, v30, 8 +; GFX9-O0-NEXT: v_readlane_b32 s7, v30, 9 ; GFX9-O0-NEXT: s_mov_b32 s4, 63 -; GFX9-O0-NEXT: s_waitcnt vmcnt(16) -; GFX9-O0-NEXT: v_lshrrev_b64 v[29:30], s4, v[2:3] -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v30 +; GFX9-O0-NEXT: v_lshrrev_b64 v[28:29], s4, v[2:3] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v29 ; GFX9-O0-NEXT: s_mov_b32 s5, 1 -; GFX9-O0-NEXT: v_lshlrev_b64 v[23:24], s5, v[23:24] -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v24 +; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], s5, v[22:23] +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v23 ; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v29 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 -; GFX9-O0-NEXT: v_or_b32_e64 v23, v5, v10 -; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v24, v4 -; GFX9-O0-NEXT: v_lshlrev_b64 v[29:30], s5, v[2:3] +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v28 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v22 +; GFX9-O0-NEXT: v_or_b32_e64 v22, v5, v10 +; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v23, v4 +; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[2:3] ; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], s4, v[6:7] -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v30 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v29 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5 ; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v29 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v28 ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec ; GFX9-O0-NEXT: v_or_b32_e64 v4, v3, v4 ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2 ; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s5, v[0:1] -; GFX9-O0-NEXT: v_lshlrev_b64 v[29:30], s5, v[6:7] +; GFX9-O0-NEXT: v_lshlrev_b64 v[28:29], s5, v[6:7] ; GFX9-O0-NEXT: 
v_lshrrev_b64 v[0:1], s4, v[0:1] ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v30 -; GFX9-O0-NEXT: s_waitcnt vmcnt(10) -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v28 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v29 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v27 ; GFX9-O0-NEXT: v_or3_b32 v6, v6, v7, v10 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, v29 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v27 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v28 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v26 ; GFX9-O0-NEXT: v_or3_b32 v0, v0, v1, v7 ; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v3 -; GFX9-O0-NEXT: s_waitcnt vmcnt(8) -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v26 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v25 ; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v25 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v24 ; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 ; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6 ; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v23 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v24 -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v22 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, v11 ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v14 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v15 ; GFX9-O0-NEXT: v_sub_co_u32_e32 v13, vcc, v13, v6 -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_subb_co_u32_e32 v12, vcc, v12, v10, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v11, v4, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v5, vcc @@ -2221,22 +2188,22 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v15, v12 ; GFX9-O0-NEXT: v_mov_b32_e32 v12, 0 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, 0 -; GFX9-O0-NEXT: v_mov_b32_e32 v23, v22 -; 
GFX9-O0-NEXT: v_and_b32_e64 v23, v7, v23 -; GFX9-O0-NEXT: v_and_b32_e64 v21, v11, v21 -; GFX9-O0-NEXT: ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v22, v23 -; GFX9-O0-NEXT: v_mov_b32_e32 v23, v20 -; GFX9-O0-NEXT: v_and_b32_e64 v7, v7, v23 -; GFX9-O0-NEXT: v_and_b32_e64 v23, v11, v19 -; GFX9-O0-NEXT: ; kill: def $vgpr23 killed $vgpr23 def $vgpr23_vgpr24 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v24, v7 -; GFX9-O0-NEXT: v_mov_b32_e32 v20, v23 -; GFX9-O0-NEXT: v_mov_b32_e32 v19, v24 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v21 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v22 -; GFX9-O0-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v20 -; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v19, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v22, v21 +; GFX9-O0-NEXT: v_and_b32_e64 v22, v7, v22 +; GFX9-O0-NEXT: v_and_b32_e64 v20, v11, v20 +; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v21, v22 +; GFX9-O0-NEXT: v_mov_b32_e32 v22, v19 +; GFX9-O0-NEXT: v_and_b32_e64 v7, v7, v22 +; GFX9-O0-NEXT: v_and_b32_e64 v22, v11, v18 +; GFX9-O0-NEXT: ; kill: def $vgpr22 killed $vgpr22 def $vgpr22_vgpr23 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v23, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v19, v22 +; GFX9-O0-NEXT: v_mov_b32_e32 v18, v23 +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v21 +; GFX9-O0-NEXT: v_sub_co_u32_e32 v6, vcc, v6, v19 +; GFX9-O0-NEXT: v_subb_co_u32_e32 v10, vcc, v10, v18, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v11, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v5, v7, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 @@ -2252,66 +2219,66 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_mov_b64 s[8:9], -1 ; GFX9-O0-NEXT: s_mov_b32 s5, s8 ; GFX9-O0-NEXT: s_mov_b32 s4, s9 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v18 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, s5 -; GFX9-O0-NEXT: v_add_co_u32_e32 v20, vcc, v11, 
v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5 +; GFX9-O0-NEXT: v_add_co_u32_e32 v19, vcc, v11, v16 ; GFX9-O0-NEXT: v_mov_b32_e32 v11, s4 ; GFX9-O0-NEXT: v_addc_co_u32_e32 v9, vcc, v9, v11, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5 -; GFX9-O0-NEXT: v_addc_co_u32_e32 v17, vcc, v10, v11, vcc +; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v10, v11, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v10, s4 ; GFX9-O0-NEXT: v_addc_co_u32_e32 v8, vcc, v8, v10, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr20 killed $vgpr20 def $vgpr20_vgpr21 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v21, v9 +; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v20, v9 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v8 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v9, v18 -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v20 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v21 -; GFX9-O0-NEXT: v_mov_b32_e32 v22, v18 -; GFX9-O0-NEXT: v_mov_b32_e32 v19, v21 -; GFX9-O0-NEXT: v_or_b32_e64 v19, v19, v22 -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, v20 -; GFX9-O0-NEXT: v_or_b32_e64 v17, v17, v18 -; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v19 -; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[17:18], v[12:13] +; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v19 +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v20 +; GFX9-O0-NEXT: v_mov_b32_e32 v21, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v18, v20 +; 
GFX9-O0-NEXT: v_or_b32_e64 v18, v18, v21 +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, v19 +; GFX9-O0-NEXT: v_or_b32_e64 v16, v16, v17 +; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v18 +; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[4:5], v[16:17], v[12:13] ; GFX9-O0-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, v2 -; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, v2 +; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, v0 -; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, v0 +; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v15 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, v14 -; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v15 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, v14 +; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v13 -; GFX9-O0-NEXT: 
v_mov_b32_e32 v17, v12 -; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v13 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, v12 +; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v18, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v17, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX9-O0-NEXT: v_writelane_b32 v16, s6, 4 -; GFX9-O0-NEXT: v_writelane_b32 v16, s7, 5 +; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 4 +; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 5 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX9-O0-NEXT: v_writelane_b32 v16, s6, 8 -; GFX9-O0-NEXT: v_writelane_b32 v16, s7, 9 +; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 8 +; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 9 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -2349,52 +2316,52 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GFX9-O0-NEXT: 
buffer_load_dword v18, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-O0-NEXT: s_waitcnt vmcnt(10) ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10 -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[21:22] +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], v4, v[20:21] ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v7 ; GFX9-O0-NEXT: s_mov_b32 s6, 64 ; GFX9-O0-NEXT: v_sub_u32_e64 v12, s6, v4 -; GFX9-O0-NEXT: v_lshlrev_b64 v[23:24], v12, v[19:20] -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v24 +; GFX9-O0-NEXT: v_lshlrev_b64 v[22:23], v12, v[18:19] +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v23 ; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v12 ; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec -; 
GFX9-O0-NEXT: v_mov_b32_e32 v7, v23 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v22 ; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7 ; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 ; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7 ; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v4, s6 ; GFX9-O0-NEXT: v_sub_u32_e64 v5, v4, s6 -; GFX9-O0-NEXT: v_lshrrev_b64 v[23:24], v5, v[19:20] -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v24 +; GFX9-O0-NEXT: v_lshrrev_b64 v[22:23], v5, v[18:19] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v23 ; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[4:5] ; GFX9-O0-NEXT: s_mov_b32 s6, 0 ; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v4, s6 -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v22 +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v21 ; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v12, s[6:7] ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v23 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v22 ; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5] -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v21 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v20 ; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[6:7] ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 -; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], v4, v[19:20] +; GFX9-O0-NEXT: v_lshrrev_b64 v[4:5], v4, v[18:19] ; GFX9-O0-NEXT: v_mov_b32_e32 v15, v5 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 ; GFX9-O0-NEXT: s_mov_b32 s8, s7 @@ -2413,12 +2380,12 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: s_mov_b64 s[8:9], -1 ; GFX9-O0-NEXT: s_mov_b32 s5, s8 ; GFX9-O0-NEXT: s_mov_b32 s4, s9 -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v18 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, s5 -; GFX9-O0-NEXT: v_add_co_u32_e32 v12, vcc, v12, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, s4 -; GFX9-O0-NEXT: v_addc_co_u32_e32 v17, vcc, v15, v17, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 
v14, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, s5 +; GFX9-O0-NEXT: v_add_co_u32_e32 v12, vcc, v12, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, s4 +; GFX9-O0-NEXT: v_addc_co_u32_e32 v16, vcc, v15, v16, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v15, s5 ; GFX9-O0-NEXT: v_addc_co_u32_e32 v14, vcc, v14, v15, vcc ; GFX9-O0-NEXT: v_mov_b32_e32 v15, s4 @@ -2430,7 +2397,7 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v17 +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v16 ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -2443,10 +2410,11 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: v_mov_b32_e32 v14, s8 ; GFX9-O0-NEXT: v_mov_b32_e32 v13, s7 ; GFX9-O0-NEXT: v_mov_b32_e32 v12, s6 -; GFX9-O0-NEXT: v_writelane_b32 v16, s4, 8 -; GFX9-O0-NEXT: v_writelane_b32 v16, s5, 9 +; GFX9-O0-NEXT: s_waitcnt vmcnt(4) +; GFX9-O0-NEXT: v_writelane_b32 v30, s4, 8 +; GFX9-O0-NEXT: v_writelane_b32 v30, s5, 9 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_store_dword v16, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 @@ -2474,403 +2442,396 @@ define i128 @v_urem_i128_vv(i128 %lhs, i128 %rhs) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_branch .LBB1_6 ; GFX9-O0-NEXT: .LBB1_8: ; %udiv-bb1 +; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 
; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1 ; GFX9-O0-NEXT: s_mov_b32 s5, s6 -; GFX9-O0-NEXT: s_waitcnt vmcnt(1) -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 +; GFX9-O0-NEXT: s_waitcnt vmcnt(2) +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 ; GFX9-O0-NEXT: s_mov_b32 s4, s7 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 ; GFX9-O0-NEXT: s_mov_b32 s8, s6 ; GFX9-O0-NEXT: s_mov_b32 s9, s7 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5 -; 
GFX9-O0-NEXT: v_add_co_u32_e32 v9, vcc, v4, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s4 -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v5, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s8 -; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v5, vcc -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s9 -; GFX9-O0-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5 +; GFX9-O0-NEXT: v_add_co_u32_e32 v8, vcc, v3, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s4 +; GFX9-O0-NEXT: s_waitcnt vmcnt(1) +; GFX9-O0-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v4, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 +; GFX9-O0-NEXT: v_addc_co_u32_e32 v0, vcc, v0, v4, vcc +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s9 +; GFX9-O0-NEXT: v_addc_co_u32_e32 v2, vcc, v2, v4, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v10, v2 +; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v1 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2 +; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1 -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0 +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill +; GFX9-O0-NEXT: 
v_mov_b32_e32 v4, v8 ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10 -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b32 s4, 0x7f -; GFX9-O0-NEXT: v_sub_u32_e64 v3, s4, v4 -; GFX9-O0-NEXT: v_lshlrev_b64 v[5:6], v3, v[11:12] -; GFX9-O0-NEXT: v_mov_b32_e32 v13, v6 +; GFX9-O0-NEXT: v_sub_u32_e64 v2, s4, v3 +; GFX9-O0-NEXT: v_lshlrev_b64 v[4:5], v2, v[10:11] +; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5 ; GFX9-O0-NEXT: s_mov_b32 s4, 64 -; GFX9-O0-NEXT: v_sub_u32_e64 v14, s4, v3 -; GFX9-O0-NEXT: v_lshrrev_b64 v[14:15], v14, v[7:8] -; GFX9-O0-NEXT: v_mov_b32_e32 v16, v15 -; GFX9-O0-NEXT: v_or_b32_e64 v13, v13, v16 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v14 -; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v6 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v13 -; GFX9-O0-NEXT: v_mov_b32_e32 v15, v6 -; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v3, s4 +; GFX9-O0-NEXT: v_sub_u32_e64 v13, s4, v2 +; GFX9-O0-NEXT: v_lshrrev_b64 v[13:14], v13, v[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v14 +; GFX9-O0-NEXT: v_or_b32_e64 v12, v12, v15 +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v13 +; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12 +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v5 +; GFX9-O0-NEXT: v_cmp_lt_u32_e64 s[4:5], v2, s4 ; GFX9-O0-NEXT: s_mov_b32 s10, 63 -; GFX9-O0-NEXT: v_sub_u32_e64 v4, s10, v4 -; 
GFX9-O0-NEXT: v_lshlrev_b64 v[13:14], v4, v[7:8] -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v15, s[4:5] +; GFX9-O0-NEXT: v_sub_u32_e64 v3, s10, v3 +; GFX9-O0-NEXT: v_lshlrev_b64 v[12:13], v3, v[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[4:5] ; GFX9-O0-NEXT: s_mov_b32 s10, 0 -; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[10:11], v3, s10 -; GFX9-O0-NEXT: v_mov_b32_e32 v15, v12 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v15, s[10:11] -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v13 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[10:11] +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[10:11], v2, s10 +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v11 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v14, s[10:11] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[10:11] ; GFX9-O0-NEXT: ; implicit-def: $sgpr10 ; GFX9-O0-NEXT: ; implicit-def: $sgpr10 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 -; GFX9-O0-NEXT: v_lshlrev_b64 v[7:8], v3, v[7:8] -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, s9 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5] -; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr7_vgpr8 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v4, s8 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v4, v7, s[4:5] +; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v3 +; GFX9-O0-NEXT: v_lshlrev_b64 v[6:7], v2, v[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s9 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5] +; GFX9-O0-NEXT: ; kill: def 
$vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v3, s8 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v3, v6, s[4:5] ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr4 -; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v3 -; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill +; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v2 +; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10 -; GFX9-O0-NEXT: v_or_b32_e64 v3, v3, v4 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v9 -; GFX9-O0-NEXT: v_or_b32_e64 v1, v1, v2 -; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 -; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[1:2], s[6:7] +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9 +; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v8 +; GFX9-O0-NEXT: v_or_b32_e64 v0, v0, v1 +; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 +; 
GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[4:5], v[0:1], s[6:7] ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7] -; GFX9-O0-NEXT: v_mov_b32_e32 v3, s8 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, s9 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, s7 -; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s8 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, s9 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s7 +; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 s[6:7], exec ; GFX9-O0-NEXT: s_and_b64 
s[4:5], s[6:7], s[4:5] ; GFX9-O0-NEXT: s_xor_b64 s[6:7], s[4:5], s[6:7] -; GFX9-O0-NEXT: v_writelane_b32 v0, s6, 6 -; GFX9-O0-NEXT: v_writelane_b32 v0, s7, 7 +; GFX9-O0-NEXT: s_waitcnt vmcnt(16) +; GFX9-O0-NEXT: v_writelane_b32 v30, s6, 6 +; GFX9-O0-NEXT: v_writelane_b32 v30, s7, 7 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v30, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] ; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-O0-NEXT: s_cbranch_execz .LBB1_5 ; GFX9-O0-NEXT: s_branch .LBB1_7 ; GFX9-O0-NEXT: .LBB1_9: ; %udiv-end -; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1 -; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19] -; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:76 ; 4-byte 
Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b32 s4, 32 ; GFX9-O0-NEXT: s_waitcnt vmcnt(2) -; GFX9-O0-NEXT: v_lshrrev_b64 v[2:3], s4, v[7:8] -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2 +; GFX9-O0-NEXT: v_lshrrev_b64 v[2:3], s4, v[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2 ; GFX9-O0-NEXT: s_waitcnt vmcnt(1) -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v13 -; GFX9-O0-NEXT: v_mul_lo_u32 v5, v6, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v12 +; GFX9-O0-NEXT: v_mul_lo_u32 v4, v5, v2 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_lshrrev_b64 v[13:14], s4, v[13:14] -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13 -; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 killed $vgpr7_vgpr8 killed $exec -; GFX9-O0-NEXT: v_mul_lo_u32 v3, v7, v3 -; GFX9-O0-NEXT: v_mad_u64_u32 v[13:14], s[6:7], v7, v2, 0 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v14 -; GFX9-O0-NEXT: v_add3_u32 v2, v2, v3, v5 +; GFX9-O0-NEXT: v_lshrrev_b64 v[12:13], s4, v[12:13] +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v12 +; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr6_vgpr7 killed $exec +; GFX9-O0-NEXT: v_mul_lo_u32 v3, v6, v3 +; GFX9-O0-NEXT: v_mad_u64_u32 v[12:13], s[6:7], v6, v2, 0 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v13 +; GFX9-O0-NEXT: 
v_add3_u32 v2, v2, v3, v4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5 ; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5 -; GFX9-O0-NEXT: v_lshlrev_b64 v[17:18], s4, v[2:3] -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v18 -; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 killed $vgpr13_vgpr14 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v4 +; GFX9-O0-NEXT: v_lshlrev_b64 v[3:4], s4, v[2:3] +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v4 +; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 killed $vgpr12_vgpr13 killed $exec ; GFX9-O0-NEXT: s_mov_b32 s5, 0 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, s5 -; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v14 -; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13 -; GFX9-O0-NEXT: v_or_b32_e64 v13, v3, v5 -; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v2 -; GFX9-O0-NEXT: v_lshrrev_b64 v[2:3], s4, v[15:16] -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v11 -; GFX9-O0-NEXT: v_mul_lo_u32 v3, v2, v8 -; GFX9-O0-NEXT: v_lshrrev_b64 v[11:12], s4, v[11:12] -; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 killed $vgpr11_vgpr12 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v15 -; GFX9-O0-NEXT: v_mul_lo_u32 v11, v11, v5 -; GFX9-O0-NEXT: v_mad_u64_u32 v[15:16], s[6:7], v2, v5, 0 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v16 -; GFX9-O0-NEXT: v_add3_u32 v2, v2, v3, v11 +; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v13 +; GFX9-O0-NEXT: 
v_or_b32_e64 v2, v2, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v12 +; GFX9-O0-NEXT: v_or_b32_e64 v12, v3, v4 +; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v2 +; GFX9-O0-NEXT: v_lshrrev_b64 v[2:3], s4, v[14:15] +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v10 +; GFX9-O0-NEXT: v_mul_lo_u32 v3, v2, v7 +; GFX9-O0-NEXT: v_lshrrev_b64 v[10:11], s4, v[10:11] +; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 killed $vgpr10_vgpr11 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14 +; GFX9-O0-NEXT: v_mul_lo_u32 v10, v10, v4 +; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v2, v4, 0 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v15 +; GFX9-O0-NEXT: v_add3_u32 v2, v2, v3, v10 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: ; implicit-def: $sgpr7 ; GFX9-O0-NEXT: ; implicit-def: $sgpr7 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, s6 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, s6 ; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10 ; GFX9-O0-NEXT: v_lshlrev_b64 v[2:3], s4, v[2:3] -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v3 -; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 killed $vgpr15_vgpr16 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v3 +; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5 -; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v16, v11 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v16 -; GFX9-O0-NEXT: v_or_b32_e64 v11, v11, v12 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, s5 +; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v10 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v15 +; GFX9-O0-NEXT: v_or_b32_e64 v10, v10, v11 ; 
GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v15 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v14 ; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 ; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v13 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v12 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v14 -; GFX9-O0-NEXT: v_add_co_u32_e64 v13, s[6:7], v11, v12 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13 +; GFX9-O0-NEXT: v_add_co_u32_e64 v12, s[6:7], v10, v11 ; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v2, v3, s[6:7] -; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v14, v2 -; GFX9-O0-NEXT: v_mad_u64_u32 v[15:16], s[6:7], v8, v6, 0 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v15 +; GFX9-O0-NEXT: ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v13, v2 +; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v7, v5, 0 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v14 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, s5 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, s5 ; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: ; implicit-def: $sgpr7 ; GFX9-O0-NEXT: ; implicit-def: $sgpr7 -; GFX9-O0-NEXT: v_mov_b32_e32 v12, s6 -; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v16, v12 -; GFX9-O0-NEXT: v_lshlrev_b64 v[15:16], s4, v[15:16] -; GFX9-O0-NEXT: v_mov_b32_e32 v12, 
v16 -; GFX9-O0-NEXT: v_or_b32_e64 v11, v11, v12 +; GFX9-O0-NEXT: v_mov_b32_e32 v11, s6 +; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v11 +; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15] +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v15 +; GFX9-O0-NEXT: v_or_b32_e64 v10, v10, v11 ; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 killed $vgpr2_vgpr3 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v15 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v14 ; GFX9-O0-NEXT: v_or_b32_e64 v2, v2, v3 ; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11 -; GFX9-O0-NEXT: v_mad_u64_u32 v[15:16], s[6:7], v8, v7, 0 -; GFX9-O0-NEXT: v_mov_b32_e32 v11, v15 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v10 +; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v7, v6, 0 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v14 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, s5 -; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v8 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v12 -; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, s5 +; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v11 +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: ; implicit-def: $sgpr7 ; GFX9-O0-NEXT: ; implicit-def: $sgpr7 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, s6 -; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v16, v17 -; GFX9-O0-NEXT: v_lshlrev_b64 v[15:16], s4, v[15:16] -; GFX9-O0-NEXT: v_mov_b32_e32 v17, v16 -; GFX9-O0-NEXT: v_or_b32_e64 v8, v8, v17 -; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 killed $vgpr11_vgpr12 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v15 -; GFX9-O0-NEXT: v_or_b32_e64 
v19, v11, v12 -; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v20, v8 -; GFX9-O0-NEXT: v_mad_u64_u32 v[11:12], s[6:7], v5, v7, 0 -; GFX9-O0-NEXT: v_mov_b32_e32 v17, v12 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, s6 +; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16 +; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15] +; GFX9-O0-NEXT: v_mov_b32_e32 v16, v15 +; GFX9-O0-NEXT: v_or_b32_e64 v7, v7, v16 +; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 killed $vgpr10_vgpr11 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v14 +; GFX9-O0-NEXT: v_or_b32_e64 v18, v10, v11 +; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v19, v7 +; GFX9-O0-NEXT: v_mad_u64_u32 v[10:11], s[6:7], v4, v6, 0 +; GFX9-O0-NEXT: v_mov_b32_e32 v16, v11 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, s5 -; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, s5 +; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v18 +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v19 -; GFX9-O0-NEXT: v_mov_b32_e32 v16, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v20 -; GFX9-O0-NEXT: v_mov_b32_e32 v15, v18 -; GFX9-O0-NEXT: v_add_co_u32_e64 v7, s[6:7], v7, v16 -; GFX9-O0-NEXT: v_addc_co_u32_e64 v15, s[6:7], v8, v15, s[6:7] -; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v15 -; GFX9-O0-NEXT: v_mov_b32_e32 v15, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v17 +; GFX9-O0-NEXT: v_add_co_u32_e64 v6, s[6:7], v6, v15 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v14, s[6:7], v7, v14, s[6:7] +; GFX9-O0-NEXT: ; kill: def $vgpr6 
killed $vgpr6 def $vgpr6_vgpr7 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14 +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v7 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0xffffffff ; GFX9-O0-NEXT: s_mov_b32 s8, s7 -; GFX9-O0-NEXT: v_and_b32_e64 v15, v15, s8 -; GFX9-O0-NEXT: v_mov_b32_e32 v16, v7 +; GFX9-O0-NEXT: v_and_b32_e64 v14, v14, s8 +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v6 ; GFX9-O0-NEXT: ; kill: def $sgpr6 killed $sgpr6 killed $sgpr6_sgpr7 -; GFX9-O0-NEXT: v_and_b32_e64 v17, v16, s6 -; GFX9-O0-NEXT: ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v18, v15 -; GFX9-O0-NEXT: v_mad_u64_u32 v[15:16], s[6:7], v5, v6, 0 -; GFX9-O0-NEXT: v_mov_b32_e32 v19, v15 +; GFX9-O0-NEXT: v_and_b32_e64 v16, v15, s6 +; GFX9-O0-NEXT: ; kill: def $vgpr16 killed $vgpr16 def $vgpr16_vgpr17 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v17, v14 +; GFX9-O0-NEXT: v_mad_u64_u32 v[14:15], s[6:7], v4, v5, 0 +; GFX9-O0-NEXT: v_mov_b32_e32 v18, v14 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5 -; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v20, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v20 -; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5 +; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v19, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v19 +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v15 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: ; implicit-def: $sgpr7 ; GFX9-O0-NEXT: ; implicit-def: $sgpr7 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, s6 -; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v16, v6 -; GFX9-O0-NEXT: v_lshlrev_b64 v[15:16], s4, v[15:16] -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v16 -; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v19 -; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 killed 
$vgpr15_vgpr16 killed $exec -; GFX9-O0-NEXT: v_or_b32_e64 v19, v6, v15 -; GFX9-O0-NEXT: ; kill: def $vgpr19 killed $vgpr19 def $vgpr19_vgpr20 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v20, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, s6 +; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v5 +; GFX9-O0-NEXT: v_lshlrev_b64 v[14:15], s4, v[14:15] +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v15 +; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v18 +; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 killed $vgpr14_vgpr15 killed $exec +; GFX9-O0-NEXT: v_or_b32_e64 v18, v5, v14 +; GFX9-O0-NEXT: ; kill: def $vgpr18 killed $vgpr18 def $vgpr18_vgpr19 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v19, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v18 +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16 ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v19 -; GFX9-O0-NEXT: v_mov_b32_e32 v16, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v20 -; GFX9-O0-NEXT: v_mov_b32_e32 v15, v18 -; GFX9-O0-NEXT: v_add_co_u32_e64 v5, s[6:7], v5, v16 -; GFX9-O0-NEXT: v_addc_co_u32_e64 v15, s[6:7], v6, v15, s[6:7] -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v15 -; GFX9-O0-NEXT: v_lshrrev_b64 v[17:18], s4, v[5:6] -; GFX9-O0-NEXT: v_lshrrev_b64 v[7:8], s4, v[7:8] -; GFX9-O0-NEXT: v_mov_b32_e32 v15, v7 -; GFX9-O0-NEXT: v_mov_b32_e32 v16, v17 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v8 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v18 -; GFX9-O0-NEXT: v_add_co_u32_e64 v15, s[6:7], v15, v16 -; GFX9-O0-NEXT: v_addc_co_u32_e64 v7, s[6:7], v7, v8, s[6:7] -; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v16, v7 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v15 +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v17 +; GFX9-O0-NEXT: v_add_co_u32_e64 v4, s[6:7], v4, v15 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v14, s[6:7], v5, v14, s[6:7] +; 
GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v14 +; GFX9-O0-NEXT: v_lshrrev_b64 v[16:17], s4, v[4:5] +; GFX9-O0-NEXT: v_lshrrev_b64 v[6:7], s4, v[6:7] +; GFX9-O0-NEXT: v_mov_b32_e32 v14, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v16 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v17 +; GFX9-O0-NEXT: v_add_co_u32_e64 v14, s[6:7], v14, v15 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v6, s[6:7], v6, v7, s[6:7] +; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v16 -; GFX9-O0-NEXT: v_add_co_u32_e64 v15, s[6:7], v7, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v15 +; GFX9-O0-NEXT: v_add_co_u32_e64 v14, s[6:7], v6, v7 ; GFX9-O0-NEXT: v_addc_co_u32_e64 v2, s[6:7], v2, v3, s[6:7] -; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v16, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v15 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v13 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v16 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v14 -; GFX9-O0-NEXT: v_add_co_u32_e64 v2, s[6:7], v2, v8 -; GFX9-O0-NEXT: v_addc_co_u32_e64 v7, s[6:7], v3, v7, s[6:7] +; GFX9-O0-NEXT: ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v15, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v14 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v12 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v15 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v13 +; GFX9-O0-NEXT: v_add_co_u32_e64 v2, s[6:7], v2, v7 +; GFX9-O0-NEXT: v_addc_co_u32_e64 v6, s[6:7], v3, v6, s[6:7] ; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v7 -; GFX9-O0-NEXT: v_lshlrev_b64 v[6:7], s4, v[5:6] -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7 -; GFX9-O0-NEXT: ; kill: def 
$vgpr11 killed $vgpr11 killed $vgpr11_vgpr12 killed $exec -; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, s5 -; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12 -; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v6 +; GFX9-O0-NEXT: v_lshlrev_b64 v[5:6], s4, v[4:5] ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11 -; GFX9-O0-NEXT: v_or_b32_e64 v6, v6, v7 -; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v2 +; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 killed $vgpr10_vgpr11 killed $exec +; GFX9-O0-NEXT: ; implicit-def: $sgpr6 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, s5 +; GFX9-O0-NEXT: ; kill: def $vgpr10 killed $vgpr10 def $vgpr10_vgpr11 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v11, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v11 +; GFX9-O0-NEXT: v_or_b32_e64 v4, v4, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10 +; GFX9-O0-NEXT: v_or_b32_e64 v5, v5, v6 +; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v7 -; GFX9-O0-NEXT: v_mov_b32_e32 v7, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, v5 +; GFX9-O0-NEXT: ; kill: def $vgpr6 killed $vgpr6 killed $vgpr5_vgpr6 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v10 -; GFX9-O0-NEXT: v_sub_co_u32_e32 v7, vcc, v7, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v8 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v9 +; GFX9-O0-NEXT: v_sub_co_u32_e32 v5, vcc, v5, v7 ; GFX9-O0-NEXT: v_subb_co_u32_e32 v0, vcc, v0, v6, vcc -; 
GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v3, v5, vcc +; GFX9-O0-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v4, vcc ; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 -; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v1 +; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 ; GFX9-O0-NEXT: ; implicit-def: $sgpr5 -; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v0 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, v7 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v5 -; GFX9-O0-NEXT: v_lshrrev_b64 v[7:8], s4, v[7:8] -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v7 +; GFX9-O0-NEXT: ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6 killed $exec +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v5 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, v3 ; GFX9-O0-NEXT: v_lshrrev_b64 v[5:6], s4, v[5:6] -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v5 -; GFX9-O0-NEXT: ; kill: killed $vgpr4 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v5 +; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s4, v[3:4] +; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $vgpr3_vgpr4 killed $exec ; GFX9-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: s_setpc_b64 s[30:31] diff --git 
a/llvm/test/CodeGen/AMDGPU/remat-vop.mir b/llvm/test/CodeGen/AMDGPU/remat-vop.mir index 248a9e2ddb636..4f6ea44ccf68b 100644 --- a/llvm/test/CodeGen/AMDGPU/remat-vop.mir +++ b/llvm/test/CodeGen/AMDGPU/remat-vop.mir @@ -7,12 +7,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_mov_b32_e32 - ; GCN: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec %1:vgpr_32 = V_MOV_B32_e32 2, implicit $exec @@ -31,16 +31,12 @@ body: | bb.0: ; GCN-LABEL: name: test_no_remat_v_mov_b32_e32_impuse ; GCN: $m0 = IMPLICIT_DEF - ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec, implicit $m0 - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec, implicit $m0 - ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec, implicit $m0 - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: 
S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec, implicit $m0 + ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec, implicit $m0 + ; GCN-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec, implicit $m0 + ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 $m0 = IMPLICIT_DEF %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec, implicit $m0 @@ -59,12 +55,12 @@ machineFunctionInfo: body: | bb.0: ; GCN-LABEL: name: test_remat_v_mov_b32_e32_exec_def - ; GCN: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_2]] ; GCN-NEXT: $exec = S_ANDN2_B64_term $exec, undef renamable $sgpr0_sgpr1, implicit-def $scc ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec @@ -82,12 +78,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_mov_b32_e64 - ; GCN: renamable $vgpr0 = V_MOV_B32_e64 1, implicit $exec - ; GCN-NEXT: renamable 
$vgpr1 = V_MOV_B32_e64 2, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e64 3, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MOV_B32_e64_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e64 1, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e64 2, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_e64_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e64 3, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = V_MOV_B32_e64 1, implicit $exec %1:vgpr_32 = V_MOV_B32_e64 2, implicit $exec @@ -105,16 +101,12 @@ machineFunctionInfo: body: | bb.0: ; GCN-LABEL: name: test_no_remat_v_mov_b32_dpp - ; GCN: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, undef $vgpr0, 1, 15, 15, 1, implicit $exec - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_dpp undef $vgpr1, undef $vgpr0, 1, 15, 15, 1, implicit $exec - ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, undef $vgpr0, 1, 15, 15, 1, implicit $exec - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp undef [[V_MOV_B32_dpp]], undef %1:vgpr_32, 1, 15, 
15, 1, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp undef [[V_MOV_B32_dpp1]], undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_dpp2:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp undef [[V_MOV_B32_dpp2]], undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_dpp]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_dpp1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_dpp2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec %2:vgpr_32 = V_MOV_B32_dpp undef %2:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec @@ -130,12 +122,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_accvgpr_read_b32 - ; GCN: renamable $vgpr0 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_ACCVGPR_READ_B32_e64_:%[0-9]+]]:vgpr_32 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec + ; GCN-NEXT: [[V_ACCVGPR_READ_B32_e64_1:%[0-9]+]]:vgpr_32 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec + ; GCN-NEXT: [[V_ACCVGPR_READ_B32_e64_2:%[0-9]+]]:vgpr_32 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_READ_B32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_READ_B32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_READ_B32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec %1:vgpr_32 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec @@ -151,12 +143,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_accvgpr_write_b32 - ; GCN: renamable $agpr0 = 
V_ACCVGPR_WRITE_B32_e64 1, implicit $exec - ; GCN-NEXT: renamable $agpr1 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr1 - ; GCN-NEXT: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 3, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr0 + ; GCN: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec + ; GCN-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec + ; GCN-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 3, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_WRITE_B32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_WRITE_B32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_WRITE_B32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %0:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec %1:agpr_32 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec @@ -172,12 +164,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_mov_b64_pseudo - ; GCN: renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 1, implicit $exec - ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_MOV_B64_PSEUDO 2, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 3, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1, implicit $exec + ; GCN-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 2, implicit $exec + ; GCN-NEXT: [[V_MOV_B2:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 3, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B2]] ; GCN-NEXT: S_ENDPGM 0 %0:vreg_64_align2 = V_MOV_B64_PSEUDO 1, implicit $exec %1:vreg_64_align2 = V_MOV_B64_PSEUDO 
2, implicit $exec @@ -193,12 +185,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cvt_i32_f64_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode @@ -216,16 +208,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_fp_except - ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load 
(s32) from %stack.1, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode @@ -245,16 +233,12 @@ body: | bb.0: ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_mode_def ; GCN: $mode = IMPLICIT_DEF - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from 
%stack.0, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]] ; GCN-NEXT: S_ENDPGM 0 $mode = IMPLICIT_DEF %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode @@ -271,12 +255,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cvt_i32_f64_e64 - ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, 1, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e64 0, 2, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, 3, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_I32_F64_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, 1, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_I32_F64_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, 2, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_I32_F64_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, 3, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, 1, 0, 0, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept 
V_CVT_I32_F64_e64 0, 2, 0, 0, implicit $exec, implicit $mode @@ -294,12 +278,12 @@ machineFunctionInfo: body: | bb.0: ; GCN-LABEL: name: test_remat_v_cvt_i32_f64_e64_undef - ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_I32_F64_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, undef %1:vreg_64, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_I32_F64_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, undef %1:vreg_64, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_I32_F64_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, undef %1:vreg_64, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, undef %0:vreg_64, 0, 0, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, undef %0:vreg_64, 0, 0, implicit $exec, implicit $mode @@ -317,16 +301,12 @@ machineFunctionInfo: body: | bb.0: ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_dpp - ; GCN: renamable $vgpr0 = V_CVT_I32_F64_dpp undef $vgpr0, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_dpp undef $vgpr1, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, 
implicit $mode - ; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_dpp undef $vgpr0, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_I32_F64_dpp:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_dpp undef [[V_CVT_I32_F64_dpp]], 0, undef %1:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_I32_F64_dpp1:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_dpp undef [[V_CVT_I32_F64_dpp1]], 0, undef %1:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_I32_F64_dpp2:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_dpp undef [[V_CVT_I32_F64_dpp2]], 0, undef %1:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_dpp]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_dpp1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_dpp2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_CVT_I32_F64_dpp undef %1:vgpr_32, 0, undef %0:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode %2:vgpr_32 = V_CVT_I32_F64_dpp undef %2:vgpr_32, 0, undef %0:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode @@ -344,16 +324,12 @@ machineFunctionInfo: body: | bb.0: ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_imp_def - ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0 - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, 
addrspace 5) - ; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0 - ; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0 - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0 + ; GCN-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0 + ; GCN-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0 + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0 %1:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0 @@ -371,16 +347,12 @@ machineFunctionInfo: body: | bb.0: ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_imp_use - ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit $m0 - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit $m0 - ; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_e32 
3, implicit $exec, implicit $mode, implicit $m0 - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_I32_F64_e32_:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit $m0 + ; GCN-NEXT: [[V_CVT_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit $m0 + ; GCN-NEXT: [[V_CVT_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit $m0 + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F64_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit $m0 %1:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit $m0 @@ -396,12 +368,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cvt_f64_i32_e32 - ; GCN: renamable $vgpr0_vgpr1 = V_CVT_F64_I32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_CVT_F64_I32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_CVT_F64_I32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN: [[V_CVT_F64_I32_e32_:%[0-9]+]]:vreg_64_align2 = V_CVT_F64_I32_e32 1, implicit $exec, implicit $mode + ; 
GCN-NEXT: [[V_CVT_F64_I32_e32_1:%[0-9]+]]:vreg_64_align2 = V_CVT_F64_I32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_F64_I32_e32_2:%[0-9]+]]:vreg_64_align2 = V_CVT_F64_I32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_I32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_I32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_I32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vreg_64_align2 = V_CVT_F64_I32_e32 1, implicit $exec, implicit $mode %1:vreg_64_align2 = V_CVT_F64_I32_e32 2, implicit $exec, implicit $mode @@ -417,12 +389,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cvt_f32_f64_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_CVT_F32_F64_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_F32_F64_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_F32_F64_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_F32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F64_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_F32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F64_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_F32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F64_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_F64_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_F64_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_F64_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_CVT_F32_F64_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_CVT_F32_F64_e32 2, implicit $exec, implicit $mode @@ -438,12 +410,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cvt_f64_f32_e32 - ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_CVT_F64_F32_e32 1, implicit 
$exec, implicit $mode - ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_CVT_F64_F32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_CVT_F64_F32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN: [[V_CVT_F64_F32_e32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_CVT_F64_F32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_F64_F32_e32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_CVT_F64_F32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_F64_F32_e32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_CVT_F64_F32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vreg_64_align2 = nofpexcept V_CVT_F64_F32_e32 1, implicit $exec, implicit $mode %1:vreg_64_align2 = nofpexcept V_CVT_F64_F32_e32 2, implicit $exec, implicit $mode @@ -459,12 +431,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cvt_u32_f64_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_CVT_U32_F64_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_U32_F64_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_U32_F64_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_U32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F64_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_U32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F64_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_U32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept 
V_CVT_U32_F64_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_U32_F64_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_U32_F64_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_U32_F64_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_CVT_U32_F64_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_CVT_U32_F64_e32 2, implicit $exec, implicit $mode @@ -480,12 +452,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cvt_f64_u32_e32 - ; GCN: renamable $vgpr0_vgpr1 = V_CVT_F64_U32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_CVT_F64_U32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_CVT_F64_U32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN: [[V_CVT_F64_U32_e32_:%[0-9]+]]:vreg_64_align2 = V_CVT_F64_U32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_F64_U32_e32_1:%[0-9]+]]:vreg_64_align2 = V_CVT_F64_U32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_F64_U32_e32_2:%[0-9]+]]:vreg_64_align2 = V_CVT_F64_U32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_U32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_U32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F64_U32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vreg_64_align2 = V_CVT_F64_U32_e32 1, implicit $exec, implicit $mode %1:vreg_64_align2 = V_CVT_F64_U32_e32 2, implicit $exec, implicit $mode @@ -501,12 +473,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cvt_f32_i32_e32 - ; GCN: renamable $vgpr0 = V_CVT_F32_I32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_I32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed 
renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_I32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_F32_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_F32_I32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_F32_I32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = V_CVT_F32_I32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = V_CVT_F32_I32_e32 2, implicit $exec, implicit $mode @@ -522,12 +494,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cvt_f32_i32_sdwa - ; GCN: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_F32_I32_sdwa:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_F32_I32_sdwa1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_F32_I32_sdwa2:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_sdwa]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_sdwa1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_sdwa2]] 
; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit $mode %2:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit $mode @@ -547,16 +519,12 @@ machineFunctionInfo: body: | bb.0: ; GCN-LABEL: name: test_no_remat_v_cvt_f32_i32_sdwa_dst_unused_preserve - ; GCN: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef $vgpr0(tied-def 0) - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef $vgpr1(tied-def 0) - ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef $vgpr0(tied-def 0) - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_F32_I32_sdwa:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %1:vgpr_32, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef [[V_CVT_F32_I32_sdwa]](tied-def 0) + ; GCN-NEXT: [[V_CVT_F32_I32_sdwa1:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %1:vgpr_32, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef [[V_CVT_F32_I32_sdwa1]](tied-def 0) + ; GCN-NEXT: [[V_CVT_F32_I32_sdwa2:%[0-9]+]]:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %1:vgpr_32, 0, 0, 0, 2, 0, implicit $exec, 
implicit $mode, implicit undef [[V_CVT_F32_I32_sdwa2]](tied-def 0) + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_sdwa]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_sdwa1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_I32_sdwa2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef %1:vgpr_32(tied-def 0) %2:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef %2:vgpr_32(tied-def 0) @@ -572,12 +540,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cvt_f32_u32_e32 - ; GCN: renamable $vgpr0 = V_CVT_F32_U32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_U32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_U32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_F32_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_F32_U32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_F32_U32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_U32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_U32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_U32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = V_CVT_F32_U32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = V_CVT_F32_U32_e32 2, implicit $exec, implicit $mode @@ -593,12 +561,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cvt_u32_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_CVT_U32_F32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_U32_F32_e32 2, implicit $exec, implicit $mode - ; 
GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_U32_F32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_U32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_U32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_U32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_U32_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_U32_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_U32_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 2, implicit $exec, implicit $mode @@ -614,12 +582,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cvt_i32_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_I32_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, 
implicit [[V_CVT_I32_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode @@ -635,12 +603,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cvt_f32_f16_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_CVT_F32_F16_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_F32_F16_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_F32_F16_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_F32_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_F32_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_F32_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_F16_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_F16_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_F16_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 2, implicit $exec, implicit $mode @@ -656,12 +624,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cvt_rpi_i32_f32_e32 - ; GCN: renamable $vgpr0 = V_CVT_RPI_I32_F32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = V_CVT_RPI_I32_F32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_CVT_RPI_I32_F32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable 
$vgpr0 + ; GCN: [[V_CVT_RPI_I32_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_RPI_I32_F32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_RPI_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_RPI_I32_F32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_RPI_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_RPI_I32_F32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_RPI_I32_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_RPI_I32_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_RPI_I32_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = V_CVT_RPI_I32_F32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = V_CVT_RPI_I32_F32_e32 2, implicit $exec, implicit $mode @@ -677,12 +645,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cvt_flr_i32_f32_e32 - ; GCN: renamable $vgpr0 = V_CVT_FLR_I32_F32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = V_CVT_FLR_I32_F32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_CVT_FLR_I32_F32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_FLR_I32_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_FLR_I32_F32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_FLR_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_FLR_I32_F32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_FLR_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_FLR_I32_F32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_FLR_I32_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_FLR_I32_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_FLR_I32_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = V_CVT_FLR_I32_F32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = V_CVT_FLR_I32_F32_e32 2, implicit $exec, implicit $mode @@ -698,12 +666,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: 
test_remat_v_cvt_off_f32_i4_e32 - ; GCN: renamable $vgpr0 = V_CVT_OFF_F32_I4_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = V_CVT_OFF_F32_I4_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_CVT_OFF_F32_I4_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_OFF_F32_I4_e32_:%[0-9]+]]:vgpr_32 = V_CVT_OFF_F32_I4_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_OFF_F32_I4_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_OFF_F32_I4_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_OFF_F32_I4_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_OFF_F32_I4_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_OFF_F32_I4_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_OFF_F32_I4_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_OFF_F32_I4_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = V_CVT_OFF_F32_I4_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = V_CVT_OFF_F32_I4_e32 2, implicit $exec, implicit $mode @@ -719,12 +687,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cvt_f32_ubyte0_e32 - ; GCN: renamable $vgpr0 = V_CVT_F32_UBYTE0_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_UBYTE0_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_UBYTE0_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_F32_UBYTE0_e32_:%[0-9]+]]:vgpr_32 = V_CVT_F32_UBYTE0_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_F32_UBYTE0_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_F32_UBYTE0_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_F32_UBYTE0_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_F32_UBYTE0_e32 3, implicit $exec, implicit 
$mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_UBYTE0_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_UBYTE0_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_F32_UBYTE0_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = V_CVT_F32_UBYTE0_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = V_CVT_F32_UBYTE0_e32 2, implicit $exec, implicit $mode @@ -740,12 +708,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_fract_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_FRACT_F32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FRACT_F32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FRACT_F32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_FRACT_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FRACT_F32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FRACT_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FRACT_F32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FRACT_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FRACT_F32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_FRACT_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FRACT_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FRACT_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_FRACT_F32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_FRACT_F32_e32 2, implicit $exec, implicit $mode @@ -761,12 +729,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_trunc_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_TRUNC_F32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_TRUNC_F32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = 
nofpexcept V_TRUNC_F32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_TRUNC_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_TRUNC_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_TRUNC_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_TRUNC_F32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_TRUNC_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_TRUNC_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_TRUNC_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_TRUNC_F32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_TRUNC_F32_e32 2, implicit $exec, implicit $mode @@ -782,12 +750,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_ceil_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_CEIL_F32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CEIL_F32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CEIL_F32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CEIL_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CEIL_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CEIL_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CEIL_F32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CEIL_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CEIL_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CEIL_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_CEIL_F32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_CEIL_F32_e32 2, implicit $exec, implicit $mode @@ 
-803,12 +771,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_rndne_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_RNDNE_F32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_RNDNE_F32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_RNDNE_F32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_RNDNE_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_RNDNE_F32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_RNDNE_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_RNDNE_F32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_RNDNE_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_RNDNE_F32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_RNDNE_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_RNDNE_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_RNDNE_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_RNDNE_F32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_RNDNE_F32_e32 2, implicit $exec, implicit $mode @@ -824,12 +792,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_floor_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_FLOOR_F32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FLOOR_F32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FLOOR_F32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_FLOOR_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FLOOR_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F32_e32 2, implicit $exec, implicit $mode + ; 
GCN-NEXT: [[V_FLOOR_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FLOOR_F32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_FLOOR_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FLOOR_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FLOOR_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_FLOOR_F32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_FLOOR_F32_e32 2, implicit $exec, implicit $mode @@ -845,12 +813,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_exp_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_EXP_F32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_EXP_F32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_EXP_F32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_EXP_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_F32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_EXP_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_F32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_EXP_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_F32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_EXP_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_EXP_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_EXP_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_EXP_F32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_EXP_F32_e32 2, implicit $exec, implicit $mode @@ -866,12 +834,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_log_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_LOG_F32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LOG_F32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, 
implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_LOG_F32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_LOG_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_LOG_F32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_LOG_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_LOG_F32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_LOG_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_LOG_F32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_LOG_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LOG_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LOG_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_LOG_F32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_LOG_F32_e32 2, implicit $exec, implicit $mode @@ -887,12 +855,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_rcp_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_RCP_F32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_RCP_F32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_RCP_F32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_RCP_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_RCP_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_RCP_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_F32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_RCP_F32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_RCP_F32_e32 2, implicit 
$exec, implicit $mode @@ -908,12 +876,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_rcp_iflag_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_RCP_IFLAG_F32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_RCP_IFLAG_F32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_RCP_IFLAG_F32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_RCP_IFLAG_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_RCP_IFLAG_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_RCP_IFLAG_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_IFLAG_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_IFLAG_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_IFLAG_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e32 2, implicit $exec, implicit $mode @@ -929,12 +897,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_rsq_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_RSQ_F32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_RSQ_F32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_RSQ_F32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_RSQ_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: 
[[V_RSQ_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_RSQ_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_RSQ_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_RSQ_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_RSQ_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_RSQ_F32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_RSQ_F32_e32 2, implicit $exec, implicit $mode @@ -950,12 +918,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_sqrt_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_SQRT_F32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SQRT_F32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_SQRT_F32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_SQRT_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_SQRT_F32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_SQRT_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SQRT_F32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_SQRT_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SQRT_F32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_SQRT_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SQRT_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SQRT_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_SQRT_F32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_SQRT_F32_e32 2, implicit $exec, implicit $mode @@ -971,12 +939,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_rcp_f64_e32 - ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_RCP_F64_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_RCP_F64_e32 
2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_RCP_F64_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN: [[V_RCP_F64_e32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_RCP_F64_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_RCP_F64_e32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_RCP_F64_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_RCP_F64_e32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_RCP_F64_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_F64_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_F64_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_RCP_F64_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vreg_64_align2 = nofpexcept V_RCP_F64_e32 1, implicit $exec, implicit $mode %1:vreg_64_align2 = nofpexcept V_RCP_F64_e32 2, implicit $exec, implicit $mode @@ -992,12 +960,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_rsq_f64_e32 - ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_RSQ_F64_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_RSQ_F64_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_RSQ_F64_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN: [[V_RSQ_F64_e32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_RSQ_F64_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_RSQ_F64_e32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_RSQ_F64_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_RSQ_F64_e32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_RSQ_F64_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_RSQ_F64_e32_]] + ; GCN-NEXT: S_NOP 
0, implicit [[V_RSQ_F64_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_RSQ_F64_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vreg_64_align2 = nofpexcept V_RSQ_F64_e32 1, implicit $exec, implicit $mode %1:vreg_64_align2 = nofpexcept V_RSQ_F64_e32 2, implicit $exec, implicit $mode @@ -1013,12 +981,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_sqrt_f64_e32 - ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_SQRT_F64_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_SQRT_F64_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_SQRT_F64_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN: [[V_SQRT_F64_e32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_SQRT_F64_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_SQRT_F64_e32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_SQRT_F64_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_SQRT_F64_e32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_SQRT_F64_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_SQRT_F64_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SQRT_F64_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SQRT_F64_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vreg_64_align2 = nofpexcept V_SQRT_F64_e32 1, implicit $exec, implicit $mode %1:vreg_64_align2 = nofpexcept V_SQRT_F64_e32 2, implicit $exec, implicit $mode @@ -1034,12 +1002,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_sin_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_SIN_F32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SIN_F32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_SIN_F32_e32 
3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_SIN_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_SIN_F32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_SIN_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SIN_F32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_SIN_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SIN_F32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_SIN_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SIN_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SIN_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_SIN_F32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_SIN_F32_e32 2, implicit $exec, implicit $mode @@ -1055,12 +1023,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cos_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_COS_F32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_COS_F32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_COS_F32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_COS_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_COS_F32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_COS_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_COS_F32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_COS_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_COS_F32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_COS_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_COS_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_COS_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_COS_F32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_COS_F32_e32 2, implicit $exec, implicit $mode @@ -1076,12 +1044,12 @@ tracksRegLiveness: true body: | bb.0: ; 
GCN-LABEL: name: test_remat_v_not_b32_e32 - ; GCN: renamable $vgpr0 = V_NOT_B32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = V_NOT_B32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_NOT_B32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_NOT_B32_e32_:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_NOT_B32_e32_1:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_NOT_B32_e32_2:%[0-9]+]]:vgpr_32 = V_NOT_B32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_NOT_B32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_NOT_B32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_NOT_B32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = V_NOT_B32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = V_NOT_B32_e32 2, implicit $exec, implicit $mode @@ -1097,12 +1065,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_bfrev_b32_e32 - ; GCN: renamable $vgpr0 = V_BFREV_B32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = V_BFREV_B32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_BFREV_B32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_BFREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_BFREV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_BFREV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_BFREV_B32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_BFREV_B32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_BFREV_B32_e32_1]] + ; GCN-NEXT: S_NOP 0, 
implicit [[V_BFREV_B32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = V_BFREV_B32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = V_BFREV_B32_e32 2, implicit $exec, implicit $mode @@ -1118,12 +1086,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_ffbh_u32_e32 - ; GCN: renamable $vgpr0 = V_FFBH_U32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = V_FFBH_U32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_FFBH_U32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_FFBH_U32_e32_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FFBH_U32_e32_1:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FFBH_U32_e32_2:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_FFBH_U32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FFBH_U32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FFBH_U32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = V_FFBH_U32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = V_FFBH_U32_e32 2, implicit $exec, implicit $mode @@ -1139,12 +1107,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_ffbl_b32_e32 - ; GCN: renamable $vgpr0 = V_FFBL_B32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = V_FFBL_B32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_FFBL_B32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_FFBL_B32_e32_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FFBL_B32_e32_1:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e32 2, implicit $exec, 
implicit $mode + ; GCN-NEXT: [[V_FFBL_B32_e32_2:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_FFBL_B32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FFBL_B32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FFBL_B32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = V_FFBL_B32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = V_FFBL_B32_e32 2, implicit $exec, implicit $mode @@ -1160,12 +1128,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_ffbh_i32_e32 - ; GCN: renamable $vgpr0 = V_FFBH_I32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = V_FFBH_I32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_FFBH_I32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_FFBH_I32_e32_:%[0-9]+]]:vgpr_32 = V_FFBH_I32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FFBH_I32_e32_1:%[0-9]+]]:vgpr_32 = V_FFBH_I32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FFBH_I32_e32_2:%[0-9]+]]:vgpr_32 = V_FFBH_I32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_FFBH_I32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FFBH_I32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FFBH_I32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = V_FFBH_I32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = V_FFBH_I32_e32 2, implicit $exec, implicit $mode @@ -1181,12 +1149,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_frexp_exp_i32_f64_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_FREXP_EXP_I32_F64_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FREXP_EXP_I32_F64_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable 
$vgpr0 = nofpexcept V_FREXP_EXP_I32_F64_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_FREXP_EXP_I32_F64_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FREXP_EXP_I32_F64_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FREXP_EXP_I32_F64_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_EXP_I32_F64_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_EXP_I32_F64_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_EXP_I32_F64_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e32 2, implicit $exec, implicit $mode @@ -1202,12 +1170,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_frexp_mant_f64_e32 - ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_FREXP_MANT_F64_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_FREXP_MANT_F64_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_FREXP_MANT_F64_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN: [[V_FREXP_MANT_F64_e32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FREXP_MANT_F64_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FREXP_MANT_F64_e32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FREXP_MANT_F64_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FREXP_MANT_F64_e32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FREXP_MANT_F64_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_MANT_F64_e32_]] + ; GCN-NEXT: S_NOP 0, implicit 
[[V_FREXP_MANT_F64_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_MANT_F64_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vreg_64_align2 = nofpexcept V_FREXP_MANT_F64_e32 1, implicit $exec, implicit $mode %1:vreg_64_align2 = nofpexcept V_FREXP_MANT_F64_e32 2, implicit $exec, implicit $mode @@ -1223,12 +1191,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_fract_f64_e32 - ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_FRACT_F64_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_FRACT_F64_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_FRACT_F64_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN: [[V_FRACT_F64_e32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FRACT_F64_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FRACT_F64_e32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FRACT_F64_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FRACT_F64_e32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FRACT_F64_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_FRACT_F64_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FRACT_F64_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FRACT_F64_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vreg_64_align2 = nofpexcept V_FRACT_F64_e32 1, implicit $exec, implicit $mode %1:vreg_64_align2 = nofpexcept V_FRACT_F64_e32 2, implicit $exec, implicit $mode @@ -1244,12 +1212,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_frexp_exp_i32_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_FREXP_EXP_I32_F32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FREXP_EXP_I32_F32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable 
$vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FREXP_EXP_I32_F32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_FREXP_EXP_I32_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FREXP_EXP_I32_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FREXP_EXP_I32_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_EXP_I32_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_EXP_I32_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_EXP_I32_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F32_e32 2, implicit $exec, implicit $mode @@ -1265,12 +1233,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_frexp_mant_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_FREXP_MANT_F32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FREXP_MANT_F32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FREXP_MANT_F32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_FREXP_MANT_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_MANT_F32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FREXP_MANT_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_MANT_F32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FREXP_MANT_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FREXP_MANT_F32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_MANT_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FREXP_MANT_F32_e32_1]] + ; GCN-NEXT: 
S_NOP 0, implicit [[V_FREXP_MANT_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_FREXP_MANT_F32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_FREXP_MANT_F32_e32 2, implicit $exec, implicit $mode @@ -1286,12 +1254,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_exp_legacy_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_EXP_LEGACY_F32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_EXP_LEGACY_F32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_EXP_LEGACY_F32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_EXP_LEGACY_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_LEGACY_F32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_EXP_LEGACY_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_LEGACY_F32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_EXP_LEGACY_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_EXP_LEGACY_F32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_EXP_LEGACY_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_EXP_LEGACY_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_EXP_LEGACY_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_EXP_LEGACY_F32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_EXP_LEGACY_F32_e32 2, implicit $exec, implicit $mode @@ -1307,12 +1275,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_log_legacy_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_LOG_LEGACY_F32_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LOG_LEGACY_F32_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept 
V_LOG_LEGACY_F32_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_LOG_LEGACY_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_LOG_LEGACY_F32_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_LOG_LEGACY_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_LOG_LEGACY_F32_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_LOG_LEGACY_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_LOG_LEGACY_F32_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_LOG_LEGACY_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LOG_LEGACY_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LOG_LEGACY_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = nofpexcept V_LOG_LEGACY_F32_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = nofpexcept V_LOG_LEGACY_F32_e32 2, implicit $exec, implicit $mode @@ -1328,12 +1296,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_sat_pk_u8_i16_e32 - ; GCN: renamable $vgpr0 = V_SAT_PK_U8_I16_e32 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = V_SAT_PK_U8_I16_e32 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_SAT_PK_U8_I16_e32 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_SAT_PK_U8_I16_e32_:%[0-9]+]]:vgpr_32 = V_SAT_PK_U8_I16_e32 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_SAT_PK_U8_I16_e32_1:%[0-9]+]]:vgpr_32 = V_SAT_PK_U8_I16_e32 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_SAT_PK_U8_I16_e32_2:%[0-9]+]]:vgpr_32 = V_SAT_PK_U8_I16_e32 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_SAT_PK_U8_I16_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SAT_PK_U8_I16_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SAT_PK_U8_I16_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = V_SAT_PK_U8_I16_e32 1, implicit $exec, implicit $mode %1:vgpr_32 = V_SAT_PK_U8_I16_e32 
2, implicit $exec, implicit $mode @@ -1349,12 +1317,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_accvgpr_mov_b32 - ; GCN: renamable $agpr0 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec - ; GCN-NEXT: renamable $agpr1 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr1 - ; GCN-NEXT: renamable $agpr0 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr0 + ; GCN: [[V_ACCVGPR_MOV_B32_:%[0-9]+]]:agpr_32 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec + ; GCN-NEXT: [[V_ACCVGPR_MOV_B32_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec + ; GCN-NEXT: [[V_ACCVGPR_MOV_B32_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_MOV_B32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_MOV_B32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ACCVGPR_MOV_B32_2]] ; GCN-NEXT: S_ENDPGM 0 %0:agpr_32 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec %1:agpr_32 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec @@ -1372,16 +1340,12 @@ machineFunctionInfo: body: | bb.0: ; GCN-LABEL: name: test_no_remat_v_cndmask_b32_e32 - ; GCN: renamable $vgpr0 = V_CNDMASK_B32_e32 1, undef $vgpr0, implicit $exec, implicit undef $vcc - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $vgpr1 = V_CNDMASK_B32_e32 1, undef $vgpr0, implicit $exec, implicit undef $vcc - ; GCN-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e32 1, undef $vgpr0, implicit $exec, implicit undef $vcc - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit 
killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CNDMASK_B32_e32_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 1, undef %1:vgpr_32, implicit $exec, implicit undef $vcc + ; GCN-NEXT: [[V_CNDMASK_B32_e32_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 1, undef %1:vgpr_32, implicit $exec, implicit undef $vcc + ; GCN-NEXT: [[V_CNDMASK_B32_e32_2:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 1, undef %1:vgpr_32, implicit $exec, implicit undef $vcc + ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_CNDMASK_B32_e32 1, undef %0:vgpr_32, implicit $exec, implicit undef $vcc %2:vgpr_32 = V_CNDMASK_B32_e32 1, undef %0:vgpr_32, implicit $exec, implicit undef $vcc @@ -1399,16 +1363,12 @@ machineFunctionInfo: body: | bb.0: ; GCN-LABEL: name: test_no_remat_v_cndmask_b32_sdwa - ; GCN: renamable $vgpr0 = V_CNDMASK_B32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $vgpr1 = V_CNDMASK_B32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc - ; GCN-NEXT: renamable $vgpr0 = V_CNDMASK_B32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: 
S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CNDMASK_B32_sdwa:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_sdwa 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc + ; GCN-NEXT: [[V_CNDMASK_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_sdwa 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc + ; GCN-NEXT: [[V_CNDMASK_B32_sdwa2:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_sdwa 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc + ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_sdwa]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_sdwa1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_sdwa2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_CNDMASK_B32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc %2:vgpr_32 = V_CNDMASK_B32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc @@ -1426,16 +1386,12 @@ machineFunctionInfo: body: | bb.0: ; GCN-LABEL: name: test_no_remat_v_cndmask_b32_dpp - ; GCN: renamable $vgpr0 = V_CNDMASK_B32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit undef $vcc - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $vgpr1 = V_CNDMASK_B32_dpp undef $vgpr1, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit undef $vcc - ; GCN-NEXT: renamable $vgpr0 = V_CNDMASK_B32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit undef $vcc - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: 
renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CNDMASK_B32_dpp:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_dpp undef [[V_CNDMASK_B32_dpp]], 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc + ; GCN-NEXT: [[V_CNDMASK_B32_dpp1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_dpp undef [[V_CNDMASK_B32_dpp1]], 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc + ; GCN-NEXT: [[V_CNDMASK_B32_dpp2:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_dpp undef [[V_CNDMASK_B32_dpp2]], 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc + ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_dpp]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_dpp1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_dpp2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_CNDMASK_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc %2:vgpr_32 = V_CNDMASK_B32_dpp undef %2:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc @@ -1451,12 +1407,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cndmask_b32_e64 - ; GCN: renamable $vgpr0 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec - ; GCN-NEXT: 
S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec + ; GCN-NEXT: [[V_CNDMASK_B32_e64_2:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CNDMASK_B32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef %0:sreg_64_xexec, implicit $exec %2:vgpr_32 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef %0:sreg_64_xexec, implicit $exec @@ -1472,12 +1428,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_madmk_f32 - ; GCN: renamable $vgpr0 = nofpexcept V_MADMK_F32 1, 1, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADMK_F32 2, 2, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MADMK_F32 3, 3, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MADMK_F32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MADMK_F32 1, 1, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MADMK_F32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MADMK_F32 2, 2, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MADMK_F32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MADMK_F32 3, 3, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_MADMK_F32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MADMK_F32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MADMK_F32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_MADMK_F32 1, 1, undef %0:vgpr_32, implicit $exec, implicit $mode %2:vgpr_32 = 
nofpexcept V_MADMK_F32 2, 2, undef %0:vgpr_32, implicit $exec, implicit $mode @@ -1493,12 +1449,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_add_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_ADD_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_ADD_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_ADD_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_ADD_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_ADD_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_ADD_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode @@ -1514,12 +1470,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_add_f32_e64 - ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_e64 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F32_e64 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_ADD_F32_e64 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, 
implicit killed renamable $vgpr0 + ; GCN: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_ADD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_ADD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode @@ -1535,12 +1491,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_add_f32_sdwa - ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_ADD_F32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_ADD_F32_sdwa:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_sdwa 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_ADD_F32_sdwa1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_sdwa 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_ADD_F32_sdwa2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_sdwa 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 0, 0, 0, 0, 0, 0, 
implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_sdwa]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_sdwa1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_sdwa2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_ADD_F32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_ADD_F32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode @@ -1558,16 +1514,12 @@ machineFunctionInfo: body: | bb.0: ; GCN-LABEL: name: test_no_remat_v_add_f32_dpp - ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit $mode - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F32_dpp undef $vgpr1, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_ADD_F32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit $mode - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_ADD_F32_dpp:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_dpp undef [[V_ADD_F32_dpp]], 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_ADD_F32_dpp1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_dpp undef 
[[V_ADD_F32_dpp1]], 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_ADD_F32_dpp2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_dpp undef [[V_ADD_F32_dpp2]], 0, undef %1:vgpr_32, 0, undef %1:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_dpp]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_dpp1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F32_dpp2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %2:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit $mode @@ -1583,12 +1535,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_sub_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_SUB_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUB_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_SUB_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_SUB_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_SUB_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_SUB_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_SUB_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode 
%2:vgpr_32 = nofpexcept V_SUB_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode @@ -1604,12 +1556,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_subrev_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_SUBREV_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUBREV_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_SUBREV_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_SUBREV_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_SUBREV_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_SUBREV_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_SUBREV_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_SUBREV_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode @@ -1625,12 +1577,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_mul_legacy_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_MUL_LEGACY_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_LEGACY_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MUL_LEGACY_F32_e32 3, undef $vgpr0, implicit 
$exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MUL_LEGACY_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_LEGACY_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MUL_LEGACY_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_LEGACY_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MUL_LEGACY_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_LEGACY_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LEGACY_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LEGACY_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LEGACY_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_MUL_LEGACY_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_MUL_LEGACY_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode @@ -1646,12 +1598,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_mul_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_MUL_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MUL_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MUL_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MUL_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MUL_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit 
[[V_MUL_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_MUL_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_MUL_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode @@ -1667,12 +1619,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_mul_i32_i24_e32 - ; GCN: renamable $vgpr0 = V_MUL_I32_I24_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MUL_I32_I24_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MUL_I32_I24_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MUL_I32_I24_e32_:%[0-9]+]]:vgpr_32 = V_MUL_I32_I24_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MUL_I32_I24_e32_1:%[0-9]+]]:vgpr_32 = V_MUL_I32_I24_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MUL_I32_I24_e32_2:%[0-9]+]]:vgpr_32 = V_MUL_I32_I24_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_I32_I24_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_I32_I24_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_I32_I24_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MUL_I32_I24_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MUL_I32_I24_e32 2, undef %0:vgpr_32, implicit $exec @@ -1688,12 +1640,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_mul_hi_i32_i24_e32 - ; GCN: renamable $vgpr0 = V_MUL_HI_I32_I24_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MUL_HI_I32_I24_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MUL_HI_I32_I24_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MUL_HI_I32_I24_e32_:%[0-9]+]]:vgpr_32 = 
V_MUL_HI_I32_I24_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MUL_HI_I32_I24_e32_1:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MUL_HI_I32_I24_e32_2:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_I32_I24_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_I32_I24_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_I32_I24_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MUL_HI_I32_I24_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MUL_HI_I32_I24_e32 2, undef %0:vgpr_32, implicit $exec @@ -1709,12 +1661,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_mul_u32_u24_e32 - ; GCN: renamable $vgpr0 = V_MUL_U32_U24_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MUL_U32_U24_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MUL_U32_U24_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MUL_U32_U24_e32_:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MUL_U32_U24_e32_1:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MUL_U32_U24_e32_2:%[0-9]+]]:vgpr_32 = V_MUL_U32_U24_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_U32_U24_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_U32_U24_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_U32_U24_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MUL_U32_U24_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MUL_U32_U24_e32 2, undef %0:vgpr_32, implicit $exec @@ -1730,12 +1682,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_mul_hi_u32_u24_e32 - ; GCN: renamable $vgpr0 = V_MUL_HI_U32_U24_e32 1, undef $vgpr0, implicit $exec - ; 
GCN-NEXT: renamable $vgpr1 = V_MUL_HI_U32_U24_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MUL_HI_U32_U24_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MUL_HI_U32_U24_e32_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MUL_HI_U32_U24_e32_1:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MUL_HI_U32_U24_e32_2:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_U32_U24_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_U32_U24_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_U32_U24_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MUL_HI_U32_U24_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MUL_HI_U32_U24_e32 2, undef %0:vgpr_32, implicit $exec @@ -1751,12 +1703,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_min_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_MIN_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MIN_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MIN_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MIN_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MIN_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: 
S_NOP 0, implicit [[V_MIN_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_MIN_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_MIN_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode @@ -1772,12 +1724,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_max_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_MAX_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MAX_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MAX_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MAX_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MAX_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_MAX_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_MAX_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode @@ -1793,12 +1745,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_min_i32_e32 - ; GCN: renamable $vgpr0 = V_MIN_I32_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MIN_I32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: 
S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MIN_I32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MIN_I32_e32_:%[0-9]+]]:vgpr_32 = V_MIN_I32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MIN_I32_e32_1:%[0-9]+]]:vgpr_32 = V_MIN_I32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MIN_I32_e32_2:%[0-9]+]]:vgpr_32 = V_MIN_I32_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MIN_I32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MIN_I32_e32 2, undef %0:vgpr_32, implicit $exec @@ -1814,12 +1766,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_max_i32_e32 - ; GCN: renamable $vgpr0 = V_MAX_I32_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MAX_I32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MAX_I32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MAX_I32_e32_:%[0-9]+]]:vgpr_32 = V_MAX_I32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MAX_I32_e32_1:%[0-9]+]]:vgpr_32 = V_MAX_I32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MAX_I32_e32_2:%[0-9]+]]:vgpr_32 = V_MAX_I32_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MAX_I32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MAX_I32_e32 2, undef %0:vgpr_32, implicit $exec @@ -1835,12 +1787,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_min_u32_e32 - 
; GCN: renamable $vgpr0 = V_MIN_U32_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MIN_U32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MIN_U32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MIN_U32_e32_:%[0-9]+]]:vgpr_32 = V_MIN_U32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MIN_U32_e32_1:%[0-9]+]]:vgpr_32 = V_MIN_U32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MIN_U32_e32_2:%[0-9]+]]:vgpr_32 = V_MIN_U32_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MIN_U32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MIN_U32_e32 2, undef %0:vgpr_32, implicit $exec @@ -1856,12 +1808,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_max_u32_e32 - ; GCN: renamable $vgpr0 = V_MAX_U32_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MAX_U32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MAX_U32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MAX_U32_e32_:%[0-9]+]]:vgpr_32 = V_MAX_U32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MAX_U32_e32_1:%[0-9]+]]:vgpr_32 = V_MAX_U32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MAX_U32_e32_2:%[0-9]+]]:vgpr_32 = V_MAX_U32_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = 
V_MAX_U32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MAX_U32_e32 2, undef %0:vgpr_32, implicit $exec @@ -1877,12 +1829,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_lshrrev_b32_e32 - ; GCN: renamable $vgpr0 = V_LSHRREV_B32_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_LSHRREV_B32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_LSHRREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_LSHRREV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_LSHRREV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_LSHRREV_B32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_LSHRREV_B32_e32 2, undef %0:vgpr_32, implicit $exec @@ -1898,12 +1850,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_lshlrev_b32_e32 - ; GCN: renamable $vgpr0 = V_LSHLREV_B32_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_LSHLREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_LSHLREV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e32 2, undef 
%1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_LSHLREV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_LSHLREV_B32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_LSHLREV_B32_e32 2, undef %0:vgpr_32, implicit $exec @@ -1919,12 +1871,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_ashrrev_i32_e32 - ; GCN: renamable $vgpr0 = V_ASHRREV_I32_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_ASHRREV_I32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_ASHRREV_I32_e32_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_ASHRREV_I32_e32_1:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_ASHRREV_I32_e32_2:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_ASHRREV_I32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_ASHRREV_I32_e32 2, undef %0:vgpr_32, implicit $exec @@ -1940,12 +1892,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_and_b32_e32 - ; GCN: renamable $vgpr0 = V_AND_B32_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_AND_B32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable 
$vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_AND_B32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e32_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_AND_B32_e32_2:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_AND_B32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_AND_B32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_AND_B32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_AND_B32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_AND_B32_e32 2, undef %0:vgpr_32, implicit $exec @@ -1961,12 +1913,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_or_b32_e32 - ; GCN: renamable $vgpr0 = V_OR_B32_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_OR_B32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_OR_B32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_OR_B32_e32_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_OR_B32_e32_2:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_OR_B32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_OR_B32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_OR_B32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_OR_B32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_OR_B32_e32 2, undef %0:vgpr_32, implicit $exec @@ -1982,12 +1934,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_xor_b32_e32 - ; GCN: renamable $vgpr0 = V_XOR_B32_e32 1, undef 
$vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_XOR_B32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_XOR_B32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_XOR_B32_e32_1:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_XOR_B32_e32_2:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_XOR_B32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_XOR_B32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_XOR_B32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_XOR_B32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_XOR_B32_e32 2, undef %0:vgpr_32, implicit $exec @@ -2003,12 +1955,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_madak_f32 - ; GCN: renamable $vgpr0 = nofpexcept V_MADAK_F32 1, undef $vgpr0, 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADAK_F32 2, undef $vgpr0, 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MADAK_F32 3, undef $vgpr0, 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MADAK_F32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MADAK_F32 1, undef %1:vgpr_32, 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MADAK_F32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MADAK_F32 2, undef %1:vgpr_32, 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MADAK_F32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MADAK_F32 3, undef %1:vgpr_32, 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_MADAK_F32_]] + ; GCN-NEXT: S_NOP 0, implicit 
[[V_MADAK_F32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MADAK_F32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_MADAK_F32 1, undef %0:vgpr_32, 1, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_MADAK_F32 2, undef %0:vgpr_32, 2, implicit $exec, implicit $mode @@ -2024,12 +1976,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_add_u32_e32 - ; GCN: renamable $vgpr0 = V_ADD_U32_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_ADD_U32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_ADD_U32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_ADD_U32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_ADD_U32_e32 2, undef %0:vgpr_32, implicit $exec @@ -2045,12 +1997,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_sub_u32_e32 - ; GCN: renamable $vgpr0 = V_SUB_U32_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_SUB_U32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_SUB_U32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_SUB_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: 
[[V_SUB_U32_e32_1:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_SUB_U32_e32_2:%[0-9]+]]:vgpr_32 = V_SUB_U32_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_SUB_U32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_SUB_U32_e32 2, undef %0:vgpr_32, implicit $exec @@ -2066,12 +2018,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_subrev_u32_e32 - ; GCN: renamable $vgpr0 = V_SUBREV_U32_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_SUBREV_U32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_SUBREV_U32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_SUBREV_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_SUBREV_U32_e32_1:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_SUBREV_U32_e32_2:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_SUBREV_U32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_SUBREV_U32_e32 2, undef %0:vgpr_32, implicit $exec @@ -2087,12 +2039,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_bfm_b32_e32 - ; GCN: renamable $vgpr0 = V_BFM_B32_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_BFM_B32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit 
killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_BFM_B32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_BFM_B32_e32_:%[0-9]+]]:vgpr_32 = V_BFM_B32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_BFM_B32_e32_1:%[0-9]+]]:vgpr_32 = V_BFM_B32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_BFM_B32_e32_2:%[0-9]+]]:vgpr_32 = V_BFM_B32_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_BFM_B32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_BFM_B32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_BFM_B32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_BFM_B32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_BFM_B32_e32 2, undef %0:vgpr_32, implicit $exec @@ -2108,12 +2060,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_bcnt_u32_b32_e32 - ; GCN: renamable $vgpr0 = V_BCNT_U32_B32_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_BCNT_U32_B32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_BCNT_U32_B32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_BCNT_U32_B32_e32_:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_BCNT_U32_B32_e32_1:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_BCNT_U32_B32_e32_2:%[0-9]+]]:vgpr_32 = V_BCNT_U32_B32_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_BCNT_U32_B32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_BCNT_U32_B32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_BCNT_U32_B32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_BCNT_U32_B32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_BCNT_U32_B32_e32 2, undef %0:vgpr_32, implicit $exec @@ -2129,12 +2081,12 @@ tracksRegLiveness: true 
body: | bb.0: ; GCN-LABEL: name: test_remat_v_mbcnt_lo_u32_b32_e32 - ; GCN: renamable $vgpr0 = V_MBCNT_LO_U32_B32_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MBCNT_LO_U32_B32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MBCNT_LO_U32_B32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MBCNT_LO_U32_B32_e32_:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MBCNT_LO_U32_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MBCNT_LO_U32_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MBCNT_LO_U32_B32_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MBCNT_LO_U32_B32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MBCNT_LO_U32_B32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MBCNT_LO_U32_B32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MBCNT_LO_U32_B32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MBCNT_LO_U32_B32_e32 2, undef %0:vgpr_32, implicit $exec @@ -2150,12 +2102,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_mbcnt_hi_u32_b32_e32 - ; GCN: renamable $vgpr0 = V_MBCNT_HI_U32_B32_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MBCNT_HI_U32_B32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MBCNT_HI_U32_B32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MBCNT_HI_U32_B32_e32_:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MBCNT_HI_U32_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: 
[[V_MBCNT_HI_U32_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MBCNT_HI_U32_B32_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MBCNT_HI_U32_B32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MBCNT_HI_U32_B32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MBCNT_HI_U32_B32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MBCNT_HI_U32_B32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MBCNT_HI_U32_B32_e32 2, undef %0:vgpr_32, implicit $exec @@ -2171,12 +2123,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_ldexp_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_LDEXP_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_LDEXP_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_LDEXP_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_LDEXP_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_LDEXP_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_LDEXP_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_LDEXP_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode @@ -2192,12 +2144,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cvt_pknorm_i16_f32_e32 - ; GCN: renamable $vgpr0 = V_CVT_PKNORM_I16_F32_e32 1, undef $vgpr0, 
implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_CVT_PKNORM_I16_F32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_CVT_PKNORM_I16_F32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_PKNORM_I16_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_CVT_PKNORM_I16_F32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_CVT_PKNORM_I16_F32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_I16_F32_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKNORM_I16_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKNORM_I16_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKNORM_I16_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_CVT_PKNORM_I16_F32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_CVT_PKNORM_I16_F32_e32 2, undef %0:vgpr_32, implicit $exec @@ -2213,12 +2165,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cvt_pknorm_u16_f32_e32 - ; GCN: renamable $vgpr0 = V_CVT_PKNORM_U16_F32_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_CVT_PKNORM_U16_F32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_CVT_PKNORM_U16_F32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_PKNORM_U16_F32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_CVT_PKNORM_U16_F32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_CVT_PKNORM_U16_F32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_PKNORM_U16_F32_e32 3, undef %1:vgpr_32, implicit 
$exec + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKNORM_U16_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKNORM_U16_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKNORM_U16_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_CVT_PKNORM_U16_F32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_CVT_PKNORM_U16_F32_e32 2, undef %0:vgpr_32, implicit $exec @@ -2234,12 +2186,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cvt_pkrtz_f16_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_PKRTZ_F16_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_PKRTZ_F16_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_PKRTZ_F16_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKRTZ_F16_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKRTZ_F16_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PKRTZ_F16_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode @@ -2255,12 +2207,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cvt_pk_u16_u32_e32 - ; GCN: renamable $vgpr0 = 
V_CVT_PK_U16_U32_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_CVT_PK_U16_U32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_CVT_PK_U16_U32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_PK_U16_U32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_CVT_PK_U16_U32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_CVT_PK_U16_U32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_PK_U16_U32_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_U16_U32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_U16_U32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_U16_U32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_CVT_PK_U16_U32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_CVT_PK_U16_U32_e32 2, undef %0:vgpr_32, implicit $exec @@ -2276,12 +2228,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cvt_pk_i16_i32_e32 - ; GCN: renamable $vgpr0 = V_CVT_PK_I16_I32_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_CVT_PK_I16_I32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_CVT_PK_I16_I32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_PK_I16_I32_e32_:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_CVT_PK_I16_I32_e32_1:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_CVT_PK_I16_I32_e32_2:%[0-9]+]]:vgpr_32 = V_CVT_PK_I16_I32_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit 
[[V_CVT_PK_I16_I32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_I16_I32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_I16_I32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_CVT_PK_I16_I32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_CVT_PK_I16_I32_e32 2, undef %0:vgpr_32, implicit $exec @@ -2297,12 +2249,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_min_legacy_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_MIN_LEGACY_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_LEGACY_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MIN_LEGACY_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MIN_LEGACY_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_LEGACY_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MIN_LEGACY_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_LEGACY_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MIN_LEGACY_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_LEGACY_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_LEGACY_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_LEGACY_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_LEGACY_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_MIN_LEGACY_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_MIN_LEGACY_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode @@ -2318,12 +2270,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_max_legacy_f32_e32 - ; GCN: renamable $vgpr0 = nofpexcept V_MAX_LEGACY_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = 
nofpexcept V_MAX_LEGACY_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MAX_LEGACY_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MAX_LEGACY_F32_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_LEGACY_F32_e32 1, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MAX_LEGACY_F32_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_LEGACY_F32_e32 2, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MAX_LEGACY_F32_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_LEGACY_F32_e32 3, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_LEGACY_F32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_LEGACY_F32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_LEGACY_F32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_MAX_LEGACY_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_MAX_LEGACY_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode @@ -2339,12 +2291,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_lshr_b32_e32 - ; GCN: renamable $vgpr0 = V_LSHR_B32_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_LSHR_B32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_LSHR_B32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_LSHR_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHR_B32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_LSHR_B32_e32_1:%[0-9]+]]:vgpr_32 = V_LSHR_B32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_LSHR_B32_e32_2:%[0-9]+]]:vgpr_32 = V_LSHR_B32_e32 3, undef %1:vgpr_32, implicit $exec + ; 
GCN-NEXT: S_NOP 0, implicit [[V_LSHR_B32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHR_B32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHR_B32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_LSHR_B32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_LSHR_B32_e32 2, undef %0:vgpr_32, implicit $exec @@ -2360,12 +2312,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_lshl_b32_e32 - ; GCN: renamable $vgpr0 = V_LSHL_B32_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_LSHL_B32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_LSHL_B32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_LSHL_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHL_B32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_LSHL_B32_e32_1:%[0-9]+]]:vgpr_32 = V_LSHL_B32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_LSHL_B32_e32_2:%[0-9]+]]:vgpr_32 = V_LSHL_B32_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_B32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_B32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_B32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_LSHL_B32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_LSHL_B32_e32 2, undef %0:vgpr_32, implicit $exec @@ -2381,12 +2333,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_ashr_i32_e32 - ; GCN: renamable $vgpr0 = V_ASHR_I32_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_ASHR_I32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_ASHR_I32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_ASHR_I32_e32_:%[0-9]+]]:vgpr_32 = 
V_ASHR_I32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_ASHR_I32_e32_1:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_ASHR_I32_e32_2:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_ASHR_I32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ASHR_I32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ASHR_I32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_ASHR_I32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_ASHR_I32_e32 2, undef %0:vgpr_32, implicit $exec @@ -2402,12 +2354,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_xnor_b32_e32 - ; GCN: renamable $vgpr0 = V_XNOR_B32_e32 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_XNOR_B32_e32 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_XNOR_B32_e32 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_XNOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XNOR_B32_e32 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_XNOR_B32_e32_1:%[0-9]+]]:vgpr_32 = V_XNOR_B32_e32 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_XNOR_B32_e32_2:%[0-9]+]]:vgpr_32 = V_XNOR_B32_e32 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_XNOR_B32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_XNOR_B32_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_XNOR_B32_e32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_XNOR_B32_e32 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_XNOR_B32_e32 2, undef %0:vgpr_32, implicit $exec @@ -2423,12 +2375,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_fmamk_f32 - ; GCN: renamable $vgpr0 = nofpexcept V_FMAMK_F32 1, 1, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMAMK_F32 2, 2, undef $vgpr0, implicit $exec, implicit 
$mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FMAMK_F32 3, 3, undef $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_FMAMK_F32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAMK_F32 1, 1, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FMAMK_F32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAMK_F32 2, 2, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FMAMK_F32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAMK_F32 3, 3, undef %1:vgpr_32, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_FMAMK_F32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FMAMK_F32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FMAMK_F32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_FMAMK_F32 1, 1, undef %0:vgpr_32, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_FMAMK_F32 2, 2, undef %0:vgpr_32, implicit $exec, implicit $mode @@ -2444,12 +2396,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_fmaak_f32 - ; GCN: renamable $vgpr0 = nofpexcept V_FMAAK_F32 1, undef $vgpr0, 1, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMAAK_F32 2, undef $vgpr0, 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FMAAK_F32 3, undef $vgpr0, 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_FMAAK_F32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAAK_F32 1, undef %1:vgpr_32, 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FMAAK_F32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAAK_F32 2, undef %1:vgpr_32, 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FMAAK_F32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAAK_F32 3, undef %1:vgpr_32, 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 
0, implicit [[V_FMAAK_F32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FMAAK_F32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FMAAK_F32_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_FMAAK_F32 1, undef %0:vgpr_32, 1, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_FMAAK_F32 2, undef %0:vgpr_32, 2, implicit $exec, implicit $mode @@ -2465,12 +2417,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_mad_legacy_f32_e64 - ; GCN: renamable $vgpr0 = nofpexcept V_MAD_LEGACY_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAD_LEGACY_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MAD_LEGACY_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MAD_LEGACY_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_LEGACY_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MAD_LEGACY_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_LEGACY_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MAD_LEGACY_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_LEGACY_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_LEGACY_F32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_LEGACY_F32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_LEGACY_F32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_MAD_LEGACY_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_MAD_LEGACY_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode @@ -2486,12 +2438,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: 
test_remat_v_mad_f32_e64 - ; GCN: renamable $vgpr0 = nofpexcept V_MAD_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAD_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MAD_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MAD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MAD_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MAD_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_F32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_F32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_F32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_MAD_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_MAD_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode @@ -2507,12 +2459,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_fma_legacy_f32_e64 - ; GCN: renamable $vgpr0 = nofpexcept V_FMA_LEGACY_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMA_LEGACY_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FMA_LEGACY_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit 
$mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_FMA_LEGACY_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_LEGACY_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FMA_LEGACY_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_LEGACY_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FMA_LEGACY_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_LEGACY_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_LEGACY_F32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_LEGACY_F32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_LEGACY_F32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_FMA_LEGACY_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_FMA_LEGACY_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode @@ -2528,12 +2480,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_fma_f32_e64 - ; GCN: renamable $vgpr0 = nofpexcept V_FMA_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMA_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FMA_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_FMA_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FMA_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FMA_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 3, 0, 3, 0, undef 
%1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_F32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_F32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_F32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode @@ -2549,12 +2501,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_mad_i32_i24_e64 - ; GCN: renamable $vgpr0 = V_MAD_I32_I24_e64 1, 1, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MAD_I32_I24_e64 2, 2, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MAD_I32_I24_e64 3, 3, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MAD_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MAD_I32_I24_e64 1, 1, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: [[V_MAD_I32_I24_e64_1:%[0-9]+]]:vgpr_32 = V_MAD_I32_I24_e64 2, 2, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: [[V_MAD_I32_I24_e64_2:%[0-9]+]]:vgpr_32 = V_MAD_I32_I24_e64 3, 3, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_I32_I24_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_I32_I24_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_I32_I24_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MAD_I32_I24_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec %2:vgpr_32 = V_MAD_I32_I24_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec @@ -2570,12 +2522,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_mad_u32_u24_e64 - ; GCN: renamable $vgpr0 = V_MAD_U32_U24_e64 1, 1, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MAD_U32_U24_e64 2, 2, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 
- ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MAD_U32_U24_e64 3, 3, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MAD_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MAD_U32_U24_e64 1, 1, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: [[V_MAD_U32_U24_e64_1:%[0-9]+]]:vgpr_32 = V_MAD_U32_U24_e64 2, 2, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: [[V_MAD_U32_U24_e64_2:%[0-9]+]]:vgpr_32 = V_MAD_U32_U24_e64 3, 3, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_U32_U24_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_U32_U24_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_U32_U24_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MAD_U32_U24_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec %2:vgpr_32 = V_MAD_U32_U24_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec @@ -2591,12 +2543,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_lerp_u8_e64 - ; GCN: renamable $vgpr0 = V_LERP_U8_e64 1, 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_LERP_U8_e64 2, 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_LERP_U8_e64 3, 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_LERP_U8_e64_:%[0-9]+]]:vgpr_32 = V_LERP_U8_e64 1, 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_LERP_U8_e64_1:%[0-9]+]]:vgpr_32 = V_LERP_U8_e64 2, 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_LERP_U8_e64_2:%[0-9]+]]:vgpr_32 = V_LERP_U8_e64 3, 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_LERP_U8_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LERP_U8_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LERP_U8_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_LERP_U8_e64 1, 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_LERP_U8_e64 2, 2, undef %0:vgpr_32, 
implicit $exec @@ -2612,12 +2564,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_fma_f64_e64 - ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, 1, 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_FMA_F64_e64 0, 2, 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, 3, 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN: [[V_FMA_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FMA_F64_e64 0, 1, 0, 1, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FMA_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FMA_F64_e64 0, 2, 0, 2, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FMA_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_FMA_F64_e64 0, 3, 0, 3, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_F64_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_F64_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_F64_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vreg_64_align2 = nofpexcept V_FMA_F64_e64 0, 1, 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode %2:vreg_64_align2 = nofpexcept V_FMA_F64_e64 0, 2, 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode @@ -2633,12 +2585,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_add_f64_e64 - ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_ADD_F64_e64 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_ADD_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, 
implicit killed renamable $vgpr0_vgpr1 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_ADD_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN: [[V_ADD_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, 1, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_ADD_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, 2, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_ADD_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, 3, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F64_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F64_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F64_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode %2:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode @@ -2654,12 +2606,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_mul_f64_e64 - ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_MUL_F64_e64 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_MUL_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_MUL_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN: [[V_MUL_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MUL_F64_e64 0, 1, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode + ; 
GCN-NEXT: [[V_MUL_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MUL_F64_e64 0, 2, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MUL_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MUL_F64_e64 0, 3, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F64_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F64_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F64_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vreg_64_align2 = nofpexcept V_MUL_F64_e64 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode %2:vreg_64_align2 = nofpexcept V_MUL_F64_e64 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode @@ -2675,12 +2627,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_min_f64_e64 - ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_MIN_F64_e64 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_MIN_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_MIN_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN: [[V_MIN_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MIN_F64_e64 0, 1, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MIN_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MIN_F64_e64 0, 2, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MIN_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MIN_F64_e64 0, 3, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F64_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F64_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit 
[[V_MIN_F64_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vreg_64_align2 = nofpexcept V_MIN_F64_e64 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode %2:vreg_64_align2 = nofpexcept V_MIN_F64_e64 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode @@ -2696,12 +2648,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_max_f64_e64 - ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_MAX_F64_e64 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_MAX_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_MAX_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN: [[V_MAX_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MAX_F64_e64 0, 1, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MAX_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MAX_F64_e64 0, 2, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MAX_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_MAX_F64_e64 0, 3, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F64_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F64_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F64_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vreg_64_align2 = nofpexcept V_MAX_F64_e64 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode %2:vreg_64_align2 = nofpexcept V_MAX_F64_e64 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode @@ -2717,12 +2669,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_mul_lo_u32_e64 - ; GCN: renamable $vgpr0 = V_MUL_LO_U32_e64 1, undef $vgpr0, 
implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_U32_e64 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MUL_LO_U32_e64 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MUL_LO_U32_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MUL_LO_U32_e64 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MUL_LO_U32_e64 2, undef %0:vgpr_32, implicit $exec @@ -2738,12 +2690,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_mul_hi_u32_e64 - ; GCN: renamable $vgpr0 = V_MUL_HI_U32_e64 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MUL_HI_U32_e64 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MUL_HI_U32_e64 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MUL_HI_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MUL_HI_U32_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_U32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_U32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_U32_e64_2]] ; GCN-NEXT: 
S_ENDPGM 0 %1:vgpr_32 = V_MUL_HI_U32_e64 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MUL_HI_U32_e64 2, undef %0:vgpr_32, implicit $exec @@ -2759,12 +2711,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_mul_lo_i32_e64 - ; GCN: renamable $vgpr0 = V_MUL_LO_I32_e64 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_I32_e64 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MUL_LO_I32_e64 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MUL_LO_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_I32_e64 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MUL_LO_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_I32_e64 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MUL_LO_I32_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_LO_I32_e64 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_I32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_I32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_I32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MUL_LO_I32_e64 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MUL_LO_I32_e64 2, undef %0:vgpr_32, implicit $exec @@ -2780,12 +2732,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_mul_hi_i32_e64 - ; GCN: renamable $vgpr0 = V_MUL_HI_I32_e64 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MUL_HI_I32_e64 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MUL_HI_I32_e64 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MUL_HI_I32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MUL_HI_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 2, undef 
%1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MUL_HI_I32_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_e64 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_I32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_I32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_HI_I32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MUL_HI_I32_e64 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MUL_HI_I32_e64 2, undef %0:vgpr_32, implicit $exec @@ -2801,12 +2753,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cubeid_f32_e64 - ; GCN: renamable $vgpr0 = V_CUBEID_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = V_CUBEID_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_CUBEID_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CUBEID_F32_e64_:%[0-9]+]]:vgpr_32 = V_CUBEID_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CUBEID_F32_e64_1:%[0-9]+]]:vgpr_32 = V_CUBEID_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CUBEID_F32_e64_2:%[0-9]+]]:vgpr_32 = V_CUBEID_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CUBEID_F32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CUBEID_F32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CUBEID_F32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_CUBEID_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode %2:vgpr_32 = V_CUBEID_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode @@ -2822,12 +2774,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cubesc_f32_e64 - ; GCN: 
renamable $vgpr0 = V_CUBESC_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = V_CUBESC_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_CUBESC_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CUBESC_F32_e64_:%[0-9]+]]:vgpr_32 = V_CUBESC_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CUBESC_F32_e64_1:%[0-9]+]]:vgpr_32 = V_CUBESC_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CUBESC_F32_e64_2:%[0-9]+]]:vgpr_32 = V_CUBESC_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CUBESC_F32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CUBESC_F32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CUBESC_F32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_CUBESC_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode %2:vgpr_32 = V_CUBESC_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode @@ -2843,12 +2795,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cubetc_f32_e64 - ; GCN: renamable $vgpr0 = V_CUBETC_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = V_CUBETC_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_CUBETC_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CUBETC_F32_e64_:%[0-9]+]]:vgpr_32 = V_CUBETC_F32_e64 0, 
1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CUBETC_F32_e64_1:%[0-9]+]]:vgpr_32 = V_CUBETC_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CUBETC_F32_e64_2:%[0-9]+]]:vgpr_32 = V_CUBETC_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CUBETC_F32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CUBETC_F32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CUBETC_F32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_CUBETC_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode %2:vgpr_32 = V_CUBETC_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode @@ -2864,12 +2816,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cubema_f32_e64 - ; GCN: renamable $vgpr0 = V_CUBEMA_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = V_CUBEMA_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_CUBEMA_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CUBEMA_F32_e64_:%[0-9]+]]:vgpr_32 = V_CUBEMA_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CUBEMA_F32_e64_1:%[0-9]+]]:vgpr_32 = V_CUBEMA_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CUBEMA_F32_e64_2:%[0-9]+]]:vgpr_32 = V_CUBEMA_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CUBEMA_F32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CUBEMA_F32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CUBEMA_F32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_CUBEMA_F32_e64 0, 
1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode %2:vgpr_32 = V_CUBEMA_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode @@ -2885,12 +2837,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_bfe_u32_e64 - ; GCN: renamable $vgpr0 = V_BFE_U32_e64 1, 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_BFE_U32_e64 2, 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_BFE_U32_e64 3, 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_BFE_U32_e64_1:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_BFE_U32_e64_2:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_BFE_U32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_BFE_U32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_BFE_U32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_BFE_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_BFE_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec @@ -2906,12 +2858,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_bfe_i32_e64 - ; GCN: renamable $vgpr0 = V_BFE_I32_e64 1, 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_BFE_I32_e64 2, 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_BFE_I32_e64 3, 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 1, 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_BFE_I32_e64_1:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 2, 2, undef 
%1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_BFE_I32_e64_2:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 3, 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_BFE_I32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_BFE_I32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_BFE_I32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_BFE_I32_e64 1, 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_BFE_I32_e64 2, 2, undef %0:vgpr_32, implicit $exec @@ -2927,12 +2879,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_bfi_b32_e64 - ; GCN: renamable $vgpr0 = V_BFI_B32_e64 1, 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_BFI_B32_e64 2, 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_BFI_B32_e64 3, 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_BFI_B32_e64_:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 1, 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_BFI_B32_e64_1:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 2, 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_BFI_B32_e64_2:%[0-9]+]]:vgpr_32 = V_BFI_B32_e64 3, 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_BFI_B32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_BFI_B32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_BFI_B32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_BFI_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_BFI_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec @@ -2948,12 +2900,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_alignbit_b32_e64 - ; GCN: renamable $vgpr0 = V_ALIGNBIT_B32_e64 1, 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_ALIGNBIT_B32_e64 2, 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 
= V_ALIGNBIT_B32_e64 3, 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_ALIGNBIT_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 1, 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_ALIGNBIT_B32_e64_1:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 2, 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_ALIGNBIT_B32_e64_2:%[0-9]+]]:vgpr_32 = V_ALIGNBIT_B32_e64 3, 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_ALIGNBIT_B32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ALIGNBIT_B32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ALIGNBIT_B32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_ALIGNBIT_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_ALIGNBIT_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec @@ -2969,12 +2921,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_alignbyte_b32_e64 - ; GCN: renamable $vgpr0 = V_ALIGNBYTE_B32_e64 1, 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_ALIGNBYTE_B32_e64 2, 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_ALIGNBYTE_B32_e64 3, 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_ALIGNBYTE_B32_e64_:%[0-9]+]]:vgpr_32 = V_ALIGNBYTE_B32_e64 1, 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_ALIGNBYTE_B32_e64_1:%[0-9]+]]:vgpr_32 = V_ALIGNBYTE_B32_e64 2, 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_ALIGNBYTE_B32_e64_2:%[0-9]+]]:vgpr_32 = V_ALIGNBYTE_B32_e64 3, 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_ALIGNBYTE_B32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ALIGNBYTE_B32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ALIGNBYTE_B32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_ALIGNBYTE_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_ALIGNBYTE_B32_e64 2, 2, undef %0:vgpr_32, 
implicit $exec @@ -2990,12 +2942,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_min3_i32_e64 - ; GCN: renamable $vgpr0 = V_MIN3_I32_e64 1, 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MIN3_I32_e64 2, 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MIN3_I32_e64 3, 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MIN3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MIN3_I32_e64 1, 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MIN3_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN3_I32_e64 2, 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MIN3_I32_e64_2:%[0-9]+]]:vgpr_32 = V_MIN3_I32_e64 3, 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_I32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_I32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_I32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MIN3_I32_e64 1, 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MIN3_I32_e64 2, 2, undef %0:vgpr_32, implicit $exec @@ -3011,12 +2963,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_min3_u32_e64 - ; GCN: renamable $vgpr0 = V_MIN3_U32_e64 1, 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MIN3_U32_e64 2, 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MIN3_U32_e64 3, 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MIN3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MIN3_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MIN3_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN3_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MIN3_U32_e64_2:%[0-9]+]]:vgpr_32 = V_MIN3_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec + 
; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_U32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_U32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_U32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MIN3_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MIN3_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec @@ -3032,12 +2984,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_max3_i32_e64 - ; GCN: renamable $vgpr0 = V_MAX3_I32_e64 1, 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MAX3_I32_e64 2, 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MAX3_I32_e64 3, 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MAX3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX3_I32_e64 1, 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MAX3_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX3_I32_e64 2, 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MAX3_I32_e64_2:%[0-9]+]]:vgpr_32 = V_MAX3_I32_e64 3, 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_I32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_I32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_I32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MAX3_I32_e64 1, 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MAX3_I32_e64 2, 2, undef %0:vgpr_32, implicit $exec @@ -3053,12 +3005,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_max3_u32_e64 - ; GCN: renamable $vgpr0 = V_MAX3_U32_e64 1, 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MAX3_U32_e64 2, 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MAX3_U32_e64 3, 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: 
[[V_MAX3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MAX3_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MAX3_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX3_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MAX3_U32_e64_2:%[0-9]+]]:vgpr_32 = V_MAX3_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_U32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_U32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_U32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MAX3_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MAX3_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec @@ -3074,12 +3026,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_med3_i32_e64 - ; GCN: renamable $vgpr0 = V_MED3_I32_e64 1, 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MED3_I32_e64 2, 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MED3_I32_e64 3, 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MED3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 1, 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MED3_I32_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 2, 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MED3_I32_e64_2:%[0-9]+]]:vgpr_32 = V_MED3_I32_e64 3, 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_I32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_I32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_I32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MED3_I32_e64 1, 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MED3_I32_e64 2, 2, undef %0:vgpr_32, implicit $exec @@ -3095,12 +3047,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_med3_u32_e64 - ; GCN: renamable $vgpr0 = V_MED3_U32_e64 1, 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = 
V_MED3_U32_e64 2, 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MED3_U32_e64 3, 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MED3_U32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MED3_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_MED3_U32_e64_2:%[0-9]+]]:vgpr_32 = V_MED3_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_U32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_U32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_U32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MED3_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MED3_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec @@ -3116,12 +3068,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_min3_f32_e64 - ; GCN: renamable $vgpr0 = V_MIN3_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = V_MIN3_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MIN3_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MIN3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MIN3_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MIN3_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MIN3_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MIN3_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MIN3_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit 
[[V_MIN3_F32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_F32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN3_F32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MIN3_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode %2:vgpr_32 = V_MIN3_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode @@ -3137,12 +3089,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_max3_f32_e64 - ; GCN: renamable $vgpr0 = V_MAX3_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = V_MAX3_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MAX3_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MAX3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX3_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MAX3_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MAX3_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MAX3_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MAX3_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_F32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_F32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX3_F32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MAX3_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode %2:vgpr_32 = V_MAX3_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode @@ -3158,12 +3110,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_med3_f32_e64 - ; GCN: renamable $vgpr0 = V_MED3_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = 
V_MED3_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MED3_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MED3_F32_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MED3_F32_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MED3_F32_e64_2:%[0-9]+]]:vgpr_32 = V_MED3_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_F32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_F32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MED3_F32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MED3_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode %2:vgpr_32 = V_MED3_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode @@ -3179,12 +3131,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_sad_u8_e64 - ; GCN: renamable $vgpr0 = V_SAD_U8_e64 1, 1, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_SAD_U8_e64 2, 2, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_SAD_U8_e64 3, 3, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_SAD_U8_e64_:%[0-9]+]]:vgpr_32 = V_SAD_U8_e64 1, 1, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: [[V_SAD_U8_e64_1:%[0-9]+]]:vgpr_32 = V_SAD_U8_e64 2, 2, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: [[V_SAD_U8_e64_2:%[0-9]+]]:vgpr_32 = V_SAD_U8_e64 3, 3, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: S_NOP 
0, implicit [[V_SAD_U8_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U8_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U8_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_SAD_U8_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec %2:vgpr_32 = V_SAD_U8_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec @@ -3200,12 +3152,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_sad_hi_u8_e64 - ; GCN: renamable $vgpr0 = V_SAD_HI_U8_e64 1, 1, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_SAD_HI_U8_e64 2, 2, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_SAD_HI_U8_e64 3, 3, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_SAD_HI_U8_e64_:%[0-9]+]]:vgpr_32 = V_SAD_HI_U8_e64 1, 1, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: [[V_SAD_HI_U8_e64_1:%[0-9]+]]:vgpr_32 = V_SAD_HI_U8_e64 2, 2, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: [[V_SAD_HI_U8_e64_2:%[0-9]+]]:vgpr_32 = V_SAD_HI_U8_e64 3, 3, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_HI_U8_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_HI_U8_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_HI_U8_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_SAD_HI_U8_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec %2:vgpr_32 = V_SAD_HI_U8_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec @@ -3221,12 +3173,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_sad_u16_e64 - ; GCN: renamable $vgpr0 = V_SAD_U16_e64 1, 1, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_SAD_U16_e64 2, 2, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_SAD_U16_e64 3, 3, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable 
$vgpr0 + ; GCN: [[V_SAD_U16_e64_:%[0-9]+]]:vgpr_32 = V_SAD_U16_e64 1, 1, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: [[V_SAD_U16_e64_1:%[0-9]+]]:vgpr_32 = V_SAD_U16_e64 2, 2, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: [[V_SAD_U16_e64_2:%[0-9]+]]:vgpr_32 = V_SAD_U16_e64 3, 3, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U16_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U16_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U16_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_SAD_U16_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec %2:vgpr_32 = V_SAD_U16_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec @@ -3242,12 +3194,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_sad_u32_e64 - ; GCN: renamable $vgpr0 = V_SAD_U32_e64 1, 1, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_SAD_U32_e64 2, 2, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_SAD_U32_e64 3, 3, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_SAD_U32_e64_:%[0-9]+]]:vgpr_32 = V_SAD_U32_e64 1, 1, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: [[V_SAD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SAD_U32_e64 2, 2, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: [[V_SAD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_SAD_U32_e64 3, 3, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SAD_U32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_SAD_U32_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec %2:vgpr_32 = V_SAD_U32_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec @@ -3263,12 +3215,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_cvt_pk_u8_f32_e64 - ; GCN: renamable $vgpr0 = nofpexcept V_CVT_PK_U8_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 
0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_PK_U8_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_PK_U8_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_CVT_PK_U8_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PK_U8_F32_e64 0, 1, 0, 1, 0, undef %1:vgpr_32, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_PK_U8_F32_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PK_U8_F32_e64 0, 2, 0, 2, 0, undef %1:vgpr_32, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_CVT_PK_U8_F32_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_PK_U8_F32_e64 0, 3, 0, 3, 0, undef %1:vgpr_32, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_U8_F32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_U8_F32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_CVT_PK_U8_F32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = nofpexcept V_CVT_PK_U8_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_CVT_PK_U8_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, implicit $exec, implicit $mode @@ -3284,12 +3236,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_div_fixup_f64_e64 - ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_DIV_FIXUP_F64_e64 0, 1, 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_DIV_FIXUP_F64_e64 0, 2, 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_DIV_FIXUP_F64_e64 0, 3, 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode - ; 
GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN: [[V_DIV_FIXUP_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_DIV_FIXUP_F64_e64 0, 1, 0, 1, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_DIV_FIXUP_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_DIV_FIXUP_F64_e64 0, 2, 0, 2, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_DIV_FIXUP_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_DIV_FIXUP_F64_e64 0, 3, 0, 3, 0, undef %1:vreg_64_align2, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_DIV_FIXUP_F64_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_DIV_FIXUP_F64_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_DIV_FIXUP_F64_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vreg_64_align2 = nofpexcept V_DIV_FIXUP_F64_e64 0, 1, 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode %2:vreg_64_align2 = nofpexcept V_DIV_FIXUP_F64_e64 0, 2, 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode @@ -3305,12 +3257,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_ldexp_f64_e64 - ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_LDEXP_F64_e64 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_LDEXP_F64_e64 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_LDEXP_F64_e64 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN: [[V_LDEXP_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_LDEXP_F64_e64 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_LDEXP_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_LDEXP_F64_e64 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; 
GCN-NEXT: [[V_LDEXP_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_LDEXP_F64_e64 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F64_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F64_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F64_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vreg_64_align2 = nofpexcept V_LDEXP_F64_e64 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode %2:vreg_64_align2 = nofpexcept V_LDEXP_F64_e64 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode @@ -3326,12 +3278,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_msad_u8_e64 - ; GCN: renamable $vgpr0 = V_MSAD_U8_e64 1, 1, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_MSAD_U8_e64 2, 2, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_MSAD_U8_e64 3, 3, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_MSAD_U8_e64_:%[0-9]+]]:vgpr_32 = V_MSAD_U8_e64 1, 1, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: [[V_MSAD_U8_e64_1:%[0-9]+]]:vgpr_32 = V_MSAD_U8_e64 2, 2, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: [[V_MSAD_U8_e64_2:%[0-9]+]]:vgpr_32 = V_MSAD_U8_e64 3, 3, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MSAD_U8_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MSAD_U8_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MSAD_U8_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_MSAD_U8_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec %2:vgpr_32 = V_MSAD_U8_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec @@ -3347,12 +3299,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_trig_preop_f64_e64 - ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_TRIG_PREOP_F64_e64 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: renamable $vgpr2_vgpr3 = 
nofpexcept V_TRIG_PREOP_F64_e64 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_TRIG_PREOP_F64_e64 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN: [[V_TRIG_PREOP_F64_e64_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_TRIG_PREOP_F64_e64 0, 1, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_TRIG_PREOP_F64_e64_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_TRIG_PREOP_F64_e64 0, 2, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_TRIG_PREOP_F64_e64_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_TRIG_PREOP_F64_e64 0, 3, 0, undef %1:vgpr_32, 0, 0, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_TRIG_PREOP_F64_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_TRIG_PREOP_F64_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_TRIG_PREOP_F64_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vreg_64_align2 = nofpexcept V_TRIG_PREOP_F64_e64 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode %2:vreg_64_align2 = nofpexcept V_TRIG_PREOP_F64_e64 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode @@ -3368,12 +3320,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_lshlrev_b64_e64 - ; GCN: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 1, undef $vgpr0_vgpr1, implicit $exec - ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_LSHLREV_B64_e64 2, undef $vgpr0_vgpr1, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 3, undef $vgpr0_vgpr1, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN: [[V_LSHLREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 1, undef %1:vreg_64_align2, 
implicit $exec + ; GCN-NEXT: [[V_LSHLREV_B64_e64_1:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 2, undef %1:vreg_64_align2, implicit $exec + ; GCN-NEXT: [[V_LSHLREV_B64_e64_2:%[0-9]+]]:vreg_64_align2 = V_LSHLREV_B64_e64 3, undef %1:vreg_64_align2, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B64_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B64_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B64_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vreg_64_align2 = V_LSHLREV_B64_e64 1, undef %0:vreg_64_align2, implicit $exec %2:vreg_64_align2 = V_LSHLREV_B64_e64 2, undef %0:vreg_64_align2, implicit $exec @@ -3389,12 +3341,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_lshrrev_b64_e64 - ; GCN: renamable $vgpr0_vgpr1 = V_LSHRREV_B64_e64 1, undef $vgpr0_vgpr1, implicit $exec - ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_LSHRREV_B64_e64 2, undef $vgpr0_vgpr1, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_LSHRREV_B64_e64 3, undef $vgpr0_vgpr1, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN: [[V_LSHRREV_B64_e64_:%[0-9]+]]:vreg_64_align2 = V_LSHRREV_B64_e64 1, undef %1:vreg_64_align2, implicit $exec + ; GCN-NEXT: [[V_LSHRREV_B64_e64_1:%[0-9]+]]:vreg_64_align2 = V_LSHRREV_B64_e64 2, undef %1:vreg_64_align2, implicit $exec + ; GCN-NEXT: [[V_LSHRREV_B64_e64_2:%[0-9]+]]:vreg_64_align2 = V_LSHRREV_B64_e64 3, undef %1:vreg_64_align2, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B64_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B64_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B64_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vreg_64_align2 = V_LSHRREV_B64_e64 1, undef %0:vreg_64_align2, implicit $exec %2:vreg_64_align2 = V_LSHRREV_B64_e64 2, undef %0:vreg_64_align2, implicit $exec @@ -3410,12 +3362,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: 
test_remat_v_ashrrev_i64_e64 - ; GCN: renamable $vgpr0_vgpr1 = V_ASHRREV_I64_e64 1, undef $vgpr0_vgpr1, implicit $exec - ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_ASHRREV_I64_e64 2, undef $vgpr0_vgpr1, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_ASHRREV_I64_e64 3, undef $vgpr0_vgpr1, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN: [[V_ASHRREV_I64_e64_:%[0-9]+]]:vreg_64_align2 = V_ASHRREV_I64_e64 1, undef %1:vreg_64_align2, implicit $exec + ; GCN-NEXT: [[V_ASHRREV_I64_e64_1:%[0-9]+]]:vreg_64_align2 = V_ASHRREV_I64_e64 2, undef %1:vreg_64_align2, implicit $exec + ; GCN-NEXT: [[V_ASHRREV_I64_e64_2:%[0-9]+]]:vreg_64_align2 = V_ASHRREV_I64_e64 3, undef %1:vreg_64_align2, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I64_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I64_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I64_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vreg_64_align2 = V_ASHRREV_I64_e64 1, undef %0:vreg_64_align2, implicit $exec %2:vreg_64_align2 = V_ASHRREV_I64_e64 2, undef %0:vreg_64_align2, implicit $exec @@ -3431,12 +3383,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_perm_b32_e64 - ; GCN: renamable $vgpr0 = V_PERM_B32_e64 1, 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_PERM_B32_e64 2, 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_PERM_B32_e64 3, 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_PERM_B32_e64_:%[0-9]+]]:vgpr_32 = V_PERM_B32_e64 1, 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_PERM_B32_e64_1:%[0-9]+]]:vgpr_32 = V_PERM_B32_e64 2, 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_PERM_B32_e64_2:%[0-9]+]]:vgpr_32 = 
V_PERM_B32_e64 3, 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PERM_B32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PERM_B32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PERM_B32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_PERM_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_PERM_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec @@ -3452,12 +3404,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_add3_u32_e64 - ; GCN: renamable $vgpr0 = V_ADD3_U32_e64 1, 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_ADD3_U32_e64 2, 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_ADD3_U32_e64 3, 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_ADD3_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_ADD3_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_ADD3_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD3_U32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD3_U32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD3_U32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_ADD3_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_ADD3_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec @@ -3473,12 +3425,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_and_or_b32_e64 - ; GCN: renamable $vgpr0 = V_AND_OR_B32_e64 1, 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_AND_OR_B32_e64 2, 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_AND_OR_B32_e64 3, 3, undef $vgpr0, implicit $exec - ; 
GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_AND_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_OR_B32_e64 1, 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_AND_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_OR_B32_e64 2, 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_AND_OR_B32_e64_2:%[0-9]+]]:vgpr_32 = V_AND_OR_B32_e64 3, 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_AND_OR_B32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_AND_OR_B32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_AND_OR_B32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_AND_OR_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_AND_OR_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec @@ -3494,12 +3446,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_or3_b32_e64 - ; GCN: renamable $vgpr0 = V_OR3_B32_e64 1, 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_OR3_B32_e64 2, 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_OR3_B32_e64 3, 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_OR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 1, 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_OR3_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 2, 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_OR3_B32_e64_2:%[0-9]+]]:vgpr_32 = V_OR3_B32_e64 3, 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_OR3_B32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_OR3_B32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_OR3_B32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_OR3_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_OR3_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec @@ -3515,12 +3467,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_xad_u32_e64 - ; GCN: renamable $vgpr0 = V_XAD_U32_e64 1, 1, undef 
$vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_XAD_U32_e64 2, 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_XAD_U32_e64 3, 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_XAD_U32_e64_:%[0-9]+]]:vgpr_32 = V_XAD_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_XAD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_XAD_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_XAD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_XAD_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_XAD_U32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_XAD_U32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_XAD_U32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_XAD_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_XAD_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec @@ -3536,12 +3488,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_add_i32_e64 - ; GCN: renamable $vgpr0 = V_ADD_I32_e64 1, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_ADD_I32_e64 2, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_ADD_I32_e64 3, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e64 1, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: [[V_ADD_I32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_I32_e64 2, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: [[V_ADD_I32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_I32_e64 3, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_I32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_I32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_I32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = 
V_ADD_I32_e64 1, undef %0:vgpr_32, 0, implicit $exec %2:vgpr_32 = V_ADD_I32_e64 2, undef %0:vgpr_32, 0, implicit $exec @@ -3557,12 +3509,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_add_lshl_u32_e64 - ; GCN: renamable $vgpr0 = V_ADD_LSHL_U32_e64 1, 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_ADD_LSHL_U32_e64 2, 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_ADD_LSHL_U32_e64 3, 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_ADD_LSHL_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_LSHL_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_ADD_LSHL_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_LSHL_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_ADD_LSHL_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_LSHL_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_LSHL_U32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_LSHL_U32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_LSHL_U32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_ADD_LSHL_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_ADD_LSHL_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec @@ -3578,12 +3530,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_sub_i32_e64 - ; GCN: renamable $vgpr0 = V_SUB_I32_e64 1, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_SUB_I32_e64 2, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_SUB_I32_e64 3, undef $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_SUB_I32_e64_:%[0-9]+]]:vgpr_32 = V_SUB_I32_e64 1, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: [[V_SUB_I32_e64_1:%[0-9]+]]:vgpr_32 = 
V_SUB_I32_e64 2, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: [[V_SUB_I32_e64_2:%[0-9]+]]:vgpr_32 = V_SUB_I32_e64 3, undef %1:vgpr_32, 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_I32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_I32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_I32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_SUB_I32_e64 1, undef %0:vgpr_32, 0, implicit $exec %2:vgpr_32 = V_SUB_I32_e64 2, undef %0:vgpr_32, 0, implicit $exec @@ -3599,12 +3551,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_lshl_add_u32_e64 - ; GCN: renamable $vgpr0 = V_LSHL_ADD_U32_e64 1, 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_LSHL_ADD_U32_e64 2, 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_LSHL_ADD_U32_e64 3, 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_LSHL_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_ADD_U32_e64 1, 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_LSHL_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHL_ADD_U32_e64 2, 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_LSHL_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_LSHL_ADD_U32_e64 3, 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_ADD_U32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_ADD_U32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_ADD_U32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_LSHL_ADD_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_LSHL_ADD_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec @@ -3620,12 +3572,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_v_lshl_or_b32_e64 - ; GCN: renamable $vgpr0 = V_LSHL_OR_B32_e64 1, 1, undef $vgpr0, implicit $exec - ; GCN-NEXT: renamable $vgpr1 = V_LSHL_OR_B32_e64 2, 2, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed 
renamable $vgpr0 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr0 = V_LSHL_OR_B32_e64 3, 3, undef $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 + ; GCN: [[V_LSHL_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 1, 1, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_LSHL_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 2, 2, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: [[V_LSHL_OR_B32_e64_2:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 3, 3, undef %1:vgpr_32, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_OR_B32_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_OR_B32_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHL_OR_B32_e64_2]] ; GCN-NEXT: S_ENDPGM 0 %1:vgpr_32 = V_LSHL_OR_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec %2:vgpr_32 = V_LSHL_OR_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec @@ -3645,13 +3597,14 @@ body: | ; GCN-LABEL: name: test_remat_v_lshlrev_b16_e32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B16_e32 1, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B16_e32 2, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B16_e32 3, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_LSHLREV_B16_e32_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_LSHLREV_B16_e32_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 2, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_LSHLREV_B16_e32_2:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e32 3, [[COPY]], implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B16_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B16_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B16_e32_2]] + ; GCN-NEXT: 
S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_LSHLREV_B16_e32 1, %0:vgpr_32, implicit $exec %2:vgpr_32 = V_LSHLREV_B16_e32 2, %0:vgpr_32, implicit $exec @@ -3670,13 +3623,14 @@ body: | ; GCN-LABEL: name: test_remat_v_lshlrev_b16_e64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B16_e64 1, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B16_e64 2, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B16_e64 3, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_LSHLREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_LSHLREV_B16_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 2, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_LSHLREV_B16_e64_2:%[0-9]+]]:vgpr_32 = V_LSHLREV_B16_e64 3, [[COPY]], implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B16_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B16_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHLREV_B16_e64_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_LSHLREV_B16_e64 1, %0:vgpr_32, implicit $exec %2:vgpr_32 = V_LSHLREV_B16_e64 2, %0:vgpr_32, implicit $exec @@ -3696,13 +3650,14 @@ body: | ; GCN-LABEL: name: test_remat_v_lshrrev_b16_e32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B16_e32 1, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B16_e32 2, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B16_e32 3, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; 
GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_LSHRREV_B16_e32_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_LSHRREV_B16_e32_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 2, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_LSHRREV_B16_e32_2:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e32 3, [[COPY]], implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B16_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B16_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B16_e32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_LSHRREV_B16_e32 1, %0:vgpr_32, implicit $exec %2:vgpr_32 = V_LSHRREV_B16_e32 2, %0:vgpr_32, implicit $exec @@ -3721,13 +3676,14 @@ body: | ; GCN-LABEL: name: test_remat_v_lshrrev_b16_e64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B16_e64 1, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B16_e64 2, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B16_e64 3, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_LSHRREV_B16_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 2, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_LSHRREV_B16_e64_2:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 3, [[COPY]], implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B16_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B16_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LSHRREV_B16_e64_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_LSHRREV_B16_e64 1, %0:vgpr_32, implicit 
$exec %2:vgpr_32 = V_LSHRREV_B16_e64 2, %0:vgpr_32, implicit $exec @@ -3747,13 +3703,14 @@ body: | ; GCN-LABEL: name: test_remat_v_ashrrev_i16_e32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I16_e32 1, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I16_e32 2, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I16_e32 3, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_ASHRREV_I16_e32_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e32 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_ASHRREV_I16_e32_1:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e32 2, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_ASHRREV_I16_e32_2:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e32 3, [[COPY]], implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I16_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I16_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I16_e32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_ASHRREV_I16_e32 1, %0:vgpr_32, implicit $exec %2:vgpr_32 = V_ASHRREV_I16_e32 2, %0:vgpr_32, implicit $exec @@ -3772,13 +3729,14 @@ body: | ; GCN-LABEL: name: test_remat_v_ashrrev_i16_e64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I16_e64 1, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I16_e64 2, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I16_e64 3, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; 
GCN-NEXT: [[V_ASHRREV_I16_e64_:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_ASHRREV_I16_e64_1:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 2, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_ASHRREV_I16_e64_2:%[0-9]+]]:vgpr_32 = V_ASHRREV_I16_e64 3, [[COPY]], implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I16_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I16_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ASHRREV_I16_e64_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_ASHRREV_I16_e64 1, %0:vgpr_32, implicit $exec %2:vgpr_32 = V_ASHRREV_I16_e64 2, %0:vgpr_32, implicit $exec @@ -3798,13 +3756,14 @@ body: | ; GCN-LABEL: name: test_remat_v_add_u16_e32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_ADD_U16_e32 1, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_ADD_U16_e32 2, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_ADD_U16_e32 3, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_ADD_U16_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e32 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_ADD_U16_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U16_e32 2, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_ADD_U16_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U16_e32 3, [[COPY]], implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U16_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U16_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U16_e32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_ADD_U16_e32 1, %0:vgpr_32, implicit $exec %2:vgpr_32 = V_ADD_U16_e32 2, %0:vgpr_32, implicit $exec @@ -3824,13 +3783,14 @@ body: | ; GCN-LABEL: name: test_remat_v_add_u16_e64 ; GCN: liveins: $vgpr0 ; 
GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_ADD_U16_e64 1, $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_ADD_U16_e64 2, $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_ADD_U16_e64 3, $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_ADD_U16_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 1, [[COPY]], 0, implicit $exec + ; GCN-NEXT: [[V_ADD_U16_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 2, [[COPY]], 0, implicit $exec + ; GCN-NEXT: [[V_ADD_U16_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U16_e64 3, [[COPY]], 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U16_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U16_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_U16_e64_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_ADD_U16_e64 1, %0:vgpr_32, 0, implicit $exec %2:vgpr_32 = V_ADD_U16_e64 2, %0:vgpr_32, 0, implicit $exec @@ -3850,13 +3810,14 @@ body: | ; GCN-LABEL: name: test_remat_v_sub_u16_e32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_SUB_U16_e32 1, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_SUB_U16_e32 2, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_SUB_U16_e32 3, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_SUB_U16_e32_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e32 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_SUB_U16_e32_1:%[0-9]+]]:vgpr_32 = V_SUB_U16_e32 2, [[COPY]], implicit $exec + ; GCN-NEXT: 
[[V_SUB_U16_e32_2:%[0-9]+]]:vgpr_32 = V_SUB_U16_e32 3, [[COPY]], implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U16_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U16_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U16_e32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_SUB_U16_e32 1, %0:vgpr_32, implicit $exec %2:vgpr_32 = V_SUB_U16_e32 2, %0:vgpr_32, implicit $exec @@ -3876,13 +3837,14 @@ body: | ; GCN-LABEL: name: test_remat_v_sub_u16_e64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_SUB_U16_e64 1, $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_SUB_U16_e64 2, $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_SUB_U16_e64 3, $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_SUB_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 1, [[COPY]], 0, implicit $exec + ; GCN-NEXT: [[V_SUB_U16_e64_1:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 2, [[COPY]], 0, implicit $exec + ; GCN-NEXT: [[V_SUB_U16_e64_2:%[0-9]+]]:vgpr_32 = V_SUB_U16_e64 3, [[COPY]], 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U16_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U16_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_U16_e64_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_SUB_U16_e64 1, %0:vgpr_32, 0, implicit $exec %2:vgpr_32 = V_SUB_U16_e64 2, %0:vgpr_32, 0, implicit $exec @@ -3902,13 +3864,14 @@ body: | ; GCN-LABEL: name: test_remat_v_subrev_u16_e32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_SUBREV_U16_e32 1, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_SUBREV_U16_e32 2, $vgpr0, 
implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_SUBREV_U16_e32 3, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_SUBREV_U16_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_U16_e32 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_SUBREV_U16_e32_1:%[0-9]+]]:vgpr_32 = V_SUBREV_U16_e32 2, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_SUBREV_U16_e32_2:%[0-9]+]]:vgpr_32 = V_SUBREV_U16_e32 3, [[COPY]], implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U16_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U16_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U16_e32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_SUBREV_U16_e32 1, %0:vgpr_32, implicit $exec %2:vgpr_32 = V_SUBREV_U16_e32 2, %0:vgpr_32, implicit $exec @@ -3928,13 +3891,14 @@ body: | ; GCN-LABEL: name: test_remat_v_subrev_u16_e64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_SUBREV_U16_e64 1, $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_SUBREV_U16_e64 2, $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_SUBREV_U16_e64 3, $vgpr0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_SUBREV_U16_e64_:%[0-9]+]]:vgpr_32 = V_SUBREV_U16_e64 1, [[COPY]], 0, implicit $exec + ; GCN-NEXT: [[V_SUBREV_U16_e64_1:%[0-9]+]]:vgpr_32 = V_SUBREV_U16_e64 2, [[COPY]], 0, implicit $exec + ; GCN-NEXT: [[V_SUBREV_U16_e64_2:%[0-9]+]]:vgpr_32 = V_SUBREV_U16_e64 3, [[COPY]], 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U16_e64_]] + ; GCN-NEXT: S_NOP 0, implicit 
[[V_SUBREV_U16_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_U16_e64_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_SUBREV_U16_e64 1, %0:vgpr_32, 0, implicit $exec %2:vgpr_32 = V_SUBREV_U16_e64 2, %0:vgpr_32, 0, implicit $exec @@ -3954,13 +3918,14 @@ body: | ; GCN-LABEL: name: test_remat_v_min_u16_e32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_MIN_U16_e32 1, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_MIN_U16_e32 2, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_MIN_U16_e32 3, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MIN_U16_e32_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e32 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_MIN_U16_e32_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e32 2, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_MIN_U16_e32_2:%[0-9]+]]:vgpr_32 = V_MIN_U16_e32 3, [[COPY]], implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U16_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U16_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U16_e32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_MIN_U16_e32 1, %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MIN_U16_e32 2, %0:vgpr_32, implicit $exec @@ -3980,13 +3945,14 @@ body: | ; GCN-LABEL: name: test_remat_v_min_u16_e64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_MIN_U16_e64 1, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_MIN_U16_e64 2, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_MIN_U16_e64 3, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed 
renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 2, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_MIN_U16_e64_2:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 3, [[COPY]], implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U16_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U16_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_U16_e64_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_MIN_U16_e64 1, %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MIN_U16_e64 2, %0:vgpr_32, implicit $exec @@ -4006,13 +3972,14 @@ body: | ; GCN-LABEL: name: test_remat_v_max_u16_e32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_MAX_U16_e32 1, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_MAX_U16_e32 2, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_MAX_U16_e32 3, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MAX_U16_e32_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e32 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_MAX_U16_e32_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e32 2, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_MAX_U16_e32_2:%[0-9]+]]:vgpr_32 = V_MAX_U16_e32 3, [[COPY]], implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U16_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U16_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U16_e32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_MAX_U16_e32 1, %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MAX_U16_e32 2, %0:vgpr_32, implicit $exec @@ -4032,13 
+3999,14 @@ body: | ; GCN-LABEL: name: test_remat_v_max_u16_e64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_MAX_U16_e64 1, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_MAX_U16_e64 2, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_MAX_U16_e64 3, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 2, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_MAX_U16_e64_2:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 3, [[COPY]], implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U16_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U16_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_U16_e64_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_MAX_U16_e64 1, %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MAX_U16_e64 2, %0:vgpr_32, implicit $exec @@ -4058,13 +4026,14 @@ body: | ; GCN-LABEL: name: test_remat_v_min_i16_e32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_MIN_I16_e32 1, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_MIN_I16_e32 2, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_MIN_I16_e32 3, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MIN_I16_e32_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e32 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_MIN_I16_e32_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e32 
2, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_MIN_I16_e32_2:%[0-9]+]]:vgpr_32 = V_MIN_I16_e32 3, [[COPY]], implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I16_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I16_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I16_e32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_MIN_I16_e32 1, %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MIN_I16_e32 2, %0:vgpr_32, implicit $exec @@ -4084,13 +4053,14 @@ body: | ; GCN-LABEL: name: test_remat_v_min_i16_e64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_MIN_I16_e64 1, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_MIN_I16_e64 2, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_MIN_I16_e64 3, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 2, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_MIN_I16_e64_2:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 3, [[COPY]], implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I16_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I16_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_I16_e64_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_MIN_I16_e64 1, %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MIN_I16_e64 2, %0:vgpr_32, implicit $exec @@ -4110,13 +4080,14 @@ body: | ; GCN-LABEL: name: test_remat_v_max_i16_e32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_MAX_I16_e32 1, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_MAX_I16_e32 2, 
$vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_MAX_I16_e32 3, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MAX_I16_e32_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e32 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_MAX_I16_e32_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e32 2, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_MAX_I16_e32_2:%[0-9]+]]:vgpr_32 = V_MAX_I16_e32 3, [[COPY]], implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I16_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I16_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I16_e32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_MAX_I16_e32 1, %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MAX_I16_e32 2, %0:vgpr_32, implicit $exec @@ -4136,13 +4107,14 @@ body: | ; GCN-LABEL: name: test_remat_v_max_i16_e64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_MAX_I16_e64 1, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_MAX_I16_e64 2, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_MAX_I16_e64 3, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 2, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_MAX_I16_e64_2:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 3, [[COPY]], implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I16_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I16_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_I16_e64_2]] + ; 
GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_MAX_I16_e64 1, %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MAX_I16_e64 2, %0:vgpr_32, implicit $exec @@ -4162,13 +4134,14 @@ body: | ; GCN-LABEL: name: test_remat_v_mul_lo_u16_e32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_U16_e32 1, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_U16_e32 2, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_U16_e32 3, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MUL_LO_U16_e32_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U16_e32 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_MUL_LO_U16_e32_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_U16_e32 2, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_MUL_LO_U16_e32_2:%[0-9]+]]:vgpr_32 = V_MUL_LO_U16_e32 3, [[COPY]], implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U16_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U16_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U16_e32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_MUL_LO_U16_e32 1, %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MUL_LO_U16_e32 2, %0:vgpr_32, implicit $exec @@ -4188,13 +4161,14 @@ body: | ; GCN-LABEL: name: test_remat_v_mul_lo_u16_e64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_U16_e64 1, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_U16_e64 2, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_U16_e64 3, $vgpr0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 
0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MUL_LO_U16_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U16_e64 1, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_MUL_LO_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_LO_U16_e64 2, [[COPY]], implicit $exec + ; GCN-NEXT: [[V_MUL_LO_U16_e64_2:%[0-9]+]]:vgpr_32 = V_MUL_LO_U16_e64 3, [[COPY]], implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U16_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U16_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_LO_U16_e64_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_MUL_LO_U16_e64 1, %0:vgpr_32, implicit $exec %2:vgpr_32 = V_MUL_LO_U16_e64 2, %0:vgpr_32, implicit $exec @@ -4214,13 +4188,14 @@ body: | ; GCN-LABEL: name: test_remat_v_add_f16_e32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F16_e32 1, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F16_e32 2, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F16_e32 3, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_ADD_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e32 1, [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: [[V_ADD_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e32 2, [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: [[V_ADD_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e32 3, [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F16_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F16_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F16_e32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] 
%0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_ADD_F16_e32 1, %0:vgpr_32, implicit $mode, implicit $exec %2:vgpr_32 = nofpexcept V_ADD_F16_e32 2, %0:vgpr_32, implicit $mode, implicit $exec @@ -4240,13 +4215,14 @@ body: | ; GCN-LABEL: name: test_remat_v_add_f16_e64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F16_e64 0, 1, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F16_e64 0, 2, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F16_e64 0, 3, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_ADD_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, 1, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_ADD_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, 2, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_ADD_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F16_e64 0, 3, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F16_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F16_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_ADD_F16_e64_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_ADD_F16_e64 0, 1, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec %2:vgpr_32 = nofpexcept V_ADD_F16_e64 0, 2, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec @@ -4266,13 +4242,14 @@ body: | ; GCN-LABEL: name: test_remat_v_sub_f16_e32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUB_F16_e32 1, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed 
renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUB_F16_e32 2, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUB_F16_e32 3, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_SUB_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F16_e32 1, [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: [[V_SUB_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F16_e32 2, [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: [[V_SUB_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F16_e32 3, [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F16_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F16_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F16_e32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_SUB_F16_e32 1, %0:vgpr_32, implicit $mode, implicit $exec %2:vgpr_32 = nofpexcept V_SUB_F16_e32 2, %0:vgpr_32, implicit $mode, implicit $exec @@ -4292,13 +4269,14 @@ body: | ; GCN-LABEL: name: test_remat_v_sub_f16_e64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUB_F16_e64 0, 1, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUB_F16_e64 0, 2, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUB_F16_e64 0, 3, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_SUB_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept 
V_SUB_F16_e64 0, 1, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_SUB_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F16_e64 0, 2, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_SUB_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SUB_F16_e64 0, 3, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F16_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F16_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUB_F16_e64_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_SUB_F16_e64 0, 1, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec %2:vgpr_32 = nofpexcept V_SUB_F16_e64 0, 2, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec @@ -4318,13 +4296,14 @@ body: | ; GCN-LABEL: name: test_remat_v_subrev_f16_e32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUBREV_F16_e32 1, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUBREV_F16_e32 2, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUBREV_F16_e32 3, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_SUBREV_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F16_e32 1, [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: [[V_SUBREV_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F16_e32 2, [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: [[V_SUBREV_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F16_e32 3, [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F16_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F16_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit 
[[V_SUBREV_F16_e32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_SUBREV_F16_e32 1, %0:vgpr_32, implicit $mode, implicit $exec %2:vgpr_32 = nofpexcept V_SUBREV_F16_e32 2, %0:vgpr_32, implicit $mode, implicit $exec @@ -4344,13 +4323,14 @@ body: | ; GCN-LABEL: name: test_remat_v_subrev_f16_e64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUBREV_F16_e64 0, 1, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUBREV_F16_e64 0, 2, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUBREV_F16_e64 0, 3, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_SUBREV_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F16_e64 0, 1, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_SUBREV_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F16_e64 0, 2, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_SUBREV_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_SUBREV_F16_e64 0, 3, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F16_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F16_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_SUBREV_F16_e64_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_SUBREV_F16_e64 0, 1, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec %2:vgpr_32 = nofpexcept V_SUBREV_F16_e64 0, 2, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec @@ -4370,13 +4350,14 @@ body: | ; GCN-LABEL: name: test_remat_v_mul_f16_e32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: 
renamable $vgpr1 = nofpexcept V_MUL_F16_e32 1, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F16_e32 2, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F16_e32 3, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MUL_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e32 1, [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e32 2, [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e32 3, [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F16_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F16_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F16_e32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_MUL_F16_e32 1, %0:vgpr_32, implicit $mode, implicit $exec %2:vgpr_32 = nofpexcept V_MUL_F16_e32 2, %0:vgpr_32, implicit $mode, implicit $exec @@ -4396,13 +4377,14 @@ body: | ; GCN-LABEL: name: test_remat_v_mul_f16_e64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F16_e64 0, 1, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F16_e64 0, 2, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F16_e64 0, 3, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable 
$vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MUL_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e64 0, 1, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e64 0, 2, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MUL_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MUL_F16_e64 0, 3, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F16_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F16_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MUL_F16_e64_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_MUL_F16_e64 0, 1, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec %2:vgpr_32 = nofpexcept V_MUL_F16_e64 0, 2, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec @@ -4422,13 +4404,14 @@ body: | ; GCN-LABEL: name: test_remat_v_ldexp_f16_e32 ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F16_e32 1, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F16_e32 1, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F16_e32 1, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_LDEXP_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F16_e32 1, [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: [[V_LDEXP_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F16_e32 1, [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: [[V_LDEXP_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F16_e32 1, [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit 
[[V_LDEXP_F16_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F16_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F16_e32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_LDEXP_F16_e32 1, %0, implicit $mode, implicit $exec %2:vgpr_32 = nofpexcept V_LDEXP_F16_e32 1, %0, implicit $mode, implicit $exec @@ -4448,13 +4431,14 @@ body: | ; GCN-LABEL: name: test_remat_v_ldexp_f16_e64 ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F16_e64 0, $vgpr0, 0, 1, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F16_e64 0, $vgpr0, 0, 1, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F16_e64 0, $vgpr0, 0, 1, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_LDEXP_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, 1, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_LDEXP_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, 1, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_LDEXP_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, 1, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F16_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F16_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_LDEXP_F16_e64_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, %0, 0, 1, 0, 0, implicit $mode, implicit $exec %2:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, %0, 0, 1, 0, 0, implicit $mode, implicit $exec @@ -4474,13 +4458,14 @@ body: | ; GCN-LABEL: name: test_remat_v_min_f16_e32 
; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_F16_e32 1, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_F16_e32 2, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_F16_e32 3, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MIN_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e32 1, [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MIN_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e32 2, [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MIN_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e32 3, [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F16_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F16_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F16_e32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_MIN_F16_e32 1, %0:vgpr_32, implicit $mode, implicit $exec %2:vgpr_32 = nofpexcept V_MIN_F16_e32 2, %0:vgpr_32, implicit $mode, implicit $exec @@ -4500,13 +4485,14 @@ body: | ; GCN-LABEL: name: test_remat_v_min_f16_e64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_F16_e64 0, 1, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_F16_e64 0, 2, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_F16_e64 0, 3, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - 
; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MIN_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, 1, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MIN_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, 2, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MIN_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MIN_F16_e64 0, 3, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F16_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F16_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MIN_F16_e64_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_MIN_F16_e64 0, 1, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec %2:vgpr_32 = nofpexcept V_MIN_F16_e64 0, 2, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec @@ -4526,13 +4512,14 @@ body: | ; GCN-LABEL: name: test_remat_v_max_f16_e32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_F16_e32 1, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_F16_e32 2, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_F16_e32 3, $vgpr0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MAX_F16_e32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e32 1, [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MAX_F16_e32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e32 2, [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MAX_F16_e32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e32 3, [[COPY]], implicit $mode, implicit $exec + ; GCN-NEXT: 
S_NOP 0, implicit [[V_MAX_F16_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F16_e32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F16_e32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_MAX_F16_e32 1, %0:vgpr_32, implicit $mode, implicit $exec %2:vgpr_32 = nofpexcept V_MAX_F16_e32 2, %0:vgpr_32, implicit $mode, implicit $exec @@ -4552,13 +4539,14 @@ body: | ; GCN-LABEL: name: test_remat_v_max_f16_e64 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_F16_e64 0, 1, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_F16_e64 0, 2, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_F16_e64 0, 3, 0, $vgpr0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, 1, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MAX_F16_e64_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, 2, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MAX_F16_e64_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAX_F16_e64 0, 3, 0, [[COPY]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F16_e64_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F16_e64_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAX_F16_e64_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_MAX_F16_e64 0, 1, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec %2:vgpr_32 = nofpexcept V_MAX_F16_e64 0, 2, 0, %0:vgpr_32, 0, 0, implicit $mode, implicit $exec @@ -4578,13 +4566,14 @@ body: | ; GCN-LABEL: name: test_remat_v_madak_f16 
; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADAK_F16 1, $vgpr0, 1, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADAK_F16 2, $vgpr0, 2, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADAK_F16 3, $vgpr0, 3, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MADAK_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_MADAK_F16 1, [[COPY]], 1, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MADAK_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MADAK_F16 2, [[COPY]], 2, implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MADAK_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MADAK_F16 3, [[COPY]], 3, implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_MADAK_F16_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MADAK_F16_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MADAK_F16_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_MADAK_F16 1, %0, 1, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_MADAK_F16 2, %0, 2, implicit $exec, implicit $mode @@ -4604,13 +4593,14 @@ body: | ; GCN-LABEL: name: test_remat_v_madmk_f16 ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADMK_F16 1, 1, $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADMK_F16 2, 2, $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADMK_F16 3, 3, $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed 
renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MADMK_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_MADMK_F16 1, 1, [[COPY]], implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MADMK_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MADMK_F16 2, 2, [[COPY]], implicit $exec, implicit $mode + ; GCN-NEXT: [[V_MADMK_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MADMK_F16 3, 3, [[COPY]], implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_MADMK_F16_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MADMK_F16_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MADMK_F16_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_MADMK_F16 1, 1, %0, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_MADMK_F16 2, 2, %0, implicit $exec, implicit $mode @@ -4630,13 +4620,14 @@ body: | ; GCN-LABEL: name: test_remat_v_fmamk_f16 ; GCN: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMAMK_F16 1, 1, $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMAMK_F16 2, 2, $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMAMK_F16 3, 3, $vgpr0, implicit $exec, implicit $mode - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_FMAMK_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAMK_F16 1, 1, [[COPY]], implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FMAMK_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAMK_F16 2, 2, [[COPY]], implicit $exec, implicit $mode + ; GCN-NEXT: [[V_FMAMK_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FMAMK_F16 3, 3, [[COPY]], implicit $exec, implicit $mode + ; GCN-NEXT: S_NOP 0, implicit [[V_FMAMK_F16_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FMAMK_F16_1]] + ; GCN-NEXT: S_NOP 0, implicit 
[[V_FMAMK_F16_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_FMAMK_F16 1, 1, %0, implicit $exec, implicit $mode %2:vgpr_32 = nofpexcept V_FMAMK_F16 2, 2, %0, implicit $exec, implicit $mode @@ -4656,13 +4647,14 @@ body: | ; GCN-LABEL: name: test_remat_v_pk_mad_i16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_PK_MAD_I16 8, $vgpr0, 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_MAD_I16 9, $vgpr0, 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_MAD_I16 10, $vgpr0, 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_PK_MAD_I16_:%[0-9]+]]:vgpr_32 = V_PK_MAD_I16 8, [[COPY]], 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_MAD_I16_1:%[0-9]+]]:vgpr_32 = V_PK_MAD_I16 9, [[COPY]], 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_MAD_I16_2:%[0-9]+]]:vgpr_32 = V_PK_MAD_I16 10, [[COPY]], 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAD_I16_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAD_I16_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAD_I16_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_PK_MAD_I16 8, %0, 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec @@ -4683,13 +4675,14 @@ body: | ; GCN-LABEL: name: test_remat_v_pk_mad_u16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_PK_MAD_U16 8, $vgpr0, 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_MAD_U16 9, 
$vgpr0, 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_MAD_U16 10, $vgpr0, 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_PK_MAD_U16_:%[0-9]+]]:vgpr_32 = V_PK_MAD_U16 8, [[COPY]], 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_MAD_U16_1:%[0-9]+]]:vgpr_32 = V_PK_MAD_U16 9, [[COPY]], 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_MAD_U16_2:%[0-9]+]]:vgpr_32 = V_PK_MAD_U16 10, [[COPY]], 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAD_U16_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAD_U16_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAD_U16_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_PK_MAD_U16 8, %0, 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec @@ -4710,13 +4703,14 @@ body: | ; GCN-LABEL: name: test_remat_v_pk_add_u16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_U16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_U16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_U16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_PK_ADD_U16_:%[0-9]+]]:vgpr_32 = V_PK_ADD_U16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_ADD_U16_1:%[0-9]+]]:vgpr_32 = V_PK_ADD_U16 9, [[COPY]], 9, [[COPY]], 0, 
0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_ADD_U16_2:%[0-9]+]]:vgpr_32 = V_PK_ADD_U16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_U16_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_U16_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_U16_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_PK_ADD_U16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec @@ -4737,13 +4731,14 @@ body: | ; GCN-LABEL: name: test_remat_v_pk_add_i16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_I16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_I16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_I16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_PK_ADD_I16_:%[0-9]+]]:vgpr_32 = V_PK_ADD_I16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_ADD_I16_1:%[0-9]+]]:vgpr_32 = V_PK_ADD_I16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_ADD_I16_2:%[0-9]+]]:vgpr_32 = V_PK_ADD_I16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_I16_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_I16_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_I16_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_PK_ADD_I16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec @@ -4764,13 +4759,14 @@ body: | ; GCN-LABEL: name: test_remat_v_pk_mul_lo_u16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_PK_MUL_LO_U16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit 
$exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_MUL_LO_U16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_MUL_LO_U16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_PK_MUL_LO_U16_:%[0-9]+]]:vgpr_32 = V_PK_MUL_LO_U16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_MUL_LO_U16_1:%[0-9]+]]:vgpr_32 = V_PK_MUL_LO_U16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_MUL_LO_U16_2:%[0-9]+]]:vgpr_32 = V_PK_MUL_LO_U16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_LO_U16_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_LO_U16_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_LO_U16_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_PK_MUL_LO_U16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec @@ -4791,13 +4787,14 @@ body: | ; GCN-LABEL: name: test_remat_v_pk_min_i16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_PK_MIN_I16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_MIN_I16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_MIN_I16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_PK_MIN_I16_:%[0-9]+]]:vgpr_32 = V_PK_MIN_I16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: 
[[V_PK_MIN_I16_1:%[0-9]+]]:vgpr_32 = V_PK_MIN_I16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_MIN_I16_2:%[0-9]+]]:vgpr_32 = V_PK_MIN_I16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_I16_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_I16_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_I16_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_PK_MIN_I16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec @@ -4818,13 +4815,14 @@ body: | ; GCN-LABEL: name: test_remat_v_pk_max_i16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_PK_MAX_I16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_MAX_I16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_MAX_I16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_PK_MAX_I16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_I16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_MAX_I16_1:%[0-9]+]]:vgpr_32 = V_PK_MAX_I16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_MAX_I16_2:%[0-9]+]]:vgpr_32 = V_PK_MAX_I16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_I16_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_I16_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_I16_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_PK_MAX_I16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec @@ -4845,13 +4843,14 @@ body: | ; GCN-LABEL: name: test_remat_v_pk_min_u16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: 
renamable $vgpr1 = V_PK_MIN_U16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_MIN_U16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_MIN_U16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_PK_MIN_U16_:%[0-9]+]]:vgpr_32 = V_PK_MIN_U16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_MIN_U16_1:%[0-9]+]]:vgpr_32 = V_PK_MIN_U16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_MIN_U16_2:%[0-9]+]]:vgpr_32 = V_PK_MIN_U16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_U16_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_U16_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_U16_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_PK_MIN_U16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec @@ -4872,13 +4871,14 @@ body: | ; GCN-LABEL: name: test_remat_v_pk_max_u16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_PK_MAX_U16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_MAX_U16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_MAX_U16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_PK_MAX_U16_:%[0-9]+]]:vgpr_32 = V_PK_MAX_U16 8, [[COPY]], 8, [[COPY]], 0, 
0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_MAX_U16_1:%[0-9]+]]:vgpr_32 = V_PK_MAX_U16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_MAX_U16_2:%[0-9]+]]:vgpr_32 = V_PK_MAX_U16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_U16_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_U16_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_U16_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_PK_MAX_U16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec @@ -4899,13 +4899,14 @@ body: | ; GCN-LABEL: name: test_remat_v_pk_sub_u16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_PK_SUB_U16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_SUB_U16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_SUB_U16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_PK_SUB_U16_:%[0-9]+]]:vgpr_32 = V_PK_SUB_U16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_SUB_U16_1:%[0-9]+]]:vgpr_32 = V_PK_SUB_U16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_SUB_U16_2:%[0-9]+]]:vgpr_32 = V_PK_SUB_U16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_SUB_U16_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_SUB_U16_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_SUB_U16_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_PK_SUB_U16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec @@ -4926,13 +4927,14 @@ body: | ; GCN-LABEL: name: test_remat_v_pk_sub_i16 ; GCN: liveins: 
$vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_PK_SUB_I16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_SUB_I16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_SUB_I16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_PK_SUB_I16_:%[0-9]+]]:vgpr_32 = V_PK_SUB_I16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_SUB_I16_1:%[0-9]+]]:vgpr_32 = V_PK_SUB_I16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_SUB_I16_2:%[0-9]+]]:vgpr_32 = V_PK_SUB_I16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_SUB_I16_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_SUB_I16_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_SUB_I16_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_PK_SUB_I16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec @@ -4953,13 +4955,14 @@ body: | ; GCN-LABEL: name: test_remat_v_pk_lshlrev_b16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_PK_LSHLREV_B16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_LSHLREV_B16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_LSHLREV_B16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: 
[[V_PK_LSHLREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_LSHLREV_B16_1:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_LSHLREV_B16_2:%[0-9]+]]:vgpr_32 = V_PK_LSHLREV_B16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_LSHLREV_B16_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_LSHLREV_B16_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_LSHLREV_B16_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_PK_LSHLREV_B16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec @@ -4980,13 +4983,14 @@ body: | ; GCN-LABEL: name: test_remat_v_pk_ashrrev_i16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_PK_ASHRREV_I16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_ASHRREV_I16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_ASHRREV_I16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_PK_ASHRREV_I16_:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_ASHRREV_I16_1:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_ASHRREV_I16_2:%[0-9]+]]:vgpr_32 = V_PK_ASHRREV_I16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ASHRREV_I16_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ASHRREV_I16_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ASHRREV_I16_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY 
$vgpr0 %1:vgpr_32 = V_PK_ASHRREV_I16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec @@ -5007,13 +5011,14 @@ body: | ; GCN-LABEL: name: test_remat_v_pk_lshrrev_b16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_PK_LSHRREV_B16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_LSHRREV_B16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = V_PK_LSHRREV_B16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_PK_LSHRREV_B16_:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_LSHRREV_B16_1:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_LSHRREV_B16_2:%[0-9]+]]:vgpr_32 = V_PK_LSHRREV_B16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_LSHRREV_B16_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_LSHRREV_B16_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_LSHRREV_B16_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_PK_LSHRREV_B16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $exec @@ -5034,13 +5039,14 @@ body: | ; GCN-LABEL: name: test_remat_v_pk_add_f16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_ADD_F16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_ADD_F16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = 
nofpexcept V_PK_ADD_F16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_PK_ADD_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_PK_ADD_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_PK_ADD_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_ADD_F16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F16_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F16_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F16_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_PK_ADD_F16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec @@ -5064,19 +5070,14 @@ body: | ; GCN-LABEL: name: test_no_remat_v_pk_add_f16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_F16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_F16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: renamable $vgpr1 = V_PK_ADD_F16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) - ; GCN-NEXT: renamable $vgpr1 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, 
addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = SI_SPILL_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_PK_ADD_F16_:%[0-9]+]]:vgpr_32 = V_PK_ADD_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_PK_ADD_F16_1:%[0-9]+]]:vgpr_32 = V_PK_ADD_F16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_PK_ADD_F16_2:%[0-9]+]]:vgpr_32 = V_PK_ADD_F16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F16_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F16_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F16_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = V_PK_ADD_F16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec @@ -5097,13 +5098,14 @@ body: | ; GCN-LABEL: name: test_remat_v_pk_mul_f16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MUL_F16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MUL_F16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MUL_F16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + 
; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_PK_MUL_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MUL_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_PK_MUL_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MUL_F16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_PK_MUL_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MUL_F16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_F16_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_F16_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_F16_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_PK_MUL_F16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec @@ -5124,13 +5126,14 @@ body: | ; GCN-LABEL: name: test_remat_v_pk_min_f16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MIN_F16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MIN_F16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MIN_F16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_PK_MIN_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MIN_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_PK_MIN_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MIN_F16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_PK_MIN_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MIN_F16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, 
implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_F16_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_F16_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MIN_F16_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_PK_MIN_F16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec @@ -5151,13 +5154,14 @@ body: | ; GCN-LABEL: name: test_remat_v_pk_max_f16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MAX_F16 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MAX_F16 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_MAX_F16 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_PK_MAX_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_PK_MAX_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_PK_MAX_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_MAX_F16 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_F16_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_F16_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MAX_F16_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_PK_MAX_F16 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec @@ -5178,13 +5182,14 @@ body: | ; GCN-LABEL: name: test_remat_v_pk_fma_f16 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} 
- ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_FMA_F16 8, $vgpr0, 8, $vgpr0, 8, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_FMA_F16 9, $vgpr0, 9, $vgpr0, 9, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_PK_FMA_F16 10, $vgpr0, 10, $vgpr0, 10, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_PK_FMA_F16_:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_FMA_F16 8, [[COPY]], 8, [[COPY]], 8, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_PK_FMA_F16_1:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_FMA_F16 9, [[COPY]], 9, [[COPY]], 9, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_PK_FMA_F16_2:%[0-9]+]]:vgpr_32 = nofpexcept V_PK_FMA_F16 10, [[COPY]], 10, [[COPY]], 10, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F16_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F16_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F16_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_PK_FMA_F16 8, %0, 8, %0, 8, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec %2:vgpr_32 = nofpexcept V_PK_FMA_F16 9, %0, 9, %0, 9, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec @@ -5205,13 +5210,14 @@ body: | ; GCN-LABEL: name: test_remat_v_mad_mix_f32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAD_MIX_F32 8, $vgpr0, 8, $vgpr0, 8, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAD_MIX_F32 9, $vgpr0, 9, $vgpr0, 9, $vgpr0, 
0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAD_MIX_F32 10, $vgpr0, 10, $vgpr0, 10, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_MAD_MIX_F32_:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_MIX_F32 8, [[COPY]], 8, [[COPY]], 8, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MAD_MIX_F32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_MIX_F32 9, [[COPY]], 9, [[COPY]], 9, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_MAD_MIX_F32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_MAD_MIX_F32 10, [[COPY]], 10, [[COPY]], 10, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_MIX_F32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_MIX_F32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MAD_MIX_F32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_MAD_MIX_F32 8, %0, 8, %0, 8, %0, 0, 0, 0, implicit $mode, implicit $exec %2:vgpr_32 = nofpexcept V_MAD_MIX_F32 9, %0, 9, %0, 9, %0, 0, 0, 0, implicit $mode, implicit $exec @@ -5231,13 +5237,14 @@ body: | ; GCN-LABEL: name: test_remat_v_fma_mix_f32 ; GCN: liveins: $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMA_MIX_F32 8, $vgpr0, 8, $vgpr0, 8, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMA_MIX_F32 9, $vgpr0, 9, $vgpr0, 9, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMA_MIX_F32 10, $vgpr0, 10, $vgpr0, 10, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_ENDPGM 0, 
implicit killed renamable $vgpr0 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: [[V_FMA_MIX_F32_:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, [[COPY]], 8, [[COPY]], 8, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_FMA_MIX_F32_1:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIX_F32 9, [[COPY]], 9, [[COPY]], 9, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_FMA_MIX_F32_2:%[0-9]+]]:vgpr_32 = nofpexcept V_FMA_MIX_F32 10, [[COPY]], 10, [[COPY]], 10, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_MIX_F32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_MIX_F32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_FMA_MIX_F32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vgpr_32 = COPY $vgpr0 %1:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %0, 8, %0, 8, %0, 0, 0, 0, implicit $mode, implicit $exec %2:vgpr_32 = nofpexcept V_FMA_MIX_F32 9, %0, 9, %0, 9, %0, 0, 0, 0, implicit $mode, implicit $exec @@ -5257,13 +5264,14 @@ body: | ; GCN-LABEL: name: test_remat_v_pk_fma_f32 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[V_PK_FMA_F32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_FMA_F32 8, [[COPY]], 8, 
[[COPY]], 11, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_PK_FMA_F32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_FMA_F32 8, [[COPY]], 8, [[COPY]], 11, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_PK_FMA_F32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_FMA_F32 8, [[COPY]], 8, [[COPY]], 11, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vreg_64_align2 = COPY $vgpr0_vgpr1 %1:vreg_64_align2 = nofpexcept V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec %2:vreg_64_align2 = nofpexcept V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec @@ -5285,19 +5293,14 @@ body: | ; GCN-LABEL: name: test_no_remat_v_pk_fma_f32 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.1, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.1, align 4, addrspace 5) - ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) - ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_FMA_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, $vgpr0_vgpr1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.2, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.2, align 4, addrspace 5) - ; GCN-NEXT: renamable $vgpr2_vgpr3 = SI_SPILL_V64_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.1, 
align 4, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr2_vgpr3 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr2_vgpr3 = SI_SPILL_V64_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.2, align 4, addrspace 5) - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[V_PK_FMA_F32_:%[0-9]+]]:vreg_64_align2 = V_PK_FMA_F32 8, [[COPY]], 8, [[COPY]], 11, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_PK_FMA_F32_1:%[0-9]+]]:vreg_64_align2 = V_PK_FMA_F32 8, [[COPY]], 8, [[COPY]], 11, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_PK_FMA_F32_2:%[0-9]+]]:vreg_64_align2 = V_PK_FMA_F32 8, [[COPY]], 8, [[COPY]], 11, [[COPY]], 0, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_FMA_F32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vreg_64_align2 = COPY $vgpr0_vgpr1 %1:vreg_64_align2 = V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec %2:vreg_64_align2 = V_PK_FMA_F32 8, %0, 8, %0, 11, %0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec @@ -5317,13 +5320,14 @@ body: | ; GCN-LABEL: name: test_remat_v_pk_mul_f32 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_MUL_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_MUL_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit 
$mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_MUL_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[V_PK_MUL_F32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, [[COPY]], 8, [[COPY]], 11, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_PK_MUL_F32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, [[COPY]], 8, [[COPY]], 11, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_PK_MUL_F32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, [[COPY]], 8, [[COPY]], 11, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_F32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_F32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MUL_F32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vreg_64_align2 = COPY $vgpr0_vgpr1 %1:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec %2:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec @@ -5343,13 +5347,14 @@ body: | ; GCN-LABEL: name: test_remat_v_pk_add_f32 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_ADD_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_ADD_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_PK_ADD_F32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $mode, 
implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[V_PK_ADD_F32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, [[COPY]], 8, [[COPY]], 11, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_PK_ADD_F32_1:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, [[COPY]], 8, [[COPY]], 11, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[V_PK_ADD_F32_2:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, [[COPY]], 8, [[COPY]], 11, 0, 0, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_ADD_F32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vreg_64_align2 = COPY $vgpr0_vgpr1 %1:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec %2:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $mode, implicit $exec @@ -5369,13 +5374,14 @@ body: | ; GCN-LABEL: name: test_remat_v_pk_mov_b32 ; GCN: liveins: $vgpr0_vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_MOV_B32 8, $vgpr0_vgpr1, 8, $vgpr0_vgpr1, 11, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_MOV_B32 9, $vgpr0_vgpr1, 9, $vgpr0_vgpr1, 12, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_PK_MOV_B32 10, $vgpr0_vgpr1, 10, $vgpr0_vgpr1, 13, 0, 0, 0, 0, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3 - ; GCN-NEXT: S_ENDPGM 0, implicit killed renamable $vgpr0_vgpr1 + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY $vgpr0_vgpr1 + ; GCN-NEXT: [[V_PK_MOV_B32_:%[0-9]+]]:vreg_64_align2 = V_PK_MOV_B32 8, [[COPY]], 8, 
[[COPY]], 11, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_MOV_B32_1:%[0-9]+]]:vreg_64_align2 = V_PK_MOV_B32 9, [[COPY]], 9, [[COPY]], 12, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: [[V_PK_MOV_B32_2:%[0-9]+]]:vreg_64_align2 = V_PK_MOV_B32 10, [[COPY]], 10, [[COPY]], 13, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MOV_B32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MOV_B32_1]] + ; GCN-NEXT: S_NOP 0, implicit [[V_PK_MOV_B32_2]] + ; GCN-NEXT: S_ENDPGM 0, implicit [[COPY]] %0:vreg_64_align2 = COPY $vgpr0_vgpr1 %1:vreg_64_align2 = V_PK_MOV_B32 8, %0, 8, %0, 11, 0, 0, 0, 0, implicit $exec %2:vreg_64_align2 = V_PK_MOV_B32 9, %0, 9, %0, 12, 0, 0, 0, 0, implicit $exec @@ -5395,12 +5401,12 @@ tracksRegLiveness: true body: | bb.0: ; GCN-LABEL: name: test_remat_subreg_def - ; GCN: renamable $vgpr1 = V_MOV_B32_e32 1, implicit $exec - ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1 - ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0 - ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 2, implicit $exec - ; GCN-NEXT: S_NOP 0, implicit renamable $vgpr0_vgpr1 + ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; GCN-NEXT: undef [[V_MOV_B32_e32_1:%[0-9]+]].sub0:vreg_64 = V_MOV_B32_e32 2, implicit $exec + ; GCN-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec + ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_2]] + ; GCN-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_1]] ; GCN-NEXT: S_ENDPGM 0 %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec undef %1.sub0:vreg_64 = V_MOV_B32_e32 2, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll b/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll index ad82869c001f6..7f8240eeb98eb 100644 --- a/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll 
@@ -8,9 +8,6 @@ define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 { ; CHECK-LABEL: kernel0: ; CHECK: ; %bb.0: -; CHECK-NEXT: ; implicit-def: $vgpr23 : SGPR spill to VGPR lane -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ;;#ASMEND ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ;;#ASMEND ; CHECK-NEXT: ;;#ASMSTART @@ -22,46 +19,47 @@ define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 { ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; def s[2:3] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_writelane_b32 v23, s2, 0 +; CHECK-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane ; CHECK-NEXT: s_load_dword s0, s[6:7], 0x8 -; CHECK-NEXT: v_writelane_b32 v23, s3, 1 +; CHECK-NEXT: v_writelane_b32 v22, s2, 0 +; CHECK-NEXT: v_writelane_b32 v22, s3, 1 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; def s[4:7] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_writelane_b32 v23, s4, 2 -; CHECK-NEXT: v_writelane_b32 v23, s5, 3 -; CHECK-NEXT: v_writelane_b32 v23, s6, 4 -; CHECK-NEXT: v_writelane_b32 v23, s7, 5 +; CHECK-NEXT: v_writelane_b32 v22, s4, 2 +; CHECK-NEXT: v_writelane_b32 v22, s5, 3 +; CHECK-NEXT: v_writelane_b32 v22, s6, 4 +; CHECK-NEXT: v_writelane_b32 v22, s7, 5 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; def s[4:11] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_writelane_b32 v23, s4, 6 -; CHECK-NEXT: v_writelane_b32 v23, s5, 7 -; CHECK-NEXT: v_writelane_b32 v23, s6, 8 -; CHECK-NEXT: v_writelane_b32 v23, s7, 9 -; CHECK-NEXT: v_writelane_b32 v23, s8, 10 -; CHECK-NEXT: v_writelane_b32 v23, s9, 11 -; CHECK-NEXT: v_writelane_b32 v23, s10, 12 -; CHECK-NEXT: v_writelane_b32 v23, s11, 13 +; CHECK-NEXT: v_writelane_b32 v22, s4, 6 +; CHECK-NEXT: v_writelane_b32 v22, s5, 7 +; CHECK-NEXT: v_writelane_b32 v22, s6, 8 +; CHECK-NEXT: v_writelane_b32 v22, s7, 9 +; CHECK-NEXT: v_writelane_b32 v22, s8, 10 +; CHECK-NEXT: v_writelane_b32 v22, s9, 11 +; CHECK-NEXT: v_writelane_b32 v22, s10, 12 +; CHECK-NEXT: v_writelane_b32 v22, s11, 13 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; def s[4:19] ; 
CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_writelane_b32 v23, s4, 14 -; CHECK-NEXT: v_writelane_b32 v23, s5, 15 -; CHECK-NEXT: v_writelane_b32 v23, s6, 16 -; CHECK-NEXT: v_writelane_b32 v23, s7, 17 -; CHECK-NEXT: v_writelane_b32 v23, s8, 18 -; CHECK-NEXT: v_writelane_b32 v23, s9, 19 -; CHECK-NEXT: v_writelane_b32 v23, s10, 20 -; CHECK-NEXT: v_writelane_b32 v23, s11, 21 -; CHECK-NEXT: v_writelane_b32 v23, s12, 22 -; CHECK-NEXT: v_writelane_b32 v23, s13, 23 -; CHECK-NEXT: v_writelane_b32 v23, s14, 24 -; CHECK-NEXT: v_writelane_b32 v23, s15, 25 -; CHECK-NEXT: v_writelane_b32 v23, s16, 26 -; CHECK-NEXT: v_writelane_b32 v23, s17, 27 -; CHECK-NEXT: v_writelane_b32 v23, s18, 28 -; CHECK-NEXT: v_writelane_b32 v23, s19, 29 +; CHECK-NEXT: v_writelane_b32 v22, s4, 14 +; CHECK-NEXT: v_writelane_b32 v22, s5, 15 +; CHECK-NEXT: v_writelane_b32 v22, s6, 16 +; CHECK-NEXT: v_writelane_b32 v22, s7, 17 +; CHECK-NEXT: v_writelane_b32 v22, s8, 18 +; CHECK-NEXT: v_writelane_b32 v22, s9, 19 +; CHECK-NEXT: v_writelane_b32 v22, s10, 20 +; CHECK-NEXT: v_writelane_b32 v22, s11, 21 +; CHECK-NEXT: v_writelane_b32 v22, s12, 22 +; CHECK-NEXT: v_writelane_b32 v22, s13, 23 +; CHECK-NEXT: v_writelane_b32 v22, s14, 24 +; CHECK-NEXT: v_writelane_b32 v22, s15, 25 +; CHECK-NEXT: v_writelane_b32 v22, s16, 26 +; CHECK-NEXT: v_writelane_b32 v22, s17, 27 +; CHECK-NEXT: v_writelane_b32 v22, s18, 28 +; CHECK-NEXT: v_writelane_b32 v22, s19, 29 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; def s[42:43] ; CHECK-NEXT: ;;#ASMEND @@ -71,14 +69,14 @@ define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 { ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; def s[4:11] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_writelane_b32 v23, s4, 30 -; CHECK-NEXT: v_writelane_b32 v23, s5, 31 -; CHECK-NEXT: v_writelane_b32 v23, s6, 32 -; CHECK-NEXT: v_writelane_b32 v23, s7, 33 -; CHECK-NEXT: v_writelane_b32 v23, s8, 34 -; CHECK-NEXT: v_writelane_b32 v23, s9, 35 -; CHECK-NEXT: v_writelane_b32 v23, s10, 36 -; CHECK-NEXT: v_writelane_b32 
v23, s11, 37 +; CHECK-NEXT: v_writelane_b32 v22, s4, 30 +; CHECK-NEXT: v_writelane_b32 v22, s5, 31 +; CHECK-NEXT: v_writelane_b32 v22, s6, 32 +; CHECK-NEXT: v_writelane_b32 v22, s7, 33 +; CHECK-NEXT: v_writelane_b32 v22, s8, 34 +; CHECK-NEXT: v_writelane_b32 v22, s9, 35 +; CHECK-NEXT: v_writelane_b32 v22, s10, 36 +; CHECK-NEXT: v_writelane_b32 v22, s11, 37 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_cmp_lg_u32 s0, 0 ; CHECK-NEXT: ;;#ASMSTART @@ -96,161 +94,159 @@ define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 { ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; def s[0:15] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_writelane_b32 v23, s0, 38 -; CHECK-NEXT: v_writelane_b32 v23, s1, 39 -; CHECK-NEXT: v_writelane_b32 v23, s2, 40 -; CHECK-NEXT: v_writelane_b32 v23, s3, 41 -; CHECK-NEXT: v_writelane_b32 v23, s4, 42 -; CHECK-NEXT: v_writelane_b32 v23, s5, 43 -; CHECK-NEXT: v_writelane_b32 v23, s6, 44 -; CHECK-NEXT: v_writelane_b32 v23, s7, 45 -; CHECK-NEXT: v_writelane_b32 v23, s8, 46 -; CHECK-NEXT: v_writelane_b32 v23, s9, 47 -; CHECK-NEXT: v_writelane_b32 v23, s10, 48 -; CHECK-NEXT: v_writelane_b32 v23, s11, 49 -; CHECK-NEXT: v_writelane_b32 v23, s12, 50 -; CHECK-NEXT: v_writelane_b32 v23, s13, 51 -; CHECK-NEXT: v_writelane_b32 v23, s14, 52 -; CHECK-NEXT: v_writelane_b32 v23, s15, 53 +; CHECK-NEXT: v_writelane_b32 v22, s0, 38 +; CHECK-NEXT: v_writelane_b32 v22, s1, 39 +; CHECK-NEXT: v_writelane_b32 v22, s2, 40 +; CHECK-NEXT: v_writelane_b32 v22, s3, 41 +; CHECK-NEXT: v_writelane_b32 v22, s4, 42 +; CHECK-NEXT: v_writelane_b32 v22, s5, 43 +; CHECK-NEXT: v_writelane_b32 v22, s6, 44 +; CHECK-NEXT: v_writelane_b32 v22, s7, 45 +; CHECK-NEXT: v_writelane_b32 v22, s8, 46 +; CHECK-NEXT: v_writelane_b32 v22, s9, 47 +; CHECK-NEXT: v_writelane_b32 v22, s10, 48 +; CHECK-NEXT: v_writelane_b32 v22, s11, 49 +; CHECK-NEXT: v_writelane_b32 v22, s12, 50 +; CHECK-NEXT: v_writelane_b32 v22, s13, 51 +; CHECK-NEXT: v_writelane_b32 v22, s14, 52 +; CHECK-NEXT: v_writelane_b32 
v22, s15, 53 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; def s[34:35] ; CHECK-NEXT: ;;#ASMEND ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; def s[0:3] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_writelane_b32 v23, s0, 54 -; CHECK-NEXT: v_writelane_b32 v23, s1, 55 -; CHECK-NEXT: v_writelane_b32 v23, s2, 56 -; CHECK-NEXT: v_writelane_b32 v23, s3, 57 +; CHECK-NEXT: v_writelane_b32 v22, s0, 54 +; CHECK-NEXT: v_writelane_b32 v22, s1, 55 +; CHECK-NEXT: v_writelane_b32 v22, s2, 56 +; CHECK-NEXT: v_writelane_b32 v22, s3, 57 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; def s[0:7] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_writelane_b32 v23, s0, 58 -; CHECK-NEXT: v_writelane_b32 v23, s1, 59 -; CHECK-NEXT: v_writelane_b32 v23, s2, 60 -; CHECK-NEXT: ; implicit-def: $vgpr0 -; CHECK-NEXT: v_writelane_b32 v23, s3, 61 -; CHECK-NEXT: v_writelane_b32 v23, s4, 62 -; CHECK-NEXT: v_writelane_b32 v0, s6, 0 -; CHECK-NEXT: v_writelane_b32 v23, s5, 63 -; CHECK-NEXT: v_writelane_b32 v0, s7, 1 +; CHECK-NEXT: v_writelane_b32 v22, s0, 58 +; CHECK-NEXT: v_writelane_b32 v22, s1, 59 +; CHECK-NEXT: v_writelane_b32 v22, s2, 60 +; CHECK-NEXT: ; implicit-def: $vgpr23 : SGPR spill to VGPR lane +; CHECK-NEXT: v_writelane_b32 v22, s3, 61 +; CHECK-NEXT: v_writelane_b32 v22, s4, 62 +; CHECK-NEXT: v_writelane_b32 v23, s6, 0 +; CHECK-NEXT: v_writelane_b32 v22, s5, 63 +; CHECK-NEXT: v_writelane_b32 v23, s7, 1 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; def s[0:15] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_writelane_b32 v0, s0, 2 -; CHECK-NEXT: v_writelane_b32 v0, s1, 3 -; CHECK-NEXT: v_writelane_b32 v0, s2, 4 -; CHECK-NEXT: v_writelane_b32 v0, s3, 5 -; CHECK-NEXT: v_writelane_b32 v0, s4, 6 -; CHECK-NEXT: v_writelane_b32 v0, s5, 7 -; CHECK-NEXT: v_writelane_b32 v0, s6, 8 -; CHECK-NEXT: v_writelane_b32 v0, s7, 9 -; CHECK-NEXT: v_writelane_b32 v0, s8, 10 -; CHECK-NEXT: v_writelane_b32 v0, s9, 11 -; CHECK-NEXT: v_writelane_b32 v0, s10, 12 -; CHECK-NEXT: v_writelane_b32 v0, s11, 13 -; CHECK-NEXT: v_writelane_b32 v0, s12, 14 -; 
CHECK-NEXT: v_writelane_b32 v0, s13, 15 -; CHECK-NEXT: v_writelane_b32 v0, s14, 16 -; CHECK-NEXT: v_writelane_b32 v0, s15, 17 +; CHECK-NEXT: v_writelane_b32 v23, s0, 2 +; CHECK-NEXT: v_writelane_b32 v23, s1, 3 +; CHECK-NEXT: v_writelane_b32 v23, s2, 4 +; CHECK-NEXT: v_writelane_b32 v23, s3, 5 +; CHECK-NEXT: v_writelane_b32 v23, s4, 6 +; CHECK-NEXT: v_writelane_b32 v23, s5, 7 +; CHECK-NEXT: v_writelane_b32 v23, s6, 8 +; CHECK-NEXT: v_writelane_b32 v23, s7, 9 +; CHECK-NEXT: v_writelane_b32 v23, s8, 10 +; CHECK-NEXT: v_writelane_b32 v23, s9, 11 +; CHECK-NEXT: v_writelane_b32 v23, s10, 12 +; CHECK-NEXT: v_writelane_b32 v23, s11, 13 +; CHECK-NEXT: v_writelane_b32 v23, s12, 14 +; CHECK-NEXT: v_writelane_b32 v23, s13, 15 +; CHECK-NEXT: v_writelane_b32 v23, s14, 16 +; CHECK-NEXT: v_writelane_b32 v23, s15, 17 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; def s[0:1] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_writelane_b32 v0, s0, 18 -; CHECK-NEXT: v_writelane_b32 v0, s1, 19 +; CHECK-NEXT: v_writelane_b32 v23, s0, 18 +; CHECK-NEXT: v_writelane_b32 v23, s1, 19 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; def s[0:3] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_writelane_b32 v0, s0, 20 -; CHECK-NEXT: v_writelane_b32 v0, s1, 21 -; CHECK-NEXT: v_writelane_b32 v0, s2, 22 -; CHECK-NEXT: v_writelane_b32 v0, s3, 23 +; CHECK-NEXT: v_writelane_b32 v23, s0, 20 +; CHECK-NEXT: v_writelane_b32 v23, s1, 21 +; CHECK-NEXT: v_writelane_b32 v23, s2, 22 +; CHECK-NEXT: v_writelane_b32 v23, s3, 23 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; def s[0:7] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_writelane_b32 v0, s0, 24 -; CHECK-NEXT: v_writelane_b32 v0, s1, 25 -; CHECK-NEXT: v_writelane_b32 v0, s2, 26 -; CHECK-NEXT: v_writelane_b32 v0, s3, 27 -; CHECK-NEXT: v_writelane_b32 v0, s4, 28 -; CHECK-NEXT: v_writelane_b32 v0, s5, 29 -; CHECK-NEXT: v_writelane_b32 v0, s6, 30 -; CHECK-NEXT: v_writelane_b32 v0, s7, 31 +; CHECK-NEXT: v_writelane_b32 v23, s0, 24 +; CHECK-NEXT: v_writelane_b32 v23, s1, 25 +; CHECK-NEXT: 
v_writelane_b32 v23, s2, 26 +; CHECK-NEXT: v_writelane_b32 v23, s3, 27 +; CHECK-NEXT: v_writelane_b32 v23, s4, 28 +; CHECK-NEXT: v_writelane_b32 v23, s5, 29 +; CHECK-NEXT: v_writelane_b32 v23, s6, 30 +; CHECK-NEXT: v_writelane_b32 v23, s7, 31 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; def s[0:15] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_writelane_b32 v0, s0, 32 -; CHECK-NEXT: v_writelane_b32 v0, s1, 33 -; CHECK-NEXT: v_writelane_b32 v0, s2, 34 -; CHECK-NEXT: v_writelane_b32 v0, s3, 35 -; CHECK-NEXT: v_writelane_b32 v0, s4, 36 -; CHECK-NEXT: v_writelane_b32 v0, s5, 37 -; CHECK-NEXT: v_writelane_b32 v0, s6, 38 -; CHECK-NEXT: v_writelane_b32 v0, s7, 39 -; CHECK-NEXT: v_writelane_b32 v0, s8, 40 -; CHECK-NEXT: v_writelane_b32 v0, s9, 41 -; CHECK-NEXT: v_writelane_b32 v0, s10, 42 -; CHECK-NEXT: v_writelane_b32 v0, s11, 43 -; CHECK-NEXT: v_writelane_b32 v0, s12, 44 -; CHECK-NEXT: v_writelane_b32 v0, s13, 45 -; CHECK-NEXT: v_writelane_b32 v0, s14, 46 -; CHECK-NEXT: v_writelane_b32 v0, s15, 47 +; CHECK-NEXT: v_writelane_b32 v23, s0, 32 +; CHECK-NEXT: v_writelane_b32 v23, s1, 33 +; CHECK-NEXT: v_writelane_b32 v23, s2, 34 +; CHECK-NEXT: v_writelane_b32 v23, s3, 35 +; CHECK-NEXT: v_writelane_b32 v23, s4, 36 +; CHECK-NEXT: v_writelane_b32 v23, s5, 37 +; CHECK-NEXT: v_writelane_b32 v23, s6, 38 +; CHECK-NEXT: v_writelane_b32 v23, s7, 39 +; CHECK-NEXT: v_writelane_b32 v23, s8, 40 +; CHECK-NEXT: v_writelane_b32 v23, s9, 41 +; CHECK-NEXT: v_writelane_b32 v23, s10, 42 +; CHECK-NEXT: v_writelane_b32 v23, s11, 43 +; CHECK-NEXT: v_writelane_b32 v23, s12, 44 +; CHECK-NEXT: v_writelane_b32 v23, s13, 45 +; CHECK-NEXT: v_writelane_b32 v23, s14, 46 +; CHECK-NEXT: v_writelane_b32 v23, s15, 47 ; CHECK-NEXT: s_cbranch_scc0 .LBB0_2 ; CHECK-NEXT: ; %bb.1: ; %ret -; CHECK-NEXT: ; kill: killed $vgpr23 -; CHECK-NEXT: ; kill: killed $vgpr0 ; CHECK-NEXT: s_endpgm ; CHECK-NEXT: .LBB0_2: ; %bb0 -; CHECK-NEXT: v_readlane_b32 s0, v23, 0 -; CHECK-NEXT: v_readlane_b32 s1, v23, 1 +; CHECK-NEXT: v_readlane_b32 
s0, v22, 0 +; CHECK-NEXT: v_readlane_b32 s1, v22, 1 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; use s[0:1] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s0, v23, 2 -; CHECK-NEXT: v_readlane_b32 s1, v23, 3 -; CHECK-NEXT: v_readlane_b32 s2, v23, 4 -; CHECK-NEXT: v_readlane_b32 s3, v23, 5 +; CHECK-NEXT: v_readlane_b32 s0, v22, 2 +; CHECK-NEXT: v_readlane_b32 s1, v22, 3 +; CHECK-NEXT: v_readlane_b32 s2, v22, 4 +; CHECK-NEXT: v_readlane_b32 s3, v22, 5 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; use s[0:3] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s0, v23, 6 -; CHECK-NEXT: v_readlane_b32 s1, v23, 7 -; CHECK-NEXT: v_readlane_b32 s2, v23, 8 -; CHECK-NEXT: v_readlane_b32 s3, v23, 9 -; CHECK-NEXT: v_readlane_b32 s4, v23, 10 -; CHECK-NEXT: v_readlane_b32 s5, v23, 11 -; CHECK-NEXT: v_readlane_b32 s6, v23, 12 -; CHECK-NEXT: v_readlane_b32 s7, v23, 13 +; CHECK-NEXT: v_readlane_b32 s0, v22, 6 +; CHECK-NEXT: v_readlane_b32 s1, v22, 7 +; CHECK-NEXT: v_readlane_b32 s2, v22, 8 +; CHECK-NEXT: v_readlane_b32 s3, v22, 9 +; CHECK-NEXT: v_readlane_b32 s4, v22, 10 +; CHECK-NEXT: v_readlane_b32 s5, v22, 11 +; CHECK-NEXT: v_readlane_b32 s6, v22, 12 +; CHECK-NEXT: v_readlane_b32 s7, v22, 13 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; use s[0:7] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s0, v23, 14 -; CHECK-NEXT: v_readlane_b32 s1, v23, 15 -; CHECK-NEXT: v_readlane_b32 s2, v23, 16 -; CHECK-NEXT: v_readlane_b32 s3, v23, 17 -; CHECK-NEXT: v_readlane_b32 s4, v23, 18 -; CHECK-NEXT: v_readlane_b32 s5, v23, 19 -; CHECK-NEXT: v_readlane_b32 s6, v23, 20 -; CHECK-NEXT: v_readlane_b32 s7, v23, 21 -; CHECK-NEXT: v_readlane_b32 s8, v23, 22 -; CHECK-NEXT: v_readlane_b32 s9, v23, 23 -; CHECK-NEXT: v_readlane_b32 s10, v23, 24 -; CHECK-NEXT: v_readlane_b32 s11, v23, 25 -; CHECK-NEXT: v_readlane_b32 s12, v23, 26 -; CHECK-NEXT: v_readlane_b32 s13, v23, 27 -; CHECK-NEXT: v_readlane_b32 s14, v23, 28 -; CHECK-NEXT: v_readlane_b32 s15, v23, 29 +; CHECK-NEXT: v_readlane_b32 s0, v22, 14 +; 
CHECK-NEXT: v_readlane_b32 s1, v22, 15 +; CHECK-NEXT: v_readlane_b32 s2, v22, 16 +; CHECK-NEXT: v_readlane_b32 s3, v22, 17 +; CHECK-NEXT: v_readlane_b32 s4, v22, 18 +; CHECK-NEXT: v_readlane_b32 s5, v22, 19 +; CHECK-NEXT: v_readlane_b32 s6, v22, 20 +; CHECK-NEXT: v_readlane_b32 s7, v22, 21 +; CHECK-NEXT: v_readlane_b32 s8, v22, 22 +; CHECK-NEXT: v_readlane_b32 s9, v22, 23 +; CHECK-NEXT: v_readlane_b32 s10, v22, 24 +; CHECK-NEXT: v_readlane_b32 s11, v22, 25 +; CHECK-NEXT: v_readlane_b32 s12, v22, 26 +; CHECK-NEXT: v_readlane_b32 s13, v22, 27 +; CHECK-NEXT: v_readlane_b32 s14, v22, 28 +; CHECK-NEXT: v_readlane_b32 s15, v22, 29 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; use s[0:15] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s0, v23, 30 -; CHECK-NEXT: v_readlane_b32 s1, v23, 31 -; CHECK-NEXT: v_readlane_b32 s2, v23, 32 -; CHECK-NEXT: v_readlane_b32 s3, v23, 33 -; CHECK-NEXT: v_readlane_b32 s4, v23, 34 -; CHECK-NEXT: v_readlane_b32 s5, v23, 35 -; CHECK-NEXT: v_readlane_b32 s6, v23, 36 -; CHECK-NEXT: v_readlane_b32 s7, v23, 37 +; CHECK-NEXT: v_readlane_b32 s0, v22, 30 +; CHECK-NEXT: v_readlane_b32 s1, v22, 31 +; CHECK-NEXT: v_readlane_b32 s2, v22, 32 +; CHECK-NEXT: v_readlane_b32 s3, v22, 33 +; CHECK-NEXT: v_readlane_b32 s4, v22, 34 +; CHECK-NEXT: v_readlane_b32 s5, v22, 35 +; CHECK-NEXT: v_readlane_b32 s6, v22, 36 +; CHECK-NEXT: v_readlane_b32 s7, v22, 37 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; use s[42:43] ; CHECK-NEXT: ;;#ASMEND @@ -260,10 +256,10 @@ define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 { ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; use s[0:7] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s0, v23, 38 -; CHECK-NEXT: v_readlane_b32 s1, v23, 39 -; CHECK-NEXT: v_readlane_b32 s2, v23, 40 -; CHECK-NEXT: v_readlane_b32 s3, v23, 41 +; CHECK-NEXT: v_readlane_b32 s0, v22, 38 +; CHECK-NEXT: v_readlane_b32 s1, v22, 39 +; CHECK-NEXT: v_readlane_b32 s2, v22, 40 +; CHECK-NEXT: v_readlane_b32 s3, v22, 41 ; CHECK-NEXT: ;;#ASMSTART ; 
CHECK-NEXT: ; use s[16:31] ; CHECK-NEXT: ;;#ASMEND @@ -276,111 +272,108 @@ define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 { ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; use s[44:51] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s4, v23, 42 -; CHECK-NEXT: v_readlane_b32 s5, v23, 43 -; CHECK-NEXT: v_readlane_b32 s6, v23, 44 -; CHECK-NEXT: v_readlane_b32 s7, v23, 45 -; CHECK-NEXT: v_readlane_b32 s8, v23, 46 -; CHECK-NEXT: v_readlane_b32 s9, v23, 47 -; CHECK-NEXT: v_readlane_b32 s10, v23, 48 -; CHECK-NEXT: v_readlane_b32 s11, v23, 49 -; CHECK-NEXT: v_readlane_b32 s12, v23, 50 -; CHECK-NEXT: v_readlane_b32 s13, v23, 51 -; CHECK-NEXT: v_readlane_b32 s14, v23, 52 -; CHECK-NEXT: v_readlane_b32 s15, v23, 53 +; CHECK-NEXT: v_readlane_b32 s4, v22, 42 +; CHECK-NEXT: v_readlane_b32 s5, v22, 43 +; CHECK-NEXT: v_readlane_b32 s6, v22, 44 +; CHECK-NEXT: v_readlane_b32 s7, v22, 45 +; CHECK-NEXT: v_readlane_b32 s8, v22, 46 +; CHECK-NEXT: v_readlane_b32 s9, v22, 47 +; CHECK-NEXT: v_readlane_b32 s10, v22, 48 +; CHECK-NEXT: v_readlane_b32 s11, v22, 49 +; CHECK-NEXT: v_readlane_b32 s12, v22, 50 +; CHECK-NEXT: v_readlane_b32 s13, v22, 51 +; CHECK-NEXT: v_readlane_b32 s14, v22, 52 +; CHECK-NEXT: v_readlane_b32 s15, v22, 53 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; use s[0:15] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s0, v23, 54 -; CHECK-NEXT: v_readlane_b32 s1, v23, 55 -; CHECK-NEXT: v_readlane_b32 s2, v23, 56 -; CHECK-NEXT: v_readlane_b32 s3, v23, 57 +; CHECK-NEXT: v_readlane_b32 s0, v22, 54 +; CHECK-NEXT: v_readlane_b32 s1, v22, 55 +; CHECK-NEXT: v_readlane_b32 s2, v22, 56 +; CHECK-NEXT: v_readlane_b32 s3, v22, 57 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; use s[34:35] ; CHECK-NEXT: ;;#ASMEND ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; use s[0:3] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s0, v23, 58 -; CHECK-NEXT: v_readlane_b32 s1, v23, 59 -; CHECK-NEXT: v_readlane_b32 s2, v23, 60 -; CHECK-NEXT: v_readlane_b32 s3, v23, 61 -; CHECK-NEXT: 
v_readlane_b32 s4, v23, 62 -; CHECK-NEXT: v_readlane_b32 s5, v23, 63 -; CHECK-NEXT: v_readlane_b32 s6, v0, 0 -; CHECK-NEXT: v_readlane_b32 s7, v0, 1 +; CHECK-NEXT: v_readlane_b32 s0, v22, 58 +; CHECK-NEXT: v_readlane_b32 s1, v22, 59 +; CHECK-NEXT: v_readlane_b32 s2, v22, 60 +; CHECK-NEXT: v_readlane_b32 s3, v22, 61 +; CHECK-NEXT: v_readlane_b32 s4, v22, 62 +; CHECK-NEXT: v_readlane_b32 s5, v22, 63 +; CHECK-NEXT: v_readlane_b32 s6, v23, 0 +; CHECK-NEXT: v_readlane_b32 s7, v23, 1 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; use s[0:7] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s0, v0, 2 -; CHECK-NEXT: v_readlane_b32 s1, v0, 3 -; CHECK-NEXT: v_readlane_b32 s2, v0, 4 -; CHECK-NEXT: v_readlane_b32 s3, v0, 5 -; CHECK-NEXT: v_readlane_b32 s4, v0, 6 -; CHECK-NEXT: v_readlane_b32 s5, v0, 7 -; CHECK-NEXT: v_readlane_b32 s6, v0, 8 -; CHECK-NEXT: v_readlane_b32 s7, v0, 9 -; CHECK-NEXT: v_readlane_b32 s8, v0, 10 -; CHECK-NEXT: v_readlane_b32 s9, v0, 11 -; CHECK-NEXT: v_readlane_b32 s10, v0, 12 -; CHECK-NEXT: v_readlane_b32 s11, v0, 13 -; CHECK-NEXT: v_readlane_b32 s12, v0, 14 -; CHECK-NEXT: v_readlane_b32 s13, v0, 15 -; CHECK-NEXT: v_readlane_b32 s14, v0, 16 -; CHECK-NEXT: v_readlane_b32 s15, v0, 17 +; CHECK-NEXT: v_readlane_b32 s0, v23, 2 +; CHECK-NEXT: v_readlane_b32 s1, v23, 3 +; CHECK-NEXT: v_readlane_b32 s2, v23, 4 +; CHECK-NEXT: v_readlane_b32 s3, v23, 5 +; CHECK-NEXT: v_readlane_b32 s4, v23, 6 +; CHECK-NEXT: v_readlane_b32 s5, v23, 7 +; CHECK-NEXT: v_readlane_b32 s6, v23, 8 +; CHECK-NEXT: v_readlane_b32 s7, v23, 9 +; CHECK-NEXT: v_readlane_b32 s8, v23, 10 +; CHECK-NEXT: v_readlane_b32 s9, v23, 11 +; CHECK-NEXT: v_readlane_b32 s10, v23, 12 +; CHECK-NEXT: v_readlane_b32 s11, v23, 13 +; CHECK-NEXT: v_readlane_b32 s12, v23, 14 +; CHECK-NEXT: v_readlane_b32 s13, v23, 15 +; CHECK-NEXT: v_readlane_b32 s14, v23, 16 +; CHECK-NEXT: v_readlane_b32 s15, v23, 17 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; use s[0:15] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s0, v0, 
18 -; CHECK-NEXT: v_readlane_b32 s1, v0, 19 +; CHECK-NEXT: v_readlane_b32 s0, v23, 18 +; CHECK-NEXT: v_readlane_b32 s1, v23, 19 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; use s[0:1] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s0, v0, 20 -; CHECK-NEXT: v_readlane_b32 s1, v0, 21 -; CHECK-NEXT: v_readlane_b32 s2, v0, 22 -; CHECK-NEXT: v_readlane_b32 s3, v0, 23 +; CHECK-NEXT: v_readlane_b32 s0, v23, 20 +; CHECK-NEXT: v_readlane_b32 s1, v23, 21 +; CHECK-NEXT: v_readlane_b32 s2, v23, 22 +; CHECK-NEXT: v_readlane_b32 s3, v23, 23 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; use s[0:3] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s0, v0, 24 -; CHECK-NEXT: v_readlane_b32 s1, v0, 25 -; CHECK-NEXT: v_readlane_b32 s2, v0, 26 -; CHECK-NEXT: v_readlane_b32 s3, v0, 27 -; CHECK-NEXT: v_readlane_b32 s4, v0, 28 -; CHECK-NEXT: v_readlane_b32 s5, v0, 29 -; CHECK-NEXT: v_readlane_b32 s6, v0, 30 -; CHECK-NEXT: v_readlane_b32 s7, v0, 31 +; CHECK-NEXT: v_readlane_b32 s0, v23, 24 +; CHECK-NEXT: v_readlane_b32 s1, v23, 25 +; CHECK-NEXT: v_readlane_b32 s2, v23, 26 +; CHECK-NEXT: v_readlane_b32 s3, v23, 27 +; CHECK-NEXT: v_readlane_b32 s4, v23, 28 +; CHECK-NEXT: v_readlane_b32 s5, v23, 29 +; CHECK-NEXT: v_readlane_b32 s6, v23, 30 +; CHECK-NEXT: v_readlane_b32 s7, v23, 31 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; use s[0:7] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s0, v0, 32 -; CHECK-NEXT: v_readlane_b32 s1, v0, 33 -; CHECK-NEXT: v_readlane_b32 s2, v0, 34 -; CHECK-NEXT: v_readlane_b32 s3, v0, 35 -; CHECK-NEXT: v_readlane_b32 s4, v0, 36 -; CHECK-NEXT: v_readlane_b32 s5, v0, 37 -; CHECK-NEXT: v_readlane_b32 s6, v0, 38 -; CHECK-NEXT: v_readlane_b32 s7, v0, 39 -; CHECK-NEXT: v_readlane_b32 s8, v0, 40 -; CHECK-NEXT: v_readlane_b32 s9, v0, 41 -; CHECK-NEXT: v_readlane_b32 s10, v0, 42 -; CHECK-NEXT: v_readlane_b32 s11, v0, 43 -; CHECK-NEXT: v_readlane_b32 s12, v0, 44 -; CHECK-NEXT: v_readlane_b32 s13, v0, 45 -; CHECK-NEXT: v_readlane_b32 s14, v0, 46 -; CHECK-NEXT: 
v_readlane_b32 s15, v0, 47 +; CHECK-NEXT: v_readlane_b32 s0, v23, 32 +; CHECK-NEXT: v_readlane_b32 s1, v23, 33 +; CHECK-NEXT: v_readlane_b32 s2, v23, 34 +; CHECK-NEXT: v_readlane_b32 s3, v23, 35 +; CHECK-NEXT: v_readlane_b32 s4, v23, 36 +; CHECK-NEXT: v_readlane_b32 s5, v23, 37 +; CHECK-NEXT: v_readlane_b32 s6, v23, 38 +; CHECK-NEXT: v_readlane_b32 s7, v23, 39 +; CHECK-NEXT: v_readlane_b32 s8, v23, 40 +; CHECK-NEXT: v_readlane_b32 s9, v23, 41 +; CHECK-NEXT: v_readlane_b32 s10, v23, 42 +; CHECK-NEXT: v_readlane_b32 s11, v23, 43 +; CHECK-NEXT: v_readlane_b32 s12, v23, 44 +; CHECK-NEXT: v_readlane_b32 s13, v23, 45 +; CHECK-NEXT: v_readlane_b32 s14, v23, 46 +; CHECK-NEXT: v_readlane_b32 s15, v23, 47 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; use s[0:15] ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: ; kill: killed $vgpr23 -; CHECK-NEXT: ; kill: killed $vgpr0 ; CHECK-NEXT: s_endpgm call void asm sideeffect "", "~{v[0:7]}" () #0 call void asm sideeffect "", "~{v[8:15]}" () #0 call void asm sideeffect "", "~{v[16:19]}"() #0 call void asm sideeffect "", "~{v[20:21]}"() #0 - call void asm sideeffect "", "~{v22}"() #0 %val0 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0 %val1 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll index 17a19116735e4..14a02d4d2dcec 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll @@ -1,19 +1,19 @@ ; REQUIRES: asserts ; RUN: llc -verify-machineinstrs=0 -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=DEFAULT %s -; RUN: llc -verify-machineinstrs=0 -sgpr-regalloc=greedy -vgpr-regalloc=greedy -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=DEFAULT %s +; RUN: llc -verify-machineinstrs=0 -sgpr-regalloc=greedy -wwm-regalloc=greedy -vgpr-regalloc=greedy -mtriple=amdgcn-amd-amdhsa 
-debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=DEFAULT %s ; RUN: llc -verify-machineinstrs=0 -O0 -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=O0 %s -; RUN: llc -verify-machineinstrs=0 -vgpr-regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=DEFAULT-BASIC %s +; RUN: llc -verify-machineinstrs=0 -wwm-regalloc=basic -vgpr-regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=DEFAULT-BASIC %s ; RUN: llc -verify-machineinstrs=0 -sgpr-regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=BASIC-DEFAULT %s -; RUN: llc -verify-machineinstrs=0 -sgpr-regalloc=basic -vgpr-regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=BASIC-BASIC %s +; RUN: llc -verify-machineinstrs=0 -sgpr-regalloc=basic -wwm-regalloc=basic -vgpr-regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=BASIC-BASIC %s ; RUN: not --crash llc -verify-machineinstrs=0 -regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC %s ; RUN: not --crash llc -verify-machineinstrs=0 -regalloc=fast -O0 -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC %s -; REGALLOC: -regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc +; REGALLOC: -regalloc not supported with amdgcn. 
Use -sgpr-regalloc, -wwm-regalloc, and -vgpr-regalloc ; DEFAULT: Greedy Register Allocator ; DEFAULT-NEXT: Virtual Register Rewriter @@ -23,6 +23,11 @@ ; DEFAULT-NEXT: SI Pre-allocate WWM Registers ; DEFAULT-NEXT: Greedy Register Allocator ; DEFAULT-NEXT: SI Lower WWM Copies +; DEFAULT-NEXT: Virtual Register Rewriter +; DEFAULT-NEXT: AMDGPU Reserve WWM Registers +; DEFAULT-NEXT: Virtual Register Map +; DEFAULT-NEXT: Live Register Matrix +; DEFAULT-NEXT: Greedy Register Allocator ; DEFAULT-NEXT: GCN NSA Reassign ; DEFAULT-NEXT: Virtual Register Rewriter ; DEFAULT-NEXT: AMDGPU Mark Last Scratch Load @@ -37,6 +42,8 @@ ; O0-NEXT: SI Pre-allocate WWM Registers ; O0-NEXT: Fast Register Allocator ; O0-NEXT: SI Lower WWM Copies +; O0-NEXT: AMDGPU Reserve WWM Registers +; O0-NEXT: Fast Register Allocator ; O0-NEXT: SI Fix VGPR copies @@ -60,6 +67,11 @@ ; BASIC-DEFAULT-NEXT: Machine Optimization Remark Emitter ; BASIC-DEFAULT-NEXT: Greedy Register Allocator ; BASIC-DEFAULT-NEXT: SI Lower WWM Copies +; BASIC-DEFAULT-NEXT: Virtual Register Rewriter +; BASIC-DEFAULT-NEXT: AMDGPU Reserve WWM Registers +; BASIC-DEFAULT-NEXT: Virtual Register Map +; BASIC-DEFAULT-NEXT: Live Register Matrix +; BASIC-DEFAULT-NEXT: Greedy Register Allocator ; BASIC-DEFAULT-NEXT: GCN NSA Reassign ; BASIC-DEFAULT-NEXT: Virtual Register Rewriter ; BASIC-DEFAULT-NEXT: AMDGPU Mark Last Scratch Load @@ -75,6 +87,11 @@ ; DEFAULT-BASIC-NEXT: SI Pre-allocate WWM Registers ; DEFAULT-BASIC-NEXT: Basic Register Allocator ; DEFAULT-BASIC-NEXT: SI Lower WWM Copies +; DEFAULT-BASIC-NEXT: Virtual Register Rewriter +; DEFAULT-BASIC-NEXT: AMDGPU Reserve WWM Registers +; DEFAULT-BASIC-NEXT: Virtual Register Map +; DEFAULT-BASIC-NEXT: Live Register Matrix +; DEFAULT-BASIC-NEXT: Basic Register Allocator ; DEFAULT-BASIC-NEXT: GCN NSA Reassign ; DEFAULT-BASIC-NEXT: Virtual Register Rewriter ; DEFAULT-BASIC-NEXT: AMDGPU Mark Last Scratch Load @@ -96,6 +113,11 @@ ; BASIC-BASIC-NEXT: SI Pre-allocate WWM Registers ; 
BASIC-BASIC-NEXT: Basic Register Allocator ; BASIC-BASIC-NEXT: SI Lower WWM Copies +; BASIC-BASIC-NEXT: Virtual Register Rewriter +; BASIC-BASIC-NEXT: AMDGPU Reserve WWM Registers +; BASIC-BASIC-NEXT: Virtual Register Map +; BASIC-BASIC-NEXT: Live Register Matrix +; BASIC-BASIC-NEXT: Basic Register Allocator ; BASIC-BASIC-NEXT: GCN NSA Reassign ; BASIC-BASIC-NEXT: Virtual Register Rewriter ; BASIC-BASIC-NEXT: AMDGPU Mark Last Scratch Load diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir index 189aead1e5646..520717391b596 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR_SPILL %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -verify-machineinstrs --start-before=si-lower-sgpr-spills --stop-after=prologepilog -o - %s | FileCheck -check-prefix=PEI %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -passes=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR_SPILL %s @@ -45,28 +46,25 @@ body: | ; SGPR_SPILL: bb.0: ; SGPR_SPILL-NEXT: successors: %bb.1(0x80000000) ; SGPR_SPILL-NEXT: {{ $}} - ; SGPR_SPILL-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; SGPR_SPILL-NEXT: renamable $sgpr10 = IMPLICIT_DEF - ; SGPR_SPILL-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[V_WRITELANE_B32_]] + ; SGPR_SPILL-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; SGPR_SPILL-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]] ; SGPR_SPILL-NEXT: DBG_VALUE $noreg, 0 ; 
SGPR_SPILL-NEXT: {{ $}} ; SGPR_SPILL-NEXT: bb.1: - ; SGPR_SPILL-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[V_WRITELANE_B32_]], 0 - ; SGPR_SPILL-NEXT: KILL [[V_WRITELANE_B32_]] + ; SGPR_SPILL-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0 ; SGPR_SPILL-NEXT: S_ENDPGM 0 + ; ; PEI-LABEL: name: test ; PEI: bb.0: ; PEI-NEXT: successors: %bb.1(0x80000000) ; PEI-NEXT: {{ $}} - ; PEI-NEXT: renamable $vgpr0 = IMPLICIT_DEF ; PEI-NEXT: renamable $sgpr10 = IMPLICIT_DEF - ; PEI-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, killed $vgpr0 + ; PEI-NEXT: $vgpr0 = IMPLICIT_DEF + ; PEI-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, killed $vgpr0 ; PEI-NEXT: {{ $}} ; PEI-NEXT: bb.1: - ; PEI-NEXT: liveins: $vgpr0 - ; PEI-NEXT: {{ $}} - ; PEI-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0 - ; PEI-NEXT: KILL killed renamable $vgpr0 + ; PEI-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0 ; PEI-NEXT: S_ENDPGM 0 bb.0: renamable $sgpr10 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll index 29622d3fd0f1b..5692dc1e2a2c6 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll @@ -9,7 +9,6 @@ define amdgpu_kernel void @kernel0(ptr addrspace(1) %out, i32 %in) #1 { call void asm sideeffect "", "~{v[8:15]}" () #0 call void asm sideeffect "", "~{v[16:19]}"() #0 call void asm sideeffect "", "~{v[20:21]}"() #0 - call void asm sideeffect "", "~{v22}"() #0 %val0 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0 %val1 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0 %val2 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll index d430ba758572d..59036c64c8afc 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll +++ 
b/llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll @@ -9,19 +9,9 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %ou ; GCN: ; %bb.0: ; GCN-NEXT: s_add_u32 s0, s0, s13 ; GCN-NEXT: s_addc_u32 s1, s1, 0 -; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane -; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane ; GCN-NEXT: s_load_dword s4, s[6:7], 0x2 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[24:25] -; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[24:25] -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: ;;#ASMSTART @@ -31,91 +21,91 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %ou ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[8:23] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_waitcnt vmcnt(1) -; GCN-NEXT: v_writelane_b32 v1, s8, 0 -; GCN-NEXT: v_writelane_b32 v1, s9, 1 -; GCN-NEXT: v_writelane_b32 v1, s10, 2 -; GCN-NEXT: v_writelane_b32 v1, s11, 3 -; GCN-NEXT: v_writelane_b32 v1, s12, 4 -; GCN-NEXT: v_writelane_b32 v1, s13, 5 -; GCN-NEXT: v_writelane_b32 v1, s14, 6 -; GCN-NEXT: v_writelane_b32 v1, s15, 7 -; GCN-NEXT: v_writelane_b32 v1, s16, 8 -; GCN-NEXT: v_writelane_b32 v1, s17, 9 -; GCN-NEXT: v_writelane_b32 v1, s18, 10 -; GCN-NEXT: v_writelane_b32 v1, s19, 11 -; GCN-NEXT: v_writelane_b32 v1, s20, 12 -; GCN-NEXT: v_writelane_b32 v1, s21, 13 -; GCN-NEXT: v_writelane_b32 v1, s22, 14 -; GCN-NEXT: v_writelane_b32 v1, s23, 15 +; GCN-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane +; GCN-NEXT: v_writelane_b32 v22, s8, 0 +; GCN-NEXT: v_writelane_b32 v22, s9, 1 +; GCN-NEXT: v_writelane_b32 v22, s10, 2 +; GCN-NEXT: v_writelane_b32 v22, s11, 3 +; GCN-NEXT: v_writelane_b32 v22, s12, 4 +; 
GCN-NEXT: v_writelane_b32 v22, s13, 5 +; GCN-NEXT: v_writelane_b32 v22, s14, 6 +; GCN-NEXT: v_writelane_b32 v22, s15, 7 +; GCN-NEXT: v_writelane_b32 v22, s16, 8 +; GCN-NEXT: v_writelane_b32 v22, s17, 9 +; GCN-NEXT: v_writelane_b32 v22, s18, 10 +; GCN-NEXT: v_writelane_b32 v22, s19, 11 +; GCN-NEXT: v_writelane_b32 v22, s20, 12 +; GCN-NEXT: v_writelane_b32 v22, s21, 13 +; GCN-NEXT: v_writelane_b32 v22, s22, 14 +; GCN-NEXT: v_writelane_b32 v22, s23, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[8:23] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v1, s8, 16 -; GCN-NEXT: v_writelane_b32 v1, s9, 17 -; GCN-NEXT: v_writelane_b32 v1, s10, 18 -; GCN-NEXT: v_writelane_b32 v1, s11, 19 -; GCN-NEXT: v_writelane_b32 v1, s12, 20 -; GCN-NEXT: v_writelane_b32 v1, s13, 21 -; GCN-NEXT: v_writelane_b32 v1, s14, 22 -; GCN-NEXT: v_writelane_b32 v1, s15, 23 -; GCN-NEXT: v_writelane_b32 v1, s16, 24 -; GCN-NEXT: v_writelane_b32 v1, s17, 25 -; GCN-NEXT: v_writelane_b32 v1, s18, 26 -; GCN-NEXT: v_writelane_b32 v1, s19, 27 -; GCN-NEXT: v_writelane_b32 v1, s20, 28 -; GCN-NEXT: v_writelane_b32 v1, s21, 29 -; GCN-NEXT: v_writelane_b32 v1, s22, 30 -; GCN-NEXT: v_writelane_b32 v1, s23, 31 +; GCN-NEXT: v_writelane_b32 v22, s8, 16 +; GCN-NEXT: v_writelane_b32 v22, s9, 17 +; GCN-NEXT: v_writelane_b32 v22, s10, 18 +; GCN-NEXT: v_writelane_b32 v22, s11, 19 +; GCN-NEXT: v_writelane_b32 v22, s12, 20 +; GCN-NEXT: v_writelane_b32 v22, s13, 21 +; GCN-NEXT: v_writelane_b32 v22, s14, 22 +; GCN-NEXT: v_writelane_b32 v22, s15, 23 +; GCN-NEXT: v_writelane_b32 v22, s16, 24 +; GCN-NEXT: v_writelane_b32 v22, s17, 25 +; GCN-NEXT: v_writelane_b32 v22, s18, 26 +; GCN-NEXT: v_writelane_b32 v22, s19, 27 +; GCN-NEXT: v_writelane_b32 v22, s20, 28 +; GCN-NEXT: v_writelane_b32 v22, s21, 29 +; GCN-NEXT: v_writelane_b32 v22, s22, 30 +; GCN-NEXT: v_writelane_b32 v22, s23, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[8:23] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v1, s8, 32 -; GCN-NEXT: v_writelane_b32 v1, s9, 
33 -; GCN-NEXT: v_writelane_b32 v1, s10, 34 -; GCN-NEXT: v_writelane_b32 v1, s11, 35 -; GCN-NEXT: v_writelane_b32 v1, s12, 36 -; GCN-NEXT: v_writelane_b32 v1, s13, 37 -; GCN-NEXT: v_writelane_b32 v1, s14, 38 -; GCN-NEXT: v_writelane_b32 v1, s15, 39 -; GCN-NEXT: v_writelane_b32 v1, s16, 40 -; GCN-NEXT: v_writelane_b32 v1, s17, 41 -; GCN-NEXT: v_writelane_b32 v1, s18, 42 -; GCN-NEXT: v_writelane_b32 v1, s19, 43 -; GCN-NEXT: v_writelane_b32 v1, s20, 44 -; GCN-NEXT: v_writelane_b32 v1, s21, 45 -; GCN-NEXT: v_writelane_b32 v1, s22, 46 -; GCN-NEXT: v_writelane_b32 v1, s23, 47 +; GCN-NEXT: v_writelane_b32 v22, s8, 32 +; GCN-NEXT: v_writelane_b32 v22, s9, 33 +; GCN-NEXT: v_writelane_b32 v22, s10, 34 +; GCN-NEXT: v_writelane_b32 v22, s11, 35 +; GCN-NEXT: v_writelane_b32 v22, s12, 36 +; GCN-NEXT: v_writelane_b32 v22, s13, 37 +; GCN-NEXT: v_writelane_b32 v22, s14, 38 +; GCN-NEXT: v_writelane_b32 v22, s15, 39 +; GCN-NEXT: v_writelane_b32 v22, s16, 40 +; GCN-NEXT: v_writelane_b32 v22, s17, 41 +; GCN-NEXT: v_writelane_b32 v22, s18, 42 +; GCN-NEXT: v_writelane_b32 v22, s19, 43 +; GCN-NEXT: v_writelane_b32 v22, s20, 44 +; GCN-NEXT: v_writelane_b32 v22, s21, 45 +; GCN-NEXT: v_writelane_b32 v22, s22, 46 +; GCN-NEXT: v_writelane_b32 v22, s23, 47 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[8:23] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v1, s8, 48 -; GCN-NEXT: v_writelane_b32 v1, s9, 49 -; GCN-NEXT: v_writelane_b32 v1, s10, 50 -; GCN-NEXT: v_writelane_b32 v1, s11, 51 -; GCN-NEXT: v_writelane_b32 v1, s12, 52 -; GCN-NEXT: v_writelane_b32 v1, s13, 53 -; GCN-NEXT: v_writelane_b32 v1, s14, 54 -; GCN-NEXT: v_writelane_b32 v1, s15, 55 -; GCN-NEXT: v_writelane_b32 v1, s16, 56 -; GCN-NEXT: v_writelane_b32 v1, s17, 57 -; GCN-NEXT: v_writelane_b32 v1, s18, 58 -; GCN-NEXT: v_writelane_b32 v1, s19, 59 -; GCN-NEXT: v_writelane_b32 v1, s20, 60 -; GCN-NEXT: v_writelane_b32 v1, s21, 61 -; GCN-NEXT: v_writelane_b32 v1, s22, 62 -; GCN-NEXT: v_writelane_b32 v1, s23, 63 +; GCN-NEXT: 
v_writelane_b32 v22, s8, 48 +; GCN-NEXT: v_writelane_b32 v22, s9, 49 +; GCN-NEXT: v_writelane_b32 v22, s10, 50 +; GCN-NEXT: v_writelane_b32 v22, s11, 51 +; GCN-NEXT: v_writelane_b32 v22, s12, 52 +; GCN-NEXT: v_writelane_b32 v22, s13, 53 +; GCN-NEXT: v_writelane_b32 v22, s14, 54 +; GCN-NEXT: v_writelane_b32 v22, s15, 55 +; GCN-NEXT: v_writelane_b32 v22, s16, 56 +; GCN-NEXT: v_writelane_b32 v22, s17, 57 +; GCN-NEXT: v_writelane_b32 v22, s18, 58 +; GCN-NEXT: v_writelane_b32 v22, s19, 59 +; GCN-NEXT: v_writelane_b32 v22, s20, 60 +; GCN-NEXT: v_writelane_b32 v22, s21, 61 +; GCN-NEXT: v_writelane_b32 v22, s22, 62 +; GCN-NEXT: v_writelane_b32 v22, s23, 63 ; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v22, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[24:25] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[6:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_waitcnt vmcnt(1) -; GCN-NEXT: v_writelane_b32 v0, s6, 0 -; GCN-NEXT: v_writelane_b32 v0, s7, 1 +; GCN-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane +; GCN-NEXT: v_writelane_b32 v22, s6, 0 +; GCN-NEXT: v_writelane_b32 v22, s7, 1 ; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v22, off, s[0:3], 0 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[24:25] ; GCN-NEXT: s_mov_b32 s5, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) @@ -123,88 +113,88 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %ou ; GCN-NEXT: s_cbranch_scc1 .LBB0_2 ; GCN-NEXT: ; %bb.1: ; %bb0 ; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v23, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[24:25] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s4, v1, 0 -; 
GCN-NEXT: v_readlane_b32 s5, v1, 1 -; GCN-NEXT: v_readlane_b32 s6, v1, 2 -; GCN-NEXT: v_readlane_b32 s7, v1, 3 -; GCN-NEXT: v_readlane_b32 s8, v1, 4 -; GCN-NEXT: v_readlane_b32 s9, v1, 5 -; GCN-NEXT: v_readlane_b32 s10, v1, 6 -; GCN-NEXT: v_readlane_b32 s11, v1, 7 -; GCN-NEXT: v_readlane_b32 s12, v1, 8 -; GCN-NEXT: v_readlane_b32 s13, v1, 9 -; GCN-NEXT: v_readlane_b32 s14, v1, 10 -; GCN-NEXT: v_readlane_b32 s15, v1, 11 -; GCN-NEXT: v_readlane_b32 s16, v1, 12 -; GCN-NEXT: v_readlane_b32 s17, v1, 13 -; GCN-NEXT: v_readlane_b32 s18, v1, 14 -; GCN-NEXT: v_readlane_b32 s19, v1, 15 +; GCN-NEXT: v_readlane_b32 s4, v23, 0 +; GCN-NEXT: v_readlane_b32 s5, v23, 1 +; GCN-NEXT: v_readlane_b32 s6, v23, 2 +; GCN-NEXT: v_readlane_b32 s7, v23, 3 +; GCN-NEXT: v_readlane_b32 s8, v23, 4 +; GCN-NEXT: v_readlane_b32 s9, v23, 5 +; GCN-NEXT: v_readlane_b32 s10, v23, 6 +; GCN-NEXT: v_readlane_b32 s11, v23, 7 +; GCN-NEXT: v_readlane_b32 s12, v23, 8 +; GCN-NEXT: v_readlane_b32 s13, v23, 9 +; GCN-NEXT: v_readlane_b32 s14, v23, 10 +; GCN-NEXT: v_readlane_b32 s15, v23, 11 +; GCN-NEXT: v_readlane_b32 s16, v23, 12 +; GCN-NEXT: v_readlane_b32 s17, v23, 13 +; GCN-NEXT: v_readlane_b32 s18, v23, 14 +; GCN-NEXT: v_readlane_b32 s19, v23, 15 ; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v22, off, s[0:3], 0 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[24:25] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s4, v1, 16 -; GCN-NEXT: v_readlane_b32 s5, v1, 17 -; GCN-NEXT: v_readlane_b32 s6, v1, 18 -; GCN-NEXT: v_readlane_b32 s7, v1, 19 -; GCN-NEXT: v_readlane_b32 s8, v1, 20 -; GCN-NEXT: v_readlane_b32 s9, v1, 21 -; GCN-NEXT: v_readlane_b32 s10, v1, 22 -; GCN-NEXT: v_readlane_b32 s11, v1, 23 -; GCN-NEXT: v_readlane_b32 s12, v1, 24 -; GCN-NEXT: v_readlane_b32 s13, v1, 25 -; GCN-NEXT: v_readlane_b32 s14, v1, 26 -; GCN-NEXT: v_readlane_b32 s15, v1, 27 -; 
GCN-NEXT: v_readlane_b32 s16, v1, 28 -; GCN-NEXT: v_readlane_b32 s17, v1, 29 -; GCN-NEXT: v_readlane_b32 s18, v1, 30 -; GCN-NEXT: v_readlane_b32 s19, v1, 31 +; GCN-NEXT: v_readlane_b32 s4, v23, 16 +; GCN-NEXT: v_readlane_b32 s5, v23, 17 +; GCN-NEXT: v_readlane_b32 s6, v23, 18 +; GCN-NEXT: v_readlane_b32 s7, v23, 19 +; GCN-NEXT: v_readlane_b32 s8, v23, 20 +; GCN-NEXT: v_readlane_b32 s9, v23, 21 +; GCN-NEXT: v_readlane_b32 s10, v23, 22 +; GCN-NEXT: v_readlane_b32 s11, v23, 23 +; GCN-NEXT: v_readlane_b32 s12, v23, 24 +; GCN-NEXT: v_readlane_b32 s13, v23, 25 +; GCN-NEXT: v_readlane_b32 s14, v23, 26 +; GCN-NEXT: v_readlane_b32 s15, v23, 27 +; GCN-NEXT: v_readlane_b32 s16, v23, 28 +; GCN-NEXT: v_readlane_b32 s17, v23, 29 +; GCN-NEXT: v_readlane_b32 s18, v23, 30 +; GCN-NEXT: v_readlane_b32 s19, v23, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s4, v1, 32 -; GCN-NEXT: v_readlane_b32 s5, v1, 33 -; GCN-NEXT: v_readlane_b32 s6, v1, 34 -; GCN-NEXT: v_readlane_b32 s7, v1, 35 -; GCN-NEXT: v_readlane_b32 s8, v1, 36 -; GCN-NEXT: v_readlane_b32 s9, v1, 37 -; GCN-NEXT: v_readlane_b32 s10, v1, 38 -; GCN-NEXT: v_readlane_b32 s11, v1, 39 -; GCN-NEXT: v_readlane_b32 s12, v1, 40 -; GCN-NEXT: v_readlane_b32 s13, v1, 41 -; GCN-NEXT: v_readlane_b32 s14, v1, 42 -; GCN-NEXT: v_readlane_b32 s15, v1, 43 -; GCN-NEXT: v_readlane_b32 s16, v1, 44 -; GCN-NEXT: v_readlane_b32 s17, v1, 45 -; GCN-NEXT: v_readlane_b32 s18, v1, 46 -; GCN-NEXT: v_readlane_b32 s19, v1, 47 +; GCN-NEXT: v_readlane_b32 s4, v23, 32 +; GCN-NEXT: v_readlane_b32 s5, v23, 33 +; GCN-NEXT: v_readlane_b32 s6, v23, 34 +; GCN-NEXT: v_readlane_b32 s7, v23, 35 +; GCN-NEXT: v_readlane_b32 s8, v23, 36 +; GCN-NEXT: v_readlane_b32 s9, v23, 37 +; GCN-NEXT: v_readlane_b32 s10, v23, 38 +; GCN-NEXT: v_readlane_b32 s11, v23, 39 +; GCN-NEXT: v_readlane_b32 s12, v23, 40 +; GCN-NEXT: v_readlane_b32 s13, v23, 41 +; GCN-NEXT: v_readlane_b32 s14, v23, 42 +; GCN-NEXT: v_readlane_b32 s15, v23, 43 
+; GCN-NEXT: v_readlane_b32 s16, v23, 44 +; GCN-NEXT: v_readlane_b32 s17, v23, 45 +; GCN-NEXT: v_readlane_b32 s18, v23, 46 +; GCN-NEXT: v_readlane_b32 s19, v23, 47 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s8, v1, 48 -; GCN-NEXT: v_readlane_b32 s9, v1, 49 -; GCN-NEXT: v_readlane_b32 s10, v1, 50 -; GCN-NEXT: v_readlane_b32 s11, v1, 51 -; GCN-NEXT: v_readlane_b32 s12, v1, 52 -; GCN-NEXT: v_readlane_b32 s13, v1, 53 -; GCN-NEXT: v_readlane_b32 s14, v1, 54 -; GCN-NEXT: v_readlane_b32 s15, v1, 55 -; GCN-NEXT: v_readlane_b32 s16, v1, 56 -; GCN-NEXT: v_readlane_b32 s17, v1, 57 -; GCN-NEXT: v_readlane_b32 s18, v1, 58 -; GCN-NEXT: v_readlane_b32 s19, v1, 59 -; GCN-NEXT: v_readlane_b32 s20, v1, 60 -; GCN-NEXT: v_readlane_b32 s21, v1, 61 -; GCN-NEXT: v_readlane_b32 s22, v1, 62 -; GCN-NEXT: v_readlane_b32 s23, v1, 63 +; GCN-NEXT: v_readlane_b32 s8, v23, 48 +; GCN-NEXT: v_readlane_b32 s9, v23, 49 +; GCN-NEXT: v_readlane_b32 s10, v23, 50 +; GCN-NEXT: v_readlane_b32 s11, v23, 51 +; GCN-NEXT: v_readlane_b32 s12, v23, 52 +; GCN-NEXT: v_readlane_b32 s13, v23, 53 +; GCN-NEXT: v_readlane_b32 s14, v23, 54 +; GCN-NEXT: v_readlane_b32 s15, v23, 55 +; GCN-NEXT: v_readlane_b32 s16, v23, 56 +; GCN-NEXT: v_readlane_b32 s17, v23, 57 +; GCN-NEXT: v_readlane_b32 s18, v23, 58 +; GCN-NEXT: v_readlane_b32 s19, v23, 59 +; GCN-NEXT: v_readlane_b32 s20, v23, 60 +; GCN-NEXT: v_readlane_b32 s21, v23, 61 +; GCN-NEXT: v_readlane_b32 s22, v23, 62 +; GCN-NEXT: v_readlane_b32 s23, v23, 63 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s4, v0, 0 -; GCN-NEXT: v_readlane_b32 s5, v0, 1 +; GCN-NEXT: v_readlane_b32 s4, v22, 0 +; GCN-NEXT: v_readlane_b32 s5, v22, 1 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[8:23] ; GCN-NEXT: ;;#ASMEND @@ -212,20 +202,11 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %ou ; GCN-NEXT: ; use s[4:5] ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: .LBB0_2: ; %ret -; GCN-NEXT: s_or_saveexec_b64 
s[24:25], -1 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[24:25] -; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[24:25] -; GCN-NEXT: ; kill: killed $vgpr1 -; GCN-NEXT: ; kill: killed $vgpr0 ; GCN-NEXT: s_endpgm call void asm sideeffect "", "~{v[0:7]}" () #0 call void asm sideeffect "", "~{v[8:15]}" () #0 call void asm sideeffect "", "~{v[16:19]}"() #0 call void asm sideeffect "", "~{v[20:21]}"() #0 - call void asm sideeffect "", "~{v22}"() #0 %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir index b0fb24e60bead..bb0a707a7c90b 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir @@ -28,181 +28,180 @@ body: | ; GCN-LABEL: name: test_main ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x80000000) - ; GCN-NEXT: liveins: $vcc_hi, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $sgpr102, $sgpr103, $vgpr0, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GCN-NEXT: liveins: $vcc_hi, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, 
$sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $sgpr102, $sgpr103, $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: $vcc_hi = frame-setup COPY $sgpr33 ; GCN-NEXT: $sgpr33 = frame-setup COPY $sgpr32 ; GCN-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr3, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5) - ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr4, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5) - ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr5, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5) - ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5) - ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5) + ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5) + ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5) + ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5) + ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr33, 12, 0, implicit $exec, implicit 
$flat_scr :: (store (s32) into %stack.72, addrspace 5) + ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5) ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0 ; GCN-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24, implicit-def dead $scc - ; GCN-NEXT: renamable $vgpr2 = IMPLICIT_DEF - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr7, 3, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr8, 4, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr9, 5, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr10, 6, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr11, 7, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr12, 8, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr13, 9, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr14, 10, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr15, 11, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr16, 12, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr17, 13, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr18, 14, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr19, 15, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr20, 16, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr21, 17, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr22, 18, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr23, 19, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr24, 20, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr25, 21, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr26, 22, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr27, 23, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr28, 24, $vgpr3 - ; GCN-NEXT: $vgpr3 = 
SI_SPILL_S32_TO_VGPR $sgpr29, 25, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr30, 26, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr31, 27, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr64, 28, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr65, 29, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr66, 30, $vgpr3 - ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr67, 31, $vgpr3 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr68, 0, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr69, 1, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr70, 2, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr71, 3, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr72, 4, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr73, 5, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr74, 6, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr75, 7, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr76, 8, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr77, 9, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr78, 10, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr79, 11, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr80, 12, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr81, 13, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr82, 14, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr83, 15, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr84, 16, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr85, 17, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr86, 18, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr87, 19, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr88, 20, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr89, 21, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr90, 22, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr91, 23, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR 
$sgpr92, 24, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr93, 25, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr94, 26, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr95, 27, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr96, 28, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr97, 29, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr98, 30, $vgpr4 - ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr99, 31, $vgpr4 - ; GCN-NEXT: $vgpr5 = SI_SPILL_S32_TO_VGPR $sgpr100, 0, $vgpr5 - ; GCN-NEXT: $vgpr5 = SI_SPILL_S32_TO_VGPR $sgpr101, 1, $vgpr5 - ; GCN-NEXT: $vgpr5 = SI_SPILL_S32_TO_VGPR $sgpr102, 2, $vgpr5 - ; GCN-NEXT: $vgpr5 = SI_SPILL_S32_TO_VGPR $sgpr103, 3, $vgpr5 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr7, 3, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr8, 4, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr9, 5, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr10, 6, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr11, 7, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr12, 8, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr13, 9, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr14, 10, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr15, 11, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr16, 12, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr17, 13, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr18, 14, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr19, 15, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr20, 16, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr21, 17, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr22, 18, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr23, 19, $vgpr2 + ; 
GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr24, 20, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr25, 21, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr26, 22, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr27, 23, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr28, 24, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr29, 25, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr30, 26, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr31, 27, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr64, 28, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr65, 29, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr66, 30, $vgpr2 + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr67, 31, $vgpr2 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr68, 0, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr69, 1, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr70, 2, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr71, 3, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr72, 4, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr73, 5, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr74, 6, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr75, 7, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr76, 8, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr77, 9, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr78, 10, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr79, 11, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr80, 12, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr81, 13, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr82, 14, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr83, 15, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr84, 16, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr85, 17, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr86, 18, $vgpr3 + ; GCN-NEXT: $vgpr3 = 
SI_SPILL_S32_TO_VGPR $sgpr87, 19, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr88, 20, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr89, 21, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr90, 22, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr91, 23, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr92, 24, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr93, 25, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr94, 26, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr95, 27, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr96, 28, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr97, 29, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr98, 30, $vgpr3 + ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr99, 31, $vgpr3 + ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr100, 0, $vgpr4 + ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr101, 1, $vgpr4 + ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr102, 2, $vgpr4 + ; GCN-NEXT: $vgpr4 = SI_SPILL_S32_TO_VGPR $sgpr103, 3, $vgpr4 ; GCN-NEXT: $sgpr22 = IMPLICIT_DEF - ; GCN-NEXT: renamable $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr22, 0, killed $vgpr2 + ; GCN-NEXT: $vgpr5 = IMPLICIT_DEF + ; GCN-NEXT: $vgpr5 = SI_SPILL_S32_TO_VGPR $sgpr22, 0, killed $vgpr5 ; GCN-NEXT: dead $vgpr1 = V_SET_INACTIVE_B32 0, $vgpr0, 0, 0, $sgpr_null, implicit $exec, implicit-def $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x80000000) - ; GCN-NEXT: liveins: $vcc_hi, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GCN-NEXT: liveins: $vcc_hi ; GCN-NEXT: {{ $}} ; GCN-NEXT: KILL implicit-def $vcc_lo, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def 
$sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: ; GCN-NEXT: successors: %bb.3(0x80000000) - ; GCN-NEXT: liveins: $vcc_hi, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GCN-NEXT: liveins: $vcc_hi ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $sgpr22 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0 + ; GCN-NEXT: $sgpr22 = SI_RESTORE_S32_FROM_VGPR killed $vgpr5, 0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.3: - ; GCN-NEXT: liveins: $vcc_hi, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GCN-NEXT: liveins: $vcc_hi ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $sgpr103 = SI_RESTORE_S32_FROM_VGPR $vgpr5, 3 - ; GCN-NEXT: $sgpr102 = SI_RESTORE_S32_FROM_VGPR $vgpr5, 2 - ; GCN-NEXT: $sgpr101 = SI_RESTORE_S32_FROM_VGPR $vgpr5, 1 - ; GCN-NEXT: $sgpr100 = SI_RESTORE_S32_FROM_VGPR $vgpr5, 0 - ; GCN-NEXT: $sgpr99 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 31 - ; GCN-NEXT: $sgpr98 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 30 - ; GCN-NEXT: $sgpr97 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 29 - ; GCN-NEXT: $sgpr96 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 28 - ; GCN-NEXT: $sgpr95 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 27 - ; GCN-NEXT: $sgpr94 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 26 - ; GCN-NEXT: $sgpr93 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 25 - ; GCN-NEXT: $sgpr92 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 24 - ; GCN-NEXT: $sgpr91 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 23 - ; GCN-NEXT: $sgpr90 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 22 - ; GCN-NEXT: $sgpr89 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 21 - ; GCN-NEXT: $sgpr88 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 20 - ; GCN-NEXT: $sgpr87 = 
SI_RESTORE_S32_FROM_VGPR $vgpr4, 19 - ; GCN-NEXT: $sgpr86 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 18 - ; GCN-NEXT: $sgpr85 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 17 - ; GCN-NEXT: $sgpr84 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 16 - ; GCN-NEXT: $sgpr83 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 15 - ; GCN-NEXT: $sgpr82 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 14 - ; GCN-NEXT: $sgpr81 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 13 - ; GCN-NEXT: $sgpr80 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 12 - ; GCN-NEXT: $sgpr79 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 11 - ; GCN-NEXT: $sgpr78 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 10 - ; GCN-NEXT: $sgpr77 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 9 - ; GCN-NEXT: $sgpr76 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 8 - ; GCN-NEXT: $sgpr75 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 7 - ; GCN-NEXT: $sgpr74 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 6 - ; GCN-NEXT: $sgpr73 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 5 - ; GCN-NEXT: $sgpr72 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 4 - ; GCN-NEXT: $sgpr71 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 3 - ; GCN-NEXT: $sgpr70 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 2 - ; GCN-NEXT: $sgpr69 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 1 - ; GCN-NEXT: $sgpr68 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 0 - ; GCN-NEXT: $sgpr67 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 31 - ; GCN-NEXT: $sgpr66 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 30 - ; GCN-NEXT: $sgpr65 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 29 - ; GCN-NEXT: $sgpr64 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 28 - ; GCN-NEXT: $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 27 - ; GCN-NEXT: $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 26 - ; GCN-NEXT: $sgpr29 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 25 - ; GCN-NEXT: $sgpr28 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 24 - ; GCN-NEXT: $sgpr27 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 23 - ; GCN-NEXT: $sgpr26 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 22 - ; GCN-NEXT: $sgpr25 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 21 - ; GCN-NEXT: $sgpr24 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 20 - ; GCN-NEXT: $sgpr23 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 19 - ; GCN-NEXT: $sgpr22 = SI_RESTORE_S32_FROM_VGPR 
$vgpr3, 18 - ; GCN-NEXT: $sgpr21 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 17 - ; GCN-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 16 - ; GCN-NEXT: $sgpr19 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 15 - ; GCN-NEXT: $sgpr18 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 14 - ; GCN-NEXT: $sgpr17 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 13 - ; GCN-NEXT: $sgpr16 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 12 - ; GCN-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 11 - ; GCN-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 10 - ; GCN-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 9 - ; GCN-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 8 - ; GCN-NEXT: $sgpr11 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 7 - ; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 6 - ; GCN-NEXT: $sgpr9 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 5 - ; GCN-NEXT: $sgpr8 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 4 - ; GCN-NEXT: $sgpr7 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 3 - ; GCN-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 2 - ; GCN-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 1 - ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 0 - ; GCN-NEXT: KILL killed renamable $vgpr2 + ; GCN-NEXT: $sgpr103 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 3 + ; GCN-NEXT: $sgpr102 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 2 + ; GCN-NEXT: $sgpr101 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 1 + ; GCN-NEXT: $sgpr100 = SI_RESTORE_S32_FROM_VGPR $vgpr4, 0 + ; GCN-NEXT: $sgpr99 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 31 + ; GCN-NEXT: $sgpr98 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 30 + ; GCN-NEXT: $sgpr97 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 29 + ; GCN-NEXT: $sgpr96 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 28 + ; GCN-NEXT: $sgpr95 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 27 + ; GCN-NEXT: $sgpr94 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 26 + ; GCN-NEXT: $sgpr93 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 25 + ; GCN-NEXT: $sgpr92 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 24 + ; GCN-NEXT: $sgpr91 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 23 + ; GCN-NEXT: $sgpr90 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 22 + ; GCN-NEXT: $sgpr89 = 
SI_RESTORE_S32_FROM_VGPR $vgpr3, 21 + ; GCN-NEXT: $sgpr88 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 20 + ; GCN-NEXT: $sgpr87 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 19 + ; GCN-NEXT: $sgpr86 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 18 + ; GCN-NEXT: $sgpr85 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 17 + ; GCN-NEXT: $sgpr84 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 16 + ; GCN-NEXT: $sgpr83 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 15 + ; GCN-NEXT: $sgpr82 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 14 + ; GCN-NEXT: $sgpr81 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 13 + ; GCN-NEXT: $sgpr80 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 12 + ; GCN-NEXT: $sgpr79 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 11 + ; GCN-NEXT: $sgpr78 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 10 + ; GCN-NEXT: $sgpr77 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 9 + ; GCN-NEXT: $sgpr76 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 8 + ; GCN-NEXT: $sgpr75 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 7 + ; GCN-NEXT: $sgpr74 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 6 + ; GCN-NEXT: $sgpr73 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 5 + ; GCN-NEXT: $sgpr72 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 4 + ; GCN-NEXT: $sgpr71 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 3 + ; GCN-NEXT: $sgpr70 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 2 + ; GCN-NEXT: $sgpr69 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 1 + ; GCN-NEXT: $sgpr68 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 0 + ; GCN-NEXT: $sgpr67 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 31 + ; GCN-NEXT: $sgpr66 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 30 + ; GCN-NEXT: $sgpr65 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 29 + ; GCN-NEXT: $sgpr64 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 28 + ; GCN-NEXT: $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 27 + ; GCN-NEXT: $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 26 + ; GCN-NEXT: $sgpr29 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 25 + ; GCN-NEXT: $sgpr28 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 24 + ; GCN-NEXT: $sgpr27 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 23 + ; GCN-NEXT: $sgpr26 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 22 + ; GCN-NEXT: $sgpr25 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 21 + ; GCN-NEXT: $sgpr24 = SI_RESTORE_S32_FROM_VGPR 
$vgpr2, 20 + ; GCN-NEXT: $sgpr23 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 19 + ; GCN-NEXT: $sgpr22 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 18 + ; GCN-NEXT: $sgpr21 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 17 + ; GCN-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 16 + ; GCN-NEXT: $sgpr19 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 15 + ; GCN-NEXT: $sgpr18 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 14 + ; GCN-NEXT: $sgpr17 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 13 + ; GCN-NEXT: $sgpr16 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 12 + ; GCN-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 11 + ; GCN-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 10 + ; GCN-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 9 + ; GCN-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 8 + ; GCN-NEXT: $sgpr11 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 7 + ; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 6 + ; GCN-NEXT: $sgpr9 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 5 + ; GCN-NEXT: $sgpr8 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 4 + ; GCN-NEXT: $sgpr7 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 3 + ; GCN-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 2 + ; GCN-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1 + ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0 ; GCN-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5) - ; GCN-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5) - ; GCN-NEXT: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.71, addrspace 5) - ; GCN-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5) - ; GCN-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.73, 
addrspace 5) + ; GCN-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5) + ; GCN-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5) + ; GCN-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.71, addrspace 5) + ; GCN-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5) + ; GCN-NEXT: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.73, addrspace 5) ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0 ; GCN-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24, implicit-def dead $scc ; GCN-NEXT: $sgpr33 = frame-destroy COPY $vcc_hi diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir index 2c4b7a22facf4..59c4b715dd12e 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir @@ -218,7 +218,7 @@ body: | ; VMEM-GFX8-LABEL: name: sgpr32_save_clobber_scc_emergency_stack_slot ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $sgpr8, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 
$vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec @@ -235,7 +235,7 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $sgpr8, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -271,7 +271,7 @@ body: | ; VMEM-GFX8-LABEL: 
name: sgpr32_restore_clobber_scc_emergency_stack_slot ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec @@ -288,7 +288,7 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $sgpr8, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -327,7 +327,7 @@ body: | ; VMEM-GFX8-LABEL: name: sgpr64_save_clobber_scc_emergency_stack_slot ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $sgpr8_sgpr9, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, 
$vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec @@ -345,7 +345,7 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $sgpr8_sgpr9, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 
$vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -381,7 +381,7 @@ body: | ; VMEM-GFX8-LABEL: name: sgpr64_restore_clobber_scc_emergency_stack_slot ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec @@ -399,7 +399,7 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: 
%bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $sgpr8_sgpr9, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -441,7 +441,7 @@ body: | ; VMEM-GFX8-LABEL: name: sgpr32_save_clobber_scc_emergency_stack_slot_x2 ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $sgpr8, $sgpr9, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec @@ -468,7 +468,7 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $sgpr8, $sgpr9, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, 
$vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, 
$vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -505,7 +505,7 @@ body: | ; VMEM-GFX8-LABEL: name: sgpr32_restore_clobber_scc_emergency_stack_slot_x2 ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec @@ -532,7 +532,7 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $sgpr8, $sgpr9, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spills-empty-prolog-block.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spills-empty-prolog-block.mir index 0c6c0069911f0..bed7c0c12b7cb 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spills-empty-prolog-block.mir +++ 
b/llvm/test/CodeGen/AMDGPU/sgpr-spills-empty-prolog-block.mir @@ -18,11 +18,9 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $sgpr0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: $sgpr0 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0 - ; CHECK-NEXT: KILL [[DEF]] ; CHECK-NEXT: S_ENDPGM 0 bb.0: liveins: $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll index b2f5b6aa7fe36..ff2202f1e177b 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll @@ -19,7 +19,7 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 { ; GCN-NEXT: s_mov_b32 s18, s33 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 -; GCN-NEXT: buffer_store_dword v255, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v255, off, s[0:3], s33 offset:452 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[16:17] ; GCN-NEXT: s_add_i32 s32, s32, 0x7400 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill @@ -135,13 +135,13 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 { ; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: v_writelane_b32 v255, s30, 0 ; GCN-NEXT: v_writelane_b32 v255, s31, 1 -; GCN-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:452 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:444 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:452 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v31, off, 
s[0:3], s33 offset:448 ; 4-byte Folded Reload ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, child_function@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, child_function@gotpcrel32@hi+12 @@ -266,7 +266,7 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 { ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v255, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v255, off, s[0:3], s33 offset:452 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_add_i32 s32, s32, 0xffff8c00 ; GCN-NEXT: s_mov_b32 s33, s18 @@ -313,7 +313,7 @@ define void @spill_to_lowest_available_vgpr() #0 { ; GCN-NEXT: s_mov_b32 s18, s33 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 -; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[16:17] ; GCN-NEXT: s_add_i32 s32, s32, 0x7400 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill @@ -428,13 +428,13 @@ define void @spill_to_lowest_available_vgpr() #0 { ; GCN-NEXT: buffer_store_dword v253, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: v_writelane_b32 v254, s30, 0 ; GCN-NEXT: v_writelane_b32 v254, s31, 1 -; GCN-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:440 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v31, 
off, s[0:3], s33 offset:444 ; 4-byte Folded Reload ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, child_function@gotpcrel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, child_function@gotpcrel32@hi+12 @@ -558,7 +558,7 @@ define void @spill_to_lowest_available_vgpr() #0 { ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:432 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_add_i32 s32, s32, 0xffff8c00 ; GCN-NEXT: s_mov_b32 s33, s18 @@ -602,8 +602,8 @@ define void @spill_sgpr_with_sgpr_uses() #0 { ; GCN-LABEL: spill_sgpr_with_sgpr_uses: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill @@ -715,38 +715,30 @@ define void @spill_sgpr_with_sgpr_uses() #0 { ; GCN-NEXT: buffer_store_dword v251, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v252, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v253, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:440 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_or_saveexec_b64 
s[8:9], -1 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[8:9] ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s4 ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_writelane_b32 v0, s4, 0 +; GCN-NEXT: ; implicit-def: $vgpr254 : SGPR spill to VGPR lane +; GCN-NEXT: v_writelane_b32 v254, s4, 0 ; GCN-NEXT: s_or_saveexec_b64 s[8:9], -1 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[8:9] ; GCN-NEXT: s_cbranch_scc1 .LBB3_2 ; GCN-NEXT: ; %bb.1: ; %bb0 ; GCN-NEXT: s_or_saveexec_b64 s[8:9], -1 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[8:9] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s4, v0, 0 +; GCN-NEXT: v_readlane_b32 s4, v254, 0 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s4 ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: .LBB3_2: ; %ret -; GCN-NEXT: s_or_saveexec_b64 s[8:9], -1 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[8:9] -; GCN-NEXT: ; kill: killed $vgpr0 ; GCN-NEXT: buffer_load_dword v253, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v252, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v251, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload @@ -857,8 +849,8 @@ define void @spill_sgpr_with_sgpr_uses() #0 { ; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:432 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload -; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v0, off, 
s[0:3], s32 offset:448 ; 4-byte Folded Reload +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] @@ -1183,7 +1175,7 @@ define void @spill_sgpr_no_free_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill @@ -1315,16 +1307,16 @@ define void @spill_sgpr_no_free_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in ; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GCN-NEXT: flat_load_dwordx4 v[6:9], v[2:3] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:452 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:456 ; 4-byte Folded Reload -; 
GCN-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:460 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:464 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:452 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:456 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:460 ; 4-byte Folded Reload ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -1446,7 +1438,7 @@ define void @spill_sgpr_no_free_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:440 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload ; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:464 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir index 887e9c4b5dc5e..0e6d9ce4a7f31 100644 --- a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir +++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-vgpr-lanes-usage.mir @@ -25,9 +25,9 @@ body: | ; SGPR_SPILLED-LABEL: name: stack-slot-share-equal-sized-spills ; SGPR_SPILLED: liveins: $sgpr30, $sgpr31, $vgpr62 ; SGPR_SPILLED-NEXT: {{ $}} - ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr30, 0, $vgpr62 ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr62 + ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; SGPR_SPILLED-NEXT: 
[[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 0, [[DEF]] ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr0, 1, [[DEF]], implicit-def $sgpr0_sgpr1, implicit $sgpr0_sgpr1 ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr1, 2, [[DEF]], implicit $sgpr0_sgpr1 @@ -91,9 +91,9 @@ body: | ; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-large-spill-first ; SGPR_SPILLED: liveins: $sgpr30, $sgpr31, $vgpr62 ; SGPR_SPILLED-NEXT: {{ $}} - ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr30, 0, $vgpr62 ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr62 + ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 0, [[DEF]] ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr2, 1, [[DEF]], implicit-def $sgpr2_sgpr3, implicit $sgpr2_sgpr3 ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr3, 2, [[DEF]], implicit $sgpr2_sgpr3 @@ -155,9 +155,9 @@ body: | ; SGPR_SPILLED-LABEL: name: stack-slot-share-unequal-sized-spills-with-small-spill-first ; SGPR_SPILLED: liveins: $sgpr30, $sgpr31, $vgpr62 ; SGPR_SPILLED-NEXT: {{ $}} - ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr30, 0, $vgpr62 ; SGPR_SPILLED-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr62 + ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr32, 0, [[DEF]] ; SGPR_SPILLED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr0, 1, [[DEF]] ; SGPR_SPILLED-NEXT: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll b/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll deleted file mode 100644 index 
c5a5a5209f54f..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll +++ /dev/null @@ -1,73 +0,0 @@ -; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SGPR %s - -; Make sure this doesn't crash. -; ALL-LABEL: {{^}}test: -; ALL: s_mov_b32 s[[LO:[0-9]+]], SCRATCH_RSRC_DWORD0 -; ALL: s_mov_b32 s[[HI:[0-9]+]], 0xe80000 - -; Make sure we are handling hazards correctly. -; SGPR: v_mov_b32_e32 v0, vcc_lo -; SGPR-NEXT: s_or_saveexec_b64 [[EXEC_COPY:s\[[0-9]+:[0-9]+\]]], -1 -; SGPR-NEXT: buffer_load_dword [[VHI:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 ; 4-byte Folded Reload -; SGPR-NEXT: s_mov_b64 exec, [[EXEC_COPY]] -; SGPR-NEXT: s_waitcnt vmcnt(0) -; SGPR-NEXT: v_readlane_b32 s{{[0-9]+}}, [[VHI]], 0 -; SGPR-NEXT: v_readlane_b32 s{{[0-9]+}}, [[VHI]], 1 -; SGPR-NEXT: v_readlane_b32 s{{[0-9]+}}, [[VHI]], 2 -; SGPR-NEXT: v_readlane_b32 s[[HI:[0-9]+]], [[VHI]], 3 -; SGPR-NEXT: s_or_saveexec_b64 s[100:101], -1 -; SGPR-NEXT: s_mov_b64 exec, s[100:101] -; SGPR-NEXT: s_nop 2 -; SGPR-NEXT: buffer_store_dword v0, off, s[{{[0-9]+}}:[[HI]]], 0 -; SGPR-NEXT: ; kill: killed $vgpr1 - -; ALL: s_endpgm -define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %in) { - call void asm sideeffect "", "~{s[0:7]}" () - call void asm sideeffect "", "~{s[8:15]}" () - call void asm sideeffect "", "~{s[16:23]}" () - call void asm sideeffect "", "~{s[24:31]}" () - call void asm sideeffect "", "~{s[32:39]}" () - call void asm sideeffect "", "~{s[40:47]}" () - call void asm sideeffect "", "~{s[48:55]}" () - call void asm sideeffect "", "~{s[56:63]}" () - call void asm sideeffect "", "~{s[64:71]}" () - call void asm sideeffect "", "~{s[72:79]}" () - call void asm sideeffect "", "~{s[80:87]}" () - call void asm sideeffect "", "~{s[88:95]}" () - call void asm sideeffect "", "~{v[0:7]}" () - call void asm sideeffect "", "~{v[8:15]}" () - call void asm sideeffect "", "~{v[16:23]}" () - call void asm sideeffect 
"", "~{v[24:31]}" () - call void asm sideeffect "", "~{v[32:39]}" () - call void asm sideeffect "", "~{v[40:47]}" () - call void asm sideeffect "", "~{v[48:55]}" () - call void asm sideeffect "", "~{v[56:63]}" () - call void asm sideeffect "", "~{v[64:71]}" () - call void asm sideeffect "", "~{v[72:79]}" () - call void asm sideeffect "", "~{v[80:87]}" () - call void asm sideeffect "", "~{v[88:95]}" () - call void asm sideeffect "", "~{v[96:103]}" () - call void asm sideeffect "", "~{v[104:111]}" () - call void asm sideeffect "", "~{v[112:119]}" () - call void asm sideeffect "", "~{v[120:127]}" () - call void asm sideeffect "", "~{v[128:135]}" () - call void asm sideeffect "", "~{v[136:143]}" () - call void asm sideeffect "", "~{v[144:151]}" () - call void asm sideeffect "", "~{v[152:159]}" () - call void asm sideeffect "", "~{v[160:167]}" () - call void asm sideeffect "", "~{v[168:175]}" () - call void asm sideeffect "", "~{v[176:183]}" () - call void asm sideeffect "", "~{v[184:191]}" () - call void asm sideeffect "", "~{v[192:199]}" () - call void asm sideeffect "", "~{v[200:207]}" () - call void asm sideeffect "", "~{v[208:215]}" () - call void asm sideeffect "", "~{v[216:223]}" () - call void asm sideeffect "", "~{v[224:231]}" () - call void asm sideeffect "", "~{v[232:239]}" () - call void asm sideeffect "", "~{v[240:247]}" () - call void asm sideeffect "", "~{v[248:255]}" () - - store i32 %in, ptr addrspace(1) %out - ret void -} diff --git a/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir b/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir index f8ec6bb5d943f..080bd052a7391 100644 --- a/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir +++ b/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir @@ -35,7 +35,7 @@ body: | ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $sgpr34_sgpr35 = IMPLICIT_DEF 
- ; CHECK-NEXT: dead renamable $vgpr0 = IMPLICIT_DEF + ; CHECK-NEXT: dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: renamable $sgpr41 = IMPLICIT_DEF ; CHECK-NEXT: renamable $sgpr38_sgpr39 = COPY undef $sgpr8_sgpr9 ; CHECK-NEXT: renamable $sgpr36_sgpr37 = IMPLICIT_DEF @@ -79,9 +79,9 @@ body: | ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000000F0, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr38_sgpr39, 40, 0 :: (dereferenceable invariant load (s64), addrspace 4) - ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR undef renamable $vgpr0, undef renamable $vgpr0, killed renamable $sgpr6_sgpr7, 0, 0, implicit $exec :: (store (s32), addrspace 1) - ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR undef renamable $vgpr0, undef renamable $vgpr0, renamable $sgpr50_sgpr51, 0, 0, implicit $exec :: (store (s32), addrspace 1) - ; CHECK-NEXT: dead renamable $vgpr0 = COPY killed renamable $sgpr49 + ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], killed renamable $sgpr6_sgpr7, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], renamable $sgpr50_sgpr51, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr49 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: $sgpr6_sgpr7 = COPY killed renamable $sgpr36_sgpr37 ; CHECK-NEXT: $sgpr10_sgpr11 = COPY killed renamable $sgpr34_sgpr35 diff --git a/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll b/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll index 34bcc3f02ac66..03988c3994992 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll +++ 
b/llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll @@ -2,23 +2,20 @@ ; GCN-LABEL: {{^}}spill_csr_s5_copy: ; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33 -; GCN: s_xor_saveexec_b64 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, -1 +; GCN: s_or_saveexec_b64 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec -; GCN: v_writelane_b32 v40, [[FP_SCRATCH_COPY]], 4 +; GCN: v_writelane_b32 v40, [[FP_SCRATCH_COPY]], 2 ; GCN: s_swappc_b64 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 9 ; GCN: buffer_store_dword [[K]], off, s[0:3], s33{{$}} -; GCN: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v40, 4 -; GCN: s_xor_saveexec_b64 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, -1 +; GCN: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v40, 2 +; GCN: s_or_saveexec_b64 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GCN: s_mov_b64 exec ; GCN: s_mov_b32 s33, [[FP_SCRATCH_COPY]] ; GCN: s_setpc_b64 diff --git a/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir b/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir index e54e5898f8b53..40be0c6b67ee9 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-empty-live-interval.mir @@ -22,14 +22,11 @@ body: | ; CHECK-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub1:vreg_64 = V_MOV_B32_e32 1786773504, implicit $exec ; CHECK-NEXT: dead [[V_MUL_F32_e32_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e32 0, [[V_MOV_B32_e32_]].sub1, implicit $mode, implicit $exec ; CHECK-NEXT: undef [[V_MAC_F32_e32_:%[0-9]+]].sub1:vreg_64 = V_MAC_F32_e32 0, undef %1:vgpr_32, undef [[V_MAC_F32_e32_]].sub1, 
implicit $mode, implicit $exec - ; CHECK-NEXT: SI_SPILL_V64_SAVE [[V_MAC_F32_e32_]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: undef [[V_MOV_B32_e32_1:%[0-9]+]].sub1:vreg_64 = V_MOV_B32_e32 1786773504, implicit $exec - ; CHECK-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_1]].sub1 - ; CHECK-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: S_NOP 0, implicit [[SI_SPILL_V64_RESTORE]].sub1 - ; CHECK-NEXT: S_NOP 0, implicit undef %9.sub0:vreg_64 + ; CHECK-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]].sub1 + ; CHECK-NEXT: S_NOP 0, implicit [[V_MAC_F32_e32_]].sub1 + ; CHECK-NEXT: S_NOP 0, implicit undef [[V_MAC_F32_e32_]].sub0 bb.0: successors: %bb.1 @@ -59,13 +56,13 @@ body: | ; CHECK: bb.0: ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub2:vreg_128 = V_MOV_B32_e32 1786773504, implicit $exec + ; CHECK-NEXT: undef [[V_MOV_B32_e32_:%[0-9]+]].sub2:vreg_128 = V_MOV_B32_e32 0, implicit $exec + ; CHECK-NEXT: undef [[V_MOV_B32_e32_1:%[0-9]+]].sub2:vreg_128 = V_MOV_B32_e32 1786773504, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]].sub2 - ; CHECK-NEXT: S_NOP 0, implicit undef %4.sub0:vreg_128 - ; CHECK-NEXT: undef [[V_MOV_B32_e32_1:%[0-9]+]].sub2:vreg_128 = V_MOV_B32_e32 0, implicit $exec ; CHECK-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_1]].sub2 + ; CHECK-NEXT: S_NOP 0, implicit undef [[V_MOV_B32_e32_]].sub0 + ; CHECK-NEXT: S_NOP 0, implicit [[V_MOV_B32_e32_]].sub2 bb.0: successors: %bb.1 diff --git a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir index 05e1082de4478..f4edafd9443ab 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir +++ 
b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir @@ -25,14 +25,13 @@ body: | ; GCN-NEXT: $sgpr8_sgpr9 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr8_sgpr9 - ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF ; GCN-NEXT: renamable $sgpr1 = COPY $sgpr2 - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr1, 1, killed $vgpr0 - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr2, 2, killed $vgpr0 - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF + ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr1, 1, killed $vgpr0 + ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr2, 2, killed $vgpr0 + ; GCN-NEXT: dead $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: renamable $sgpr8 = COPY renamable $sgpr1 - ; GCN-NEXT: KILL killed renamable $vgpr0 ; GCN-NEXT: $sgpr0_sgpr1 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1 @@ -64,13 +63,12 @@ body: | ; GCN-NEXT: $sgpr8_sgpr9 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit 
$exec :: (store (s32) into %stack.1, addrspace 5) ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr8_sgpr9 - ; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF ; GCN-NEXT: renamable $sgpr1 = COPY $sgpr2 - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr1, 1, killed $vgpr0 - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr2, 2, killed $vgpr0 - ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: KILL killed renamable $vgpr0 + ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF + ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr1, 1, killed $vgpr0 + ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr2, 2, killed $vgpr0 + ; GCN-NEXT: dead $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: $sgpr0_sgpr1 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir index 11babc82e919b..dff2bd7f7aef9 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir @@ -21,8 +21,8 @@ body: | ; GCN-LABEL: name: sgpr32_spill ; GCN: liveins: $sgpr30_sgpr31, $sgpr10 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: S_NOP 0 + ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed 
$sgpr10, 0, [[DEF]] ; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0 ; GCN-NEXT: S_SETPC_B64 $sgpr30_sgpr31 @@ -55,7 +55,6 @@ body: | ; GCN-LABEL: name: sgpr_spill_lane_crossover ; GCN: liveins: $sgpr10, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $vgpr63, $sgpr30_sgpr31, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr64, 0, $vgpr63 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr65, 1, $vgpr63 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr66, 2, $vgpr63 @@ -89,6 +88,7 @@ body: | ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr94, 30, $vgpr63 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr95, 31, $vgpr63 ; GCN-NEXT: S_NOP 0 + ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]] ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr64, 1, [[DEF]], implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr65, 2, [[DEF]] 
@@ -187,9 +187,9 @@ body: | ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; GCN-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: S_NOP 0 ; GCN-NEXT: S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc + ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: @@ -256,7 +256,6 @@ body: | ; GCN-NEXT: successors: %bb.3(0x80000000) ; GCN-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: S_NOP 0 ; GCN-NEXT: S_BRANCH %bb.3 ; GCN-NEXT: {{ $}} @@ -264,7 +263,7 @@ body: | ; GCN-NEXT: successors: %bb.2(0x80000000) ; GCN-NEXT: liveins: $sgpr10, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0 + ; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR %0, 0 ; GCN-NEXT: $sgpr10 = S_ADD_I32 $sgpr10, 15, implicit-def dead $scc ; GCN-NEXT: S_BRANCH %bb.2 ; GCN-NEXT: {{ $}} @@ -272,7 +271,7 @@ body: | ; GCN-NEXT: successors: %bb.3(0x80000000) ; GCN-NEXT: liveins: $sgpr10, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0 + ; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR %0, 0 ; GCN-NEXT: $sgpr10 = S_ADD_I32 $sgpr10, 20, implicit-def dead $scc ; GCN-NEXT: S_BRANCH %bb.3 ; GCN-NEXT: {{ $}} @@ -281,6 +280,7 @@ body: | ; GCN-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: $sgpr10 = S_MOV_B32 10 + ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]] ; GCN-NEXT: S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir index 2caaabde38e9d..9b0f52cb39b01 100644 --- 
a/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir @@ -19,12 +19,8 @@ body: | bb.0: liveins: $sgpr30_sgpr31, $vgpr0 ; GCN-LABEL: name: shift_back_exec_copy_reserved_reg - ; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GCN: liveins: $sgpr30_sgpr31, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, killed $vgpr0 ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, killed $vgpr0 ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec @@ -60,12 +56,10 @@ body: | bb.0: liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr30_sgpr31, $vcc, $vgpr0 ; GCN-LABEL: name: spill_exec_copy_reserved_reg - ; GCN: liveins: $vcc, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GCN: liveins: $vcc, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $vgpr0, $vgpr2, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: $sgpr28_sgpr29 = S_XOR_SAVEEXEC_B64 
-1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr28_sgpr29 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 0, undef $vgpr2 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr35, 1, undef $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll index b4a981f1db4ec..882356d994fc6 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll @@ -12,12 +12,10 @@ define amdgpu_kernel void @test_spill_av_class(<4 x i32> %arg) #0 { ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec ; GCN-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[COPY]], 0, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2228234 /* regdef:VGPR_32 */, def undef %30.sub0 - ; GCN-NEXT: SI_SPILL_V64_SAVE %30, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5) + ; GCN-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2228234 /* regdef:VGPR_32 */, def undef %12.sub0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]] ; 
GCN-NEXT: GLOBAL_STORE_DWORDX4 undef %22:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: [[SI_SPILL_V64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5) - ; GCN-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3538953 /* reguse:VReg_64 */, [[SI_SPILL_V64_RESTORE]] + ; GCN-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 3538953 /* reguse:VReg_64 */, %12 ; GCN-NEXT: S_ENDPGM 0 %v0 = call i32 asm sideeffect "; def $0", "=v"() %tmp = insertelement <2 x i32> undef, i32 %v0, i32 0 diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll index 81dd2c4457b2f..4384d1e32cf53 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll @@ -11,9 +11,8 @@ define void @test() { ; CHECK: ; %bb.0: ; %bb.0 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[4:5] -; CHECK-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane ; CHECK-NEXT: .LBB0_1: ; %bb.1 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: s_cbranch_scc1 .LBB0_3 @@ -21,42 +20,40 @@ define void @test() { ; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: .LBB0_3: ; %bb.3 ; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1 -; CHECK-NEXT: v_accvgpr_read_b32 v0, a0 ; Reload Reuse -; CHECK-NEXT: s_mov_b64 exec, s[10:11] ; CHECK-NEXT: ; implicit-def: $sgpr4 -; CHECK-NEXT: v_mov_b32_e32 v1, s4 -; CHECK-NEXT: v_readfirstlane_b32 s6, v1 +; CHECK-NEXT: v_mov_b32_e32 v0, 
s4 +; CHECK-NEXT: v_readfirstlane_b32 s6, v0 ; CHECK-NEXT: s_mov_b64 s[4:5], -1 ; CHECK-NEXT: s_mov_b32 s7, 0 ; CHECK-NEXT: s_cmp_eq_u32 s6, s7 -; CHECK-NEXT: v_writelane_b32 v0, s4, 0 -; CHECK-NEXT: v_writelane_b32 v0, s5, 1 +; CHECK-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane +; CHECK-NEXT: v_writelane_b32 v1, s4, 0 +; CHECK-NEXT: v_writelane_b32 v1, s5, 1 ; CHECK-NEXT: s_mov_b64 s[10:11], exec ; CHECK-NEXT: s_mov_b64 exec, -1 -; CHECK-NEXT: v_accvgpr_write_b32 a0, v0 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a0, v1 ; Reload Reuse ; CHECK-NEXT: s_mov_b64 exec, s[10:11] ; CHECK-NEXT: s_cbranch_scc1 .LBB0_5 ; CHECK-NEXT: ; %bb.4: ; %bb.4 ; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1 -; CHECK-NEXT: v_accvgpr_read_b32 v0, a0 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v1, a0 ; Reload Reuse ; CHECK-NEXT: s_mov_b64 exec, s[10:11] ; CHECK-NEXT: s_mov_b64 s[4:5], 0 -; CHECK-NEXT: v_writelane_b32 v0, s4, 0 -; CHECK-NEXT: v_writelane_b32 v0, s5, 1 +; CHECK-NEXT: v_writelane_b32 v1, s4, 0 +; CHECK-NEXT: v_writelane_b32 v1, s5, 1 ; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1 ; CHECK-NEXT: s_nop 0 -; CHECK-NEXT: v_accvgpr_write_b32 a0, v0 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_write_b32 a0, v1 ; Reload Reuse ; CHECK-NEXT: s_mov_b64 exec, s[10:11] ; CHECK-NEXT: .LBB0_5: ; %Flow ; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1 ; CHECK-NEXT: s_nop 0 -; CHECK-NEXT: v_accvgpr_read_b32 v0, a0 ; Reload Reuse +; CHECK-NEXT: v_accvgpr_read_b32 v1, a0 ; Reload Reuse ; CHECK-NEXT: s_mov_b64 exec, s[10:11] -; CHECK-NEXT: v_readlane_b32 s4, v0, 0 -; CHECK-NEXT: v_readlane_b32 s5, v0, 1 +; CHECK-NEXT: v_readlane_b32 s4, v1, 0 +; CHECK-NEXT: v_readlane_b32 s5, v1, 1 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; CHECK-NEXT: s_mov_b32 s4, 1 ; CHECK-NEXT: ; implicit-def: $sgpr5 @@ -64,12 +61,8 @@ define void @test() { ; CHECK-NEXT: s_and_b64 vcc, exec, s[4:5] ; CHECK-NEXT: 
s_cbranch_vccnz .LBB0_1 ; CHECK-NEXT: ; %bb.6: ; %bb.5 -; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1 -; CHECK-NEXT: v_accvgpr_read_b32 v0, a0 ; Reload Reuse -; CHECK-NEXT: s_mov_b64 exec, s[10:11] -; CHECK-NEXT: ; kill: killed $vgpr0 ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/spill192.mir b/llvm/test/CodeGen/AMDGPU/spill192.mir index 5040140a3e935..be2a31d7ccbaa 100644 --- a/llvm/test/CodeGen/AMDGPU/spill192.mir +++ b/llvm/test/CodeGen/AMDGPU/spill192.mir @@ -37,8 +37,8 @@ body: | ; EXPANDED: bb.0: ; EXPANDED-NEXT: successors: %bb.1(0x80000000) ; EXPANDED-NEXT: {{ $}} - ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]] ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]] diff --git a/llvm/test/CodeGen/AMDGPU/spill224.mir b/llvm/test/CodeGen/AMDGPU/spill224.mir index 5e53f93df95f7..f4965dcf61e17 100644 --- a/llvm/test/CodeGen/AMDGPU/spill224.mir +++ b/llvm/test/CodeGen/AMDGPU/spill224.mir @@ -33,8 +33,8 @@ body: | ; EXPANDED: bb.0: ; EXPANDED-NEXT: successors: %bb.1(0x80000000) ; EXPANDED-NEXT: {{ $}} - ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 + ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; EXPANDED-NEXT: 
[[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10 ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]] ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]] diff --git a/llvm/test/CodeGen/AMDGPU/spill288.mir b/llvm/test/CodeGen/AMDGPU/spill288.mir index 3d5404a9c1ad5..312531ba5bc99 100644 --- a/llvm/test/CodeGen/AMDGPU/spill288.mir +++ b/llvm/test/CodeGen/AMDGPU/spill288.mir @@ -33,8 +33,8 @@ body: | ; EXPANDED: bb.0: ; EXPANDED-NEXT: successors: %bb.1(0x80000000) ; EXPANDED-NEXT: {{ $}} - ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 + ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]] ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]] diff --git a/llvm/test/CodeGen/AMDGPU/spill320.mir b/llvm/test/CodeGen/AMDGPU/spill320.mir index 4473a4d6648ef..0c0c01760f8ba 100644 --- a/llvm/test/CodeGen/AMDGPU/spill320.mir +++ b/llvm/test/CodeGen/AMDGPU/spill320.mir @@ -33,8 +33,8 @@ body: | ; EXPANDED: bb.0: ; EXPANDED-NEXT: successors: %bb.1(0x80000000) ; EXPANDED-NEXT: {{ $}} - ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 + ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def 
$sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]] ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]] diff --git a/llvm/test/CodeGen/AMDGPU/spill352.mir b/llvm/test/CodeGen/AMDGPU/spill352.mir index 8fa053a908b60..8823ba1a8326e 100644 --- a/llvm/test/CodeGen/AMDGPU/spill352.mir +++ b/llvm/test/CodeGen/AMDGPU/spill352.mir @@ -33,8 +33,8 @@ body: | ; EXPANDED: bb.0: ; EXPANDED-NEXT: successors: %bb.1(0x80000000) ; EXPANDED-NEXT: {{ $}} - ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14 + ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14 ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]] ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]] diff --git a/llvm/test/CodeGen/AMDGPU/spill384.mir b/llvm/test/CodeGen/AMDGPU/spill384.mir index cd604e4483b9c..e33641cf89237 100644 --- a/llvm/test/CodeGen/AMDGPU/spill384.mir +++ b/llvm/test/CodeGen/AMDGPU/spill384.mir @@ -33,8 +33,8 @@ body: | ; EXPANDED: bb.0: ; EXPANDED-NEXT: successors: %bb.1(0x80000000) ; EXPANDED-NEXT: {{ $}} - ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def 
$sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, [[DEF]] ; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, [[DEF]] diff --git a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll index 3c16cd29de8f6..6b0fbc44c65b7 100644 --- a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll +++ b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll @@ -236,20 +236,15 @@ define void @func_stacksave_nonentry_block(i1 %cond) { ; WAVE32-O0: ; %bb.0: ; %bb0 ; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; WAVE32-O0-NEXT: s_xor_saveexec_b32 s4, -1 -; WAVE32-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; WAVE32-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s4 -; WAVE32-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane -; WAVE32-O0-NEXT: v_mov_b32_e32 v1, v0 -; WAVE32-O0-NEXT: s_or_saveexec_b32 s7, -1 -; WAVE32-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s7 -; WAVE32-O0-NEXT: v_and_b32_e64 v1, 1, v1 -; WAVE32-O0-NEXT: v_cmp_eq_u32_e64 s5, v1, 1 +; WAVE32-O0-NEXT: v_and_b32_e64 v0, 1, v0 +; WAVE32-O0-NEXT: v_cmp_eq_u32_e64 s5, v0, 1 ; WAVE32-O0-NEXT: s_mov_b32 s4, exec_lo -; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) -; WAVE32-O0-NEXT: v_writelane_b32 v0, s4, 0 +; WAVE32-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane +; WAVE32-O0-NEXT: v_writelane_b32 v1, s4, 0 ; WAVE32-O0-NEXT: s_or_saveexec_b32 s7, -1 -; WAVE32-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; WAVE32-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill ; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s7 ; WAVE32-O0-NEXT: s_and_b32 s4, 
s4, s5 ; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s4 @@ -262,14 +257,13 @@ define void @func_stacksave_nonentry_block(i1 %cond) { ; WAVE32-O0-NEXT: ;;#ASMEND ; WAVE32-O0-NEXT: .LBB4_2: ; %bb2 ; WAVE32-O0-NEXT: s_or_saveexec_b32 s7, -1 -; WAVE32-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; WAVE32-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload ; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s7 ; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) -; WAVE32-O0-NEXT: v_readlane_b32 s4, v0, 0 +; WAVE32-O0-NEXT: v_readlane_b32 s4, v1, 0 ; WAVE32-O0-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; WAVE32-O0-NEXT: ; kill: killed $vgpr0 ; WAVE32-O0-NEXT: s_xor_saveexec_b32 s4, -1 -; WAVE32-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; WAVE32-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s4 ; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) ; WAVE32-O0-NEXT: s_setpc_b64 s[30:31] @@ -278,21 +272,16 @@ define void @func_stacksave_nonentry_block(i1 %cond) { ; WAVE64-O0: ; %bb.0: ; %bb0 ; WAVE64-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; WAVE64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; WAVE64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; WAVE64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; WAVE64-O0-NEXT: s_mov_b64 exec, s[4:5] -; WAVE64-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane -; WAVE64-O0-NEXT: v_mov_b32_e32 v1, v0 -; WAVE64-O0-NEXT: s_or_saveexec_b64 s[10:11], -1 -; WAVE64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; WAVE64-O0-NEXT: s_mov_b64 exec, s[10:11] -; WAVE64-O0-NEXT: v_and_b32_e64 v1, 1, v1 -; WAVE64-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v1, 1 +; WAVE64-O0-NEXT: v_and_b32_e64 v0, 1, v0 +; WAVE64-O0-NEXT: v_cmp_eq_u32_e64 s[6:7], v0, 1 ; WAVE64-O0-NEXT: s_mov_b64 s[4:5], exec -; WAVE64-O0-NEXT: s_waitcnt vmcnt(0) -; WAVE64-O0-NEXT: 
v_writelane_b32 v0, s4, 0 -; WAVE64-O0-NEXT: v_writelane_b32 v0, s5, 1 +; WAVE64-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane +; WAVE64-O0-NEXT: v_writelane_b32 v1, s4, 0 +; WAVE64-O0-NEXT: v_writelane_b32 v1, s5, 1 ; WAVE64-O0-NEXT: s_or_saveexec_b64 s[10:11], -1 -; WAVE64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; WAVE64-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill ; WAVE64-O0-NEXT: s_mov_b64 exec, s[10:11] ; WAVE64-O0-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; WAVE64-O0-NEXT: s_mov_b64 exec, s[4:5] @@ -305,15 +294,14 @@ define void @func_stacksave_nonentry_block(i1 %cond) { ; WAVE64-O0-NEXT: ;;#ASMEND ; WAVE64-O0-NEXT: .LBB4_2: ; %bb2 ; WAVE64-O0-NEXT: s_or_saveexec_b64 s[10:11], -1 -; WAVE64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; WAVE64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload ; WAVE64-O0-NEXT: s_mov_b64 exec, s[10:11] ; WAVE64-O0-NEXT: s_waitcnt vmcnt(0) -; WAVE64-O0-NEXT: v_readlane_b32 s4, v0, 0 -; WAVE64-O0-NEXT: v_readlane_b32 s5, v0, 1 +; WAVE64-O0-NEXT: v_readlane_b32 s4, v1, 0 +; WAVE64-O0-NEXT: v_readlane_b32 s5, v1, 1 ; WAVE64-O0-NEXT: s_or_b64 exec, exec, s[4:5] -; WAVE64-O0-NEXT: ; kill: killed $vgpr0 ; WAVE64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; WAVE64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; WAVE64-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; WAVE64-O0-NEXT: s_mov_b64 exec, s[4:5] ; WAVE64-O0-NEXT: s_waitcnt vmcnt(0) ; WAVE64-O0-NEXT: s_setpc_b64 s[30:31] @@ -324,10 +312,10 @@ define void @func_stacksave_nonentry_block(i1 %cond) { ; WAVE32-WWM-PREALLOC-NEXT: s_xor_saveexec_b32 s4, -1 ; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 exec_lo, s4 -; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane ; WAVE32-WWM-PREALLOC-NEXT: 
v_and_b32_e64 v0, 1, v0 ; WAVE32-WWM-PREALLOC-NEXT: v_cmp_eq_u32_e64 s5, v0, 1 ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s4, exec_lo +; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane ; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v1, s4, 0 ; WAVE32-WWM-PREALLOC-NEXT: s_and_b32 s4, s4, s5 ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 exec_lo, s4 @@ -341,7 +329,6 @@ define void @func_stacksave_nonentry_block(i1 %cond) { ; WAVE32-WWM-PREALLOC-NEXT: .LBB4_2: ; %bb2 ; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s4, v1, 0 ; WAVE32-WWM-PREALLOC-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; WAVE32-WWM-PREALLOC-NEXT: ; kill: killed $vgpr1 ; WAVE32-WWM-PREALLOC-NEXT: s_xor_saveexec_b32 s4, -1 ; WAVE32-WWM-PREALLOC-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 exec_lo, s4 @@ -941,7 +928,6 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects ; WAVE32-O0-NEXT: s_bitset0_b32 s23, 21 ; WAVE32-O0-NEXT: s_add_u32 s20, s20, s9 ; WAVE32-O0-NEXT: s_addc_u32 s21, s21, 0 -; WAVE32-O0-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane ; WAVE32-O0-NEXT: s_mov_b32 s14, s8 ; WAVE32-O0-NEXT: s_mov_b32 s13, s7 ; WAVE32-O0-NEXT: s_mov_b32 s12, s6 @@ -949,12 +935,10 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects ; WAVE32-O0-NEXT: s_mov_b64 s[8:9], s[2:3] ; WAVE32-O0-NEXT: s_mov_b64 s[4:5], s[0:1] ; WAVE32-O0-NEXT: s_mov_b32 s0, s32 -; WAVE32-O0-NEXT: v_writelane_b32 v3, s0, 0 +; WAVE32-O0-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane +; WAVE32-O0-NEXT: v_writelane_b32 v32, s0, 0 ; WAVE32-O0-NEXT: s_lshr_b32 s0, s0, 5 -; WAVE32-O0-NEXT: v_writelane_b32 v3, s0, 1 -; WAVE32-O0-NEXT: s_or_saveexec_b32 s19, -1 -; WAVE32-O0-NEXT: buffer_store_dword v3, off, s[20:23], 0 offset:128 ; 4-byte Folded Spill -; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s19 +; WAVE32-O0-NEXT: v_writelane_b32 v32, s0, 1 ; WAVE32-O0-NEXT: v_mov_b32_e32 v3, 42 ; WAVE32-O0-NEXT: 
buffer_store_dword v3, off, s[20:23], 0 ; WAVE32-O0-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1037,17 +1021,12 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects ; WAVE32-O0-NEXT: ; implicit-def: $sgpr18 ; WAVE32-O0-NEXT: v_mov_b32_e32 v30, s18 ; WAVE32-O0-NEXT: s_swappc_b64 s[30:31], s[16:17] -; WAVE32-O0-NEXT: s_or_saveexec_b32 s19, -1 -; WAVE32-O0-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:128 ; 4-byte Folded Reload -; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s19 -; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) -; WAVE32-O0-NEXT: v_readlane_b32 s1, v0, 1 -; WAVE32-O0-NEXT: v_readlane_b32 s0, v0, 0 +; WAVE32-O0-NEXT: v_readlane_b32 s1, v32, 1 +; WAVE32-O0-NEXT: v_readlane_b32 s0, v32, 0 ; WAVE32-O0-NEXT: ;;#ASMSTART ; WAVE32-O0-NEXT: ; use s1 ; WAVE32-O0-NEXT: ;;#ASMEND ; WAVE32-O0-NEXT: s_mov_b32 s32, s0 -; WAVE32-O0-NEXT: ; kill: killed $vgpr0 ; WAVE32-O0-NEXT: s_endpgm ; ; WAVE64-O0-LABEL: kernel_stacksave_stackrestore_call_with_stack_objects: @@ -1059,7 +1038,6 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects ; WAVE64-O0-NEXT: s_waitcnt lgkmcnt(0) ; WAVE64-O0-NEXT: s_add_u32 s24, s24, s9 ; WAVE64-O0-NEXT: s_addc_u32 s25, s25, 0 -; WAVE64-O0-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane ; WAVE64-O0-NEXT: s_mov_b32 s14, s8 ; WAVE64-O0-NEXT: s_mov_b32 s13, s7 ; WAVE64-O0-NEXT: s_mov_b32 s12, s6 @@ -1067,12 +1045,10 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects ; WAVE64-O0-NEXT: s_mov_b64 s[8:9], s[2:3] ; WAVE64-O0-NEXT: s_mov_b64 s[4:5], s[0:1] ; WAVE64-O0-NEXT: s_mov_b32 s0, s32 -; WAVE64-O0-NEXT: v_writelane_b32 v3, s0, 0 +; WAVE64-O0-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane +; WAVE64-O0-NEXT: v_writelane_b32 v32, s0, 0 ; WAVE64-O0-NEXT: s_lshr_b32 s0, s0, 6 -; WAVE64-O0-NEXT: v_writelane_b32 v3, s0, 1 -; WAVE64-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; WAVE64-O0-NEXT: buffer_store_dword v3, off, s[24:27], 0 offset:128 ; 4-byte Folded Spill -; 
WAVE64-O0-NEXT: s_mov_b64 exec, s[20:21] +; WAVE64-O0-NEXT: v_writelane_b32 v32, s0, 1 ; WAVE64-O0-NEXT: v_mov_b32_e32 v3, 42 ; WAVE64-O0-NEXT: buffer_store_dword v3, off, s[24:27], 0 ; WAVE64-O0-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1155,17 +1131,12 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects ; WAVE64-O0-NEXT: ; implicit-def: $sgpr18 ; WAVE64-O0-NEXT: v_mov_b32_e32 v30, s18 ; WAVE64-O0-NEXT: s_swappc_b64 s[30:31], s[16:17] -; WAVE64-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; WAVE64-O0-NEXT: buffer_load_dword v0, off, s[24:27], 0 offset:128 ; 4-byte Folded Reload -; WAVE64-O0-NEXT: s_mov_b64 exec, s[20:21] -; WAVE64-O0-NEXT: s_waitcnt vmcnt(0) -; WAVE64-O0-NEXT: v_readlane_b32 s1, v0, 1 -; WAVE64-O0-NEXT: v_readlane_b32 s0, v0, 0 +; WAVE64-O0-NEXT: v_readlane_b32 s1, v32, 1 +; WAVE64-O0-NEXT: v_readlane_b32 s0, v32, 0 ; WAVE64-O0-NEXT: ;;#ASMSTART ; WAVE64-O0-NEXT: ; use s1 ; WAVE64-O0-NEXT: ;;#ASMEND ; WAVE64-O0-NEXT: s_mov_b32 s32, s0 -; WAVE64-O0-NEXT: ; kill: killed $vgpr0 ; WAVE64-O0-NEXT: s_endpgm ; ; WAVE32-WWM-PREALLOC-LABEL: kernel_stacksave_stackrestore_call_with_stack_objects: @@ -1178,7 +1149,6 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects ; WAVE32-WWM-PREALLOC-NEXT: s_bitset0_b32 s23, 21 ; WAVE32-WWM-PREALLOC-NEXT: s_add_u32 s20, s20, s9 ; WAVE32-WWM-PREALLOC-NEXT: s_addc_u32 s21, s21, 0 -; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s14, s8 ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s13, s7 ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s12, s6 @@ -1186,6 +1156,7 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b64 s[8:9], s[2:3] ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b64 s[4:5], s[0:1] ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s0, s32 +; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane ; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v32, 
s0, 0 ; WAVE32-WWM-PREALLOC-NEXT: s_lshr_b32 s0, s0, 5 ; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v32, s0, 1 @@ -1277,7 +1248,6 @@ define amdgpu_kernel void @kernel_stacksave_stackrestore_call_with_stack_objects ; WAVE32-WWM-PREALLOC-NEXT: ; use s1 ; WAVE32-WWM-PREALLOC-NEXT: ;;#ASMEND ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s0 -; WAVE32-WWM-PREALLOC-NEXT: ; kill: killed $vgpr32 ; WAVE32-WWM-PREALLOC-NEXT: s_endpgm %alloca = alloca [32 x i32], addrspace(5) %stacksave = call ptr addrspace(5) @llvm.stacksave.p5() @@ -1362,23 +1332,20 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE32-O0-LABEL: func_stacksave_stackrestore_call_with_stack_objects: ; WAVE32-O0: ; %bb.0: ; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; WAVE32-O0-NEXT: s_mov_b32 s25, s33 +; WAVE32-O0-NEXT: s_mov_b32 s24, s33 ; WAVE32-O0-NEXT: s_mov_b32 s33, s32 ; WAVE32-O0-NEXT: s_xor_saveexec_b32 s16, -1 ; WAVE32-O0-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill -; WAVE32-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; WAVE32-O0-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill ; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s16 ; WAVE32-O0-NEXT: s_add_i32 s32, s32, 0x1200 -; WAVE32-O0-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane ; WAVE32-O0-NEXT: v_writelane_b32 v32, s30, 0 ; WAVE32-O0-NEXT: v_writelane_b32 v32, s31, 1 ; WAVE32-O0-NEXT: s_mov_b32 s16, s32 -; WAVE32-O0-NEXT: v_writelane_b32 v0, s16, 0 +; WAVE32-O0-NEXT: ; implicit-def: $vgpr33 : SGPR spill to VGPR lane +; WAVE32-O0-NEXT: v_writelane_b32 v33, s16, 0 ; WAVE32-O0-NEXT: s_lshr_b32 s16, s16, 5 -; WAVE32-O0-NEXT: v_writelane_b32 v0, s16, 1 -; WAVE32-O0-NEXT: s_or_saveexec_b32 s24, -1 -; WAVE32-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill -; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s24 +; WAVE32-O0-NEXT: v_writelane_b32 v33, s16, 1 ; WAVE32-O0-NEXT: v_mov_b32_e32 v0, 42 ; 
WAVE32-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; WAVE32-O0-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1456,25 +1423,20 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE32-O0-NEXT: ; implicit-def: $sgpr18 ; WAVE32-O0-NEXT: v_mov_b32_e32 v30, s18 ; WAVE32-O0-NEXT: s_swappc_b64 s[30:31], s[16:17] -; WAVE32-O0-NEXT: s_or_saveexec_b32 s24, -1 -; WAVE32-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload -; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s24 -; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) -; WAVE32-O0-NEXT: v_readlane_b32 s5, v0, 1 -; WAVE32-O0-NEXT: v_readlane_b32 s4, v0, 0 +; WAVE32-O0-NEXT: v_readlane_b32 s5, v33, 1 +; WAVE32-O0-NEXT: v_readlane_b32 s4, v33, 0 ; WAVE32-O0-NEXT: ;;#ASMSTART ; WAVE32-O0-NEXT: ; use s5 ; WAVE32-O0-NEXT: ;;#ASMEND ; WAVE32-O0-NEXT: s_mov_b32 s32, s4 ; WAVE32-O0-NEXT: v_readlane_b32 s31, v32, 1 ; WAVE32-O0-NEXT: v_readlane_b32 s30, v32, 0 -; WAVE32-O0-NEXT: ; kill: killed $vgpr0 ; WAVE32-O0-NEXT: s_xor_saveexec_b32 s4, -1 ; WAVE32-O0-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload -; WAVE32-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload +; WAVE32-O0-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload ; WAVE32-O0-NEXT: s_mov_b32 exec_lo, s4 ; WAVE32-O0-NEXT: s_add_i32 s32, s32, 0xffffee00 -; WAVE32-O0-NEXT: s_mov_b32 s33, s25 +; WAVE32-O0-NEXT: s_mov_b32 s33, s24 ; WAVE32-O0-NEXT: s_waitcnt vmcnt(0) ; WAVE32-O0-NEXT: s_setpc_b64 s[30:31] ; @@ -1485,19 +1447,16 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE64-O0-NEXT: s_mov_b32 s33, s32 ; WAVE64-O0-NEXT: s_xor_saveexec_b64 s[16:17], -1 ; WAVE64-O0-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill -; WAVE64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; WAVE64-O0-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill ; 
WAVE64-O0-NEXT: s_mov_b64 exec, s[16:17] ; WAVE64-O0-NEXT: s_add_i32 s32, s32, 0x2400 -; WAVE64-O0-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane ; WAVE64-O0-NEXT: v_writelane_b32 v32, s30, 0 ; WAVE64-O0-NEXT: v_writelane_b32 v32, s31, 1 ; WAVE64-O0-NEXT: s_mov_b32 s16, s32 -; WAVE64-O0-NEXT: v_writelane_b32 v0, s16, 0 +; WAVE64-O0-NEXT: ; implicit-def: $vgpr33 : SGPR spill to VGPR lane +; WAVE64-O0-NEXT: v_writelane_b32 v33, s16, 0 ; WAVE64-O0-NEXT: s_lshr_b32 s16, s16, 6 -; WAVE64-O0-NEXT: v_writelane_b32 v0, s16, 1 -; WAVE64-O0-NEXT: s_or_saveexec_b64 s[26:27], -1 -; WAVE64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill -; WAVE64-O0-NEXT: s_mov_b64 exec, s[26:27] +; WAVE64-O0-NEXT: v_writelane_b32 v33, s16, 1 ; WAVE64-O0-NEXT: v_mov_b32_e32 v0, 42 ; WAVE64-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; WAVE64-O0-NEXT: s_waitcnt_vscnt null, 0x0 @@ -1575,22 +1534,17 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE64-O0-NEXT: ; implicit-def: $sgpr18 ; WAVE64-O0-NEXT: v_mov_b32_e32 v30, s18 ; WAVE64-O0-NEXT: s_swappc_b64 s[30:31], s[16:17] -; WAVE64-O0-NEXT: s_or_saveexec_b64 s[26:27], -1 -; WAVE64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload -; WAVE64-O0-NEXT: s_mov_b64 exec, s[26:27] -; WAVE64-O0-NEXT: s_waitcnt vmcnt(0) -; WAVE64-O0-NEXT: v_readlane_b32 s5, v0, 1 -; WAVE64-O0-NEXT: v_readlane_b32 s4, v0, 0 +; WAVE64-O0-NEXT: v_readlane_b32 s5, v33, 1 +; WAVE64-O0-NEXT: v_readlane_b32 s4, v33, 0 ; WAVE64-O0-NEXT: ;;#ASMSTART ; WAVE64-O0-NEXT: ; use s5 ; WAVE64-O0-NEXT: ;;#ASMEND ; WAVE64-O0-NEXT: s_mov_b32 s32, s4 ; WAVE64-O0-NEXT: v_readlane_b32 s31, v32, 1 ; WAVE64-O0-NEXT: v_readlane_b32 s30, v32, 0 -; WAVE64-O0-NEXT: ; kill: killed $vgpr0 ; WAVE64-O0-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; WAVE64-O0-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload -; WAVE64-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:136 ; 
4-byte Folded Reload +; WAVE64-O0-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload ; WAVE64-O0-NEXT: s_mov_b64 exec, s[4:5] ; WAVE64-O0-NEXT: s_add_i32 s32, s32, 0xffffdc00 ; WAVE64-O0-NEXT: s_mov_b32 s33, s19 @@ -1603,14 +1557,14 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s24, s33 ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s33, s32 ; WAVE32-WWM-PREALLOC-NEXT: s_xor_saveexec_b32 s16, -1 -; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill -; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill +; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill +; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 exec_lo, s16 ; WAVE32-WWM-PREALLOC-NEXT: s_add_i32 s32, s32, 0x1200 -; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane ; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v33, s30, 0 ; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v33, s31, 1 ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s16, s32 +; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane ; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v32, s16, 0 ; WAVE32-WWM-PREALLOC-NEXT: s_lshr_b32 s16, s16, 5 ; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v32, s16, 1 @@ -1699,10 +1653,9 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() { ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s4 ; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s31, v33, 1 ; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s30, v33, 0 -; WAVE32-WWM-PREALLOC-NEXT: ; kill: killed $vgpr32 ; WAVE32-WWM-PREALLOC-NEXT: s_xor_saveexec_b32 s4, -1 -; WAVE32-WWM-PREALLOC-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload -; WAVE32-WWM-PREALLOC-NEXT: buffer_load_dword v32, off, 
s[0:3], s33 offset:132 ; 4-byte Folded Reload +; WAVE32-WWM-PREALLOC-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload +; WAVE32-WWM-PREALLOC-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 exec_lo, s4 ; WAVE32-WWM-PREALLOC-NEXT: s_add_i32 s32, s32, 0xffffee00 ; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s33, s24 diff --git a/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir b/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir index 2c4a5dba3520c..cc261b0da4a8f 100644 --- a/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir +++ b/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir @@ -17,10 +17,10 @@ body: | bb.0: liveins: $sgpr20, $vgpr1 ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_of_outgoing_reg - ; GCN: liveins: $sgpr20, $vgpr0, $vgpr1 + ; GCN: liveins: $sgpr20, $vgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0 @@ -50,11 +50,11 @@ body: | bb.0: liveins: $sgpr20, $sgpr21, $vgpr1 ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_of_outgoing_tuple_subreg - ; GCN: liveins: $sgpr20, $sgpr21, $vgpr0, $vgpr1, $vgpr2 + ; GCN: liveins: $sgpr20, $sgpr21, $vgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 
$sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF @@ -89,10 +89,10 @@ body: | bb.0: liveins: $sgpr20, $vgpr1 ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_different_outgoing_reg - ; GCN: liveins: $sgpr20, $vgpr1, $vgpr2 + ; GCN: liveins: $sgpr20, $vgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr2 @@ -121,17 +121,17 @@ body: | bb.0: liveins: $sgpr20, $vgpr1 ; GCN-LABEL: name: wwm_csr_spill_reload - ; GCN: liveins: $sgpr20, $vgpr1, $vgpr40 + ; GCN: liveins: $sgpr20, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, 
implicit-def dead $scc, implicit $exec + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 - ; GCN-NEXT: $vgpr40 = IMPLICIT_DEF - ; GCN-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr40 - ; GCN-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 0, implicit $exec + ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF + ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr2 + ; GCN-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0, implicit $exec ; GCN-NEXT: $vgpr0 = COPY killed $vgpr1, implicit $exec - ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GCN-NEXT: SI_RETURN implicit $vgpr0 $vgpr40 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/trap-abis.ll b/llvm/test/CodeGen/AMDGPU/trap-abis.ll index 52370f6a2ef05..4dfd4c095c87a 100644 --- a/llvm/test/CodeGen/AMDGPU/trap-abis.ll +++ b/llvm/test/CodeGen/AMDGPU/trap-abis.ll @@ -206,14 +206,14 @@ define amdgpu_kernel void @non_entry_trap(ptr addrspace(1) nocapture readonly %a ; ; HSA-TRAP-GFX1100-O0-LABEL: non_entry_trap: ; HSA-TRAP-GFX1100-O0: ; %bb.0: ; %entry -; HSA-TRAP-GFX1100-O0-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane ; HSA-TRAP-GFX1100-O0-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt lgkmcnt(0) ; HSA-TRAP-GFX1100-O0-NEXT: s_mov_b64 s[2:3], s[0:1] -; HSA-TRAP-GFX1100-O0-NEXT: v_writelane_b32 
v0, s2, 0 -; HSA-TRAP-GFX1100-O0-NEXT: v_writelane_b32 v0, s3, 1 +; HSA-TRAP-GFX1100-O0-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane +; HSA-TRAP-GFX1100-O0-NEXT: v_writelane_b32 v2, s2, 0 +; HSA-TRAP-GFX1100-O0-NEXT: v_writelane_b32 v2, s3, 1 ; HSA-TRAP-GFX1100-O0-NEXT: s_or_saveexec_b32 s6, -1 -; HSA-TRAP-GFX1100-O0-NEXT: scratch_store_b32 off, v0, off ; 4-byte Folded Spill +; HSA-TRAP-GFX1100-O0-NEXT: scratch_store_b32 off, v2, off ; 4-byte Folded Spill ; HSA-TRAP-GFX1100-O0-NEXT: s_mov_b32 exec_lo, s6 ; HSA-TRAP-GFX1100-O0-NEXT: v_mov_b32_e32 v0, 0 ; HSA-TRAP-GFX1100-O0-NEXT: global_load_b32 v0, v0, s[0:1] glc dlc @@ -236,16 +236,15 @@ define amdgpu_kernel void @non_entry_trap(ptr addrspace(1) nocapture readonly %a ; HSA-TRAP-GFX1100-O0-NEXT: s_branch .LBB1_3 ; HSA-TRAP-GFX1100-O0-NEXT: .LBB1_2: ; %ret ; HSA-TRAP-GFX1100-O0-NEXT: s_or_saveexec_b32 s6, -1 -; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v0, off, off ; 4-byte Folded Reload +; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v2, off, off ; 4-byte Folded Reload ; HSA-TRAP-GFX1100-O0-NEXT: s_mov_b32 exec_lo, s6 ; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX1100-O0-NEXT: v_readlane_b32 s0, v0, 0 -; HSA-TRAP-GFX1100-O0-NEXT: v_readlane_b32 s1, v0, 1 -; HSA-TRAP-GFX1100-O0-NEXT: v_mov_b32_e32 v1, 0 -; HSA-TRAP-GFX1100-O0-NEXT: v_mov_b32_e32 v2, 3 -; HSA-TRAP-GFX1100-O0-NEXT: global_store_b32 v1, v2, s[0:1] dlc +; HSA-TRAP-GFX1100-O0-NEXT: v_readlane_b32 s0, v2, 0 +; HSA-TRAP-GFX1100-O0-NEXT: v_readlane_b32 s1, v2, 1 +; HSA-TRAP-GFX1100-O0-NEXT: v_mov_b32_e32 v0, 0 +; HSA-TRAP-GFX1100-O0-NEXT: v_mov_b32_e32 v1, 3 +; HSA-TRAP-GFX1100-O0-NEXT: global_store_b32 v0, v1, s[0:1] dlc ; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt_vscnt null, 0x0 -; HSA-TRAP-GFX1100-O0-NEXT: ; kill: killed $vgpr0 ; HSA-TRAP-GFX1100-O0-NEXT: s_endpgm ; HSA-TRAP-GFX1100-O0-NEXT: .LBB1_3: ; =>This Inner Loop Header: Depth=1 ; HSA-TRAP-GFX1100-O0-NEXT: s_sethalt 5 @@ -352,34 +351,32 @@ define amdgpu_kernel void 
@trap_with_use_after(ptr addrspace(1) %arg0, ptr addrs ; ; HSA-TRAP-GFX1100-O0-LABEL: trap_with_use_after: ; HSA-TRAP-GFX1100-O0: ; %bb.0: -; HSA-TRAP-GFX1100-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane ; HSA-TRAP-GFX1100-O0-NEXT: v_mov_b32_e32 v0, 0 ; HSA-TRAP-GFX1100-O0-NEXT: scratch_store_b32 off, v0, off offset:8 ; 4-byte Folded Spill ; HSA-TRAP-GFX1100-O0-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; HSA-TRAP-GFX1100-O0-NEXT: s_load_b64 s[2:3], s[4:5], 0x8 +; HSA-TRAP-GFX1100-O0-NEXT: ; implicit-def: $vgpr2 : SGPR spill to VGPR lane ; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt lgkmcnt(0) -; HSA-TRAP-GFX1100-O0-NEXT: v_writelane_b32 v1, s2, 0 -; HSA-TRAP-GFX1100-O0-NEXT: v_writelane_b32 v1, s3, 1 +; HSA-TRAP-GFX1100-O0-NEXT: v_writelane_b32 v2, s2, 0 +; HSA-TRAP-GFX1100-O0-NEXT: v_writelane_b32 v2, s3, 1 ; HSA-TRAP-GFX1100-O0-NEXT: s_or_saveexec_b32 s6, -1 -; HSA-TRAP-GFX1100-O0-NEXT: scratch_store_b32 off, v1, off offset:4 ; 4-byte Folded Spill +; HSA-TRAP-GFX1100-O0-NEXT: scratch_store_b32 off, v2, off ; 4-byte Folded Spill ; HSA-TRAP-GFX1100-O0-NEXT: s_mov_b32 exec_lo, s6 ; HSA-TRAP-GFX1100-O0-NEXT: global_load_b32 v0, v0, s[0:1] glc dlc ; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX1100-O0-NEXT: scratch_store_b32 off, v0, off ; 4-byte Folded Spill +; HSA-TRAP-GFX1100-O0-NEXT: scratch_store_b32 off, v0, off offset:4 ; 4-byte Folded Spill ; HSA-TRAP-GFX1100-O0-NEXT: s_cbranch_execnz .LBB2_2 ; HSA-TRAP-GFX1100-O0-NEXT: ; %bb.1: +; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v0, off, off offset:8 ; 4-byte Folded Reload +; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v1, off, off offset:4 ; 4-byte Folded Reload ; HSA-TRAP-GFX1100-O0-NEXT: s_or_saveexec_b32 s6, -1 -; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v0, off, off offset:4 ; 4-byte Folded Reload -; HSA-TRAP-GFX1100-O0-NEXT: s_mov_b32 exec_lo, s6 -; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX1100-O0-NEXT: v_readlane_b32 s0, v0, 0 -; HSA-TRAP-GFX1100-O0-NEXT: v_readlane_b32 s1, 
v0, 1 -; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v1, off, off offset:8 ; 4-byte Folded Reload ; HSA-TRAP-GFX1100-O0-NEXT: scratch_load_b32 v2, off, off ; 4-byte Folded Reload +; HSA-TRAP-GFX1100-O0-NEXT: s_mov_b32 exec_lo, s6 ; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt vmcnt(0) -; HSA-TRAP-GFX1100-O0-NEXT: global_store_b32 v1, v2, s[0:1] dlc +; HSA-TRAP-GFX1100-O0-NEXT: v_readlane_b32 s0, v2, 0 +; HSA-TRAP-GFX1100-O0-NEXT: v_readlane_b32 s1, v2, 1 +; HSA-TRAP-GFX1100-O0-NEXT: global_store_b32 v0, v1, s[0:1] dlc ; HSA-TRAP-GFX1100-O0-NEXT: s_waitcnt_vscnt null, 0x0 -; HSA-TRAP-GFX1100-O0-NEXT: ; kill: killed $vgpr0 ; HSA-TRAP-GFX1100-O0-NEXT: s_endpgm ; HSA-TRAP-GFX1100-O0-NEXT: .LBB2_2: ; HSA-TRAP-GFX1100-O0-NEXT: s_trap 2 diff --git a/llvm/test/CodeGen/AMDGPU/true16-ra-pre-gfx11-regression-test.mir b/llvm/test/CodeGen/AMDGPU/true16-ra-pre-gfx11-regression-test.mir index c73b023f18652..4c2d0d2fa0d77 100644 --- a/llvm/test/CodeGen/AMDGPU/true16-ra-pre-gfx11-regression-test.mir +++ b/llvm/test/CodeGen/AMDGPU/true16-ra-pre-gfx11-regression-test.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -start-before=greedy,0 -stop-after=virtregrewriter,1 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -start-before=greedy,0 -stop-after=virtregrewriter,2 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s --- | define amdgpu_ps void @e32() #0 { diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll index 15a83475f368e..a827ebe96cfcf 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll @@ -13,41 +13,37 @@ define amdgpu_kernel void @__omp_offloading_16_dd2df_main_l9() { ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: s_add_u32 s0, s0, s13 ; CHECK-NEXT: s_addc_u32 s1, s1, 0 -; CHECK-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane -; 
CHECK-NEXT: v_mov_b32_e32 v2, v0 -; CHECK-NEXT: s_or_saveexec_b64 s[8:9], -1 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], 0 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b64 exec, s[8:9] -; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: global_load_ushort v3, v1, s[4:5] offset:4 +; CHECK-NEXT: v_mov_b32_e32 v1, v0 +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: global_load_ushort v2, v0, s[4:5] offset:4 ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: ; implicit-def: $sgpr4 ; CHECK-NEXT: s_mov_b32 s4, 0 -; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], v2, s4 -; CHECK-NEXT: v_mov_b32_e32 v2, 0 -; CHECK-NEXT: ds_write_b8 v1, v2 +; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], v1, s4 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 +; CHECK-NEXT: ds_write_b8 v0, v1 ; CHECK-NEXT: s_mov_b64 s[4:5], exec -; CHECK-NEXT: v_writelane_b32 v0, s4, 0 -; CHECK-NEXT: v_writelane_b32 v0, s5, 1 +; CHECK-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane +; CHECK-NEXT: v_writelane_b32 v3, s4, 0 +; CHECK-NEXT: v_writelane_b32 v3, s5, 1 ; CHECK-NEXT: s_or_saveexec_b64 s[8:9], -1 -; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], 0 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[8:9] ; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] ; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_cbranch_execz .LBB0_2 ; CHECK-NEXT: ; %bb.1: ; %bb193 ; CHECK-NEXT: .LBB0_2: ; %bb194 +; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: s_or_saveexec_b64 s[8:9], -1 -; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], 0 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v3, off, s[0:3], 0 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[8:9] ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_readlane_b32 s4, v1, 0 -; CHECK-NEXT: 
v_readlane_b32 s5, v1, 1 +; CHECK-NEXT: v_readlane_b32 s4, v3, 0 +; CHECK-NEXT: v_readlane_b32 s5, v3, 1 ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b32 s4, 0xffff -; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_and_b32_e64 v0, s4, v0 ; CHECK-NEXT: s_mov_b32 s4, 0 ; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], v0, s4 @@ -66,10 +62,6 @@ define amdgpu_kernel void @__omp_offloading_16_dd2df_main_l9() { ; CHECK-NEXT: s_trap 2 ; CHECK-NEXT: ; divergent unreachable ; CHECK-NEXT: .LBB0_4: ; %UnifiedReturnBlock -; CHECK-NEXT: s_or_saveexec_b64 s[8:9], -1 -; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], 0 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b64 exec, s[8:9] -; CHECK-NEXT: ; kill: killed $vgpr0 ; CHECK-NEXT: s_endpgm bb: %i10 = tail call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir index dd3572c027c86..e5caa509835c3 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir @@ -413,7 +413,7 @@ body: | ; MUBUF-LABEL: name: vgpr32_restore_clobber_scc_emergency_stack_slot ; MUBUF: bb.0: ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 
$vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) @@ -424,7 +424,7 @@ body: | ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: bb.1: ; MUBUF-NEXT: successors: %bb.2(0x80000000) - ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: S_NOP 0 ; MUBUF-NEXT: {{ $}} @@ -434,7 +434,7 @@ body: | ; GFX9-FLATSCR-LABEL: name: 
vgpr32_restore_clobber_scc_emergency_stack_slot ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) @@ -446,7 +446,7 @@ body: | ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: bb.1: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, 
$vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, 
$vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: S_NOP 0 ; GFX9-FLATSCR-NEXT: {{ $}} @@ -456,7 +456,7 @@ body: | ; GFX10-FLATSCR-LABEL: name: vgpr32_restore_clobber_scc_emergency_stack_slot ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) @@ -467,7 +467,7 @@ body: | ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: bb.1: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: S_NOP 0 ; GFX10-FLATSCR-NEXT: {{ $}} @@ -477,7 +477,7 @@ body: | ; VMEM-GFX8-LABEL: name: vgpr32_restore_clobber_scc_emergency_stack_slot ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: 
%bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) @@ -488,7 +488,7 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -525,7 +525,7 @@ body: | ; MUBUF-LABEL: name: vgpr64_restore_clobber_scc_emergency_stack_slot ; MUBUF: bb.0: ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, 
$vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) @@ -537,7 +537,7 @@ body: | ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: bb.1: ; MUBUF-NEXT: successors: %bb.2(0x80000000) - ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: S_NOP 0 ; MUBUF-NEXT: {{ $}} @@ -547,7 +547,7 @@ body: | ; GFX9-FLATSCR-LABEL: name: vgpr64_restore_clobber_scc_emergency_stack_slot ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 
$vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) @@ -559,7 +559,7 @@ body: | ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: bb.1: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX9-FLATSCR-NEXT: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ 
$}} ; GFX9-FLATSCR-NEXT: S_NOP 0 ; GFX9-FLATSCR-NEXT: {{ $}} @@ -569,7 +569,7 @@ body: | ; GFX10-FLATSCR-LABEL: name: vgpr64_restore_clobber_scc_emergency_stack_slot ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) @@ -580,7 +580,7 @@ body: | ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: bb.1: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: S_NOP 0 ; GFX10-FLATSCR-NEXT: {{ $}} @@ -590,7 +590,7 @@ body: | ; VMEM-GFX8-LABEL: name: vgpr64_restore_clobber_scc_emergency_stack_slot ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 
$vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) @@ -602,7 +602,7 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; 
VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -639,7 +639,7 @@ body: | ; MUBUF-LABEL: name: vgpr96_restore_clobber_scc_emergency_stack_slot ; MUBUF: bb.0: ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) @@ -652,7 +652,7 @@ body: | ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: bb.1: ; MUBUF-NEXT: successors: %bb.2(0x80000000) - ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: S_NOP 0 ; MUBUF-NEXT: {{ $}} @@ -662,7 +662,7 @@ body: | ; GFX9-FLATSCR-LABEL: name: vgpr96_restore_clobber_scc_emergency_stack_slot ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) @@ -674,7 +674,7 @@ body: | ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: bb.1: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: S_NOP 0 ; GFX9-FLATSCR-NEXT: {{ $}} @@ -684,7 +684,7 @@ body: | ; GFX10-FLATSCR-LABEL: name: vgpr96_restore_clobber_scc_emergency_stack_slot ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX10-FLATSCR-NEXT: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ 
$}} ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) @@ -695,7 +695,7 @@ body: | ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: bb.1: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: S_NOP 0 ; GFX10-FLATSCR-NEXT: {{ $}} @@ -705,7 +705,7 @@ body: | ; VMEM-GFX8-LABEL: name: vgpr96_restore_clobber_scc_emergency_stack_slot ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, 
$vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, 
$vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) @@ -718,7 +718,7 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 
$vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -755,7 +755,7 @@ body: | ; MUBUF-LABEL: name: vgpr32_save_clobber_scc_emergency_stack_slot ; MUBUF: bb.0: ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) @@ -766,7 
+766,7 @@ body: | ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: bb.1: ; MUBUF-NEXT: successors: %bb.2(0x80000000) - ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: S_NOP 0 ; MUBUF-NEXT: {{ $}} @@ -776,7 +776,7 @@ body: | ; GFX9-FLATSCR-LABEL: name: vgpr32_save_clobber_scc_emergency_stack_slot ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) @@ -788,7 +788,7 @@ body: | ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: bb.1: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: S_NOP 0 ; GFX9-FLATSCR-NEXT: {{ $}} @@ -798,7 +798,7 @@ body: | ; GFX10-FLATSCR-LABEL: name: vgpr32_save_clobber_scc_emergency_stack_slot ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 
$vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) @@ -809,7 +809,7 @@ body: | ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: bb.1: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX10-FLATSCR-NEXT: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ 
$}} ; GFX10-FLATSCR-NEXT: S_NOP 0 ; GFX10-FLATSCR-NEXT: {{ $}} @@ -819,7 +819,7 @@ body: | ; VMEM-GFX8-LABEL: name: vgpr32_save_clobber_scc_emergency_stack_slot ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) @@ -830,7 +830,7 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -867,7 +867,7 @@ body: | ; MUBUF-LABEL: name: vgpr64_save_clobber_scc_emergency_stack_slot ; MUBUF: bb.0: ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, 
$vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, 
$vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) @@ -879,7 +879,7 @@ body: | ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: bb.1: ; MUBUF-NEXT: successors: %bb.2(0x80000000) - ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: S_NOP 0 ; MUBUF-NEXT: {{ $}} @@ -889,7 +889,7 @@ body: | ; GFX9-FLATSCR-LABEL: name: vgpr64_save_clobber_scc_emergency_stack_slot ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) @@ -901,7 +901,7 @@ body: | ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: bb.1: ; GFX9-FLATSCR-NEXT: successors: 
%bb.2(0x80000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: S_NOP 0 ; GFX9-FLATSCR-NEXT: {{ $}} @@ -911,7 +911,7 @@ body: | ; GFX10-FLATSCR-LABEL: name: vgpr64_save_clobber_scc_emergency_stack_slot ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) @@ -922,7 +922,7 @@ body: | ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: bb.1: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: S_NOP 0 ; GFX10-FLATSCR-NEXT: {{ $}} @@ -932,7 +932,7 @@ body: | ; VMEM-GFX8-LABEL: name: vgpr64_save_clobber_scc_emergency_stack_slot ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) @@ -944,7 +944,7 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: 
%bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -981,7 +981,7 @@ body: | ; MUBUF-LABEL: name: vgpr96_save_clobber_scc_emergency_stack_slot ; MUBUF: bb.0: ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) @@ -994,7 +994,7 @@ body: | ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: bb.1: ; MUBUF-NEXT: successors: %bb.2(0x80000000) - ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, 
$vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: S_NOP 0 ; MUBUF-NEXT: {{ $}} @@ -1004,7 +1004,7 @@ body: | ; GFX9-FLATSCR-LABEL: name: vgpr96_save_clobber_scc_emergency_stack_slot ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, 
$vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) @@ -1016,7 +1016,7 @@ body: | ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: bb.1: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: S_NOP 0 ; GFX9-FLATSCR-NEXT: {{ $}} @@ -1026,7 +1026,7 @@ body: | ; GFX10-FLATSCR-LABEL: name: vgpr96_save_clobber_scc_emergency_stack_slot ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX10-FLATSCR-NEXT: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ 
$}} ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) @@ -1037,7 +1037,7 @@ body: | ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: bb.1: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: S_NOP 0 ; GFX10-FLATSCR-NEXT: {{ $}} @@ -1047,7 +1047,7 @@ body: | ; VMEM-GFX8-LABEL: name: vgpr96_save_clobber_scc_emergency_stack_slot ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, 
$vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, 
$vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) @@ -1060,7 +1060,7 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 
$vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, 
$vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -1200,7 +1200,7 @@ body: | ; MUBUF-LABEL: name: mubuf_load_restore_clobber_scc_no_vgprs_emergency_stack_slot ; MUBUF: bb.0: ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 
$vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 
$vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) @@ -1211,7 
+1211,7 @@ body: | ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: bb.1: ; MUBUF-NEXT: successors: %bb.2(0x80000000) - ; MUBUF-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, 
$vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: S_NOP 0 ; MUBUF-NEXT: {{ $}} @@ -1221,7 +1221,7 @@ body: | ; GFX9-FLATSCR-LABEL: name: mubuf_load_restore_clobber_scc_no_vgprs_emergency_stack_slot ; GFX9-FLATSCR: bb.0: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) @@ -1235,7 +1235,7 @@ body: | ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: bb.1: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, 
$vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: S_NOP 0 ; GFX9-FLATSCR-NEXT: {{ $}} @@ -1245,7 +1245,7 @@ body: | ; GFX10-FLATSCR-LABEL: name: mubuf_load_restore_clobber_scc_no_vgprs_emergency_stack_slot ; GFX10-FLATSCR: bb.0: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 
$vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 
$vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; GFX10-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) @@ -1259,7 +1259,7 @@ body: | ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: bb.1: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX10-FLATSCR-NEXT: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ 
$}} ; GFX10-FLATSCR-NEXT: S_NOP 0 ; GFX10-FLATSCR-NEXT: {{ $}} @@ -1269,7 +1269,7 @@ body: | ; VMEM-GFX8-LABEL: name: mubuf_load_restore_clobber_scc_no_vgprs_emergency_stack_slot ; VMEM-GFX8: bb.0: ; VMEM-GFX8-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, 
$vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, 
$vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) @@ -1280,7 +1280,7 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, 
$vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, 
$vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir b/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir index 6659e95323769..fa0922590712a 100644 --- a/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/virtregrewrite-undef-identity-copy.mir @@ -30,7 +30,7 @@ machineFunctionInfo: body: | bb.0: ; CHECK-LABEL: name: undef_identity_copy - ; CHECK: renamable $vgpr40_vgpr41_vgpr42_vgpr43 = FLAT_LOAD_DWORDX4 undef renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128), addrspace 1) + ; CHECK: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128), addrspace 1) ; CHECK-NEXT: renamable $sgpr6_sgpr7 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead $scc ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95, implicit-def $scc ; CHECK-NEXT: $sgpr4 = COPY $sgpr95 @@ -39,13 +39,14 @@ body: | ; CHECK-NEXT: renamable $sgpr6_sgpr7 = 
SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @bar + 4, target-flags(amdgpu-rel32-hi) @bar + 4, implicit-def dead $scc ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95 ; CHECK-NEXT: $sgpr4 = COPY $sgpr95 - ; CHECK-NEXT: $vgpr0 = COPY renamable $vgpr40 - ; CHECK-NEXT: $vgpr1 = COPY renamable $vgpr41 - ; CHECK-NEXT: $vgpr2 = COPY killed renamable $vgpr42 - ; CHECK-NEXT: $vgpr3 = KILL undef renamable $vgpr3 + ; CHECK-NEXT: $vgpr0 = COPY [[FLAT_LOAD_DWORDX4_]].sub0 + ; CHECK-NEXT: $vgpr1 = COPY [[FLAT_LOAD_DWORDX4_]].sub1 + ; CHECK-NEXT: $vgpr2 = COPY [[FLAT_LOAD_DWORDX4_]].sub2 + ; CHECK-NEXT: $vgpr3 = COPY undef %4:vgpr_32 ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @bar, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit killed $vgpr3, implicit-def $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95 - ; CHECK-NEXT: FLAT_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) + ; CHECK-NEXT: FLAT_STORE_DWORD undef %6:vreg_64, [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %0:vreg_128 = FLAT_LOAD_DWORDX4 undef %1:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128), addrspace 1) %2:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead $scc diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll index 8c285f37b4878..d1ee82e74b3de 100644 --- a/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/whole-wave-register-copy.ll @@ -13,24 +13,24 @@ define 
void @vector_reg_liverange_split() #0 { ; GFX90A-NEXT: s_mov_b32 s16, s33 ; GFX90A-NEXT: s_mov_b32 s33, s32 ; GFX90A-NEXT: s_xor_saveexec_b64 s[18:19], -1 -; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX90A-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, -1 -; GFX90A-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX90A-NEXT: buffer_store_dword a32, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX90A-NEXT: buffer_store_dword a32, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX90A-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX90A-NEXT: s_mov_b64 exec, s[18:19] ; GFX90A-NEXT: v_writelane_b32 v40, s16, 4 ; GFX90A-NEXT: v_writelane_b32 v40, s28, 2 ; GFX90A-NEXT: v_writelane_b32 v40, s29, 3 -; GFX90A-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane ; GFX90A-NEXT: v_writelane_b32 v40, s30, 0 +; GFX90A-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane ; GFX90A-NEXT: s_addk_i32 s32, 0x400 ; GFX90A-NEXT: v_writelane_b32 v40, s31, 1 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s20 ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s20, 0 +; GFX90A-NEXT: v_writelane_b32 v39, s20, 0 ; GFX90A-NEXT: s_or_saveexec_b64 s[28:29], -1 -; GFX90A-NEXT: v_accvgpr_write_b32 a32, v0 +; GFX90A-NEXT: v_accvgpr_write_b32 a32, v39 ; GFX90A-NEXT: s_mov_b64 exec, s[28:29] ; GFX90A-NEXT: s_getpc_b64 s[16:17] ; GFX90A-NEXT: s_add_u32 s16, s16, foo@gotpcrel32@lo+4 @@ -39,23 +39,22 @@ define void @vector_reg_liverange_split() #0 { ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX90A-NEXT: s_or_saveexec_b64 s[28:29], -1 -; GFX90A-NEXT: v_accvgpr_read_b32 v0, a32 +; GFX90A-NEXT: v_accvgpr_read_b32 v39, a32 ; GFX90A-NEXT: s_mov_b64 exec, s[28:29] -; GFX90A-NEXT: v_readlane_b32 s20, v0, 0 +; GFX90A-NEXT: v_readlane_b32 s20, v39, 0 ; GFX90A-NEXT: ;;#ASMSTART ; 
GFX90A-NEXT: ; use s20 ; GFX90A-NEXT: ;;#ASMEND ; GFX90A-NEXT: v_readlane_b32 s31, v40, 1 ; GFX90A-NEXT: v_readlane_b32 s30, v40, 0 -; GFX90A-NEXT: ; kill: killed $vgpr0 ; GFX90A-NEXT: v_readlane_b32 s4, v40, 4 ; GFX90A-NEXT: v_readlane_b32 s28, v40, 2 ; GFX90A-NEXT: v_readlane_b32 s29, v40, 3 ; GFX90A-NEXT: s_xor_saveexec_b64 s[6:7], -1 -; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX90A-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, -1 -; GFX90A-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX90A-NEXT: buffer_load_dword a32, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX90A-NEXT: buffer_load_dword a32, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX90A-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX90A-NEXT: s_mov_b64 exec, s[6:7] ; GFX90A-NEXT: s_addk_i32 s32, 0xfc00 ; GFX90A-NEXT: s_mov_b32 s33, s4 diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll index 5608ea8563548..4837efe6606b8 100644 --- a/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/whole-wave-register-spill.ll @@ -19,24 +19,23 @@ define void @test() #0 { ; GCN-NEXT: s_mov_b32 s16, s33 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_xor_saveexec_b64 s[18:19], -1 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, -1 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[18:19] ; GCN-NEXT: v_writelane_b32 v40, s16, 4 ; GCN-NEXT: v_writelane_b32 v40, s28, 2 ; 
GCN-NEXT: v_writelane_b32 v40, s29, 3 -; GCN-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane ; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: s_addk_i32 s32, 0x800 +; GCN-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane +; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s16 ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s16, 0 +; GCN-NEXT: v_writelane_b32 v39, s16, 0 ; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[28:29] ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, ext_func@gotpcrel32@lo+4 @@ -45,26 +44,24 @@ define void @test() #0 { ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: s_or_saveexec_b64 s[28:29], -1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v39, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[28:29] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s4, v1, 0 +; GCN-NEXT: v_readlane_b32 s4, v39, 0 ; GCN-NEXT: v_mov_b32_e32 v0, s4 ; GCN-NEXT: global_store_dword v[0:1], v0, off ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-NEXT: v_readlane_b32 s30, v40, 0 -; GCN-NEXT: ; kill: killed $vgpr1 ; GCN-NEXT: v_readlane_b32 s4, v40, 4 ; GCN-NEXT: v_readlane_b32 s28, v40, 2 ; GCN-NEXT: v_readlane_b32 s29, v40, 3 ; GCN-NEXT: s_xor_saveexec_b64 s[6:7], -1 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, -1 -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded 
Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: s_addk_i32 s32, 0xf800 +; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] @@ -75,23 +72,23 @@ define void @test() #0 { ; GCN-O0-NEXT: s_mov_b32 s16, s33 ; GCN-O0-NEXT: s_mov_b32 s33, s32 ; GCN-O0-NEXT: s_xor_saveexec_b64 s[18:19], -1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, -1 -; GCN-O0-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[18:19] ; GCN-O0-NEXT: v_writelane_b32 v40, s16, 4 ; GCN-O0-NEXT: v_writelane_b32 v40, s28, 2 ; GCN-O0-NEXT: v_writelane_b32 v40, s29, 3 ; GCN-O0-NEXT: s_add_i32 s32, s32, 0x400 -; GCN-O0-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane ; GCN-O0-NEXT: v_writelane_b32 v40, s30, 0 ; GCN-O0-NEXT: v_writelane_b32 v40, s31, 1 ; GCN-O0-NEXT: ;;#ASMSTART ; GCN-O0-NEXT: ; def s16 ; GCN-O0-NEXT: ;;#ASMEND -; GCN-O0-NEXT: v_writelane_b32 v0, s16, 0 +; GCN-O0-NEXT: ; implicit-def: $vgpr39 : SGPR spill to VGPR lane +; GCN-O0-NEXT: v_writelane_b32 v39, s16, 0 ; GCN-O0-NEXT: s_or_saveexec_b64 s[28:29], -1 -; GCN-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-O0-NEXT: buffer_store_dword v39, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-O0-NEXT: s_mov_b64 exec, s[28:29] ; GCN-O0-NEXT: s_getpc_b64 s[16:17] ; GCN-O0-NEXT: s_add_u32 s16, s16, ext_func@gotpcrel32@lo+4 @@ -104,26 +101,25 @@ define void @test() #0 { ; GCN-O0-NEXT: s_waitcnt lgkmcnt(0) ; GCN-O0-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-O0-NEXT: s_or_saveexec_b64 s[28:29], -1 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 
offset:4 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v39, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[28:29] ; GCN-O0-NEXT: s_waitcnt vmcnt(0) -; GCN-O0-NEXT: v_readlane_b32 s4, v0, 0 +; GCN-O0-NEXT: v_readlane_b32 s4, v39, 0 ; GCN-O0-NEXT: ; implicit-def: $sgpr6_sgpr7 -; GCN-O0-NEXT: v_mov_b32_e32 v1, s6 -; GCN-O0-NEXT: v_mov_b32_e32 v2, s7 -; GCN-O0-NEXT: v_mov_b32_e32 v3, s4 -; GCN-O0-NEXT: global_store_dword v[1:2], v3, off +; GCN-O0-NEXT: v_mov_b32_e32 v0, s6 +; GCN-O0-NEXT: v_mov_b32_e32 v1, s7 +; GCN-O0-NEXT: v_mov_b32_e32 v2, s4 +; GCN-O0-NEXT: global_store_dword v[0:1], v2, off ; GCN-O0-NEXT: s_waitcnt vmcnt(0) ; GCN-O0-NEXT: v_readlane_b32 s31, v40, 1 ; GCN-O0-NEXT: v_readlane_b32 s30, v40, 0 -; GCN-O0-NEXT: ; kill: killed $vgpr0 ; GCN-O0-NEXT: v_readlane_b32 s4, v40, 4 ; GCN-O0-NEXT: v_readlane_b32 s28, v40, 2 ; GCN-O0-NEXT: v_readlane_b32 s29, v40, 3 ; GCN-O0-NEXT: s_xor_saveexec_b64 s[6:7], -1 -; GCN-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, -1 -; GCN-O0-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-O0-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[6:7] ; GCN-O0-NEXT: s_add_i32 s32, s32, 0xfffffc00 ; GCN-O0-NEXT: s_mov_b32 s33, s4 diff --git a/llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll b/llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll new file mode 100644 index 0000000000000..145f1e483cd99 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll @@ -0,0 +1,29 @@ +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stress-regalloc=2 -filetype=null %s 2>&1 | FileCheck %s + +; A negative test to capture the expected error when the VGPRs are insufficient for wwm-regalloc. 
+ +; CHECK: error: can't find enough VGPRs for wwm-regalloc + +define amdgpu_kernel void @test(i32 %in) { +entry: + call void asm sideeffect "", "~{v[0:7]}" () + call void asm sideeffect "", "~{v[8:15]}" () + call void asm sideeffect "", "~{v[16:23]}" () + call void asm sideeffect "", "~{v[24:31]}" () + call void asm sideeffect "", "~{v[32:39]}" () + call void asm sideeffect "", "~{v[40:47]}" () + call void asm sideeffect "", "~{v[48:55]}" () + call void asm sideeffect "", "~{v[56:63]}" () + %val0 = call i32 asm sideeffect "; def $0", "=s" () + %val1 = call i32 asm sideeffect "; def $0", "=s" () + %val2 = call i32 asm sideeffect "; def $0", "=s" () + %cmp = icmp eq i32 %in, 0 + br i1 %cmp, label %bb0, label %ret +bb0: + call void asm sideeffect "; use $0", "s"(i32 %val0) + call void asm sideeffect "; use $0", "s"(i32 %val1) + call void asm sideeffect "; use $0", "s"(i32 %val2) + br label %ret +ret: + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll index c295a056eb9e7..025381d5c16df 100644 --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll @@ -134,15 +134,10 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) ; GFX9-O0: ; %bb.0: ; %entry ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1 -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] -; 
GFX9-O0-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[46:47], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[46:47] ; GFX9-O0-NEXT: s_mov_b32 s40, s6 ; GFX9-O0-NEXT: s_mov_b32 s34, s4 ; GFX9-O0-NEXT: ; kill: def $sgpr40 killed $sgpr40 def $sgpr40_sgpr41 @@ -157,38 +152,38 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) ; GFX9-O0-NEXT: s_mov_b32 s37, s44 ; GFX9-O0-NEXT: s_mov_b32 s38, s43 ; GFX9-O0-NEXT: s_mov_b32 s39, s42 -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_writelane_b32 v0, s40, 0 -; GFX9-O0-NEXT: v_writelane_b32 v0, s41, 1 -; GFX9-O0-NEXT: v_writelane_b32 v0, s34, 2 -; GFX9-O0-NEXT: v_writelane_b32 v0, s35, 3 +; GFX9-O0-NEXT: ; implicit-def: $vgpr5 : SGPR spill to VGPR lane +; GFX9-O0-NEXT: v_writelane_b32 v5, s40, 0 +; GFX9-O0-NEXT: v_writelane_b32 v5, s41, 1 +; GFX9-O0-NEXT: v_writelane_b32 v5, s34, 2 +; GFX9-O0-NEXT: v_writelane_b32 v5, s35, 3 ; GFX9-O0-NEXT: s_mov_b32 s34, 0 ; GFX9-O0-NEXT: s_nop 2 -; GFX9-O0-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], s34 +; GFX9-O0-NEXT: buffer_load_dwordx2 v[3:4], off, s[36:39], s34 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; GFX9-O0-NEXT: ; implicit-def: $sgpr36_sgpr37 ; GFX9-O0-NEXT: ; implicit-def: $sgpr36_sgpr37 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[36:37], -1 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, s34 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[36:37] +; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[36:37] ; GFX9-O0-NEXT: v_mov_b32_e32 v2, s34 ; 
GFX9-O0-NEXT: s_nop 1 ; GFX9-O0-NEXT: v_mov_b32_dpp v2, v1 row_bcast:31 row_mask:0xc bank_mask:0xf ; GFX9-O0-NEXT: v_add_u32_e64 v1, v1, v2 ; GFX9-O0-NEXT: s_mov_b64 exec, s[36:37] -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[36:37], v3, s34 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, s34 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[36:37], v0, s34 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s34 +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 s[34:35], exec -; GFX9-O0-NEXT: v_writelane_b32 v0, s34, 4 -; GFX9-O0-NEXT: v_writelane_b32 v0, s35, 5 +; GFX9-O0-NEXT: v_writelane_b32 v5, s34, 4 +; GFX9-O0-NEXT: v_writelane_b32 v5, s35, 5 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[46:47], -1 -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[46:47] ; GFX9-O0-NEXT: s_and_b64 s[34:35], s[34:35], s[36:37] ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] @@ -211,26 +206,26 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-O0-NEXT: .LBB1_2: ; %merge +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_or_saveexec_b64 s[46:47], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 ; 
4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[46:47] ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s36, v0, 4 -; GFX9-O0-NEXT: v_readlane_b32 s37, v0, 5 +; GFX9-O0-NEXT: v_readlane_b32 s36, v5, 4 +; GFX9-O0-NEXT: v_readlane_b32 s37, v5, 5 ; GFX9-O0-NEXT: s_or_b64 exec, exec, s[36:37] -; GFX9-O0-NEXT: v_readlane_b32 s38, v0, 0 -; GFX9-O0-NEXT: v_readlane_b32 s39, v0, 1 -; GFX9-O0-NEXT: v_readlane_b32 s34, v0, 2 -; GFX9-O0-NEXT: v_readlane_b32 s35, v0, 3 -; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[36:37], v3, v4 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[36:37] +; GFX9-O0-NEXT: v_readlane_b32 s38, v5, 0 +; GFX9-O0-NEXT: v_readlane_b32 s39, v5, 1 +; GFX9-O0-NEXT: v_readlane_b32 s34, v5, 2 +; GFX9-O0-NEXT: v_readlane_b32 s35, v5, 3 +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[36:37], v0, v3 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[36:37] ; GFX9-O0-NEXT: s_mov_b32 s36, 1 -; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s36, v3 +; GFX9-O0-NEXT: v_lshlrev_b32_e64 v0, s36, v0 ; GFX9-O0-NEXT: s_mov_b32 s36, 2 -; GFX9-O0-NEXT: v_and_b32_e64 v3, v3, s36 +; GFX9-O0-NEXT: v_and_b32_e64 v0, v0, s36 ; GFX9-O0-NEXT: s_mov_b32 s40, s35 ; GFX9-O0-NEXT: s_mov_b32 s36, s34 ; GFX9-O0-NEXT: s_mov_b32 s34, s39 @@ -240,12 +235,11 @@ define amdgpu_gfx void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) ; GFX9-O0-NEXT: s_mov_b32 s38, s35 ; GFX9-O0-NEXT: s_mov_b32 s39, s34 ; GFX9-O0-NEXT: s_mov_b32 s34, 0 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[36:39], s34 offset:4 -; GFX9-O0-NEXT: ; kill: killed $vgpr0 +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[36:39], s34 offset:4 ; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v1, off, 
s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: s_setpc_b64 s[30:31] @@ -353,9 +347,9 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg ; GFX9-O0-NEXT: s_mov_b32 s48, s33 ; GFX9-O0-NEXT: s_mov_b32 s33, s32 ; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0x400 ; GFX9-O0-NEXT: v_writelane_b32 v3, s30, 0 @@ -397,9 +391,9 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg ; GFX9-O0-NEXT: v_readlane_b32 s31, v3, 1 ; GFX9-O0-NEXT: v_readlane_b32 s30, v3, 0 ; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1 -; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 
offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffffc00 ; GFX9-O0-NEXT: s_mov_b32 s33, s48 @@ -412,9 +406,9 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg ; GFX9-O3-NEXT: s_mov_b32 s38, s33 ; GFX9-O3-NEXT: s_mov_b32 s33, s32 ; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1 -; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GFX9-O3-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-O3-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: v_writelane_b32 v3, s30, 0 ; GFX9-O3-NEXT: s_addk_i32 s32, 0x400 @@ -435,9 +429,9 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg ; GFX9-O3-NEXT: v_readlane_b32 s31, v3, 1 ; GFX9-O3-NEXT: v_readlane_b32 s30, v3, 0 ; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1 -; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX9-O3-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-O3-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-O3-NEXT: s_mov_b32 s33, s38 @@ -539,28 +533,26 @@ define amdgpu_gfx void 
@strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i ; GFX9-O0-LABEL: strict_wwm_call_i64: ; GFX9-O0: ; %bb.0: ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-O0-NEXT: s_mov_b32 s48, s33 +; GFX9-O0-NEXT: s_mov_b32 s46, s33 ; GFX9-O0-NEXT: s_mov_b32 s33, s32 ; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1 -; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill -; GFX9-O0-NEXT: 
buffer_store_dword v4, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0x1000 -; GFX9-O0-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane ; GFX9-O0-NEXT: v_writelane_b32 v10, s30, 0 ; GFX9-O0-NEXT: v_writelane_b32 v10, s31, 1 ; GFX9-O0-NEXT: s_mov_b32 s34, s8 @@ -578,10 +570,11 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i ; GFX9-O0-NEXT: s_mov_b32 s41, s45 ; GFX9-O0-NEXT: s_mov_b32 s42, s44 ; GFX9-O0-NEXT: s_mov_b32 s43, s35 -; GFX9-O0-NEXT: v_writelane_b32 v1, s40, 0 -; GFX9-O0-NEXT: v_writelane_b32 v1, s41, 1 -; GFX9-O0-NEXT: v_writelane_b32 v1, s42, 2 -; GFX9-O0-NEXT: v_writelane_b32 v1, s43, 3 +; GFX9-O0-NEXT: ; implicit-def: $vgpr11 : SGPR spill to VGPR lane +; GFX9-O0-NEXT: v_writelane_b32 v11, s40, 0 +; GFX9-O0-NEXT: v_writelane_b32 v11, s41, 1 +; GFX9-O0-NEXT: v_writelane_b32 v11, s42, 2 +; GFX9-O0-NEXT: v_writelane_b32 v11, s43, 3 ; GFX9-O0-NEXT: ; kill: def $sgpr34 killed $sgpr34 def $sgpr34_sgpr35 ; GFX9-O0-NEXT: s_mov_b32 s35, s9 ; GFX9-O0-NEXT: ; kill: def $sgpr36_sgpr37 killed $sgpr34_sgpr35 @@ -599,11 +592,8 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i ; GFX9-O0-NEXT: ; implicit-def: $sgpr38_sgpr39 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, s34 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-O0-NEXT: v_writelane_b32 v1, s34, 4 -; GFX9-O0-NEXT: v_writelane_b32 v1, s35, 
5 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[46:47], -1 -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_mov_b64 exec, s[46:47] +; GFX9-O0-NEXT: v_writelane_b32 v11, s34, 4 +; GFX9-O0-NEXT: v_writelane_b32 v11, s35, 5 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, s36 ; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, v3, v0, s[34:35] ; GFX9-O0-NEXT: ; implicit-def: $sgpr34 @@ -625,20 +615,13 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v2 ; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-O0-NEXT: s_or_saveexec_b64 s[46:47], -1 -; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[46:47] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s34, v6, 4 -; GFX9-O0-NEXT: v_readlane_b32 s35, v6, 5 -; GFX9-O0-NEXT: v_readlane_b32 s36, v6, 0 -; GFX9-O0-NEXT: v_readlane_b32 s37, v6, 1 -; GFX9-O0-NEXT: v_readlane_b32 s38, v6, 2 -; GFX9-O0-NEXT: v_readlane_b32 s39, v6, 3 +; GFX9-O0-NEXT: v_readlane_b32 s34, v11, 4 +; GFX9-O0-NEXT: v_readlane_b32 s35, v11, 5 +; GFX9-O0-NEXT: v_readlane_b32 s36, v11, 0 +; GFX9-O0-NEXT: v_readlane_b32 s37, v11, 1 +; GFX9-O0-NEXT: v_readlane_b32 s38, v11, 2 +; GFX9-O0-NEXT: v_readlane_b32 s39, v11, 3 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[46:47], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[46:47] ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 ; GFX9-O0-NEXT: ; implicit-def: $sgpr40 ; GFX9-O0-NEXT: ; implicit-def: $sgpr40 @@ -647,30 +630,28 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i ; GFX9-O0-NEXT: v_add_co_u32_e64 v2, s[40:41], v2, v4 ; GFX9-O0-NEXT: v_addc_co_u32_e64 v3, s[40:41], v3, v5, s[40:41] ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v2 -; 
GFX9-O0-NEXT: v_mov_b32_e32 v7, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O0-NEXT: s_mov_b32 s34, 0 -; GFX9-O0-NEXT: buffer_store_dwordx2 v[6:7], off, s[36:39], s34 offset:4 +; GFX9-O0-NEXT: buffer_store_dwordx2 v[0:1], off, s[36:39], s34 offset:4 ; GFX9-O0-NEXT: v_readlane_b32 s31, v10, 1 ; GFX9-O0-NEXT: v_readlane_b32 s30, v10, 0 -; GFX9-O0-NEXT: ; kill: killed $vgpr0 ; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1 -; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v2, off, 
s[0:3], s33 offset:40 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffff000 -; GFX9-O0-NEXT: s_mov_b32 s33, s48 +; GFX9-O0-NEXT: s_mov_b32 s33, s46 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: s_setpc_b64 s[30:31] ; @@ -680,14 +661,14 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i ; GFX9-O3-NEXT: s_mov_b32 s38, s33 ; GFX9-O3-NEXT: s_mov_b32 s33, s32 ; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1 -; GFX9-O3-NEXT: buffer_store_dword v8, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-O3-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GFX9-O3-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill -; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill -; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GFX9-O3-NEXT: buffer_store_dword v6, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX9-O3-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX9-O3-NEXT: s_nop 0 -; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:24 ; 
4-byte Folded Spill +; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GFX9-O3-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: v_writelane_b32 v8, s30, 0 ; GFX9-O3-NEXT: s_addk_i32 s32, 0x800 @@ -718,13 +699,13 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i ; GFX9-O3-NEXT: v_readlane_b32 s31, v8, 1 ; GFX9-O3-NEXT: v_readlane_b32 s30, v8, 0 ; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1 -; GFX9-O3-NEXT: buffer_load_dword v8, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-O3-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX9-O3-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload -; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload -; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload -; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GFX9-O3-NEXT: buffer_load_dword v6, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX9-O3-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GFX9-O3-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: s_addk_i32 s32, 0xf800 ; GFX9-O3-NEXT: s_mov_b32 s33, s38 @@ -924,7 +905,7 @@ define amdgpu_gfx <32 x i32> @strict_wwm_callee_saves(<32 x i32> inreg %keep, pt ; 
GFX9-O0-NEXT: s_nop 0 ; GFX9-O0-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, -1 -; GFX9-O0-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill @@ -937,35 +918,35 @@ define amdgpu_gfx <32 x i32> @strict_wwm_callee_saves(<32 x i32> inreg %keep, pt ; GFX9-O0-NEXT: v_writelane_b32 v47, s65, 1 ; GFX9-O0-NEXT: v_writelane_b32 v47, s66, 2 ; GFX9-O0-NEXT: v_writelane_b32 v47, s67, 3 -; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 
offset:76 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:16 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: buffer_store_dword 
v0, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill ; GFX9-O0-NEXT: ; implicit-def: $sgpr34 ; GFX9-O0-NEXT: ; implicit-def: $sgpr34 ; GFX9-O0-NEXT: ; implicit-def: $sgpr34 @@ -975,36 +956,36 @@ define amdgpu_gfx <32 x i32> @strict_wwm_callee_saves(<32 x i32> inreg %keep, pt ; GFX9-O0-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-O0-NEXT: v_mov_b32_e32 v35, s5 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, s6 -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s8 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s7 ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s9 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s8 ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s9 ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s11 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s10 ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s12 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s11 ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s13 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s12 ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v1, 
s14 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s13 ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s15 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s14 ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s16 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s15 ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s17 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s16 ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s17 +; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill ; GFX9-O0-NEXT: v_mov_b32_e32 v40, s18 ; GFX9-O0-NEXT: v_mov_b32_e32 v39, s19 ; GFX9-O0-NEXT: v_mov_b32_e32 v38, s20 ; GFX9-O0-NEXT: v_mov_b32_e32 v37, s21 ; GFX9-O0-NEXT: v_mov_b32_e32 v36, s22 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, s23 -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill ; GFX9-O0-NEXT: v_mov_b32_e32 v46, s24 ; GFX9-O0-NEXT: v_mov_b32_e32 v45, s25 ; GFX9-O0-NEXT: v_mov_b32_e32 v44, s26 @@ -1013,56 +994,56 @@ define amdgpu_gfx <32 x i32> @strict_wwm_callee_saves(<32 x i32> inreg %keep, pt ; GFX9-O0-NEXT: v_mov_b32_e32 v41, s29 ; GFX9-O0-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 killed $exec ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v35 -; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: 
v_mov_b32_e32 v2, v35 -; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v35 -; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v35 -; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v35 -; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_mov_b32_e32 v6, v35 -; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_mov_b32_e32 v7, v35 -; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_mov_b32_e32 v8, v35 -; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v35 -; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload ; 
GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v35 -; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_mov_b32_e32 v11, v35 -; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_mov_b32_e32 v12, v35 -; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_mov_b32_e32 v13, v35 -; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload ; GFX9-O0-NEXT: v_mov_b32_e32 v14, v40 -; GFX9-O0-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload ; GFX9-O0-NEXT: v_mov_b32_e32 v15, v39 -; GFX9-O0-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload ; GFX9-O0-NEXT: v_mov_b32_e32 v16, v38 -; GFX9-O0-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload ; GFX9-O0-NEXT: v_mov_b32_e32 v17, v37 -; GFX9-O0-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload ; GFX9-O0-NEXT: v_mov_b32_e32 v18, v36 -; GFX9-O0-NEXT: buffer_load_dword v36, off, s[0:3], 
s32 offset:116 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(5) ; GFX9-O0-NEXT: v_mov_b32_e32 v19, v35 -; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload ; GFX9-O0-NEXT: v_mov_b32_e32 v20, v46 ; GFX9-O0-NEXT: v_mov_b32_e32 v21, v45 ; GFX9-O0-NEXT: v_mov_b32_e32 v22, v44 @@ -1080,23 +1061,23 @@ define amdgpu_gfx <32 x i32> @strict_wwm_callee_saves(<32 x i32> inreg %keep, pt ; GFX9-O0-NEXT: s_waitcnt vmcnt(1) ; GFX9-O0-NEXT: v_mov_b32_e32 v30, v36 ; GFX9-O0-NEXT: ; kill: def $vgpr31 killed $vgpr35 killed $exec -; GFX9-O0-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded 
Reload -; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v30, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v29, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v27, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload ; GFX9-O0-NEXT: ; implicit-def: $sgpr34 ; GFX9-O0-NEXT: ; implicit-def: $sgpr34 ; GFX9-O0-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec @@ -1276,7 +1257,7 @@ define amdgpu_gfx 
<32 x i32> @strict_wwm_callee_saves(<32 x i32> inreg %keep, pt ; GFX9-O0-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, -1 -; GFX9-O0-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll index ee9174822a960..312628c7b5451 100644 --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll @@ -114,15 +114,10 @@ define amdgpu_cs void @cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) { ; GFX9-O0-NEXT: s_mov_b32 s19, 0xe00000 ; GFX9-O0-NEXT: s_add_u32 s16, s16, s4 ; GFX9-O0-NEXT: s_addc_u32 s17, s17, 0 -; GFX9-O0-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[12:13], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[12:13] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_writelane_b32 v0, s3, 0 +; GFX9-O0-NEXT: ; implicit-def: $vgpr5 : SGPR spill to VGPR lane +; GFX9-O0-NEXT: v_writelane_b32 v5, s3, 0 ; GFX9-O0-NEXT: s_mov_b32 s4, s1 -; GFX9-O0-NEXT: v_readlane_b32 s1, v0, 0 +; GFX9-O0-NEXT: v_readlane_b32 s1, v5, 0 ; GFX9-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 ; GFX9-O0-NEXT: s_mov_b32 s3, s1 ; GFX9-O0-NEXT: s_mov_b32 s8, s3 @@ -135,37 +130,37 @@ define amdgpu_cs void @cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) { ; GFX9-O0-NEXT: s_mov_b32 s5, s10 ; GFX9-O0-NEXT: s_mov_b32 s6, s9 ; GFX9-O0-NEXT: s_mov_b32 s7, s8 -; GFX9-O0-NEXT: v_writelane_b32 v0, s2, 1 -; GFX9-O0-NEXT: v_writelane_b32 v0, 
s3, 2 -; GFX9-O0-NEXT: v_writelane_b32 v0, s0, 3 -; GFX9-O0-NEXT: v_writelane_b32 v0, s1, 4 +; GFX9-O0-NEXT: v_writelane_b32 v5, s2, 1 +; GFX9-O0-NEXT: v_writelane_b32 v5, s3, 2 +; GFX9-O0-NEXT: v_writelane_b32 v5, s0, 3 +; GFX9-O0-NEXT: v_writelane_b32 v5, s1, 4 ; GFX9-O0-NEXT: s_mov_b32 s0, 0 ; GFX9-O0-NEXT: s_nop 2 -; GFX9-O0-NEXT: buffer_load_dwordx2 v[4:5], off, s[4:7], s0 +; GFX9-O0-NEXT: buffer_load_dwordx2 v[3:4], off, s[4:7], s0 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:12 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:12 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[16:19], 0 offset:16 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:16 ; 4-byte Folded Spill ; GFX9-O0-NEXT: ; implicit-def: $sgpr2_sgpr3 ; GFX9-O0-NEXT: ; implicit-def: $sgpr2_sgpr3 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[2:3], -1 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, s0 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[2:3] +; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[2:3] ; GFX9-O0-NEXT: v_mov_b32_e32 v2, s0 ; GFX9-O0-NEXT: s_nop 1 ; GFX9-O0-NEXT: v_mov_b32_dpp v2, v1 row_bcast:31 row_mask:0xc bank_mask:0xf ; GFX9-O0-NEXT: v_add_u32_e64 v1, v1, v2 ; GFX9-O0-NEXT: s_mov_b64 exec, s[2:3] -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v3, s0 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, s0 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v0, s0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 s[0:1], exec -; 
GFX9-O0-NEXT: v_writelane_b32 v0, s0, 5 -; GFX9-O0-NEXT: v_writelane_b32 v0, s1, 6 +; GFX9-O0-NEXT: v_writelane_b32 v5, s0, 5 +; GFX9-O0-NEXT: v_writelane_b32 v5, s1, 6 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[12:13], -1 -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[16:19], 0 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[12:13] ; GFX9-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] ; GFX9-O0-NEXT: s_mov_b64 exec, s[0:1] @@ -188,26 +183,26 @@ define amdgpu_cs void @cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) { ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill ; GFX9-O0-NEXT: .LBB1_2: ; %merge +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_or_saveexec_b64 s[12:13], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v5, off, s[16:19], 0 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[12:13] ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s4, v0, 5 -; GFX9-O0-NEXT: v_readlane_b32 s5, v0, 6 +; GFX9-O0-NEXT: v_readlane_b32 s4, v5, 5 +; GFX9-O0-NEXT: v_readlane_b32 s5, v5, 6 ; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX9-O0-NEXT: v_readlane_b32 s2, v0, 1 -; GFX9-O0-NEXT: v_readlane_b32 s3, v0, 2 -; GFX9-O0-NEXT: v_readlane_b32 s0, v0, 3 -; GFX9-O0-NEXT: v_readlane_b32 s1, v0, 4 -; GFX9-O0-NEXT: buffer_load_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v4, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v4 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX9-O0-NEXT: v_readlane_b32 s2, v5, 1 +; GFX9-O0-NEXT: 
v_readlane_b32 s3, v5, 2 +; GFX9-O0-NEXT: v_readlane_b32 s0, v5, 3 +; GFX9-O0-NEXT: v_readlane_b32 s1, v5, 4 +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v3 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GFX9-O0-NEXT: s_mov_b32 s4, 1 -; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s4, v3 +; GFX9-O0-NEXT: v_lshlrev_b32_e64 v0, s4, v0 ; GFX9-O0-NEXT: s_mov_b32 s4, 2 -; GFX9-O0-NEXT: v_and_b32_e64 v3, v3, s4 +; GFX9-O0-NEXT: v_and_b32_e64 v0, v0, s4 ; GFX9-O0-NEXT: s_mov_b32 s6, s1 ; GFX9-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 ; GFX9-O0-NEXT: s_mov_b32 s4, s3 @@ -217,8 +212,7 @@ define amdgpu_cs void @cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) { ; GFX9-O0-NEXT: s_mov_b32 s2, s5 ; GFX9-O0-NEXT: s_mov_b32 s3, s4 ; GFX9-O0-NEXT: s_mov_b32 s4, 0 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s4 offset:4 -; GFX9-O0-NEXT: ; kill: killed $vgpr0 +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s4 offset:4 ; GFX9-O0-NEXT: s_endpgm ; ; GFX9-O3-LABEL: cfg: @@ -310,38 +304,32 @@ define hidden i32 @called(i32 %a) noinline { define amdgpu_kernel void @call(ptr addrspace(8) inreg %tmp14, i32 inreg %arg) { ; GFX9-O0-LABEL: call: ; GFX9-O0: ; %bb.0: -; GFX9-O0-NEXT: s_mov_b32 s32, 0x400 +; GFX9-O0-NEXT: s_mov_b32 s32, 0 ; GFX9-O0-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0 ; GFX9-O0-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1 ; GFX9-O0-NEXT: s_mov_b32 s26, -1 ; GFX9-O0-NEXT: s_mov_b32 s27, 0xe00000 ; GFX9-O0-NEXT: s_add_u32 s24, s24, s9 ; GFX9-O0-NEXT: s_addc_u32 s25, s25, 0 -; GFX9-O0-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane ; GFX9-O0-NEXT: s_or_saveexec_b64 s[10:11], -1 -; GFX9-O0-NEXT: v_writelane_b32 v7, s10, 0 -; GFX9-O0-NEXT: v_writelane_b32 v7, s11, 1 +; GFX9-O0-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane +; GFX9-O0-NEXT: v_writelane_b32 v3, s10, 0 +; GFX9-O0-NEXT: v_writelane_b32 v3, s11, 1 ; GFX9-O0-NEXT: s_mov_b32 s14, s8 ; GFX9-O0-NEXT: s_mov_b32 s13, s7 ; GFX9-O0-NEXT: s_mov_b32 s12, s6 ; GFX9-O0-NEXT: s_mov_b64 
s[10:11], s[4:5] ; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[2:3] -; GFX9-O0-NEXT: v_readlane_b32 s2, v7, 0 -; GFX9-O0-NEXT: v_readlane_b32 s3, v7, 1 -; GFX9-O0-NEXT: v_writelane_b32 v7, s4, 2 -; GFX9-O0-NEXT: v_writelane_b32 v7, s5, 3 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-O0-NEXT: buffer_store_dword v7, off, s[24:27], 0 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: v_readlane_b32 s2, v3, 0 +; GFX9-O0-NEXT: v_readlane_b32 s3, v3, 1 +; GFX9-O0-NEXT: v_writelane_b32 v3, s4, 2 +; GFX9-O0-NEXT: v_writelane_b32 v3, s5, 3 ; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[0:1] -; GFX9-O0-NEXT: v_readlane_b32 s0, v7, 2 -; GFX9-O0-NEXT: v_readlane_b32 s1, v7, 3 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[24:27], 0 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0 +; GFX9-O0-NEXT: v_readlane_b32 s0, v3, 2 +; GFX9-O0-NEXT: v_readlane_b32 s1, v3, 3 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v0 ; GFX9-O0-NEXT: s_mov_b64 exec, s[2:3] ; GFX9-O0-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 ; GFX9-O0-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x2c @@ -355,23 +343,19 @@ define amdgpu_kernel void @call(ptr addrspace(8) inreg %tmp14, i32 inreg %arg) { ; GFX9-O0-NEXT: s_mov_b32 s17, s7 ; GFX9-O0-NEXT: s_mov_b32 s18, s6 ; GFX9-O0-NEXT: s_mov_b32 s19, s3 -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_writelane_b32 v1, s16, 4 -; GFX9-O0-NEXT: v_writelane_b32 v1, s17, 5 -; GFX9-O0-NEXT: v_writelane_b32 v1, s18, 6 -; GFX9-O0-NEXT: v_writelane_b32 v1, s19, 7 +; GFX9-O0-NEXT: v_writelane_b32 v3, s16, 4 +; GFX9-O0-NEXT: v_writelane_b32 v3, s17, 5 +; GFX9-O0-NEXT: v_writelane_b32 v3, s18, 6 +; GFX9-O0-NEXT: v_writelane_b32 v3, s19, 7 ; GFX9-O0-NEXT: s_mov_b32 s6, 0 -; GFX9-O0-NEXT: v_writelane_b32 v1, s6, 8 +; 
GFX9-O0-NEXT: v_writelane_b32 v3, s6, 8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8_sgpr9 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[2:3], -1 -; GFX9-O0-NEXT: v_writelane_b32 v1, s2, 9 -; GFX9-O0-NEXT: v_writelane_b32 v1, s3, 10 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[24:27], 0 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-O0-NEXT: v_mov_b32_e32 v6, s6 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v0, s[2:3] +; GFX9-O0-NEXT: v_writelane_b32 v3, s2, 9 +; GFX9-O0-NEXT: v_writelane_b32 v3, s3, 10 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, s6 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v0, s[2:3] ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 56 ; GFX9-O0-NEXT: s_mov_b32 s2, s0 ; GFX9-O0-NEXT: s_mov_b32 s0, s1 @@ -387,35 +371,28 @@ define amdgpu_kernel void @call(ptr addrspace(8) inreg %tmp14, i32 inreg %arg) { ; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[24:25] ; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[26:27] ; GFX9-O0-NEXT: s_mov_b32 s6, 20 -; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s6, v3 -; GFX9-O0-NEXT: s_mov_b32 s6, 10 ; GFX9-O0-NEXT: v_lshlrev_b32_e64 v4, s6, v4 -; GFX9-O0-NEXT: v_or3_b32 v3, v5, v4, v3 +; GFX9-O0-NEXT: s_mov_b32 s6, 10 +; GFX9-O0-NEXT: v_lshlrev_b32_e64 v5, s6, v5 +; GFX9-O0-NEXT: v_or3_b32 v4, v6, v5, v4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX9-O0-NEXT: ; implicit-def: $sgpr15 -; GFX9-O0-NEXT: v_mov_b32_e32 v31, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v31, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v7 ; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[24:27], 0 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s0, v1, 4 -; GFX9-O0-NEXT: v_readlane_b32 s1, v1, 5 -; GFX9-O0-NEXT: v_readlane_b32 s2, v1, 6 -; GFX9-O0-NEXT: v_readlane_b32 s3, v1, 7 -; GFX9-O0-NEXT: v_readlane_b32 
s6, v1, 9 -; GFX9-O0-NEXT: v_readlane_b32 s7, v1, 10 -; GFX9-O0-NEXT: v_readlane_b32 s4, v1, 8 +; GFX9-O0-NEXT: v_readlane_b32 s0, v3, 4 +; GFX9-O0-NEXT: v_readlane_b32 s1, v3, 5 +; GFX9-O0-NEXT: v_readlane_b32 s2, v3, 6 +; GFX9-O0-NEXT: v_readlane_b32 s3, v3, 7 +; GFX9-O0-NEXT: v_readlane_b32 s6, v3, 9 +; GFX9-O0-NEXT: v_readlane_b32 s7, v3, 10 +; GFX9-O0-NEXT: v_readlane_b32 s4, v3, 8 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[24:27], 0 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-O0-NEXT: v_add_u32_e64 v3, v3, v6 +; GFX9-O0-NEXT: v_add_u32_e64 v3, v3, v7 ; GFX9-O0-NEXT: s_mov_b64 exec, s[6:7] -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s4 offset:4 -; GFX9-O0-NEXT: ; kill: killed $vgpr0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v3 +; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s4 offset:4 ; GFX9-O0-NEXT: s_endpgm ; ; GFX9-O3-LABEL: call: @@ -559,37 +536,31 @@ define i64 @called_i64(i64 %a) noinline { define amdgpu_kernel void @call_i64(ptr addrspace(8) inreg %tmp14, i64 inreg %arg) { ; GFX9-O0-LABEL: call_i64: ; GFX9-O0: ; %bb.0: -; GFX9-O0-NEXT: s_mov_b32 s32, 0x400 +; GFX9-O0-NEXT: s_mov_b32 s32, 0 ; GFX9-O0-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0 ; GFX9-O0-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1 ; GFX9-O0-NEXT: s_mov_b32 s26, -1 ; GFX9-O0-NEXT: s_mov_b32 s27, 0xe00000 ; GFX9-O0-NEXT: s_add_u32 s24, s24, s9 ; GFX9-O0-NEXT: s_addc_u32 s25, s25, 0 -; GFX9-O0-NEXT: ; implicit-def: $vgpr12 : SGPR spill to VGPR lane ; GFX9-O0-NEXT: s_or_saveexec_b64 s[10:11], -1 -; GFX9-O0-NEXT: v_writelane_b32 v12, s10, 0 -; GFX9-O0-NEXT: v_writelane_b32 v12, s11, 1 +; GFX9-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane +; GFX9-O0-NEXT: v_writelane_b32 v8, s10, 0 +; GFX9-O0-NEXT: v_writelane_b32 v8, s11, 1 ; GFX9-O0-NEXT: s_mov_b32 s14, s8 ; GFX9-O0-NEXT: s_mov_b32 s13, s7 ; 
GFX9-O0-NEXT: s_mov_b32 s12, s6 ; GFX9-O0-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[2:3] -; GFX9-O0-NEXT: v_readlane_b32 s2, v12, 0 -; GFX9-O0-NEXT: v_readlane_b32 s3, v12, 1 -; GFX9-O0-NEXT: v_writelane_b32 v12, s4, 2 -; GFX9-O0-NEXT: v_writelane_b32 v12, s5, 3 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-O0-NEXT: buffer_store_dword v12, off, s[24:27], 0 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: v_readlane_b32 s2, v8, 0 +; GFX9-O0-NEXT: v_readlane_b32 s3, v8, 1 +; GFX9-O0-NEXT: v_writelane_b32 v8, s4, 2 +; GFX9-O0-NEXT: v_writelane_b32 v8, s5, 3 ; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[0:1] -; GFX9-O0-NEXT: v_readlane_b32 s0, v12, 2 -; GFX9-O0-NEXT: v_readlane_b32 s1, v12, 3 +; GFX9-O0-NEXT: v_readlane_b32 s0, v8, 2 +; GFX9-O0-NEXT: v_readlane_b32 s1, v8, 3 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[24:27], 0 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0 ; GFX9-O0-NEXT: s_mov_b64 exec, s[2:3] ; GFX9-O0-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x24 @@ -604,11 +575,10 @@ define amdgpu_kernel void @call_i64(ptr addrspace(8) inreg %tmp14, i64 inreg %ar ; GFX9-O0-NEXT: s_mov_b32 s17, s8 ; GFX9-O0-NEXT: s_mov_b32 s18, s7 ; GFX9-O0-NEXT: s_mov_b32 s19, s6 -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_writelane_b32 v1, s16, 4 -; GFX9-O0-NEXT: v_writelane_b32 v1, s17, 5 -; GFX9-O0-NEXT: v_writelane_b32 v1, s18, 6 -; GFX9-O0-NEXT: v_writelane_b32 v1, s19, 7 +; GFX9-O0-NEXT: v_writelane_b32 v8, s16, 4 +; GFX9-O0-NEXT: v_writelane_b32 v8, s17, 5 +; GFX9-O0-NEXT: v_writelane_b32 v8, s18, 6 +; GFX9-O0-NEXT: v_writelane_b32 v8, s19, 7 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 ; GFX9-O0-NEXT: s_mov_b32 s15, s7 ; GFX9-O0-NEXT: s_mov_b32 s8, s3 @@ -623,20 +593,17 @@ define amdgpu_kernel void @call_i64(ptr addrspace(8) inreg %tmp14, 
i64 inreg %ar ; GFX9-O0-NEXT: ; implicit-def: $sgpr8_sgpr9 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[2:3], -1 -; GFX9-O0-NEXT: v_writelane_b32 v1, s2, 8 -; GFX9-O0-NEXT: v_writelane_b32 v1, s3, 9 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[24:27], 0 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: v_writelane_b32 v8, s2, 8 +; GFX9-O0-NEXT: v_writelane_b32 v8, s3, 9 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, s6 ; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v0, s[2:3] ; GFX9-O0-NEXT: ; implicit-def: $sgpr2 ; GFX9-O0-NEXT: ; implicit-def: $sgpr2 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7 -; GFX9-O0-NEXT: v_mov_b32_e32 v9, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v6 ; GFX9-O0-NEXT: s_mov_b32 s2, 32 -; GFX9-O0-NEXT: v_lshrrev_b64 v[10:11], s2, v[8:9] -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10 +; GFX9-O0-NEXT: v_lshrrev_b64 v[11:12], s2, v[9:10] +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 60 ; GFX9-O0-NEXT: s_mov_b32 s2, s0 ; GFX9-O0-NEXT: s_mov_b32 s0, s1 @@ -664,33 +631,25 @@ define amdgpu_kernel void @call_i64(ptr addrspace(8) inreg %tmp14, i64 inreg %ar ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6 ; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-O0-NEXT: buffer_load_dword v2, off, s[24:27], 0 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s0, v2, 4 -; GFX9-O0-NEXT: v_readlane_b32 s1, v2, 5 -; GFX9-O0-NEXT: v_readlane_b32 s2, v2, 6 -; GFX9-O0-NEXT: v_readlane_b32 s3, v2, 7 -; GFX9-O0-NEXT: v_readlane_b32 s4, v2, 8 -; GFX9-O0-NEXT: v_readlane_b32 s5, v2, 9 +; GFX9-O0-NEXT: v_readlane_b32 s0, v8, 4 +; GFX9-O0-NEXT: v_readlane_b32 s1, v8, 5 +; GFX9-O0-NEXT: v_readlane_b32 s2, v8, 6 +; GFX9-O0-NEXT: v_readlane_b32 s3, v8, 7 +; GFX9-O0-NEXT: 
v_readlane_b32 s4, v8, 8 +; GFX9-O0-NEXT: v_readlane_b32 s5, v8, 9 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[24:27], 0 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10 ; GFX9-O0-NEXT: v_add_co_u32_e64 v3, s[6:7], v3, v5 ; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[6:7], v4, v6, s[6:7] ; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4 ; GFX9-O0-NEXT: s_mov_b32 s4, 0 -; GFX9-O0-NEXT: buffer_store_dwordx2 v[1:2], off, s[0:3], s4 offset:4 -; GFX9-O0-NEXT: ; kill: killed $vgpr0 +; GFX9-O0-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], s4 offset:4 ; GFX9-O0-NEXT: s_endpgm ; ; GFX9-O3-LABEL: call_i64: @@ -1007,15 +966,10 @@ define amdgpu_cs void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) { ; GFX9-O0-NEXT: s_mov_b32 s19, 0xe00000 ; GFX9-O0-NEXT: s_add_u32 s16, s16, s4 ; GFX9-O0-NEXT: s_addc_u32 s17, s17, 0 -; GFX9-O0-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[12:13], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[12:13] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_writelane_b32 v0, s3, 0 +; GFX9-O0-NEXT: ; implicit-def: $vgpr5 : SGPR spill to VGPR lane +; GFX9-O0-NEXT: v_writelane_b32 v5, s3, 0 ; GFX9-O0-NEXT: s_mov_b32 s4, s1 -; GFX9-O0-NEXT: v_readlane_b32 s1, v0, 0 +; GFX9-O0-NEXT: v_readlane_b32 s1, v5, 0 ; GFX9-O0-NEXT: ; kill: def $sgpr2 killed $sgpr2 def $sgpr2_sgpr3 ; GFX9-O0-NEXT: 
s_mov_b32 s3, s1 ; GFX9-O0-NEXT: s_mov_b32 s8, s3 @@ -1028,37 +982,37 @@ define amdgpu_cs void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) { ; GFX9-O0-NEXT: s_mov_b32 s5, s10 ; GFX9-O0-NEXT: s_mov_b32 s6, s9 ; GFX9-O0-NEXT: s_mov_b32 s7, s8 -; GFX9-O0-NEXT: v_writelane_b32 v0, s2, 1 -; GFX9-O0-NEXT: v_writelane_b32 v0, s3, 2 -; GFX9-O0-NEXT: v_writelane_b32 v0, s0, 3 -; GFX9-O0-NEXT: v_writelane_b32 v0, s1, 4 +; GFX9-O0-NEXT: v_writelane_b32 v5, s2, 1 +; GFX9-O0-NEXT: v_writelane_b32 v5, s3, 2 +; GFX9-O0-NEXT: v_writelane_b32 v5, s0, 3 +; GFX9-O0-NEXT: v_writelane_b32 v5, s1, 4 ; GFX9-O0-NEXT: s_mov_b32 s0, 0 ; GFX9-O0-NEXT: s_nop 2 -; GFX9-O0-NEXT: buffer_load_dwordx2 v[4:5], off, s[4:7], s0 +; GFX9-O0-NEXT: buffer_load_dwordx2 v[3:4], off, s[4:7], s0 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:12 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:12 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_nop 0 -; GFX9-O0-NEXT: buffer_store_dword v5, off, s[16:19], 0 offset:16 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:16 ; 4-byte Folded Spill ; GFX9-O0-NEXT: ; implicit-def: $sgpr2_sgpr3 ; GFX9-O0-NEXT: ; implicit-def: $sgpr2_sgpr3 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[2:3], -1 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, s0 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[2:3] +; GFX9-O0-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[2:3] ; GFX9-O0-NEXT: v_mov_b32_e32 v2, s0 ; GFX9-O0-NEXT: s_nop 1 ; GFX9-O0-NEXT: v_mov_b32_dpp v2, v1 row_bcast:31 row_mask:0xc bank_mask:0xf ; GFX9-O0-NEXT: v_add_u32_e64 v1, v1, v2 ; GFX9-O0-NEXT: s_mov_b64 exec, s[2:3] -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 -; GFX9-O0-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v3, s0 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, s0 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill +; 
GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 +; GFX9-O0-NEXT: buffer_store_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Spill +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[2:3], v0, s0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 s[0:1], exec -; GFX9-O0-NEXT: v_writelane_b32 v0, s0, 5 -; GFX9-O0-NEXT: v_writelane_b32 v0, s1, 6 +; GFX9-O0-NEXT: v_writelane_b32 v5, s0, 5 +; GFX9-O0-NEXT: v_writelane_b32 v5, s1, 6 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[12:13], -1 -; GFX9-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 ; 4-byte Folded Spill +; GFX9-O0-NEXT: buffer_store_dword v5, off, s[16:19], 0 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[12:13] ; GFX9-O0-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] ; GFX9-O0-NEXT: s_mov_b64 exec, s[0:1] @@ -1081,26 +1035,26 @@ define amdgpu_cs void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) { ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[16:19], 0 offset:4 ; 4-byte Folded Spill ; GFX9-O0-NEXT: .LBB8_2: ; %merge +; GFX9-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload +; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: buffer_load_dword v3, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_or_saveexec_b64 s[12:13], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[16:19], 0 ; 4-byte Folded Reload +; GFX9-O0-NEXT: buffer_load_dword v5, off, s[16:19], 0 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_mov_b64 exec, s[12:13] ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s4, v0, 5 -; GFX9-O0-NEXT: v_readlane_b32 s5, v0, 6 +; GFX9-O0-NEXT: v_readlane_b32 s4, v5, 5 +; GFX9-O0-NEXT: v_readlane_b32 s5, v5, 6 ; GFX9-O0-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX9-O0-NEXT: v_readlane_b32 s2, v0, 1 -; GFX9-O0-NEXT: v_readlane_b32 s3, v0, 2 -; GFX9-O0-NEXT: v_readlane_b32 s0, v0, 3 -; GFX9-O0-NEXT: v_readlane_b32 s1, v0, 4 -; GFX9-O0-NEXT: 
buffer_load_dword v3, off, s[16:19], 0 offset:8 ; 4-byte Folded Reload -; GFX9-O0-NEXT: buffer_load_dword v4, off, s[16:19], 0 offset:4 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v4 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] +; GFX9-O0-NEXT: v_readlane_b32 s2, v5, 1 +; GFX9-O0-NEXT: v_readlane_b32 s3, v5, 2 +; GFX9-O0-NEXT: v_readlane_b32 s0, v5, 3 +; GFX9-O0-NEXT: v_readlane_b32 s1, v5, 4 +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[4:5], v0, v3 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GFX9-O0-NEXT: s_mov_b32 s4, 1 -; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s4, v3 +; GFX9-O0-NEXT: v_lshlrev_b32_e64 v0, s4, v0 ; GFX9-O0-NEXT: s_mov_b32 s4, 2 -; GFX9-O0-NEXT: v_and_b32_e64 v3, v3, s4 +; GFX9-O0-NEXT: v_and_b32_e64 v0, v0, s4 ; GFX9-O0-NEXT: s_mov_b32 s6, s1 ; GFX9-O0-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 ; GFX9-O0-NEXT: s_mov_b32 s4, s3 @@ -1110,8 +1064,7 @@ define amdgpu_cs void @strict_wwm_cfg(ptr addrspace(8) inreg %tmp14, i32 %arg) { ; GFX9-O0-NEXT: s_mov_b32 s2, s5 ; GFX9-O0-NEXT: s_mov_b32 s3, s4 ; GFX9-O0-NEXT: s_mov_b32 s4, 0 -; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s4 offset:4 -; GFX9-O0-NEXT: ; kill: killed $vgpr0 +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s4 offset:4 ; GFX9-O0-NEXT: s_endpgm ; ; GFX9-O3-LABEL: strict_wwm_cfg: @@ -1203,38 +1156,32 @@ define hidden i32 @strict_wwm_called(i32 %a) noinline { define amdgpu_kernel void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg %arg) { ; GFX9-O0-LABEL: strict_wwm_call: ; GFX9-O0: ; %bb.0: -; GFX9-O0-NEXT: s_mov_b32 s32, 0x400 +; GFX9-O0-NEXT: s_mov_b32 s32, 0 ; GFX9-O0-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0 ; GFX9-O0-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1 ; GFX9-O0-NEXT: s_mov_b32 s26, -1 ; GFX9-O0-NEXT: s_mov_b32 s27, 0xe00000 ; GFX9-O0-NEXT: s_add_u32 s24, s24, s9 ; GFX9-O0-NEXT: s_addc_u32 s25, s25, 0 -; GFX9-O0-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane ; 
GFX9-O0-NEXT: s_or_saveexec_b64 s[10:11], -1 -; GFX9-O0-NEXT: v_writelane_b32 v7, s10, 0 -; GFX9-O0-NEXT: v_writelane_b32 v7, s11, 1 +; GFX9-O0-NEXT: ; implicit-def: $vgpr3 : SGPR spill to VGPR lane +; GFX9-O0-NEXT: v_writelane_b32 v3, s10, 0 +; GFX9-O0-NEXT: v_writelane_b32 v3, s11, 1 ; GFX9-O0-NEXT: s_mov_b32 s14, s8 ; GFX9-O0-NEXT: s_mov_b32 s13, s7 ; GFX9-O0-NEXT: s_mov_b32 s12, s6 ; GFX9-O0-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[2:3] -; GFX9-O0-NEXT: v_readlane_b32 s2, v7, 0 -; GFX9-O0-NEXT: v_readlane_b32 s3, v7, 1 -; GFX9-O0-NEXT: v_writelane_b32 v7, s4, 2 -; GFX9-O0-NEXT: v_writelane_b32 v7, s5, 3 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-O0-NEXT: buffer_store_dword v7, off, s[24:27], 0 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: v_readlane_b32 s2, v3, 0 +; GFX9-O0-NEXT: v_readlane_b32 s3, v3, 1 +; GFX9-O0-NEXT: v_writelane_b32 v3, s4, 2 +; GFX9-O0-NEXT: v_writelane_b32 v3, s5, 3 ; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[0:1] -; GFX9-O0-NEXT: v_readlane_b32 s0, v7, 2 -; GFX9-O0-NEXT: v_readlane_b32 s1, v7, 3 -; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 -; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[24:27], 0 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0 +; GFX9-O0-NEXT: v_readlane_b32 s0, v3, 2 +; GFX9-O0-NEXT: v_readlane_b32 s1, v3, 3 +; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v1 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v0 ; GFX9-O0-NEXT: s_mov_b64 exec, s[2:3] ; GFX9-O0-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 ; GFX9-O0-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x2c @@ -1248,23 +1195,19 @@ define amdgpu_kernel void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 in ; GFX9-O0-NEXT: s_mov_b32 s17, s7 ; GFX9-O0-NEXT: s_mov_b32 s18, s6 ; GFX9-O0-NEXT: s_mov_b32 s19, s3 -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: 
v_writelane_b32 v1, s16, 4 -; GFX9-O0-NEXT: v_writelane_b32 v1, s17, 5 -; GFX9-O0-NEXT: v_writelane_b32 v1, s18, 6 -; GFX9-O0-NEXT: v_writelane_b32 v1, s19, 7 +; GFX9-O0-NEXT: v_writelane_b32 v3, s16, 4 +; GFX9-O0-NEXT: v_writelane_b32 v3, s17, 5 +; GFX9-O0-NEXT: v_writelane_b32 v3, s18, 6 +; GFX9-O0-NEXT: v_writelane_b32 v3, s19, 7 ; GFX9-O0-NEXT: s_mov_b32 s6, 0 -; GFX9-O0-NEXT: v_writelane_b32 v1, s6, 8 +; GFX9-O0-NEXT: v_writelane_b32 v3, s6, 8 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8_sgpr9 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[2:3], -1 -; GFX9-O0-NEXT: v_writelane_b32 v1, s2, 9 -; GFX9-O0-NEXT: v_writelane_b32 v1, s3, 10 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[24:27], 0 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-O0-NEXT: v_mov_b32_e32 v6, s6 -; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v0, s[2:3] +; GFX9-O0-NEXT: v_writelane_b32 v3, s2, 9 +; GFX9-O0-NEXT: v_writelane_b32 v3, s3, 10 +; GFX9-O0-NEXT: v_mov_b32_e32 v7, s6 +; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v0, s[2:3] ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 56 ; GFX9-O0-NEXT: s_mov_b32 s2, s0 ; GFX9-O0-NEXT: s_mov_b32 s0, s1 @@ -1280,35 +1223,28 @@ define amdgpu_kernel void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 in ; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[24:25] ; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[26:27] ; GFX9-O0-NEXT: s_mov_b32 s6, 20 -; GFX9-O0-NEXT: v_lshlrev_b32_e64 v3, s6, v3 -; GFX9-O0-NEXT: s_mov_b32 s6, 10 ; GFX9-O0-NEXT: v_lshlrev_b32_e64 v4, s6, v4 -; GFX9-O0-NEXT: v_or3_b32 v3, v5, v4, v3 +; GFX9-O0-NEXT: s_mov_b32 s6, 10 +; GFX9-O0-NEXT: v_lshlrev_b32_e64 v5, s6, v5 +; GFX9-O0-NEXT: v_or3_b32 v4, v6, v5, v4 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6_sgpr7 ; GFX9-O0-NEXT: ; implicit-def: $sgpr15 -; GFX9-O0-NEXT: v_mov_b32_e32 v31, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v31, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v7 ; GFX9-O0-NEXT: s_swappc_b64 
s[30:31], s[16:17] -; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[24:27], 0 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s0, v1, 4 -; GFX9-O0-NEXT: v_readlane_b32 s1, v1, 5 -; GFX9-O0-NEXT: v_readlane_b32 s2, v1, 6 -; GFX9-O0-NEXT: v_readlane_b32 s3, v1, 7 -; GFX9-O0-NEXT: v_readlane_b32 s6, v1, 9 -; GFX9-O0-NEXT: v_readlane_b32 s7, v1, 10 -; GFX9-O0-NEXT: v_readlane_b32 s4, v1, 8 +; GFX9-O0-NEXT: v_readlane_b32 s0, v3, 4 +; GFX9-O0-NEXT: v_readlane_b32 s1, v3, 5 +; GFX9-O0-NEXT: v_readlane_b32 s2, v3, 6 +; GFX9-O0-NEXT: v_readlane_b32 s3, v3, 7 +; GFX9-O0-NEXT: v_readlane_b32 s6, v3, 9 +; GFX9-O0-NEXT: v_readlane_b32 s7, v3, 10 +; GFX9-O0-NEXT: v_readlane_b32 s4, v3, 8 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[24:27], 0 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] -; GFX9-O0-NEXT: v_add_u32_e64 v3, v3, v6 +; GFX9-O0-NEXT: v_add_u32_e64 v3, v3, v7 ; GFX9-O0-NEXT: s_mov_b64 exec, s[6:7] -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s4 offset:4 -; GFX9-O0-NEXT: ; kill: killed $vgpr0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v3 +; GFX9-O0-NEXT: s_nop 0 +; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s4 offset:4 ; GFX9-O0-NEXT: s_endpgm ; ; GFX9-O3-LABEL: strict_wwm_call: @@ -1452,37 +1388,31 @@ define i64 @strict_wwm_called_i64(i64 %a) noinline { define amdgpu_kernel void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 inreg %arg) { ; GFX9-O0-LABEL: strict_wwm_call_i64: ; GFX9-O0: ; %bb.0: -; GFX9-O0-NEXT: s_mov_b32 s32, 0x400 +; GFX9-O0-NEXT: s_mov_b32 s32, 0 ; GFX9-O0-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0 ; GFX9-O0-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1 ; GFX9-O0-NEXT: s_mov_b32 s26, -1 ; GFX9-O0-NEXT: s_mov_b32 s27, 0xe00000 ; GFX9-O0-NEXT: s_add_u32 s24, s24, s9 ; 
GFX9-O0-NEXT: s_addc_u32 s25, s25, 0 -; GFX9-O0-NEXT: ; implicit-def: $vgpr12 : SGPR spill to VGPR lane ; GFX9-O0-NEXT: s_or_saveexec_b64 s[10:11], -1 -; GFX9-O0-NEXT: v_writelane_b32 v12, s10, 0 -; GFX9-O0-NEXT: v_writelane_b32 v12, s11, 1 +; GFX9-O0-NEXT: ; implicit-def: $vgpr8 : SGPR spill to VGPR lane +; GFX9-O0-NEXT: v_writelane_b32 v8, s10, 0 +; GFX9-O0-NEXT: v_writelane_b32 v8, s11, 1 ; GFX9-O0-NEXT: s_mov_b32 s14, s8 ; GFX9-O0-NEXT: s_mov_b32 s13, s7 ; GFX9-O0-NEXT: s_mov_b32 s12, s6 ; GFX9-O0-NEXT: s_mov_b64 s[10:11], s[4:5] ; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[2:3] -; GFX9-O0-NEXT: v_readlane_b32 s2, v12, 0 -; GFX9-O0-NEXT: v_readlane_b32 s3, v12, 1 -; GFX9-O0-NEXT: v_writelane_b32 v12, s4, 2 -; GFX9-O0-NEXT: v_writelane_b32 v12, s5, 3 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-O0-NEXT: buffer_store_dword v12, off, s[24:27], 0 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: v_readlane_b32 s2, v8, 0 +; GFX9-O0-NEXT: v_readlane_b32 s3, v8, 1 +; GFX9-O0-NEXT: v_writelane_b32 v8, s4, 2 +; GFX9-O0-NEXT: v_writelane_b32 v8, s5, 3 ; GFX9-O0-NEXT: s_mov_b64 s[4:5], s[0:1] -; GFX9-O0-NEXT: v_readlane_b32 s0, v12, 2 -; GFX9-O0-NEXT: v_readlane_b32 s1, v12, 3 +; GFX9-O0-NEXT: v_readlane_b32 s0, v8, 2 +; GFX9-O0-NEXT: v_readlane_b32 s1, v8, 3 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v2 ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-O0-NEXT: buffer_load_dword v1, off, s[24:27], 0 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v0 ; GFX9-O0-NEXT: s_mov_b64 exec, s[2:3] ; GFX9-O0-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x24 @@ -1497,11 +1427,10 @@ define amdgpu_kernel void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i6 ; GFX9-O0-NEXT: s_mov_b32 s17, s8 ; GFX9-O0-NEXT: s_mov_b32 s18, s7 ; GFX9-O0-NEXT: s_mov_b32 s19, s6 -; GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_writelane_b32 v1, s16, 4 -; GFX9-O0-NEXT: v_writelane_b32 
v1, s17, 5 -; GFX9-O0-NEXT: v_writelane_b32 v1, s18, 6 -; GFX9-O0-NEXT: v_writelane_b32 v1, s19, 7 +; GFX9-O0-NEXT: v_writelane_b32 v8, s16, 4 +; GFX9-O0-NEXT: v_writelane_b32 v8, s17, 5 +; GFX9-O0-NEXT: v_writelane_b32 v8, s18, 6 +; GFX9-O0-NEXT: v_writelane_b32 v8, s19, 7 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 0 ; GFX9-O0-NEXT: s_mov_b32 s15, s7 ; GFX9-O0-NEXT: s_mov_b32 s8, s3 @@ -1516,20 +1445,17 @@ define amdgpu_kernel void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i6 ; GFX9-O0-NEXT: ; implicit-def: $sgpr8_sgpr9 ; GFX9-O0-NEXT: v_mov_b32_e32 v0, s2 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[2:3], -1 -; GFX9-O0-NEXT: v_writelane_b32 v1, s2, 8 -; GFX9-O0-NEXT: v_writelane_b32 v1, s3, 9 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-O0-NEXT: buffer_store_dword v1, off, s[24:27], 0 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] +; GFX9-O0-NEXT: v_writelane_b32 v8, s2, 8 +; GFX9-O0-NEXT: v_writelane_b32 v8, s3, 9 ; GFX9-O0-NEXT: v_mov_b32_e32 v7, s6 ; GFX9-O0-NEXT: v_cndmask_b32_e64 v7, v7, v0, s[2:3] ; GFX9-O0-NEXT: ; implicit-def: $sgpr2 ; GFX9-O0-NEXT: ; implicit-def: $sgpr2 -; GFX9-O0-NEXT: v_mov_b32_e32 v8, v7 -; GFX9-O0-NEXT: v_mov_b32_e32 v9, v6 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7 +; GFX9-O0-NEXT: v_mov_b32_e32 v10, v6 ; GFX9-O0-NEXT: s_mov_b32 s2, 32 -; GFX9-O0-NEXT: v_lshrrev_b64 v[10:11], s2, v[8:9] -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10 +; GFX9-O0-NEXT: v_lshrrev_b64 v[11:12], s2, v[9:10] +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11 ; GFX9-O0-NEXT: s_mov_b64 s[6:7], 60 ; GFX9-O0-NEXT: s_mov_b32 s2, s0 ; GFX9-O0-NEXT: s_mov_b32 s0, s1 @@ -1557,33 +1483,25 @@ define amdgpu_kernel void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i6 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v6 ; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-O0-NEXT: buffer_load_dword v2, off, s[24:27], 0 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] -; 
GFX9-O0-NEXT: s_waitcnt vmcnt(0) -; GFX9-O0-NEXT: v_readlane_b32 s0, v2, 4 -; GFX9-O0-NEXT: v_readlane_b32 s1, v2, 5 -; GFX9-O0-NEXT: v_readlane_b32 s2, v2, 6 -; GFX9-O0-NEXT: v_readlane_b32 s3, v2, 7 -; GFX9-O0-NEXT: v_readlane_b32 s4, v2, 8 -; GFX9-O0-NEXT: v_readlane_b32 s5, v2, 9 +; GFX9-O0-NEXT: v_readlane_b32 s0, v8, 4 +; GFX9-O0-NEXT: v_readlane_b32 s1, v8, 5 +; GFX9-O0-NEXT: v_readlane_b32 s2, v8, 6 +; GFX9-O0-NEXT: v_readlane_b32 s3, v8, 7 +; GFX9-O0-NEXT: v_readlane_b32 s4, v8, 8 +; GFX9-O0-NEXT: v_readlane_b32 s5, v8, 9 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[20:21], -1 -; GFX9-O0-NEXT: buffer_load_dword v0, off, s[24:27], 0 ; 4-byte Folded Reload -; GFX9-O0-NEXT: s_mov_b64 exec, s[20:21] ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v1 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 ; GFX9-O0-NEXT: ; implicit-def: $sgpr6 -; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8 -; GFX9-O0-NEXT: v_mov_b32_e32 v6, v9 +; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9 +; GFX9-O0-NEXT: v_mov_b32_e32 v6, v10 ; GFX9-O0-NEXT: v_add_co_u32_e64 v3, s[6:7], v3, v5 ; GFX9-O0-NEXT: v_addc_co_u32_e64 v4, s[6:7], v4, v6, s[6:7] ; GFX9-O0-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, v4 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, v3 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, v4 ; GFX9-O0-NEXT: s_mov_b32 s4, 0 -; GFX9-O0-NEXT: buffer_store_dwordx2 v[1:2], off, s[0:3], s4 offset:4 -; GFX9-O0-NEXT: ; kill: killed $vgpr0 +; GFX9-O0-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], s4 offset:4 ; GFX9-O0-NEXT: s_endpgm ; ; GFX9-O3-LABEL: strict_wwm_call_i64: