diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 0eb00cbc2f466..529da8d28a3c1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -50,6 +50,7 @@ const D16ImageDimIntrinsic *lookupD16ImageDimIntrinsic(unsigned Intr);
 struct ImageDimIntrinsicInfo {
   unsigned Intr;
   unsigned BaseOpcode;
+  unsigned AtomicNoRetBaseOpcode;
   MIMGDim Dim;
 
   uint8_t NumOffsetArgs;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 97c2c9c5316b3..9ce12243016f4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2006,19 +2006,27 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
     MachineInstr &MI, const AMDGPU::ImageDimIntrinsicInfo *Intr) const {
   MachineBasicBlock *MBB = MI.getParent();
   const DebugLoc &DL = MI.getDebugLoc();
+  unsigned IntrOpcode = Intr->BaseOpcode;
+
+  // For image atomic: use no-return opcode if result is unused.
+  if (Intr->AtomicNoRetBaseOpcode != Intr->BaseOpcode) {
+    Register ResultDef = MI.getOperand(0).getReg();
+    if (MRI->use_nodbg_empty(ResultDef))
+      IntrOpcode = Intr->AtomicNoRetBaseOpcode;
+  }
 
   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
-      AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
+      AMDGPU::getMIMGBaseOpcodeInfo(IntrOpcode);
 
   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
-  unsigned IntrOpcode = Intr->BaseOpcode;
   const bool IsGFX10Plus = AMDGPU::isGFX10Plus(STI);
   const bool IsGFX11Plus = AMDGPU::isGFX11Plus(STI);
   const bool IsGFX12Plus = AMDGPU::isGFX12Plus(STI);
 
   const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;
 
-  Register VDataIn, VDataOut;
+  Register VDataIn = AMDGPU::NoRegister;
+  Register VDataOut = AMDGPU::NoRegister;
   LLT VDataTy;
   int NumVDataDwords = -1;
   bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
@@ -2049,7 +2057,8 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
   unsigned DMaskLanes = 0;
 
   if (BaseOpcode->Atomic) {
-    VDataOut = MI.getOperand(0).getReg();
+    if (!BaseOpcode->NoReturn)
+      VDataOut = MI.getOperand(0).getReg();
     VDataIn = MI.getOperand(2).getReg();
     LLT Ty = MRI->getType(VDataIn);
@@ -2099,8 +2108,9 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
   assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");
 
   unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();
-  if (BaseOpcode->Atomic)
-    CPol |= AMDGPU::CPol::GLC; // TODO no-return optimization
+  // Keep GLC only when the atomic's result is actually used.
+  if (BaseOpcode->Atomic && !BaseOpcode->NoReturn)
+    CPol |= AMDGPU::CPol::GLC;
   if (CPol & ~((IsGFX12Plus ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12) |
                AMDGPU::CPol::VOLATILE))
     return false;
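Illustration (not part of the patch; function name hypothetical). The selector change above keys on whether the intrinsic's result has any non-debug use — the first call below selects the no-return opcode, the second keeps the returning form:

  define amdgpu_ps float @sketch(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
    ; dead result -> no-return (_NORTN) opcode, GLC dropped
    %dead = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
    ; live result -> returning opcode, GLC kept as before
    %live = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
    %f = bitcast i32 %live to float
    ret float %f
  }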
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 5f6d742d245ec..d95013123aced 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -877,69 +877,69 @@ multiclass MIMG_Store {
 }
 
 class MIMG_Atomic_gfx6789_base <bits<8> op, string asm, RegisterOperand data_rc,
-                                RegisterClass addr_rc, string dns="">
-  : MIMG_gfx6789 <op, (outs data_rc:$vdst), dns> {
-  let Constraints = "$vdst = $vdata";
-
+                                RegisterClass addr_rc, bit noRtn, string dns="">
+  : MIMG_gfx6789 <op, !if(noRtn, (outs), (outs data_rc:$vdst)), dns> {
+  let Constraints = !if(noRtn, "", "$vdst = $vdata");
+  let isCodeGenOnly = noRtn;
   let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
                            DMask:$dmask, UNorm:$unorm, CPol:$cpol,
                            R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
-  let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$cpol$r128$tfe$lwe$da";
+  let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$cpol$r128$tfe$lwe$da";
 }
 
 class MIMG_Atomic_gfx90a_base <bits<8> op, string asm, RegisterOperand data_rc,
-                               RegisterClass addr_rc, string dns="">
-  : MIMG_gfx90a <op, (outs getAlign2RegOp<data_rc>.ret:$vdst), dns> {
-  let Constraints = "$vdst = $vdata";
-
+                               RegisterClass addr_rc, bit noRtn, string dns="">
+  : MIMG_gfx90a <op, !if(noRtn, (outs), (outs getAlign2RegOp<data_rc>.ret:$vdst)), dns> {
+  let Constraints = !if(noRtn, "", "$vdst = $vdata");
+  let isCodeGenOnly = noRtn;
   let InOperandList = (ins getAlign2RegOp<data_rc>.ret:$vdata, addr_rc:$vaddr,
                            SReg_256_XNULL:$srsrc, DMask:$dmask, UNorm:$unorm,
                            CPol:$cpol, R128A16:$r128, LWE:$lwe, DA:$da);
-  let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$cpol$r128$lwe$da";
+  let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$cpol$r128$lwe$da";
 }
 
 class MIMG_Atomic_si<mimgopc op, string asm, RegisterOperand data_rc,
-                     RegisterClass addr_rc, bit enableDasm = 0>
-  : MIMG_Atomic_gfx6789_base<op.SI, asm, data_rc, addr_rc,
+                     RegisterClass addr_rc, bit noRtn = 0, bit enableDasm = 0>
+  : MIMG_Atomic_gfx6789_base<op.SI, asm, data_rc, addr_rc, noRtn,
                              !if(enableDasm, "GFX6GFX7", "")> {
   let AssemblerPredicate = isGFX6GFX7;
 }
 
 class MIMG_Atomic_vi<mimgopc op, string asm, RegisterOperand data_rc,
-                     RegisterClass addr_rc, bit enableDasm = 0>
-  : MIMG_Atomic_gfx6789_base<op.VI, asm, data_rc, addr_rc,
+                     RegisterClass addr_rc, bit noRtn = 0, bit enableDasm = 0>
+  : MIMG_Atomic_gfx6789_base<op.VI, asm, data_rc, addr_rc, noRtn,
                              !if(enableDasm, "GFX8", "")> {
   let AssemblerPredicate = isGFX8GFX9NotGFX90A;
   let MIMGEncoding = MIMGEncGfx8;
 }
 
 class MIMG_Atomic_gfx90a<mimgopc op, string asm, RegisterOperand data_rc,
-                         RegisterClass addr_rc, bit enableDasm = 0>
-  : MIMG_Atomic_gfx90a_base<op.VI, asm, data_rc, addr_rc,
+                         RegisterClass addr_rc, bit noRtn = 0, bit enableDasm = 0>
+  : MIMG_Atomic_gfx90a_base<op.VI, asm, data_rc, addr_rc, noRtn,
                             !if(enableDasm, "GFX90A", "")> {
   let AssemblerPredicate = isGFX90APlus;
   let MIMGEncoding = MIMGEncGfx90a;
 }
 
 class MIMG_Atomic_gfx10<mimgopc op, string opcode,
                         RegisterOperand DataRC, RegisterClass AddrRC,
-                        bit enableDisasm = 0>
-  : MIMG_gfx10<op.GFX10M, (outs DataRC:$vdst),
+                        bit noRtn = 0, bit enableDisasm = 0>
+  : MIMG_gfx10<op.GFX10M, !if(noRtn, (outs), (outs DataRC:$vdst)),
                !if(enableDisasm, "GFX10", "")> {
-  let Constraints = "$vdst = $vdata";
-
+  let Constraints = !if(noRtn, "", "$vdst = $vdata");
+  let isCodeGenOnly = noRtn;
   let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
                            DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
                            R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe);
-  let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
+  let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
 }
 
 class MIMG_Atomic_nsa_gfx10<mimgopc op, string opcode,
                             RegisterOperand DataRC, int num_addrs,
-                            bit enableDisasm = 0>
-  : MIMG_nsa_gfx10<op.GFX10M, (outs DataRC:$vdst), num_addrs,
+                            bit noRtn = 0, bit enableDisasm = 0>
+  : MIMG_nsa_gfx10<op.GFX10M, !if(noRtn, (outs), (outs DataRC:$vdst)), num_addrs,
                    !if(enableDisasm, "GFX10", "")> {
-  let Constraints = "$vdst = $vdata";
-
+  let Constraints = !if(noRtn, "", "$vdst = $vdata");
+  let isCodeGenOnly = noRtn;
   let InOperandList = !con((ins DataRC:$vdata),
                            AddrIns,
                            (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
@@ -950,24 +950,24 @@ class MIMG_Atomic_nsa_gfx10<mimgopc op, string opcode,
 
 class MIMG_Atomic_gfx11<mimgopc op, string opcode,
                         RegisterOperand DataRC, RegisterClass AddrRC,
-                        bit enableDisasm = 0>
-  : MIMG_gfx11<op.GFX11, (outs DataRC:$vdst),
+                        bit noRtn = 0, bit enableDisasm = 0>
+  : MIMG_gfx11<op.GFX11, !if(noRtn, (outs), (outs DataRC:$vdst)),
                !if(enableDisasm, "GFX11", "")> {
-  let Constraints = "$vdst = $vdata";
-
+  let Constraints = !if(noRtn, "", "$vdst = $vdata");
+  let isCodeGenOnly = noRtn;
   let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
                            DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
                            R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe);
-  let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
+  let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
 }
 
 class MIMG_Atomic_nsa_gfx11<mimgopc op, string opcode,
                             RegisterOperand DataRC, int num_addrs,
-                            bit enableDisasm = 0>
-  : MIMG_nsa_gfx11<op.GFX11, (outs DataRC:$vdst), num_addrs,
+                            bit noRtn = 0, bit enableDisasm = 0>
+  : MIMG_nsa_gfx11<op.GFX11, !if(noRtn, (outs), (outs DataRC:$vdst)), num_addrs,
                    !if(enableDisasm, "GFX11", "")> {
-  let Constraints = "$vdst = $vdata";
-
+  let Constraints = !if(noRtn, "", "$vdst = $vdata");
+  let isCodeGenOnly = noRtn;
   let InOperandList = !con((ins DataRC:$vdata),
                            AddrIns,
                            (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
@@ -977,11 +977,11 @@ class MIMG_Atomic_nsa_gfx11<mimgopc op, string opcode,
 
 class VIMAGE_Atomic_gfx12<mimgopc op, string opcode, RegisterOperand DataRC,
-                          int num_addrs, bit enableDisasm = 0>
-  : VIMAGE_gfx12<op.GFX12, (outs DataRC:$vdst), num_addrs,
+                          int num_addrs, bit noRtn = 0, bit enableDisasm = 0>
+  : VIMAGE_gfx12<op.GFX12, !if(noRtn, (outs), (outs DataRC:$vdst)), num_addrs,
                  !if(enableDisasm, "GFX12", "")> {
-  let Constraints = "$vdst = $vdata";
-
+  let Constraints = !if(noRtn, "", "$vdst = $vdata");
+  let isCodeGenOnly = noRtn;
   let InOperandList = !con((ins DataRC:$vdata),
                            AddrIns,
                            (ins SReg_256_XNULL:$rsrc, DMask:$dmask, Dim:$dim,
@@ -994,95 +994,96 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
                                       RegisterOperand data_rc,
                                       bit enableDasm = 0,
-                                      bit isFP = 0> {
+                                      bit isFP = 0,
+                                      bit noRtn = 0> {
   let hasSideEffects = 1, // FIXME: remove this
       mayLoad = 1, mayStore = 1, hasPostISelHook = 0, DisableWQM = 1,
-      FPAtomic = isFP in {
+      FPAtomic = isFP, IsAtomicNoRet = noRtn in {
     let VAddrDwords = 1 in {
       let ssamp = 0 in {
        if op.HAS_SI then {
-        def _V1_si : MIMG_Atomic_si <op, asm, data_rc, VGPR_32, enableDasm>;
+        def _V1_si : MIMG_Atomic_si <op, asm, data_rc, VGPR_32, noRtn, enableDasm>;
        }
        if op.HAS_VI then {
-        def _V1_vi : MIMG_Atomic_vi <op, asm, data_rc, VGPR_32, enableDasm>;
+        def _V1_vi : MIMG_Atomic_vi <op, asm, data_rc, VGPR_32, noRtn, enableDasm>;
         let hasPostISelHook = 1 in
-        def _V1_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VGPR_32, enableDasm>;
+        def _V1_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VGPR_32, noRtn, enableDasm>;
        }
        if op.HAS_GFX10M then {
-        def _V1_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VGPR_32, enableDasm>;
+        def _V1_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VGPR_32, noRtn, enableDasm>;
        }
        if op.HAS_GFX11 then {
-        def _V1_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VGPR_32, enableDasm>;
+        def _V1_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VGPR_32, noRtn, enableDasm>;
        }
       }
       if op.HAS_GFX12 then {
-        def _V1_gfx12 : VIMAGE_Atomic_gfx12 <op, asm, data_rc, 1, enableDasm>;
+        def _V1_gfx12 : VIMAGE_Atomic_gfx12 <op, asm, data_rc, 1, noRtn, enableDasm>;
       }
     }
     let VAddrDwords = 2 in {
      let ssamp = 0 in {
       if op.HAS_SI then {
-        def _V2_si : MIMG_Atomic_si <op, asm, data_rc, VReg_64, noRtn>;
+        def _V2_si : MIMG_Atomic_si <op, asm, data_rc, VReg_64, noRtn>;
      }
      if op.HAS_VI then {
-        def _V2_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_64>;
-        def _V2_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_64>;
+        def _V2_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_64, noRtn>;
+        def _V2_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_64, noRtn>;
      }
      if op.HAS_GFX10M then {
-        def _V2_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_64>;
-        def _V2_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 2>;
+        def _V2_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_64, noRtn>;
+        def _V2_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 2, noRtn>;
      }
      if op.HAS_GFX11 then {
-        def _V2_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VReg_64>;
-        def _V2_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 <op, asm, data_rc, 2>;
+        def _V2_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VReg_64, noRtn>;
+        def _V2_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 <op, asm, data_rc, 2, noRtn>;
      }
     }
     if op.HAS_GFX12 then {
-        def _V2_gfx12 : VIMAGE_Atomic_gfx12 <op, asm, data_rc, 2>;
+        def _V2_gfx12 : VIMAGE_Atomic_gfx12 <op, asm, data_rc, 2, noRtn>;
     }
    }
    let VAddrDwords = 3 in {
     let ssamp = 0 in {
      if op.HAS_SI then {
-        def _V3_si : MIMG_Atomic_si <op, asm, data_rc, VReg_96>;
+        def _V3_si : MIMG_Atomic_si <op, asm, data_rc, VReg_96, noRtn>;
      }
      if op.HAS_VI then {
-        def _V3_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_96>;
-        def _V3_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_96>;
+        def _V3_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_96, noRtn>;
+        def _V3_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_96, noRtn>;
      }
      if op.HAS_GFX10M then {
-        def _V3_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_96>;
-        def _V3_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 3>;
+        def _V3_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_96, noRtn>;
+        def _V3_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 3, noRtn>;
      }
      if op.HAS_GFX11 then {
-        def _V3_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VReg_96>;
-        def _V3_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 <op, asm, data_rc, 3>;
+        def _V3_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VReg_96, noRtn>;
+        def _V3_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 <op, asm, data_rc, 3, noRtn>;
      }
     }
     if op.HAS_GFX12 then {
-        def _V3_gfx12 : VIMAGE_Atomic_gfx12 <op, asm, data_rc, 3>;
+        def _V3_gfx12 : VIMAGE_Atomic_gfx12 <op, asm, data_rc, 3, noRtn>;
     }
    }
    let VAddrDwords = 4 in {
     let ssamp = 0 in {
      if op.HAS_SI then {
-        def _V4_si : MIMG_Atomic_si <op, asm, data_rc, VReg_128>;
+        def _V4_si : MIMG_Atomic_si <op, asm, data_rc, VReg_128, noRtn>;
      }
      if op.HAS_VI then {
-        def _V4_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_128>;
-        def _V4_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_128>;
+        def _V4_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_128, noRtn>;
+        def _V4_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_128, noRtn>;
      }
      if op.HAS_GFX10M then {
-        def _V4_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_128>;
-        def _V4_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 4>;
+        def _V4_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_128, noRtn>;
+        def _V4_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 4, noRtn>;
      }
      if op.HAS_GFX11 then {
-        def _V4_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VReg_128>;
-        def _V4_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 <op, asm, data_rc, 4, enableDasm>;
+        def _V4_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VReg_128, noRtn>;
+        def _V4_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 <op, asm, data_rc, 4, noRtn, enableDasm>;
      }
     }
     if op.HAS_GFX12 then {
-        def _V4_gfx12 : VIMAGE_Atomic_gfx12 <op, asm, data_rc, 4, enableDasm>;
+        def _V4_gfx12 : VIMAGE_Atomic_gfx12 <op, asm, data_rc, 4, noRtn, enableDasm>;
     }
    }
   }
@@ -1095,12 +1096,13 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
 }
 
-multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0,
-                        string renamed = ""> { // 64-bit atomics
-  let IsAtomicRet = 1 in {
+multiclass MIMG_Atomic_Base <mimgopc op, string asm, bit isCmpSwap = 0,
+                             bit isFP = 0, string renamed = "",
+                             bit noRtn = 0> { // 64-bit atomics
+  let IsAtomicRet = !not(noRtn) in {
     def "" : MIMGBaseOpcode {
      let Atomic = 1;
      let AtomicX2 = isCmpSwap;
+     let NoReturn = noRtn;
    }
 
    let BaseOpcode = !cast<MIMGBaseOpcode>(NAME) in {
@@ -1109,22 +1111,28 @@ multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0,
      let VDataDwords = 1 in {
-       defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, VGPR_32, 1, isFP, noRtn>;
+       defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, VGPR_32, 1, isFP, noRtn>;
      }
      let VDataDwords = 2 in
-       defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_64, 0, isFP>;
+       defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_64, 0, isFP, noRtn>;
      let VDataDwords = 3 in
-       defm _V3 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_96, 0, isFP>;
+       defm _V3 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_96, 0, isFP, noRtn>;
 
      if isCmpSwap then {
        let VDataDwords = 4 in
-         defm _V4 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_128, 0, isFP>;
+         defm _V4 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_128, 0, isFP, noRtn>;
        let VDataDwords = 5 in
-         defm _V5 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_160, 0, isFP>;
+         defm _V5 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_160, 0, isFP, noRtn>;
      }
    }
-  } // End IsAtomicRet = 1
+  }
+}
+
+multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0,
+                        string renamed = ""> {
+  defm "" : MIMG_Atomic_Base <op, asm, isCmpSwap, isFP, renamed, /*noRtn=*/0>;
+  defm "_NORTN" : MIMG_Atomic_Base <op, asm, isCmpSwap, isFP, renamed, /*noRtn=*/1>;
 }
 
 multiclass MIMG_Atomic_Renamed <string name, string renamed,
@@ -1846,6 +1854,7 @@ class ImageDimIntrinsicInfo<AMDGPUImageDimIntrinsic I> {
   Intrinsic Intr = I;
   MIMGBaseOpcode BaseOpcode = !cast<MIMGBaseOpcode>(!strconcat("IMAGE_", I.P.OpMod));
+  MIMGBaseOpcode AtomicNoRetBaseOpcode = BaseOpcode;
   AMDGPUDimProps Dim = I.P.Dim;
 
   AMDGPUImageDimIntrinsicEval DimEval = AMDGPUImageDimIntrinsicEval<I.P>;
@@ -1855,13 +1864,20 @@ class ImageDimIntrinsicInfo<AMDGPUImageDimIntrinsic I> {
   bits<8> CoordTyArg = !add(GradientTyArg, !if(I.P.Gradients, 1, 0));
 }
 
+class ImageDimAtomicIntrinsicInfo <AMDGPUImageDimIntrinsic I>
+  : ImageDimIntrinsicInfo<I> {
+  MIMGBaseOpcode AtomicNoRetBaseOpcode =
+    !cast<MIMGBaseOpcode>(!strconcat("IMAGE_", I.P.OpMod, "_NORTN"));
+}
+
 def ImageDimIntrinsicTable : GenericTable {
   let FilterClass = "ImageDimIntrinsicInfo";
-  let Fields = ["Intr", "BaseOpcode", "Dim", "NumOffsetArgs", "NumBiasArgs", "NumZCompareArgs", "NumGradients", "NumDmask", "NumData", "NumVAddrs", "NumArgs",
-                "DMaskIndex", "VAddrStart", "OffsetIndex", "BiasIndex", "ZCompareIndex", "GradientStart", "CoordStart", "LodIndex", "MipIndex", "VAddrEnd",
-                "RsrcIndex", "SampIndex", "UnormIndex", "TexFailCtrlIndex", "CachePolicyIndex",
+  let Fields = ["Intr", "BaseOpcode", "AtomicNoRetBaseOpcode", "Dim", "NumOffsetArgs", "NumBiasArgs", "NumZCompareArgs", "NumGradients", "NumDmask", "NumData",
+                "NumVAddrs", "NumArgs", "DMaskIndex", "VAddrStart", "OffsetIndex", "BiasIndex", "ZCompareIndex", "GradientStart", "CoordStart", "LodIndex", "MipIndex",
+                "VAddrEnd", "RsrcIndex", "SampIndex", "UnormIndex", "TexFailCtrlIndex", "CachePolicyIndex",
                 "BiasTyArg", "GradientTyArg", "CoordTyArg"];
   string TypeOf_BaseOpcode = "MIMGBaseOpcode";
+  string TypeOf_AtomicNoRetBaseOpcode = "MIMGBaseOpcode";
   string TypeOf_Dim = "MIMGDim";
 
   let PrimaryKey = ["Intr"];
@@ -1874,11 +1890,14 @@ def getImageDimIntrinsicByBaseOpcode : SearchIndex {
   let Key = ["BaseOpcode", "Dim"];
 }
 
-foreach intr = !listconcat(AMDGPUImageDimIntrinsics,
-                           AMDGPUImageDimAtomicIntrinsics) in {
+foreach intr = AMDGPUImageDimIntrinsics in {
   def : ImageDimIntrinsicInfo<intr>;
 }
 
+foreach intr = AMDGPUImageDimAtomicIntrinsics in {
+  def : ImageDimAtomicIntrinsicInfo<intr>;
+}
+
 // L to LZ Optimization Mapping
 def : MIMGLZMapping<IMAGE_SAMPLE_L, IMAGE_SAMPLE_LZ>;
 def : MIMGLZMapping<IMAGE_SAMPLE_C_L, IMAGE_SAMPLE_C_LZ>;
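Illustration (not part of the patch; assembled from the pre-GFX12 test deltas further below). The _NORTN defs produced by the split multiclass have no $vdst operand and no "$vdst = $vdata" tie, which on the assembly side shows up as the same instruction minus the glc modifier:

  ; result used:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc
  ; result unused:  image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm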
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index be4229155c983..b34ab2a7e08e5 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9134,16 +9134,23 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
   SDLoc DL(Op);
   MachineFunction &MF = DAG.getMachineFunction();
   const GCNSubtarget *ST = &MF.getSubtarget<GCNSubtarget>();
+  unsigned IntrOpcode = Intr->BaseOpcode;
+  // For image atomic: use no-return opcode if result is unused.
+  if (Intr->AtomicNoRetBaseOpcode != Intr->BaseOpcode &&
+      !Op.getNode()->hasAnyUseOfValue(0))
+    IntrOpcode = Intr->AtomicNoRetBaseOpcode;
   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
-      AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
+      AMDGPU::getMIMGBaseOpcodeInfo(IntrOpcode);
   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
-  unsigned IntrOpcode = Intr->BaseOpcode;
   bool IsGFX10Plus = AMDGPU::isGFX10Plus(*Subtarget);
   bool IsGFX11Plus = AMDGPU::isGFX11Plus(*Subtarget);
   bool IsGFX12Plus = AMDGPU::isGFX12Plus(*Subtarget);
 
   SmallVector<EVT, 3> ResultTypes(Op->values());
   SmallVector<EVT, 3> OrigResultTypes(Op->values());
+  if (BaseOpcode->NoReturn && BaseOpcode->Atomic)
+    ResultTypes.erase(&ResultTypes[0]);
+
   bool IsD16 = false;
   bool IsG16 = false;
   bool IsA16 = false;
@@ -9162,8 +9169,10 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
     VData = Op.getOperand(2);
 
     IsAtomicPacked16Bit =
-        (Intr->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16 ||
-         Intr->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16);
+        (IntrOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16 ||
+         IntrOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16_NORTN ||
+         IntrOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16 ||
+         IntrOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16_NORTN);
 
     bool Is64Bit = VData.getValueSizeInBits() == 64;
     if (BaseOpcode->AtomicX2) {
@@ -9173,7 +9182,9 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
       if (Is64Bit)
         VData = DAG.getBitcast(MVT::v4i32, VData);
 
-      ResultTypes[0] = Is64Bit ? MVT::v2i64 : MVT::v2i32;
+      if (!BaseOpcode->NoReturn)
+        ResultTypes[0] = Is64Bit ? MVT::v2i64 : MVT::v2i32;
+
       DMask = Is64Bit ? 0xf : 0x3;
       NumVDataDwords = Is64Bit ? 4 : 2;
     } else {
@@ -9399,8 +9410,9 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
   }
 
   unsigned CPol = Op.getConstantOperandVal(ArgOffset + Intr->CachePolicyIndex);
-  if (BaseOpcode->Atomic)
-    CPol |= AMDGPU::CPol::GLC; // TODO no-return optimization
+  // Keep GLC only when the atomic's result is actually used.
+  if (BaseOpcode->Atomic && !BaseOpcode->NoReturn)
+    CPol |= AMDGPU::CPol::GLC;
   if (CPol & ~((IsGFX12Plus ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12) |
                AMDGPU::CPol::VOLATILE))
     return Op;
@@ -9512,13 +9524,20 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
     DAG.setNodeMemRefs(NewNode, {MemRef});
   }
 
+  if (BaseOpcode->NoReturn) {
+    if (BaseOpcode->Atomic)
+      return DAG.getMergeValues(
+          {DAG.getPOISON(OrigResultTypes[0]), SDValue(NewNode, 0)}, DL);
+
+    return SDValue(NewNode, 0);
+  }
+
   if (BaseOpcode->AtomicX2) {
     SmallVector<SDValue, 1> Elt;
     DAG.ExtractVectorElements(SDValue(NewNode, 0), Elt, 0, 1);
     return DAG.getMergeValues({Elt[0], SDValue(NewNode, 1)}, DL);
   }
-  if (BaseOpcode->NoReturn)
-    return SDValue(NewNode, 0);
+
   return constructRetValue(DAG, NewNode, OrigResultTypes, IsTexFail,
                            Subtarget->hasUnpackedD16VMem(), IsD16, DMaskLanes,
                            NumVDataDwords, IsAtomicPacked16Bit, DL);
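Illustration (not part of the patch; visible in the GFX12 test deltas below). Pre-GFX12 the returning form is encoded with glc; on GFX12 it is carried in the th field instead, so a dead result drops th:TH_ATOMIC_RETURN — and, in the SDAG tests, the s_wait_loadcnt that was only there for the unused returned value:

  ; result used:    image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
  ; result unused:  image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D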
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll
index 221e2fd4f00f7..09e1fca3f2677 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll
@@ -1200,7 +1200,7 @@ define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i3
 ; GFX6-NEXT:    s_mov_b32 s5, s7
 ; GFX6-NEXT:    s_mov_b32 s6, s8
 ; GFX6-NEXT:    s_mov_b32 s7, s9
-; GFX6-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX6-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX8-LABEL: atomic_cmpswap_i32_1d_no_return:
@@ -1213,7 +1213,7 @@ define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i3
 ; GFX8-NEXT:    s_mov_b32 s5, s7
 ; GFX8-NEXT:    s_mov_b32 s6, s8
 ; GFX8-NEXT:    s_mov_b32 s7, s9
-; GFX8-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX8-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm
 ; GFX8-NEXT:    s_endpgm
 ;
 ; GFX900-LABEL: atomic_cmpswap_i32_1d_no_return:
@@ -1226,7 +1226,7 @@ define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i3
 ; GFX900-NEXT:    s_mov_b32 s5, s7
 ; GFX900-NEXT:    s_mov_b32 s6, s8
 ; GFX900-NEXT:    s_mov_b32 s7, s9
-; GFX900-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX900-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm
 ; GFX900-NEXT:    s_endpgm
 ;
 ; GFX90A-LABEL: atomic_cmpswap_i32_1d_no_return:
@@ -1239,7 +1239,7 @@ define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i3
 ; GFX90A-NEXT:    s_mov_b32 s5, s7
 ; GFX90A-NEXT:    s_mov_b32 s6, s8
 ; GFX90A-NEXT:    s_mov_b32 s7, s9
-; GFX90A-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX90A-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm
 ; GFX90A-NEXT:    s_endpgm
 ;
 ; GFX10PLUS-LABEL: atomic_cmpswap_i32_1d_no_return:
@@ -1252,7 +1252,7 @@ define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i3
 ; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
 ; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
 ; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
-; GFX10PLUS-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
+; GFX10PLUS-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm
 ; GFX10PLUS-NEXT:    s_endpgm
 ;
 ; GFX12-LABEL: atomic_cmpswap_i32_1d_no_return:
@@ -1265,7 +1265,7 @@ define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i3
 ; GFX12-NEXT:    s_mov_b32 s5, s7
 ; GFX12-NEXT:    s_mov_b32 s6, s8
 ; GFX12-NEXT:    s_mov_b32 s7, s9
-; GFX12-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
+; GFX12-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
 ; GFX12-NEXT:    s_endpgm
 main_body:
   %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -3194,7 +3194,7 @@ define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i6
 ; GFX6-NEXT:    s_mov_b32 s5, s7
 ; GFX6-NEXT:    s_mov_b32 s6, s8
 ; GFX6-NEXT:    s_mov_b32 s7, s9
-; GFX6-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
+; GFX6-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX8-LABEL: atomic_cmpswap_i64_1d_no_return:
@@ -3207,7 +3207,7 @@ define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i6
 ; GFX8-NEXT:    s_mov_b32 s5, s7
 ; GFX8-NEXT:    s_mov_b32 s6, s8
 ; GFX8-NEXT:    s_mov_b32 s7, s9
-; GFX8-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
+; GFX8-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm
 ; GFX8-NEXT:    s_endpgm
 ;
 ; GFX900-LABEL: atomic_cmpswap_i64_1d_no_return:
@@ -3220,7 +3220,7 @@ define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i6
 ; GFX900-NEXT:    s_mov_b32 s5, s7
 ; GFX900-NEXT:    s_mov_b32 s6, s8
 ; GFX900-NEXT:    s_mov_b32 s7, s9
-; GFX900-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
+; GFX900-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm
 ; GFX900-NEXT:    s_endpgm
 ;
 ; GFX90A-LABEL: atomic_cmpswap_i64_1d_no_return:
@@ -3233,7 +3233,7 @@ define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i6
 ; GFX90A-NEXT:    s_mov_b32 s5, s7
 ; GFX90A-NEXT:    s_mov_b32 s6, s8
 ; GFX90A-NEXT:    s_mov_b32 s7, s9
-; GFX90A-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
+; GFX90A-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm
 ; GFX90A-NEXT:    s_endpgm
 ;
 ; GFX10PLUS-LABEL: atomic_cmpswap_i64_1d_no_return:
@@ -3246,7 +3246,7 @@ define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i6
 ; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
 ; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
 ; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
-; GFX10PLUS-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc
+; GFX10PLUS-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
 ; GFX10PLUS-NEXT:    s_endpgm
 ;
 ; GFX12-LABEL: atomic_cmpswap_i64_1d_no_return:
@@ -3259,7 +3259,7 @@ define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i6
 ; GFX12-NEXT:    s_mov_b32 s5, s7
 ; GFX12-NEXT:    s_mov_b32 s6, s8
 ; GFX12-NEXT:    s_mov_b32 s7, s9
-; GFX12-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
+; GFX12-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
 ; GFX12-NEXT:    s_endpgm
 main_body:
   %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir
index 292fa4be1ca1d..4f160b6cb4b1b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir
@@ -25,6 +25,7 @@ body: |
     ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si]].sub0
     ; GFX6-NEXT: $vgpr0 = COPY [[COPY3]]
     ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
+    ;
     ; GFX8-LABEL: name: atomic_cmpswap_i32_1d
     ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
     ; GFX8-NEXT: {{  $}}
@@ -35,6 +36,7 @@ body: |
     ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi]].sub0
     ; GFX8-NEXT: $vgpr0 = COPY [[COPY3]]
     ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
+    ;
    ; GFX10-LABEL: name: atomic_cmpswap_i32_1d
    ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
    ; GFX10-NEXT: {{  $}}
@@ -45,6 +47,7 @@ body: |
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_]].sub0
    ; GFX10-NEXT: $vgpr0 = COPY [[COPY3]]
    ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
+    ;
    ; GFX11-LABEL: name: atomic_cmpswap_i32_1d
    ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
    ; GFX11-NEXT: {{  $}}
@@ -55,6 +58,7 @@ body: |
    ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_]].sub0
    ; GFX11-NEXT: $vgpr0 = COPY [[COPY3]]
    ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
+    ;
    ; GFX12-LABEL: name: atomic_cmpswap_i32_1d
    ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
    ; GFX12-NEXT: {{  $}}
@@ -89,39 +93,43 @@ body: |
    ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+    ; GFX6-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
    ; GFX6-NEXT: S_ENDPGM 0
+    ;
    ; GFX8-LABEL: name: atomic_cmpswap_i32_1d_no_return
    ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
    ; GFX8-NEXT: {{  $}}
    ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+    ; GFX8-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
    ; GFX8-NEXT: S_ENDPGM 0
+    ;
    ; GFX10-LABEL: name: atomic_cmpswap_i32_1d_no_return
    ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+    ; GFX10-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
    ; GFX10-NEXT: S_ENDPGM 0
+    ;
    ; GFX11-LABEL: name: atomic_cmpswap_i32_1d_no_return
    ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
    ; GFX11-NEXT: {{  $}}
    ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+    ; GFX11-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V2_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
    ; GFX11-NEXT: S_ENDPGM 0
+    ;
    ; GFX12-LABEL: name: atomic_cmpswap_i32_1d_no_return
    ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
    ; GFX12-NEXT: {{  $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
    ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
    ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+    ; GFX12-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V2_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
    ; GFX12-NEXT: S_ENDPGM 0
    %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
    %1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
@@ -150,6 +158,7 @@ body: |
    ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si]].sub0_sub1
    ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]
    ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
+    ;
    ; GFX8-LABEL: name: atomic_cmpswap_i64_1d
    ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
    ; GFX8-NEXT: {{  $}}
@@ -160,6 +169,7 @@ body: |
    ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi]].sub0_sub1
    ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]
    ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
+    ;
    ; GFX10-LABEL: name: atomic_cmpswap_i64_1d
    ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
    ; GFX10-NEXT: {{  $}}
@@ -170,6 +180,7 @@ body: |
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_]].sub0_sub1
    ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]
    ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
+    ;
    ; GFX11-LABEL: name: atomic_cmpswap_i64_1d
    ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
    ; GFX11-NEXT: {{  $}}
@@ -180,6 +191,7 @@ body: |
    ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11_]].sub0_sub1
    ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]
    ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
+    ;
    ; GFX12-LABEL: name: atomic_cmpswap_i64_1d
    ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
    ; GFX12-NEXT: {{  $}}
@@ -214,39 +226,43 @@ body: |
    ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
    ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
-    ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+    ; GFX6-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V4_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
    ; GFX6-NEXT: S_ENDPGM 0
+    ;
    ; GFX8-LABEL: name: atomic_cmpswap_i64_1d_no_return
    ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
    ; GFX8-NEXT: {{  $}}
    ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
    ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
-    ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+    ; GFX8-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V4_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
    ; GFX8-NEXT: S_ENDPGM 0
+    ;
    ; GFX10-LABEL: name: atomic_cmpswap_i64_1d_no_return
    ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
-    ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+    ; GFX10-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V4_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
    ; GFX10-NEXT: S_ENDPGM 0
+    ;
    ; GFX11-LABEL: name: atomic_cmpswap_i64_1d_no_return
    ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
    ; GFX11-NEXT: {{  $}}
    ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
    ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
    ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
-    ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+    ; GFX11-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V4_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
    ; GFX11-NEXT: S_ENDPGM 0
+    ;
    ; GFX12-LABEL: name: atomic_cmpswap_i64_1d_no_return
    ; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
    ; GFX12-NEXT: {{  $}}
    ; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
    ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
    ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
-    ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+    ; GFX12-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V4_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
    ; GFX12-NEXT: S_ENDPGM 0
    %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
    %1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll
index 49607e320bd0a..83f0229aea326 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll
@@ -92,8 +92,7 @@ define amdgpu_ps void @atomic_swap_1d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s)
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def a0
 ; GFX90A-NEXT:    ;;#ASMEND
-; GFX90A-NEXT:    v_accvgpr_read_b32 v1, a0
-; GFX90A-NEXT:    image_atomic_swap v1, v0, s[0:7] dmask:0x1 unorm glc
+; GFX90A-NEXT:    image_atomic_swap a0, v0, s[0:7] dmask:0x1 unorm
 ; GFX90A-NEXT:    s_endpgm
   %data = call i32 asm "; def $0", "=a"()
   %unused = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -106,8 +105,7 @@ define amdgpu_ps void @atomic_add_2d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s, i
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def a0
 ; GFX90A-NEXT:    ;;#ASMEND
-; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
-; GFX90A-NEXT:    image_atomic_add v2, v[0:1], s[0:7] dmask:0x1 unorm glc
+; GFX90A-NEXT:    image_atomic_add a0, v[0:1], s[0:7] dmask:0x1 unorm
 ; GFX90A-NEXT:    s_endpgm
   %data = call i32 asm "; def $0", "=a"()
   %unused = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
@@ -123,9 +121,7 @@ define amdgpu_ps void @atomic_cmpswap_1d_agpr_noret(<8 x i32> inreg %rsrc, i32 %
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def a1
 ; GFX90A-NEXT:    ;;#ASMEND
-; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
-; GFX90A-NEXT:    v_accvgpr_read_b32 v3, a1
-; GFX90A-NEXT:    image_atomic_cmpswap v[2:3], v0, s[0:7] dmask:0x3 unorm glc
+; GFX90A-NEXT:    image_atomic_cmpswap a[0:1], v0, s[0:7] dmask:0x3 unorm
 ; GFX90A-NEXT:    s_endpgm
   %cmp = call i32 asm "; def $0", "=a"()
   %swap = call i32 asm "; def $0", "=a"()
@@ -139,9 +135,7 @@ define amdgpu_ps void @atomic_swap_1d_i64_agpr_noret(<8 x i32> inreg %rsrc, i32
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def a[0:1]
 ; GFX90A-NEXT:    ;;#ASMEND
-; GFX90A-NEXT:    v_accvgpr_read_b32 v3, a1
-; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
-; GFX90A-NEXT:    image_atomic_swap v[2:3], v0, s[0:7] dmask:0x3 unorm glc
+; GFX90A-NEXT:    image_atomic_swap a[0:1], v0, s[0:7] dmask:0x3 unorm
 ; GFX90A-NEXT:    s_endpgm
   %data = call i64 asm "; def $0", "=a"()
   %unused = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -154,14 +148,10 @@ define amdgpu_ps void @atomic_cmpswap_1d_64_agpr_noret(<8 x i32> inreg %rsrc, i3
 ; GFX90A-NEXT:    ;;#ASMSTART
 ; GFX90A-NEXT:    ; def a[0:1]
 ; GFX90A-NEXT:    ;;#ASMEND
-; GFX90A-NEXT:    v_accvgpr_read_b32 v3, a1
-; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
 ; GFX90A-NEXT:    ;;#ASMSTART
-; GFX90A-NEXT:    ; def a[0:1]
+; GFX90A-NEXT:    ; def a[2:3]
 ; GFX90A-NEXT:    ;;#ASMEND
-; GFX90A-NEXT:    v_accvgpr_read_b32 v5, a1
-; GFX90A-NEXT:    v_accvgpr_read_b32 v4, a0
-; GFX90A-NEXT:    image_atomic_cmpswap v[2:5], v0, s[0:7] dmask:0xf unorm glc
+; GFX90A-NEXT:    image_atomic_cmpswap a[0:3], v0, s[0:7] dmask:0xf unorm
 ; GFX90A-NEXT:    s_endpgm
   %cmp = call i64 asm "; def $0", "=a"()
   %swap = call i64 asm "; def $0", "=a"()
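The new test below can be exercised standalone with the llc invocations from its RUN lines, e.g. (paths assume an LLVM checkout; both selectors should agree on dropping the return):

  llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.noret.ll -o -
  llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.noret.ll -o -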
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.noret.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.noret.ll
new file mode 100644
index 0000000000000..6c58a1a30bd4c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.noret.ll
@@ -0,0 +1,581 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX10PLUS-GISE %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX10PLUS %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-GISE %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
+
+define amdgpu_ps void @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_swap_1d:
+; GFX10PLUS-GISE:       ; %bb.0:
+; GFX10PLUS-GISE-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT:    s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_swap_1d:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_swap_1d:
+; GFX12-GISE:       ; %bb.0:
+; GFX12-GISE-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT:    s_endpgm
+;
+; GFX12-LABEL: atomic_swap_1d:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_endpgm
+  %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_swap_1d_i64(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_swap_1d_i64:
+; GFX10PLUS-GISE:       ; %bb.0:
+; GFX10PLUS-GISE-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT:    s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_swap_1d_i64:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_swap_1d_i64:
+; GFX12-GISE:       ; %bb.0:
+; GFX12-GISE-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT:    s_endpgm
+;
+; GFX12-LABEL: atomic_swap_1d_i64:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_endpgm
+  %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_swap_1d_float(<8 x i32> inreg %rsrc, float %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_swap_1d_float:
+; GFX10PLUS-GISE:       ; %bb.0:
+; GFX10PLUS-GISE-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT:    s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_swap_1d_float:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_swap_1d_float:
+; GFX12-GISE:       ; %bb.0:
+; GFX12-GISE-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT:    s_endpgm
+;
+; GFX12-LABEL: atomic_swap_1d_float:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_endpgm
+  %v = call float @llvm.amdgcn.image.atomic.swap.1d.f32.i32(float %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_add_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_add_1d:
+; GFX10PLUS-GISE:       ; %bb.0:
+; GFX10PLUS-GISE-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT:    s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_add_1d:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_add_1d:
+; GFX12-GISE:       ; %bb.0:
+; GFX12-GISE-NEXT:    image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT:    s_endpgm
+;
+; GFX12-LABEL: atomic_add_1d:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_endpgm
+  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_sub_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_sub_1d:
+; GFX10PLUS-GISE:       ; %bb.0:
+; GFX10PLUS-GISE-NEXT:    image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT:    s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_sub_1d:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_sub_1d:
+; GFX12-GISE:       ; %bb.0:
+; GFX12-GISE-NEXT:    image_atomic_sub_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT:    s_endpgm
+;
+; GFX12-LABEL: atomic_sub_1d:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    image_atomic_sub_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_endpgm
+  %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_smin_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_smin_1d:
+; GFX10PLUS-GISE:       ; %bb.0:
+; GFX10PLUS-GISE-NEXT:    image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT:    s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_smin_1d:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_smin_1d:
+; GFX12-GISE:       ; %bb.0:
+; GFX12-GISE-NEXT:    image_atomic_min_int v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT:    s_endpgm
+;
+; GFX12-LABEL: atomic_smin_1d:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    image_atomic_min_int v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_endpgm
+  %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_umin_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_umin_1d:
+; GFX10PLUS-GISE:       ; %bb.0:
+; GFX10PLUS-GISE-NEXT:    image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT:    s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_umin_1d:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_umin_1d:
+; GFX12-GISE:       ; %bb.0:
+; GFX12-GISE-NEXT:    image_atomic_min_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT:    s_endpgm
+;
+; GFX12-LABEL: atomic_umin_1d:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    image_atomic_min_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_endpgm
+  %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_smax_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_smax_1d:
+; GFX10PLUS-GISE:       ; %bb.0:
+; GFX10PLUS-GISE-NEXT:    image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT:    s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_smax_1d:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_smax_1d:
+; GFX12-GISE:       ; %bb.0:
+; GFX12-GISE-NEXT:    image_atomic_max_int v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT:    s_endpgm
+;
+; GFX12-LABEL: atomic_smax_1d:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    image_atomic_max_int v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_endpgm
+  %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_umax_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_umax_1d:
+; GFX10PLUS-GISE:       ; %bb.0:
+; GFX10PLUS-GISE-NEXT:    image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT:    s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_umax_1d:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_umax_1d:
+; GFX12-GISE:       ; %bb.0:
+; GFX12-GISE-NEXT:    image_atomic_max_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT:    s_endpgm
+;
+; GFX12-LABEL: atomic_umax_1d:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    image_atomic_max_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_endpgm
+  %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_and_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_and_1d:
+; GFX10PLUS-GISE:       ; %bb.0:
+; GFX10PLUS-GISE-NEXT:    image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT:    s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_and_1d:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_and_1d:
+; GFX12-GISE:       ; %bb.0:
+; GFX12-GISE-NEXT:    image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT:    s_endpgm
+;
+; GFX12-LABEL: atomic_and_1d:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_endpgm
+  %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_or_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_or_1d:
+; GFX10PLUS-GISE:       ; %bb.0:
+; GFX10PLUS-GISE-NEXT:    image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT:    s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_or_1d:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_or_1d:
+; GFX12-GISE:       ; %bb.0:
+; GFX12-GISE-NEXT:    image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT:    s_endpgm
+;
+; GFX12-LABEL: atomic_or_1d:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_endpgm
+  %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_xor_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_xor_1d:
+; GFX10PLUS-GISE:       ; %bb.0:
+; GFX10PLUS-GISE-NEXT:    image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT:    s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_xor_1d:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_xor_1d:
+; GFX12-GISE:       ; %bb.0:
+; GFX12-GISE-NEXT:    image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT:    s_endpgm
+;
+; GFX12-LABEL: atomic_xor_1d:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_endpgm
+  %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_inc_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_inc_1d:
+; GFX10PLUS-GISE:       ; %bb.0:
+; GFX10PLUS-GISE-NEXT:    image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT:    s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_inc_1d:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_inc_1d:
+; GFX12-GISE:       ; %bb.0:
+; GFX12-GISE-NEXT:    image_atomic_inc_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT:    s_endpgm
+;
+; GFX12-LABEL: atomic_inc_1d:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    image_atomic_inc_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_endpgm
+  %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_dec_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_dec_1d:
+; GFX10PLUS-GISE:       ; %bb.0:
+; GFX10PLUS-GISE-NEXT:    image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT:    s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_dec_1d:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_dec_1d:
+; GFX12-GISE:       ; %bb.0:
+; GFX12-GISE-NEXT:    image_atomic_dec_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT:    s_endpgm
+;
+; GFX12-LABEL: atomic_dec_1d:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    image_atomic_dec_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_endpgm
+  %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_cmpswap_1d:
+; GFX10PLUS-GISE:       ; %bb.0:
+; GFX10PLUS-GISE-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT:    s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_cmpswap_1d:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_cmpswap_1d:
+; GFX12-GISE:       ; %bb.0:
+; GFX12-GISE-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT:    s_endpgm
+;
+; GFX12-LABEL: atomic_cmpswap_1d:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_endpgm
+  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_cmpswap_1d_64(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_cmpswap_1d_64:
+; GFX10PLUS-GISE:       ; %bb.0:
+; GFX10PLUS-GISE-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT:    s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_cmpswap_1d_64:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_cmpswap_1d_64:
+; GFX12-GISE:       ; %bb.0:
+; GFX12-GISE-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT:    s_endpgm
+;
+; GFX12-LABEL: atomic_cmpswap_1d_64:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT:    s_endpgm
+  %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_add_2d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t) {
+; GFX10PLUS-GISE-LABEL: atomic_add_2d:
+; GFX10PLUS-GISE:       ; %bb.0:
+; GFX10PLUS-GISE-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm
+; GFX10PLUS-GISE-NEXT:    s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_add_2d:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_add_2d:
+; GFX12-GISE:       ; %bb.0:
+; GFX12-GISE-NEXT:    image_atomic_add_uint v0, [v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
+; GFX12-GISE-NEXT:    s_endpgm
+;
+; GFX12-LABEL: atomic_add_2d:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    image_atomic_add_uint v0, [v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
+; GFX12-NEXT:    s_endpgm
+  %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_add_3d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %r) {
+; GFX10PLUS-GISE-LABEL: atomic_add_3d:
+; GFX10PLUS-GISE:       ; %bb.0:
+; GFX10PLUS-GISE-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm
+; GFX10PLUS-GISE-NEXT:    s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_add_3d:
+; GFX10PLUS:       ; %bb.0:
+; GFX10PLUS-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm
+; GFX10PLUS-NEXT:    s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_add_3d:
+; GFX12-GISE:       ; %bb.0:
+; GFX12-GISE-NEXT:    image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D
+; GFX12-GISE-NEXT:    s_endpgm
+;
+; GFX12-LABEL: atomic_add_3d:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D
+; GFX12-NEXT:    s_endpgm
+  %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32 %data, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_add_cube(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %face) {
+; GFX10PLUS-GISE-LABEL: atomic_add_cube:
+; GFX10PLUS-GISE:       ; %bb.0:
+; GFX10PLUS-GISE-NEXT:    image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm
+; GFX10PLUS-GISE-NEXT:    s_endpgm
dim:SQ_RSRC_IMG_2D_MSAA +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_add_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) { +; GFX10PLUS-GISE-LABEL: atomic_add_2darraymsaa: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_add_2darraymsaa: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_add_2darraymsaa: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_add_uint v0, [v1, v2, v3, v4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_add_2darraymsaa: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2, v3, v4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @atomic_add_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { +; GFX10PLUS-GISE-LABEL: atomic_add_1d_slc: +; GFX10PLUS-GISE: ; %bb.0: +; GFX10PLUS-GISE-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm slc +; GFX10PLUS-GISE-NEXT: s_endpgm +; +; GFX10PLUS-LABEL: atomic_add_1d_slc: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm slc +; GFX10PLUS-NEXT: s_endpgm +; +; GFX12-GISE-LABEL: atomic_add_1d_slc: +; GFX12-GISE: ; %bb.0: +; GFX12-GISE-NEXT: image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT +; GFX12-GISE-NEXT: s_endpgm +; +; GFX12-LABEL: atomic_add_1d_slc: +; GFX12: ; %bb.0: +; GFX12-NEXT: image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT +; GFX12-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2) + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll index 3d1d6c87eb98d..0ba62e49cabc3 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll @@ -41,15 +41,13 @@ main_body: define amdgpu_ps float @atomic_pk_add_f16_1d_v2_noret(<8 x i32> inreg %rsrc, <2 x half> %data, i32 %s) { ; GFX12-SDAG-LABEL: atomic_pk_add_f16_1d_v2_noret: ; GFX12-SDAG: ; %bb.0: ; %main_body -; GFX12-SDAG-NEXT: image_atomic_pk_add_f16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN -; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 +; GFX12-SDAG-NEXT: image_atomic_pk_add_f16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX12-SDAG-NEXT: ; return to shader part epilog ; ; GFX12-GISEL-LABEL: atomic_pk_add_f16_1d_v2_noret: ; GFX12-GISEL: ; %bb.0: ; %main_body -; GFX12-GISEL-NEXT: image_atomic_pk_add_f16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN -; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 +; GFX12-GISEL-NEXT: image_atomic_pk_add_f16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX12-GISEL-NEXT: ; return to shader part epilog 
 main_body:
@@ -79,15 +77,13 @@ main_body:
 define amdgpu_ps float @atomic_pk_add_f16_1d_v4_noret(<8 x i32> inreg %rsrc, <4 x half> %data, i32 %s) {
 ; GFX12-SDAG-LABEL: atomic_pk_add_f16_1d_v4_noret:
 ; GFX12-SDAG: ; %bb.0: ; %main_body
-; GFX12-SDAG-NEXT: image_atomic_pk_add_f16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
-; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-SDAG-NEXT: image_atomic_pk_add_f16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
 ; GFX12-SDAG-NEXT: ; return to shader part epilog
 ;
 ; GFX12-GISEL-LABEL: atomic_pk_add_f16_1d_v4_noret:
 ; GFX12-GISEL: ; %bb.0: ; %main_body
-; GFX12-GISEL-NEXT: image_atomic_pk_add_f16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
-; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GISEL-NEXT: image_atomic_pk_add_f16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
 ; GFX12-GISEL-NEXT: ; return to shader part epilog
 main_body:
@@ -126,15 +122,13 @@ main_body:
 define amdgpu_ps float @atomic_pk_add_bf16_1d_v2_noret(<8 x i32> inreg %rsrc, <2 x bfloat> %data, i32 %s) {
 ; GFX12-SDAG-LABEL: atomic_pk_add_bf16_1d_v2_noret:
 ; GFX12-SDAG: ; %bb.0: ; %main_body
-; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
-; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
 ; GFX12-SDAG-NEXT: ; return to shader part epilog
 ;
 ; GFX12-GISEL-LABEL: atomic_pk_add_bf16_1d_v2_noret:
 ; GFX12-GISEL: ; %bb.0: ; %main_body
-; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
-; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
 ; GFX12-GISEL-NEXT: ; return to shader part epilog
 main_body:
@@ -173,15 +167,13 @@ main_body:
 define amdgpu_ps float @atomic_pk_add_bf16_1d_v4_noret(<8 x i32> inreg %rsrc, <4 x bfloat> %data, i32 %s) {
 ; GFX12-SDAG-LABEL: atomic_pk_add_bf16_1d_v4_noret:
 ; GFX12-SDAG: ; %bb.0: ; %main_body
-; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
-; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
 ; GFX12-SDAG-NEXT: ; return to shader part epilog
 ;
 ; GFX12-GISEL-LABEL: atomic_pk_add_bf16_1d_v4_noret:
 ; GFX12-GISEL: ; %bb.0: ; %main_body
-; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
-; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
 ; GFX12-GISEL-NEXT: ; return to shader part epilog
 main_body:
@@ -192,15 +184,13 @@ main_body:
 define amdgpu_ps float @atomic_pk_add_bf16_1d_v4_nt(<8 x i32> inreg %rsrc, <4 x bfloat> %data, i32 %s) {
 ; GFX12-SDAG-LABEL: atomic_pk_add_bf16_1d_v4_nt:
 ; GFX12-SDAG: ; %bb.0: ; %main_body
-; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT_RETURN
-; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT
 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
 ; GFX12-SDAG-NEXT: ; return to shader part epilog
 ;
 ; GFX12-GISEL-LABEL: atomic_pk_add_bf16_1d_v4_nt:
 ; GFX12-GISEL: ; %bb.0: ; %main_body
-; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT_RETURN
-; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT
 ; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
 ; GFX12-GISEL-NEXT: ; return to shader part epilog
 main_body:
diff --git a/llvm/test/MC/AMDGPU/buffer-op-swz-operand.s b/llvm/test/MC/AMDGPU/buffer-op-swz-operand.s
index 8bd91484d149c..4542027b0df90 100644
--- a/llvm/test/MC/AMDGPU/buffer-op-swz-operand.s
+++ b/llvm/test/MC/AMDGPU/buffer-op-swz-operand.s
@@ -2,7 +2,7 @@
 // CHECK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1100"
 buffer_load_dwordx4 v[0:3], v0, s[0:3], 0, offen offset:4092 slc
-// CHECK: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092 slc ;
 // CHECK-NEXT: ;
 // CHECK-NEXT: ;
@@ -11,7 +11,7 @@ buffer_load_dwordx4 v[0:3], v0, s[0:3], 0, offen offset:4092 slc
 // CHECK-NEXT: ;
 // CHECK-NEXT: ; >
 buffer_store_dword v0, v1, s[0:3], 0 offen slc
-// CHECK: buffer_store_b32 v0, v1, s[0:3], 0 offen slc ;
 // CHECK-NEXT: ;
 // CHECK-NEXT: ;
@@ -22,7 +22,7 @@ buffer_store_dword v0, v1, s[0:3], 0 offen slc
 ; tbuffer ops use autogenerate asm parsers
 tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092 slc
-// CHECK: tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092 slc ;
 // CHECK-NEXT: ;
 // CHECK-NEXT: ;
@@ -32,7 +32,7 @@ tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen
 // CHECK-NEXT: ;
 // CHECK-NEXT: ; >
 tbuffer_store_d16_format_x v0, v1, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] offen slc
-// CHECK: tbuffer_store_d16_format_x v0, v1, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] offen slc ;
 // CHECK-NEXT: ;
 // CHECK-NEXT: ;