diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index a402fc6d7e611..e811024b73d31 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -875,6 +875,7 @@ GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def, return DataIdx >= 0 && TRI->regsOverlap(MI.getOperand(DataIdx).getReg(), Reg); }; + int WaitStatesNeededForDef = VALUWaitStates - getWaitStatesSince(IsHazardFn, VALUWaitStates); WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); @@ -882,6 +883,70 @@ GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def, return WaitStatesNeeded; } +/// Dest sel forwarding issue occurs if additional logic is needed to swizzle / +/// pack the computed value into correct bit position of the dest register. This +/// occurs if we have SDWA with dst_sel != DWORD or if we have op_sel with +/// dst_sel that is not aligned to the register. This function analyzes the \p +/// MI and \returns an operand with dst forwarding issue, or nullptr if +/// none exists. +static const MachineOperand * +getDstSelForwardingOperand(const MachineInstr &MI, const GCNSubtarget &ST) { + if (!SIInstrInfo::isVALU(MI)) + return nullptr; + + const SIInstrInfo *TII = ST.getInstrInfo(); + + unsigned Opcode = MI.getOpcode(); + + // There are three different types of instructions + // which produce forwarded dest: 1. SDWA with dst_sel != DWORD, 2. VOP3 + // which write hi bits (e.g. op_sel[3] == 1), and 3. 
CVT_SR_FP8_F32 and + // CVT_SR_BF8_F32 with op_sel[3:2] + // != 0 + if (SIInstrInfo::isSDWA(MI)) { + // Type 1: SDWA with dst_sel != DWORD + if (auto *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel)) + if (DstSel->getImm() == AMDGPU::SDWA::DWORD) + return nullptr; + } else { + // Type 2 && Type 3: (VOP3 which write the hi bits) || (CVT_SR_FP8_F32 and + // CVT_SR_BF8_F32 with op_sel[3:2] != 0) + if (!AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::op_sel) || + !(TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm() & + SISrcMods::DST_OP_SEL || + (AMDGPU::isFP8DstSelInst(Opcode) && + (TII->getNamedOperand(MI, AMDGPU::OpName::src2_modifiers)->getImm() & + SISrcMods::OP_SEL_0)))) + return nullptr; + } + + return TII->getNamedOperand(MI, AMDGPU::OpName::vdst); +} + +/// Checks whether the provided \p VALU "consumes" the operand with a Dest sel +/// forwarding issue \p Dst . We may "consume" the Dst via a standard explicit +/// RAW, or through irregular ways (e.g. implicit RAW, certain types of WAW) +static bool consumesDstSelForwardingOperand(const MachineInstr *VALU, + const MachineOperand *Dst, + const SIRegisterInfo *TRI) { + // We must consider implicit reads of the VALU. SDWA with dst_sel and + // UNUSED_PRESERVE will implicitly read the result from forwarded dest, + // and we must account for that hazard. + // We also must account for WAW hazards. In particular, WAW with dest + // preserve semantics (e.g. VOP3 with op_sel, VOP2 && + // !zeroesHigh16BitsOfDest) will read the forwarded dest for parity + // check for ECC. Without accounting for this hazard, the ECC will be + // wrong. + // TODO: limit to RAW (including implicit reads) + problematic WAW (i.e. 
+ // complete zeroesHigh16BitsOfDest) + for (auto &Operand : VALU->operands()) { + if (Operand.isReg() && TRI->regsOverlap(Dst->getReg(), Operand.getReg())) { + return true; + } + } + return false; +} + int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) { int WaitStatesNeeded = 0; @@ -912,27 +977,18 @@ int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) { if (ST.hasDstSelForwardingHazard()) { const int Shift16DefWaitstates = 1; - auto IsShift16BitDefFn = [this, VALU](const MachineInstr &MI) { - if (!SIInstrInfo::isVALU(MI)) - return false; - const SIInstrInfo *TII = ST.getInstrInfo(); - if (SIInstrInfo::isSDWA(MI)) { - if (auto *DstSel = TII->getNamedOperand(MI, AMDGPU::OpName::dst_sel)) - if (DstSel->getImm() == AMDGPU::SDWA::DWORD) - return false; - } else { - if (!AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::op_sel) || - !(TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers) - ->getImm() & - SISrcMods::DST_OP_SEL)) - return false; - } + auto IsShift16BitDefFn = [this, VALU](const MachineInstr &ProducerMI) { const SIRegisterInfo *TRI = ST.getRegisterInfo(); - if (auto *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) { - Register Def = Dst->getReg(); + const MachineOperand *ForwardedDst = + getDstSelForwardingOperand(ProducerMI, ST); + if (ForwardedDst) { + return consumesDstSelForwardingOperand(VALU, ForwardedDst, TRI); + } - for (const MachineOperand &Use : VALU->explicit_uses()) { - if (Use.isReg() && TRI->regsOverlap(Def, Use.getReg())) + if (ProducerMI.isInlineAsm()) { + // Assume inline asm has dst forwarding hazard + for (auto &Def : ProducerMI.all_defs()) { + if (consumesDstSelForwardingOperand(VALU, &Def, TRI)) return true; } } @@ -1029,7 +1085,7 @@ int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) { // problematic thus far. 
// see checkVALUHazards() - if (!ST.has12DWordStoreHazard()) + if (!ST.has12DWordStoreHazard() && !ST.hasDstSelForwardingHazard()) return 0; const MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -1038,11 +1094,45 @@ int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) { for (const MachineOperand &Op : llvm::drop_begin(IA->operands(), InlineAsm::MIOp_FirstOperand)) { if (Op.isReg() && Op.isDef()) { - WaitStatesNeeded = - std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI)); + if (!TRI.isVectorRegister(MRI, Op.getReg())) + continue; + + if (ST.has12DWordStoreHazard()) { + WaitStatesNeeded = + std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI)); + } } } + if (ST.hasDstSelForwardingHazard()) { + const int Shift16DefWaitstates = 1; + + auto IsShift16BitDefFn = [this, &IA](const MachineInstr &ProducerMI) { + const MachineOperand *Dst = getDstSelForwardingOperand(ProducerMI, ST); + // Assume inline asm reads the dst + if (Dst) + return IA->modifiesRegister(Dst->getReg(), &TRI) || + IA->readsRegister(Dst->getReg(), &TRI); + + if (ProducerMI.isInlineAsm()) { + // If MI is inline asm, assume it has dst forwarding hazard + for (auto &Def : ProducerMI.all_defs()) { + if (IA->modifiesRegister(Def.getReg(), &TRI) || + IA->readsRegister(Def.getReg(), &TRI)) { + return true; + } + } + } + + return false; + }; + + int WaitStatesNeededForDef = + Shift16DefWaitstates - + getWaitStatesSince(IsShift16BitDefFn, Shift16DefWaitstates); + WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef); + } + return WaitStatesNeeded; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index d2838349340d2..d0cb15629ac1e 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2325,6 +2325,7 @@ class VOPProfile _ArgVT, bit _EnableClamp = 0> { field bit IsFP8SrcByteSel = 0; field bit IsFP8DstByteSel = 0; + field bit HasFP8DstByteSel = 0; field bit IsFP8ByteSel = 
!or(IsFP8SrcByteSel, IsFP8DstByteSel); field bit HasDst = !ne(DstVT.Value, untyped.Value); @@ -2904,6 +2905,15 @@ def getVCMPXOpFromVCMP : InstrMapping { let ValueCols = [["1"]]; } +def FP8DstByteSelTable : GenericTable { + let FilterClass = "VOP3_Pseudo"; + let CppTypeName = "FP8DstByteSelInfo"; + let Fields = ["Opcode", "HasFP8DstByteSel"]; + + let PrimaryKey = ["Opcode"]; + let PrimaryKeyName = "getFP8DstByteSelHelper"; +} + def VOPDComponentTable : GenericTable { let FilterClass = "VOPD_Component"; let CppTypeName = "VOPDComponentInfo"; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 96d4863e94014..c3a648b3c2e06 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -385,6 +385,13 @@ struct SingleUseExceptionInfo { bool IsInvalidSingleUseProducer; }; +struct FP8DstByteSelInfo { + uint16_t Opcode; + bool HasFP8DstByteSel; +}; + +#define GET_FP8DstByteSelTable_DECL +#define GET_FP8DstByteSelTable_IMPL #define GET_MTBUFInfoTable_DECL #define GET_MTBUFInfoTable_IMPL #define GET_MUBUFInfoTable_DECL @@ -629,6 +636,11 @@ bool isInvalidSingleUseProducerInst(unsigned Opc) { return Info && Info->IsInvalidSingleUseProducer; } +bool isFP8DstSelInst(unsigned Opc) { + const FP8DstByteSelInfo *Info = getFP8DstByteSelHelper(Opc); + return Info ? Info->HasFP8DstByteSel : false; +} + unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) { const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc); return Info ? 
Info->Opcode3Addr : ~0u; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 429c3ad335d21..cb31cd690101d 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -861,6 +861,9 @@ getVOPDInstInfo(unsigned VOPDOpcode, const MCInstrInfo *InstrInfo); LLVM_READONLY bool isTrue16Inst(unsigned Opc); +LLVM_READONLY +bool isFP8DstSelInst(unsigned Opc); + LLVM_READONLY bool isInvalidSingleUseConsumerInst(unsigned Opc); diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index efa8e9c74d449..01b1c63dd0d6e 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -568,6 +568,7 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile, let HasSrc2Mods = 1; let HasExtVOP3DPP = 1; let HasOpSel = 1; + let HasFP8DstByteSel = 1; let AsmVOP3OpSel = !subst(", $src2_modifiers", "", getAsmVOP3OpSel<3, HasClamp, HasOMod, HasSrc0FloatMods, HasSrc1FloatMods, @@ -587,6 +588,7 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile, class VOP3_CVT_SR_F8_ByteSel_Profile : VOP3_Profile> { let IsFP8DstByteSel = 1; + let HasFP8DstByteSel = 1; let HasClamp = 0; defvar bytesel = (ins VGPR_32:$vdst_in, ByteSel:$byte_sel); let Ins64 = !con(getIns64 pattern = [], let IsWMMA = P.IsWMMA; let IsSWMMAC = P.IsSWMMAC; + bit HasFP8DstByteSel = P.HasFP8DstByteSel; + let AsmOperands = !if(isVop3OpSel, P.AsmVOP3OpSel, !if(!and(isVOP3P, P.IsPacked), P.AsmVOP3P, P.Asm64)); diff --git a/llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir b/llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir new file mode 100644 index 0000000000000..e24817078d8bc --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir @@ -0,0 +1,436 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -run-pass post-RA-hazard-rec -o - %s | FileCheck 
-check-prefix=HAZARD %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=NOHAZARD %s + +--- +name: sdwa_opsel_hazard +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + + ; HAZARD-LABEL: name: sdwa_opsel_hazard + ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; HAZARD-NEXT: {{ $}} + ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec + ; HAZARD-NEXT: S_NOP 0 + ; HAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + ; HAZARD-NEXT: S_ENDPGM 0 + ; + ; NOHAZARD-LABEL: name: sdwa_opsel_hazard + ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; NOHAZARD-NEXT: {{ $}} + ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec + ; NOHAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + ; NOHAZARD-NEXT: S_ENDPGM 0 + renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec + renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + S_ENDPGM 0 +... 
+ +--- +name: sdwa_lo_opsel_hazard +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + + ; HAZARD-LABEL: name: sdwa_lo_opsel_hazard + ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; HAZARD-NEXT: {{ $}} + ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec + ; HAZARD-NEXT: S_NOP 0 + ; HAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 4, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + ; HAZARD-NEXT: S_ENDPGM 0 + ; + ; NOHAZARD-LABEL: name: sdwa_lo_opsel_hazard + ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; NOHAZARD-NEXT: {{ $}} + ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec + ; NOHAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 4, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + ; NOHAZARD-NEXT: S_ENDPGM 0 + renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec + renamable $vgpr0 = V_MAD_U16_gfx9_e64 4, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + S_ENDPGM 0 +... 
+ +--- +name: opsel_sdwa_hazard +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + + ; HAZARD-LABEL: name: opsel_sdwa_hazard + ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; HAZARD-NEXT: {{ $}} + ; HAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + ; HAZARD-NEXT: S_NOP 0 + ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + ; HAZARD-NEXT: S_ENDPGM 0 + ; + ; NOHAZARD-LABEL: name: opsel_sdwa_hazard + ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; NOHAZARD-NEXT: {{ $}} + ; NOHAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + ; NOHAZARD-NEXT: S_ENDPGM 0 + renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + S_ENDPGM 0 +... 
+ + +# TODO -- there is no reason for s_nop (V_ADD_U16 doesn't preserve the dest) + +--- +name: opsel_no_sdwa_no_hazard +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + + ; HAZARD-LABEL: name: opsel_no_sdwa_no_hazard + ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; HAZARD-NEXT: {{ $}} + ; HAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + ; HAZARD-NEXT: S_NOP 0 + ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec + ; HAZARD-NEXT: S_ENDPGM 0 + ; + ; NOHAZARD-LABEL: name: opsel_no_sdwa_no_hazard + ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; NOHAZARD-NEXT: {{ $}} + ; NOHAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec + ; NOHAZARD-NEXT: S_ENDPGM 0 + renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec + S_ENDPGM 0 +... 
+ +--- +name: no_opsel_sdwa_no_hazard +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + + ; HAZARD-LABEL: name: no_opsel_sdwa_no_hazard + ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; HAZARD-NEXT: {{ $}} + ; HAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_e64 killed $vgpr3, killed $vgpr4, killed $vgpr2, 0, implicit $exec + ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + ; HAZARD-NEXT: S_ENDPGM 0 + ; + ; NOHAZARD-LABEL: name: no_opsel_sdwa_no_hazard + ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; NOHAZARD-NEXT: {{ $}} + ; NOHAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_e64 killed $vgpr3, killed $vgpr4, killed $vgpr2, 0, implicit $exec + ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + ; NOHAZARD-NEXT: S_ENDPGM 0 + renamable $vgpr0 = V_MAD_U16_e64 killed $vgpr3, killed $vgpr4, killed $vgpr2, 0, implicit $exec + renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + S_ENDPGM 0 +... 
+ +--- +name: opsel_opsel_hazard +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + + ; HAZARD-LABEL: name: opsel_opsel_hazard + ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; HAZARD-NEXT: {{ $}} + ; HAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + ; HAZARD-NEXT: S_NOP 0 + ; HAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 4, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + ; HAZARD-NEXT: S_ENDPGM 0 + ; + ; NOHAZARD-LABEL: name: opsel_opsel_hazard + ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; NOHAZARD-NEXT: {{ $}} + ; NOHAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + ; NOHAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 4, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + ; NOHAZARD-NEXT: S_ENDPGM 0 + renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + renamable $vgpr0 = V_MAD_U16_gfx9_e64 4, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + S_ENDPGM 0 +... 
+ +# TODO -- there is no reason for s_nop + +--- +name: opsel_opsel_no_hazard +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + + ; HAZARD-LABEL: name: opsel_opsel_no_hazard + ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; HAZARD-NEXT: {{ $}} + ; HAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + ; HAZARD-NEXT: S_NOP 0 + ; HAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + ; HAZARD-NEXT: S_ENDPGM 0 + ; + ; NOHAZARD-LABEL: name: opsel_opsel_no_hazard + ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; NOHAZARD-NEXT: {{ $}} + ; NOHAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + ; NOHAZARD-NEXT: renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + ; NOHAZARD-NEXT: S_ENDPGM 0 + renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + renamable $vgpr0 = V_MAD_U16_gfx9_e64 12, killed $vgpr3, 4, killed $vgpr4, 4, killed $vgpr2, 0, 0, implicit $exec + S_ENDPGM 0 +... 
+ +# DS_READ_U16_D16 has dest preserve semantics, but only VALU consumers have hazard + +--- +name: sdwa_loadsel_no_hazard +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + + ; HAZARD-LABEL: name: sdwa_loadsel_no_hazard + ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; HAZARD-NEXT: {{ $}} + ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 3, 0, 3, 3, implicit $exec + ; HAZARD-NEXT: renamable $vgpr0 = DS_READ_U16_D16 killed renamable $vgpr3, 0, 0, killed renamable $vgpr0, implicit $exec + ; HAZARD-NEXT: S_ENDPGM 0 + ; + ; NOHAZARD-LABEL: name: sdwa_loadsel_no_hazard + ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; NOHAZARD-NEXT: {{ $}} + ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 3, 0, 3, 3, implicit $exec + ; NOHAZARD-NEXT: renamable $vgpr0 = DS_READ_U16_D16 killed renamable $vgpr3, 0, 0, killed renamable $vgpr0, implicit $exec + ; NOHAZARD-NEXT: S_ENDPGM 0 + renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 3, 0, 3, 3, implicit $exec + renamable $vgpr0 = DS_READ_U16_D16 killed renamable $vgpr3, 0, 0, killed renamable $vgpr0, implicit $exec + S_ENDPGM 0 +... 
+ +--- +name: sdwa_sdwa_hazard +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + + ; HAZARD-LABEL: name: sdwa_sdwa_hazard + ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; HAZARD-NEXT: {{ $}} + ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec + ; HAZARD-NEXT: S_NOP 0 + ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + ; HAZARD-NEXT: S_ENDPGM 0 + ; + ; NOHAZARD-LABEL: name: sdwa_sdwa_hazard + ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; NOHAZARD-NEXT: {{ $}} + ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec + ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + ; NOHAZARD-NEXT: S_ENDPGM 0 + renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec + renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + S_ENDPGM 0 +... 
+ +--- +name: cvt_sdwa_hazard_1 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + + ; HAZARD-LABEL: name: cvt_sdwa_hazard_1 + ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; HAZARD-NEXT: {{ $}} + ; HAZARD-NEXT: renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 0, killed $vgpr3, 0, killed $vgpr1, 4, $vgpr0, 0, implicit $mode, implicit $exec + ; HAZARD-NEXT: S_NOP 0 + ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + ; HAZARD-NEXT: S_ENDPGM 0 + ; + ; NOHAZARD-LABEL: name: cvt_sdwa_hazard_1 + ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; NOHAZARD-NEXT: {{ $}} + ; NOHAZARD-NEXT: renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 0, killed $vgpr3, 0, killed $vgpr1, 4, $vgpr0, 0, implicit $mode, implicit $exec + ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + ; NOHAZARD-NEXT: S_ENDPGM 0 + renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 0, killed $vgpr3, 0, killed $vgpr1, 4, $vgpr0, 0, implicit $mode, implicit $exec + renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + S_ENDPGM 0 +... 
+ +--- +name: cvt_sdwa_hazard_2 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + + ; HAZARD-LABEL: name: cvt_sdwa_hazard_2 + ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; HAZARD-NEXT: {{ $}} + ; HAZARD-NEXT: renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 8, killed $vgpr3, 0, killed $vgpr1, 0, $vgpr0, 0, implicit $mode, implicit $exec + ; HAZARD-NEXT: S_NOP 0 + ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + ; HAZARD-NEXT: S_ENDPGM 0 + ; + ; NOHAZARD-LABEL: name: cvt_sdwa_hazard_2 + ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; NOHAZARD-NEXT: {{ $}} + ; NOHAZARD-NEXT: renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 8, killed $vgpr3, 0, killed $vgpr1, 0, $vgpr0, 0, implicit $mode, implicit $exec + ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + ; NOHAZARD-NEXT: S_ENDPGM 0 + renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 8, killed $vgpr3, 0, killed $vgpr1, 0, $vgpr0, 0, implicit $mode, implicit $exec + renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + S_ENDPGM 0 +... 
+ +--- +name: cvt_sdwa_hazard_3 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + + ; HAZARD-LABEL: name: cvt_sdwa_hazard_3 + ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; HAZARD-NEXT: {{ $}} + ; HAZARD-NEXT: renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 8, killed $vgpr3, 0, killed $vgpr1, 4, $vgpr0, 0, implicit $mode, implicit $exec + ; HAZARD-NEXT: S_NOP 0 + ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + ; HAZARD-NEXT: S_ENDPGM 0 + ; + ; NOHAZARD-LABEL: name: cvt_sdwa_hazard_3 + ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; NOHAZARD-NEXT: {{ $}} + ; NOHAZARD-NEXT: renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 8, killed $vgpr3, 0, killed $vgpr1, 4, $vgpr0, 0, implicit $mode, implicit $exec + ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + ; NOHAZARD-NEXT: S_ENDPGM 0 + renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 8, killed $vgpr3, 0, killed $vgpr1, 4, $vgpr0, 0, implicit $mode, implicit $exec + renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + S_ENDPGM 0 +... 
+ +--- +name: cvt_sdwa_no_hazard +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + + ; HAZARD-LABEL: name: cvt_sdwa_no_hazard + ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; HAZARD-NEXT: {{ $}} + ; HAZARD-NEXT: renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 0, killed $vgpr3, 0, killed $vgpr1, 0, $vgpr0, 0, implicit $mode, implicit $exec + ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + ; HAZARD-NEXT: S_ENDPGM 0 + ; + ; NOHAZARD-LABEL: name: cvt_sdwa_no_hazard + ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; NOHAZARD-NEXT: {{ $}} + ; NOHAZARD-NEXT: renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 0, killed $vgpr3, 0, killed $vgpr1, 0, $vgpr0, 0, implicit $mode, implicit $exec + ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + ; NOHAZARD-NEXT: S_ENDPGM 0 + renamable $vgpr0 = V_CVT_SR_FP8_F32_e64 0, killed $vgpr3, 0, killed $vgpr1, 0, $vgpr0, 0, implicit $mode, implicit $exec + renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + S_ENDPGM 0 +... 
+ +# TODO -- there is no reason for s_nop (V_ADD_U16 doesn't preserve the dest) + +--- +name: sdwa_nosdwa_no_hazard +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + + ; HAZARD-LABEL: name: sdwa_nosdwa_no_hazard + ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; HAZARD-NEXT: {{ $}} + ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec + ; HAZARD-NEXT: S_NOP 0 + ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec + ; HAZARD-NEXT: S_ENDPGM 0 + ; + ; NOHAZARD-LABEL: name: sdwa_nosdwa_no_hazard + ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; NOHAZARD-NEXT: {{ $}} + ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec + ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec + ; NOHAZARD-NEXT: S_ENDPGM 0 + renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec + renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec + S_ENDPGM 0 +... 
+ +--- +name: inline_sdwa_hazard +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + + ; HAZARD-LABEL: name: inline_sdwa_hazard + ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; HAZARD-NEXT: {{ $}} + ; HAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1 + ; HAZARD-NEXT: S_NOP 0 + ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + ; HAZARD-NEXT: S_ENDPGM 0 + ; + ; NOHAZARD-LABEL: name: inline_sdwa_hazard + ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; NOHAZARD-NEXT: {{ $}} + ; NOHAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1 + ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + ; NOHAZARD-NEXT: S_ENDPGM 0 + INLINEASM &"v_or_b32 %0, 0, %1", 32, 327690, def $vgpr0, 327689, $vgpr1 + renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + S_ENDPGM 0 +... 
+ +--- +name: sdwa_inline_hazard +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + + ; HAZARD-LABEL: name: sdwa_inline_hazard + ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; HAZARD-NEXT: {{ $}} + ; HAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + ; HAZARD-NEXT: S_NOP 0 + ; HAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1 + ; HAZARD-NEXT: S_ENDPGM 0 + ; + ; NOHAZARD-LABEL: name: sdwa_inline_hazard + ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; NOHAZARD-NEXT: {{ $}} + ; NOHAZARD-NEXT: renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + ; NOHAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1 + ; NOHAZARD-NEXT: S_ENDPGM 0 + renamable $vgpr0 = V_ADD_U16_sdwa 0, $vgpr1, 0, $vgpr2, 0, 1, 0, 3, 3, implicit $exec, implicit killed $vgpr0(tied-def 0) + INLINEASM &"v_or_b32 %0, 0, %1", 32, 327690, def $vgpr0, 327689, $vgpr1 + S_ENDPGM 0 +... 
+ + +--- +name: inline_inline_hazard +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + + ; HAZARD-LABEL: name: inline_inline_hazard + ; HAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; HAZARD-NEXT: {{ $}} + ; HAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1 + ; HAZARD-NEXT: S_NOP 0 + ; HAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1 + ; HAZARD-NEXT: S_ENDPGM 0 + ; + ; NOHAZARD-LABEL: name: inline_inline_hazard + ; NOHAZARD: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $exec, $mode + ; NOHAZARD-NEXT: {{ $}} + ; NOHAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1 + ; NOHAZARD-NEXT: INLINEASM &"v_or_b32 %0, 0, %1", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def $vgpr0, 327689 /* reguse:SReg_1_with_sub0 */, $vgpr1 + ; NOHAZARD-NEXT: S_ENDPGM 0 + INLINEASM &"v_or_b32 %0, 0, %1", 32, 327690, def $vgpr0, 327689, $vgpr1 + INLINEASM &"v_or_b32 %0, 0, %1", 32, 327690, def $vgpr0, 327689, $vgpr1 + S_ENDPGM 0 +... 
+ diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll index d3fc96d7ff801..8313f5b655efb 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll @@ -375,6 +375,7 @@ define i32 @test_cvt_sr_bf8_f32_byte1(float %x, i32 %r, i32 %old) { ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_cvt_sr_bf8_f32 v2, v0, v1 op_sel:[0,0,1,0] +; GFX940-NEXT: s_nop 0 ; GFX940-NEXT: v_mov_b32_e32 v0, v2 ; GFX940-NEXT: s_setpc_b64 s[30:31] ; @@ -469,6 +470,7 @@ define i32 @test_cvt_sr_fp8_f32_byte1(float %x, i32 %r, i32 %old) { ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_cvt_sr_fp8_f32 v2, v0, v1 op_sel:[0,0,1,0] +; GFX940-NEXT: s_nop 0 ; GFX940-NEXT: v_mov_b32_e32 v0, v2 ; GFX940-NEXT: s_setpc_b64 s[30:31] ;