@@ -3544,7 +3544,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
       Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
       Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
       Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
-      Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
       Opc == AMDGPU::V_FMAC_F16_fake16_e64) {
     // Don't fold if we are using source or output modifiers. The new VOP2
     // instructions don't have them.
@@ -3565,7 +3564,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
     bool IsFMA =
         Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
         Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
-        Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
         Opc == AMDGPU::V_FMAC_F16_fake16_e64;
     MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
     MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
@@ -3599,19 +3597,16 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
 
       unsigned NewOpc =
           IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
-                   : ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
-                                                  ? AMDGPU::V_FMAMK_F16_t16
-                                                  : AMDGPU::V_FMAMK_F16_fake16
+                   : ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
                                             : AMDGPU::V_FMAMK_F16)
                 : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);
       if (pseudoToMCOpcode(NewOpc) == -1)
         return false;
 
-      // V_FMAMK_F16_t16 takes VGPR_16_Lo128 operands while V_FMAMK_F16_fake16
-      // takes VGPR_32_Lo128 operands, so the rewrite would also require
-      // restricting their register classes. For now just bail out.
-      if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
-          NewOpc == AMDGPU::V_FMAMK_F16_fake16)
+      // V_FMAMK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite
+      // would also require restricting their register classes. For now
+      // just bail out.
+      if (NewOpc == AMDGPU::V_FMAMK_F16_fake16)
         return false;
 
       const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);
@@ -3626,7 +3621,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
       Src0->setIsKill(RegSrc->isKill());
 
       if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
-          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
+          Opc == AMDGPU::V_FMAC_F32_e64 ||
           Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
         UseMI.untieRegOperand(
             AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
@@ -3681,26 +3676,23 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
 
       unsigned NewOpc =
           IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
-                   : ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
-                                                  ? AMDGPU::V_FMAAK_F16_t16
-                                                  : AMDGPU::V_FMAAK_F16_fake16
+                   : ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
                                             : AMDGPU::V_FMAAK_F16)
                 : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);
       if (pseudoToMCOpcode(NewOpc) == -1)
         return false;
 
-      // V_FMAAK_F16_t16 takes VGPR_16_Lo128 operands while V_FMAAK_F16_fake16
-      // takes VGPR_32_Lo128 operands, so the rewrite would also require
-      // restricting their register classes. For now just bail out.
-      if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
-          NewOpc == AMDGPU::V_FMAAK_F16_fake16)
+      // V_FMAAK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite
+      // would also require restricting their register classes. For now
+      // just bail out.
+      if (NewOpc == AMDGPU::V_FMAAK_F16_fake16)
         return false;
 
       // FIXME: This would be a lot easier if we could return a new instruction
       // instead of having to modify in place.
 
       if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
-          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
+          Opc == AMDGPU::V_FMAC_F32_e64 ||
           Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
         UseMI.untieRegOperand(
             AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
@@ -3887,11 +3879,8 @@ static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc) {
     return AMDGPU::V_FMA_LEGACY_F32_e64;
   case AMDGPU::V_FMAC_F16_e32:
   case AMDGPU::V_FMAC_F16_e64:
-  case AMDGPU::V_FMAC_F16_t16_e64:
   case AMDGPU::V_FMAC_F16_fake16_e64:
-    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
-                                        ? AMDGPU::V_FMA_F16_gfx9_t16_e64
-                                        : AMDGPU::V_FMA_F16_gfx9_fake16_e64
+    return ST.hasTrue16BitInsts() ? AMDGPU::V_FMA_F16_gfx9_fake16_e64
                                   : AMDGPU::V_FMA_F16_gfx9_e64;
   case AMDGPU::V_FMAC_F32_e32:
   case AMDGPU::V_FMAC_F32_e64:
@@ -3957,22 +3946,19 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
     return MIB;
   }
 
-  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
-         Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
-         "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "
-         "present "
-         "pre-RA");
+  assert(
+      Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
+      "V_FMAC_F16_fake16_e32 is not supported and not expected to be present "
+      "pre-RA");
 
   // Handle MAC/FMAC.
   bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
                Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
-               Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
                Opc == AMDGPU::V_FMAC_F16_fake16_e64;
   bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
                Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
                Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
-               Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
                Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
                Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
   bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
@@ -3987,7 +3973,6 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
     return nullptr;
   case AMDGPU::V_MAC_F16_e64:
   case AMDGPU::V_FMAC_F16_e64:
-  case AMDGPU::V_FMAC_F16_t16_e64:
   case AMDGPU::V_FMAC_F16_fake16_e64:
   case AMDGPU::V_MAC_F32_e64:
   case AMDGPU::V_MAC_LEGACY_F32_e64:
@@ -4073,11 +4058,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
   int64_t Imm;
   if (!Src0Literal && getFoldableImm(Src2, Imm, &DefMI)) {
     unsigned NewOpc =
-        IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts()
-                              ? ST.useRealTrue16Insts()
-                                    ? AMDGPU::V_FMAAK_F16_t16
-                                    : AMDGPU::V_FMAAK_F16_fake16
-                              : AMDGPU::V_FMAAK_F16)
+        IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
+                                                 : AMDGPU::V_FMAAK_F16)
                        : AMDGPU::V_FMAAK_F32)
               : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
     if (pseudoToMCOpcode(NewOpc) != -1) {
@@ -4094,14 +4076,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
       return MIB;
     }
   }
-  unsigned NewOpc = IsFMA
-                        ? (IsF16 ? (ST.hasTrue16BitInsts()
-                                        ? ST.useRealTrue16Insts()
-                                              ? AMDGPU::V_FMAMK_F16_t16
-                                              : AMDGPU::V_FMAMK_F16_fake16
-                                        : AMDGPU::V_FMAMK_F16)
-                                 : AMDGPU::V_FMAMK_F32)
-                        : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
+  unsigned NewOpc =
+      IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
+                                               : AMDGPU::V_FMAMK_F16)
+                     : AMDGPU::V_FMAMK_F32)
+            : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
   if (!Src0Literal && getFoldableImm(Src1, Imm, &DefMI)) {
     if (pseudoToMCOpcode(NewOpc) != -1) {
       MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
@@ -4547,7 +4526,6 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI,
   case AMDGPU::V_MAC_F32_e64:
   case AMDGPU::V_MAC_LEGACY_F32_e64:
   case AMDGPU::V_FMAC_F16_e64:
-  case AMDGPU::V_FMAC_F16_t16_e64:
   case AMDGPU::V_FMAC_F16_fake16_e64:
   case AMDGPU::V_FMAC_F32_e64:
   case AMDGPU::V_FMAC_F64_e64:
@@ -5604,9 +5582,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
   case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
   case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
   case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
-  case AMDGPU::S_FMAC_F16:
-    return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
-                                   : AMDGPU::V_FMAC_F16_fake16_e64;
+  case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_fake16_e64;
   case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
   case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
   case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;
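
For reference, a minimal sketch (not part of the patch) of the opcode-selection pattern the hunks above collapse: with the _t16 paths removed, a subtarget that reports hasTrue16BitInsts() now always gets the fake16 pseudo, and useRealTrue16Insts() is no longer consulted on these code paths. The FMAMK opcode below is one representative; the FMAAK, FMAC, and FMA_gfx9 cases follow the same shape. The helper name is hypothetical, and the snippet assumes the AMDGPU backend headers for GCNSubtarget and the AMDGPU opcode enums.

// Sketch only: mirrors the selection logic kept by this patch.
static unsigned selectFMAMKF16Opc(const GCNSubtarget &ST) {
  // Before this change, ST.useRealTrue16Insts() could further pick
  // V_FMAMK_F16_t16 over the fake16 form; now only fake16 is used.
  return ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
                                : AMDGPU::V_FMAMK_F16;
}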