@@ -1485,7 +1485,9 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
14851485 Register DstReg = MI.getOperand (0 ).getReg ();
14861486 LLT Ty = MRI.getType (DstReg);
14871487
1488+ const LLT S64 = LLT::scalar (64 );
14881489 const LLT S32 = LLT::scalar (32 );
1490+ const LLT S16 = LLT::scalar (16 );
14891491
14901492 unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1 ;
14911493 Register SrcReg = MI.getOperand (FirstOpnd).getReg ();
@@ -1495,6 +1497,18 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
14951497 const RegisterBank *DstBank =
14961498 OpdMapper.getInstrMapping ().getOperandMapping (0 ).BreakDown [0 ].RegBank ;
14971499 if (DstBank == &AMDGPU::VGPRRegBank) {
1500+ if (Ty == S16) {
1501+ ApplyRegBankMapping ApplyBank (B, *this , MRI, &AMDGPU::VGPRRegBank);
1502+ B.setInsertPt (B.getMBB (), MI);
1503+ LegalizerHelper Helper (B.getMF (), ApplyBank, B);
1504+
1505+ Helper.widenScalarDst (MI, S32);
1506+ Helper.widenScalarSrc (MI, S32, 1 , AMDGPU::G_ANYEXT);
1507+ Helper.widenScalarSrc (MI, S32, 2 , AMDGPU::G_ZEXT);
1508+ Helper.widenScalarSrc (MI, S32, 3 , AMDGPU::G_ZEXT);
1509+ return true ;
1510+ }
1511+
14981512 if (Ty == S32)
14991513 return true ;
15001514
@@ -1554,6 +1568,11 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
15541568
15551569 ApplyRegBankMapping ApplyBank (B, *this , MRI, &AMDGPU::SGPRRegBank);
15561570
1571+ if (Ty == S16) {
1572+ OffsetReg = B.buildAnyExtOrTrunc (S32, OffsetReg).getReg (0 );
1573+ WidthReg = B.buildAnyExtOrTrunc (S32, WidthReg).getReg (0 );
1574+ }
1575+
15571576 // Ensure the high bits are clear to insert the offset.
15581577 auto OffsetMask = B.buildConstant (S32, maskTrailingOnes<unsigned >(6 ));
15591578 auto ClampOffset = B.buildAnd (S32, OffsetReg, OffsetMask);
@@ -1568,13 +1587,21 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(MachineIRBuilder &B,
15681587
15691588 // TODO: It might be worth using a pseudo here to avoid scc clobber and
15701589 // register class constraints.
1571- unsigned Opc = Ty == S32 ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) :
1572- (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
1590+ unsigned Opc = ( Ty != S64) ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32)
1591+ : (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
15731592
1574- auto MIB = B.buildInstr (Opc, {DstReg}, {SrcReg, MergedInputs});
1593+ Register BFEDst = DstReg;
1594+ if (Ty == S16) {
1595+ BFEDst = MRI.createGenericVirtualRegister (S32);
1596+ MRI.setRegBank (BFEDst, AMDGPU::SGPRRegBank);
1597+ }
1598+ auto MIB = B.buildInstr (Opc, {BFEDst}, {SrcReg, MergedInputs});
15751599 if (!constrainSelectedInstRegOperands (*MIB, *TII, *TRI, *this ))
15761600 llvm_unreachable (" failed to constrain BFE" );
15771601
1602+ if (BFEDst != DstReg)
1603+ B.buildZExtOrTrunc (DstReg, BFEDst);
1604+
15781605 MI.eraseFromParent ();
15791606 return true ;
15801607}
0 commit comments