@@ -160,8 +160,9 @@ class ApplyRegBankMapping final : public GISelChangeObserver {
160160 if (!Op.isReg ())
161161 continue ;
162162
163+ // We may see physical registers if building a real MI
163164 Register Reg = Op.getReg ();
164- if (MRI.getRegClassOrRegBank (Reg))
165+ if (Reg. isPhysical () || MRI.getRegClassOrRegBank (Reg))
165166 continue ;
166167
167168 const RegisterBank *RB = NewBank;
@@ -1444,6 +1445,65 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
14441445 return true ;
14451446}
14461447
1448+ bool AMDGPURegisterBankInfo::applyMappingBFEIntrinsic (
1449+ const OperandsMapper &OpdMapper, bool Signed) const {
1450+ MachineInstr &MI = OpdMapper.getMI ();
1451+ MachineRegisterInfo &MRI = OpdMapper.getMRI ();
1452+
1453+ // Insert basic copies
1454+ applyDefaultMapping (OpdMapper);
1455+
1456+ Register DstReg = MI.getOperand (0 ).getReg ();
1457+ LLT Ty = MRI.getType (DstReg);
1458+
1459+ const LLT S32 = LLT::scalar (32 );
1460+
1461+ const RegisterBank *DstBank =
1462+ OpdMapper.getInstrMapping ().getOperandMapping (0 ).BreakDown [0 ].RegBank ;
1463+ if (DstBank == &AMDGPU::VGPRRegBank) {
1464+ if (Ty == S32)
1465+ return true ;
1466+
1467+ // TODO: 64-bit version is scalar only, so we need to expand this.
1468+ return false ;
1469+ }
1470+
1471+ Register SrcReg = MI.getOperand (2 ).getReg ();
1472+ Register OffsetReg = MI.getOperand (3 ).getReg ();
1473+ Register WidthReg = MI.getOperand (4 ).getReg ();
1474+
1475+ // The scalar form packs the offset and width in a single operand.
1476+
1477+ ApplyRegBankMapping ApplyBank (*this , MRI, &AMDGPU::SGPRRegBank);
1478+ GISelObserverWrapper Observer (&ApplyBank);
1479+ MachineIRBuilder B (MI);
1480+ B.setChangeObserver (Observer);
1481+
1482+ // Ensure the high bits are clear to insert the offset.
1483+ auto OffsetMask = B.buildConstant (S32, maskTrailingOnes<unsigned >(6 ));
1484+ auto ClampOffset = B.buildAnd (S32, OffsetReg, OffsetMask);
1485+
1486+ // Zeros out the low bits, so don't bother clamping the input value.
1487+ auto ShiftWidth = B.buildShl (S32, WidthReg, B.buildConstant (S32, 16 ));
1488+
1489+ // Transformation function, pack the offset and width of a BFE into
1490+ // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
1491+ // source, bits [5:0] contain the offset and bits [22:16] the width.
1492+ auto MergedInputs = B.buildOr (S32, ClampOffset, ShiftWidth);
1493+
1494+ // TODO: It might be worth using a pseudo here to avoid scc clobber and
1495+ // register class constraints.
1496+ unsigned Opc = Ty == S32 ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) :
1497+ (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
1498+
1499+ auto MIB = B.buildInstr (Opc, {DstReg}, {SrcReg, MergedInputs});
1500+ if (!constrainSelectedInstRegOperands (*MIB, *TII, *TRI, *this ))
1501+ llvm_unreachable (" failed to constrain BFE" );
1502+
1503+ MI.eraseFromParent ();
1504+ return true ;
1505+ }
1506+
14471507// FIXME: Duplicated from LegalizerHelper
14481508static CmpInst::Predicate minMaxToCompare (unsigned Opc) {
14491509 switch (Opc) {
@@ -2592,8 +2652,12 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
25922652 constrainOpWithReadfirstlane (MI, MRI, 5 );
25932653 return ;
25942654 }
2595- default :
2596- break ;
2655+ case Intrinsic::amdgcn_sbfe:
2656+ applyMappingBFEIntrinsic (OpdMapper, true );
2657+ return ;
2658+ case Intrinsic::amdgcn_ubfe:
2659+ applyMappingBFEIntrinsic (OpdMapper, false );
2660+ return ;
25972661 }
25982662 break ;
25992663 }
@@ -2687,7 +2751,11 @@ AMDGPURegisterBankInfo::getDefaultMappingSOP(const MachineInstr &MI) const {
26872751 SmallVector<const ValueMapping*, 8 > OpdsMapping (MI.getNumOperands ());
26882752
26892753 for (unsigned i = 0 , e = MI.getNumOperands (); i != e; ++i) {
2690- unsigned Size = getSizeInBits (MI.getOperand (i).getReg (), MRI, *TRI);
2754+ const MachineOperand &SrcOp = MI.getOperand (i);
2755+ if (!SrcOp.isReg ())
2756+ continue ;
2757+
2758+ unsigned Size = getSizeInBits (SrcOp.getReg (), MRI, *TRI);
26912759 OpdsMapping[i] = AMDGPU::getValueMapping (AMDGPU::SGPRRegBankID, Size);
26922760 }
26932761 return getInstructionMapping (1 , 1 , getOperandsMapping (OpdsMapping),
@@ -3498,8 +3566,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
34983566 case Intrinsic::amdgcn_fmad_ftz:
34993567 case Intrinsic::amdgcn_mbcnt_lo:
35003568 case Intrinsic::amdgcn_mbcnt_hi:
3501- case Intrinsic::amdgcn_ubfe:
3502- case Intrinsic::amdgcn_sbfe:
35033569 case Intrinsic::amdgcn_mul_u24:
35043570 case Intrinsic::amdgcn_mul_i24:
35053571 case Intrinsic::amdgcn_lerp:
@@ -3521,6 +3587,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
35213587 case Intrinsic::amdgcn_sdot8:
35223588 case Intrinsic::amdgcn_udot8:
35233589 return getDefaultMappingVOP (MI);
3590+ case Intrinsic::amdgcn_sbfe:
3591+ case Intrinsic::amdgcn_ubfe:
3592+ if (isSALUMapping (MI))
3593+ return getDefaultMappingSOP (MI);
3594+ return getDefaultMappingVOP (MI);
35243595 case Intrinsic::amdgcn_ds_swizzle:
35253596 case Intrinsic::amdgcn_ds_permute:
35263597 case Intrinsic::amdgcn_ds_bpermute:
0 commit comments