@@ -131,6 +131,7 @@ class SIFoldOperandsImpl {
131131 std::optional<int64_t > getImmOrMaterializedImm (MachineOperand &Op) const ;
132132 bool tryConstantFoldOp (MachineInstr *MI) const ;
133133 bool tryFoldCndMask (MachineInstr &MI) const ;
134+ bool tryFoldBitMask (MachineInstr &MI) const ;
134135 bool tryFoldZeroHighBits (MachineInstr &MI) const ;
135136 bool foldInstOperand (MachineInstr &MI, MachineOperand &OpToFold) const ;
136137
@@ -1447,6 +1448,99 @@ bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const {
14471448 return true ;
14481449}
14491450
1451+ static bool getBitsReadByInst (unsigned Opc, unsigned &NumBitsRead,
1452+ unsigned &OpIdx) {
1453+ switch (Opc) {
1454+ case AMDGPU::V_ASHR_I32_e64:
1455+ case AMDGPU::V_ASHR_I32_e32:
1456+ case AMDGPU::V_LSHR_B32_e64:
1457+ case AMDGPU::V_LSHR_B32_e32:
1458+ case AMDGPU::V_LSHL_B32_e64:
1459+ case AMDGPU::V_LSHL_B32_e32:
1460+ case AMDGPU::S_LSHL_B32:
1461+ case AMDGPU::S_LSHR_B32:
1462+ case AMDGPU::S_ASHR_I32:
1463+ NumBitsRead = 5 ;
1464+ OpIdx = 2 ;
1465+ return true ;
1466+ case AMDGPU::S_LSHL_B64:
1467+ case AMDGPU::S_LSHR_B64:
1468+ case AMDGPU::S_ASHR_I64:
1469+ NumBitsRead = 6 ;
1470+ OpIdx = 2 ;
1471+ return true ;
1472+ case AMDGPU::V_LSHLREV_B32_e64:
1473+ case AMDGPU::V_LSHLREV_B32_e32:
1474+ case AMDGPU::V_LSHRREV_B32_e64:
1475+ case AMDGPU::V_LSHRREV_B32_e32:
1476+ case AMDGPU::V_ASHRREV_I32_e64:
1477+ case AMDGPU::V_ASHRREV_I32_e32:
1478+ NumBitsRead = 5 ;
1479+ OpIdx = 1 ;
1480+ return true ;
1481+ default :
1482+ return false ;
1483+ }
1484+ }
1485+
1486+ static bool isAndBitMaskRedundant (MachineInstr &MI, unsigned BitsNeeded,
1487+ unsigned &SrcOp) {
1488+ MachineOperand *RegOp = &MI.getOperand (1 );
1489+ MachineOperand *ImmOp = &MI.getOperand (2 );
1490+
1491+ if (!RegOp->isReg () || !ImmOp->isImm ()) {
1492+ if (ImmOp->isReg () && RegOp->isImm ())
1493+ std::swap (RegOp, ImmOp);
1494+ else
1495+ return false ;
1496+ }
1497+
1498+ SrcOp = RegOp->getOperandNo ();
1499+
1500+ const unsigned BitMask = maskTrailingOnes<unsigned >(BitsNeeded);
1501+ return (ImmOp->getImm () & BitMask) == BitMask;
1502+ }
1503+
1504+ bool SIFoldOperandsImpl::tryFoldBitMask (MachineInstr &MI) const {
1505+ unsigned NumBitsRead = 0 ;
1506+ unsigned OpIdx = 0 ;
1507+ if (!getBitsReadByInst (MI.getOpcode (), NumBitsRead, OpIdx))
1508+ return false ;
1509+
1510+ MachineOperand &Op = MI.getOperand (OpIdx);
1511+ if (!Op.isReg ())
1512+ return false ;
1513+
1514+ Register OpReg = Op.getReg ();
1515+ if (OpReg.isPhysical ())
1516+ return false ;
1517+
1518+ MachineInstr *OpDef = MRI->getVRegDef (OpReg);
1519+ if (!OpDef)
1520+ return false ;
1521+
1522+ LLVM_DEBUG (dbgs () << " tryFoldBitMask: " << MI << " \t OpIdx:" << OpIdx << " , NumBitsRead:" << NumBitsRead << " \n " );
1523+
1524+ unsigned ReplaceWith;
1525+ switch (OpDef->getOpcode ()) {
1526+ // TODO: add more opcodes?
1527+ case AMDGPU::S_AND_B32:
1528+ case AMDGPU::V_AND_B32_e32:
1529+ case AMDGPU::V_AND_B32_e64:
1530+ if (!isAndBitMaskRedundant (*OpDef, NumBitsRead, ReplaceWith))
1531+ return false ;
1532+ break ;
1533+ default :
1534+ return false ;
1535+ }
1536+
1537+ MachineOperand &ReplaceWithOp = OpDef->getOperand (ReplaceWith);
1538+ LLVM_DEBUG (dbgs () << " \t replacing operand with:" << ReplaceWithOp << " \n " );
1539+
1540+ MI.getOperand (OpIdx).setReg (ReplaceWithOp.getReg ());
1541+ return true ;
1542+ }
1543+
14501544bool SIFoldOperandsImpl::tryFoldZeroHighBits (MachineInstr &MI) const {
14511545 if (MI.getOpcode () != AMDGPU::V_AND_B32_e64 &&
14521546 MI.getOpcode () != AMDGPU::V_AND_B32_e32)
@@ -1458,7 +1552,7 @@ bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const {
14581552
14591553 Register Src1 = MI.getOperand (2 ).getReg ();
14601554 MachineInstr *SrcDef = MRI->getVRegDef (Src1);
1461- if (!ST->zeroesHigh16BitsOfDest (SrcDef->getOpcode ()))
1555+ if (!SrcDef || ! ST->zeroesHigh16BitsOfDest (SrcDef->getOpcode ()))
14621556 return false ;
14631557
14641558 Register Dst = MI.getOperand (0 ).getReg ();
@@ -2451,6 +2545,7 @@ bool SIFoldOperandsImpl::run(MachineFunction &MF) {
24512545 MachineOperand *CurrentKnownM0Val = nullptr ;
24522546 for (auto &MI : make_early_inc_range (*MBB)) {
24532547 Changed |= tryFoldCndMask (MI);
2548+ Changed |= tryFoldBitMask (MI);
24542549
24552550 if (tryFoldZeroHighBits (MI)) {
24562551 Changed = true ;
0 commit comments