@@ -2688,6 +2688,10 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
26882688// c) x & (-1 >> (32 - y))
26892689// d) x << (32 - y) >> (32 - y)
26902690bool X86DAGToDAGISel::matchBitExtract (SDNode *Node) {
2691+ assert (
2692+ (Node->getOpcode () == ISD::AND || Node->getOpcode () == ISD::SRL) &&
2693+ " Should be either an and-mask, or right-shift after clearing high bits." );
2694+
26912695 // BEXTR is BMI instruction, BZHI is BMI2 instruction. We need at least one.
26922696 if (!Subtarget->hasBMI () && !Subtarget->hasBMI2 ())
26932697 return false ;
@@ -2698,13 +2702,16 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
26982702 if (NVT != MVT::i32 && NVT != MVT::i64 )
26992703 return false ;
27002704
2705+ unsigned Size = NVT.getSizeInBits ();
2706+
27012707 SDValue NBits;
27022708
27032709 // If we have BMI2's BZHI, we are ok with multi-use patterns.
27042710 // Else, if we only have BMI1's BEXTR, we require one-use.
27052711 const bool CanHaveExtraUses = Subtarget->hasBMI2 ();
2706- auto checkOneUse = [CanHaveExtraUses](SDValue Op) {
2707- return CanHaveExtraUses || Op.hasOneUse ();
2712+ auto checkOneUse = [CanHaveExtraUses](SDValue Op, unsigned NUses = 1 ) {
2713+ return CanHaveExtraUses ||
2714+ Op.getNode ()->hasNUsesOfValue (NUses, Op.getResNo ());
27082715 };
27092716
27102717 // a) x & ((1 << nbits) + (-1))
@@ -2740,31 +2747,73 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
27402747 return true ;
27412748 };
27422749
2750+ SDValue X;
2751+
2752+ // d) x << (32 - y) >> (32 - y)
2753+ auto matchPatternD = [&checkOneUse, Size, &X, &NBits](SDNode *Node) -> bool {
2754+ if (Node->getOpcode () != ISD::SRL)
2755+ return false ;
2756+ SDValue N0 = Node->getOperand (0 );
2757+ if (N0->getOpcode () != ISD::SHL || !checkOneUse (N0))
2758+ return false ;
2759+ SDValue N1 = Node->getOperand (1 );
2760+ SDValue N01 = N0->getOperand (1 );
2761+ // Both of the shifts must be by the exact same value.
2762+ // There should not be any uses of the shift amount outside of the pattern.
2763+ if (N1 != N01 || !checkOneUse (N1, 2 ))
2764+ return false ;
2765+ // Skip over a truncate of the shift amount.
2766+ if (N1->getOpcode () == ISD::TRUNCATE) {
2767+ N1 = N1->getOperand (0 );
2768+ // The trunc should have been the only user of the real shift amount.
2769+ if (!checkOneUse (N1))
2770+ return false ;
2771+ }
2772+ // Match the shift amount as: (bitwidth - y). It should go away, too.
2773+ if (N1.getOpcode () != ISD::SUB)
2774+ return false ;
2775+ auto N10 = dyn_cast<ConstantSDNode>(N1.getOperand (0 ));
2776+ if (!N10 || N10->getZExtValue () != Size)
2777+ return false ;
2778+ X = N0->getOperand (0 );
2779+ NBits = N1.getOperand (1 );
2780+ return true ;
2781+ };
2782+
27432783 auto matchLowBitMask = [&matchPatternA,
27442784 &matchPatternB](SDValue Mask) -> bool {
2745- // FIXME: patterns c, d .
2785+ // FIXME: pattern c .
27462786 return matchPatternA (Mask) || matchPatternB (Mask);
27472787 };
27482788
2749- SDValue X = Node->getOperand (0 );
2750- SDValue Mask = Node->getOperand (1 );
2789+ if (Node->getOpcode () == ISD::AND) {
2790+ X = Node->getOperand (0 );
2791+ SDValue Mask = Node->getOperand (1 );
27512792
2752- if (matchLowBitMask (Mask)) {
2753- // Great.
2754- } else {
2755- std::swap (X, Mask);
2756- if (!matchLowBitMask (Mask))
2757- return false ;
2758- }
2793+ if (matchLowBitMask (Mask)) {
2794+ // Great.
2795+ } else {
2796+ std::swap (X, Mask);
2797+ if (!matchLowBitMask (Mask))
2798+ return false ;
2799+ }
2800+ } else if (!matchPatternD (Node))
2801+ return false ;
27592802
27602803 SDLoc DL (Node);
27612804
2805+ SDValue OrigNBits = NBits;
2806+ // Do we need to truncate the shift amount?
2807+ if (NBits.getValueType () != MVT::i8 ) {
2808+ NBits = CurDAG->getNode (ISD::TRUNCATE, DL, MVT::i8 , NBits);
2809+ insertDAGNode (*CurDAG, OrigNBits, NBits);
2810+ }
2811+
27622812 // Insert 8-bit NBits into lowest 8 bits of NVT-sized (32 or 64-bit) register.
27632813 // All the other bits are undefined, we do not care about them.
27642814 SDValue ImplDef =
27652815 SDValue (CurDAG->getMachineNode (TargetOpcode::IMPLICIT_DEF, DL, NVT), 0 );
27662816 insertDAGNode (*CurDAG, NBits, ImplDef);
2767- SDValue OrigNBits = NBits;
27682817 NBits = CurDAG->getTargetInsertSubreg (X86::sub_8bit, DL, NVT, ImplDef, NBits);
27692818 insertDAGNode (*CurDAG, OrigNBits, NBits);
27702819
@@ -2963,17 +3012,8 @@ bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
29633012 if (ShiftAmt->getOpcode () == ISD::TRUNCATE)
29643013 ShiftAmt = ShiftAmt->getOperand (0 );
29653014
2966- // Special case to avoid messing up a BZHI pattern.
2967- // Look for (srl (shl X, (size - y)), (size - y)
2968- if (Subtarget->hasBMI2 () && (VT == MVT::i32 || VT == MVT::i64 ) &&
2969- N->getOpcode () == ISD::SRL && N->getOperand (0 ).getOpcode () == ISD::SHL &&
2970- // Shift amounts the same?
2971- N->getOperand (1 ) == N->getOperand (0 ).getOperand (1 ) &&
2972- // Shift amounts size - y?
2973- ShiftAmt.getOpcode () == ISD::SUB &&
2974- isa<ConstantSDNode>(ShiftAmt.getOperand (0 )) &&
2975- cast<ConstantSDNode>(ShiftAmt.getOperand (0 ))->getZExtValue () == Size)
2976- return false ;
3015+ // This function is called after X86DAGToDAGISel::matchBitExtract(),
3016+ // so we are not afraid that we might mess up BZHI/BEXTR pattern.
29773017
29783018 SDValue NewShiftAmt;
29793019 if (ShiftAmt->getOpcode () == ISD::ADD || ShiftAmt->getOpcode () == ISD::SUB) {
@@ -3172,6 +3212,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
31723212 }
31733213
31743214 case ISD::SRL:
3215+ if (matchBitExtract (Node))
3216+ return ;
3217+ LLVM_FALLTHROUGH;
31753218 case ISD::SRA:
31763219 case ISD::SHL:
31773220 if (tryShiftAmountMod (Node))
0 commit comments