@@ -2740,100 +2740,114 @@ static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) {
27402740 // rotate matching code under visitSelect and visitTrunc?
27412741 unsigned Width = Or.getType ()->getScalarSizeInBits ();
27422742
2743- // First, find an or'd pair of opposite shifts:
2744- // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
2745- BinaryOperator *Or0, *Or1;
2746- if (!match (Or.getOperand (0 ), m_BinOp (Or0)) ||
2747- !match (Or.getOperand (1 ), m_BinOp (Or1)))
2748- return nullptr ;
2749-
2750- Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
2751- if (!match (Or0, m_OneUse (m_LogicalShift (m_Value (ShVal0), m_Value (ShAmt0)))) ||
2752- !match (Or1, m_OneUse (m_LogicalShift (m_Value (ShVal1), m_Value (ShAmt1)))) ||
2753- Or0->getOpcode () == Or1->getOpcode ())
2743+ Instruction *Or0, *Or1;
2744+ if (!match (Or.getOperand (0 ), m_Instruction (Or0)) ||
2745+ !match (Or.getOperand (1 ), m_Instruction (Or1)))
27542746 return nullptr ;
27552747
2756- // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
2757- if (Or0->getOpcode () == BinaryOperator::LShr) {
2758- std::swap (Or0, Or1);
2759- std::swap (ShVal0, ShVal1);
2760- std::swap (ShAmt0, ShAmt1);
2761- }
2762- assert (Or0->getOpcode () == BinaryOperator::Shl &&
2763- Or1->getOpcode () == BinaryOperator::LShr &&
2764- " Illegal or(shift,shift) pair" );
2765-
2766- // Match the shift amount operands for a funnel shift pattern. This always
2767- // matches a subtraction on the R operand.
2768- auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
2769- // Check for constant shift amounts that sum to the bitwidth.
2770- const APInt *LI, *RI;
2771- if (match (L, m_APIntAllowUndef (LI)) && match (R, m_APIntAllowUndef (RI)))
2772- if (LI->ult (Width) && RI->ult (Width) && (*LI + *RI) == Width)
2773- return ConstantInt::get (L->getType (), *LI);
2774-
2775- Constant *LC, *RC;
2776- if (match (L, m_Constant (LC)) && match (R, m_Constant (RC)) &&
2777- match (L, m_SpecificInt_ICMP (ICmpInst::ICMP_ULT, APInt (Width, Width))) &&
2778- match (R, m_SpecificInt_ICMP (ICmpInst::ICMP_ULT, APInt (Width, Width))) &&
2779- match (ConstantExpr::getAdd (LC, RC), m_SpecificIntAllowUndef (Width)))
2780- return ConstantExpr::mergeUndefsWith (LC, RC);
2781-
2782- // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width.
2783- // We limit this to X < Width in case the backend re-expands the intrinsic,
2784- // and has to reintroduce a shift modulo operation (InstCombine might remove
2785- // it after this fold). This still doesn't guarantee that the final codegen
2786- // will match this original pattern.
2787- if (match (R, m_OneUse (m_Sub (m_SpecificInt (Width), m_Specific (L))))) {
2788- KnownBits KnownL = IC.computeKnownBits (L, /* Depth*/ 0 , &Or);
2789- return KnownL.getMaxValue ().ult (Width) ? L : nullptr ;
2790- }
2748+ bool IsFshl = true ; // Sub on LSHR.
2749+ SmallVector<Value *, 3 > FShiftArgs;
27912750
2792- // For non-constant cases, the following patterns currently only work for
2793- // rotation patterns.
2794- // TODO: Add general funnel-shift compatible patterns.
2795- if (ShVal0 != ShVal1)
2751+ // First, find an or'd pair of opposite shifts:
2752+ // or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1)
2753+ if (isa<BinaryOperator>(Or0) && isa<BinaryOperator>(Or1)) {
2754+ Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
2755+ if (!match (Or0,
2756+ m_OneUse (m_LogicalShift (m_Value (ShVal0), m_Value (ShAmt0)))) ||
2757+ !match (Or1,
2758+ m_OneUse (m_LogicalShift (m_Value (ShVal1), m_Value (ShAmt1)))) ||
2759+ Or0->getOpcode () == Or1->getOpcode ())
27962760 return nullptr ;
27972761
2798- // For non-constant cases we don't support non-pow2 shift masks.
2799- // TODO: Is it worth matching urem as well?
2800- if (!isPowerOf2_32 (Width))
2801- return nullptr ;
2762+ // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
2763+ if (Or0->getOpcode () == BinaryOperator::LShr) {
2764+ std::swap (Or0, Or1);
2765+ std::swap (ShVal0, ShVal1);
2766+ std::swap (ShAmt0, ShAmt1);
2767+ }
2768+ assert (Or0->getOpcode () == BinaryOperator::Shl &&
2769+ Or1->getOpcode () == BinaryOperator::LShr &&
2770+ " Illegal or(shift,shift) pair" );
2771+
2772+ // Match the shift amount operands for a funnel shift pattern. This always
2773+ // matches a subtraction on the R operand.
2774+ auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
2775+ // Check for constant shift amounts that sum to the bitwidth.
2776+ const APInt *LI, *RI;
2777+ if (match (L, m_APIntAllowUndef (LI)) && match (R, m_APIntAllowUndef (RI)))
2778+ if (LI->ult (Width) && RI->ult (Width) && (*LI + *RI) == Width)
2779+ return ConstantInt::get (L->getType (), *LI);
2780+
2781+ Constant *LC, *RC;
2782+ if (match (L, m_Constant (LC)) && match (R, m_Constant (RC)) &&
2783+ match (L,
2784+ m_SpecificInt_ICMP (ICmpInst::ICMP_ULT, APInt (Width, Width))) &&
2785+ match (R,
2786+ m_SpecificInt_ICMP (ICmpInst::ICMP_ULT, APInt (Width, Width))) &&
2787+ match (ConstantExpr::getAdd (LC, RC), m_SpecificIntAllowUndef (Width)))
2788+ return ConstantExpr::mergeUndefsWith (LC, RC);
2789+
2790+ // (shl ShVal, X) | (lshr ShVal, (Width - x)) iff X < Width.
2791+ // We limit this to X < Width in case the backend re-expands the
2792+ // intrinsic, and has to reintroduce a shift modulo operation (InstCombine
2793+ // might remove it after this fold). This still doesn't guarantee that the
2794+ // final codegen will match this original pattern.
2795+ if (match (R, m_OneUse (m_Sub (m_SpecificInt (Width), m_Specific (L))))) {
2796+ KnownBits KnownL = IC.computeKnownBits (L, /* Depth*/ 0 , &Or);
2797+ return KnownL.getMaxValue ().ult (Width) ? L : nullptr ;
2798+ }
28022799
2803- // The shift amount may be masked with negation:
2804- // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
2805- Value *X;
2806- unsigned Mask = Width - 1 ;
2807- if (match (L, m_And (m_Value (X), m_SpecificInt (Mask))) &&
2808- match (R, m_And (m_Neg (m_Specific (X)), m_SpecificInt (Mask))))
2809- return X;
2800+ // For non-constant cases, the following patterns currently only work for
2801+ // rotation patterns.
2802+ // TODO: Add general funnel-shift compatible patterns.
2803+ if (ShVal0 != ShVal1)
2804+ return nullptr ;
28102805
2811- // Similar to above, but the shift amount may be extended after masking,
2812- // so return the extended value as the parameter for the intrinsic.
2813- if (match (L, m_ZExt (m_And (m_Value (X), m_SpecificInt (Mask)))) &&
2814- match (R, m_And (m_Neg (m_ZExt (m_And (m_Specific (X), m_SpecificInt (Mask)))),
2815- m_SpecificInt (Mask))))
2816- return L;
2806+ // For non-constant cases we don't support non-pow2 shift masks.
2807+ // TODO: Is it worth matching urem as well?
2808+ if (!isPowerOf2_32 (Width))
2809+ return nullptr ;
28172810
2818- if (match (L, m_ZExt (m_And (m_Value (X), m_SpecificInt (Mask)))) &&
2819- match (R, m_ZExt (m_And (m_Neg (m_Specific (X)), m_SpecificInt (Mask)))))
2820- return L;
2811+ // The shift amount may be masked with negation:
2812+ // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
2813+ Value *X;
2814+ unsigned Mask = Width - 1 ;
2815+ if (match (L, m_And (m_Value (X), m_SpecificInt (Mask))) &&
2816+ match (R, m_And (m_Neg (m_Specific (X)), m_SpecificInt (Mask))))
2817+ return X;
2818+
2819+ // Similar to above, but the shift amount may be extended after masking,
2820+ // so return the extended value as the parameter for the intrinsic.
2821+ if (match (L, m_ZExt (m_And (m_Value (X), m_SpecificInt (Mask)))) &&
2822+ match (R,
2823+ m_And (m_Neg (m_ZExt (m_And (m_Specific (X), m_SpecificInt (Mask)))),
2824+ m_SpecificInt (Mask))))
2825+ return L;
2826+
2827+ if (match (L, m_ZExt (m_And (m_Value (X), m_SpecificInt (Mask)))) &&
2828+ match (R, m_ZExt (m_And (m_Neg (m_Specific (X)), m_SpecificInt (Mask)))))
2829+ return L;
28212830
2822- return nullptr ;
2823- };
2831+ return nullptr ;
2832+ };
28242833
2825- Value *ShAmt = matchShiftAmount (ShAmt0, ShAmt1, Width);
2826- bool IsFshl = true ; // Sub on LSHR.
2827- if (!ShAmt) {
2828- ShAmt = matchShiftAmount (ShAmt1, ShAmt0, Width);
2829- IsFshl = false ; // Sub on SHL.
2834+ Value *ShAmt = matchShiftAmount (ShAmt0, ShAmt1, Width);
2835+ if (!ShAmt) {
2836+ ShAmt = matchShiftAmount (ShAmt1, ShAmt0, Width);
2837+ IsFshl = false ; // Sub on SHL.
2838+ }
2839+ if (!ShAmt)
2840+ return nullptr ;
2841+
2842+ FShiftArgs = {ShVal0, ShVal1, ShAmt};
28302843 }
2831- if (!ShAmt)
2844+
2845+ if (FShiftArgs.empty ())
28322846 return nullptr ;
28332847
28342848 Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
28352849 Function *F = Intrinsic::getDeclaration (Or.getModule (), IID, Or.getType ());
2836- return CallInst::Create (F, {ShVal0, ShVal1, ShAmt} );
2850+ return CallInst::Create (F, FShiftArgs );
28372851}
28382852
28392853// / Attempt to combine or(zext(x),shl(zext(y),bw/2)) concat packing patterns.
0 commit comments