@@ -2060,7 +2060,10 @@ lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
20602060
20612061 const auto &Begin = Mask.begin ();
20622062 const auto &End = Mask.end ();
2063- unsigned HalfSize = Mask.size () / 2 ;
2063+ int HalfSize = Mask.size () / 2 ;
2064+
2065+ if (SplatIndex >= HalfSize)
2066+ return SDValue ();
20642067
20652068 assert (SplatIndex < (int )Mask.size () && " Out of bounds mask index" );
20662069 if (fitsRegularPattern<int >(Begin, 1 , End - HalfSize, SplatIndex, 0 ) &&
@@ -2354,7 +2357,7 @@ static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
23542357// / cases need to be converted to it for processing.
23552358// /
23562359// / This function may modify V1, V2 and Mask
2357- static void canonicalizeShuffleVectorByLane (
2360+ static bool canonicalizeShuffleVectorByLane (
23582361 const SDLoc &DL, MutableArrayRef<int > Mask, MVT VT, SDValue &V1,
23592362 SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
23602363
@@ -2378,15 +2381,15 @@ static void canonicalizeShuffleVectorByLane(
23782381 preMask = LowLaneTy;
23792382
23802383 if (std::all_of (Mask.begin () + HalfSize, Mask.end (), [&](int M) {
2381- return M < 0 || (M >= 0 && M < HalfSize ) ||
2382- (M >= MaskSize && M < MaskSize + HalfSize );
2384+ return M < 0 || (M >= HalfSize && M < MaskSize ) ||
2385+ (M >= MaskSize + HalfSize && M < MaskSize * 2 );
23832386 }))
2384- postMask = HighLaneTy ;
2387+ postMask = LowLaneTy ;
23852388 else if (std::all_of (Mask.begin () + HalfSize, Mask.end (), [&](int M) {
2386- return M < 0 || (M >= HalfSize && M < MaskSize ) ||
2387- (M >= MaskSize + HalfSize && M < MaskSize * 2 );
2389+ return M < 0 || (M >= 0 && M < HalfSize ) ||
2390+ (M >= MaskSize && M < MaskSize + HalfSize );
23882391 }))
2389- postMask = LowLaneTy ;
2392+ postMask = HighLaneTy ;
23902393
23912394 // The pre-half of mask is high lane type, and the post-half of mask
23922395 // is low lane type, which is closest to the LoongArch instructions.
@@ -2395,7 +2398,7 @@ static void canonicalizeShuffleVectorByLane(
23952398 // to the lower 128-bit of vector register, and the low lane of mask
23962399 // corresponds to the higher 128-bit of vector register.
23972400 if (preMask == HighLaneTy && postMask == LowLaneTy) {
2398- return ;
2401+ return false ;
23992402 }
24002403 if (preMask == LowLaneTy && postMask == HighLaneTy) {
24012404 V1 = DAG.getBitcast (MVT::v4i64, V1);
@@ -2449,8 +2452,10 @@ static void canonicalizeShuffleVectorByLane(
24492452 *it = *it < 0 ? *it : *it + HalfSize;
24502453 }
24512454 } else { // cross-lane
2452- return ;
2455+ return false ;
24532456 }
2457+
2458+ return true ;
24542459}
24552460
24562461// / Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
@@ -2516,27 +2521,20 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
25162521 assert (Mask.size () % 2 == 0 && " Expected even mask size." );
25172522 assert (Mask.size () >= 4 && " Mask size is less than 4." );
25182523
2519- // canonicalize non cross-lane shuffle vector
2520- SmallVector<int > NewMask (Mask);
2521- canonicalizeShuffleVectorByLane (DL, NewMask, VT, V1, V2, DAG, Subtarget);
2522-
25232524 APInt KnownUndef, KnownZero;
2524- computeZeroableShuffleElements (NewMask , V1, V2, KnownUndef, KnownZero);
2525+ computeZeroableShuffleElements (Mask , V1, V2, KnownUndef, KnownZero);
25252526 APInt Zeroable = KnownUndef | KnownZero;
25262527
25272528 SDValue Result;
25282529 // TODO: Add more comparison patterns.
25292530 if (V2.isUndef ()) {
2530- if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI (DL, NewMask , VT, V1, V2, DAG,
2531+ if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI (DL, Mask , VT, V1, V2, DAG,
25312532 Subtarget)))
25322533 return Result;
2533- if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I (DL, NewMask , VT, V1, V2, DAG,
2534+ if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I (DL, Mask , VT, V1, V2, DAG,
25342535 Subtarget)))
25352536 return Result;
2536- if ((Result = lowerVECTOR_SHUFFLE_XVPERM (DL, NewMask, VT, V1, V2, DAG)))
2537- return Result;
2538- if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle (DL, NewMask, VT,
2539- V1, V2, DAG)))
2537+ if ((Result = lowerVECTOR_SHUFFLE_XVPERM (DL, Mask, VT, V1, V2, DAG)))
25402538 return Result;
25412539
25422540 // TODO: This comment may be enabled in the future to better match the
@@ -2546,24 +2544,36 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
25462544
25472545 // It is recommended not to change the pattern comparison order for better
25482546 // performance.
2549- if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV (DL, NewMask , VT, V1, V2, DAG)))
2547+ if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV (DL, Mask , VT, V1, V2, DAG)))
25502548 return Result;
2551- if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD (DL, NewMask , VT, V1, V2, DAG)))
2549+ if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD (DL, Mask , VT, V1, V2, DAG)))
25522550 return Result;
2553- if ((Result = lowerVECTOR_SHUFFLE_XVILVH (DL, NewMask , VT, V1, V2, DAG)))
2551+ if ((Result = lowerVECTOR_SHUFFLE_XVILVH (DL, Mask , VT, V1, V2, DAG)))
25542552 return Result;
2555- if ((Result = lowerVECTOR_SHUFFLE_XVILVL (DL, NewMask , VT, V1, V2, DAG)))
2553+ if ((Result = lowerVECTOR_SHUFFLE_XVILVL (DL, Mask , VT, V1, V2, DAG)))
25562554 return Result;
2557- if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV (DL, NewMask , VT, V1, V2, DAG)))
2555+ if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV (DL, Mask , VT, V1, V2, DAG)))
25582556 return Result;
2559- if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD (DL, NewMask , VT, V1, V2, DAG)))
2557+ if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD (DL, Mask , VT, V1, V2, DAG)))
25602558 return Result;
2561- if ((Result = lowerVECTOR_SHUFFLEAsShift (DL, NewMask , VT, V1, V2, DAG,
2562- Subtarget, Zeroable)))
2559+ if ((Result = lowerVECTOR_SHUFFLEAsShift (DL, Mask , VT, V1, V2, DAG, Subtarget ,
2560+ Zeroable)))
25632561 return Result;
2564- if ((Result = lowerVECTOR_SHUFFLEAsByteRotate (DL, NewMask , VT, V1, V2, DAG,
2562+ if ((Result = lowerVECTOR_SHUFFLEAsByteRotate (DL, Mask , VT, V1, V2, DAG,
25652563 Subtarget)))
25662564 return Result;
2565+
2566+ // canonicalize non cross-lane shuffle vector
2567+ SmallVector<int > NewMask (Mask);
2568+ if (canonicalizeShuffleVectorByLane (DL, NewMask, VT, V1, V2, DAG, Subtarget))
2569+ return lower256BitShuffle (DL, NewMask, VT, V1, V2, DAG, Subtarget);
2570+
2571+ if (V2.isUndef ()) {
2572+ if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle (DL, NewMask, VT,
2573+ V1, V2, DAG)))
2574+ return Result;
2575+ }
2576+
25672577 if (SDValue NewShuffle = widenShuffleMask (DL, NewMask, VT, V1, V2, DAG))
25682578 return NewShuffle;
25692579 if ((Result = lowerVECTOR_SHUFFLE_XVSHUF (DL, NewMask, VT, V1, V2, DAG)))
0 commit comments