Skip to content

Commit 80abdb7

Browse files
committed
[LoongArch] Refine 256-bit vector_shuffle legalization for LASX
1 parent 4a7179f commit 80abdb7

File tree

3 files changed

+45
-37
lines changed

3 files changed

+45
-37
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 43 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2060,7 +2060,10 @@ lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
20602060

20612061
const auto &Begin = Mask.begin();
20622062
const auto &End = Mask.end();
2063-
unsigned HalfSize = Mask.size() / 2;
2063+
int HalfSize = Mask.size() / 2;
2064+
2065+
if (SplatIndex >= HalfSize)
2066+
return SDValue();
20642067

20652068
assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
20662069
if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
@@ -2354,7 +2357,7 @@ static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
23542357
/// cases need to be converted to it for processing.
23552358
///
23562359
/// This function may modify V1, V2 and Mask
2357-
static void canonicalizeShuffleVectorByLane(
2360+
static bool canonicalizeShuffleVectorByLane(
23582361
const SDLoc &DL, MutableArrayRef<int> Mask, MVT VT, SDValue &V1,
23592362
SDValue &V2, SelectionDAG &DAG, const LoongArchSubtarget &Subtarget) {
23602363

@@ -2378,15 +2381,15 @@ static void canonicalizeShuffleVectorByLane(
23782381
preMask = LowLaneTy;
23792382

23802383
if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2381-
return M < 0 || (M >= 0 && M < HalfSize) ||
2382-
(M >= MaskSize && M < MaskSize + HalfSize);
2384+
return M < 0 || (M >= HalfSize && M < MaskSize) ||
2385+
(M >= MaskSize + HalfSize && M < MaskSize * 2);
23832386
}))
2384-
postMask = HighLaneTy;
2387+
postMask = LowLaneTy;
23852388
else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
2386-
return M < 0 || (M >= HalfSize && M < MaskSize) ||
2387-
(M >= MaskSize + HalfSize && M < MaskSize * 2);
2389+
return M < 0 || (M >= 0 && M < HalfSize) ||
2390+
(M >= MaskSize && M < MaskSize + HalfSize);
23882391
}))
2389-
postMask = LowLaneTy;
2392+
postMask = HighLaneTy;
23902393

23912394
// The pre-half of mask is high lane type, and the post-half of mask
23922395
// is low lane type, which is closest to the LoongArch instructions.
@@ -2395,7 +2398,7 @@ static void canonicalizeShuffleVectorByLane(
23952398
// to the lower 128-bit of vector register, and the low lane of mask
23962399
// corresponds to the higher 128-bit of vector register.
23972400
if (preMask == HighLaneTy && postMask == LowLaneTy) {
2398-
return;
2401+
return false;
23992402
}
24002403
if (preMask == LowLaneTy && postMask == HighLaneTy) {
24012404
V1 = DAG.getBitcast(MVT::v4i64, V1);
@@ -2449,8 +2452,10 @@ static void canonicalizeShuffleVectorByLane(
24492452
*it = *it < 0 ? *it : *it + HalfSize;
24502453
}
24512454
} else { // cross-lane
2452-
return;
2455+
return false;
24532456
}
2457+
2458+
return true;
24542459
}
24552460

24562461
/// Lower VECTOR_SHUFFLE as lane permute and then shuffle (if possible).
@@ -2516,27 +2521,20 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
25162521
assert(Mask.size() % 2 == 0 && "Expected even mask size.");
25172522
assert(Mask.size() >= 4 && "Mask size is less than 4.");
25182523

2519-
// canonicalize non cross-lane shuffle vector
2520-
SmallVector<int> NewMask(Mask);
2521-
canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2522-
25232524
APInt KnownUndef, KnownZero;
2524-
computeZeroableShuffleElements(NewMask, V1, V2, KnownUndef, KnownZero);
2525+
computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
25252526
APInt Zeroable = KnownUndef | KnownZero;
25262527

25272528
SDValue Result;
25282529
// TODO: Add more comparison patterns.
25292530
if (V2.isUndef()) {
2530-
if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG,
2531+
if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, V2, DAG,
25312532
Subtarget)))
25322533
return Result;
2533-
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG,
2534+
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
25342535
Subtarget)))
25352536
return Result;
2536-
if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, NewMask, VT, V1, V2, DAG)))
2537-
return Result;
2538-
if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2539-
V1, V2, DAG)))
2537+
if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, V2, DAG)))
25402538
return Result;
25412539

25422540
// TODO: This comment may be enabled in the future to better match the
@@ -2546,24 +2544,39 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
25462544

25472545
// It is recommended not to change the pattern comparison order for better
25482546
// performance.
2549-
if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
2547+
if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, Mask, VT, V1, V2, DAG)))
25502548
return Result;
2551-
if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
2549+
if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, Mask, VT, V1, V2, DAG)))
25522550
return Result;
2553-
if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
2551+
if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, Mask, VT, V1, V2, DAG)))
25542552
return Result;
2555-
if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
2553+
if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, Mask, VT, V1, V2, DAG)))
25562554
return Result;
2557-
if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
2555+
if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, Mask, VT, V1, V2, DAG)))
25582556
return Result;
2559-
if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
2557+
if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
25602558
return Result;
2561-
if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG,
2562-
Subtarget, Zeroable)))
2559+
if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
2560+
Zeroable)))
25632561
return Result;
2564-
if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, NewMask, VT, V1, V2, DAG,
2562+
if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
25652563
Subtarget)))
25662564
return Result;
2565+
2566+
// Canonicalize non-cross-lane shuffle vector.
2567+
SmallVector<int> NewMask(Mask);
2568+
if (canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG, Subtarget))
2569+
return lower256BitShuffle(DL, NewMask, VT, V1, V2, DAG, Subtarget);
2570+
2571+
// FIXME: Handling the remaining cases earlier can degrade performance
2572+
// in some situations. Further analysis is required to enable more
2573+
// effective optimizations.
2574+
if (V2.isUndef()) {
2575+
if ((Result = lowerVECTOR_SHUFFLEAsLanePermuteAndShuffle(DL, NewMask, VT,
2576+
V1, V2, DAG)))
2577+
return Result;
2578+
}
2579+
25672580
if (SDValue NewShuffle = widenShuffleMask(DL, NewMask, VT, V1, V2, DAG))
25682581
return NewShuffle;
25692582
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) {
88
; CHECK-LABEL: shufflevector_v4f64:
99
; CHECK: # %bb.0: # %entry
1010
; CHECK-NEXT: xvpickve.d $xr2, $xr1, 3
11-
; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
11+
; CHECK-NEXT: xvpermi.d $xr3, $xr0, 238
1212
; CHECK-NEXT: xvrepl128vei.d $xr3, $xr3, 1
1313
; CHECK-NEXT: vextrins.d $vr3, $vr2, 16
1414
; CHECK-NEXT: xvpickve.d $xr1, $xr1, 2

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b) {
1616
define <32 x i8> @shufflevector_v32i8_undef(<32 x i8> %a) {
1717
; CHECK-LABEL: shufflevector_v32i8_undef:
1818
; CHECK: # %bb.0:
19-
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 68
2019
; CHECK-NEXT: xvrepl128vei.b $xr0, $xr0, 1
2120
; CHECK-NEXT: ret
2221
%c = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
@@ -39,7 +38,6 @@ define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b) {
3938
define <16 x i16> @shufflevector_v16i16_undef(<16 x i16> %a) {
4039
; CHECK-LABEL: shufflevector_v16i16_undef:
4140
; CHECK: # %bb.0:
42-
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 68
4341
; CHECK-NEXT: xvrepl128vei.h $xr0, $xr0, 3
4442
; CHECK-NEXT: ret
4543
%c = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3,
@@ -62,7 +60,6 @@ define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) {
6260
define <8 x i32> @shufflevector_v8i32_undef(<8 x i32> %a) {
6361
; CHECK-LABEL: shufflevector_v8i32_undef:
6462
; CHECK: # %bb.0:
65-
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 68
6663
; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 2
6764
; CHECK-NEXT: ret
6865
%c = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 8, i32 8, i32 8, i32 8>
@@ -83,7 +80,6 @@ define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) {
8380
define <4 x i64> @shufflevector_v4i64_undef(<4 x i64> %a) {
8481
; CHECK-LABEL: shufflevector_v4i64_undef:
8582
; CHECK: # %bb.0:
86-
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 68
8783
; CHECK-NEXT: xvrepl128vei.d $xr0, $xr0, 1
8884
; CHECK-NEXT: ret
8985
%c = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 1, i32 1, i32 5, i32 5>
@@ -104,7 +100,7 @@ define <8 x float> @shufflevector_v8f32(<8 x float> %a, <8 x float> %b) {
104100
define <8 x float> @shufflevector_v8f32_undef(<8 x float> %a) {
105101
; CHECK-LABEL: shufflevector_v8f32_undef:
106102
; CHECK: # %bb.0:
107-
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 78
103+
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 238
108104
; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 1
109105
; CHECK-NEXT: ret
110106
%c = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 9, i32 9, i32 9, i32 9>
@@ -125,7 +121,6 @@ define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) {
125121
define <4 x double> @shufflevector_v4f64_undef(<4 x double> %a) {
126122
; CHECK-LABEL: shufflevector_v4f64_undef:
127123
; CHECK: # %bb.0:
128-
; CHECK-NEXT: xvpermi.d $xr0, $xr0, 68
129124
; CHECK-NEXT: xvrepl128vei.d $xr0, $xr0, 0
130125
; CHECK-NEXT: ret
131126
%c = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 7, i32 7>

0 commit comments

Comments
 (0)