Skip to content

Commit f8c413d

Browse files
committed
[LoongArch] Custom legalize vector_shuffle to xvinsve0.{w/d} when possible
1 parent 137d759 commit f8c413d

File tree

4 files changed

+64
-2
lines changed

4 files changed

+64
-2
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2317,6 +2317,54 @@ static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
23172317
return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
23182318
}
23192319

2320+
// Check if exactly one element of the Mask is replaced by 'Replaced', while
2321+
// all other elements are either 'Base + i' or undef (-1). On success, return
2322+
// the index of the replaced element. Otherwise, just return -1.
2323+
static int checkReplaceOne(ArrayRef<int> Mask, int Base, int Replaced) {
2324+
int MaskSize = Mask.size();
2325+
int Idx = -1;
2326+
for (int i = 0; i < MaskSize; ++i) {
2327+
if (Mask[i] == Base + i || Mask[i] == -1)
2328+
continue;
2329+
if (Mask[i] != Replaced)
2330+
return -1;
2331+
if (Idx == -1)
2332+
Idx = i;
2333+
else
2334+
return -1;
2335+
}
2336+
return Idx;
2337+
}
2338+
2339+
/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
2340+
static SDValue
2341+
lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
2342+
SDValue V1, SDValue V2, SelectionDAG &DAG,
2343+
const LoongArchSubtarget &Subtarget) {
2344+
// LoongArch LASX only supports xvinsve0.{w/d}.
2345+
if (VT != MVT::v8i32 && VT != MVT::v8f32 && VT != MVT::v4i64 &&
2346+
VT != MVT::v4f64)
2347+
return SDValue();
2348+
2349+
MVT GRLenVT = Subtarget.getGRLenVT();
2350+
int MaskSize = Mask.size();
2351+
assert(MaskSize == (int)VT.getVectorNumElements() && "Unexpected mask size");
2352+
2353+
// Case 1: the lowest element of V2 replaces one element in V1.
2354+
int Idx = checkReplaceOne(Mask, 0, MaskSize);
2355+
if (Idx != -1)
2356+
return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V1, V2,
2357+
DAG.getConstant(Idx, DL, GRLenVT));
2358+
2359+
// Case 2: the lowest element of V1 replaces one element in V2.
2360+
Idx = checkReplaceOne(Mask, MaskSize, 0);
2361+
if (Idx != -1)
2362+
return DAG.getNode(LoongArchISD::XVINSVE0, DL, VT, V2, V1,
2363+
DAG.getConstant(Idx, DL, GRLenVT));
2364+
2365+
return SDValue();
2366+
}
2367+
23202368
/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
23212369
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
23222370
MVT VT, SDValue V1, SDValue V2,
@@ -2593,6 +2641,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
25932641
if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
25942642
Zeroable)))
25952643
return Result;
2644+
if ((Result =
2645+
lowerVECTOR_SHUFFLE_XVINSVE0(DL, Mask, VT, V1, V2, DAG, Subtarget)))
2646+
return Result;
25962647
if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG,
25972648
Subtarget)))
25982649
return Result;
@@ -7450,6 +7501,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
74507501
NODE_NAME_CASE(XVPERM)
74517502
NODE_NAME_CASE(XVREPLVE0)
74527503
NODE_NAME_CASE(XVREPLVE0Q)
7504+
NODE_NAME_CASE(XVINSVE0)
74537505
NODE_NAME_CASE(VPICK_SEXT_ELT)
74547506
NODE_NAME_CASE(VPICK_ZEXT_ELT)
74557507
NODE_NAME_CASE(VREPLVE)

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ enum NodeType : unsigned {
151151
XVPERM,
152152
XVREPLVE0,
153153
XVREPLVE0Q,
154+
XVINSVE0,
154155

155156
// Extended vector element extraction
156157
VPICK_SEXT_ELT,

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ def loongarch_xvpermi: SDNode<"LoongArchISD::XVPERMI", SDT_LoongArchV1RUimm>;
2020
def loongarch_xvperm: SDNode<"LoongArchISD::XVPERM", SDT_LoongArchXVPERM>;
2121
def loongarch_xvreplve0: SDNode<"LoongArchISD::XVREPLVE0", SDT_LoongArchXVREPLVE0>;
2222
def loongarch_xvreplve0q: SDNode<"LoongArchISD::XVREPLVE0Q", SDT_LoongArchXVREPLVE0>;
23+
// SelectionDAG node for LoongArchISD::XVINSVE0, using the
// SDT_LoongArchV2RUimm type profile (defined elsewhere; presumably two vector
// operands plus an unsigned immediate -- confirm against its definition).
def loongarch_xvinsve0 : SDNode<"LoongArchISD::XVINSVE0", SDT_LoongArchV2RUimm>;
2324
def loongarch_xvmskltz: SDNode<"LoongArchISD::XVMSKLTZ", SDT_LoongArchVMSKCOND>;
2425
def loongarch_xvmskgez: SDNode<"LoongArchISD::XVMSKGEZ", SDT_LoongArchVMSKCOND>;
2526
def loongarch_xvmskeqz: SDNode<"LoongArchISD::XVMSKEQZ", SDT_LoongArchVMSKCOND>;
@@ -1708,6 +1709,14 @@ def : Pat<(vector_insert v4f64:$xd, (f64(bitconvert i64:$rj)), uimm2:$imm),
17081709
(XVINSGR2VR_D v4f64:$xd, GPR:$rj, uimm2:$imm)>;
17091710

17101711
// XVINSVE0_{W/D}
1712+
// Select the loongarch_xvinsve0 node directly to the XVINSVE0 instructions:
// the 8-element types (v8i32/v8f32) map to XVINSVE0_W with a uimm3 index,
// and the 4-element types (v4i64/v4f64) map to XVINSVE0_D with a uimm2 index.
// Operands are forwarded unchanged ($xd, $xj, $imm).
def : Pat<(loongarch_xvinsve0 v8i32:$xd, v8i32:$xj, uimm3:$imm),
1713+
(XVINSVE0_W v8i32:$xd, v8i32:$xj, uimm3:$imm)>;
1714+
def : Pat<(loongarch_xvinsve0 v4i64:$xd, v4i64:$xj, uimm2:$imm),
1715+
(XVINSVE0_D v4i64:$xd, v4i64:$xj, uimm2:$imm)>;
1716+
def : Pat<(loongarch_xvinsve0 v8f32:$xd, v8f32:$xj, uimm3:$imm),
1717+
(XVINSVE0_W v8f32:$xd, v8f32:$xj, uimm3:$imm)>;
1718+
def : Pat<(loongarch_xvinsve0 v4f64:$xd, v4f64:$xj, uimm2:$imm),
1719+
(XVINSVE0_D v4f64:$xd, v4f64:$xj, uimm2:$imm)>;
17111720
def : Pat<(vector_insert v8f32:$xd, FPR32:$fj, uimm3:$imm),
17121721
(XVINSVE0_W v8f32:$xd, (SUBREG_TO_REG(i64 0), FPR32:$fj, sub_32),
17131722
uimm3:$imm)>;

llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvinsve0.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2-
; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
3-
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
2+
; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s
3+
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
44

55
;; xvinsve0.w
66
define void @xvinsve0_v8i32_l_0(ptr %d, ptr %a, ptr %b) nounwind {

0 commit comments

Comments
 (0)