[RISCV] Use getELen() instead of hardcoded 64 in lowerBUILD_VECTOR. #77355
Merged
Conversation
This is needed to properly support Zve32x.
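The core of the change: lowerBuildVectorOfConstants can re-materialize a build_vector whose constants repeat with a short period as a splat of one wider integer element (ViaIntVT in the hunk below), but that wider element must not exceed ELEN, the largest element width the vector unit guarantees (64 with V/Zve64x, 32 with Zve32x). The previous hardcoded 64 is only correct when ELEN is 64. A minimal standalone C++ sketch of the bound, with made-up names rather than the actual LLVM code:

// Standalone illustration (hypothetical helper, not the LLVM code): decide
// whether a repeated constant sequence of SeqLen elements, each EltBitSize
// bits wide, can be packed into one wider integer element and splatted.
// The packed element must fit in ELEN.
#include <cstdio>

static bool canPackRepeatedSequence(unsigned EltBitSize, unsigned SeqLen,
                                    unsigned ELen) {
  // Before this patch the bound was a hardcoded 64, which would let a
  // 64-bit packed element through even when the target only has Zve32x.
  return EltBitSize < ELen && (SeqLen * EltBitSize) <= ELen;
}

int main() {
  // <8 x i8> repeating every 4 elements: 32 packed bits, fine either way.
  std::printf("e8  x4, ELEN=64: %d\n", canPackRepeatedSequence(8, 4, 64));  // 1
  std::printf("e8  x4, ELEN=32: %d\n", canPackRepeatedSequence(8, 4, 32));  // 1
  // <8 x i16> repeating every 4 elements: 64 packed bits, needs ELEN=64.
  std::printf("e16 x4, ELEN=64: %d\n", canPackRepeatedSequence(16, 4, 64)); // 1
  std::printf("e16 x4, ELEN=32: %d\n", canPackRepeatedSequence(16, 4, 32)); // 0
  return 0;
}

With Subtarget.getELen() in place of the literal 64, the Zve32x run line added to the test below falls back to scalar stores or adjusted vsetvli configurations where the packed splat is no longer legal.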
@llvm/pr-subscribers-backend-risc-v

Author: Craig Topper (topperc)

Changes

This is needed to properly support Zve32x.

Patch is 20.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/77355.diff

2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 79c16cf4c4c361..835ea4618904f6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3657,10 +3657,10 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
// would require bit-manipulation instructions to construct the splat value.
SmallVector<SDValue> Sequence;
const auto *BV = cast<BuildVectorSDNode>(Op);
- if (VT.isInteger() && EltBitSize < 64 &&
+ if (VT.isInteger() && EltBitSize < Subtarget.getELen() &&
ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
BV->getRepeatedSequence(Sequence) &&
- (Sequence.size() * EltBitSize) <= 64) {
+ (Sequence.size() * EltBitSize) <= Subtarget.getELen()) {
unsigned SeqLen = Sequence.size();
MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index 5dfa3835cad020..faeca5ef801a54 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V
+; RUN: llc -mtriple=riscv64 -mattr=+zve32x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32
define void @buildvec_vid_v16i8(ptr %x) {
; CHECK-LABEL: buildvec_vid_v16i8:
@@ -296,11 +297,22 @@ define <4 x i64> @buildvec_vid_step1_add0_v4i64() {
; RV32-NEXT: vsext.vf4 v8, v10
; RV32-NEXT: ret
;
-; RV64-LABEL: buildvec_vid_step1_add0_v4i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vid.v v8
-; RV64-NEXT: ret
+; RV64V-LABEL: buildvec_vid_step1_add0_v4i64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64V-NEXT: vid.v v8
+; RV64V-NEXT: ret
+;
+; RV64ZVE32-LABEL: buildvec_vid_step1_add0_v4i64:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: li a1, 3
+; RV64ZVE32-NEXT: sd a1, 24(a0)
+; RV64ZVE32-NEXT: li a1, 2
+; RV64ZVE32-NEXT: sd a1, 16(a0)
+; RV64ZVE32-NEXT: li a1, 1
+; RV64ZVE32-NEXT: sd a1, 8(a0)
+; RV64ZVE32-NEXT: sd zero, 0(a0)
+; RV64ZVE32-NEXT: ret
ret <4 x i64> <i64 0, i64 1, i64 2, i64 3>
}
@@ -314,12 +326,23 @@ define <4 x i64> @buildvec_vid_step2_add0_v4i64() {
; RV32-NEXT: vsext.vf4 v8, v10
; RV32-NEXT: ret
;
-; RV64-LABEL: buildvec_vid_step2_add0_v4i64:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vid.v v8
-; RV64-NEXT: vadd.vv v8, v8, v8
-; RV64-NEXT: ret
+; RV64V-LABEL: buildvec_vid_step2_add0_v4i64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64V-NEXT: vid.v v8
+; RV64V-NEXT: vadd.vv v8, v8, v8
+; RV64V-NEXT: ret
+;
+; RV64ZVE32-LABEL: buildvec_vid_step2_add0_v4i64:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: li a1, 6
+; RV64ZVE32-NEXT: sd a1, 24(a0)
+; RV64ZVE32-NEXT: li a1, 4
+; RV64ZVE32-NEXT: sd a1, 16(a0)
+; RV64ZVE32-NEXT: li a1, 2
+; RV64ZVE32-NEXT: sd a1, 8(a0)
+; RV64ZVE32-NEXT: sd zero, 0(a0)
+; RV64ZVE32-NEXT: ret
ret <4 x i64> <i64 0, i64 2, i64 4, i64 6>
}
@@ -420,21 +443,47 @@ define <2 x i8> @buildvec_dominant0_v2i8() {
}
define <2 x i8> @buildvec_dominant1_v2i8() {
-; CHECK-LABEL: buildvec_dominant1_v2i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v8, -1
-; CHECK-NEXT: ret
+; RV32-LABEL: buildvec_dominant1_v2i8:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; RV32-NEXT: vmv.v.i v8, -1
+; RV32-NEXT: ret
+;
+; RV64V-LABEL: buildvec_dominant1_v2i8:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; RV64V-NEXT: vmv.v.i v8, -1
+; RV64V-NEXT: ret
+;
+; RV64ZVE32-LABEL: buildvec_dominant1_v2i8:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32-NEXT: vmv.v.i v8, -1
+; RV64ZVE32-NEXT: ret
ret <2 x i8> <i8 undef, i8 -1>
}
define <2 x i8> @buildvec_dominant2_v2i8() {
-; CHECK-LABEL: buildvec_dominant2_v2i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT: vid.v v8
-; CHECK-NEXT: vrsub.vi v8, v8, 0
-; CHECK-NEXT: ret
+; RV32-LABEL: buildvec_dominant2_v2i8:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; RV32-NEXT: vid.v v8
+; RV32-NEXT: vrsub.vi v8, v8, 0
+; RV32-NEXT: ret
+;
+; RV64V-LABEL: buildvec_dominant2_v2i8:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; RV64V-NEXT: vid.v v8
+; RV64V-NEXT: vrsub.vi v8, v8, 0
+; RV64V-NEXT: ret
+;
+; RV64ZVE32-LABEL: buildvec_dominant2_v2i8:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; RV64ZVE32-NEXT: vid.v v8
+; RV64ZVE32-NEXT: vrsub.vi v8, v8, 0
+; RV64ZVE32-NEXT: ret
ret <2 x i8> <i8 0, i8 -1>
}
@@ -448,16 +497,25 @@ define void @buildvec_dominant0_v2i32(ptr %x) {
; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: ret
;
-; RV64-LABEL: buildvec_dominant0_v2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a1, %hi(.LCPI38_0)
-; RV64-NEXT: ld a1, %lo(.LCPI38_0)(a1)
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.i v8, -1
-; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma
-; RV64-NEXT: vmv.s.x v8, a1
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: ret
+; RV64V-LABEL: buildvec_dominant0_v2i32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: lui a1, %hi(.LCPI38_0)
+; RV64V-NEXT: ld a1, %lo(.LCPI38_0)(a1)
+; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64V-NEXT: vmv.v.i v8, -1
+; RV64V-NEXT: vsetvli zero, zero, e64, m1, tu, ma
+; RV64V-NEXT: vmv.s.x v8, a1
+; RV64V-NEXT: vse64.v v8, (a0)
+; RV64V-NEXT: ret
+;
+; RV64ZVE32-LABEL: buildvec_dominant0_v2i32:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: lui a1, %hi(.LCPI38_0)
+; RV64ZVE32-NEXT: ld a1, %lo(.LCPI38_0)(a1)
+; RV64ZVE32-NEXT: li a2, -1
+; RV64ZVE32-NEXT: sd a2, 8(a0)
+; RV64ZVE32-NEXT: sd a1, 0(a0)
+; RV64ZVE32-NEXT: ret
store <2 x i64> <i64 2049638230412172402, i64 -1>, ptr %x
ret void
}
@@ -472,14 +530,23 @@ define void @buildvec_dominant1_optsize_v2i32(ptr %x) optsize {
; RV32-NEXT: vse32.v v8, (a0)
; RV32-NEXT: ret
;
-; RV64-LABEL: buildvec_dominant1_optsize_v2i32:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a1, %hi(.LCPI39_0)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI39_0)
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vle64.v v8, (a1)
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: ret
+; RV64V-LABEL: buildvec_dominant1_optsize_v2i32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: lui a1, %hi(.LCPI39_0)
+; RV64V-NEXT: addi a1, a1, %lo(.LCPI39_0)
+; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64V-NEXT: vle64.v v8, (a1)
+; RV64V-NEXT: vse64.v v8, (a0)
+; RV64V-NEXT: ret
+;
+; RV64ZVE32-LABEL: buildvec_dominant1_optsize_v2i32:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: lui a1, %hi(.LCPI39_0)
+; RV64ZVE32-NEXT: ld a1, %lo(.LCPI39_0)(a1)
+; RV64ZVE32-NEXT: li a2, -1
+; RV64ZVE32-NEXT: sd a2, 8(a0)
+; RV64ZVE32-NEXT: sd a1, 0(a0)
+; RV64ZVE32-NEXT: ret
store <2 x i64> <i64 2049638230412172402, i64 -1>, ptr %x
ret void
}
@@ -497,15 +564,35 @@ define void @buildvec_seq_v8i8_v4i16(ptr %x) {
}
define void @buildvec_seq_v8i8_v2i32(ptr %x) {
-; CHECK-LABEL: buildvec_seq_v8i8_v2i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, 48
-; CHECK-NEXT: addi a1, a1, 513
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a1
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: ret
+; RV32-LABEL: buildvec_seq_v8i8_v2i32:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a1, 48
+; RV32-NEXT: addi a1, a1, 513
+; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v8, a1
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vse8.v v8, (a0)
+; RV32-NEXT: ret
+;
+; RV64V-LABEL: buildvec_seq_v8i8_v2i32:
+; RV64V: # %bb.0:
+; RV64V-NEXT: lui a1, 48
+; RV64V-NEXT: addi a1, a1, 513
+; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; RV64V-NEXT: vmv.v.x v8, a1
+; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64V-NEXT: vse8.v v8, (a0)
+; RV64V-NEXT: ret
+;
+; RV64ZVE32-LABEL: buildvec_seq_v8i8_v2i32:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: lui a1, 48
+; RV64ZVE32-NEXT: addi a1, a1, 513
+; RV64ZVE32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; RV64ZVE32-NEXT: vmv.v.x v8, a1
+; RV64ZVE32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64ZVE32-NEXT: vse8.v v8, (a0)
+; RV64ZVE32-NEXT: ret
store <8 x i8> <i8 1, i8 2, i8 3, i8 undef, i8 1, i8 2, i8 3, i8 undef>, ptr %x
ret void
}
@@ -520,15 +607,24 @@ define void @buildvec_seq_v16i8_v2i64(ptr %x) {
; RV32-NEXT: vse8.v v8, (a0)
; RV32-NEXT: ret
;
-; RV64-LABEL: buildvec_seq_v16i8_v2i64:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a1, %hi(.LCPI42_0)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI42_0)
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vlse64.v v8, (a1), zero
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT: vse8.v v8, (a0)
-; RV64-NEXT: ret
+; RV64V-LABEL: buildvec_seq_v16i8_v2i64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: lui a1, %hi(.LCPI42_0)
+; RV64V-NEXT: addi a1, a1, %lo(.LCPI42_0)
+; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64V-NEXT: vlse64.v v8, (a1), zero
+; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64V-NEXT: vse8.v v8, (a0)
+; RV64V-NEXT: ret
+;
+; RV64ZVE32-LABEL: buildvec_seq_v16i8_v2i64:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: lui a1, %hi(.LCPI42_0)
+; RV64ZVE32-NEXT: addi a1, a1, %lo(.LCPI42_0)
+; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64ZVE32-NEXT: vle8.v v8, (a1)
+; RV64ZVE32-NEXT: vse8.v v8, (a0)
+; RV64ZVE32-NEXT: ret
store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr %x
ret void
}
@@ -544,36 +640,79 @@ define void @buildvec_seq2_v16i8_v2i64(ptr %x) {
; RV32-NEXT: vse8.v v8, (a0)
; RV32-NEXT: ret
;
-; RV64-LABEL: buildvec_seq2_v16i8_v2i64:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a1, 528432
-; RV64-NEXT: addiw a1, a1, 513
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.x v8, a1
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT: vse8.v v8, (a0)
-; RV64-NEXT: ret
+; RV64V-LABEL: buildvec_seq2_v16i8_v2i64:
+; RV64V: # %bb.0:
+; RV64V-NEXT: lui a1, 528432
+; RV64V-NEXT: addiw a1, a1, 513
+; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64V-NEXT: vmv.v.x v8, a1
+; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64V-NEXT: vse8.v v8, (a0)
+; RV64V-NEXT: ret
+;
+; RV64ZVE32-LABEL: buildvec_seq2_v16i8_v2i64:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: lui a1, %hi(.LCPI43_0)
+; RV64ZVE32-NEXT: addi a1, a1, %lo(.LCPI43_0)
+; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64ZVE32-NEXT: vle8.v v8, (a1)
+; RV64ZVE32-NEXT: vse8.v v8, (a0)
+; RV64ZVE32-NEXT: ret
store <16 x i8> <i8 1, i8 2, i8 3, i8 129, i8 -1, i8 -1, i8 -1, i8 -1, i8 1, i8 2, i8 3, i8 129, i8 -1, i8 -1, i8 -1, i8 -1>, ptr %x
ret void
}
define void @buildvec_seq_v9i8(ptr %x) {
-; CHECK-LABEL: buildvec_seq_v9i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 73
-; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a1
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 3
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: li a1, 146
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a1
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
-; CHECK-NEXT: vsetivli zero, 9, e8, m1, ta, ma
-; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: ret
+; RV32-LABEL: buildvec_seq_v9i8:
+; RV32: # %bb.0:
+; RV32-NEXT: li a1, 73
+; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: vmv.v.i v8, 3
+; RV32-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-NEXT: li a1, 146
+; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; RV32-NEXT: vmerge.vim v8, v8, 2, v0
+; RV32-NEXT: vsetivli zero, 9, e8, m1, ta, ma
+; RV32-NEXT: vse8.v v8, (a0)
+; RV32-NEXT: ret
+;
+; RV64V-LABEL: buildvec_seq_v9i8:
+; RV64V: # %bb.0:
+; RV64V-NEXT: li a1, 73
+; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; RV64V-NEXT: vmv.s.x v0, a1
+; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64V-NEXT: vmv.v.i v8, 3
+; RV64V-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64V-NEXT: li a1, 146
+; RV64V-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64V-NEXT: vmv.s.x v0, a1
+; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; RV64V-NEXT: vmerge.vim v8, v8, 2, v0
+; RV64V-NEXT: vsetivli zero, 9, e8, m1, ta, ma
+; RV64V-NEXT: vse8.v v8, (a0)
+; RV64V-NEXT: ret
+;
+; RV64ZVE32-LABEL: buildvec_seq_v9i8:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: li a1, 73
+; RV64ZVE32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32-NEXT: vmv.s.x v0, a1
+; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64ZVE32-NEXT: vmv.v.i v8, 3
+; RV64ZVE32-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64ZVE32-NEXT: li a1, 146
+; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32-NEXT: vmv.s.x v0, a1
+; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, ma
+; RV64ZVE32-NEXT: vmerge.vim v8, v8, 2, v0
+; RV64ZVE32-NEXT: vsetivli zero, 9, e8, m1, ta, ma
+; RV64ZVE32-NEXT: vse8.v v8, (a0)
+; RV64ZVE32-NEXT: ret
store <9 x i8> <i8 1, i8 2, i8 3, i8 1, i8 2, i8 3, i8 1, i8 2, i8 3>, ptr %x
ret void
}
@@ -863,14 +1002,22 @@ define <4 x i64> @v4xi64_exact(i64 %a, i64 %b, i64 %c, i64 %d) vscale_range(2,2)
; RV32-NEXT: vslide1down.vx v8, v8, a3
; RV32-NEXT: ret
;
-; RV64-LABEL: v4xi64_exact:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.x v8, a2
-; RV64-NEXT: vslide1down.vx v9, v8, a3
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: ret
+; RV64V-LABEL: v4xi64_exact:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64V-NEXT: vmv.v.x v8, a2
+; RV64V-NEXT: vslide1down.vx v9, v8, a3
+; RV64V-NEXT: vmv.v.x v8, a0
+; RV64V-NEXT: vslide1down.vx v8, v8, a1
+; RV64V-NEXT: ret
+;
+; RV64ZVE32-LABEL: v4xi64_exact:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: sd a4, 24(a0)
+; RV64ZVE32-NEXT: sd a3, 16(a0)
+; RV64ZVE32-NEXT: sd a2, 8(a0)
+; RV64ZVE32-NEXT: sd a1, 0(a0)
+; RV64ZVE32-NEXT: ret
%v1 = insertelement <4 x i64> poison, i64 %a, i32 0
%v2 = insertelement <4 x i64> %v1, i64 %b, i32 1
%v3 = insertelement <4 x i64> %v2, i64 %c, i32 2
@@ -907,18 +1054,31 @@ define <8 x i64> @v8xi64_exact(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i
; RV32-NEXT: vslide1down.vx v11, v11, t0
; RV32-NEXT: ret
;
-; RV64-LABEL: v8xi64_exact:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.x v8, a2
-; RV64-NEXT: vslide1down.vx v9, v8, a3
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: vmv.v.x v10, a4
-; RV64-NEXT: vslide1down.vx v10, v10, a5
-; RV64-NEXT: vmv.v.x v11, a6
-; RV64-NEXT: vslide1down.vx v11, v11, a7
-; RV64-NEXT: ret
+; RV64V-LABEL: v8xi64_exact:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64V-NEXT: vmv.v.x v8, a2
+; RV64V-NEXT: vslide1down.vx v9, v8, a3
+; RV64V-NEXT: vmv.v.x v8, a0
+; RV64V-NEXT: vslide1down.vx v8, v8, a1
+; RV64V-NEXT: vmv.v.x v10, a4
+; RV64V-NEXT: vslide1down.vx v10, v10, a5
+; RV64V-NEXT: vmv.v.x v11, a6
+; RV64V-NEXT: vslide1down.vx v11, v11, a7
+; RV64V-NEXT: ret
+;
+; RV64ZVE32-LABEL: v8xi64_exact:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: ld t0, 0(sp)
+; RV64ZVE32-NEXT: sd t0, 56(a0)
+; RV64ZVE32-NEXT: sd a7, 48(a0)
+; RV64ZVE32-NEXT: sd a6, 40(a0)
+; RV64ZVE32-NEXT: sd a5, 32(a0)
+; RV64ZVE32-NEXT: sd a4, 24(a0)
+; RV64ZVE32-NEXT: sd a3, 16(a0)
+; RV64ZVE32-NEXT: sd a2, 8(a0)
+; RV64ZVE32-NEXT: sd a1, 0(a0)
+; RV64ZVE32-NEXT: ret
%v1 = insertelement <8 x i64> poison, i64 %a, i32 0
%v2 = insertelement <8 x i64> %v1, i64 %b, i32 1
%v3 = insertelement <8 x i64> %v2, i64 %c, i32 2
@@ -946,16 +1106,28 @@ define <8 x i64> @v8xi64_exact_equal_halves(i64 %a, i64 %b, i64 %c, i64 %d) vsca
; RV32-NEXT: vmv.v.v v11, v9
; RV32-NEXT: ret
;
-; RV64-LABEL: v8xi64_exact_equal_halves:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.x v8, a2
-; RV64-NEXT: vslide1down.vx v9, v8, a3
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: vmv.v.v v10, v8
-; RV64-NEXT: vmv.v.v v11, v9
-; RV64-NEXT: ret
+; RV64V-LABEL: v8xi64_exact_equal_halves:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64V-NEXT: vmv.v.x v8, a2
+; RV64V-NEXT: vslide1down.vx v9, v8, a3
+; RV64V-NEXT: vmv.v.x v8, a0
+; RV64V-NEXT: vslide1down.vx v8, v8, a1
+; RV64V-NEXT: vmv.v.v v10, v8
+; RV64V-NEXT: vmv.v.v v11, v9
+; RV64V-NEXT: ret
+;
+; RV64ZVE32-LABEL: v8xi64_exact_equal_halves:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: sd a4, 56(a0)
+; RV64ZVE32-NEXT: sd a3, 48(a0)
+; RV64ZVE32-NEXT: sd a2, 40(a0)
+; RV64ZVE32-NEXT: sd a1, 32(a0)
+; RV64ZVE32-NEXT: sd a4, 24(a0)
+; RV64ZVE32-NEXT: sd a3, 16(a0)
+; RV64ZVE32-NEXT: sd a2, 8(a0)
+; RV64ZVE32-NEXT: sd a1, 0(a0)
+; RV64ZVE32-NEXT: ret
%v1 = insertelement <8 x i64> poison, i64 %a, i32 0
%v2 = insertelement <8 x i64> %v1, i64 %b, i32 1
%v3 = insertelement <8 x i64> %v2, i64 %c, i32 2
@@ -981,14 +1153,22 @@ define <8 x i64> @v8xi64_exact_undef_suffix(i64 %a, i64 %b, i64 %c, i64 %d) vsca
; RV32-NEXT: vslide1down.vx v8, v8, a3
; RV32-NEXT: ret
;
-; RV64-LABEL: v8xi64_exact_undef_suffix:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.x v8, a2
-; RV64-NEXT: vslide1down.vx v9, v8, a3
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: ret
+; RV64V-LABEL: v8xi64_exact_undef_suffix:
+; RV64V: # %bb.0:
+; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV64V-NEXT: vmv.v.x v8, a2
+; RV64V-NEXT: vslide1down.vx v9, v8, a3
+; RV64V-NEXT: vmv.v.x v8, a0
+; RV64V-NEXT: vslide1down.vx v8, v8, a1
+; RV64V-NEXT: ret
+;
+; RV64ZVE32-LABEL: v8xi64_exact_undef_suffix:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: sd a4, 24(a0)
+; RV64ZVE32-NEXT: sd a3, 16(a0)
+; RV64ZVE32-NEXT: sd a2, 8(a0)
+; RV64ZVE32-NEXT: sd a1, 0(a0)
+; RV64ZVE32-NEXT: ret
%v1 = insertelement <8 x i64> poison, i64 %a, i32 0
%v2 = insertelement <8 x i64> %v1, i64 %b, i32 1
%v3 = insertelement <8 x i64> %v2, i64 %c, i32 2
@@ -1010,14 +1190,22 @@ define <8 x i64> @v8xi64_exact_undef_prefix(i64 %a, i64 %b, i64 %c, i64 %d) vsca
; RV32-NEXT: vslide1down.vx v10, v8, a3
; RV32-NEXT: ret
;
-; RV64-LABEL: v8xi64_exact_undef_prefix:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.x v8, a2
-; RV64-NEXT: vslide1down.vx v11, v8, a3
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vslide1down.vx v10, v8, ...
[truncated]
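The test churn follows from ELEN being 32 under Zve32x: i64 is no longer a legal vector element type, so the v2i64/v4i64/v8i64 cases above are scalarized into plain sd stores, and the small e8/e32 vectors use mf4/m1 where the RV64V configuration uses mf8/mf2, because fractional LMUL is bounded below by SEW/ELEN. A rough standalone sketch of that LMUL bound (hypothetical helper, not LLVM code, assuming the usual RVV constraint LMUL >= SEW/ELEN):

#include <cstdio>

// Smallest legal fractional LMUL for a given SEW, expressed as the
// denominator D of LMUL = 1/D (D == 1 means LMUL = 1). Assumes the RVV rule
// that LMUL >= SEW/ELEN and that fractional LMUL never goes below 1/8.
static unsigned smallestLMULDenominator(unsigned SEW, unsigned ELen) {
  unsigned D = ELen / SEW;
  return D > 8 ? 8 : (D == 0 ? 1 : D);
}

int main() {
  // Denominator 8 -> mf8, 4 -> mf4, 2 -> mf2, 1 -> m1.
  std::printf("SEW=8,  ELEN=64 -> 1/%u\n", smallestLMULDenominator(8, 64));  // mf8
  std::printf("SEW=8,  ELEN=32 -> 1/%u\n", smallestLMULDenominator(8, 32));  // mf4
  std::printf("SEW=32, ELEN=64 -> 1/%u\n", smallestLMULDenominator(32, 64)); // mf2
  std::printf("SEW=32, ELEN=32 -> 1/%u\n", smallestLMULDenominator(32, 32)); // m1
  return 0;
}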
wangpc-pp approved these changes on Jan 9, 2024:
LGTM.
justinfargnoli pushed a commit to justinfargnoli/llvm-project that referenced this pull request on Jan 28, 2024:
…lvm#77355) This is needed to properly support Zve32x.