diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index addeab8d9aba6..bf58226e0bd39 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1709,6 +1709,11 @@ foreach nf = {2-8} in
                        "true", "vlseg"#nf#"eN.v and vsseg"#nf#"eN.v are "
                        "implemented as a wide memory op and shuffle">;
 
+def TuneVLDependentLatency
+    : SubtargetFeature<"vl-dependent-latency", "HasVLDependentLatency", "true",
+                       "Latency of vector instructions is dependent on the "
+                       "dynamic value of vl">;
+
 def Experimental
     : SubtargetFeature<"experimental", "HasExperimental", "true",
                        "Experimental intrinsics">;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d295a45149d3c..98b613d9cc856 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -12341,9 +12341,10 @@ SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
 
   SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
 
-  SDValue SlideDown =
-      getVSlidedown(DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1,
-                    DownOffset, TrueMask, UpOffset);
+  SDValue SlideDown = getVSlidedown(
+      DAG, Subtarget, DL, VecVT, DAG.getUNDEF(VecVT), V1, DownOffset, TrueMask,
+      Subtarget.hasVLDependentLatency() ? UpOffset
+                                        : DAG.getRegister(RISCV::X0, XLenVT));
   return getVSlideup(DAG, Subtarget, DL, VecVT, SlideDown, V2, UpOffset,
                      TrueMask, DAG.getRegister(RISCV::X0, XLenVT),
                      RISCVVType::TAIL_AGNOSTIC);
@@ -13367,7 +13368,7 @@ RISCVTargetLowering::lowerVPSpliceExperimental(SDValue Op,
   if (ImmValue != 0)
     Op1 = getVSlidedown(DAG, Subtarget, DL, ContainerVT,
                         DAG.getUNDEF(ContainerVT), Op1, DownOffset, Mask,
-                        UpOffset);
+                        Subtarget.hasVLDependentLatency() ? UpOffset : EVL2);
 
   SDValue Result = getVSlideup(DAG, Subtarget, DL, ContainerVT, Op1, Op2,
                                UpOffset, Mask, EVL2, RISCVVType::TAIL_AGNOSTIC);
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 03214fe8239ce..838edf6c57250 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -275,7 +275,8 @@ def SIFIVE_U74 : RISCVProcessorModel<"sifive-u74",
 defvar SiFiveIntelligenceTuneFeatures = !listconcat(SiFive7TuneFeatures,
                                                     [TuneDLenFactor2,
                                                      TuneOptimizedZeroStrideLoad,
-                                                     TuneOptimizedNF2SegmentLoadStore]);
+                                                     TuneOptimizedNF2SegmentLoadStore,
+                                                     TuneVLDependentLatency]);
 def SIFIVE_X280 : RISCVProcessorModel<"sifive-x280", SiFive7Model,
                                       [Feature64Bit,
                                        FeatureStdExtI,
diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll
index 95d330889d2c0..26f5ab10f6c36 100644
--- a/llvm/test/CodeGen/RISCV/features-info.ll
+++ b/llvm/test/CodeGen/RISCV/features-info.ll
@@ -171,6 +171,7 @@
 ; CHECK-NEXT: use-postra-scheduler - Schedule again after register allocation.
 ; CHECK-NEXT: v - 'V' (Vector Extension for Application Processors).
 ; CHECK-NEXT: ventana-veyron - Ventana Veyron-Series processors.
+; CHECK-NEXT: vl-dependent-latency - Latency of vector instructions is dependent on the dynamic value of vl.
 ; CHECK-NEXT: vxrm-pipeline-flush - VXRM writes causes pipeline flush.
 ; CHECK-NEXT: xandesperf - 'XAndesPerf' (Andes Performance Extension).
 ; CHECK-NEXT: xandesvbfhcvt - 'XAndesVBFHCvt' (Andes Vector BFLOAT16 Conversion Extension).
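The test updates below show the intent of the lowering change: on cores without `vl-dependent-latency` (the NOVLDEP prefixes), the vslidedown keeps VL at VLMAX so it can share a single `vsetvli` with the following vslideup, while on cores that do have the feature (the VLDEP prefixes, e.g. sifive-x280 via `SiFiveIntelligenceTuneFeatures`), VL is clamped to the elements the splice still needs, accepting the extra `vsetvli` toggle in exchange for a shorter-latency slide. A minimal standalone sketch of that decision, with hypothetical names and no LLVM dependencies, assuming only what the diff above shows:

```cpp
#include <iostream>
#include <string>

// Hypothetical stand-in for the subtarget query added in RISCVFeatures.td;
// the real hook in the patch is Subtarget.hasVLDependentLatency().
struct FakeSubtarget {
  bool HasVLDependentLatency;
};

// Mirrors the ternary in lowerVECTOR_SPLICE: choose the AVL for the
// vslidedown. "VLMAX" models DAG.getRegister(RISCV::X0, XLenVT); "UpOffset"
// models the exact element count the splice still needs.
std::string pickSlidedownAVL(const FakeSubtarget &ST) {
  return ST.HasVLDependentLatency
             ? "UpOffset (clamp VL, pay for an extra vsetvli)"
             : "VLMAX (x0, reuse the slideup's vsetvli)";
}

int main() {
  std::cout << "generic rv64gcv: " << pickSlidedownAVL({false}) << "\n";
  std::cout << "sifive-x280:     " << pickSlidedownAVL({true}) << "\n";
}
```

Whether the clamp is a win depends on whether the microarchitecture finishes a shorter-VL slide faster than it pays for the extra `vsetvli`; that trade-off is exactly what the new tune flag encodes, which is why only the SiFive intelligence-family tuning list opts in.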
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll index fb22e5eaca2cc..73c2e06f14d63 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vp-splice.ll @@ -1,32 +1,52 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfh,+zvfbfmin -verify-machineinstrs -riscv-v-vector-bits-min=128 \ -; RUN: < %s | FileCheck %s --check-prefixes=CHECK,ZVFH -; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zvfhmin,+zvfbfmin -verify-machineinstrs -riscv-v-vector-bits-min=128 \ -; RUN: < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN +; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfh,+zvfbfmin -verify-machineinstrs \ +; RUN: < %s | FileCheck %s --check-prefixes=CHECK,NOVLDEP,ZVFH +; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs \ +; RUN: < %s | FileCheck %s --check-prefixes=CHECK,NOVLDEP,ZVFHMIN +; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfh,+zvfbfmin,+vl-dependent-latency -verify-machineinstrs \ +; RUN: < %s | FileCheck %s --check-prefixes=CHECK,VLDEP,ZVFH +; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfhmin,+zvfbfmin,+vl-dependent-latency -verify-machineinstrs \ +; RUN: < %s | FileCheck %s --check-prefixes=CHECK,VLDEP,ZVFHMIN define <2 x i64> @test_vp_splice_v2i64(<2 x i64> %va, <2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v2i64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 1 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v2i64: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 1 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v2i64: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 1 +; VLDEP-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 1, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <2 x i64> %v } define <2 x i64> @test_vp_splice_v2i64_negative_offset(<2 x i64> %va, <2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v2i64_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v2i64_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v2i64_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 1 +; VLDEP-NEXT: ret %v = call <2 x i64> 
@llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 -1, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <2 x i64> %v @@ -44,260 +64,419 @@ define <2 x i64> @test_vp_splice_v2i64_zero_offset(<2 x i64> %va, <2 x i64> %vb, } define <2 x i64> @test_vp_splice_v2i64_masked(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v2i64_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v2i64_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 1, v0.t +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v2i64_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 1, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; VLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; VLDEP-NEXT: ret %v = call <2 x i64> @llvm.experimental.vp.splice.v2i64(<2 x i64> %va, <2 x i64> %vb, i32 1, <2 x i1> %mask, i32 %evla, i32 %evlb) ret <2 x i64> %v } define <4 x i32> @test_vp_splice_v4i32(<4 x i32> %va, <4 x i32> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v4i32: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 3 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v4i32: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3 +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v4i32: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 3 +; VLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 3, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <4 x i32> %v } define <4 x i32> @test_vp_splice_v4i32_negative_offset(<4 x i32> %va, <4 x i32> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v4i32_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 3 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v4i32_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 3 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v4i32_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetivli zero, 3, e32, m1, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 3 +; VLDEP-NEXT: ret %v = call <4 x i32> 
@llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 -3, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <4 x i32> %v } define <4 x i32> @test_vp_splice_v4i32_masked(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v4i32_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 3, v0.t -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v4i32_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3, v0.t +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v4i32_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 3, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; VLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; VLDEP-NEXT: ret %v = call <4 x i32> @llvm.experimental.vp.splice.v4i32(<4 x i32> %va, <4 x i32> %vb, i32 3, <4 x i1> %mask, i32 %evla, i32 %evlb) ret <4 x i32> %v } define <8 x i16> @test_vp_splice_v8i16(<8 x i16> %va, <8 x i16> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v8i16: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 5 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v8i16: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 5 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v8i16: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 5 +; VLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <8 x i16> %v } define <8 x i16> @test_vp_splice_v8i16_negative_offset(<8 x i16> %va, <8 x i16> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v8i16_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetivli zero, 5, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 5 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v8i16_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 5 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v8i16_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetivli zero, 5, e16, m1, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 5 +; VLDEP-NEXT: ret %v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 -5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <8 x 
i16> %v } define <8 x i16> @test_vp_splice_v8i16_masked(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v8i16_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 5, v0.t -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v8i16_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 5, v0.t +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v8i16_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 5, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; VLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; VLDEP-NEXT: ret %v = call <8 x i16> @llvm.experimental.vp.splice.v8i16(<8 x i16> %va, <8 x i16> %vb, i32 5, <8 x i1> %mask, i32 %evla, i32 %evlb) ret <8 x i16> %v } define <16 x i8> @test_vp_splice_v16i8(<16 x i8> %va, <16 x i8> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v16i8: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 5 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v16i8: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 5 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v16i8: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 5 +; VLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %v = call <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8> %va, <16 x i8> %vb, i32 5, <16 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <16 x i8> %v } define <16 x i8> @test_vp_splice_v16i8_negative_offset(<16 x i8> %va, <16 x i8> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v16i8_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetivli zero, 5, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 5 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v16i8_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 5 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v16i8_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetivli zero, 5, e8, m1, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 5 +; VLDEP-NEXT: ret %v = call <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8> %va, <16 x i8> %vb, i32 -5, <16 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <16 x i8> %v } define <16 x i8> @test_vp_splice_v16i8_masked(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %mask, i32 zeroext %evla, i32 zeroext 
%evlb) { -; CHECK-LABEL: test_vp_splice_v16i8_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 5, v0.t -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu -; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v16i8_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 5, v0.t +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v16i8_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 5, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, mu +; VLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; VLDEP-NEXT: ret %v = call <16 x i8> @llvm.experimental.vp.splice.v16i8(<16 x i8> %va, <16 x i8> %vb, i32 5, <16 x i1> %mask, i32 %evla, i32 %evlb) ret <16 x i8> %v } define <2 x double> @test_vp_splice_v2f64(<2 x double> %va, <2 x double> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 1 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v2f64: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 1 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v2f64: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 1 +; VLDEP-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %v = call <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double> %va, <2 x double> %vb, i32 1, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <2 x double> %v } define <2 x double> @test_vp_splice_v2f64_negative_offset(<2 x double> %va, <2 x double> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v2f64_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v2f64_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v2f64_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 1 +; VLDEP-NEXT: ret %v = call <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double> %va, <2 x double> %vb, i32 -1, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <2 x double> %v } define <2 x double> @test_vp_splice_v2f64_masked(<2 x double> %va, <2 x double> %vb, <2 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v2f64_masked: -; CHECK: # %bb.0: -; 
CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v2f64_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 1, v0.t +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v2f64_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 1, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; VLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; VLDEP-NEXT: ret %v = call <2 x double> @llvm.experimental.vp.splice.v2f64(<2 x double> %va, <2 x double> %vb, i32 1, <2 x i1> %mask, i32 %evla, i32 %evlb) ret <2 x double> %v } define <4 x float> @test_vp_splice_v4f32(<4 x float> %va, <4 x float> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 3 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v4f32: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3 +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v4f32: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 3 +; VLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %v = call <4 x float> @llvm.experimental.vp.splice.v4f32(<4 x float> %va, <4 x float> %vb, i32 3, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <4 x float> %v } define <4 x float> @test_vp_splice_v4f32_negative_offset(<4 x float> %va, <4 x float> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v4f32_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 3 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v4f32_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 3 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v4f32_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetivli zero, 3, e32, m1, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 3 +; VLDEP-NEXT: ret %v = call <4 x float> @llvm.experimental.vp.splice.v4f32(<4 x float> %va, <4 x float> %vb, i32 -3, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <4 x float> %v } define <4 x float> @test_vp_splice_v4f32_masked(<4 x float> %va, <4 x float> %vb, <4 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v4f32_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; 
CHECK-NEXT: vslidedown.vi v8, v8, 3, v0.t -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v4f32_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3, v0.t +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v4f32_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 3, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; VLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; VLDEP-NEXT: ret %v = call <4 x float> @llvm.experimental.vp.splice.v4f32(<4 x float> %va, <4 x float> %vb, i32 3, <4 x i1> %mask, i32 %evla, i32 %evlb) ret <4 x float> %v } define <8 x half> @test_vp_splice_v8f16(<8 x half> %va, <8 x half> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v8f16: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 5 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v8f16: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 5 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v8f16: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 5 +; VLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %v = call <8 x half> @llvm.experimental.vp.splice.v8f16(<8 x half> %va, <8 x half> %vb, i32 5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <8 x half> %v } define <8 x half> @test_vp_splice_v8f16_negative_offset(<8 x half> %va, <8 x half> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v8f16_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetivli zero, 5, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 5 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v8f16_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 5 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v8f16_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetivli zero, 5, e16, m1, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 5 +; VLDEP-NEXT: ret %v = call <8 x half> @llvm.experimental.vp.splice.v8f16(<8 x half> %va, <8 x half> %vb, i32 -5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <8 x half> %v } define <8 x half> @test_vp_splice_v8f16_masked(<8 x half> %va, <8 x half> %vb, <8 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v8f16_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 5, v0.t -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; 
CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v8f16_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 5, v0.t +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v8f16_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 5, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; VLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; VLDEP-NEXT: ret %v = call <8 x half> @llvm.experimental.vp.splice.v8f16(<8 x half> %va, <8 x half> %vb, i32 5, <8 x i1> %mask, i32 %evla, i32 %evlb) ret <8 x half> %v } @@ -349,11 +528,9 @@ define <4 x half> @test_vp_splice_nxv2f16_with_firstelt(half %first, <4 x half> ; ; ZVFHMIN-LABEL: test_vp_splice_nxv2f16_with_firstelt: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, ta, ma -; ZVFHMIN-NEXT: vmv.s.x v9, a1 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; ZVFHMIN-NEXT: vslideup.vi v9, v8, 1, v0.t +; ZVFHMIN-NEXT: fmv.x.h a1, fa0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; ZVFHMIN-NEXT: vslide1up.vx v9, v8, a1, v0.t ; ZVFHMIN-NEXT: vmv1r.v v8, v9 ; ZVFHMIN-NEXT: ret %va = insertelement <4 x half> poison, half %first, i32 0 @@ -362,42 +539,67 @@ define <4 x half> @test_vp_splice_nxv2f16_with_firstelt(half %first, <4 x half> } define <8 x bfloat> @test_vp_splice_v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 5 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v8bf16: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 5 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v8bf16: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 5 +; VLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %v = call <8 x bfloat> @llvm.experimental.vp.splice.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, i32 5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <8 x bfloat> %v } define <8 x bfloat> @test_vp_splice_v8bf16_negative_offset(<8 x bfloat> %va, <8 x bfloat> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v8bf16_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetivli zero, 5, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 5 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v8bf16_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 5 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v8bf16_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetivli zero, 5, e16, m1, ta, ma +; VLDEP-NEXT: 
vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 5 +; VLDEP-NEXT: ret %v = call <8 x bfloat> @llvm.experimental.vp.splice.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, i32 -5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <8 x bfloat> %v } define <8 x bfloat> @test_vp_splice_v8bf16_masked(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v8bf16_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 5, v0.t -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v8bf16_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 5, v0.t +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v8bf16_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 5, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; VLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; VLDEP-NEXT: ret %v = call <8 x bfloat> @llvm.experimental.vp.splice.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, i32 5, <8 x i1> %mask, i32 %evla, i32 %evlb) ret <8 x bfloat> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll index 90d798b167cfc..67a09b071c1a8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple riscv32 -mattr=+m,+f,+d,+v,+zvfhmin,+zvfbfmin < %s | FileCheck %s -; RUN: llc -mtriple riscv64 -mattr=+m,+f,+d,+v,+zvfhmin,+zvfbfmin < %s | FileCheck %s -; RUN: llc -mtriple riscv32 -mattr=+m,+f,+d,+v,+zvfh,+zvfbfmin < %s | FileCheck %s -; RUN: llc -mtriple riscv64 -mattr=+m,+f,+d,+v,+zvfh,+zvfbfmin < %s | FileCheck %s +; RUN: llc -mtriple riscv32 -mattr=+m,+f,+d,+v,+zvfhmin,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,NOVLDEP +; RUN: llc -mtriple riscv64 -mattr=+m,+f,+d,+v,+zvfhmin,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,NOVLDEP +; RUN: llc -mtriple riscv32 -mattr=+m,+f,+d,+v,+zvfh,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,NOVLDEP +; RUN: llc -mtriple riscv64 -mattr=+m,+f,+d,+v,+zvfh,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,NOVLDEP +; RUN: llc -mtriple riscv32 -mattr=+m,+f,+d,+v,+zvfhmin,+zvfbfmin,+vl-dependent-latency < %s | FileCheck %s --check-prefixes=CHECK,VLDEP +; RUN: llc -mtriple riscv64 -mattr=+m,+f,+d,+v,+zvfhmin,+zvfbfmin,+vl-dependent-latency < %s | FileCheck %s --check-prefixes=CHECK,VLDEP +; RUN: llc -mtriple riscv32 -mattr=+m,+f,+d,+v,+zvfh,+zvfbfmin,+vl-dependent-latency < %s | FileCheck %s --check-prefixes=CHECK,VLDEP +; RUN: llc -mtriple riscv64 -mattr=+m,+f,+d,+v,+zvfh,+zvfbfmin,+vl-dependent-latency < %s | FileCheck %s --check-prefixes=CHECK,VLDEP ; Tests assume VLEN=128 or vscale_range_min=2. 
@@ -31,25 +35,43 @@ define @splice_nxv1i1_offset_negone( %a, @splice_nxv1i1_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv1i1_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: vmerge.vim v10, v8, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 1 -; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma -; CHECK-NEXT: vslideup.vx v8, v10, a0 -; CHECK-NEXT: vand.vi v8, v8, 1 -; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv1i1_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: vmerge.vim v10, v8, 1, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: srli a0, a0, 3 +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 1 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vslideup.vx v8, v10, a0 +; NOVLDEP-NEXT: vand.vi v8, v8, 1 +; NOVLDEP-NEXT: vmsne.vi v0, v8, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv1i1_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: vmerge.vim v10, v8, 1, v0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: srli a0, a0, 3 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 1 +; VLDEP-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v10, a0 +; VLDEP-NEXT: vand.vi v8, v8, 1 +; VLDEP-NEXT: vmsne.vi v0, v8, 0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv1i1( %a, %b, i32 1) ret %res } @@ -79,25 +101,43 @@ define @splice_nxv2i1_offset_negone( %a, @splice_nxv2i1_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2i1_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: vmerge.vim v10, v8, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 3 -; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslideup.vx v8, v10, a0 -; CHECK-NEXT: vand.vi v8, v8, 1 -; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv2i1_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: vmerge.vim v10, v8, 1, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: srli a0, a0, 2 +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3 +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vslideup.vx v8, v10, a0 +; NOVLDEP-NEXT: vand.vi v8, v8, 1 +; NOVLDEP-NEXT: vmsne.vi v0, v8, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv2i1_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli a0, 
zero, e8, mf4, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: vmerge.vim v10, v8, 1, v0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: srli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 3 +; VLDEP-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v10, a0 +; VLDEP-NEXT: vand.vi v8, v8, 1 +; VLDEP-NEXT: vmsne.vi v0, v8, 0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv2i1( %a, %b, i32 3) ret %res } @@ -127,25 +167,43 @@ define @splice_nxv4i1_offset_negone( %a, @splice_nxv4i1_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4i1_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: vmerge.vim v10, v8, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -7 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 7 -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v10, a0 -; CHECK-NEXT: vand.vi v8, v8, 1 -; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv4i1_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: vmerge.vim v10, v8, 1, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: srli a0, a0, 1 +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 7 +; NOVLDEP-NEXT: addi a0, a0, -7 +; NOVLDEP-NEXT: vslideup.vx v8, v10, a0 +; NOVLDEP-NEXT: vand.vi v8, v8, 1 +; NOVLDEP-NEXT: vmsne.vi v0, v8, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv4i1_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: vmerge.vim v10, v8, 1, v0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: srli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -7 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 7 +; VLDEP-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v10, a0 +; VLDEP-NEXT: vand.vi v8, v8, 1 +; VLDEP-NEXT: vmsne.vi v0, v8, 0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv4i1( %a, %b, i32 7) ret %res } @@ -174,24 +232,41 @@ define @splice_nxv8i1_offset_negone( %a, @splice_nxv8i1_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8i1_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: vmerge.vim v10, v8, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: addi a0, a0, -15 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 15 -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v10, a0 -; CHECK-NEXT: vand.vi v8, v8, 1 -; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv8i1_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: 
vsetvli a0, zero, e8, m1, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: vmerge.vim v10, v8, 1, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 15 +; NOVLDEP-NEXT: addi a0, a0, -15 +; NOVLDEP-NEXT: vslideup.vx v8, v10, a0 +; NOVLDEP-NEXT: vand.vi v8, v8, 1 +; NOVLDEP-NEXT: vmsne.vi v0, v8, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv8i1_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: vmerge.vim v10, v8, 1, v0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: addi a0, a0, -15 +; VLDEP-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 15 +; VLDEP-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v10, a0 +; VLDEP-NEXT: vand.vi v8, v8, 1 +; VLDEP-NEXT: vmsne.vi v0, v8, 0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv8i1( %a, %b, i32 15) ret %res } @@ -221,25 +296,43 @@ define @splice_nxv16i1_offset_negone( %a, < } define @splice_nxv16i1_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv16i1_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: vmerge.vim v12, v10, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -31 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 31 -; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v12, a0 -; CHECK-NEXT: vand.vi v8, v8, 1 -; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv16i1_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v10, 0 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: vmerge.vim v12, v10, 1, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v8, v10, 1, v0 +; NOVLDEP-NEXT: slli a0, a0, 1 +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 31 +; NOVLDEP-NEXT: addi a0, a0, -31 +; NOVLDEP-NEXT: vslideup.vx v8, v12, a0 +; NOVLDEP-NEXT: vand.vi v8, v8, 1 +; NOVLDEP-NEXT: vmsne.vi v0, v8, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv16i1_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v10, 0 +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: vmerge.vim v12, v10, 1, v0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v8, v10, 1, v0 +; VLDEP-NEXT: slli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -31 +; VLDEP-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 31 +; VLDEP-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v12, a0 +; VLDEP-NEXT: vand.vi v8, v8, 1 +; VLDEP-NEXT: vmsne.vi v0, v8, 0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv16i1( %a, %b, i32 31) ret %res } @@ -269,24 +362,43 @@ define @splice_nxv32i1_offset_negone( %a, < } define @splice_nxv32i1_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv32i1_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: 
vmv.v.i v12, 0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 63 -; CHECK-NEXT: vmerge.vim v16, v12, 1, v0 -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -63 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v16, v16, a1 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; CHECK-NEXT: vmerge.vim v8, v12, 1, v0 -; CHECK-NEXT: vslideup.vx v16, v8, a0 -; CHECK-NEXT: vand.vi v8, v16, 1 -; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv32i1_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v12, 0 +; NOVLDEP-NEXT: li a0, 63 +; NOVLDEP-NEXT: vmerge.vim v16, v12, 1, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v8, v12, 1, v0 +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -63 +; NOVLDEP-NEXT: vslideup.vx v8, v16, a0 +; NOVLDEP-NEXT: vand.vi v8, v8, 1 +; NOVLDEP-NEXT: vmsne.vi v0, v8, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv32i1_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; VLDEP-NEXT: vmv.v.i v12, 0 +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: li a1, 63 +; VLDEP-NEXT: vmerge.vim v16, v12, 1, v0 +; VLDEP-NEXT: slli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -63 +; VLDEP-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; VLDEP-NEXT: vslidedown.vx v16, v16, a1 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; VLDEP-NEXT: vmerge.vim v8, v12, 1, v0 +; VLDEP-NEXT: vslideup.vx v16, v8, a0 +; VLDEP-NEXT: vand.vi v8, v16, 1 +; VLDEP-NEXT: vmsne.vi v0, v8, 0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv32i1( %a, %b, i32 63) ret %res } @@ -316,24 +428,43 @@ define @splice_nxv64i1_offset_negone( %a, < } define @splice_nxv64i1_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv64i1_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.i v16, 0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 127 -; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -127 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v24, v24, a1 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 -; CHECK-NEXT: vslideup.vx v24, v8, a0 -; CHECK-NEXT: vand.vi v8, v24, 1 -; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv64i1_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v24, 0 +; NOVLDEP-NEXT: li a0, 127 +; NOVLDEP-NEXT: vmerge.vim v16, v24, 1, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v8, v24, 1, v0 +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 3 +; NOVLDEP-NEXT: addi a0, a0, -127 +; NOVLDEP-NEXT: vslideup.vx v8, v16, a0 +; NOVLDEP-NEXT: vand.vi v8, v8, 1 +; NOVLDEP-NEXT: vmsne.vi v0, v8, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv64i1_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; VLDEP-NEXT: vmv.v.i v16, 0 +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: li a1, 127 +; VLDEP-NEXT: vmerge.vim v24, v16, 1, v0 +; VLDEP-NEXT: slli a0, a0, 3 +; VLDEP-NEXT: addi a0, a0, -127 +; VLDEP-NEXT: 
vsetvli zero, a0, e8, m8, ta, ma +; VLDEP-NEXT: vslidedown.vx v24, v24, a1 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vsetvli a1, zero, e8, m8, ta, ma +; VLDEP-NEXT: vmerge.vim v8, v16, 1, v0 +; VLDEP-NEXT: vslideup.vx v24, v8, a0 +; VLDEP-NEXT: vand.vi v8, v24, 1 +; VLDEP-NEXT: vmsne.vi v0, v8, 0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv64i1( %a, %b, i32 127) ret %res } @@ -363,31 +494,51 @@ define @splice_nxv1i8_offset_negone( %a, @splice_nxv1i8_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv1i8_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -2 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 2 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv1i8_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 3 +; NOVLDEP-NEXT: addi a0, a0, -2 +; NOVLDEP-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 2 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv1i8_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 3 +; VLDEP-NEXT: addi a0, a0, -2 +; VLDEP-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 2 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv1i8( %a, %b, i32 -2) ret %res } define @splice_nxv1i8_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv1i8_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 1 -; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv1i8_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 1 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 3 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv1i8_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 3 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 1 +; VLDEP-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv1i8( %a, %b, i32 1) ret %res } @@ -417,31 +568,51 @@ define @splice_nxv2i8_offset_negone( %a, @splice_nxv2i8_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2i8_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -4 -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 4 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv2i8_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -4 +; NOVLDEP-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 4 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv2i8_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; 
VLDEP-NEXT: srli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -4 +; VLDEP-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 4 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv2i8( %a, %b, i32 -4) ret %res } define @splice_nxv2i8_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2i8_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 3 -; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv2i8_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv2i8_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 3 +; VLDEP-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv2i8( %a, %b, i32 3) ret %res } @@ -471,31 +642,51 @@ define @splice_nxv4i8_offset_negone( %a, @splice_nxv4i8_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4i8_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -8 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 8 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv4i8_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -8 +; NOVLDEP-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 8 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv4i8_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -8 +; VLDEP-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 8 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv4i8( %a, %b, i32 -8) ret %res } define @splice_nxv4i8_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4i8_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -7 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 7 -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv4i8_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 7 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -7 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv4i8_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -7 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; VLDEP-NEXT: 
vslidedown.vi v8, v8, 7 +; VLDEP-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv4i8( %a, %b, i32 7) ret %res } @@ -524,29 +715,47 @@ define @splice_nxv8i8_offset_negone( %a, @splice_nxv8i8_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8i8_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -16 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 16 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv8i8_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: addi a0, a0, -16 +; NOVLDEP-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 16 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv8i8_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: addi a0, a0, -16 +; VLDEP-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 16 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv8i8( %a, %b, i32 -16) ret %res } define @splice_nxv8i8_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8i8_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -15 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 15 -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv8i8_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: addi a0, a0, -15 +; NOVLDEP-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 15 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv8i8_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: addi a0, a0, -15 +; VLDEP-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 15 +; VLDEP-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv8i8( %a, %b, i32 15) ret %res } @@ -562,47 +771,78 @@ define @splice_nxv16i8_offset_zero( %a, @splice_nxv16i8_offset_negone( %a, %b) #0 { -; CHECK-LABEL: splice_nxv16i8_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e8, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv16i8_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v10, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv16i8_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e8, m2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v10, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv16i8( %a, %b, i32 -1) ret %res } define @splice_nxv16i8_offset_min( %a, 
%b) #0 { -; CHECK-LABEL: splice_nxv16i8_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -32 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v10, a1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv16i8_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -32 +; NOVLDEP-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: li a0, 32 +; NOVLDEP-NEXT: vslideup.vx v8, v10, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv16i8_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -32 +; VLDEP-NEXT: li a1, 32 +; VLDEP-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v10, a1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv16i8( %a, %b, i32 -32) ret %res } define @splice_nxv16i8_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv16i8_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -31 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 31 -; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v10, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv16i8_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 31 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -31 +; NOVLDEP-NEXT: vslideup.vx v8, v10, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv16i8_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -31 +; VLDEP-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 31 +; VLDEP-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v10, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv16i8( %a, %b, i32 31) ret %res } @@ -618,48 +858,80 @@ define @splice_nxv32i8_offset_zero( %a, @splice_nxv32i8_offset_negone( %a, %b) #0 { -; CHECK-LABEL: splice_nxv32i8_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e8, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vslideup.vi v8, v12, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv32i8_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v12, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv32i8_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e8, m4, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v12, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv32i8( %a, %b, i32 -1) ret %res } define @splice_nxv32i8_offset_min( %a, %b) #0 { -; 
CHECK-LABEL: splice_nxv32i8_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -64 -; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vslideup.vx v8, v12, a1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv32i8_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -64 +; NOVLDEP-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: li a0, 64 +; NOVLDEP-NEXT: vslideup.vx v8, v12, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv32i8_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -64 +; VLDEP-NEXT: li a1, 64 +; VLDEP-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v12, a1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv32i8( %a, %b, i32 -64) ret %res } define @splice_nxv32i8_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv32i8_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -63 -; CHECK-NEXT: li a1, 63 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a1 -; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; CHECK-NEXT: vslideup.vx v8, v12, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv32i8_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: li a0, 63 +; NOVLDEP-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -63 +; NOVLDEP-NEXT: vslideup.vx v8, v12, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv32i8_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -63 +; VLDEP-NEXT: li a1, 63 +; VLDEP-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a1 +; VLDEP-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v12, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv32i8( %a, %b, i32 63) ret %res } @@ -675,48 +947,80 @@ define @splice_nxv64i8_offset_zero( %a, @splice_nxv64i8_offset_negone( %a, %b) #0 { -; CHECK-LABEL: splice_nxv64i8_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vslideup.vi v8, v16, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv64i8_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 3 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e8, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v16, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv64i8_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 3 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e8, m8, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v16, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv64i8( %a, %b, i32 -1) 
ret %res } define @splice_nxv64i8_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv64i8_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -128 -; CHECK-NEXT: li a1, 128 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vslideup.vx v8, v16, a1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv64i8_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 3 +; NOVLDEP-NEXT: addi a0, a0, -128 +; NOVLDEP-NEXT: vsetvli a1, zero, e8, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: li a0, 128 +; NOVLDEP-NEXT: vslideup.vx v8, v16, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv64i8_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 3 +; VLDEP-NEXT: addi a0, a0, -128 +; VLDEP-NEXT: li a1, 128 +; VLDEP-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v16, a1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv64i8( %a, %b, i32 -128) ret %res } define @splice_nxv64i8_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv64i8_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -127 -; CHECK-NEXT: li a1, 127 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a1 -; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma -; CHECK-NEXT: vslideup.vx v8, v16, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv64i8_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: li a0, 127 +; NOVLDEP-NEXT: vsetvli a1, zero, e8, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 3 +; NOVLDEP-NEXT: addi a0, a0, -127 +; NOVLDEP-NEXT: vslideup.vx v8, v16, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv64i8_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 3 +; VLDEP-NEXT: addi a0, a0, -127 +; VLDEP-NEXT: li a1, 127 +; VLDEP-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a1 +; VLDEP-NEXT: vsetvli a1, zero, e8, m8, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v16, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv64i8( %a, %b, i32 127) ret %res } @@ -746,31 +1050,51 @@ define @splice_nxv1i16_offset_negone( %a, < } define @splice_nxv1i16_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv1i16_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -2 -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 2 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv1i16_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 3 +; NOVLDEP-NEXT: addi a0, a0, -2 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 2 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv1i16_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 3 +; VLDEP-NEXT: addi a0, a0, -2 +; VLDEP-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; VLDEP-NEXT: vslideup.vi 
v8, v9, 2 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv1i16( %a, %b, i32 -2) ret %res } define @splice_nxv1i16_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv1i16_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 1 -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv1i16_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 1 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 3 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv1i16_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 3 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 1 +; VLDEP-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv1i16( %a, %b, i32 1) ret %res } @@ -800,31 +1124,51 @@ define @splice_nxv2i16_offset_negone( %a, < } define @splice_nxv2i16_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2i16_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -4 -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 4 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv2i16_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -4 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 4 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv2i16_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -4 +; VLDEP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 4 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv2i16( %a, %b, i32 -4) ret %res } define @splice_nxv2i16_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2i16_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 3 -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv2i16_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv2i16_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 3 +; VLDEP-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv2i16( %a, %b, i32 3) 
ret %res } @@ -854,31 +1198,51 @@ define @splice_nxv4i16_offset_negone( %a, < } define @splice_nxv4i16_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4i16_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -8 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 8 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv4i16_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -8 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 8 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv4i16_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -8 +; VLDEP-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 8 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv4i16( %a, %b, i32 -8) ret %res } define @splice_nxv4i16_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4i16_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -7 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 7 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv4i16_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 7 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -7 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv4i16_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -7 +; VLDEP-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 7 +; VLDEP-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv4i16( %a, %b, i32 7) ret %res } @@ -894,43 +1258,70 @@ define @splice_nxv8i16_offset_zero( %a, @splice_nxv8i16_offset_negone( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8i16_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv8i16_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v10, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv8i16_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e16, m2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v10, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv8i16( %a, %b, i32 -1) ret %res } define @splice_nxv8i16_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8i16_offset_min: -; CHECK: # %bb.0: 
-; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -16 -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 16 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv8i16_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: addi a0, a0, -16 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v10, 16 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv8i16_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: addi a0, a0, -16 +; VLDEP-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v10, 16 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv8i16( %a, %b, i32 -16) ret %res } define @splice_nxv8i16_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8i16_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -15 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 15 -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v10, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv8i16_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: addi a0, a0, -15 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 15 +; NOVLDEP-NEXT: vslideup.vx v8, v10, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv8i16_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: addi a0, a0, -15 +; VLDEP-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 15 +; VLDEP-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v10, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv8i16( %a, %b, i32 15) ret %res } @@ -946,47 +1337,78 @@ define @splice_nxv16i16_offset_zero( %a, } define @splice_nxv16i16_offset_negone( %a, %b) #0 { -; CHECK-LABEL: splice_nxv16i16_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e16, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vslideup.vi v8, v12, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv16i16_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v12, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv16i16_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e16, m4, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v12, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv16i16( %a, %b, i32 -1) ret %res } define @splice_nxv16i16_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv16i16_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -32 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, 
e16, m4, ta, ma -; CHECK-NEXT: vslideup.vx v8, v12, a1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv16i16_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -32 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: li a0, 32 +; NOVLDEP-NEXT: vslideup.vx v8, v12, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv16i16_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -32 +; VLDEP-NEXT: li a1, 32 +; VLDEP-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v12, a1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv16i16( %a, %b, i32 -32) ret %res } define @splice_nxv16i16_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv16i16_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -31 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 31 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vslideup.vx v8, v12, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv16i16_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 31 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -31 +; NOVLDEP-NEXT: vslideup.vx v8, v12, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv16i16_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -31 +; VLDEP-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 31 +; VLDEP-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v12, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv16i16( %a, %b, i32 31) ret %res } @@ -1002,48 +1424,80 @@ define @splice_nxv32i16_offset_zero( %a, } define @splice_nxv32i16_offset_negone( %a, %b) #0 { -; CHECK-LABEL: splice_nxv32i16_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vslideup.vi v8, v16, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv32i16_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v16, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv32i16_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e16, m8, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v16, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv32i16( %a, %b, i32 -1) ret %res } define @splice_nxv32i16_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv32i16_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -64 -; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; 
CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vslideup.vx v8, v16, a1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv32i16_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -64 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: li a0, 64 +; NOVLDEP-NEXT: vslideup.vx v8, v16, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv32i16_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -64 +; VLDEP-NEXT: li a1, 64 +; VLDEP-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v16, a1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv32i16( %a, %b, i32 -64) ret %res } define @splice_nxv32i16_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv32i16_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -63 -; CHECK-NEXT: li a1, 63 -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a1 -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vslideup.vx v8, v16, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv32i16_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: li a0, 63 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -63 +; NOVLDEP-NEXT: vslideup.vx v8, v16, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv32i16_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -63 +; VLDEP-NEXT: li a1, 63 +; VLDEP-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a1 +; VLDEP-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v16, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv32i16( %a, %b, i32 63) ret %res } @@ -1073,31 +1527,51 @@ define @splice_nxv1i32_offset_negone( %a, < } define @splice_nxv1i32_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv1i32_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -2 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 2 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv1i32_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 3 +; NOVLDEP-NEXT: addi a0, a0, -2 +; NOVLDEP-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 2 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv1i32_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 3 +; VLDEP-NEXT: addi a0, a0, -2 +; VLDEP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 2 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv1i32( %a, %b, i32 -2) ret %res } define @splice_nxv1i32_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv1i32_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli zero, 
a0, e32, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 1 -; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv1i32_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 1 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 3 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv1i32_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 3 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 1 +; VLDEP-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv1i32( %a, %b, i32 1) ret %res } @@ -1127,31 +1601,51 @@ define @splice_nxv2i32_offset_negone( %a, < } define @splice_nxv2i32_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2i32_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -4 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 4 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv2i32_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -4 +; NOVLDEP-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 4 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv2i32_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -4 +; VLDEP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 4 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv2i32( %a, %b, i32 -4) ret %res } define @splice_nxv2i32_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2i32_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 3 -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv2i32_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv2i32_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 3 +; VLDEP-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv2i32( %a, %b, i32 3) ret %res } @@ -1167,46 +1661,76 @@ define @splice_nxv4i32_offset_zero( %a, @splice_nxv4i32_offset_negone( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4i32_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; 
CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv4i32_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v10, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv4i32_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v10, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv4i32( %a, %b, i32 -1) ret %res } define @splice_nxv4i32_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4i32_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -8 -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 8 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv4i32_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -8 +; NOVLDEP-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v10, 8 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv4i32_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -8 +; VLDEP-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v10, 8 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv4i32( %a, %b, i32 -8) ret %res } define @splice_nxv4i32_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4i32_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -7 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 7 -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v10, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv4i32_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 7 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -7 +; NOVLDEP-NEXT: vslideup.vx v8, v10, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv4i32_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -7 +; VLDEP-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 7 +; VLDEP-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v10, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv4i32( %a, %b, i32 7) ret %res } @@ -1222,43 +1746,70 @@ define @splice_nxv8i32_offset_zero( %a, @splice_nxv8i32_offset_negone( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8i32_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e32, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vslideup.vi v8, 
v12, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv8i32_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v12, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv8i32_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e32, m4, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v12, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv8i32( %a, %b, i32 -1) ret %res } define @splice_nxv8i32_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8i32_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -16 -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vslideup.vi v8, v12, 16 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv8i32_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: addi a0, a0, -16 +; NOVLDEP-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v12, 16 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv8i32_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: addi a0, a0, -16 +; VLDEP-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v12, 16 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv8i32( %a, %b, i32 -16) ret %res } define @splice_nxv8i32_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8i32_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -15 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 15 -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vslideup.vx v8, v12, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv8i32_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: addi a0, a0, -15 +; NOVLDEP-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 15 +; NOVLDEP-NEXT: vslideup.vx v8, v12, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv8i32_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: addi a0, a0, -15 +; VLDEP-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 15 +; VLDEP-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v12, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv8i32( %a, %b, i32 15) ret %res } @@ -1274,47 +1825,78 @@ define @splice_nxv16i32_offset_zero( %a, } define @splice_nxv16i32_offset_negone( %a, %b) #0 { -; CHECK-LABEL: splice_nxv16i32_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vslideup.vi v8, v16, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv16i32_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: 
vslideup.vi v8, v16, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv16i32_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e32, m8, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v16, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv16i32( %a, %b, i32 -1) ret %res } define @splice_nxv16i32_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv16i32_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -32 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vslideup.vx v8, v16, a1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv16i32_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -32 +; NOVLDEP-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: li a0, 32 +; NOVLDEP-NEXT: vslideup.vx v8, v16, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv16i32_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -32 +; VLDEP-NEXT: li a1, 32 +; VLDEP-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v16, a1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv16i32( %a, %b, i32 -32) ret %res } define @splice_nxv16i32_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv16i32_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -31 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 31 -; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; CHECK-NEXT: vslideup.vx v8, v16, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv16i32_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 31 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -31 +; NOVLDEP-NEXT: vslideup.vx v8, v16, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv16i32_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -31 +; VLDEP-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 31 +; VLDEP-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v16, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv16i32( %a, %b, i32 31) ret %res } @@ -1344,31 +1926,51 @@ define @splice_nxv1i64_offset_negone( %a, < } define @splice_nxv1i64_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv1i64_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -2 -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 2 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv1i64_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 3 +; NOVLDEP-NEXT: addi a0, a0, -2 +; NOVLDEP-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 
+; NOVLDEP-NEXT: vslideup.vi v8, v9, 2 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv1i64_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 3 +; VLDEP-NEXT: addi a0, a0, -2 +; VLDEP-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 2 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv1i64( %a, %b, i32 -2) ret %res } define @splice_nxv1i64_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv1i64_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 1 -; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv1i64_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 1 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 3 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv1i64_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 3 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 1 +; VLDEP-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv1i64( %a, %b, i32 1) ret %res } @@ -1384,46 +1986,76 @@ define @splice_nxv2i64_offset_zero( %a, @splice_nxv2i64_offset_negone( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2i64_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv2i64_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v10, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv2i64_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v10, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv2i64( %a, %b, i32 -1) ret %res } define @splice_nxv2i64_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2i64_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -4 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 4 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv2i64_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -4 +; NOVLDEP-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v10, 4 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: 
splice_nxv2i64_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -4 +; VLDEP-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v10, 4 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv2i64( %a, %b, i32 -4) ret %res } define @splice_nxv2i64_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2i64_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 3 -; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v10, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv2i64_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vslideup.vx v8, v10, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv2i64_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 3 +; VLDEP-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v10, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv2i64( %a, %b, i32 3) ret %res } @@ -1439,46 +2071,76 @@ define @splice_nxv4i64_offset_zero( %a, @splice_nxv4i64_offset_negone( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4i64_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e64, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vslideup.vi v8, v12, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv4i64_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v12, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv4i64_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v12, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv4i64( %a, %b, i32 -1) ret %res } define @splice_nxv4i64_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4i64_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -8 -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vslideup.vi v8, v12, 8 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv4i64_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -8 +; NOVLDEP-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v12, 8 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv4i64_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: 
srli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -8 +; VLDEP-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v12, 8 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv4i64( %a, %b, i32 -8) ret %res } define @splice_nxv4i64_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4i64_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -7 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 7 -; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; CHECK-NEXT: vslideup.vx v8, v12, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv4i64_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 7 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -7 +; NOVLDEP-NEXT: vslideup.vx v8, v12, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv4i64_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -7 +; VLDEP-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 7 +; VLDEP-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v12, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv4i64( %a, %b, i32 7) ret %res } @@ -1494,43 +2156,70 @@ define @splice_nxv8i64_offset_zero( %a, @splice_nxv8i64_offset_negone( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8i64_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vslideup.vi v8, v16, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv8i64_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v16, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv8i64_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e64, m8, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v16, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv8i64( %a, %b, i32 -1) ret %res } define @splice_nxv8i64_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8i64_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -16 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vslideup.vi v8, v16, 16 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv8i64_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: addi a0, a0, -16 +; NOVLDEP-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v16, 16 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv8i64_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: addi a0, a0, -16 +; VLDEP-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v16, 16 +; VLDEP-NEXT: ret %res = call 
@llvm.vector.splice.nxv8i64( %a, %b, i32 -16) ret %res } define @splice_nxv8i64_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8i64_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -15 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 15 -; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; CHECK-NEXT: vslideup.vx v8, v16, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv8i64_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: addi a0, a0, -15 +; NOVLDEP-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 15 +; NOVLDEP-NEXT: vslideup.vx v8, v16, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv8i64_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: addi a0, a0, -15 +; VLDEP-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 15 +; VLDEP-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v16, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv8i64( %a, %b, i32 15) ret %res } @@ -1560,31 +2249,51 @@ define @splice_nxv1bf16_offset_negone( @splice_nxv1bf16_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv1bf16_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -2 -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 2 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv1bf16_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 3 +; NOVLDEP-NEXT: addi a0, a0, -2 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 2 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv1bf16_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 3 +; VLDEP-NEXT: addi a0, a0, -2 +; VLDEP-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 2 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv1bf16( %a, %b, i32 -2) ret %res } define @splice_nxv1bf16_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv1bf16_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 1 -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv1bf16_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 1 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 3 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv1bf16_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 3 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 1 +; VLDEP-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv1bf16( %a, %b, i32 1) ret %res } @@ -1614,31 +2323,51 @@ define @splice_nxv2bf16_offset_negone( @splice_nxv2bf16_offset_min( %a, %b) #0 { -; 
CHECK-LABEL: splice_nxv2bf16_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -4 -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 4 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv2bf16_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -4 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 4 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv2bf16_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -4 +; VLDEP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 4 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv2bf16( %a, %b, i32 -4) ret %res } define @splice_nxv2bf16_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2bf16_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 3 -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv2bf16_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv2bf16_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 3 +; VLDEP-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv2bf16( %a, %b, i32 3) ret %res } @@ -1668,31 +2397,51 @@ define @splice_nxv4bf16_offset_negone( @splice_nxv4bf16_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4bf16_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -8 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 8 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv4bf16_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -8 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 8 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv4bf16_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -8 +; VLDEP-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 8 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv4bf16( %a, %b, i32 -8) ret %res } define @splice_nxv4bf16_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4bf16_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, 
vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -7 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 7 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv4bf16_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 7 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -7 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv4bf16_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -7 +; VLDEP-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 7 +; VLDEP-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv4bf16( %a, %b, i32 7) ret %res } @@ -1708,43 +2457,70 @@ define @splice_nxv8bf16_offset_zero( } define @splice_nxv8bf16_offset_negone( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8bf16_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv8bf16_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v10, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv8bf16_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e16, m2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v10, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv8bf16( %a, %b, i32 -1) ret %res } define @splice_nxv8bf16_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8bf16_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -16 -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 16 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv8bf16_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: addi a0, a0, -16 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v10, 16 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv8bf16_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: addi a0, a0, -16 +; VLDEP-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v10, 16 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv8bf16( %a, %b, i32 -16) ret %res } define @splice_nxv8bf16_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8bf16_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -15 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 15 -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v10, a0 -; CHECK-NEXT: ret +; 
NOVLDEP-LABEL: splice_nxv8bf16_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: addi a0, a0, -15 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 15 +; NOVLDEP-NEXT: vslideup.vx v8, v10, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv8bf16_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: addi a0, a0, -15 +; VLDEP-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 15 +; VLDEP-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v10, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv8bf16( %a, %b, i32 15) ret %res } @@ -1760,47 +2536,78 @@ define @splice_nxv16bf16_offset_zero( @splice_nxv16bf16_offset_negone( %a, %b) #0 { -; CHECK-LABEL: splice_nxv16bf16_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e16, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vslideup.vi v8, v12, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv16bf16_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v12, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv16bf16_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e16, m4, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v12, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv16bf16( %a, %b, i32 -1) ret %res } define @splice_nxv16bf16_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv16bf16_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -32 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vslideup.vx v8, v12, a1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv16bf16_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -32 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: li a0, 32 +; NOVLDEP-NEXT: vslideup.vx v8, v12, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv16bf16_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -32 +; VLDEP-NEXT: li a1, 32 +; VLDEP-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v12, a1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv16bf16( %a, %b, i32 -32) ret %res } define @splice_nxv16bf16_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv16bf16_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -31 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 31 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vslideup.vx v8, v12, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: 
splice_nxv16bf16_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 31 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -31 +; NOVLDEP-NEXT: vslideup.vx v8, v12, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv16bf16_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -31 +; VLDEP-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 31 +; VLDEP-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v12, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv16bf16( %a, %b, i32 31) ret %res } @@ -1816,48 +2623,80 @@ define @splice_nxv32bf16_offset_zero( @splice_nxv32bf16_offset_negone( %a, %b) #0 { -; CHECK-LABEL: splice_nxv32bf16_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vslideup.vi v8, v16, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv32bf16_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v16, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv32bf16_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e16, m8, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v16, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv32bf16( %a, %b, i32 -1) ret %res } define @splice_nxv32bf16_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv32bf16_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -64 -; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vslideup.vx v8, v16, a1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv32bf16_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -64 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: li a0, 64 +; NOVLDEP-NEXT: vslideup.vx v8, v16, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv32bf16_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -64 +; VLDEP-NEXT: li a1, 64 +; VLDEP-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v16, a1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv32bf16( %a, %b, i32 -64) ret %res } define @splice_nxv32bf16_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv32bf16_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -63 -; CHECK-NEXT: li a1, 63 -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a1 -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: 
vslideup.vx v8, v16, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv32bf16_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: li a0, 63 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -63 +; NOVLDEP-NEXT: vslideup.vx v8, v16, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv32bf16_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -63 +; VLDEP-NEXT: li a1, 63 +; VLDEP-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a1 +; VLDEP-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v16, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv32bf16( %a, %b, i32 63) ret %res } @@ -1887,31 +2726,51 @@ define @splice_nxv1f16_offset_negone( %a, } define @splice_nxv1f16_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv1f16_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -2 -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 2 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv1f16_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 3 +; NOVLDEP-NEXT: addi a0, a0, -2 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 2 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv1f16_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 3 +; VLDEP-NEXT: addi a0, a0, -2 +; VLDEP-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 2 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv1f16( %a, %b, i32 -2) ret %res } define @splice_nxv1f16_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv1f16_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 1 -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv1f16_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 1 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 3 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv1f16_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 3 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 1 +; VLDEP-NEXT: vsetvli a1, zero, e16, mf4, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv1f16( %a, %b, i32 1) ret %res } @@ -1941,31 +2800,51 @@ define @splice_nxv2f16_offset_negone( %a, } define @splice_nxv2f16_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2f16_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -4 -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: 
vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 4 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv2f16_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -4 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 4 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv2f16_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -4 +; VLDEP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 4 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv2f16( %a, %b, i32 -4) ret %res } define @splice_nxv2f16_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2f16_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 3 -; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv2f16_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv2f16_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 3 +; VLDEP-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv2f16( %a, %b, i32 3) ret %res } @@ -1995,31 +2874,51 @@ define @splice_nxv4f16_offset_negone( %a, } define @splice_nxv4f16_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4f16_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -8 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 8 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv4f16_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -8 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 8 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv4f16_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -8 +; VLDEP-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 8 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv4f16( %a, %b, i32 -8) ret %res } define @splice_nxv4f16_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4f16_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -7 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 7 -; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret 
+; NOVLDEP-LABEL: splice_nxv4f16_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 7 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -7 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv4f16_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -7 +; VLDEP-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 7 +; VLDEP-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv4f16( %a, %b, i32 7) ret %res } @@ -2035,43 +2934,70 @@ define @splice_nxv8f16_offset_zero( %a, < } define @splice_nxv8f16_offset_negone( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8f16_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e16, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv8f16_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v10, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv8f16_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e16, m2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v10, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv8f16( %a, %b, i32 -1) ret %res } define @splice_nxv8f16_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8f16_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -16 -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 16 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv8f16_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: addi a0, a0, -16 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v10, 16 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv8f16_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: addi a0, a0, -16 +; VLDEP-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v10, 16 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv8f16( %a, %b, i32 -16) ret %res } define @splice_nxv8f16_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8f16_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -15 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 15 -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v10, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv8f16_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: addi a0, a0, -15 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 15 +; NOVLDEP-NEXT: vslideup.vx v8, v10, a0 +; NOVLDEP-NEXT: 
ret +; +; VLDEP-LABEL: splice_nxv8f16_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: addi a0, a0, -15 +; VLDEP-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 15 +; VLDEP-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v10, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv8f16( %a, %b, i32 15) ret %res } @@ -2087,47 +3013,78 @@ define @splice_nxv16f16_offset_zero( %a } define @splice_nxv16f16_offset_negone( %a, %b) #0 { -; CHECK-LABEL: splice_nxv16f16_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e16, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vslideup.vi v8, v12, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv16f16_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v12, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv16f16_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e16, m4, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v12, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv16f16( %a, %b, i32 -1) ret %res } define @splice_nxv16f16_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv16f16_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -32 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vslideup.vx v8, v12, a1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv16f16_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -32 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: li a0, 32 +; NOVLDEP-NEXT: vslideup.vx v8, v12, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv16f16_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -32 +; VLDEP-NEXT: li a1, 32 +; VLDEP-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v12, a1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv16f16( %a, %b, i32 -32) ret %res } define @splice_nxv16f16_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv16f16_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -31 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 31 -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-NEXT: vslideup.vx v8, v12, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv16f16_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 31 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -31 +; NOVLDEP-NEXT: vslideup.vx v8, v12, a0 +; NOVLDEP-NEXT: ret +; 
+; VLDEP-LABEL: splice_nxv16f16_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -31 +; VLDEP-NEXT: vsetvli zero, a0, e16, m4, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 31 +; VLDEP-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v12, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv16f16( %a, %b, i32 31) ret %res } @@ -2143,48 +3100,80 @@ define @splice_nxv32f16_offset_zero( %a } define @splice_nxv32f16_offset_negone( %a, %b) #0 { -; CHECK-LABEL: splice_nxv32f16_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e16, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vslideup.vi v8, v16, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv32f16_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v16, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv32f16_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e16, m8, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v16, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv32f16( %a, %b, i32 -1) ret %res } define @splice_nxv32f16_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv32f16_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -64 -; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vslideup.vx v8, v16, a1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv32f16_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -64 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: li a0, 64 +; NOVLDEP-NEXT: vslideup.vx v8, v16, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv32f16_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -64 +; VLDEP-NEXT: li a1, 64 +; VLDEP-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v16, a1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv32f16( %a, %b, i32 -64) ret %res } define @splice_nxv32f16_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv32f16_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -63 -; CHECK-NEXT: li a1, 63 -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a1 -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vslideup.vx v8, v16, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv32f16_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: li a0, 63 +; NOVLDEP-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 2 +; NOVLDEP-NEXT: addi 
a0, a0, -63 +; NOVLDEP-NEXT: vslideup.vx v8, v16, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv32f16_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -63 +; VLDEP-NEXT: li a1, 63 +; VLDEP-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a1 +; VLDEP-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v16, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv32f16( %a, %b, i32 63) ret %res } @@ -2214,31 +3203,51 @@ define @splice_nxv1f32_offset_negone( % } define @splice_nxv1f32_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv1f32_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -2 -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 2 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv1f32_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 3 +; NOVLDEP-NEXT: addi a0, a0, -2 +; NOVLDEP-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 2 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv1f32_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 3 +; VLDEP-NEXT: addi a0, a0, -2 +; VLDEP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 2 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv1f32( %a, %b, i32 -2) ret %res } define @splice_nxv1f32_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv1f32_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 1 -; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv1f32_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 1 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 3 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv1f32_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 3 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 1 +; VLDEP-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv1f32( %a, %b, i32 1) ret %res } @@ -2268,31 +3277,51 @@ define @splice_nxv2f32_offset_negone( % } define @splice_nxv2f32_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2f32_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -4 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 4 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv2f32_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -4 +; NOVLDEP-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; NOVLDEP-NEXT: 
vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 4 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv2f32_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -4 +; VLDEP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 4 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv2f32( %a, %b, i32 -4) ret %res } define @splice_nxv2f32_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2f32_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 3 -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv2f32_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv2f32_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 3 +; VLDEP-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv2f32( %a, %b, i32 3) ret %res } @@ -2308,46 +3337,76 @@ define @splice_nxv4f32_offset_zero( %a, } define @splice_nxv4f32_offset_negone( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4f32_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv4f32_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v10, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv4f32_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v10, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv4f32( %a, %b, i32 -1) ret %res } define @splice_nxv4f32_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4f32_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -8 -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 8 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv4f32_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -8 +; NOVLDEP-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v10, 8 +; NOVLDEP-NEXT: ret 
+; +; VLDEP-LABEL: splice_nxv4f32_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -8 +; VLDEP-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v10, 8 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv4f32( %a, %b, i32 -8) ret %res } define @splice_nxv4f32_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4f32_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -7 -; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 7 -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v10, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv4f32_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 7 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -7 +; NOVLDEP-NEXT: vslideup.vx v8, v10, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv4f32_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -7 +; VLDEP-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 7 +; VLDEP-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v10, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv4f32( %a, %b, i32 7) ret %res } @@ -2363,43 +3422,70 @@ define @splice_nxv8f32_offset_zero( %a, } define @splice_nxv8f32_offset_negone( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8f32_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e32, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vslideup.vi v8, v12, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv8f32_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v12, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv8f32_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e32, m4, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v12, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv8f32( %a, %b, i32 -1) ret %res } define @splice_nxv8f32_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8f32_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -16 -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vslideup.vi v8, v12, 16 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv8f32_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: addi a0, a0, -16 +; NOVLDEP-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v12, 16 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv8f32_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: addi a0, a0, -16 +; VLDEP-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; 
VLDEP-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v12, 16 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv8f32( %a, %b, i32 -16) ret %res } define @splice_nxv8f32_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8f32_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -15 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 15 -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vslideup.vx v8, v12, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv8f32_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: addi a0, a0, -15 +; NOVLDEP-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 15 +; NOVLDEP-NEXT: vslideup.vx v8, v12, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv8f32_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: addi a0, a0, -15 +; VLDEP-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 15 +; VLDEP-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v12, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv8f32( %a, %b, i32 15) ret %res } @@ -2415,47 +3501,78 @@ define @splice_nxv16f32_offset_zero( } define @splice_nxv16f32_offset_negone( %a, %b) #0 { -; CHECK-LABEL: splice_nxv16f32_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vslideup.vi v8, v16, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv16f32_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v16, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv16f32_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e32, m8, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v16, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv16f32( %a, %b, i32 -1) ret %res } define @splice_nxv16f32_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv16f32_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -32 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vslideup.vx v8, v16, a1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv16f32_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -32 +; NOVLDEP-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: li a0, 32 +; NOVLDEP-NEXT: vslideup.vx v8, v16, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv16f32_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -32 +; VLDEP-NEXT: li a1, 32 +; VLDEP-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; VLDEP-NEXT: vslideup.vx v8, 
v16, a1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv16f32( %a, %b, i32 -32) ret %res } define @splice_nxv16f32_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv16f32_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -31 -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 31 -; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; CHECK-NEXT: vslideup.vx v8, v16, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv16f32_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 31 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: slli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -31 +; NOVLDEP-NEXT: vslideup.vx v8, v16, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv16f32_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: slli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -31 +; VLDEP-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 31 +; VLDEP-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v16, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv16f32( %a, %b, i32 31) ret %res } @@ -2485,31 +3602,51 @@ define @splice_nxv1f64_offset_negone( } define @splice_nxv1f64_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv1f64_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -2 -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 2 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv1f64_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 3 +; NOVLDEP-NEXT: addi a0, a0, -2 +; NOVLDEP-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 2 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv1f64_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 3 +; VLDEP-NEXT: addi a0, a0, -2 +; VLDEP-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 2 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv1f64( %a, %b, i32 -2) ret %res } define @splice_nxv1f64_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv1f64_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 1 -; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv1f64_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 1 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 3 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv1f64_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 3 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 1 +; VLDEP-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv1f64( %a, %b, i32 1) ret 
%res } @@ -2525,46 +3662,76 @@ define @splice_nxv2f64_offset_zero( % } define @splice_nxv2f64_offset_negone( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2f64_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv2f64_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v10, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv2f64_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v10, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv2f64( %a, %b, i32 -1) ret %res } define @splice_nxv2f64_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2f64_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -4 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 4 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv2f64_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -4 +; NOVLDEP-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v10, 4 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv2f64_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -4 +; VLDEP-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v10, 4 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv2f64( %a, %b, i32 -4) ret %res } define @splice_nxv2f64_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv2f64_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 3 -; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v10, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv2f64_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 2 +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vslideup.vx v8, v10, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv2f64_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 2 +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 3 +; VLDEP-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v10, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv2f64( %a, %b, i32 3) ret %res } @@ -2580,46 +3747,76 @@ define @splice_nxv4f64_offset_zero( % } define 
@splice_nxv4f64_offset_negone( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4f64_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e64, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vslideup.vi v8, v12, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv4f64_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v12, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv4f64_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v12, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv4f64( %a, %b, i32 -1) ret %res } define @splice_nxv4f64_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4f64_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -8 -; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vslideup.vi v8, v12, 8 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv4f64_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -8 +; NOVLDEP-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v12, 8 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv4f64_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -8 +; VLDEP-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v12, 8 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv4f64( %a, %b, i32 -8) ret %res } define @splice_nxv4f64_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv4f64_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: addi a0, a0, -7 -; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 7 -; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; CHECK-NEXT: vslideup.vx v8, v12, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv4f64_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 7 +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: srli a0, a0, 1 +; NOVLDEP-NEXT: addi a0, a0, -7 +; NOVLDEP-NEXT: vslideup.vx v8, v12, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv4f64_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: srli a0, a0, 1 +; VLDEP-NEXT: addi a0, a0, -7 +; VLDEP-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 7 +; VLDEP-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v12, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv4f64( %a, %b, i32 7) ret %res } @@ -2635,43 +3832,70 @@ define @splice_nxv8f64_offset_zero( % } define @splice_nxv8f64_offset_negone( %a, %b) #0 { -; CHECK-LABEL: 
splice_nxv8f64_offset_negone: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vslideup.vi v8, v16, 1 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv8f64_offset_negone: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v16, 1 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv8f64_offset_negone: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e64, m8, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v16, 1 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv8f64( %a, %b, i32 -1) ret %res } define @splice_nxv8f64_offset_min( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8f64_offset_min: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -16 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vslideup.vi v8, v16, 16 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv8f64_offset_min: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: addi a0, a0, -16 +; NOVLDEP-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v16, 16 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv8f64_offset_min: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: addi a0, a0, -16 +; VLDEP-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v16, 16 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv8f64( %a, %b, i32 -16) ret %res } define @splice_nxv8f64_offset_max( %a, %b) #0 { -; CHECK-LABEL: splice_nxv8f64_offset_max: -; CHECK: # %bb.0: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: addi a0, a0, -15 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 15 -; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; CHECK-NEXT: vslideup.vx v8, v16, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: splice_nxv8f64_offset_max: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: csrr a0, vlenb +; NOVLDEP-NEXT: addi a0, a0, -15 +; NOVLDEP-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 15 +; NOVLDEP-NEXT: vslideup.vx v8, v16, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: splice_nxv8f64_offset_max: +; VLDEP: # %bb.0: +; VLDEP-NEXT: csrr a0, vlenb +; VLDEP-NEXT: addi a0, a0, -15 +; VLDEP-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 15 +; VLDEP-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v16, a0 +; VLDEP-NEXT: ret %res = call @llvm.vector.splice.nxv8f64( %a, %b, i32 15) ret %res } diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll index 477f1daf10c24..dec68fa970c99 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs 
-riscv-v-vector-bits-min=128 \ -; RUN: < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ +; RUN: < %s | FileCheck %s --check-prefix=NOVLDEP +; RUN: llc -mtriple=riscv64 -mattr=+v,+vl-dependent-latency -verify-machineinstrs \ +; RUN: < %s | FileCheck %s --check-prefix=VLDEP declare <2 x i1> @llvm.experimental.vp.splice.v2i1(<2 x i1>, <2 x i1>, i32, <2 x i1>, i32, i32) declare <4 x i1> @llvm.experimental.vp.splice.v4i1(<4 x i1>, <4 x i1>, i32, <4 x i1>, i32, i32) @@ -8,293 +10,521 @@ declare <8 x i1> @llvm.experimental.vp.splice.v8i1(<8 x i1>, <8 x i1>, i32, <8 x declare <16 x i1> @llvm.experimental.vp.splice.v16i1(<16 x i1>, <16 x i1>, i32, <16 x i1>, i32, i32) define <2 x i1> @test_vp_splice_v2i1(<2 x i1> %va, <2 x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v2i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vslidedown.vi v9, v9, 1 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vslideup.vx v9, v8, a0 -; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v2i1: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; NOVLDEP-NEXT: vmv.v.i v10, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v9, v9, 1 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vslideup.vx v9, v8, a0 +; NOVLDEP-NEXT: vmsne.vi v0, v9, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v2i1: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; VLDEP-NEXT: vmv.v.i v10, 0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; VLDEP-NEXT: vslidedown.vi v9, v9, 1 +; VLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; VLDEP-NEXT: vslideup.vx v9, v8, a0 +; VLDEP-NEXT: vmsne.vi v0, v9, 0 +; VLDEP-NEXT: ret %v = call <2 x i1> @llvm.experimental.vp.splice.v2i1(<2 x i1> %va, <2 x i1> %vb, i32 1, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <2 x i1> %v } define <2 x i1> @test_vp_splice_v2i1_negative_offset(<2 x i1> %va, <2 x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v2i1_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vslidedown.vx v9, v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, 
mf8, ta, ma -; CHECK-NEXT: vslideup.vi v9, v8, 1 -; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v2i1_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; NOVLDEP-NEXT: vmv.v.i v10, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v9, v9, a0 +; NOVLDEP-NEXT: vslideup.vi v9, v8, 1 +; NOVLDEP-NEXT: vmsne.vi v0, v9, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v2i1_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; VLDEP-NEXT: vmv.v.i v10, 0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; VLDEP-NEXT: vslidedown.vx v9, v9, a0 +; VLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; VLDEP-NEXT: vslideup.vi v9, v8, 1 +; VLDEP-NEXT: vmsne.vi v0, v9, 0 +; VLDEP-NEXT: ret %v = call <2 x i1> @llvm.experimental.vp.splice.v2i1(<2 x i1> %va, <2 x i1> %vb, i32 -1, <2 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <2 x i1> %v } define <2 x i1> @test_vp_splice_v2i1_masked(<2 x i1> %va, <2 x i1> %vb, <2 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v2i1_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v10, 1, v0.t -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu -; CHECK-NEXT: vslideup.vx v10, v8, a0, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v2i1_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; NOVLDEP-NEXT: vmv1r.v v10, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; NOVLDEP-NEXT: vmv.v.i v11, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v10 +; NOVLDEP-NEXT: vmerge.vim v10, v11, 1, v0 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v10, v10, 1, v0.t +; NOVLDEP-NEXT: vsetvli zero, zero, e8, mf8, ta, mu +; NOVLDEP-NEXT: vslideup.vx v10, v8, a0, v0.t +; NOVLDEP-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; NOVLDEP-NEXT: vmsne.vi v0, v10, 0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v2i1_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; VLDEP-NEXT: vmv1r.v v10, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; VLDEP-NEXT: 
vmv.v.i v11, 0 +; VLDEP-NEXT: vmv1r.v v0, v10 +; VLDEP-NEXT: vmerge.vim v10, v11, 1, v0 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; VLDEP-NEXT: vslidedown.vi v10, v10, 1, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; VLDEP-NEXT: vslideup.vx v10, v8, a0, v0.t +; VLDEP-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; VLDEP-NEXT: vmsne.vi v0, v10, 0, v0.t +; VLDEP-NEXT: ret %v = call <2 x i1> @llvm.experimental.vp.splice.v2i1(<2 x i1> %va, <2 x i1> %vb, i32 1, <2 x i1> %mask, i32 %evla, i32 %evlb) ret <2 x i1> %v } define <4 x i1> @test_vp_splice_v4i1(<4 x i1> %va, <4 x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v4i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v9, v9, 3 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vslideup.vx v9, v8, a0 -; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v4i1: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; NOVLDEP-NEXT: vmv.v.i v10, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v9, v9, 3 +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vslideup.vx v9, v8, a0 +; NOVLDEP-NEXT: vmsne.vi v0, v9, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v4i1: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; VLDEP-NEXT: vmv.v.i v10, 0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; VLDEP-NEXT: vslidedown.vi v9, v9, 3 +; VLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; VLDEP-NEXT: vslideup.vx v9, v8, a0 +; VLDEP-NEXT: vmsne.vi v0, v9, 0 +; VLDEP-NEXT: ret %v = call <4 x i1> @llvm.experimental.vp.splice.v4i1(<4 x i1> %va, <4 x i1> %vb, i32 3, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <4 x i1> %v } define <4 x i1> @test_vp_splice_v4i1_negative_offset(<4 x i1> %va, <4 x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v4i1_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetivli zero, 3, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v9, v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vslideup.vi v9, v8, 3 -; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: ret +; 
NOVLDEP-LABEL: test_vp_splice_v4i1_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; NOVLDEP-NEXT: vmv.v.i v10, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v9, v9, a0 +; NOVLDEP-NEXT: vslideup.vi v9, v8, 3 +; NOVLDEP-NEXT: vmsne.vi v0, v9, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v4i1_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; VLDEP-NEXT: vmv.v.i v10, 0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetivli zero, 3, e8, mf4, ta, ma +; VLDEP-NEXT: vslidedown.vx v9, v9, a0 +; VLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; VLDEP-NEXT: vslideup.vi v9, v8, 3 +; VLDEP-NEXT: vmsne.vi v0, v9, 0 +; VLDEP-NEXT: ret %v = call <4 x i1> @llvm.experimental.vp.splice.v4i1(<4 x i1> %va, <4 x i1> %vb, i32 -3, <4 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <4 x i1> %v } define <4 x i1> @test_vp_splice_v4i1_masked(<4 x i1> %va, <4 x i1> %vb, <4 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v4i1_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v10, 3, v0.t -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu -; CHECK-NEXT: vslideup.vx v10, v8, a0, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v4i1_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; NOVLDEP-NEXT: vmv1r.v v10, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; NOVLDEP-NEXT: vmv.v.i v11, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v10 +; NOVLDEP-NEXT: vmerge.vim v10, v11, 1, v0 +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v10, v10, 3, v0.t +; NOVLDEP-NEXT: vsetvli zero, zero, e8, mf4, ta, mu +; NOVLDEP-NEXT: vslideup.vx v10, v8, a0, v0.t +; NOVLDEP-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; NOVLDEP-NEXT: vmsne.vi v0, v10, 0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v4i1_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; VLDEP-NEXT: vmv1r.v v10, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; VLDEP-NEXT: vmv.v.i v11, 0 +; VLDEP-NEXT: vmv1r.v v0, v10 +; VLDEP-NEXT: vmerge.vim v10, v11, 1, v0 +; VLDEP-NEXT: addi 
a0, a0, -3 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; VLDEP-NEXT: vslidedown.vi v10, v10, 3, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; VLDEP-NEXT: vslideup.vx v10, v8, a0, v0.t +; VLDEP-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; VLDEP-NEXT: vmsne.vi v0, v10, 0, v0.t +; VLDEP-NEXT: ret %v = call <4 x i1> @llvm.experimental.vp.splice.v4i1(<4 x i1> %va, <4 x i1> %vb, i32 3, <4 x i1> %mask, i32 %evla, i32 %evlb) ret <4 x i1> %v } define <8 x i1> @test_vp_splice_v8i1(<8 x i1> %va, <8 x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v8i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v9, v9, 5 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v9, v8, a0 -; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v8i1: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLDEP-NEXT: vmv.v.i v10, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v9, v9, 5 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vslideup.vx v9, v8, a0 +; NOVLDEP-NEXT: vmsne.vi v0, v9, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v8i1: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; VLDEP-NEXT: vmv.v.i v10, 0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; VLDEP-NEXT: vslidedown.vi v9, v9, 5 +; VLDEP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; VLDEP-NEXT: vslideup.vx v9, v8, a0 +; VLDEP-NEXT: vmsne.vi v0, v9, 0 +; VLDEP-NEXT: ret %v = call <8 x i1> @llvm.experimental.vp.splice.v8i1(<8 x i1> %va, <8 x i1> %vb, i32 5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <8 x i1> %v } define <8 x i1> @test_vp_splice_v8i1_negative_offset(<8 x i1> %va, <8 x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v8i1_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetivli zero, 5, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v9, v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v9, v8, 5 -; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v8i1_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, 
mf2, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLDEP-NEXT: vmv.v.i v10, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v9, v9, a0 +; NOVLDEP-NEXT: vslideup.vi v9, v8, 5 +; NOVLDEP-NEXT: vmsne.vi v0, v9, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v8i1_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; VLDEP-NEXT: vmv.v.i v10, 0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetivli zero, 5, e8, mf2, ta, ma +; VLDEP-NEXT: vslidedown.vx v9, v9, a0 +; VLDEP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; VLDEP-NEXT: vslideup.vi v9, v8, 5 +; VLDEP-NEXT: vmsne.vi v0, v9, 0 +; VLDEP-NEXT: ret %v = call <8 x i1> @llvm.experimental.vp.splice.v8i1(<8 x i1> %va, <8 x i1> %vb, i32 -5, <8 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <8 x i1> %v } define <8 x i1> @test_vp_splice_v8i1_masked(<8 x i1> %va, <8 x i1> %vb, <8 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v8i1_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu -; CHECK-NEXT: vslideup.vx v10, v8, a0, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v8i1_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; NOVLDEP-NEXT: vmv1r.v v10, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLDEP-NEXT: vmv.v.i v11, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v10 +; NOVLDEP-NEXT: vmerge.vim v10, v11, 1, v0 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v10, v10, 5, v0.t +; NOVLDEP-NEXT: vsetvli zero, zero, e8, mf2, ta, mu +; NOVLDEP-NEXT: vslideup.vx v10, v8, a0, v0.t +; NOVLDEP-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; NOVLDEP-NEXT: vmsne.vi v0, v10, 0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v8i1_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; VLDEP-NEXT: vmv1r.v v10, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; VLDEP-NEXT: vmv.v.i v11, 0 +; VLDEP-NEXT: vmv1r.v v0, v10 +; VLDEP-NEXT: vmerge.vim v10, v11, 1, v0 +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; VLDEP-NEXT: 
vslidedown.vi v10, v10, 5, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; VLDEP-NEXT: vslideup.vx v10, v8, a0, v0.t +; VLDEP-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; VLDEP-NEXT: vmsne.vi v0, v10, 0, v0.t +; VLDEP-NEXT: ret %v = call <8 x i1> @llvm.experimental.vp.splice.v8i1(<8 x i1> %va, <8 x i1> %vb, i32 5, <8 x i1> %mask, i32 %evla, i32 %evlb) ret <8 x i1> %v } define <16 x i1> @test_vp_splice_v16i1(<16 x i1> %va, <16 x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v16i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v9, v9, 5 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vx v9, v8, a0 -; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v16i1: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; NOVLDEP-NEXT: vmv.v.i v10, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v9, v9, 5 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vslideup.vx v9, v8, a0 +; NOVLDEP-NEXT: vmsne.vi v0, v9, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v16i1: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; VLDEP-NEXT: vmv.v.i v10, 0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v9, v9, 5 +; VLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v9, v8, a0 +; VLDEP-NEXT: vmsne.vi v0, v9, 0 +; VLDEP-NEXT: ret %v = call <16 x i1> @llvm.experimental.vp.splice.v16i1(<16 x i1> %va, <16 x i1> %vb, i32 5, <16 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <16 x i1> %v } define <16 x i1> @test_vp_splice_v16i1_negative_offset(<16 x i1> %va, <16 x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v16i1_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetivli zero, 5, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v9, v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vi v9, v8, 5 -; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v16i1_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i 
v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; NOVLDEP-NEXT: vmv.v.i v10, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v9, v9, a0 +; NOVLDEP-NEXT: vslideup.vi v9, v8, 5 +; NOVLDEP-NEXT: vmsne.vi v0, v9, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v16i1_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; VLDEP-NEXT: vmv.v.i v10, 0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetivli zero, 5, e8, m1, ta, ma +; VLDEP-NEXT: vslidedown.vx v9, v9, a0 +; VLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v9, v8, 5 +; VLDEP-NEXT: vmsne.vi v0, v9, 0 +; VLDEP-NEXT: ret %v = call <16 x i1> @llvm.experimental.vp.splice.v16i1(<16 x i1> %va, <16 x i1> %vb, i32 -5, <16 x i1> splat (i1 1), i32 %evla, i32 %evlb) ret <16 x i1> %v } define <16 x i1> @test_vp_splice_v16i1_masked(<16 x i1> %va, <16 x i1> %vb, <16 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) { -; CHECK-LABEL: test_vp_splice_v16i1_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu -; CHECK-NEXT: vslideup.vx v10, v8, a0, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_v16i1_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; NOVLDEP-NEXT: vmv1r.v v10, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; NOVLDEP-NEXT: vmv.v.i v11, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v10 +; NOVLDEP-NEXT: vmerge.vim v10, v11, 1, v0 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v10, v10, 5, v0.t +; NOVLDEP-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; NOVLDEP-NEXT: vslideup.vx v10, v8, a0, v0.t +; NOVLDEP-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; NOVLDEP-NEXT: vmsne.vi v0, v10, 0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_v16i1_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; VLDEP-NEXT: vmv1r.v v10, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; VLDEP-NEXT: vmv.v.i v11, 0 +; VLDEP-NEXT: vmv1r.v v0, v10 +; VLDEP-NEXT: vmerge.vim v10, v11, 1, v0 +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v10, v10, 5, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, mu +; VLDEP-NEXT: vslideup.vx v10, 
v8, a0, v0.t +; VLDEP-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; VLDEP-NEXT: vmsne.vi v0, v10, 0, v0.t +; VLDEP-NEXT: ret %v = call <16 x i1> @llvm.experimental.vp.splice.v16i1(<16 x i1> %va, <16 x i1> %vb, i32 5, <16 x i1> %mask, i32 %evla, i32 %evlb) ret <16 x i1> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll index 709269904dbd8..895cdb22c4965 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ -; RUN: < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v < %s | FileCheck %s --check-prefix=NOVLDEP +; RUN: llc -mtriple=riscv64 -mattr=+v,+vl-dependent-latency < %s | FileCheck %s --check-prefix=VLDEP declare @llvm.experimental.vp.splice.nxv1i1(, , i32, , i32, i32) declare @llvm.experimental.vp.splice.nxv2i1(, , i32, , i32, i32) @@ -11,515 +11,917 @@ declare @llvm.experimental.vp.splice.nxv32i1( @llvm.experimental.vp.splice.nxv64i1(, , i32, , i32, i32) define @test_vp_splice_nxv1i1( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv1i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vslidedown.vi v9, v9, 1 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vslideup.vx v9, v8, a0 -; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv1i1: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; NOVLDEP-NEXT: vmv.v.i v10, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v9, v9, 1 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vslideup.vx v9, v8, a0 +; NOVLDEP-NEXT: vmsne.vi v0, v9, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv1i1: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; VLDEP-NEXT: vmv.v.i v10, 0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; VLDEP-NEXT: vslidedown.vi v9, v9, 1 +; VLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; VLDEP-NEXT: vslideup.vx v9, v8, a0 +; VLDEP-NEXT: vmsne.vi v0, v9, 0 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv1i1( %va, %vb, i32 1, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv1i1_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv1i1_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; 
CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 -; CHECK-NEXT: addi a0, a0, -2 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vslidedown.vx v9, v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vslideup.vi v9, v8, 2 -; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv1i1_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; NOVLDEP-NEXT: vmv.v.i v10, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; NOVLDEP-NEXT: addi a0, a0, -2 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v9, v9, a0 +; NOVLDEP-NEXT: vslideup.vi v9, v8, 2 +; NOVLDEP-NEXT: vmsne.vi v0, v9, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv1i1_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; VLDEP-NEXT: vmv.v.i v10, 0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; VLDEP-NEXT: addi a0, a0, -2 +; VLDEP-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; VLDEP-NEXT: vslidedown.vx v9, v9, a0 +; VLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; VLDEP-NEXT: vslideup.vi v9, v8, 2 +; VLDEP-NEXT: vmsne.vi v0, v9, 0 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv1i1( %va, %vb, i32 -2, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv1i1_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv1i1_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v10, 1, v0.t -; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu -; CHECK-NEXT: vslideup.vx v10, v8, a0, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv1i1_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; NOVLDEP-NEXT: vmv1r.v v10, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; NOVLDEP-NEXT: vmv.v.i v11, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v10 +; NOVLDEP-NEXT: vmerge.vim v10, v11, 1, v0 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v10, v10, 1, v0.t +; NOVLDEP-NEXT: vsetvli zero, zero, e8, mf8, ta, mu +; NOVLDEP-NEXT: vslideup.vx v10, v8, a0, v0.t +; NOVLDEP-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; NOVLDEP-NEXT: vmsne.vi v0, v10, 0, 
v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv1i1_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; VLDEP-NEXT: vmv1r.v v10, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; VLDEP-NEXT: vmv.v.i v11, 0 +; VLDEP-NEXT: vmv1r.v v0, v10 +; VLDEP-NEXT: vmerge.vim v10, v11, 1, v0 +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf8, ta, ma +; VLDEP-NEXT: vslidedown.vi v10, v10, 1, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e8, mf8, ta, mu +; VLDEP-NEXT: vslideup.vx v10, v8, a0, v0.t +; VLDEP-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; VLDEP-NEXT: vmsne.vi v0, v10, 0, v0.t +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv1i1( %va, %vb, i32 1, %mask, i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv2i1( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv2i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v9, v9, 3 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vslideup.vx v9, v8, a0 -; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv2i1: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; NOVLDEP-NEXT: vmv.v.i v10, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v9, v9, 3 +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vslideup.vx v9, v8, a0 +; NOVLDEP-NEXT: vmsne.vi v0, v9, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv2i1: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; VLDEP-NEXT: vmv.v.i v10, 0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; VLDEP-NEXT: vslidedown.vi v9, v9, 3 +; VLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; VLDEP-NEXT: vslideup.vx v9, v8, a0 +; VLDEP-NEXT: vmsne.vi v0, v9, 0 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv2i1( %va, %vb, i32 3, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv2i1_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv2i1_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 -; CHECK-NEXT: addi a0, a0, -4 -; CHECK-NEXT: 
vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v9, v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vslideup.vi v9, v8, 4 -; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv2i1_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; NOVLDEP-NEXT: vmv.v.i v10, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; NOVLDEP-NEXT: addi a0, a0, -4 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v9, v9, a0 +; NOVLDEP-NEXT: vslideup.vi v9, v8, 4 +; NOVLDEP-NEXT: vmsne.vi v0, v9, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv2i1_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; VLDEP-NEXT: vmv.v.i v10, 0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; VLDEP-NEXT: addi a0, a0, -4 +; VLDEP-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; VLDEP-NEXT: vslidedown.vx v9, v9, a0 +; VLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; VLDEP-NEXT: vslideup.vi v9, v8, 4 +; VLDEP-NEXT: vmsne.vi v0, v9, 0 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv2i1( %va, %vb, i32 -4, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv2i1_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv2i1_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v10, 3, v0.t -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu -; CHECK-NEXT: vslideup.vx v10, v8, a0, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv2i1_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; NOVLDEP-NEXT: vmv1r.v v10, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; NOVLDEP-NEXT: vmv.v.i v11, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v10 +; NOVLDEP-NEXT: vmerge.vim v10, v11, 1, v0 +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v10, v10, 3, v0.t +; NOVLDEP-NEXT: vsetvli zero, zero, e8, mf4, ta, mu +; NOVLDEP-NEXT: vslideup.vx v10, v8, a0, v0.t +; NOVLDEP-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; NOVLDEP-NEXT: vmsne.vi v0, v10, 0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv2i1_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; VLDEP-NEXT: vmv1r.v v10, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: 
vsetvli zero, a0, e8, mf4, ta, ma +; VLDEP-NEXT: vmv.v.i v11, 0 +; VLDEP-NEXT: vmv1r.v v0, v10 +; VLDEP-NEXT: vmerge.vim v10, v11, 1, v0 +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf4, ta, ma +; VLDEP-NEXT: vslidedown.vi v10, v10, 3, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e8, mf4, ta, mu +; VLDEP-NEXT: vslideup.vx v10, v8, a0, v0.t +; VLDEP-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; VLDEP-NEXT: vmsne.vi v0, v10, 0, v0.t +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv2i1( %va, %vb, i32 3, %mask, i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv4i1( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv4i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v9, v9, 5 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v9, v8, a0 -; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv4i1: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLDEP-NEXT: vmv.v.i v10, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v9, v9, 5 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vslideup.vx v9, v8, a0 +; NOVLDEP-NEXT: vmsne.vi v0, v9, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv4i1: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; VLDEP-NEXT: vmv.v.i v10, 0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; VLDEP-NEXT: vslidedown.vi v9, v9, 5 +; VLDEP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; VLDEP-NEXT: vslideup.vx v9, v8, a0 +; VLDEP-NEXT: vmsne.vi v0, v9, 0 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv4i1( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv4i1_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv4i1_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetivli zero, 5, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v9, v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v9, v8, 5 -; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv4i1_negative_offset: +; NOVLDEP: # 
%bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLDEP-NEXT: vmv.v.i v10, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v9, v9, a0 +; NOVLDEP-NEXT: vslideup.vi v9, v8, 5 +; NOVLDEP-NEXT: vmsne.vi v0, v9, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv4i1_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; VLDEP-NEXT: vmv.v.i v10, 0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetivli zero, 5, e8, mf2, ta, ma +; VLDEP-NEXT: vslidedown.vx v9, v9, a0 +; VLDEP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; VLDEP-NEXT: vslideup.vi v9, v8, 5 +; VLDEP-NEXT: vmsne.vi v0, v9, 0 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv4i1( %va, %vb, i32 -5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv4i1_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv4i1_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu -; CHECK-NEXT: vslideup.vx v10, v8, a0, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv4i1_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; NOVLDEP-NEXT: vmv1r.v v10, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; NOVLDEP-NEXT: vmv.v.i v11, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v10 +; NOVLDEP-NEXT: vmerge.vim v10, v11, 1, v0 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v10, v10, 5, v0.t +; NOVLDEP-NEXT: vsetvli zero, zero, e8, mf2, ta, mu +; NOVLDEP-NEXT: vslideup.vx v10, v8, a0, v0.t +; NOVLDEP-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; NOVLDEP-NEXT: vmsne.vi v0, v10, 0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv4i1_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; VLDEP-NEXT: vmv1r.v v10, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; VLDEP-NEXT: vmv.v.i v11, 0 +; VLDEP-NEXT: vmv1r.v v0, v10 +; VLDEP-NEXT: vmerge.vim v10, v11, 1, v0 +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; VLDEP-NEXT: vslidedown.vi v10, v10, 
5, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e8, mf2, ta, mu +; VLDEP-NEXT: vslideup.vx v10, v8, a0, v0.t +; VLDEP-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; VLDEP-NEXT: vmsne.vi v0, v10, 0, v0.t +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv4i1( %va, %vb, i32 5, %mask, i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv8i1( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv8i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v9, v9, 5 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vx v9, v8, a0 -; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv8i1: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; NOVLDEP-NEXT: vmv.v.i v10, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v9, v9, 5 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vslideup.vx v9, v8, a0 +; NOVLDEP-NEXT: vmsne.vi v0, v9, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv8i1: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; VLDEP-NEXT: vmv.v.i v10, 0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v9, v9, 5 +; VLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v9, v8, a0 +; VLDEP-NEXT: vmsne.vi v0, v9, 0 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv8i1( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv8i1_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv8i1_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetivli zero, 5, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v9, v9, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vi v9, v8, 5 -; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv8i1_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; NOVLDEP-NEXT: vmv.v.i v10, 0 +; NOVLDEP-NEXT: vmv1r.v v0, 
v9 +; NOVLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v9, v9, a0 +; NOVLDEP-NEXT: vslideup.vi v9, v8, 5 +; NOVLDEP-NEXT: vmsne.vi v0, v9, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv8i1_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; VLDEP-NEXT: vmv.v.i v10, 0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v9, v10, 1, v0 +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetivli zero, 5, e8, m1, ta, ma +; VLDEP-NEXT: vslidedown.vx v9, v9, a0 +; VLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v9, v8, 5 +; VLDEP-NEXT: vmsne.vi v0, v9, 0 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv8i1( %va, %vb, i32 -5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv8i1_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv8i1_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu -; CHECK-NEXT: vslideup.vx v10, v8, a0, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv8i1_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; NOVLDEP-NEXT: vmv1r.v v10, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v8, 0 +; NOVLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; NOVLDEP-NEXT: vmv.v.i v11, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v10 +; NOVLDEP-NEXT: vmerge.vim v10, v11, 1, v0 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v10, v10, 5, v0.t +; NOVLDEP-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; NOVLDEP-NEXT: vslideup.vx v10, v8, a0, v0.t +; NOVLDEP-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; NOVLDEP-NEXT: vmsne.vi v0, v10, 0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv8i1_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; VLDEP-NEXT: vmv1r.v v10, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v8, 0 +; VLDEP-NEXT: vmerge.vim v8, v8, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; VLDEP-NEXT: vmv.v.i v11, 0 +; VLDEP-NEXT: vmv1r.v v0, v10 +; VLDEP-NEXT: vmerge.vim v10, v11, 1, v0 +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v10, v10, 5, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, mu +; VLDEP-NEXT: vslideup.vx v10, v8, a0, v0.t +; VLDEP-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; VLDEP-NEXT: vmsne.vi v0, v10, 0, v0.t +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv8i1( %va, %vb, i32 5, %mask, i32 %evla, i32 %evlb) ret %v } define 
@test_vp_splice_nxv16i1( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv16i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v12, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v12, 1, v0 -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 5 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v10, a0 -; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv16i1: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v10, 0 +; NOVLDEP-NEXT: vmerge.vim v10, v10, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; NOVLDEP-NEXT: vmv.v.i v12, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v8, v12, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 5 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vslideup.vx v8, v10, a0 +; NOVLDEP-NEXT: vmsne.vi v0, v8, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv16i1: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v10, 0 +; VLDEP-NEXT: vmerge.vim v10, v10, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; VLDEP-NEXT: vmv.v.i v12, 0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v8, v12, 1, v0 +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 5 +; VLDEP-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v10, a0 +; VLDEP-NEXT: vmsne.vi v0, v8, 0 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv16i1( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv16i1_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv16i1_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v12, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v12, 1, v0 -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetivli zero, 5, e8, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 5 -; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv16i1_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v10, 0 +; NOVLDEP-NEXT: vmerge.vim v10, v10, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; NOVLDEP-NEXT: vmv.v.i v12, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v8, v12, 1, v0 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v10, 5 +; NOVLDEP-NEXT: vmsne.vi v0, v8, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: 
test_vp_splice_nxv16i1_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v10, 0 +; VLDEP-NEXT: vmerge.vim v10, v10, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; VLDEP-NEXT: vmv.v.i v12, 0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v8, v12, 1, v0 +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetivli zero, 5, e8, m2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v10, 5 +; VLDEP-NEXT: vmsne.vi v0, v8, 0 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv16i1( %va, %vb, i32 -5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv16i1_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv16i1_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v12, 0 -; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v14, 0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vim v10, v14, 1, v0 -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu -; CHECK-NEXT: vslideup.vx v10, v12, a0, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv16i1_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; NOVLDEP-NEXT: vmv1r.v v10, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v12, 0 +; NOVLDEP-NEXT: vmerge.vim v12, v12, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; NOVLDEP-NEXT: vmv.v.i v14, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v10 +; NOVLDEP-NEXT: vmerge.vim v10, v14, 1, v0 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v10, v10, 5, v0.t +; NOVLDEP-NEXT: vsetvli zero, zero, e8, m2, ta, mu +; NOVLDEP-NEXT: vslideup.vx v10, v12, a0, v0.t +; NOVLDEP-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; NOVLDEP-NEXT: vmsne.vi v8, v10, 0, v0.t +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv16i1_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; VLDEP-NEXT: vmv1r.v v10, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v12, 0 +; VLDEP-NEXT: vmerge.vim v12, v12, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; VLDEP-NEXT: vmv.v.i v14, 0 +; VLDEP-NEXT: vmv1r.v v0, v10 +; VLDEP-NEXT: vmerge.vim v10, v14, 1, v0 +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; VLDEP-NEXT: vslidedown.vi v10, v10, 5, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e8, m2, ta, mu +; VLDEP-NEXT: vslideup.vx v10, v12, a0, v0.t +; VLDEP-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; VLDEP-NEXT: vmsne.vi v8, v10, 0, v0.t +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv16i1( %va, %vb, i32 5, %mask, i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv32i1( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv32i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m4, 
ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v12, 0 -; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.i v16, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 5 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vslideup.vx v8, v12, a0 -; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv32i1: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v12, 0 +; NOVLDEP-NEXT: vmerge.vim v12, v12, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; NOVLDEP-NEXT: vmv.v.i v16, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v8, v16, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 5 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vslideup.vx v8, v12, a0 +; NOVLDEP-NEXT: vmsne.vi v0, v8, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv32i1: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v12, 0 +; VLDEP-NEXT: vmerge.vim v12, v12, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; VLDEP-NEXT: vmv.v.i v16, 0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v8, v16, 1, v0 +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 5 +; VLDEP-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v12, a0 +; VLDEP-NEXT: vmsne.vi v0, v8, 0 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv32i1( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv32i1_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv32i1_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v12, 0 -; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.i v16, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetivli zero, 5, e8, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vslideup.vi v8, v12, 5 -; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv32i1_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v12, 0 +; NOVLDEP-NEXT: vmerge.vim v12, v12, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; NOVLDEP-NEXT: vmv.v.i v16, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v8, v16, 1, v0 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v12, 5 +; NOVLDEP-NEXT: vmsne.vi v0, v8, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv32i1_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: 
vmv.v.i v12, 0 +; VLDEP-NEXT: vmerge.vim v12, v12, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; VLDEP-NEXT: vmv.v.i v16, 0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v8, v16, 1, v0 +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetivli zero, 5, e8, m4, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v12, 5 +; VLDEP-NEXT: vmsne.vi v0, v8, 0 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv32i1( %va, %vb, i32 -5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv32i1_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv32i1_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v12, 0 -; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.i v16, 0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vslidedown.vi v16, v16, 5, v0.t -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu -; CHECK-NEXT: vslideup.vx v16, v12, a0, v0.t -; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma -; CHECK-NEXT: vmsne.vi v8, v16, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv32i1_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; NOVLDEP-NEXT: vmv1r.v v10, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v12, 0 +; NOVLDEP-NEXT: vmerge.vim v12, v12, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; NOVLDEP-NEXT: vmv.v.i v16, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v10 +; NOVLDEP-NEXT: vmerge.vim v16, v16, 1, v0 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v16, v16, 5, v0.t +; NOVLDEP-NEXT: vsetvli zero, zero, e8, m4, ta, mu +; NOVLDEP-NEXT: vslideup.vx v16, v12, a0, v0.t +; NOVLDEP-NEXT: vsetvli zero, zero, e8, m4, ta, ma +; NOVLDEP-NEXT: vmsne.vi v8, v16, 0, v0.t +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv32i1_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; VLDEP-NEXT: vmv1r.v v10, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v12, 0 +; VLDEP-NEXT: vmerge.vim v12, v12, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; VLDEP-NEXT: vmv.v.i v16, 0 +; VLDEP-NEXT: vmv1r.v v0, v10 +; VLDEP-NEXT: vmerge.vim v16, v16, 1, v0 +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; VLDEP-NEXT: vslidedown.vi v16, v16, 5, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e8, m4, ta, mu +; VLDEP-NEXT: vslideup.vx v16, v12, a0, v0.t +; VLDEP-NEXT: vsetvli zero, zero, e8, m4, ta, ma +; VLDEP-NEXT: vmsne.vi v8, v16, 0, v0.t +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv32i1( %va, %vb, i32 5, %mask, i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv64i1( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv64i1: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v16, 0 -; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, 
ta, ma -; CHECK-NEXT: vmv.v.i v24, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v24, 1, v0 -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 5 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vslideup.vx v8, v16, a0 -; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv64i1: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v16, 0 +; NOVLDEP-NEXT: vmerge.vim v16, v16, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; NOVLDEP-NEXT: vmv.v.i v24, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v8, v24, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 5 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vslideup.vx v8, v16, a0 +; NOVLDEP-NEXT: vmsne.vi v0, v8, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv64i1: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v16, 0 +; VLDEP-NEXT: vmerge.vim v16, v16, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; VLDEP-NEXT: vmv.v.i v24, 0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: vmerge.vim v8, v24, 1, v0 +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 5 +; VLDEP-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v16, a0 +; VLDEP-NEXT: vmsne.vi v0, v8, 0 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv64i1( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv64i1_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv64i1_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmv.v.i v16, 0 -; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.i v24, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v24, 1, v0 -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetivli zero, 5, e8, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vslideup.vi v8, v16, 5 -; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv64i1_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; NOVLDEP-NEXT: vmv1r.v v9, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: vmv.v.i v16, 0 +; NOVLDEP-NEXT: vmerge.vim v16, v16, 1, v0 +; NOVLDEP-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; NOVLDEP-NEXT: vmv.v.i v24, 0 +; NOVLDEP-NEXT: vmv1r.v v0, v9 +; NOVLDEP-NEXT: vmerge.vim v8, v24, 1, v0 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v16, 5 +; NOVLDEP-NEXT: vmsne.vi v0, v8, 0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv64i1_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; VLDEP-NEXT: vmv1r.v v9, v0 +; VLDEP-NEXT: vmv1r.v v0, v8 +; VLDEP-NEXT: vmv.v.i v16, 0 +; VLDEP-NEXT: vmerge.vim v16, v16, 1, v0 +; VLDEP-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; VLDEP-NEXT: vmv.v.i v24, 0 +; VLDEP-NEXT: vmv1r.v v0, v9 +; VLDEP-NEXT: 
vmerge.vim v8, v24, 1, v0
+; VLDEP-NEXT: addi a0, a0, -5
+; VLDEP-NEXT: vsetivli zero, 5, e8, m8, ta, ma
+; VLDEP-NEXT: vslidedown.vx v8, v8, a0
+; VLDEP-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; VLDEP-NEXT: vslideup.vi v8, v16, 5
+; VLDEP-NEXT: vmsne.vi v0, v8, 0
+; VLDEP-NEXT: ret
   %v = call <vscale x 64 x i1> @llvm.experimental.vp.splice.nxv64i1(<vscale x 64 x i1> %va, <vscale x 64 x i1> %vb, i32 -5, <vscale x 64 x i1> splat (i1 1), i32 %evla, i32 %evlb)
   ret <vscale x 64 x i1> %v
 }

 define <vscale x 64 x i1> @test_vp_splice_nxv64i1_masked(<vscale x 64 x i1> %va, <vscale x 64 x i1> %vb, <vscale x 64 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) #0 {
-; CHECK-LABEL: test_vp_splice_nxv64i1_masked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT: vmv1r.v v10, v0
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: vmv.v.i v16, 0
-; CHECK-NEXT: vmerge.vim v16, v16, 1, v0
-; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
-; CHECK-NEXT: vmv.v.i v24, 0
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vmerge.vim v24, v24, 1, v0
-; CHECK-NEXT: addi a0, a0, -5
-; CHECK-NEXT: vmv1r.v v0, v9
-; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
-; CHECK-NEXT: vslidedown.vi v24, v24, 5, v0.t
-; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
-; CHECK-NEXT: vslideup.vx v24, v16, a0, v0.t
-; CHECK-NEXT: vsetvli zero, zero, e8, m8, ta, ma
-; CHECK-NEXT: vmsne.vi v8, v24, 0, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
-; CHECK-NEXT: ret
+; NOVLDEP-LABEL: test_vp_splice_nxv64i1_masked:
+; NOVLDEP: # %bb.0:
+; NOVLDEP-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; NOVLDEP-NEXT: vmv1r.v v10, v0
+; NOVLDEP-NEXT: vmv1r.v v0, v8
+; NOVLDEP-NEXT: vmv.v.i v16, 0
+; NOVLDEP-NEXT: vmerge.vim v16, v16, 1, v0
+; NOVLDEP-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; NOVLDEP-NEXT: vmv.v.i v24, 0
+; NOVLDEP-NEXT: vmv1r.v v0, v10
+; NOVLDEP-NEXT: vmerge.vim v24, v24, 1, v0
+; NOVLDEP-NEXT: addi a0, a0, -5
+; NOVLDEP-NEXT: vmv1r.v v0, v9
+; NOVLDEP-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; NOVLDEP-NEXT: vslidedown.vi v24, v24, 5, v0.t
+; NOVLDEP-NEXT: vsetvli zero, zero, e8, m8, ta, mu
+; NOVLDEP-NEXT: vslideup.vx v24, v16, a0, v0.t
+; NOVLDEP-NEXT: vsetvli zero, zero, e8, m8, ta, ma
+; NOVLDEP-NEXT: vmsne.vi v8, v24, 0, v0.t
+; NOVLDEP-NEXT: vmv1r.v v0, v8
+; NOVLDEP-NEXT: ret
+;
+; VLDEP-LABEL: test_vp_splice_nxv64i1_masked:
+; VLDEP: # %bb.0:
+; VLDEP-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; VLDEP-NEXT: vmv1r.v v10, v0
+; VLDEP-NEXT: vmv1r.v v0, v8
+; VLDEP-NEXT: vmv.v.i v16, 0
+; VLDEP-NEXT: vmerge.vim v16, v16, 1, v0
+; VLDEP-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; VLDEP-NEXT: vmv.v.i v24, 0
+; VLDEP-NEXT: vmv1r.v v0, v10
+; VLDEP-NEXT: vmerge.vim v24, v24, 1, v0
+; VLDEP-NEXT: addi a0, a0, -5
+; VLDEP-NEXT: vmv1r.v v0, v9
+; VLDEP-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; VLDEP-NEXT: vslidedown.vi v24, v24, 5, v0.t
+; VLDEP-NEXT: vsetvli zero, a1, e8, m8, ta, mu
+; VLDEP-NEXT: vslideup.vx v24, v16, a0, v0.t
+; VLDEP-NEXT: vsetvli zero, zero, e8, m8, ta, ma
+; VLDEP-NEXT: vmsne.vi v8, v24, 0, v0.t
+; VLDEP-NEXT: vmv1r.v v0, v8
+; VLDEP-NEXT: ret
   %v = call <vscale x 64 x i1> @llvm.experimental.vp.splice.nxv64i1(<vscale x 64 x i1> %va, <vscale x 64 x i1> %vb, i32 5, <vscale x 64 x i1> %mask, i32 %evla, i32 %evlb)
   ret <vscale x 64 x i1> %v
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll
index e6a57ae6b1ea5..f69205a661c95 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll
@@ -1,31 +1,51 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfh,+zvfbfmin -verify-machineinstrs \
-; RUN: < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfhmin,+zvfbfmin -verify-machineinstrs \
-; RUN: < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfh,+zvfbfmin \
+; RUN: < %s | FileCheck %s --check-prefixes=CHECK,NOVLDEP,ZVFH
+; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfhmin,+zvfbfmin \
+; RUN: < %s | FileCheck %s --check-prefixes=CHECK,NOVLDEP,ZVFHMIN
+; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfh,+zvfbfmin,+vl-dependent-latency \
+; RUN: < %s | FileCheck %s --check-prefixes=CHECK,VLDEP,ZVFH
+; RUN: llc -mtriple riscv64 -mattr=+f,+d,+v,+zfh,+zfbfmin,+zvfhmin,+zvfbfmin,+vl-dependent-latency \
+; RUN: < %s | FileCheck %s --check-prefixes=CHECK,VLDEP,ZVFHMIN

 define <vscale x 2 x i64> @test_vp_splice_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 {
-; CHECK-LABEL: test_vp_splice_nxv2i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -3
-; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 3
-; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v10, a0
-; CHECK-NEXT: ret
+; NOVLDEP-LABEL: test_vp_splice_nxv2i64:
+; NOVLDEP: # %bb.0:
+; NOVLDEP-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3
+; NOVLDEP-NEXT: addi a0, a0, -3
+; NOVLDEP-NEXT: vslideup.vx v8, v10, a0
+; NOVLDEP-NEXT: ret
+;
+; VLDEP-LABEL: test_vp_splice_nxv2i64:
+; VLDEP: # %bb.0:
+; VLDEP-NEXT: addi a0, a0, -3
+; VLDEP-NEXT: vsetvli zero, a0, e64, m2, ta, ma
+; VLDEP-NEXT: vslidedown.vi v8, v8, 3
+; VLDEP-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; VLDEP-NEXT: vslideup.vx v8, v10, a0
+; VLDEP-NEXT: ret
   %v = call <vscale x 2 x i64> @llvm.experimental.vp.splice.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, i32 3, <vscale x 2 x i1> splat (i1 1), i32 %evla, i32 %evlb)
   ret <vscale x 2 x i64> %v
 }

 define <vscale x 2 x i64> @test_vp_splice_nxv2i64_negative_offset(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 {
-; CHECK-LABEL: test_vp_splice_nxv2i64_negative_offset:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -3
-; CHECK-NEXT: vsetivli zero, 3, e64, m2, ta, ma
-; CHECK-NEXT: vslidedown.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v10, 3
-; CHECK-NEXT: ret
+; NOVLDEP-LABEL: test_vp_splice_nxv2i64_negative_offset:
+; NOVLDEP: # %bb.0:
+; NOVLDEP-NEXT: addi a0, a0, -3
+; NOVLDEP-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0
+; NOVLDEP-NEXT: vslideup.vi v8, v10, 3
+; NOVLDEP-NEXT: ret
+;
+; VLDEP-LABEL: test_vp_splice_nxv2i64_negative_offset:
+; VLDEP: # %bb.0:
+; VLDEP-NEXT: addi a0, a0, -3
+; VLDEP-NEXT: vsetivli zero, 3, e64, m2, ta, ma
+; VLDEP-NEXT: vslidedown.vx v8, v8, a0
+; VLDEP-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; VLDEP-NEXT: vslideup.vi v8, v10, 3
+; VLDEP-NEXT: ret
   %v = call <vscale x 2 x i64> @llvm.experimental.vp.splice.nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, i32 -3, <vscale x 2 x i1> splat (i1 1), i32 %evla, i32 %evlb)
   ret <vscale x 2 x i64> %v
 }
@@ -41,248 +61,407 @@ define @test_vp_splice_nxv2i64_zero_offset(
 }

 define <vscale x 2 x i64> @test_vp_splice_nxv2i64_masked(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) #0 {
-; CHECK-LABEL: test_vp_splice_nxv2i64_masked:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, a0, -3
-; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 3, v0.t
-; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT: vslideup.vx v8, v10, a0, v0.t
-; CHECK-NEXT: ret
+; NOVLDEP-LABEL: test_vp_splice_nxv2i64_masked:
+; NOVLDEP: # %bb.0:
+; NOVLDEP-NEXT: vsetvli zero, a1, e64, m2, ta, ma
+; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3, v0.t
+; NOVLDEP-NEXT: addi a0, a0, -3
+; NOVLDEP-NEXT: vsetvli zero,
zero, e64, m2, ta, mu +; NOVLDEP-NEXT: vslideup.vx v8, v10, a0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv2i64_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 3, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e64, m2, ta, mu +; VLDEP-NEXT: vslideup.vx v8, v10, a0, v0.t +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv2i64( %va, %vb, i32 3, %mask, i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv1i64( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv1i64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 1 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv1i64: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 1 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv1i64: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 1 +; VLDEP-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv1i64( %va, %vb, i32 1, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv1i64_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv1i64_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -2 -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 2 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv1i64_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: addi a0, a0, -2 +; NOVLDEP-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 2 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv1i64_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -2 +; VLDEP-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 2 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv1i64( %va, %vb, i32 -2, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv1i64_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv1i64_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv1i64_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 1, v0.t +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv1i64_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 1, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e64, m1, 
ta, mu +; VLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv1i64( %va, %vb, i32 1, %mask, i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv2i32( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv2i32: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 3 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv2i32: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3 +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv2i32: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 3 +; VLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv2i32( %va, %vb, i32 3, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv2i32_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv2i32_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -4 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 4 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv2i32_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: addi a0, a0, -4 +; NOVLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 4 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv2i32_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -4 +; VLDEP-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 4 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv2i32( %va, %vb, i32 -4, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv2i32_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv2i32_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 3, v0.t -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv2i32_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3, v0.t +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv2i32_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 3, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; VLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv2i32( %va, %vb, i32 3, %mask, i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv4i16( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv4i16: -; CHECK: # %bb.0: -; 
CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 5 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv4i16: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 5 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv4i16: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 5 +; VLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv4i16( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv4i16_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv4i16_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetivli zero, 5, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 5 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv4i16_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 5 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv4i16_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetivli zero, 5, e16, m1, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 5 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv4i16( %va, %vb, i32 -5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv4i16_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv4i16_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 5, v0.t -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu -; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv4i16_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 5, v0.t +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv4i16_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 5, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e16, m1, ta, mu +; VLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv4i16( %va, %vb, i32 5, %mask, i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv8i8( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv8i8: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 5 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv8i8: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, 
ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 5 +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv8i8: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 5 +; VLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv8i8( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv8i8_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv8i8_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetivli zero, 5, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 5 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv8i8_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 5 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv8i8_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetivli zero, 5, e8, m1, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 5 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv8i8( %va, %vb, i32 -5, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv8i8_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv8i8_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -5 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 5, v0.t -; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu -; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv8i8_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 5, v0.t +; NOVLDEP-NEXT: addi a0, a0, -5 +; NOVLDEP-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv8i8_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -5 +; VLDEP-NEXT: vsetvli zero, a0, e8, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 5, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e8, m1, ta, mu +; VLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv8i8( %va, %vb, i32 5, %mask, i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv1f64( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv1f64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 1 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv1f64: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 1 +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv1f64: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 1 +; VLDEP-NEXT: vsetvli zero, a1, 
e64, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv1f64( %va, %vb, i32 1, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv1f64_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv1f64_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -2 -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 2 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv1f64_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: addi a0, a0, -2 +; NOVLDEP-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 2 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv1f64_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -2 +; VLDEP-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 2 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv1f64( %va, %vb, i32 -2, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv1f64_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv1f64_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv1f64_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 1, v0.t +; NOVLDEP-NEXT: addi a0, a0, -1 +; NOVLDEP-NEXT: vsetvli zero, zero, e64, m1, ta, mu +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv1f64_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -1 +; VLDEP-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 1, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e64, m1, ta, mu +; VLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv1f64( %va, %vb, i32 1, %mask, i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv2f32( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv2f32: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 3 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv2f32: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3 +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv2f32: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 3 +; VLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv2f32( %va, %vb, i32 3, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv2f32_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: 
test_vp_splice_nxv2f32_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 3 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv2f32_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 3 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv2f32_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetivli zero, 3, e32, m1, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 3 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv2f32( %va, %vb, i32 -3, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv2f32_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv2f32_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 3, v0.t -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu -; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv2f32_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3, v0.t +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vsetvli zero, zero, e32, m1, ta, mu +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv2f32_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 3, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e32, m1, ta, mu +; VLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv2f32( %va, %vb, i32 3, %mask, i32 %evla, i32 %evlb) ret %v } @@ -429,79 +608,129 @@ define @test_vp_splice_nxv16i64_negative_offset( @test_vp_splice_nxv2f16( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv2f16: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 3 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv2f16: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3 +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv2f16: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 3 +; VLDEP-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv2f16( %va, %vb, i32 3, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv2f16_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv2f16_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetivli zero, 3, e16, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vi 
v8, v9, 3 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv2f16_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 3 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv2f16_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetivli zero, 3, e16, mf2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 3 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv2f16( %va, %vb, i32 -3, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv2f16_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv2f16_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 3, v0.t -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv2f16_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3, v0.t +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv2f16_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 3, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; VLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv2f16( %va, %vb, i32 3, %mask, i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv2bf16( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 3 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v9, a0 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv2bf16: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3 +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0 +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv2bf16: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 3 +; VLDEP-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; VLDEP-NEXT: vslideup.vx v8, v9, a0 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv2bf16( %va, %vb, i32 3, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv2bf16_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv2bf16_negative_offset: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetivli zero, 3, e16, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 3 -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv2bf16_negative_offset: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vx v8, v8, a0 +; NOVLDEP-NEXT: vslideup.vi v8, v9, 3 +; NOVLDEP-NEXT: ret +; +; 
VLDEP-LABEL: test_vp_splice_nxv2bf16_negative_offset: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetivli zero, 3, e16, mf2, ta, ma +; VLDEP-NEXT: vslidedown.vx v8, v8, a0 +; VLDEP-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; VLDEP-NEXT: vslideup.vi v8, v9, 3 +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv2bf16( %va, %vb, i32 -3, splat (i1 1), i32 %evla, i32 %evlb) ret %v } define @test_vp_splice_nxv2bf16_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) #0 { -; CHECK-LABEL: test_vp_splice_nxv2bf16_masked: -; CHECK: # %bb.0: -; CHECK-NEXT: addi a0, a0, -3 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 3, v0.t -; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu -; CHECK-NEXT: vslideup.vx v8, v9, a0, v0.t -; CHECK-NEXT: ret +; NOVLDEP-LABEL: test_vp_splice_nxv2bf16_masked: +; NOVLDEP: # %bb.0: +; NOVLDEP-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; NOVLDEP-NEXT: vslidedown.vi v8, v8, 3, v0.t +; NOVLDEP-NEXT: addi a0, a0, -3 +; NOVLDEP-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; NOVLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; NOVLDEP-NEXT: ret +; +; VLDEP-LABEL: test_vp_splice_nxv2bf16_masked: +; VLDEP: # %bb.0: +; VLDEP-NEXT: addi a0, a0, -3 +; VLDEP-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; VLDEP-NEXT: vslidedown.vi v8, v8, 3, v0.t +; VLDEP-NEXT: vsetvli zero, a1, e16, mf2, ta, mu +; VLDEP-NEXT: vslideup.vx v8, v9, a0, v0.t +; VLDEP-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv2bf16( %va, %vb, i32 3, %mask, i32 %evla, i32 %evlb) ret %v }