diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 45a37622a531b..b0e3f534e2aaa 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7377,6 +7377,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     if ((Opcode == ISD::ADD || Opcode == ISD::SUB) &&
         VT.getScalarType() == MVT::i1)
       return getNode(ISD::XOR, DL, VT, N1, N2);
+    // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
+    if (Opcode == ISD::ADD && N1.getOpcode() == ISD::VSCALE &&
+        N2.getOpcode() == ISD::VSCALE) {
+      const APInt &C1 = N1->getConstantOperandAPInt(0);
+      const APInt &C2 = N2->getConstantOperandAPInt(0);
+      return getVScale(DL, VT, C1 + C2);
+    }
     break;
   case ISD::MUL:
     assert(VT.isInteger() && "This operator does not apply to FP types!");
diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
index d42c42c7ce036..7c9a283dd54bc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
@@ -488,8 +488,6 @@ define @extract_nxv6f16_nxv12f16_6( %in)
 ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT: vslidedown.vx v13, v10, a0
 ; CHECK-NEXT: vslidedown.vx v12, v9, a0
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
 ; CHECK-NEXT: vslideup.vx v12, v10, a0
 ; CHECK-NEXT: vmv2r.v v8, v12
 ; CHECK-NEXT: ret
@@ -543,8 +541,6 @@ define @extract_nxv6bf16_nxv12bf16_6( @insert_nxv1i8_nxv4i8_3( %vec, @llvm.vector.insert.nxv1i8.nxv4i8( %vec, %subvec, i64 3)
@@ -246,8 +245,7 @@ define @insert_nxv16i32_nxv1i32_1( %vec,
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
 ; CHECK-NEXT: vslideup.vx v8, v16, a0
 ; CHECK-NEXT: ret
 %v = call @llvm.vector.insert.nxv1i32.nxv16i32( %vec, %subvec, i64 1)
@@ -282,8 +280,8 @@ define @insert_nxv16i8_nxv1i8_1( %vec, @insert_nxv16i8_nxv1i8_3( %vec, @llvm.vector.insert.nxv1i8.nxv16i8( %vec, %subvec, i64 3)
 ret %v
@@ -363,8 +361,7 @@ define @insert_nxv32f16_nxv2f16_2( %vec
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT: vslideup.vx v8, v16, a0
 ; CHECK-NEXT: ret
 %v = call @llvm.vector.insert.nxv2f16.nxv32f16( %vec, %subvec, i64 2)
@@ -376,8 +373,7 @@ define @insert_nxv32f16_nxv2f16_26( %ve
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: add a1, a0, a0
-; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
 ; CHECK-NEXT: vslideup.vx v14, v16, a0
 ; CHECK-NEXT: ret
 %v = call @llvm.vector.insert.nxv2f16.nxv32f16( %vec, %subvec, i64 26)
@@ -422,8 +418,8 @@ define @insert_nxv32i1_nxv8i1_8( %v, @insert_nxv32bf16_nxv2bf16_2( @llvm.vector.insert.nxv2bf16.nxv32bf16( %vec, %subvec, i64 2)
@@ -583,8 +578,7 @@ define @insert_nxv32bf16_nxv2bf16_26( @llvm.vector.insert.nxv2bf16.nxv32bf16( %vec, %subvec, i64 26)
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
index 28b27bb75f210..9972df97ad9f4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll
@@ -1371,6 +1371,8 @@ define @fcmp_oeq_vv_nxv64bf16( %va,
@fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vf_nx16f64( %va) { ; RV32-NEXT: vmfeq.vf v24, v16, fa5 ; RV32-NEXT: vmfeq.vf v0, v8, fa5 ; RV32-NEXT: srli a0, a0, 3 -; RV32-NEXT: add a1, a0, a0 -; RV32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; RV32-NEXT: vslideup.vx v0, v24, a0 ; RV32-NEXT: ret ; @@ -4293,8 +4292,7 @@ define @fcmp_oeq_vf_nx16f64( %va) { ; RV64-NEXT: vmfeq.vf v24, v16, fa5 ; RV64-NEXT: vmfeq.vf v0, v8, fa5 ; RV64-NEXT: srli a0, a0, 3 -; RV64-NEXT: add a1, a0, a0 -; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; RV64-NEXT: vslideup.vx v0, v24, a0 ; RV64-NEXT: ret ; @@ -4306,8 +4304,7 @@ define @fcmp_oeq_vf_nx16f64( %va) { ; ZVFHMIN32-NEXT: vmfeq.vf v24, v16, fa5 ; ZVFHMIN32-NEXT: vmfeq.vf v0, v8, fa5 ; ZVFHMIN32-NEXT: srli a0, a0, 3 -; ZVFHMIN32-NEXT: add a1, a0, a0 -; ZVFHMIN32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; ZVFHMIN32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; ZVFHMIN32-NEXT: vslideup.vx v0, v24, a0 ; ZVFHMIN32-NEXT: ret ; @@ -4319,8 +4316,7 @@ define @fcmp_oeq_vf_nx16f64( %va) { ; ZVFHMIN64-NEXT: vmfeq.vf v24, v16, fa5 ; ZVFHMIN64-NEXT: vmfeq.vf v0, v8, fa5 ; ZVFHMIN64-NEXT: srli a0, a0, 3 -; ZVFHMIN64-NEXT: add a1, a0, a0 -; ZVFHMIN64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; ZVFHMIN64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; ZVFHMIN64-NEXT: vslideup.vx v0, v24, a0 ; ZVFHMIN64-NEXT: ret %vc = fcmp oeq %va, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll index ef560a7631dee..13c63d9c80a9a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll @@ -2246,8 +2246,7 @@ define @icmp_eq_vv_nxv32i32( %va, @icmp_eq_vx_nxv32i32( %va, i32 %b, ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t -; CHECK-NEXT: add a0, a2, a2 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v16, v25, a2 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret @@ -2316,8 +2314,7 @@ define @icmp_eq_vx_swap_nxv32i32( %va, i32 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t -; CHECK-NEXT: add a0, a2, a2 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v16, v25, a2 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll index bd3c29b0c6efc..a85b471530cc9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll @@ -3001,9 +3001,8 @@ define @icmp_eq_vi_nx16i64( %va) { ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vmseq.vi v24, v16, 0 ; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vmseq.vi v0, v8, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; CHECK-NEXT: vslideup.vx v0, v24, a0 ; CHECK-NEXT: ret %vc = icmp eq %va, zeroinitializer diff --git 
a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll index c9f9a79733003..790cd56ee952c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll +++ b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll @@ -48,10 +48,10 @@ define internal void @SubRegLivenessUndefInPhi(i64 %cond) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: srli a1, a0, 2 ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: vadd.vi v10, v9, 1 ; CHECK-NEXT: vadd.vi v11, v9, 3 -; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: vslideup.vx v12, v10, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll index baace6d26f144..4753ab915bdf3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll @@ -191,8 +191,7 @@ define {<2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave3_v2i32_v6i32(<6 x ; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v8, 4 ; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v12, a0 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -222,8 +221,7 @@ define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave4_v2i32_ ; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v9, v8, 2 ; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v12, v10, a0 ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: addi a0, sp, 16 @@ -254,15 +252,13 @@ define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @vector_deinterle ; CHECK-NEXT: vslidedown.vi v14, v8, 2 ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v8, 8 -; CHECK-NEXT: srli a1, a0, 3 -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a2, a1, a1 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v13, v12, a1 -; CHECK-NEXT: vslideup.vx v8, v14, a1 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v13, a0 +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v13, v12, a0 +; CHECK-NEXT: vslideup.vx v8, v14, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v8, v13, a1 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vmv1r.v v9, v10 ; CHECK-NEXT: vs2r.v v8, (a0) @@ -292,16 +288,14 @@ define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @vecto ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v8, 10 ; CHECK-NEXT: vslidedown.vi v12, v8, 8 -; CHECK-NEXT: srli a1, a0, 3 -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a2, a1, a1 -; CHECK-NEXT: add a3, a0, a0 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v15, v14, a1 -; CHECK-NEXT: vslideup.vx v8, v16, a1 -; CHECK-NEXT: vslideup.vx v12, v10, a1 -; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v15, a0 +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: vsetvli 
a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v15, v14, a0 +; CHECK-NEXT: vslideup.vx v8, v16, a0 +; CHECK-NEXT: vslideup.vx v12, v10, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v8, v15, a1 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vmv1r.v v9, v12 ; CHECK-NEXT: vs2r.v v8, (a0) @@ -330,22 +324,19 @@ define {<2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>} @v ; CHECK-NEXT: vslidedown.vi v12, v8, 2 ; CHECK-NEXT: vslidedown.vi v13, v8, 4 ; CHECK-NEXT: vslidedown.vi v14, v8, 6 -; CHECK-NEXT: srli a1, a0, 3 -; CHECK-NEXT: srli a2, a0, 2 +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a2, a0, 3 ; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: add a3, a1, a1 -; CHECK-NEXT: add a4, a2, a1 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vx v10, v9, a2 +; CHECK-NEXT: add a3, a1, a2 +; CHECK-NEXT: vslideup.vx v8, v12, a2 ; CHECK-NEXT: vsetvli zero, a3, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v10, v9, a1 -; CHECK-NEXT: vslideup.vx v8, v12, a1 -; CHECK-NEXT: add a3, a0, a0 -; CHECK-NEXT: add a1, a4, a1 -; CHECK-NEXT: vsetvli zero, a4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v10, v11, a2 -; CHECK-NEXT: vslideup.vx v8, v13, a2 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v14, a4 -; CHECK-NEXT: vsetvli zero, a3, e8, m1, ta, ma +; CHECK-NEXT: vslideup.vx v10, v11, a1 +; CHECK-NEXT: vslideup.vx v8, v13, a1 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v8, v14, a3 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v10, a0 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs1r.v v8, (a0) @@ -374,23 +365,20 @@ define {<2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 ; CHECK-NEXT: vslidedown.vi v13, v8, 2 ; CHECK-NEXT: vslidedown.vi v14, v8, 4 ; CHECK-NEXT: vslidedown.vi v15, v8, 6 -; CHECK-NEXT: srli a1, a0, 3 -; CHECK-NEXT: srli a2, a0, 2 +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a2, a0, 3 ; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: add a3, a1, a1 -; CHECK-NEXT: add a4, a2, a1 -; CHECK-NEXT: add a5, a0, a0 +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, tu, ma +; CHECK-NEXT: vslideup.vx v10, v9, a2 +; CHECK-NEXT: add a3, a1, a2 +; CHECK-NEXT: vslideup.vx v8, v13, a2 ; CHECK-NEXT: vsetvli zero, a3, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v10, v9, a1 -; CHECK-NEXT: vslideup.vx v8, v13, a1 -; CHECK-NEXT: add a1, a4, a1 -; CHECK-NEXT: vsetvli zero, a4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v10, v11, a2 -; CHECK-NEXT: vslideup.vx v8, v14, a2 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v10, v12, a4 -; CHECK-NEXT: vslideup.vx v8, v15, a4 -; CHECK-NEXT: vsetvli zero, a5, e8, m1, ta, ma +; CHECK-NEXT: vslideup.vx v10, v11, a1 +; CHECK-NEXT: vslideup.vx v8, v14, a1 +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v10, v12, a3 +; CHECK-NEXT: vslideup.vx v8, v15, a3 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v10, a0 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs1r.v v8, (a0) @@ -551,8 +539,7 @@ define {<2 x float>, <2 x float>, <2 x float>} @vector_deinterleave3_v6f32_v2f32 ; CHECK-NEXT: vsetivli zero, 2, e32, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v8, 4 ; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v12, a0 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vmv1r.v v9, v10 @@ -586,8 
+573,7 @@ define {<2 x float>, <2 x float>, <2 x float>, <2 x float>} @vector_deinterleave ; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v9, v8, 2 ; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-NEXT: vslideup.vx v12, v10, a0 ; CHECK-NEXT: vslideup.vx v8, v9, a0 ; CHECK-NEXT: addi a0, sp, 16 @@ -622,15 +608,13 @@ define {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @vector_dein ; CHECK-NEXT: vslidedown.vi v14, v8, 2 ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v8, 8 -; CHECK-NEXT: srli a1, a0, 3 -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a2, a1, a1 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v13, v12, a1 -; CHECK-NEXT: vslideup.vx v8, v14, a1 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v13, a0 +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v13, v12, a0 +; CHECK-NEXT: vslideup.vx v8, v14, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v8, v13, a1 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vmv1r.v v9, v10 ; CHECK-NEXT: vs2r.v v8, (a0) @@ -664,16 +648,14 @@ define {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v10, v8, 10 ; CHECK-NEXT: vslidedown.vi v12, v8, 8 -; CHECK-NEXT: srli a1, a0, 3 -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a2, a1, a1 -; CHECK-NEXT: add a3, a0, a0 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v15, v14, a1 -; CHECK-NEXT: vslideup.vx v8, v16, a1 -; CHECK-NEXT: vslideup.vx v12, v10, a1 -; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v15, a0 +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v15, v14, a0 +; CHECK-NEXT: vslideup.vx v8, v16, a0 +; CHECK-NEXT: vslideup.vx v12, v10, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v8, v15, a1 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vmv1r.v v9, v12 ; CHECK-NEXT: vs2r.v v8, (a0) @@ -707,21 +689,18 @@ define {<1 x half>, <1 x half>, <1 x half>, <1 x half>, <1 x half>, <1 x half>, ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vslidedown.vi v13, v8, 5 ; CHECK-NEXT: vslidedown.vi v14, v8, 6 -; CHECK-NEXT: srli a1, a0, 3 -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a2, a1, a1 -; CHECK-NEXT: add a3, a0, a0 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v11, v9, a1 -; CHECK-NEXT: vslideup.vx v10, v12, a1 -; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v10, v11, a0 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v11, v9, a0 +; CHECK-NEXT: vslideup.vx v10, v12, a0 +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v10, v11, a1 ; CHECK-NEXT: vslidedown.vi v11, v8, 4 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v11, v13, a1 -; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v11, v14, a0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: 
vslideup.vx v11, v13, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v11, v14, a1 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs2r.v v10, (a0) ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma @@ -751,25 +730,22 @@ define {<1 x half>, <1 x half>, <1 x half>, <1 x half>, <1 x half>, <1 x half>, ; CHECK-NEXT: vslidedown.vi v10, v8, 7 ; CHECK-NEXT: vslidedown.vi v11, v8, 6 ; CHECK-NEXT: vslidedown.vi v12, v8, 5 -; CHECK-NEXT: srli a1, a0, 3 +; CHECK-NEXT: srli a1, a0, 2 +; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: vslidedown.vi v9, v8, 4 -; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: add a2, a1, a1 -; CHECK-NEXT: add a3, a0, a0 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v11, v10, a1 -; CHECK-NEXT: vslideup.vx v9, v12, a1 -; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v9, v11, a0 -; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v11, v10, a0 +; CHECK-NEXT: vslideup.vx v9, v12, a0 +; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v9, v11, a1 ; CHECK-NEXT: vslidedown.vi v10, v8, 3 ; CHECK-NEXT: vslidedown.vi v11, v8, 2 ; CHECK-NEXT: vslidedown.vi v12, v8, 1 -; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v11, v10, a1 -; CHECK-NEXT: vslideup.vx v8, v12, a1 -; CHECK-NEXT: vsetvli zero, a3, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vx v8, v11, a0 +; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v11, v10, a0 +; CHECK-NEXT: vslideup.vx v8, v12, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vslideup.vx v8, v11, a1 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs2r.v v8, (a0) ; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll index 75f92c86ff09f..6144f916ea52b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll @@ -2705,16 +2705,10 @@ define {, , , , , , , , , %a, @vector_interleave_nxv32i1_nxv16i1( ; V-NEXT: vmv1r.v v0, v8 ; V-NEXT: vmv.v.i v10, 0 ; V-NEXT: li a0, -1 -; V-NEXT: csrr a1, vlenb ; V-NEXT: vmerge.vim v12, v10, 1, v0 ; V-NEXT: vmv1r.v v0, v9 ; V-NEXT: vmerge.vim v14, v10, 1, v0 -; V-NEXT: srli a1, a1, 2 ; V-NEXT: vwaddu.vv v8, v14, v12 ; V-NEXT: vwmaccu.vx v8, a0, v12 +; V-NEXT: csrr a0, vlenb ; V-NEXT: vmsne.vi v12, v10, 0 ; V-NEXT: vmsne.vi v0, v8, 0 -; V-NEXT: add a0, a1, a1 -; V-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; V-NEXT: vslideup.vx v0, v12, a1 +; V-NEXT: srli a0, a0, 2 +; V-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; V-NEXT: vslideup.vx v0, v12, a0 ; V-NEXT: ret ; ; ZVBB-LABEL: vector_interleave_nxv32i1_nxv16i1: @@ -38,17 +37,16 @@ define @vector_interleave_nxv32i1_nxv16i1( ; ZVBB-NEXT: vmv1r.v v0, v8 ; ZVBB-NEXT: vmv.v.i v10, 0 ; ZVBB-NEXT: li a0, 1 -; ZVBB-NEXT: csrr a1, vlenb ; ZVBB-NEXT: vmerge.vim v10, v10, 1, v0 -; ZVBB-NEXT: srli a1, a1, 2 ; ZVBB-NEXT: vwsll.vi v12, v10, 8 ; ZVBB-NEXT: vmv1r.v v0, v9 ; ZVBB-NEXT: vwaddu.wx v12, v12, a0, v0.t +; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: vmsne.vi v8, v14, 0 ; ZVBB-NEXT: vmsne.vi v0, v12, 0 -; ZVBB-NEXT: add a0, a1, a1 -; ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v8, a1 +; ZVBB-NEXT: srli a0, a0, 2 +; ZVBB-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; ZVBB-NEXT: vslideup.vx v0, v8, a0 ; ZVBB-NEXT: ret ; ; ZIP-LABEL: vector_interleave_nxv32i1_nxv16i1: @@ 
-61,13 +59,12 @@ define @vector_interleave_nxv32i1_nxv16i1( ; ZIP-NEXT: vmerge.vim v12, v10, 1, v0 ; ZIP-NEXT: vmv1r.v v0, v9 ; ZIP-NEXT: vmerge.vim v8, v10, 1, v0 -; ZIP-NEXT: srli a0, a0, 2 ; ZIP-NEXT: ri.vzip2b.vv v10, v8, v12 ; ZIP-NEXT: ri.vzip2a.vv v14, v8, v12 ; ZIP-NEXT: vmsne.vi v8, v10, 0 ; ZIP-NEXT: vmsne.vi v0, v14, 0 -; ZIP-NEXT: add a1, a0, a0 -; ZIP-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; ZIP-NEXT: srli a0, a0, 2 +; ZIP-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; ZIP-NEXT: vslideup.vx v0, v8, a0 ; ZIP-NEXT: ret %res = call @llvm.vector.interleave2.nxv32i1( %a, %b) @@ -508,19 +505,17 @@ define @vector_interleave_nxv48i1_nxv16i1( ; CHECK-NEXT: add a2, a3, a2 ; CHECK-NEXT: vsseg3e8.v v14, (a0) ; CHECK-NEXT: vl2r.v v8, (a2) -; CHECK-NEXT: srli a2, a1, 2 -; CHECK-NEXT: srli a1, a1, 1 +; CHECK-NEXT: srli a2, a1, 1 ; CHECK-NEXT: vl2r.v v10, (a3) ; CHECK-NEXT: vl2r.v v12, (a0) -; CHECK-NEXT: add a0, a2, a2 +; CHECK-NEXT: srli a1, a1, 2 ; CHECK-NEXT: vmsne.vi v14, v8, 0 ; CHECK-NEXT: vmsne.vi v8, v10, 0 ; CHECK-NEXT: vmsne.vi v0, v12, 0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v0, v8, a2 -; CHECK-NEXT: add a0, a1, a1 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vx v0, v14, a1 +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v0, v8, a1 +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vslideup.vx v0, v14, a2 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 6 ; CHECK-NEXT: mul a0, a0, a1 @@ -551,19 +546,17 @@ define @vector_interleave_nxv48i1_nxv16i1( ; ZVBB-NEXT: add a2, a3, a2 ; ZVBB-NEXT: vsseg3e8.v v14, (a0) ; ZVBB-NEXT: vl2r.v v8, (a2) -; ZVBB-NEXT: srli a2, a1, 2 -; ZVBB-NEXT: srli a1, a1, 1 +; ZVBB-NEXT: srli a2, a1, 1 ; ZVBB-NEXT: vl2r.v v10, (a3) ; ZVBB-NEXT: vl2r.v v12, (a0) -; ZVBB-NEXT: add a0, a2, a2 +; ZVBB-NEXT: srli a1, a1, 2 ; ZVBB-NEXT: vmsne.vi v14, v8, 0 ; ZVBB-NEXT: vmsne.vi v8, v10, 0 ; ZVBB-NEXT: vmsne.vi v0, v12, 0 -; ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v8, a2 -; ZVBB-NEXT: add a0, a1, a1 -; ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v14, a1 +; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; ZVBB-NEXT: vslideup.vx v0, v8, a1 +; ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; ZVBB-NEXT: vslideup.vx v0, v14, a2 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: li a1, 6 ; ZVBB-NEXT: mul a0, a0, a1 @@ -812,22 +805,20 @@ define @vector_interleave_nxv64i1_nxv16i1( ; CHECK-NEXT: add a2, a4, a2 ; CHECK-NEXT: vsseg4e8.v v14, (a0) ; CHECK-NEXT: vl2r.v v8, (a2) -; CHECK-NEXT: srli a2, a1, 2 -; CHECK-NEXT: srli a1, a1, 1 +; CHECK-NEXT: srli a2, a1, 1 +; CHECK-NEXT: srli a1, a1, 2 ; CHECK-NEXT: vl2r.v v10, (a4) -; CHECK-NEXT: add a4, a2, a2 ; CHECK-NEXT: vl2r.v v12, (a3) ; CHECK-NEXT: vl2r.v v14, (a0) ; CHECK-NEXT: vmsne.vi v16, v8, 0 ; CHECK-NEXT: vmsne.vi v8, v10, 0 ; CHECK-NEXT: vmsne.vi v9, v12, 0 ; CHECK-NEXT: vmsne.vi v0, v14, 0 -; CHECK-NEXT: vsetvli zero, a4, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v16, a2 -; CHECK-NEXT: vslideup.vx v0, v9, a2 -; CHECK-NEXT: add a0, a1, a1 -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vx v0, v8, a1 +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v8, v16, a1 +; CHECK-NEXT: vslideup.vx v0, v9, a1 +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vslideup.vx v0, v8, a2 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 @@ -859,22 +850,20 @@ define 
@vector_interleave_nxv64i1_nxv16i1( ; ZVBB-NEXT: add a2, a4, a2 ; ZVBB-NEXT: vsseg4e8.v v14, (a0) ; ZVBB-NEXT: vl2r.v v8, (a2) -; ZVBB-NEXT: srli a2, a1, 2 -; ZVBB-NEXT: srli a1, a1, 1 +; ZVBB-NEXT: srli a2, a1, 1 +; ZVBB-NEXT: srli a1, a1, 2 ; ZVBB-NEXT: vl2r.v v10, (a4) -; ZVBB-NEXT: add a4, a2, a2 ; ZVBB-NEXT: vl2r.v v12, (a3) ; ZVBB-NEXT: vl2r.v v14, (a0) ; ZVBB-NEXT: vmsne.vi v16, v8, 0 ; ZVBB-NEXT: vmsne.vi v8, v10, 0 ; ZVBB-NEXT: vmsne.vi v9, v12, 0 ; ZVBB-NEXT: vmsne.vi v0, v14, 0 -; ZVBB-NEXT: vsetvli zero, a4, e8, mf2, ta, ma -; ZVBB-NEXT: vslideup.vx v8, v16, a2 -; ZVBB-NEXT: vslideup.vx v0, v9, a2 -; ZVBB-NEXT: add a0, a1, a1 -; ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v8, a1 +; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; ZVBB-NEXT: vslideup.vx v8, v16, a1 +; ZVBB-NEXT: vslideup.vx v0, v9, a1 +; ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; ZVBB-NEXT: vslideup.vx v0, v8, a2 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: slli a0, a0, 3 ; ZVBB-NEXT: add sp, sp, a0 @@ -1114,7 +1103,7 @@ define @vector_interleave_nxv80i1_nxv16i1( ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v18, v12, 1, v0 ; CHECK-NEXT: add a2, a4, a1 -; CHECK-NEXT: srli a3, a1, 2 +; CHECK-NEXT: srli a3, a1, 1 ; CHECK-NEXT: vmv2r.v v20, v14 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vmerge.vim v16, v12, 1, v0 @@ -1144,11 +1133,9 @@ define @vector_interleave_nxv80i1_nxv16i1( ; CHECK-NEXT: add a5, a4, a1 ; CHECK-NEXT: vl1r.v v16, (a5) ; CHECK-NEXT: add a5, a5, a1 -; CHECK-NEXT: srli a1, a1, 1 +; CHECK-NEXT: srli a1, a1, 2 ; CHECK-NEXT: vl1r.v v11, (a2) -; CHECK-NEXT: add a2, a3, a3 ; CHECK-NEXT: vl1r.v v15, (a4) -; CHECK-NEXT: add a4, a1, a1 ; CHECK-NEXT: vl1r.v v13, (a0) ; CHECK-NEXT: vl1r.v v17, (a5) ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma @@ -1156,11 +1143,11 @@ define @vector_interleave_nxv80i1_nxv16i1( ; CHECK-NEXT: vmsne.vi v0, v10, 0 ; CHECK-NEXT: vmsne.vi v8, v14, 0 ; CHECK-NEXT: vmsne.vi v9, v12, 0 -; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v0, v18, a3 -; CHECK-NEXT: vslideup.vx v9, v8, a3 -; CHECK-NEXT: vsetvli zero, a4, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vx v0, v9, a1 +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vx v0, v18, a1 +; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vslideup.vx v0, v9, a3 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmsne.vi v8, v16, 0 ; CHECK-NEXT: csrr a0, vlenb @@ -1190,7 +1177,7 @@ define @vector_interleave_nxv80i1_nxv16i1( ; ZVBB-NEXT: vmv1r.v v0, v8 ; ZVBB-NEXT: vmerge.vim v18, v12, 1, v0 ; ZVBB-NEXT: add a2, a4, a1 -; ZVBB-NEXT: srli a3, a1, 2 +; ZVBB-NEXT: srli a3, a1, 1 ; ZVBB-NEXT: vmv2r.v v20, v14 ; ZVBB-NEXT: vmv1r.v v0, v9 ; ZVBB-NEXT: vmerge.vim v16, v12, 1, v0 @@ -1220,11 +1207,9 @@ define @vector_interleave_nxv80i1_nxv16i1( ; ZVBB-NEXT: add a5, a4, a1 ; ZVBB-NEXT: vl1r.v v16, (a5) ; ZVBB-NEXT: add a5, a5, a1 -; ZVBB-NEXT: srli a1, a1, 1 +; ZVBB-NEXT: srli a1, a1, 2 ; ZVBB-NEXT: vl1r.v v11, (a2) -; ZVBB-NEXT: add a2, a3, a3 ; ZVBB-NEXT: vl1r.v v15, (a4) -; ZVBB-NEXT: add a4, a1, a1 ; ZVBB-NEXT: vl1r.v v13, (a0) ; ZVBB-NEXT: vl1r.v v17, (a5) ; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma @@ -1232,11 +1217,11 @@ define @vector_interleave_nxv80i1_nxv16i1( ; ZVBB-NEXT: vmsne.vi v0, v10, 0 ; ZVBB-NEXT: vmsne.vi v8, v14, 0 ; ZVBB-NEXT: vmsne.vi v9, v12, 0 -; ZVBB-NEXT: vsetvli zero, a2, e8, mf2, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v18, a3 -; ZVBB-NEXT: vslideup.vx v9, v8, a3 -; ZVBB-NEXT: 
vsetvli zero, a4, e8, m1, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v9, a1 +; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; ZVBB-NEXT: vslideup.vx v0, v18, a1 +; ZVBB-NEXT: vslideup.vx v9, v8, a1 +; ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; ZVBB-NEXT: vslideup.vx v0, v9, a3 ; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; ZVBB-NEXT: vmsne.vi v8, v16, 0 ; ZVBB-NEXT: csrr a0, vlenb @@ -2340,47 +2325,45 @@ define @vector_interleave_nxv96i1_nxv16i1( ; CHECK-NEXT: vmv1r.v v17, v9 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vim v24, v20, 1, v0 -; CHECK-NEXT: addi a5, sp, 16 +; CHECK-NEXT: addi a4, sp, 16 ; CHECK-NEXT: vmv1r.v v18, v25 ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmerge.vim v26, v20, 1, v0 -; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: vmv1r.v v19, v27 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vmerge.vim v10, v20, 1, v0 -; CHECK-NEXT: add a3, a0, a2 +; CHECK-NEXT: add a2, a0, a1 ; CHECK-NEXT: vmv1r.v v20, v11 -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma ; CHECK-NEXT: vsseg6e8.v v15, (a0) ; CHECK-NEXT: vmv1r.v v15, v22 -; CHECK-NEXT: add a4, a5, a2 +; CHECK-NEXT: add a5, a4, a1 ; CHECK-NEXT: vmv1r.v v16, v8 -; CHECK-NEXT: srli a1, a2, 2 +; CHECK-NEXT: srli a3, a1, 1 ; CHECK-NEXT: vmv1r.v v17, v24 -; CHECK-NEXT: add a6, a4, a2 +; CHECK-NEXT: add a6, a5, a1 ; CHECK-NEXT: vmv1r.v v18, v26 -; CHECK-NEXT: add a7, a3, a2 +; CHECK-NEXT: add a7, a2, a1 ; CHECK-NEXT: vmv1r.v v19, v10 -; CHECK-NEXT: vsseg6e8.v v14, (a5) +; CHECK-NEXT: vsseg6e8.v v14, (a4) ; CHECK-NEXT: vl1r.v v8, (a0) -; CHECK-NEXT: add a0, a6, a2 +; CHECK-NEXT: add a0, a6, a1 ; CHECK-NEXT: vl1r.v v10, (a6) -; CHECK-NEXT: add a6, a7, a2 -; CHECK-NEXT: vl1r.v v12, (a5) -; CHECK-NEXT: add a5, a0, a2 +; CHECK-NEXT: add a6, a7, a1 +; CHECK-NEXT: vl1r.v v12, (a4) +; CHECK-NEXT: add a4, a0, a1 ; CHECK-NEXT: vl1r.v v14, (a7) -; CHECK-NEXT: add a7, a6, a2 -; CHECK-NEXT: vl1r.v v16, (a5) -; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a7, a6, a1 +; CHECK-NEXT: vl1r.v v16, (a4) +; CHECK-NEXT: add a4, a4, a1 ; CHECK-NEXT: vl1r.v v18, (a7) -; CHECK-NEXT: add a7, a7, a2 -; CHECK-NEXT: srli a2, a2, 1 -; CHECK-NEXT: vl1r.v v9, (a3) -; CHECK-NEXT: add a3, a1, a1 -; CHECK-NEXT: vl1r.v v17, (a5) -; CHECK-NEXT: add a5, a2, a2 +; CHECK-NEXT: add a7, a7, a1 +; CHECK-NEXT: srli a1, a1, 2 +; CHECK-NEXT: vl1r.v v9, (a2) +; CHECK-NEXT: vl1r.v v17, (a4) ; CHECK-NEXT: vl1r.v v11, (a0) -; CHECK-NEXT: vl1r.v v13, (a4) +; CHECK-NEXT: vl1r.v v13, (a5) ; CHECK-NEXT: vl1r.v v19, (a7) ; CHECK-NEXT: vl1r.v v15, (a6) ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma @@ -2390,12 +2373,12 @@ define @vector_interleave_nxv96i1_nxv16i1( ; CHECK-NEXT: vmsne.vi v0, v12, 0 ; CHECK-NEXT: vmsne.vi v10, v18, 0 ; CHECK-NEXT: vmsne.vi v8, v14, 0 -; CHECK-NEXT: vsetvli zero, a3, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v9, v20, a1 ; CHECK-NEXT: vslideup.vx v0, v16, a1 -; CHECK-NEXT: vsetvli zero, a5, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vx v0, v9, a2 -; CHECK-NEXT: vsetvli zero, a3, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vslideup.vx v0, v9, a3 +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v8, v10, a1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 12 @@ -2427,47 +2410,45 @@ define @vector_interleave_nxv96i1_nxv16i1( ; ZVBB-NEXT: vmv1r.v v17, v9 ; ZVBB-NEXT: vmv1r.v v0, v10 ; ZVBB-NEXT: vmerge.vim v24, v20, 1, v0 -; ZVBB-NEXT: addi a5, sp, 16 +; ZVBB-NEXT: addi a4, sp, 
16 ; ZVBB-NEXT: vmv1r.v v18, v25 ; ZVBB-NEXT: vmv1r.v v0, v11 ; ZVBB-NEXT: vmerge.vim v26, v20, 1, v0 -; ZVBB-NEXT: csrr a2, vlenb +; ZVBB-NEXT: csrr a1, vlenb ; ZVBB-NEXT: vmv1r.v v19, v27 ; ZVBB-NEXT: vmv1r.v v0, v12 ; ZVBB-NEXT: vmerge.vim v10, v20, 1, v0 -; ZVBB-NEXT: add a3, a0, a2 +; ZVBB-NEXT: add a2, a0, a1 ; ZVBB-NEXT: vmv1r.v v20, v11 -; ZVBB-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; ZVBB-NEXT: vsetvli a3, zero, e8, m1, ta, ma ; ZVBB-NEXT: vsseg6e8.v v15, (a0) ; ZVBB-NEXT: vmv1r.v v15, v22 -; ZVBB-NEXT: add a4, a5, a2 +; ZVBB-NEXT: add a5, a4, a1 ; ZVBB-NEXT: vmv1r.v v16, v8 -; ZVBB-NEXT: srli a1, a2, 2 +; ZVBB-NEXT: srli a3, a1, 1 ; ZVBB-NEXT: vmv1r.v v17, v24 -; ZVBB-NEXT: add a6, a4, a2 +; ZVBB-NEXT: add a6, a5, a1 ; ZVBB-NEXT: vmv1r.v v18, v26 -; ZVBB-NEXT: add a7, a3, a2 +; ZVBB-NEXT: add a7, a2, a1 ; ZVBB-NEXT: vmv1r.v v19, v10 -; ZVBB-NEXT: vsseg6e8.v v14, (a5) +; ZVBB-NEXT: vsseg6e8.v v14, (a4) ; ZVBB-NEXT: vl1r.v v8, (a0) -; ZVBB-NEXT: add a0, a6, a2 +; ZVBB-NEXT: add a0, a6, a1 ; ZVBB-NEXT: vl1r.v v10, (a6) -; ZVBB-NEXT: add a6, a7, a2 -; ZVBB-NEXT: vl1r.v v12, (a5) -; ZVBB-NEXT: add a5, a0, a2 +; ZVBB-NEXT: add a6, a7, a1 +; ZVBB-NEXT: vl1r.v v12, (a4) +; ZVBB-NEXT: add a4, a0, a1 ; ZVBB-NEXT: vl1r.v v14, (a7) -; ZVBB-NEXT: add a7, a6, a2 -; ZVBB-NEXT: vl1r.v v16, (a5) -; ZVBB-NEXT: add a5, a5, a2 +; ZVBB-NEXT: add a7, a6, a1 +; ZVBB-NEXT: vl1r.v v16, (a4) +; ZVBB-NEXT: add a4, a4, a1 ; ZVBB-NEXT: vl1r.v v18, (a7) -; ZVBB-NEXT: add a7, a7, a2 -; ZVBB-NEXT: srli a2, a2, 1 -; ZVBB-NEXT: vl1r.v v9, (a3) -; ZVBB-NEXT: add a3, a1, a1 -; ZVBB-NEXT: vl1r.v v17, (a5) -; ZVBB-NEXT: add a5, a2, a2 +; ZVBB-NEXT: add a7, a7, a1 +; ZVBB-NEXT: srli a1, a1, 2 +; ZVBB-NEXT: vl1r.v v9, (a2) +; ZVBB-NEXT: vl1r.v v17, (a4) ; ZVBB-NEXT: vl1r.v v11, (a0) -; ZVBB-NEXT: vl1r.v v13, (a4) +; ZVBB-NEXT: vl1r.v v13, (a5) ; ZVBB-NEXT: vl1r.v v19, (a7) ; ZVBB-NEXT: vl1r.v v15, (a6) ; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, ma @@ -2477,12 +2458,12 @@ define @vector_interleave_nxv96i1_nxv16i1( ; ZVBB-NEXT: vmsne.vi v0, v12, 0 ; ZVBB-NEXT: vmsne.vi v10, v18, 0 ; ZVBB-NEXT: vmsne.vi v8, v14, 0 -; ZVBB-NEXT: vsetvli zero, a3, e8, mf2, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; ZVBB-NEXT: vslideup.vx v9, v20, a1 ; ZVBB-NEXT: vslideup.vx v0, v16, a1 -; ZVBB-NEXT: vsetvli zero, a5, e8, m1, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v9, a2 -; ZVBB-NEXT: vsetvli zero, a3, e8, mf2, ta, ma +; ZVBB-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; ZVBB-NEXT: vslideup.vx v0, v9, a3 +; ZVBB-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; ZVBB-NEXT: vslideup.vx v8, v10, a1 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: li a1, 12 @@ -3676,23 +3657,21 @@ define @vector_interleave_nxv112i1_nxv16i1( @vector_interleave_nxv112i1_nxv16i1( @vector_interleave_nxv112i1_nxv16i1( @vector_interleave_nxv112i1_nxv16i1( @vector_interleave_nxv112i1_nxv16i1( @vector_interleave_nxv112i1_nxv16i1( @vector_interleave_nxv128i1_nxv16i1( @vector_interleave_nxv128i1_nxv16i1( @vector_interleave_nxv4bf16_nxv2bf16( @vector_interleave_nxv4bf16_nxv2bf16( @vector_interleave_nxv4bf16_nxv2bf16( @vector_interleave_nxv4f16_nxv2f16( @vector_interleave_nxv4f16_nxv2f16( @vector_interleave_nxv4f16_nxv2f16( @vector_interleave_nxv6f16_nxv2f16( @vector_interleave_nxv6f16_nxv2f16( @vector_interleave_nxv6bf16_nxv2bf16( @vector_interleave_nxv6bf16_nxv2bf16( @vector_interleave_nxv3f32_nxv1f32( @vector_interleave_nxv3f32_nxv1f32( @vector_interleave_nxv8f16_nxv2f16( @vector_interleave_nxv8f16_nxv2f16( @vector_interleave_nxv8bf16_nxv2bf16( 
@vector_interleave_nxv8bf16_nxv2bf16( @vector_interleave_nxv4f32_nxv1f32( @vector_interleave_nxv4f32_nxv1f32( @vector_interleave_nxv10f16_nxv2f16( @vector_interleave_nxv10f16_nxv2f16( @vector_interleave_nxv10bf16_nxv2bf16( @vector_interleave_nxv10bf16_nxv2bf16( @vector_interleave_nxv5f32_nxv1f32( @vector_interleave_nxv5f32_nxv1f32( @vector_interleave_nxv12f16_nxv2f16( @vector_interleave_nxv12f16_nxv2f16( @vector_interleave_nxv12bf16_nxv2bf16( @vector_interleave_nxv12bf16_nxv2bf16( @vector_interleave_nxv6f32_nxv1f32( @vector_interleave_nxv6f32_nxv1f32( @vector_interleave_nxv14f16_nxv2f16( @vector_interleave_nxv14f16_nxv2f16( @vector_interleave_nxv14f16_nxv2f16( @vector_interleave_nxv14f16_nxv2f16( @vector_interleave_nxv14bf16_nxv2bf16( @vector_interleave_nxv14bf16_nxv2bf16( @vector_interleave_nxv14bf16_nxv2bf16( @vector_interleave_nxv14bf16_nxv2bf16( @vector_interleave_nxv7f32_nxv1f32( @vector_interleave_nxv7f32_nxv1f32( @vector_interleave_nxv7f32_nxv1f32( @vector_interleave_nxv7f32_nxv1f32( @vector_interleave_nxv16f16_nxv2f16( @vector_interleave_nxv16f16_nxv2f16( @vector_interleave_nxv16bf16_nxv2bf16( @vector_interleave_nxv16bf16_nxv2bf16( @vector_interleave_nxv8f32_nxv1f32( @vector_interleave_nxv8f32_nxv1f32( @vfptosi_nxv32bf16_nxv32i1( %va) ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v16 ; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v24 -; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vand.vi v12, v12, 1 ; CHECK-NEXT: vmsne.vi v16, v8, 0 ; CHECK-NEXT: vmsne.vi v0, v12, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v0, v16, a0 ; CHECK-NEXT: ret %evec = fptosi %va to @@ -656,12 +655,11 @@ define @vfptoui_nxv32bf16_nxv32i1( %va) ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v16 ; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v24 -; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vand.vi v12, v12, 1 ; CHECK-NEXT: vmsne.vi v16, v8, 0 ; CHECK-NEXT: vmsne.vi v0, v12, 0 -; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v0, v16, a0 ; CHECK-NEXT: ret %evec = fptoui %va to @@ -1654,12 +1652,11 @@ define @vfptosi_nxv32f16_nxv32i1( %va) { ; ZVFHMIN-NEXT: srli a0, a0, 2 ; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v16 ; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v12, v24 -; ZVFHMIN-NEXT: add a1, a0, a0 ; ZVFHMIN-NEXT: vand.vi v8, v8, 1 ; ZVFHMIN-NEXT: vand.vi v12, v12, 1 ; ZVFHMIN-NEXT: vmsne.vi v16, v8, 0 ; ZVFHMIN-NEXT: vmsne.vi v0, v12, 0 -; ZVFHMIN-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslideup.vx v0, v16, a0 ; ZVFHMIN-NEXT: ret %evec = fptosi %va to @@ -1684,12 +1681,11 @@ define @vfptoui_nxv32f16_nxv32i1( %va) { ; ZVFHMIN-NEXT: srli a0, a0, 2 ; ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v8, v16 ; ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v12, v24 -; ZVFHMIN-NEXT: add a1, a0, a0 ; ZVFHMIN-NEXT: vand.vi v8, v8, 1 ; ZVFHMIN-NEXT: vand.vi v12, v12, 1 ; ZVFHMIN-NEXT: vmsne.vi v16, v8, 0 ; ZVFHMIN-NEXT: vmsne.vi v0, v12, 0 -; ZVFHMIN-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslideup.vx v0, v16, a0 ; ZVFHMIN-NEXT: ret %evec = fptoui %va to diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll index 142ee5256f9e7..1868154052272 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll @@ -567,38 +567,37 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract( % ; RV32: # %bb.0: ; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma ; RV32-NEXT: vmv1r.v v8, v0 -; RV32-NEXT: slli a2, a1, 1 ; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: li a1, -1 +; RV32-NEXT: li a2, -1 ; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; RV32-NEXT: vmv.v.i v10, 0 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV32-NEXT: vmerge.vim v11, v9, 1, v0 -; RV32-NEXT: srli a3, a3, 2 ; RV32-NEXT: vwaddu.vv v12, v11, v11 -; RV32-NEXT: vwmaccu.vx v12, a1, v11 +; RV32-NEXT: vwmaccu.vx v12, a2, v11 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: srli a2, a2, 2 ; RV32-NEXT: vmsne.vi v0, v12, 0 -; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; RV32-NEXT: vslidedown.vx v11, v12, a3 +; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV32-NEXT: vslidedown.vx v11, v12, a2 ; RV32-NEXT: vmerge.vim v10, v10, 1, v0 -; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV32-NEXT: vmsne.vi v0, v11, 0 -; RV32-NEXT: add a1, a3, a3 +; RV32-NEXT: slli a3, a1, 1 ; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV32-NEXT: vslideup.vx v10, v9, a3 -; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma +; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; RV32-NEXT: vslideup.vx v10, v9, a2 +; RV32-NEXT: vsetvli zero, a3, e8, mf2, ta, ma ; RV32-NEXT: vmsne.vi v0, v10, 0 ; RV32-NEXT: vle32.v v10, (a0), v0.t ; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma +; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma ; RV32-NEXT: vnsrl.wx v13, v10, a1 ; RV32-NEXT: vmv.x.s a1, v10 ; RV32-NEXT: vnsrl.wi v12, v10, 0 -; RV32-NEXT: srli a2, a2, 1 +; RV32-NEXT: srli a3, a3, 1 ; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vsetvli zero, a2, e32, m1, ta, ma +; RV32-NEXT: vsetvli zero, a3, e32, m1, ta, ma ; RV32-NEXT: vsseg2e32.v v12, (a0), v0.t ; RV32-NEXT: mv a0, a1 ; RV32-NEXT: ret @@ -611,26 +610,24 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract( % ; RV64-NEXT: li a2, -1 ; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; RV64-NEXT: vmv.v.i v10, 0 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a4, a1, 33 -; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV64-NEXT: vmerge.vim v11, v9, 1, v0 -; RV64-NEXT: srli a3, a3, 2 ; RV64-NEXT: vwaddu.vv v12, v11, v11 ; RV64-NEXT: vwmaccu.vx v12, a2, v11 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: srli a2, a2, 2 ; RV64-NEXT: vmsne.vi v0, v12, 0 -; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v11, v12, a3 +; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV64-NEXT: vslidedown.vx v11, v12, a2 ; RV64-NEXT: vmerge.vim v10, v10, 1, v0 -; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV64-NEXT: vmsne.vi v0, v11, 0 -; RV64-NEXT: add a1, a3, a3 +; RV64-NEXT: slli a3, a1, 33 ; RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; RV64-NEXT: vslideup.vx v10, v9, a3 ; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; RV64-NEXT: vslideup.vx v10, v9, a2 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: srli a1, a4, 32 +; RV64-NEXT: srli a1, a3, 32 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; RV64-NEXT: vle32.v v10, (a0), v0.t ; RV64-NEXT: li a1, 32 @@ -638,9 +635,9 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract( % ; 
RV64-NEXT: vnsrl.wx v13, v10, a1 ; RV64-NEXT: vmv.x.s a1, v10 ; RV64-NEXT: vnsrl.wi v12, v10, 0 -; RV64-NEXT: srli a4, a4, 33 +; RV64-NEXT: srli a3, a3, 33 ; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vsetvli zero, a4, e32, m1, ta, ma +; RV64-NEXT: vsetvli zero, a3, e32, m1, ta, ma ; RV64-NEXT: vsseg2e32.v v12, (a0), v0.t ; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret @@ -807,10 +804,7 @@ define void @not_balanced_store_tree( %v0, ; RV32-NEXT: srli a3, a3, 3 ; RV32-NEXT: vsetvli a4, zero, e32, m1, ta, ma ; RV32-NEXT: vslidedown.vx v8, v12, a3 -; RV32-NEXT: add a4, a3, a3 -; RV32-NEXT: vsetvli zero, a4, e32, m1, ta, ma ; RV32-NEXT: vslideup.vx v12, v8, a3 -; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma ; RV32-NEXT: vwaddu.vv v16, v12, v9 ; RV32-NEXT: vwmaccu.vx v16, a2, v9 ; RV32-NEXT: vsetvli a3, zero, e32, m2, ta, ma @@ -831,10 +825,7 @@ define void @not_balanced_store_tree( %v0, ; RV64-NEXT: srli a3, a3, 3 ; RV64-NEXT: vsetvli a4, zero, e32, m1, ta, ma ; RV64-NEXT: vslidedown.vx v8, v12, a3 -; RV64-NEXT: add a4, a3, a3 -; RV64-NEXT: vsetvli zero, a4, e32, m1, ta, ma ; RV64-NEXT: vslideup.vx v12, v8, a3 -; RV64-NEXT: vsetvli a3, zero, e32, m1, ta, ma ; RV64-NEXT: vwaddu.vv v16, v12, v9 ; RV64-NEXT: vwmaccu.vx v16, a2, v9 ; RV64-NEXT: vsetvli a3, zero, e32, m2, ta, ma @@ -858,29 +849,28 @@ define {, } @not_same_mask( ; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma ; RV32-NEXT: vmv1r.v v9, v0 ; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: slli a1, a1, 1 ; RV32-NEXT: vmv.v.i v8, 0 ; RV32-NEXT: li a2, -1 ; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; RV32-NEXT: vmv.v.i v10, 0 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV32-NEXT: vmerge.vim v11, v8, 1, v0 ; RV32-NEXT: vmv1r.v v0, v9 ; RV32-NEXT: vmerge.vim v9, v8, 1, v0 -; RV32-NEXT: srli a3, a3, 2 ; RV32-NEXT: vwaddu.vv v12, v9, v11 ; RV32-NEXT: vwmaccu.vx v12, a2, v11 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: srli a2, a2, 2 ; RV32-NEXT: vmsne.vi v0, v12, 0 -; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; RV32-NEXT: vslidedown.vx v9, v12, a3 +; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV32-NEXT: vslidedown.vx v9, v12, a2 ; RV32-NEXT: vmerge.vim v10, v10, 1, v0 -; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV32-NEXT: vmsne.vi v0, v9, 0 -; RV32-NEXT: add a2, a3, a3 +; RV32-NEXT: slli a1, a1, 1 ; RV32-NEXT: vmerge.vim v8, v8, 1, v0 -; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma -; RV32-NEXT: vslideup.vx v10, v8, a3 +; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV32-NEXT: vslideup.vx v10, v8, a2 ; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; RV32-NEXT: vmsne.vi v0, v10, 0 ; RV32-NEXT: vle32.v v10, (a0), v0.t @@ -899,26 +889,24 @@ define {, } @not_same_mask( ; RV64-NEXT: li a2, -1 ; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; RV64-NEXT: vmv.v.i v10, 0 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a1, a1, 33 -; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV64-NEXT: vmerge.vim v11, v8, 1, v0 ; RV64-NEXT: vmv1r.v v0, v9 ; RV64-NEXT: vmerge.vim v9, v8, 1, v0 -; RV64-NEXT: srli a3, a3, 2 ; RV64-NEXT: vwaddu.vv v12, v9, v11 ; RV64-NEXT: vwmaccu.vx v12, a2, v11 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: srli a2, a2, 2 ; RV64-NEXT: vmsne.vi v0, v12, 0 -; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v9, v12, a3 +; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV64-NEXT: vslidedown.vx v9, v12, a2 ; RV64-NEXT: vmerge.vim v10, v10, 1, v0 
-; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV64-NEXT: vmsne.vi v0, v9, 0 -; RV64-NEXT: add a2, a3, a3 +; RV64-NEXT: slli a1, a1, 33 ; RV64-NEXT: vmerge.vim v8, v8, 1, v0 -; RV64-NEXT: vsetvli zero, a2, e8, mf2, ta, ma -; RV64-NEXT: vslideup.vx v10, v8, a3 -; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; RV64-NEXT: vslideup.vx v10, v8, a2 ; RV64-NEXT: vmsne.vi v0, v10, 0 ; RV64-NEXT: srli a1, a1, 32 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma