diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index d7aa85048eeda..06d9c5bac0524 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1040,6 +1040,12 @@ static bool isSupportedInstr(const MachineInstr &MI) {
   case RISCV::VMSOF_M:
   case RISCV::VIOTA_M:
   case RISCV::VID_V:
+  // Vector Slide Instructions
+  case RISCV::VSLIDEUP_VX:
+  case RISCV::VSLIDEUP_VI:
+  case RISCV::VSLIDEDOWN_VX:
+  case RISCV::VSLIDEDOWN_VI:
+  // TODO: Handle v[f]slide1up, but not v[f]slide1down.
   // Vector Single-Width Floating-Point Add/Subtract Instructions
   case RISCV::VFADD_VF:
   case RISCV::VFADD_VV:
@@ -1252,6 +1258,9 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
     return false;
   }
 
+  assert(!RISCVII::elementsDependOnVL(RISCV::getRVVMCOpcode(MI.getOpcode())) &&
+         "Instruction shouldn't be supported if elements depend on VL");
+
   assert(MI.getOperand(0).isReg() &&
          isVectorRegClass(MI.getOperand(0).getReg(), MRI) &&
          "All supported instructions produce a vector register result");
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
index 984bc5b2c7352..b97fa1d3a51ec 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
@@ -25,8 +25,8 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
 ; RV32-NEXT:    vslide1down.vx v10, v10, a1
 ; RV32-NEXT:    vslide1down.vx v10, v10, a4
 ; RV32-NEXT:    vslide1down.vx v10, v10, a2
-; RV32-NEXT:    vslidedown.vi v10, v10, 2
 ; RV32-NEXT:    vsetivli zero, 6, e8, mf2, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v10, 2
 ; RV32-NEXT:    vand.vi v10, v10, 1
 ; RV32-NEXT:    vmsne.vi v0, v10, 0
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, tu, mu
@@ -56,8 +56,8 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT:    vslide1down.vx v10, v10, a1
 ; RV64-NEXT:    vslide1down.vx v10, v10, a4
 ; RV64-NEXT:    vslide1down.vx v10, v10, a2
-; RV64-NEXT:    vslidedown.vi v10, v10, 2
 ; RV64-NEXT:    vsetivli zero, 6, e8, mf2, ta, ma
+; RV64-NEXT:    vslidedown.vi v10, v10, 2
 ; RV64-NEXT:    vand.vi v10, v10, 1
 ; RV64-NEXT:    vmsne.vi v0, v10, 0
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, tu, mu
@@ -95,8 +95,8 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
 ; RV32-NEXT:    vslide1down.vx v10, v10, a1
 ; RV32-NEXT:    vslide1down.vx v10, v10, a4
 ; RV32-NEXT:    vslide1down.vx v10, v10, a2
-; RV32-NEXT:    vslidedown.vi v10, v10, 2
 ; RV32-NEXT:    vsetivli zero, 6, e8, mf2, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v10, 2
 ; RV32-NEXT:    vand.vi v10, v10, 1
 ; RV32-NEXT:    vmsne.vi v0, v10, 0
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -126,8 +126,8 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT:    vslide1down.vx v10, v10, a1
 ; RV64-NEXT:    vslide1down.vx v10, v10, a4
 ; RV64-NEXT:    vslide1down.vx v10, v10, a2
-; RV64-NEXT:    vslidedown.vi v10, v10, 2
 ; RV64-NEXT:    vsetivli zero, 6, e8, mf2, ta, ma
+; RV64-NEXT:    vslidedown.vi v10, v10, 2
 ; RV64-NEXT:    vand.vi v10, v10, 1
 ; RV64-NEXT:    vmsne.vi v0, v10, 0
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -166,8 +166,8 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
 ; RV32-NEXT:    vslide1down.vx v10, v10, a0
 ; RV32-NEXT:    vslide1down.vx v10, v10, a3
 ; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    vslidedown.vi v10, v10, 2
 ; RV32-NEXT:    vsetivli zero, 6, e8, mf2, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v10, 2
 ; RV32-NEXT:    vand.vi v10, v10, 1
 ; RV32-NEXT:    vmsne.vi v0, v10, 0
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -197,8 +197,8 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT:    vslide1down.vx v10, v10, a0
 ; RV64-NEXT:    vslide1down.vx v10, v10, a3
 ; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    vslidedown.vi v10, v10, 2
 ; RV64-NEXT:    vsetivli zero, 6, e8, mf2, ta, ma
+; RV64-NEXT:    vslidedown.vi v10, v10, 2
 ; RV64-NEXT:    vand.vi v10, v10, 1
 ; RV64-NEXT:    vmsne.vi v0, v10, 0
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -236,8 +236,8 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
 ; RV32-NEXT:    vslide1down.vx v10, v10, a1
 ; RV32-NEXT:    vslide1down.vx v10, v10, a4
 ; RV32-NEXT:    vslide1down.vx v10, v10, a2
-; RV32-NEXT:    vslidedown.vi v10, v10, 2
 ; RV32-NEXT:    vsetivli zero, 6, e8, mf2, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v10, 2
 ; RV32-NEXT:    vand.vi v10, v10, 1
 ; RV32-NEXT:    vmsne.vi v0, v10, 0
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, tu, mu
@@ -267,8 +267,8 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT:    vslide1down.vx v10, v10, a1
 ; RV64-NEXT:    vslide1down.vx v10, v10, a4
 ; RV64-NEXT:    vslide1down.vx v10, v10, a2
-; RV64-NEXT:    vslidedown.vi v10, v10, 2
 ; RV64-NEXT:    vsetivli zero, 6, e8, mf2, ta, ma
+; RV64-NEXT:    vslidedown.vi v10, v10, 2
 ; RV64-NEXT:    vand.vi v10, v10, 1
 ; RV64-NEXT:    vmsne.vi v0, v10, 0
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, tu, mu
@@ -306,8 +306,8 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
 ; RV32-NEXT:    vslide1down.vx v10, v10, a0
 ; RV32-NEXT:    vslide1down.vx v10, v10, a3
 ; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    vslidedown.vi v10, v10, 2
 ; RV32-NEXT:    vsetivli zero, 6, e8, mf2, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v10, 2
 ; RV32-NEXT:    vand.vi v10, v10, 1
 ; RV32-NEXT:    vmsne.vi v0, v10, 0
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -337,8 +337,8 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT:    vslide1down.vx v10, v10, a0
 ; RV64-NEXT:    vslide1down.vx v10, v10, a3
 ; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    vslidedown.vi v10, v10, 2
 ; RV64-NEXT:    vsetivli zero, 6, e8, mf2, ta, ma
+; RV64-NEXT:    vslidedown.vi v10, v10, 2
 ; RV64-NEXT:    vand.vi v10, v10, 1
 ; RV64-NEXT:    vmsne.vi v0, v10, 0
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -377,8 +377,8 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
 ; RV32-NEXT:    vslide1down.vx v10, v10, a0
 ; RV32-NEXT:    vslide1down.vx v10, v10, a3
 ; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    vslidedown.vi v10, v10, 2
 ; RV32-NEXT:    vsetivli zero, 6, e8, mf2, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v10, 2
 ; RV32-NEXT:    vand.vi v10, v10, 1
 ; RV32-NEXT:    vmsne.vi v0, v10, 0
 ; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
@@ -408,8 +408,8 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
 ; RV64-NEXT:    vslide1down.vx v10, v10, a0
 ; RV64-NEXT:    vslide1down.vx v10, v10, a3
 ; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    vslidedown.vi v10, v10, 2
 ; RV64-NEXT:    vsetivli zero, 6, e8, mf2, ta, ma
+; RV64-NEXT:    vslidedown.vi v10, v10, 2
 ; RV64-NEXT:    vand.vi v10, v10, 1
 ; RV64-NEXT:    vmsne.vi v0, v10, 0
 ; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
index 1cbb980aebffc..8d3b99f3a7dc8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
@@ -3434,6 +3434,86 @@ define <vscale x 4 x i32> @vid.v(<vscale x 4 x i32> %c, iXLen %vl) {
   ret <vscale x 4 x i32> %2
 }
 
+define <vscale x 4 x i32> @vslideup_vx(<vscale x 4 x i32> %a, iXLen %b, iXLen %vl) {
+; NOVLOPT-LABEL: vslideup_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vslideup.vx v10, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v10, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vslideup_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vslideup.vx v10, v8, a0
+; VLOPT-NEXT:    vadd.vv v8, v10, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vslideup(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen %b, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vslideup_vi(<vscale x 4 x i32> %a, iXLen %vl) {
+; NOVLOPT-LABEL: vslideup_vi:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vslideup.vi v10, v8, 2
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v10, v10
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vslideup_vi:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vslideup.vi v10, v8, 2
+; VLOPT-NEXT:    vadd.vv v8, v10, v10
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vslideup(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 2, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vslidedown_vx(<vscale x 4 x i32> %a, iXLen %b, iXLen %vl) {
+; NOVLOPT-LABEL: vslidedown_vx:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vslidedown.vx v8, v8, a0
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vslidedown_vx:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vslidedown.vx v8, v8, a0
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vslidedown(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen %b, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vslidedown_vi(<vscale x 4 x i32> %a, iXLen %vl) {
+; NOVLOPT-LABEL: vslidedown_vi:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vslidedown.vi v8, v8, 2
+; NOVLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vadd.vv v8, v8, v8
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: vslidedown_vi:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; VLOPT-NEXT:    vslidedown.vi v8, v8, 2
+; VLOPT-NEXT:    vadd.vv v8, v8, v8
+; VLOPT-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vslidedown(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 2, iXLen -1, iXLen 3)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
 define <vscale x 4 x float> @vfadd_vv(<vscale x 4 x float> %a, <vscale x 4 x float> %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vfadd_vv:
 ; NOVLOPT:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
index 35f01f608b56e..7fb822d20f892 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -536,37 +536,37 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract(<vscale x 2 x i1> %
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
 ; RV32-NEXT:    vmv1r.v v8, v0
+; RV32-NEXT:    slli a2, a1, 1
 ; RV32-NEXT:    vmv.v.i v9, 0
-; RV32-NEXT:    li a2, -1
-; RV32-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
+; RV32-NEXT:    li a1, -1
+; RV32-NEXT:    vmerge.vim v10, v9, 1, v0
+; RV32-NEXT:    vwaddu.vv v11, v10, v10
+; RV32-NEXT:    vwmaccu.vx v11, a1, v10
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
 ; RV32-NEXT:    vmv.v.i v10, 0
+; RV32-NEXT:    srli a1, a1, 2
 ; RV32-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
-; RV32-NEXT:    vmerge.vim v11, v9, 1, v0
-; RV32-NEXT:    vwaddu.vv v12, v11, v11
-; RV32-NEXT:    vwmaccu.vx v12, a2, v11
-; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    srli a2, a2, 2
-; RV32-NEXT:    vmsne.vi v0, v12, 0
+; RV32-NEXT:    vmsne.vi v0, v11, 0
 ; RV32-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
-; RV32-NEXT:    vslidedown.vx v11, v12, a2
+; RV32-NEXT:    vslidedown.vx v11, v11, a1
+; RV32-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
 ; RV32-NEXT:    vmerge.vim v10, v10, 1, v0
 ; RV32-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
 ; RV32-NEXT:    vmsne.vi v0, v11, 0
-; RV32-NEXT:    slli a3, a1, 1
 ; RV32-NEXT:    vmerge.vim v9, v9, 1, v0
-; RV32-NEXT:    vsetvli a1, zero, e8, mf2, ta, ma
-; RV32-NEXT:    vslideup.vx v10, v9, a2
-; RV32-NEXT:    vsetvli zero, a3, e8, mf2, ta, ma
+; RV32-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
+; RV32-NEXT:    vslideup.vx v10, v9, a1
 ; RV32-NEXT:    vmsne.vi v0, v10, 0
 ; RV32-NEXT:    vle32.v v10, (a0), v0.t
 ; RV32-NEXT:    li a1, 32
-; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
+; RV32-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
 ; RV32-NEXT:    vnsrl.wx v13, v10, a1
 ; RV32-NEXT:    vmv.x.s a1, v10
 ; RV32-NEXT:    vnsrl.wi v12, v10, 0
-; RV32-NEXT:    srli a3, a3, 1
+; RV32-NEXT:    srli a2, a2, 1
 ; RV32-NEXT:    vmv1r.v v0, v8
-; RV32-NEXT:    vsetvli zero, a3, e32, m1, ta, ma
+; RV32-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
 ; RV32-NEXT:    vsseg2e32.v v12, (a0), v0.t
 ; RV32-NEXT:    mv a0, a1
 ; RV32-NEXT:    ret
@@ -657,30 +657,30 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @not_same_mask(
 ; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
 ; RV32-NEXT:    vmv1r.v v9, v0
 ; RV32-NEXT:    vmv1r.v v0, v8
+; RV32-NEXT:    slli a1, a1, 1
 ; RV32-NEXT:    vmv.v.i v8, 0
 ; RV32-NEXT:    li a2, -1
-; RV32-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
-; RV32-NEXT:    vmv.v.i v10, 0
-; RV32-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
-; RV32-NEXT:    vmerge.vim v11, v8, 1, v0
+; RV32-NEXT:    vmerge.vim v10, v8, 1, v0
 ; RV32-NEXT:    vmv1r.v v0, v9
 ; RV32-NEXT:    vmerge.vim v9, v8, 1, v0
-; RV32-NEXT:    vwaddu.vv v12, v9, v11
-; RV32-NEXT:    vwmaccu.vx v12, a2, v11
+; RV32-NEXT:    vwaddu.vv v11, v9, v10
+; RV32-NEXT:    vwmaccu.vx v11, a2, v10
 ; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; RV32-NEXT:    vmv.v.i v9, 0
 ; RV32-NEXT:    srli a2, a2, 2
-; RV32-NEXT:    vmsne.vi v0, v12, 0
-; RV32-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
-; RV32-NEXT:    vslidedown.vx v9, v12, a2
-; RV32-NEXT:    vmerge.vim v10, v10, 1, v0
 ; RV32-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
-; RV32-NEXT:    vmsne.vi v0, v9, 0
-; RV32-NEXT:    slli a1, a1, 1
-; RV32-NEXT:    vmerge.vim v8, v8, 1, v0
+; RV32-NEXT:    vmsne.vi v0, v11, 0
 ; RV32-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
-; RV32-NEXT:    vslideup.vx v10, v8, a2
+; RV32-NEXT:    vslidedown.vx v10, v11, a2
 ; RV32-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; RV32-NEXT:    vmerge.vim v9, v9, 1, v0
+; RV32-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
 ; RV32-NEXT:    vmsne.vi v0, v10, 0
+; RV32-NEXT:    vmerge.vim v8, v8, 1, v0
+; RV32-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; RV32-NEXT:    vslideup.vx v9, v8, a2
+; RV32-NEXT:    vmsne.vi v0, v9, 0
 ; RV32-NEXT:    vle32.v v10, (a0), v0.t
 ; RV32-NEXT:    li a0, 32
 ; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma