@@ -3424,16 +3424,14 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
 ; RV32-NEXT: vredsum.vs v8, v8, v16
 ; RV32-NEXT: vredsum.vs v9, v10, v16
 ; RV32-NEXT: vredsum.vs v10, v12, v16
-; RV32-NEXT: vredsum.vs v11, v14, v16
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: vmv.x.s a1, v9
 ; RV32-NEXT: vmv.x.s a2, v10
-; RV32-NEXT: vmv.x.s a3, v11
+; RV32-NEXT: vredsum.vs v8, v14, v16
 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: vslide1up.vx v9, v8, a2
+; RV32-NEXT: vslide1up.vx v10, v9, a1
+; RV32-NEXT: vslide1up.vx v8, v10, a0
 ; RV32-NEXT: ret
 ;
 ; RV64V-ONLY-LABEL: buildvec_vredsum:
@@ -3443,16 +3441,14 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
 ; RV64V-ONLY-NEXT: vredsum.vs v8, v8, v16
 ; RV64V-ONLY-NEXT: vredsum.vs v9, v10, v16
 ; RV64V-ONLY-NEXT: vredsum.vs v10, v12, v16
-; RV64V-ONLY-NEXT: vredsum.vs v11, v14, v16
 ; RV64V-ONLY-NEXT: vmv.x.s a0, v8
 ; RV64V-ONLY-NEXT: vmv.x.s a1, v9
 ; RV64V-ONLY-NEXT: vmv.x.s a2, v10
-; RV64V-ONLY-NEXT: vmv.x.s a3, v11
+; RV64V-ONLY-NEXT: vredsum.vs v8, v14, v16
 ; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64V-ONLY-NEXT: vmv.v.x v8, a0
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
+; RV64V-ONLY-NEXT: vslide1up.vx v9, v8, a2
+; RV64V-ONLY-NEXT: vslide1up.vx v10, v9, a1
+; RV64V-ONLY-NEXT: vslide1up.vx v8, v10, a0
 ; RV64V-ONLY-NEXT: ret
 ;
 ; RVA22U64-LABEL: buildvec_vredsum:
@@ -3502,16 +3498,14 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
 ; RV64ZVE32-NEXT: vredsum.vs v8, v8, v16
 ; RV64ZVE32-NEXT: vredsum.vs v9, v10, v16
 ; RV64ZVE32-NEXT: vredsum.vs v10, v12, v16
-; RV64ZVE32-NEXT: vredsum.vs v11, v14, v16
 ; RV64ZVE32-NEXT: vmv.x.s a0, v8
 ; RV64ZVE32-NEXT: vmv.x.s a1, v9
 ; RV64ZVE32-NEXT: vmv.x.s a2, v10
-; RV64ZVE32-NEXT: vmv.x.s a3, v11
+; RV64ZVE32-NEXT: vredsum.vs v8, v14, v16
 ; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64ZVE32-NEXT: vmv.v.x v8, a0
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32-NEXT: vslide1up.vx v9, v8, a2
+; RV64ZVE32-NEXT: vslide1up.vx v10, v9, a1
+; RV64ZVE32-NEXT: vslide1up.vx v8, v10, a0
 ; RV64ZVE32-NEXT: ret
   %247 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg0)
   %248 = insertelement <4 x i32> poison, i32 %247, i64 0
@@ -3531,16 +3525,14 @@ define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
 ; RV32-NEXT: vredmaxu.vs v8, v8, v8
 ; RV32-NEXT: vredmaxu.vs v9, v10, v10
 ; RV32-NEXT: vredmaxu.vs v10, v12, v12
-; RV32-NEXT: vredmaxu.vs v11, v14, v14
 ; RV32-NEXT: vmv.x.s a0, v8
 ; RV32-NEXT: vmv.x.s a1, v9
 ; RV32-NEXT: vmv.x.s a2, v10
-; RV32-NEXT: vmv.x.s a3, v11
+; RV32-NEXT: vredmaxu.vs v8, v14, v14
 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: vslide1up.vx v9, v8, a2
+; RV32-NEXT: vslide1up.vx v10, v9, a1
+; RV32-NEXT: vslide1up.vx v8, v10, a0
 ; RV32-NEXT: ret
 ;
 ; RV64V-ONLY-LABEL: buildvec_vredmax:
@@ -3549,16 +3541,14 @@ define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
 ; RV64V-ONLY-NEXT: vredmaxu.vs v8, v8, v8
 ; RV64V-ONLY-NEXT: vredmaxu.vs v9, v10, v10
 ; RV64V-ONLY-NEXT: vredmaxu.vs v10, v12, v12
-; RV64V-ONLY-NEXT: vredmaxu.vs v11, v14, v14
 ; RV64V-ONLY-NEXT: vmv.x.s a0, v8
 ; RV64V-ONLY-NEXT: vmv.x.s a1, v9
 ; RV64V-ONLY-NEXT: vmv.x.s a2, v10
-; RV64V-ONLY-NEXT: vmv.x.s a3, v11
+; RV64V-ONLY-NEXT: vredmaxu.vs v8, v14, v14
 ; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64V-ONLY-NEXT: vmv.v.x v8, a0
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
-; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
+; RV64V-ONLY-NEXT: vslide1up.vx v9, v8, a2
+; RV64V-ONLY-NEXT: vslide1up.vx v10, v9, a1
+; RV64V-ONLY-NEXT: vslide1up.vx v8, v10, a0
 ; RV64V-ONLY-NEXT: ret
 ;
 ; RVA22U64-LABEL: buildvec_vredmax:
@@ -3605,16 +3595,14 @@ define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %
 ; RV64ZVE32-NEXT: vredmaxu.vs v8, v8, v8
 ; RV64ZVE32-NEXT: vredmaxu.vs v9, v10, v10
 ; RV64ZVE32-NEXT: vredmaxu.vs v10, v12, v12
-; RV64ZVE32-NEXT: vredmaxu.vs v11, v14, v14
 ; RV64ZVE32-NEXT: vmv.x.s a0, v8
 ; RV64ZVE32-NEXT: vmv.x.s a1, v9
 ; RV64ZVE32-NEXT: vmv.x.s a2, v10
-; RV64ZVE32-NEXT: vmv.x.s a3, v11
+; RV64ZVE32-NEXT: vredmaxu.vs v8, v14, v14
 ; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64ZVE32-NEXT: vmv.v.x v8, a0
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
-; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32-NEXT: vslide1up.vx v9, v8, a2
+; RV64ZVE32-NEXT: vslide1up.vx v10, v9, a1
+; RV64ZVE32-NEXT: vslide1up.vx v8, v10, a0
 ; RV64ZVE32-NEXT: ret
   %247 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg0)
   %248 = insertelement <4 x i32> poison, i32 %247, i64 0
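
For context, both tests follow the same IR pattern: reduce each of four <8 x i32> arguments to a scalar, then build a <4 x i32> from the four results. The change shown above defers the last vredsum/vredmaxu so its result stays in lane 0 of v8, then assembles the vector with three vslide1up.vx (prepending a2, a1, a0) instead of a vmv.v.x followed by three vslide1down.vx; that drops one vmv.x.s and one vector op per check prefix, matching the 16-to-14 line counts in the hunk headers. Below is a minimal standalone sketch of the vredsum variant; the %r*/%v* names and the last three reductions are assumptions extrapolated from the visible %247/%248 context lines, since the diff only shows the first reduction and insert.

declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)

define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %arg2, <8 x i32> %arg3) {
  ; Assumed shape: one reduction per argument, each producing one result lane.
  %r0 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg0)
  %r1 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg1)
  %r2 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg2)
  %r3 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg3)
  ; Build the <4 x i32> lane by lane, starting from poison.
  %v0 = insertelement <4 x i32> poison, i32 %r0, i64 0
  %v1 = insertelement <4 x i32> %v0, i32 %r1, i64 1
  %v2 = insertelement <4 x i32> %v1, i32 %r2, i64 2
  %v3 = insertelement <4 x i32> %v2, i32 %r3, i64 3
  ret <4 x i32> %v3
}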