@@ -3416,5 +3416,216 @@ define <4 x i1> @buildvec_i1_splat(i1 %e1) {
   ret <4 x i1> %v4
 }
 
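+; Build a <4 x i32> whose lanes are the results of four v8i32 add reductions.
+; Each reduction result is moved to a GPR with vmv.x.s and the result vector is
+; assembled with vmv.v.x/vslide1down (RVA22U64 first packs pairs into e64 elements).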
+define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %arg2, <8 x i32> %arg3) nounwind {
+; RV32-LABEL: buildvec_vredsum:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vmv.s.x v16, zero
+; RV32-NEXT: vredsum.vs v8, v8, v16
+; RV32-NEXT: vredsum.vs v9, v10, v16
+; RV32-NEXT: vredsum.vs v10, v12, v16
+; RV32-NEXT: vredsum.vs v11, v14, v16
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: vmv.x.s a1, v9
+; RV32-NEXT: vmv.x.s a2, v10
+; RV32-NEXT: vmv.x.s a3, v11
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: ret
+;
+; RV64V-ONLY-LABEL: buildvec_vredsum:
+; RV64V-ONLY: # %bb.0:
+; RV64V-ONLY-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64V-ONLY-NEXT: vmv.s.x v16, zero
+; RV64V-ONLY-NEXT: vredsum.vs v8, v8, v16
+; RV64V-ONLY-NEXT: vredsum.vs v9, v10, v16
+; RV64V-ONLY-NEXT: vredsum.vs v10, v12, v16
+; RV64V-ONLY-NEXT: vredsum.vs v11, v14, v16
+; RV64V-ONLY-NEXT: vmv.x.s a0, v8
+; RV64V-ONLY-NEXT: vmv.x.s a1, v9
+; RV64V-ONLY-NEXT: vmv.x.s a2, v10
+; RV64V-ONLY-NEXT: vmv.x.s a3, v11
+; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64V-ONLY-NEXT: vmv.v.x v8, a0
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
+; RV64V-ONLY-NEXT: ret
+;
+; RVA22U64-LABEL: buildvec_vredsum:
+; RVA22U64: # %bb.0:
+; RVA22U64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RVA22U64-NEXT: vmv.s.x v16, zero
+; RVA22U64-NEXT: vredsum.vs v8, v8, v16
+; RVA22U64-NEXT: vredsum.vs v9, v10, v16
+; RVA22U64-NEXT: vredsum.vs v10, v12, v16
+; RVA22U64-NEXT: vredsum.vs v11, v14, v16
+; RVA22U64-NEXT: vmv.x.s a0, v8
+; RVA22U64-NEXT: vmv.x.s a1, v9
+; RVA22U64-NEXT: vmv.x.s a2, v10
+; RVA22U64-NEXT: slli a1, a1, 32
+; RVA22U64-NEXT: add.uw a0, a0, a1
+; RVA22U64-NEXT: vmv.x.s a1, v11
+; RVA22U64-NEXT: slli a1, a1, 32
+; RVA22U64-NEXT: add.uw a1, a2, a1
+; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RVA22U64-NEXT: vmv.v.x v8, a0
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a1
+; RVA22U64-NEXT: ret
+;
+; RVA22U64-PACK-LABEL: buildvec_vredsum:
+; RVA22U64-PACK: # %bb.0:
+; RVA22U64-PACK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RVA22U64-PACK-NEXT: vmv.s.x v16, zero
+; RVA22U64-PACK-NEXT: vredsum.vs v8, v8, v16
+; RVA22U64-PACK-NEXT: vredsum.vs v9, v10, v16
+; RVA22U64-PACK-NEXT: vredsum.vs v10, v12, v16
+; RVA22U64-PACK-NEXT: vredsum.vs v11, v14, v16
+; RVA22U64-PACK-NEXT: vmv.x.s a0, v8
+; RVA22U64-PACK-NEXT: vmv.x.s a1, v9
+; RVA22U64-PACK-NEXT: vmv.x.s a2, v10
+; RVA22U64-PACK-NEXT: pack a0, a0, a1
+; RVA22U64-PACK-NEXT: vmv.x.s a1, v11
+; RVA22U64-PACK-NEXT: pack a1, a2, a1
+; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RVA22U64-PACK-NEXT: vmv.v.x v8, a0
+; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a1
+; RVA22U64-PACK-NEXT: ret
+;
+; RV64ZVE32-LABEL: buildvec_vredsum:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32-NEXT: vmv.s.x v16, zero
+; RV64ZVE32-NEXT: vredsum.vs v8, v8, v16
+; RV64ZVE32-NEXT: vredsum.vs v9, v10, v16
+; RV64ZVE32-NEXT: vredsum.vs v10, v12, v16
+; RV64ZVE32-NEXT: vredsum.vs v11, v14, v16
+; RV64ZVE32-NEXT: vmv.x.s a0, v8
+; RV64ZVE32-NEXT: vmv.x.s a1, v9
+; RV64ZVE32-NEXT: vmv.x.s a2, v10
+; RV64ZVE32-NEXT: vmv.x.s a3, v11
+; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64ZVE32-NEXT: vmv.v.x v8, a0
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32-NEXT: ret
+  %247 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg0)
+  %248 = insertelement <4 x i32> poison, i32 %247, i64 0
+  %250 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg1)
+  %251 = insertelement <4 x i32> %248, i32 %250, i64 1
+  %252 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg2)
+  %253 = insertelement <4 x i32> %251, i32 %252, i64 2
+  %254 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg3)
+  %255 = insertelement <4 x i32> %253, i32 %254, i64 3
+  ret <4 x i32> %255
+}
+
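+; Same as buildvec_vredsum, but using unsigned max reductions (vredmaxu).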
+define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %arg2, <8 x i32> %arg3) nounwind {
+; RV32-LABEL: buildvec_vredmax:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT: vredmaxu.vs v8, v8, v8
+; RV32-NEXT: vredmaxu.vs v9, v10, v10
+; RV32-NEXT: vredmaxu.vs v10, v12, v12
+; RV32-NEXT: vredmaxu.vs v11, v14, v14
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: vmv.x.s a1, v9
+; RV32-NEXT: vmv.x.s a2, v10
+; RV32-NEXT: vmv.x.s a3, v11
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v8, a0
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: ret
+;
+; RV64V-ONLY-LABEL: buildvec_vredmax:
+; RV64V-ONLY: # %bb.0:
+; RV64V-ONLY-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64V-ONLY-NEXT: vredmaxu.vs v8, v8, v8
+; RV64V-ONLY-NEXT: vredmaxu.vs v9, v10, v10
+; RV64V-ONLY-NEXT: vredmaxu.vs v10, v12, v12
+; RV64V-ONLY-NEXT: vredmaxu.vs v11, v14, v14
+; RV64V-ONLY-NEXT: vmv.x.s a0, v8
+; RV64V-ONLY-NEXT: vmv.x.s a1, v9
+; RV64V-ONLY-NEXT: vmv.x.s a2, v10
+; RV64V-ONLY-NEXT: vmv.x.s a3, v11
+; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64V-ONLY-NEXT: vmv.v.x v8, a0
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
+; RV64V-ONLY-NEXT: ret
+;
+; RVA22U64-LABEL: buildvec_vredmax:
+; RVA22U64: # %bb.0:
+; RVA22U64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RVA22U64-NEXT: vredmaxu.vs v8, v8, v8
+; RVA22U64-NEXT: vredmaxu.vs v9, v10, v10
+; RVA22U64-NEXT: vredmaxu.vs v10, v12, v12
+; RVA22U64-NEXT: vredmaxu.vs v11, v14, v14
+; RVA22U64-NEXT: vmv.x.s a0, v8
+; RVA22U64-NEXT: vmv.x.s a1, v9
+; RVA22U64-NEXT: vmv.x.s a2, v10
+; RVA22U64-NEXT: slli a1, a1, 32
+; RVA22U64-NEXT: add.uw a0, a0, a1
+; RVA22U64-NEXT: vmv.x.s a1, v11
+; RVA22U64-NEXT: slli a1, a1, 32
+; RVA22U64-NEXT: add.uw a1, a2, a1
+; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RVA22U64-NEXT: vmv.v.x v8, a0
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a1
+; RVA22U64-NEXT: ret
+;
+; RVA22U64-PACK-LABEL: buildvec_vredmax:
+; RVA22U64-PACK: # %bb.0:
+; RVA22U64-PACK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RVA22U64-PACK-NEXT: vredmaxu.vs v8, v8, v8
+; RVA22U64-PACK-NEXT: vredmaxu.vs v9, v10, v10
+; RVA22U64-PACK-NEXT: vredmaxu.vs v10, v12, v12
+; RVA22U64-PACK-NEXT: vredmaxu.vs v11, v14, v14
+; RVA22U64-PACK-NEXT: vmv.x.s a0, v8
+; RVA22U64-PACK-NEXT: vmv.x.s a1, v9
+; RVA22U64-PACK-NEXT: vmv.x.s a2, v10
+; RVA22U64-PACK-NEXT: pack a0, a0, a1
+; RVA22U64-PACK-NEXT: vmv.x.s a1, v11
+; RVA22U64-PACK-NEXT: pack a1, a2, a1
+; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RVA22U64-PACK-NEXT: vmv.v.x v8, a0
+; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a1
+; RVA22U64-PACK-NEXT: ret
+;
+; RV64ZVE32-LABEL: buildvec_vredmax:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64ZVE32-NEXT: vredmaxu.vs v8, v8, v8
+; RV64ZVE32-NEXT: vredmaxu.vs v9, v10, v10
+; RV64ZVE32-NEXT: vredmaxu.vs v10, v12, v12
+; RV64ZVE32-NEXT: vredmaxu.vs v11, v14, v14
+; RV64ZVE32-NEXT: vmv.x.s a0, v8
+; RV64ZVE32-NEXT: vmv.x.s a1, v9
+; RV64ZVE32-NEXT: vmv.x.s a2, v10
+; RV64ZVE32-NEXT: vmv.x.s a3, v11
+; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64ZVE32-NEXT: vmv.v.x v8, a0
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32-NEXT: ret
+  %247 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg0)
+  %248 = insertelement <4 x i32> poison, i32 %247, i64 0
+  %250 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg1)
+  %251 = insertelement <4 x i32> %248, i32 %250, i64 1
+  %252 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg2)
+  %253 = insertelement <4 x i32> %251, i32 %252, i64 2
+  %254 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg3)
+  %255 = insertelement <4 x i32> %253, i32 %254, i64 3
+  ret <4 x i32> %255
+}
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; RV64: {{.*}}