11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
22; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s --check-prefixes=CHECK,SVE2
3+ ; RUN: llc -global-isel -global-isel-abort=2 -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s --check-prefixes=CHECK,SVE2
34; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s --check-prefixes=CHECK,SVE2P1
5+ ; RUN: llc -global-isel -global-isel-abort=2 -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s --check-prefixes=CHECK,SVE2P1
46
5- define <vscale x 4 x float > @fdot_wide_vl128 (<vscale x 4 x float > %acc , <vscale x 8 x half > %a , <vscale x 8 x half > %b ) {
6- ; SVE2-LABEL: fdot_wide_vl128 :
7+ define <vscale x 4 x float > @fdot_wide_nxv4f32 (<vscale x 4 x float > %acc , <vscale x 8 x half > %a , <vscale x 8 x half > %b ) {
8+ ; SVE2-LABEL: fdot_wide_nxv4f32 :
79; SVE2: // %bb.0: // %entry
810; SVE2-NEXT: uunpklo z3.s, z1.h
911; SVE2-NEXT: uunpklo z4.s, z2.h
@@ -20,7 +22,7 @@ define <vscale x 4 x float> @fdot_wide_vl128(<vscale x 4 x float> %acc, <vscale
2022; SVE2-NEXT: fadd z0.s, z0.s, z1.s
2123; SVE2-NEXT: ret
2224;
23- ; SVE2P1-LABEL: fdot_wide_vl128 :
25+ ; SVE2P1-LABEL: fdot_wide_nxv4f32 :
2426; SVE2P1: // %bb.0: // %entry
2527; SVE2P1-NEXT: fdot z0.s, z1.h, z2.h
2628; SVE2P1-NEXT: ret
3234 ret <vscale x 4 x float > %partial.reduce
3335}
3436
35- define <vscale x 4 x float > @fdot_splat_vl128 (<vscale x 4 x float > %acc , <vscale x 8 x half > %a ) {
36- ; SVE2-LABEL: fdot_splat_vl128 :
37+ define <vscale x 4 x float > @fdot_splat_nxv4f32 (<vscale x 4 x float > %acc , <vscale x 8 x half > %a ) {
38+ ; SVE2-LABEL: fdot_splat_nxv4f32 :
3739; SVE2: // %bb.0: // %entry
3840; SVE2-NEXT: uunpklo z2.s, z1.h
3941; SVE2-NEXT: ptrue p0.s
@@ -44,7 +46,7 @@ define <vscale x 4 x float> @fdot_splat_vl128(<vscale x 4 x float> %acc, <vscale
4446; SVE2-NEXT: fadd z0.s, z0.s, z1.s
4547; SVE2-NEXT: ret
4648;
47- ; SVE2P1-LABEL: fdot_splat_vl128 :
49+ ; SVE2P1-LABEL: fdot_splat_nxv4f32 :
4850; SVE2P1: // %bb.0: // %entry
4951; SVE2P1-NEXT: fmov z2.h, #1.00000000
5052; SVE2P1-NEXT: fdot z0.s, z1.h, z2.h
@@ -55,101 +57,8 @@ entry:
5557 ret <vscale x 4 x float > %partial.reduce
5658}
5759
58- define void @fdot_wide_vl256 (ptr %accptr , ptr %aptr , ptr %bptr ) vscale_range(2 ,2 ) {
59- ; SVE2-LABEL: fdot_wide_vl256:
60- ; SVE2: // %bb.0: // %entry
61- ; SVE2-NEXT: ptrue p0.s
62- ; SVE2-NEXT: ld1h { z0.s }, p0/z, [x1]
63- ; SVE2-NEXT: ld1h { z1.s }, p0/z, [x2]
64- ; SVE2-NEXT: ld1h { z2.s }, p0/z, [x1, #1, mul vl]
65- ; SVE2-NEXT: ld1h { z3.s }, p0/z, [x2, #1, mul vl]
66- ; SVE2-NEXT: fcvt z0.s, p0/m, z0.h
67- ; SVE2-NEXT: fcvt z1.s, p0/m, z1.h
68- ; SVE2-NEXT: fcvt z2.s, p0/m, z2.h
69- ; SVE2-NEXT: fcvt z3.s, p0/m, z3.h
70- ; SVE2-NEXT: fmul z0.s, z0.s, z1.s
71- ; SVE2-NEXT: ldr z1, [x0]
72- ; SVE2-NEXT: fmul z2.s, z2.s, z3.s
73- ; SVE2-NEXT: fadd z0.s, z1.s, z0.s
74- ; SVE2-NEXT: fadd z0.s, z0.s, z2.s
75- ; SVE2-NEXT: str z0, [x0]
76- ; SVE2-NEXT: ret
77- ;
78- ; SVE2P1-LABEL: fdot_wide_vl256:
79- ; SVE2P1: // %bb.0: // %entry
80- ; SVE2P1-NEXT: ldr z0, [x0]
81- ; SVE2P1-NEXT: ldr z1, [x1]
82- ; SVE2P1-NEXT: ldr z2, [x2]
83- ; SVE2P1-NEXT: fdot z0.s, z1.h, z2.h
84- ; SVE2P1-NEXT: str z0, [x0]
85- ; SVE2P1-NEXT: ret
86- entry:
87- %acc = load <8 x float >, ptr %accptr
88- %a = load <16 x half >, ptr %aptr
89- %b = load <16 x half >, ptr %bptr
90- %a.wide = fpext <16 x half > %a to <16 x float >
91- %b.wide = fpext <16 x half > %b to <16 x float >
92- %mult = fmul <16 x float > %a.wide , %b.wide
93- %partial.reduce = call <8 x float > @llvm.vector.partial.reduce.fadd (<8 x float > %acc , <16 x float > %mult )
94- store <8 x float > %partial.reduce , ptr %accptr
95- ret void
96- }
97-
98- define <4 x float > @fixed_fdot_wide (<4 x float > %acc , <8 x half > %a , <8 x half > %b ) {
99- ; CHECK-LABEL: fixed_fdot_wide:
100- ; CHECK: // %bb.0: // %entry
101- ; CHECK-NEXT: fcvtl v3.4s, v1.4h
102- ; CHECK-NEXT: fcvtl v4.4s, v2.4h
103- ; CHECK-NEXT: fcvtl2 v1.4s, v1.8h
104- ; CHECK-NEXT: fcvtl2 v2.4s, v2.8h
105- ; CHECK-NEXT: fmul v3.4s, v3.4s, v4.4s
106- ; CHECK-NEXT: fmul v1.4s, v1.4s, v2.4s
107- ; CHECK-NEXT: fadd v0.4s, v0.4s, v3.4s
108- ; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
109- ; CHECK-NEXT: ret
110- entry:
111- %a.wide = fpext <8 x half > %a to <8 x float >
112- %b.wide = fpext <8 x half > %b to <8 x float >
113- %mult = fmul <8 x float > %a.wide , %b.wide
114- %partial.reduce = call <4 x float > @llvm.vector.partial.reduce.fadd (<4 x float > %acc , <8 x float > %mult )
115- ret <4 x float > %partial.reduce
116- }
117-
118- define <8 x half > @partial_reduce_half (<8 x half > %acc , <16 x half > %a ) {
119- ; CHECK-LABEL: partial_reduce_half:
120- ; CHECK: // %bb.0: // %entry
121- ; CHECK-NEXT: fadd v0.8h, v0.8h, v1.8h
122- ; CHECK-NEXT: fadd v0.8h, v0.8h, v2.8h
123- ; CHECK-NEXT: ret
124- entry:
125- %partial.reduce = call <8 x half > @llvm.vector.partial.reduce.fadd (<8 x half > %acc , <16 x half > %a )
126- ret <8 x half > %partial.reduce
127- }
128-
129- define <4 x float > @partial_reduce_float (<4 x float > %acc , <8 x float > %a ) {
130- ; CHECK-LABEL: partial_reduce_float:
131- ; CHECK: // %bb.0: // %entry
132- ; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
133- ; CHECK-NEXT: fadd v0.4s, v0.4s, v2.4s
134- ; CHECK-NEXT: ret
135- entry:
136- %partial.reduce = call <4 x float > @llvm.vector.partial.reduce.fadd (<4 x float > %acc , <8 x float > %a )
137- ret <4 x float > %partial.reduce
138- }
139-
140- define <2 x double > @partial_reduce_double (<2 x double > %acc , <4 x double > %a ) {
141- ; CHECK-LABEL: partial_reduce_double:
142- ; CHECK: // %bb.0: // %entry
143- ; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d
144- ; CHECK-NEXT: fadd v0.2d, v0.2d, v2.2d
145- ; CHECK-NEXT: ret
146- entry:
147- %partial.reduce = call <2 x double > @llvm.vector.partial.reduce.fadd (<2 x double > %acc , <4 x double > %a )
148- ret <2 x double > %partial.reduce
149- }
150-
151- define <vscale x 8 x half > @partial_reduce_half_vl128 (<vscale x 8 x half > %acc , <vscale x 16 x half > %a ) {
152- ; CHECK-LABEL: partial_reduce_half_vl128:
60+ define <vscale x 8 x half > @partial_reduce_nxv8f16 (<vscale x 8 x half > %acc , <vscale x 16 x half > %a ) {
61+ ; CHECK-LABEL: partial_reduce_nxv8f16:
15362; CHECK: // %bb.0: // %entry
15463; CHECK-NEXT: fadd z0.h, z0.h, z1.h
15564; CHECK-NEXT: fadd z0.h, z0.h, z2.h
@@ -159,8 +68,8 @@ entry:
15968 ret <vscale x 8 x half > %partial.reduce
16069}
16170
162- define <vscale x 4 x float > @partial_reduce_float_vl128 (<vscale x 4 x float > %acc , <vscale x 8 x float > %a ) {
163- ; CHECK-LABEL: partial_reduce_float_vl128 :
71+ define <vscale x 4 x float > @partial_reduce_nxv4f32 (<vscale x 4 x float > %acc , <vscale x 8 x float > %a ) {
72+ ; CHECK-LABEL: partial_reduce_nxv4f32 :
16473; CHECK: // %bb.0: // %entry
16574; CHECK-NEXT: fadd z0.s, z0.s, z1.s
16675; CHECK-NEXT: fadd z0.s, z0.s, z2.s
@@ -170,8 +79,8 @@ entry:
17079 ret <vscale x 4 x float > %partial.reduce
17180}
17281
173- define <vscale x 2 x double > @partial_reduce_double_vl128 (<vscale x 2 x double > %acc , <vscale x 4 x double > %a ) {
174- ; CHECK-LABEL: partial_reduce_double_vl128 :
82+ define <vscale x 2 x double > @partial_reduce_nxv2f64 (<vscale x 2 x double > %acc , <vscale x 4 x double > %a ) {
83+ ; CHECK-LABEL: partial_reduce_nxv2f64 :
17584; CHECK: // %bb.0: // %entry
17685; CHECK-NEXT: fadd z0.d, z0.d, z1.d
17786; CHECK-NEXT: fadd z0.d, z0.d, z2.d
0 commit comments