@@ -185,6 +185,19 @@ define <vscale x 2 x float> @copysign_nxv2f32_nxv2f64(<vscale x 2 x float> %a, <
185185 ret <vscale x 2 x float > %r
186186}
187187
; Test: llvm.copysign.nxv2f32 where the sign operand %b is a
; <vscale x 2 x bfloat> widened to float with fpext.
; The CHECK lines expect no convert instruction: the widening shows up as a
; 16-bit left shift of the .s lanes of z1, after which z0 is masked with
; 0x7fffffff (every bit except the top one), z1 is masked with 0x80000000
; (top bit only), and the two are combined with orr.
188+ define <vscale x 2 x float > @copysign_nxv2f32_nxv2bf16 (<vscale x 2 x float > %a , <vscale x 2 x bfloat> %b ) {
189+ ; CHECK-LABEL: copysign_nxv2f32_nxv2bf16:
190+ ; CHECK: // %bb.0:
191+ ; CHECK-NEXT: lsl z1.s, z1.s, #16
192+ ; CHECK-NEXT: and z0.s, z0.s, #0x7fffffff
193+ ; CHECK-NEXT: and z1.s, z1.s, #0x80000000
194+ ; CHECK-NEXT: orr z0.d, z0.d, z1.d
195+ ; CHECK-NEXT: ret
196+ %tmp0 = fpext <vscale x 2 x bfloat> %b to <vscale x 2 x float >
197+ %r = call <vscale x 2 x float > @llvm.copysign.nxv2f32 (<vscale x 2 x float > %a , <vscale x 2 x float > %tmp0 )
198+ ret <vscale x 2 x float > %r
199+ }
200+
188201;
189202; llvm.copysign.nxv4f32
190203;
@@ -230,6 +243,19 @@ define <vscale x 4 x float> @copysign_nxv4f32_nxv4f64(<vscale x 4 x float> %a, <
230243 ret <vscale x 4 x float > %r
231244}
232245
; Test: llvm.copysign.nxv4f32 where the sign operand %b is a
; <vscale x 4 x bfloat> widened to float with fpext.
; The CHECK lines expect the widening as a 16-bit left shift of the .s lanes
; of z1 (no convert instruction), then a 0x7fffffff mask on z0, a 0x80000000
; mask on z1, and an orr of the two.
246+ define <vscale x 4 x float > @copysign_nxv4f32_nxv4bf16 (<vscale x 4 x float > %a , <vscale x 4 x bfloat> %b ) {
247+ ; CHECK-LABEL: copysign_nxv4f32_nxv4bf16:
248+ ; CHECK: // %bb.0:
249+ ; CHECK-NEXT: lsl z1.s, z1.s, #16
250+ ; CHECK-NEXT: and z0.s, z0.s, #0x7fffffff
251+ ; CHECK-NEXT: and z1.s, z1.s, #0x80000000
252+ ; CHECK-NEXT: orr z0.d, z0.d, z1.d
253+ ; CHECK-NEXT: ret
254+ %tmp0 = fpext <vscale x 4 x bfloat> %b to <vscale x 4 x float >
255+ %r = call <vscale x 4 x float > @llvm.copysign.nxv4f32 (<vscale x 4 x float > %a , <vscale x 4 x float > %tmp0 )
256+ ret <vscale x 4 x float > %r
257+ }
258+
233259;
234260; llvm.copysign.nxv2f64
235261;
@@ -273,9 +299,137 @@ define <vscale x 2 x double> @copysign_nxv2f64_nxv2f64(<vscale x 2 x double> %a,
273299 ret <vscale x 2 x double > %r
274300}
275301
; Test: llvm.copysign.nxv2f64 where the sign operand %b is a
; <vscale x 2 x bfloat> widened to double with fpext.
; The CHECK lines expect a 16-bit left shift of the .s lanes of z1 followed
; by an fcvt from .s to .d lanes under a ptrue p0.d predicate, then the
; 64-bit masks (0x7fffffffffffffff on z0, 0x8000000000000000 on z1) and an
; orr of the two.
302+ define <vscale x 2 x double > @copysign_nxv2f64_nxv2bf16 (<vscale x 2 x double > %a , <vscale x 2 x bfloat> %b ) {
303+ ; CHECK-LABEL: copysign_nxv2f64_nxv2bf16:
304+ ; CHECK: // %bb.0:
305+ ; CHECK-NEXT: lsl z1.s, z1.s, #16
306+ ; CHECK-NEXT: ptrue p0.d
307+ ; CHECK-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
308+ ; CHECK-NEXT: fcvt z1.d, p0/m, z1.s
309+ ; CHECK-NEXT: and z1.d, z1.d, #0x8000000000000000
310+ ; CHECK-NEXT: orr z0.d, z0.d, z1.d
311+ ; CHECK-NEXT: ret
312+ %b.ext = fpext <vscale x 2 x bfloat> %b to <vscale x 2 x double >
313+ %r = call <vscale x 2 x double > @llvm.copysign.nxv2f64 (<vscale x 2 x double > %a , <vscale x 2 x double > %b.ext )
314+ ret <vscale x 2 x double > %r
315+ }
316+
317+ ;
318+ ; llvm.copysign.nxv2bf16
319+ ;
320+
; Test: llvm.copysign.nxv2bf16 with both operands already
; <vscale x 2 x bfloat>, so the IR contains no conversion.
; The CHECK lines expect only bitwise operations on .h lanes: z1 masked with
; 0x8000 (top bit only), z0 masked with 0x7fff (every bit except the top
; one), and an orr of the two.
321+ define <vscale x 2 x bfloat> @copysign_nxv2bf16_nxv2bf16 (<vscale x 2 x bfloat> %a , <vscale x 2 x bfloat> %b ) {
322+ ; CHECK-LABEL: copysign_nxv2bf16_nxv2bf16:
323+ ; CHECK: // %bb.0:
324+ ; CHECK-NEXT: and z1.h, z1.h, #0x8000
325+ ; CHECK-NEXT: and z0.h, z0.h, #0x7fff
326+ ; CHECK-NEXT: orr z0.d, z0.d, z1.d
327+ ; CHECK-NEXT: ret
328+ %r = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16 (<vscale x 2 x bfloat> %a , <vscale x 2 x bfloat> %b )
329+ ret <vscale x 2 x bfloat> %r
330+ }
331+
; Test: llvm.copysign.nxv2bf16 where the sign operand %b is a
; <vscale x 2 x float> narrowed to bfloat with fptrunc.
; The CHECK lines expect the narrowing as a bfcvt from .s to .h lanes under
; a ptrue p0.d predicate, followed by the 16-bit masks (0x7fff on z0,
; 0x8000 on z1) and an orr of the two.
332+ define <vscale x 2 x bfloat> @copysign_nxv2bf16_nxv2f32 (<vscale x 2 x bfloat> %a , <vscale x 2 x float > %b ) {
333+ ; CHECK-LABEL: copysign_nxv2bf16_nxv2f32:
334+ ; CHECK: // %bb.0:
335+ ; CHECK-NEXT: ptrue p0.d
336+ ; CHECK-NEXT: and z0.h, z0.h, #0x7fff
337+ ; CHECK-NEXT: bfcvt z1.h, p0/m, z1.s
338+ ; CHECK-NEXT: and z1.h, z1.h, #0x8000
339+ ; CHECK-NEXT: orr z0.d, z0.d, z1.d
340+ ; CHECK-NEXT: ret
341+ %tmp0 = fptrunc <vscale x 2 x float > %b to <vscale x 2 x bfloat>
342+ %r = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16 (<vscale x 2 x bfloat> %a , <vscale x 2 x bfloat> %tmp0 )
343+ ret <vscale x 2 x bfloat> %r
344+ }
345+
346+ ; TODO: Cannot downconvert from double to bfloat
347+ ;define <vscale x 2 x bfloat> @copysign_nxv2bf16_nxv2f64(<vscale x 2 x bfloat> %a, <vscale x 2 x double> %b) {
348+ ; %tmp0 = fptrunc <vscale x 2 x double> %b to <vscale x 2 x bfloat>
349+ ; %r = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %tmp0)
350+ ; ret <vscale x 2 x bfloat> %r
351+ ;}
352+
353+ ;
354+ ; llvm.copysign.nxv4bf16
355+ ;
356+
; Test: llvm.copysign.nxv4bf16 with both operands already
; <vscale x 4 x bfloat>, so the IR contains no conversion.
; The CHECK lines expect only bitwise operations on .h lanes: z1 masked with
; 0x8000, z0 masked with 0x7fff, and an orr of the two.
357+ define <vscale x 4 x bfloat> @copysign_nxv4bf16_nxv4bf16 (<vscale x 4 x bfloat> %a , <vscale x 4 x bfloat> %b ) {
358+ ; CHECK-LABEL: copysign_nxv4bf16_nxv4bf16:
359+ ; CHECK: // %bb.0:
360+ ; CHECK-NEXT: and z1.h, z1.h, #0x8000
361+ ; CHECK-NEXT: and z0.h, z0.h, #0x7fff
362+ ; CHECK-NEXT: orr z0.d, z0.d, z1.d
363+ ; CHECK-NEXT: ret
364+ %r = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16 (<vscale x 4 x bfloat> %a , <vscale x 4 x bfloat> %b )
365+ ret <vscale x 4 x bfloat> %r
366+ }
367+
; Test: llvm.copysign.nxv4bf16 where the sign operand %b is a
; <vscale x 4 x float> narrowed to bfloat with fptrunc.
; The CHECK lines expect the narrowing as a bfcvt from .s to .h lanes under
; a ptrue p0.s predicate, followed by the 16-bit masks (0x7fff on z0,
; 0x8000 on z1) and an orr of the two.
368+ define <vscale x 4 x bfloat> @copysign_nxv4bf16_nxv4f32 (<vscale x 4 x bfloat> %a , <vscale x 4 x float > %b ) {
369+ ; CHECK-LABEL: copysign_nxv4bf16_nxv4f32:
370+ ; CHECK: // %bb.0:
371+ ; CHECK-NEXT: ptrue p0.s
372+ ; CHECK-NEXT: and z0.h, z0.h, #0x7fff
373+ ; CHECK-NEXT: bfcvt z1.h, p0/m, z1.s
374+ ; CHECK-NEXT: and z1.h, z1.h, #0x8000
375+ ; CHECK-NEXT: orr z0.d, z0.d, z1.d
376+ ; CHECK-NEXT: ret
377+ %b.trunc = fptrunc <vscale x 4 x float > %b to <vscale x 4 x bfloat>
378+ %r = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16 (<vscale x 4 x bfloat> %a , <vscale x 4 x bfloat> %b.trunc )
379+ ret <vscale x 4 x bfloat> %r
380+ }
381+
382+ ; TODO: Cannot downconvert from double to bfloat
383+ ;define <vscale x 4 x bfloat> @copysign_nxv4bf16_nxv4f64(<vscale x 4 x bfloat> %a, <vscale x 4 x double> %b) {
384+ ; %b.trunc = fptrunc <vscale x 4 x double> %b to <vscale x 4 x bfloat>
385+ ; %r = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b.trunc)
386+ ; ret <vscale x 4 x bfloat> %r
387+ ;}
388+
389+ ;
390+ ; llvm.copysign.nxv8bf16
391+ ;
392+
; Test: llvm.copysign.nxv8bf16 with both operands already
; <vscale x 8 x bfloat>, so the IR contains no conversion.
; The CHECK lines expect only bitwise operations on .h lanes: z1 masked with
; 0x8000, z0 masked with 0x7fff, and an orr of the two.
393+ define <vscale x 8 x bfloat> @copysign_nxv8bf16_nxv8bf16 (<vscale x 8 x bfloat> %a , <vscale x 8 x bfloat> %b ) {
394+ ; CHECK-LABEL: copysign_nxv8bf16_nxv8bf16:
395+ ; CHECK: // %bb.0:
396+ ; CHECK-NEXT: and z1.h, z1.h, #0x8000
397+ ; CHECK-NEXT: and z0.h, z0.h, #0x7fff
398+ ; CHECK-NEXT: orr z0.d, z0.d, z1.d
399+ ; CHECK-NEXT: ret
400+ %r = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16 (<vscale x 8 x bfloat> %a , <vscale x 8 x bfloat> %b )
401+ ret <vscale x 8 x bfloat> %r
402+ }
403+
; Test: llvm.copysign.nxv8bf16 where the sign operand %b is a
; <vscale x 8 x float> narrowed to bfloat with fptrunc.
; The CHECK lines expect two bfcvt instructions (one on z2, one on z1) under
; a ptrue p0.s predicate, a uzp1 on .h lanes that combines both results into
; z1, and then the 16-bit masks (0x7fff on z0, 0x8000 on z1) and an orr.
404+ define <vscale x 8 x bfloat> @copysign_nxv8bf16_nxv8f32 (<vscale x 8 x bfloat> %a , <vscale x 8 x float > %b ) {
405+ ; CHECK-LABEL: copysign_nxv8bf16_nxv8f32:
406+ ; CHECK: // %bb.0:
407+ ; CHECK-NEXT: ptrue p0.s
408+ ; CHECK-NEXT: and z0.h, z0.h, #0x7fff
409+ ; CHECK-NEXT: bfcvt z2.h, p0/m, z2.s
410+ ; CHECK-NEXT: bfcvt z1.h, p0/m, z1.s
411+ ; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h
412+ ; CHECK-NEXT: and z1.h, z1.h, #0x8000
413+ ; CHECK-NEXT: orr z0.d, z0.d, z1.d
414+ ; CHECK-NEXT: ret
415+ %b.trunc = fptrunc <vscale x 8 x float > %b to <vscale x 8 x bfloat>
416+ %r = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16 (<vscale x 8 x bfloat> %a , <vscale x 8 x bfloat> %b.trunc )
417+ ret <vscale x 8 x bfloat> %r
418+ }
419+
420+ ; TODO: Cannot downconvert from double to bfloat
421+ ;define <vscale x 8 x bfloat> @copysign_nxv8bf16_nxv8f64(<vscale x 8 x bfloat> %a, <vscale x 8 x double> %b) {
422+ ; %b.trunc = fptrunc <vscale x 8 x double> %b to <vscale x 8 x bfloat>
423+ ; %r = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b.trunc)
424+ ; ret <vscale x 8 x bfloat> %r
425+ ;}
426+
276427declare <vscale x 2 x half > @llvm.copysign.nxv2f16 (<vscale x 2 x half > %a , <vscale x 2 x half > %b )
277428declare <vscale x 4 x half > @llvm.copysign.nxv4f16 (<vscale x 4 x half > %a , <vscale x 4 x half > %b )
278429declare <vscale x 8 x half > @llvm.copysign.nxv8f16 (<vscale x 8 x half > %a , <vscale x 8 x half > %b )
279430declare <vscale x 2 x float > @llvm.copysign.nxv2f32 (<vscale x 2 x float > %a , <vscale x 2 x float > %b )
280431declare <vscale x 4 x float > @llvm.copysign.nxv4f32 (<vscale x 4 x float > %a , <vscale x 4 x float > %b )
281432declare <vscale x 2 x double > @llvm.copysign.nxv2f64 (<vscale x 2 x double > %a , <vscale x 2 x double > %b )
433+ declare <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16 (<vscale x 2 x bfloat> %a , <vscale x 2 x bfloat> %b )
434+ declare <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16 (<vscale x 4 x bfloat> %a , <vscale x 4 x bfloat> %b )
435+ declare <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16 (<vscale x 8 x bfloat> %a , <vscale x 8 x bfloat> %b )
0 commit comments