@@ -291,3 +291,160 @@ define <2 x double> @test_fabd_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
291291 %abd = call <2 x double > @llvm.aarch64.neon.fabd.v2f64 (<2 x double > %lhs , <2 x double > %rhs )
292292 ret <2 x double > %abd
293293}
294+
; Known-bits test for the unsigned absolute-difference intrinsic on <8 x i16>.
; Every lane of both inputs is masked with 15 before the uabd call; the result
; is lane-reversed (shuffle mask 7..0) and then masked with 15 once more.
; The expected output contains no `and` after the `uabd`: only the two input
; masks remain, followed by the rev64 + ext sequence emitted for the shuffle.
295+ define <8 x i16 > @test_uabd_knownbits_vec8i16 (<8 x i16 > %lhs , <8 x i16 > %rhs ) {
296+ ; CHECK-LABEL: test_uabd_knownbits_vec8i16:
297+ ; CHECK: // %bb.0:
298+ ; CHECK-NEXT: movi v2.8h, #15
299+ ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
300+ ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
301+ ; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
302+ ; CHECK-NEXT: rev64 v0.8h, v0.8h
303+ ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
304+ ; CHECK-NEXT: ret
305+ %and1 = and <8 x i16 > %lhs , <i16 15 , i16 15 , i16 15 , i16 15 , i16 15 , i16 15 , i16 15 , i16 15 >
306+ %and2 = and <8 x i16 > %rhs , <i16 15 , i16 15 , i16 15 , i16 15 , i16 15 , i16 15 , i16 15 , i16 15 >
307+ %uabd = call <8 x i16 > @llvm.aarch64.neon.uabd.v8i16 (<8 x i16 > %and1 , <8 x i16 > %and2 )
308+ %suff = shufflevector <8 x i16 > %uabd , <8 x i16 > undef , <8 x i32 > <i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
309+ %res = and <8 x i16 > %suff , <i16 15 , i16 15 , i16 15 , i16 15 , i16 15 , i16 15 , i16 15 , i16 15 >
310+ ret <8 x i16 > %res
311+ }
312+
; uabd of two <4 x i32> inputs whose lanes are each masked with 65535, then a
; lane reversal (shuffle mask 3,2,1,0) and a logical shift right by 17.
; With both operands below 2^16 the absolute difference is also below 2^16, so
; shifting right by 17 leaves zero in every lane.
; NOTE(review): the expected output materializes the zero vector but still
; emits `ushr #17` on it -- looks like a missed fold; confirm whether that is
; the intended baseline for this test.
313+ define <4 x i32 > @knownbits_uabd_mask_and_shuffle_lshr (<4 x i32 > %a0 , <4 x i32 > %a1 ) {
314+ ; CHECK-LABEL: knownbits_uabd_mask_and_shuffle_lshr:
315+ ; CHECK: // %bb.0:
316+ ; CHECK-NEXT: movi v0.2d, #0000000000000000
317+ ; CHECK-NEXT: ushr v0.4s, v0.4s, #17
318+ ; CHECK-NEXT: ret
319+ %1 = and <4 x i32 > %a0 , <i32 65535 , i32 65535 , i32 65535 , i32 65535 >
320+ %2 = and <4 x i32 > %a1 , <i32 65535 , i32 65535 , i32 65535 , i32 65535 >
321+ %3 = call <4 x i32 > @llvm.aarch64.neon.uabd.v4i32 (<4 x i32 > %1 , <4 x i32 > %2 )
322+ %4 = shufflevector <4 x i32 > %3 , <4 x i32 > undef , <4 x i32 > <i32 3 , i32 2 , i32 1 , i32 0 >
323+ %5 = lshr <4 x i32 > %4 , <i32 17 , i32 17 , i32 17 , i32 17 >
324+ ret <4 x i32 > %5
325+ }
326+
; uabd of two <4 x i32> inputs whose lanes are each masked with 32767, then an
; identity shuffle (mask 0,1,2,3) and a logical shift right by 17.
; Both operands are below 2^15, so the absolute difference has no bits at or
; above bit 17; the expected output is just the all-zero vector.
327+ define <4 x i32 > @knownbits_mask_and_shuffle_lshr (<4 x i32 > %a0 , <4 x i32 > %a1 ) {
328+ ; CHECK-LABEL: knownbits_mask_and_shuffle_lshr:
329+ ; CHECK: // %bb.0:
330+ ; CHECK-NEXT: movi v0.2d, #0000000000000000
331+ ; CHECK-NEXT: ret
332+ %1 = and <4 x i32 > %a0 , <i32 32767 , i32 32767 , i32 32767 , i32 32767 >
333+ %2 = and <4 x i32 > %a1 , <i32 32767 , i32 32767 , i32 32767 , i32 32767 >
334+ %3 = call <4 x i32 > @llvm.aarch64.neon.uabd.v4i32 (<4 x i32 > %1 , <4 x i32 > %2 )
335+ %4 = shufflevector <4 x i32 > %3 , <4 x i32 > undef , <4 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 >
336+ %5 = lshr <4 x i32 > %4 , <i32 17 , i32 17 , i32 17 , i32 17 >
337+ ret <4 x i32 > %5
338+ }
339+
; Signed absolute difference with per-lane masks that differ between the two
; operands: %lhs uses <255,-1,-1,255>, %rhs uses <255,255,-1,-1>.
; The shuffle (mask 0,0,3,3) keeps only lanes 0 and 3 of the sabd result, and
; the result is then masked with 255.
; Lane 0 has both operands masked with 255, but in lane 3 the %rhs mask is -1
; (no bits cleared). The expected output keeps the trailing `and` with the
; 0xff-per-32-bit-lane constant after the shuffle.
340+ define <4 x i32 > @test_sabd_knownbits_vec4i32 (<4 x i32 > %lhs , <4 x i32 > %rhs ) {
341+ ; CHECK-LABEL: test_sabd_knownbits_vec4i32:
342+ ; CHECK: // %bb.0:
343+ ; CHECK-NEXT: adrp x8, .LCPI31_0
344+ ; CHECK-NEXT: adrp x9, .LCPI31_1
345+ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI31_0]
346+ ; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI31_1]
347+ ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
348+ ; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
349+ ; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
350+ ; CHECK-NEXT: movi v1.2d, #0x0000ff000000ff
351+ ; CHECK-NEXT: mov v0.s[1], v0.s[0]
352+ ; CHECK-NEXT: trn2 v0.4s, v0.4s, v0.4s
353+ ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
354+ ; CHECK-NEXT: ret
355+ %and1 = and <4 x i32 > %lhs , <i32 255 , i32 -1 , i32 -1 , i32 255 >
356+ %and2 = and <4 x i32 > %rhs , <i32 255 , i32 255 , i32 -1 , i32 -1 >
357+ %abd = call <4 x i32 > @llvm.aarch64.neon.sabd.v4i32 (<4 x i32 > %and1 , <4 x i32 > %and2 )
358+ %s = shufflevector <4 x i32 > %abd , <4 x i32 > undef , <4 x i32 > <i32 0 , i32 0 , i32 3 , i32 3 >
359+ %4 = and <4 x i32 > %s , <i32 255 , i32 255 , i32 255 , i32 255 >
360+ ret <4 x i32 > %4
361+ }
362+
; Signed absolute difference of two inputs masked with the same per-lane
; constant <-1,-1,255,4085>; the shuffle (mask 2,2,3,3) keeps only the two
; lanes whose inputs were actually narrowed (masks 255 and 4085).
; Nothing is folded here: the expected output still contains both input
; masks, the `sabd`, and a zip2 for the shuffle.
363+ define <4 x i32 > @knownbits_sabd_and_mask (<4 x i32 > %a0 , <4 x i32 > %a1 ) {
364+ ; CHECK-LABEL: knownbits_sabd_and_mask:
365+ ; CHECK: // %bb.0:
366+ ; CHECK-NEXT: adrp x8, .LCPI32_0
367+ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI32_0]
368+ ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
369+ ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
370+ ; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
371+ ; CHECK-NEXT: zip2 v0.4s, v0.4s, v0.4s
372+ ; CHECK-NEXT: ret
373+ %1 = and <4 x i32 > %a0 , <i32 -1 , i32 -1 , i32 255 , i32 4085 >
374+ %2 = and <4 x i32 > %a1 , <i32 -1 , i32 -1 , i32 255 , i32 4085 >
375+ %3 = call <4 x i32 > @llvm.aarch64.neon.sabd.v4i32 (<4 x i32 > %1 , <4 x i32 > %2 )
376+ %4 = shufflevector <4 x i32 > %3 , <4 x i32 > undef , <4 x i32 > <i32 2 , i32 2 , i32 3 , i32 3 >
377+ ret <4 x i32 > %4
378+ }
379+
; Each input is masked with <-1,-1,255,4085> and then OR-ed with 65535 in
; every lane. In lanes 2 and 3 the masks (255 and 4085) only keep bits that
; lie inside 65535, so after the OR both operands are exactly 65535 there.
; The shuffle (mask 2,2,3,3) selects only those lanes, whose absolute
; difference is zero; the expected output is the all-zero vector.
; NOTE(review): the function name says "sabd" but the call below uses the
; uabd intrinsic, unlike the sibling knownbits_sabd_* tests -- confirm which
; intrinsic this test is meant to cover.
380+ define <4 x i32 > @knownbits_sabd_and_or_mask (<4 x i32 > %a0 , <4 x i32 > %a1 ) {
381+ ; CHECK-LABEL: knownbits_sabd_and_or_mask:
382+ ; CHECK: // %bb.0:
383+ ; CHECK-NEXT: movi v0.2d, #0000000000000000
384+ ; CHECK-NEXT: ret
385+ %1 = and <4 x i32 > %a0 , <i32 -1 , i32 -1 , i32 255 , i32 4085 >
386+ %2 = or <4 x i32 > %1 , <i32 65535 , i32 65535 , i32 65535 , i32 65535 >
387+ %3 = and <4 x i32 > %a1 , <i32 -1 , i32 -1 , i32 255 , i32 4085 >
388+ %4 = or <4 x i32 > %3 , <i32 65535 , i32 65535 , i32 65535 , i32 65535 >
389+ %5 = call <4 x i32 > @llvm.aarch64.neon.uabd.v4i32 (<4 x i32 > %2 , <4 x i32 > %4 )
390+ %6 = shufflevector <4 x i32 > %5 , <4 x i32 > undef , <4 x i32 > <i32 2 , i32 2 , i32 3 , i32 3 >
391+ ret <4 x i32 > %6
392+ }
393+
; Each input is masked with <-1,-1,255,4085> and then XOR-ed with 65535 in
; every lane before the signed absolute difference; the shuffle (mask
; 2,2,3,3) keeps only lanes 2 and 3.
; Unlike the OR variant, XOR leaves the low bits dependent on the inputs, and
; the expected output keeps the full and/eor/sabd/zip2 sequence.
394+ define <4 x i32 > @knownbits_sabd_and_xor_mask (<4 x i32 > %a0 , <4 x i32 > %a1 ) {
395+ ; CHECK-LABEL: knownbits_sabd_and_xor_mask:
396+ ; CHECK: // %bb.0:
397+ ; CHECK-NEXT: adrp x8, .LCPI34_0
398+ ; CHECK-NEXT: movi v3.2d, #0x00ffff0000ffff
399+ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI34_0]
400+ ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
401+ ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
402+ ; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b
403+ ; CHECK-NEXT: eor v1.16b, v1.16b, v3.16b
404+ ; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
405+ ; CHECK-NEXT: zip2 v0.4s, v0.4s, v0.4s
406+ ; CHECK-NEXT: ret
407+ %1 = and <4 x i32 > %a0 , <i32 -1 , i32 -1 , i32 255 , i32 4085 >
408+ %2 = xor <4 x i32 > %1 , <i32 65535 , i32 65535 , i32 65535 , i32 65535 >
409+ %3 = and <4 x i32 > %a1 , <i32 -1 , i32 -1 , i32 255 , i32 4085 >
410+ %4 = xor <4 x i32 > %3 , <i32 65535 , i32 65535 , i32 65535 , i32 65535 >
411+ %5 = call <4 x i32 > @llvm.aarch64.neon.sabd.v4i32 (<4 x i32 > %2 , <4 x i32 > %4 )
412+ %6 = shufflevector <4 x i32 > %5 , <4 x i32 > undef , <4 x i32 > <i32 2 , i32 2 , i32 3 , i32 3 >
413+ ret <4 x i32 > %6
414+ }
415+
; Each input is masked with <-65536,-7,-7,-65536> and then shifted left by 17.
; In lanes 0 and 3 the mask clears the low 16 bits, so only bits 16..31 can be
; set; shifting a 32-bit lane left by 17 pushes all of them out, leaving zero.
; The shuffle (mask 0,0,3,3) selects only those lanes, so both sabd operands
; are zero there and the expected output is the all-zero vector.
416+ define <4 x i32 > @knownbits_sabd_and_shl_mask (<4 x i32 > %a0 , <4 x i32 > %a1 ) {
417+ ; CHECK-LABEL: knownbits_sabd_and_shl_mask:
418+ ; CHECK: // %bb.0:
419+ ; CHECK-NEXT: movi v0.2d, #0000000000000000
420+ ; CHECK-NEXT: ret
421+ %1 = and <4 x i32 > %a0 , <i32 -65536 , i32 -7 , i32 -7 , i32 -65536 >
422+ %2 = shl <4 x i32 > %1 , <i32 17 , i32 17 , i32 17 , i32 17 >
423+ %3 = and <4 x i32 > %a1 , <i32 -65536 , i32 -7 , i32 -7 , i32 -65536 >
424+ %4 = shl <4 x i32 > %3 , <i32 17 , i32 17 , i32 17 , i32 17 >
425+ %5 = call <4 x i32 > @llvm.aarch64.neon.sabd.v4i32 (<4 x i32 > %2 , <4 x i32 > %4 )
426+ %6 = shufflevector <4 x i32 > %5 , <4 x i32 > undef , <4 x i32 > <i32 0 , i32 0 , i32 3 , i32 3 >
427+ ret <4 x i32 > %6
428+ }
429+
; Each input is masked with <-65536,-7,-7,-65536> and the masked value is then
; multiplied by the original (unmasked) input before the signed absolute
; difference; the shuffle (mask 0,0,3,3) keeps only lanes 0 and 3.
; Nothing is folded here: the expected output still contains the input masks,
; both multiplies, the `sabd`, and the mov/trn2 pair for the shuffle.
430+ define <4 x i32 > @knownbits_sabd_and_mul_mask (<4 x i32 > %a0 , <4 x i32 > %a1 ) {
431+ ; CHECK-LABEL: knownbits_sabd_and_mul_mask:
432+ ; CHECK: // %bb.0:
433+ ; CHECK-NEXT: adrp x8, .LCPI36_0
434+ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI36_0]
435+ ; CHECK-NEXT: and v3.16b, v0.16b, v2.16b
436+ ; CHECK-NEXT: and v2.16b, v1.16b, v2.16b
437+ ; CHECK-NEXT: mul v0.4s, v0.4s, v3.4s
438+ ; CHECK-NEXT: mul v1.4s, v1.4s, v2.4s
439+ ; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
440+ ; CHECK-NEXT: mov v0.s[1], v0.s[0]
441+ ; CHECK-NEXT: trn2 v0.4s, v0.4s, v0.4s
442+ ; CHECK-NEXT: ret
443+ %1 = and <4 x i32 > %a0 , <i32 -65536 , i32 -7 , i32 -7 , i32 -65536 >
444+ %2 = mul <4 x i32 > %a0 , %1
445+ %3 = and <4 x i32 > %a1 , <i32 -65536 , i32 -7 , i32 -7 , i32 -65536 >
446+ %4 = mul <4 x i32 > %a1 , %3
447+ %5 = call <4 x i32 > @llvm.aarch64.neon.sabd.v4i32 (<4 x i32 > %2 , <4 x i32 > %4 )
448+ %6 = shufflevector <4 x i32 > %5 , <4 x i32 > undef , <4 x i32 > <i32 0 , i32 0 , i32 3 , i32 3 >
449+ ret <4 x i32 > %6
450+ }
0 commit comments