@@ -274,19 +274,20 @@ define void @sub_i16rhs() minsize {
274274; N.b. we could probably check more here ("add w2, w3, w1, uxtw" for
275275; example), but the remaining instructions are probably not idiomatic
276276; in the face of "add/sub (shifted register)" so I don't intend to.
277- define void @addsub_i32rhs () minsize {
277+ define void @addsub_i32rhs (i32 %in32 ) minsize {
278278; CHECK-LABEL: addsub_i32rhs:
279279 %val32_tmp = load i32 , i32* @var32
280280 %lhs64 = load i64 , i64* @var64
281281
282282 %val32 = add i32 %val32_tmp , 123
283283
284- %rhs64_zext = zext i32 %val32 to i64
284+ %rhs64_zext = zext i32 %in32 to i64
285285 %res64_zext = add i64 %lhs64 , %rhs64_zext
286286 store volatile i64 %res64_zext , i64* @var64
287287; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
288288
289- %rhs64_zext_shift = shl i64 %rhs64_zext , 2
289+ %rhs64_zext2 = zext i32 %val32 to i64
290+ %rhs64_zext_shift = shl i64 %rhs64_zext2 , 2
290291 %res64_zext_shift = add i64 %lhs64 , %rhs64_zext_shift
291292 store volatile i64 %res64_zext_shift , i64* @var64
292293; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
@@ -304,19 +305,20 @@ define void @addsub_i32rhs() minsize {
304305 ret void
305306}
306307
307- define void @sub_i32rhs () minsize {
308+ define void @sub_i32rhs (i32 %in32 ) minsize {
308309; CHECK-LABEL: sub_i32rhs:
309310 %val32_tmp = load i32 , i32* @var32
310311 %lhs64 = load i64 , i64* @var64
311312
312313 %val32 = add i32 %val32_tmp , 123
313314
314- %rhs64_zext = zext i32 %val32 to i64
315+ %rhs64_zext = zext i32 %in32 to i64
315316 %res64_zext = sub i64 %lhs64 , %rhs64_zext
316317 store volatile i64 %res64_zext , i64* @var64
317318; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw
318319
319- %rhs64_zext_shift = shl i64 %rhs64_zext , 2
320+ %rhs64_zext2 = zext i32 %val32 to i64
321+ %rhs64_zext_shift = shl i64 %rhs64_zext2 , 2
320322 %res64_zext_shift = sub i64 %lhs64 , %rhs64_zext_shift
321323 store volatile i64 %res64_zext_shift , i64* @var64
322324; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2
@@ -333,3 +335,98 @@ define void @sub_i32rhs() minsize {
333335
334336 ret void
335337}
338+
339+ ; Check that implicit zext from w reg write is used instead of uxtw form of add.
340+ define i64 @add_fold_uxtw (i32 %x , i64 %y ) {
341+ ; CHECK-LABEL: add_fold_uxtw:
342+ entry:
343+ ; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
344+ %m = and i32 %x , 3
345+ %ext = zext i32 %m to i64
346+ ; CHECK-NEXT: add x0, x1, x[[TMP]]
347+ %ret = add i64 %y , %ext
348+ ret i64 %ret
349+ }
350+
351+ ; Check that implicit zext from w reg write is used instead of uxtw
352+ ; form of sub and that mov WZR is folded to form a neg instruction.
353+ define i64 @sub_fold_uxtw_xzr (i32 %x ) {
354+ ; CHECK-LABEL: sub_fold_uxtw_xzr:
355+ entry:
356+ ; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
357+ %m = and i32 %x , 3
358+ %ext = zext i32 %m to i64
359+ ; CHECK-NEXT: neg x0, x[[TMP]]
360+ %ret = sub i64 0 , %ext
361+ ret i64 %ret
362+ }
363+
364+ ; Check that implicit zext from w reg write is used instead of uxtw form of subs/cmp.
365+ define i1 @cmp_fold_uxtw (i32 %x , i64 %y ) {
366+ ; CHECK-LABEL: cmp_fold_uxtw:
367+ entry:
368+ ; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
369+ %m = and i32 %x , 3
370+ %ext = zext i32 %m to i64
371+ ; CHECK-NEXT: cmp x1, x[[TMP]]
372+ ; CHECK-NEXT: cset
373+ %ret = icmp eq i64 %y , %ext
374+ ret i1 %ret
375+ }
376+
377+ ; Check that implicit zext from w reg write is used instead of uxtw
378+ ; form of add, leading to madd selection.
379+ define i64 @madd_fold_uxtw (i32 %x , i64 %y ) {
380+ ; CHECK-LABEL: madd_fold_uxtw:
381+ entry:
382+ ; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
383+ %m = and i32 %x , 3
384+ %ext = zext i32 %m to i64
385+ ; CHECK-NEXT: madd x0, x1, x1, x[[TMP]]
386+ %mul = mul i64 %y , %y
387+ %ret = add i64 %mul , %ext
388+ ret i64 %ret
389+ }
390+
391+ ; Check that implicit zext from w reg write is used instead of uxtw
392+ ; form of sub, leading to sub/cmp folding: the sub of the zero-extended
393+ ; value feeds an icmp against zero, so it is selected as a cmp (subs).
394+ define i1 @cmp_sub_fold_uxtw (i32 %x , i64 %y , i64 %z ) {
395+ ; CHECK-LABEL: cmp_sub_fold_uxtw:
396+ entry:
397+ ; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
398+ %m = and i32 %x , 3
399+ %ext = zext i32 %m to i64
400+ ; CHECK-NEXT: cmp x[[TMP2:[0-9]+]], x[[TMP]]
401+ ; CHECK-NEXT: cset
402+ %sub = sub i64 %z , %ext
403+ %ret = icmp eq i64 %sub , 0
404+ ret i1 %ret
405+ }
406+
407+ ; Check that implicit zext from w reg write is used instead of uxtw
408+ ; form of add and add of -1 gets selected as sub.
409+ define i64 @add_imm_fold_uxtw (i32 %x ) {
410+ ; CHECK-LABEL: add_imm_fold_uxtw:
411+ entry:
412+ ; CHECK: and w[[TMP:[0-9]+]], w0, #0x3
413+ %m = and i32 %x , 3
414+ %ext = zext i32 %m to i64
415+ ; CHECK-NEXT: sub x0, x[[TMP]], #1
416+ %ret = add i64 %ext , -1
417+ ret i64 %ret
418+ }
419+
420+ ; Check that implicit zext from w reg write is used instead of uxtw
421+ ; form of add and add lsl form gets selected.
422+ define i64 @add_lsl_fold_uxtw (i32 %x , i64 %y ) {
423+ ; CHECK-LABEL: add_lsl_fold_uxtw:
424+ entry:
425+ ; CHECK: orr w[[TMP:[0-9]+]], w0, #0x3
426+ %m = or i32 %x , 3
427+ %ext = zext i32 %m to i64
428+ %shift = shl i64 %y , 3
429+ ; CHECK-NEXT: add x0, x[[TMP]], x1, lsl #3
430+ %ret = add i64 %ext , %shift
431+ ret i64 %ret
432+ }
0 commit comments