From 9cc45c93be3dde703bcee545d48e6c5b78378123 Mon Sep 17 00:00:00 2001 From: tomershafir Date: Wed, 11 Jun 2025 13:33:03 +0300 Subject: [PATCH] [AArch64] improve zero-cycle regmov test - Add a `gpr32` suffix to test name to denote the specific register class being checked - Expand `-mtriple=arm64-apple-ios` to `-march=arm64` to broaden the test context to the generic architecture, as the specific triple is not required - Port `bl` match to Linux too via the regex: `{{_?foo}}` - Advance `-mcpu=cyclone` to the newer M series major `-mcpu=apple-m1` - Use `-mcpu` so that `-mattr=-zcm` has a real effect - Add a test that generic arm64 doesn't optimize for ZCM - Distinguish 4 different assembly layouts: NOTCPU, CPU, NOTATTR, ATTR - Fix broken test logic, for example: `; NOT: mov [[REG2:w[0-9]+]], w3` matched `mov w1, w3` then `REG2` captured `w1` but then `; NOT: mov w1, [[REG2]]` matched by prefix `mov w1, w19` even though it should have matched `mov w1, w1`. This change adds explicit matches for all of the generated copies, and `--match-full-lines` param to FileCheck. 
- Remove nounwind and ssp from t function declaration - Remove nounwind from foo call sites - Separate different outputs for readability - Change return type from `i32` to `void` - FileCheck exact position of calls with `*-NEXT` --- .../AArch64/arm64-zero-cycle-regmov-gpr32.ll | 45 +++++++++++++++++++ .../AArch64/arm64-zero-cycle-regmov.ll | 23 ---------- 2 files changed, 45 insertions(+), 23 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr32.ll delete mode 100644 llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll diff --git a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr32.ll b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr32.ll new file mode 100644 index 0000000000000..5ef6d3e84805a --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr32.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -march=arm64 | FileCheck %s -check-prefixes=NOTCPU --match-full-lines +; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 | FileCheck %s -check-prefixes=CPU --match-full-lines +; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 -mattr=-zcm | FileCheck %s -check-prefixes=NOTATTR --match-full-lines +; RUN: llc < %s -mtriple=arm64-apple-macosx -mattr=+zcm | FileCheck %s -check-prefixes=ATTR --match-full-lines + +define void @t(i32 %a, i32 %b, i32 %c, i32 %d) { +entry: +; CHECK-LABEL: t: +; NOTCPU: mov w0, w2 +; NOTCPU: mov w1, w3 +; NOTCPU: mov [[REG2:w[0-9]+]], w3 +; NOTCPU: mov [[REG1:w[0-9]+]], w2 +; NOTCPU-NEXT: bl {{_?foo}} +; NOTCPU: mov w0, [[REG1]] +; NOTCPU: mov w1, [[REG2]] + +; CPU: mov [[REG2:x[0-9]+]], x3 +; CPU: mov [[REG1:x[0-9]+]], x2 +; CPU: mov x0, x2 +; CPU: mov x1, x3 +; CPU-NEXT: bl {{_?foo}} +; CPU: mov x0, [[REG1]] +; CPU: mov x1, [[REG2]] + +; NOTATTR: mov [[REG2:w[0-9]+]], w3 +; NOTATTR: mov [[REG1:w[0-9]+]], w2 +; NOTATTR: mov w0, w2 +; NOTATTR: mov w1, w3 +; NOTATTR-NEXT: bl {{_?foo}} +; NOTATTR: mov w0, [[REG1]] +; NOTATTR: mov w1, [[REG2]] + +; ATTR: mov x0, x2 +; ATTR: mov x1, 
x3 +; ATTR: mov [[REG2:x[0-9]+]], x3 +; ATTR: mov [[REG1:x[0-9]+]], x2 +; ATTR-NEXT: bl {{_?foo}} +; ATTR: mov x0, [[REG1]] +; ATTR: mov x1, [[REG2]] + %call = call i32 @foo(i32 %c, i32 %d) + %call1 = call i32 @foo(i32 %c, i32 %d) + unreachable +} + +declare i32 @foo(i32, i32) diff --git a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll deleted file mode 100644 index b390853d44bff..0000000000000 --- a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll +++ /dev/null @@ -1,23 +0,0 @@ -; RUN: llc < %s -mtriple=arm64-apple-ios -mattr=-zcm | FileCheck %s -check-prefixes=CHECK,NOT -; RUN: llc < %s -mtriple=arm64-apple-ios -mattr=+zcm | FileCheck %s -check-prefixes=CHECK,YES -; RUN: llc < %s -mtriple=arm64-apple-ios -mcpu=cyclone | FileCheck %s -check-prefixes=CHECK,YES - -; rdar://12254953 -define i32 @t(i32 %a, i32 %b, i32 %c, i32 %d) nounwind ssp { -entry: -; CHECK-LABEL: t: -; NOT: mov [[REG2:w[0-9]+]], w3 -; NOT: mov [[REG1:w[0-9]+]], w2 -; YES: mov [[REG2:x[0-9]+]], x3 -; YES: mov [[REG1:x[0-9]+]], x2 -; CHECK: bl _foo -; NOT: mov w0, [[REG1]] -; NOT: mov w1, [[REG2]] -; YES: mov x0, [[REG1]] -; YES: mov x1, [[REG2]] - %call = call i32 @foo(i32 %c, i32 %d) nounwind - %call1 = call i32 @foo(i32 %c, i32 %d) nounwind - unreachable -} - -declare i32 @foo(i32, i32)