diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td index 24fbe207c4969..05562516e5198 100644 --- a/llvm/lib/Target/AArch64/AArch64Features.td +++ b/llvm/lib/Target/AArch64/AArch64Features.td @@ -612,8 +612,11 @@ def FeatureExperimentalZeroingPseudos def FeatureNoSVEFPLD1R : SubtargetFeature<"no-sve-fp-ld1r", "NoSVEFPLD1R", "true", "Avoid using LD1RX instructions for FP">; -def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", - "Has zero-cycle register moves">; +def FeatureZCRegMoveGPR64 : SubtargetFeature<"zcm-gpr64", "HasZeroCycleRegMoveGPR64", "true", + "Has zero-cycle register moves for GPR64 registers">; + +def FeatureZCRegMoveGPR32 : SubtargetFeature<"zcm-gpr32", "HasZeroCycleRegMoveGPR32", "true", + "Has zero-cycle register moves for GPR32 registers">; def FeatureZCRegMoveFPR64 : SubtargetFeature<"zcm-fpr64", "HasZeroCycleRegMoveFPR64", "true", "Has zero-cycle register moves for FPR64 registers">; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index c3837cfe73d28..652c34ed87f6e 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -5037,7 +5037,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { // If either operand is WSP, expand to ADD #0. - if (Subtarget.hasZeroCycleRegMove()) { + if (Subtarget.hasZeroCycleRegMoveGPR64() && + !Subtarget.hasZeroCycleRegMoveGPR32()) { // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move. MCRegister DestRegX = TRI->getMatchingSuperReg( DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass); @@ -5063,7 +5064,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addImm(0) .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); } else { - if (Subtarget.hasZeroCycleRegMove()) { + if (Subtarget.hasZeroCycleRegMoveGPR64() && + !Subtarget.hasZeroCycleRegMoveGPR32()) { // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. MCRegister DestRegX = TRI->getMatchingSuperReg( DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass); diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td index 4a5682475d107..dcccde4a4d666 100644 --- a/llvm/lib/Target/AArch64/AArch64Processors.td +++ b/llvm/lib/Target/AArch64/AArch64Processors.td @@ -311,7 +311,7 @@ def TuneAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7", FeatureDisableLatencySchedHeuristic, FeatureFuseAES, FeatureFuseCryptoEOR, FeatureStorePairSuppress, - FeatureZCRegMove, + FeatureZCRegMoveGPR64, FeatureZCRegMoveFPR64, FeatureZCZeroing, FeatureZCZeroingFPWorkaround]>; @@ -325,7 +325,7 @@ def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10", FeatureFuseAES, FeatureFuseCryptoEOR, FeatureStorePairSuppress, - FeatureZCRegMove, + FeatureZCRegMoveGPR64, FeatureZCRegMoveFPR64, FeatureZCZeroing]>; @@ -338,7 +338,7 @@ def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11", FeatureFuseAES, FeatureFuseCryptoEOR, FeatureStorePairSuppress, - FeatureZCRegMove, + FeatureZCRegMoveGPR64, FeatureZCRegMoveFPR64, FeatureZCZeroing]>; @@ -351,7 +351,7 @@ def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12", FeatureFuseAES, FeatureFuseCryptoEOR, FeatureStorePairSuppress, - FeatureZCRegMove, + FeatureZCRegMoveGPR64, FeatureZCRegMoveFPR64, FeatureZCZeroing]>; @@ -364,7 +364,7 @@ def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13", FeatureFuseAES, FeatureFuseCryptoEOR, FeatureStorePairSuppress, - FeatureZCRegMove, + FeatureZCRegMoveGPR64, FeatureZCRegMoveFPR64, FeatureZCZeroing]>; @@ -382,7 +382,7 @@ def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14", FeatureFuseCryptoEOR, FeatureFuseLiterals, FeatureStorePairSuppress, - FeatureZCRegMove, + FeatureZCRegMoveGPR64, FeatureZCRegMoveFPR64, FeatureZCZeroing]>; @@ -400,7 +400,7 @@ def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15", FeatureFuseCryptoEOR, FeatureFuseLiterals, FeatureStorePairSuppress, - FeatureZCRegMove, + FeatureZCRegMoveGPR64, FeatureZCRegMoveFPR64, FeatureZCZeroing]>; @@ -418,7 +418,7 @@ def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16", FeatureFuseCryptoEOR, FeatureFuseLiterals, FeatureStorePairSuppress, - FeatureZCRegMove, + FeatureZCRegMoveGPR64, FeatureZCRegMoveFPR64, FeatureZCZeroing]>; @@ -436,7 +436,7 @@ def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17", FeatureFuseCryptoEOR, FeatureFuseLiterals, FeatureStorePairSuppress, - FeatureZCRegMove, + FeatureZCRegMoveGPR64, FeatureZCRegMoveFPR64, FeatureZCZeroing]>; @@ -453,7 +453,7 @@ def TuneAppleM4 : SubtargetFeature<"apple-m4", "ARMProcFamily", "AppleM4", FeatureFuseCCSelect, FeatureFuseCryptoEOR, FeatureFuseLiterals, - FeatureZCRegMove, + FeatureZCRegMoveGPR64, FeatureZCRegMoveFPR64, FeatureZCZeroing ]>; diff --git a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr.ll b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr.ll new file mode 100644 index 0000000000000..e14e69b5e6a2a --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr.ll @@ -0,0 +1,54 @@ +; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s -check-prefixes=NOTCPU-LINUX --match-full-lines +; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=generic | FileCheck %s -check-prefixes=NOTCPU-APPLE --match-full-lines +; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 | FileCheck %s -check-prefixes=CPU --match-full-lines +; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 -mattr=-zcm-gpr64 | FileCheck %s -check-prefixes=NOTATTR --match-full-lines +; RUN: llc < %s -mtriple=arm64-apple-macosx -mattr=+zcm-gpr64 | FileCheck %s -check-prefixes=ATTR --match-full-lines + +define void @zero_cycle_regmov_GPR32(i32 %a, i32 %b, i32 %c, i32 %d) { +entry: +; CHECK-LABEL: t: +; NOTCPU-LINUX: mov w0, w2 +; NOTCPU-LINUX: mov w1, w3 +; NOTCPU-LINUX: mov [[REG2:w[0-9]+]], w3 +; NOTCPU-LINUX: mov [[REG1:w[0-9]+]], w2 +; NOTCPU-LINUX-NEXT: bl {{_?foo_i32}} +; NOTCPU-LINUX: mov w0, [[REG1]] +; NOTCPU-LINUX: mov w1, [[REG2]] + +; NOTCPU-APPLE: mov w0, w2 +; NOTCPU-APPLE: mov w1, w3 +; NOTCPU-APPLE: mov [[REG2:w[0-9]+]], w3 +; NOTCPU-APPLE: mov [[REG1:w[0-9]+]], w2 +; NOTCPU-APPLE-NEXT: bl {{_?foo_i32}} +; NOTCPU-APPLE: mov w0, [[REG1]] +; NOTCPU-APPLE: mov w1, [[REG2]] + +; CPU: mov [[REG2:x[0-9]+]], x3 +; CPU: mov [[REG1:x[0-9]+]], x2 +; CPU: mov x0, x2 +; CPU: mov x1, x3 +; CPU-NEXT: bl {{_?foo_i32}} +; CPU: mov x0, [[REG1]] +; CPU: mov x1, [[REG2]] + +; NOTATTR: mov [[REG2:w[0-9]+]], w3 +; NOTATTR: mov [[REG1:w[0-9]+]], w2 +; NOTATTR: mov w0, w2 +; NOTATTR: mov w1, w3 +; NOTATTR-NEXT: bl {{_?foo_i32}} +; NOTATTR: mov w0, [[REG1]] +; NOTATTR: mov w1, [[REG2]] + +; ATTR: mov x0, x2 +; ATTR: mov x1, x3 +; ATTR: mov [[REG2:x[0-9]+]], x3 +; ATTR: mov [[REG1:x[0-9]+]], x2 +; ATTR-NEXT: bl {{_?foo_i32}} +; ATTR: mov x0, [[REG1]] +; ATTR: mov x1, [[REG2]] + %call = call i32 @foo_i32(i32 %c, i32 %d) + %call1 = call i32 @foo_i32(i32 %c, i32 %d) + unreachable +} + +declare i32 @foo_i32(i32, i32) diff --git a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr32.ll b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr32.ll deleted file mode 100644 index 5ef6d3e84805a..0000000000000 --- a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr32.ll +++ /dev/null @@ -1,45 +0,0 @@ -; RUN: llc < %s -march=arm64 | FileCheck %s -check-prefixes=NOTCPU --match-full-lines -; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 | FileCheck %s -check-prefixes=CPU --match-full-lines -; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 -mattr=-zcm | FileCheck %s -check-prefixes=NOTATTR --match-full-lines -; RUN: llc < %s -mtriple=arm64-apple-macosx -mattr=+zcm | FileCheck %s -check-prefixes=ATTR --match-full-lines - -define void @t(i32 %a, i32 %b, i32 %c, i32 %d) { -entry: -; CHECK-LABEL: t: -; NOTCPU: mov w0, w2 -; NOTCPU: mov w1, w3 -; NOTCPU: mov [[REG2:w[0-9]+]], w3 -; NOTCPU: mov [[REG1:w[0-9]+]], w2 -; NOTCPU-NEXT: bl {{_?foo}} -; NOTCPU: mov w0, [[REG1]] -; NOTCPU: mov w1, [[REG2]] - -; CPU: mov [[REG2:x[0-9]+]], x3 -; CPU: mov [[REG1:x[0-9]+]], x2 -; CPU: mov x0, x2 -; CPU: mov x1, x3 -; CPU-NEXT: bl {{_?foo}} -; CPU: mov x0, [[REG1]] -; CPU: mov x1, [[REG2]] - -; NOTATTR: mov [[REG2:w[0-9]+]], w3 -; NOTATTR: mov [[REG1:w[0-9]+]], w2 -; NOTATTR: mov w0, w2 -; NOTATTR: mov w1, w3 -; NOTATTR-NEXT: bl {{_?foo}} -; NOTATTR: mov w0, [[REG1]] -; NOTATTR: mov w1, [[REG2]] - -; ATTR: mov x0, x2 -; ATTR: mov x1, x3 -; ATTR: mov [[REG2:x[0-9]+]], x3 -; ATTR: mov [[REG1:x[0-9]+]], x2 -; ATTR-NEXT: bl {{_?foo}} -; ATTR: mov x0, [[REG1]] -; ATTR: mov x1, [[REG2]] - %call = call i32 @foo(i32 %c, i32 %d) - %call1 = call i32 @foo(i32 %c, i32 %d) - unreachable -} - -declare i32 @foo(i32, i32)