Skip to content

Commit 2859165

Browse files
authored
[AArch64] Add FeatureFuseCCSelect to a number of CPU configurations. (#153188)
This marks CMP+CSel as fusable according to the SWOGs of cortex-a78, cortex-a710, cortex-a715, cortex-a720, cortex-a725, cortex-x4, cortex-x925, neoverse-n2, neoverse-n3, neoverse-v1, neoverse-v2, and neoverse-v3.
1 parent 47737cd commit 2859165

File tree

2 files changed

+54
-5
lines changed

2 files changed

+54
-5
lines changed

llvm/lib/Target/AArch64/AArch64Processors.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ def TuneA78 : SubtargetFeature<"a78", "ARMProcFamily", "CortexA78",
134134
FeatureCmpBccFusion,
135135
FeatureFuseAES,
136136
FeatureFuseAdrpAdd,
137+
FeatureFuseCCSelect,
137138
FeatureAddrLSLSlow14,
138139
FeatureALULSLFast,
139140
FeaturePostRAScheduler,
@@ -146,6 +147,7 @@ def TuneA78AE : SubtargetFeature<"a78ae", "ARMProcFamily",
146147
FeatureCmpBccFusion,
147148
FeatureFuseAES,
148149
FeatureFuseAdrpAdd,
150+
FeatureFuseCCSelect,
149151
FeatureAddrLSLSlow14,
150152
FeatureALULSLFast,
151153
FeaturePostRAScheduler,
@@ -158,6 +160,7 @@ def TuneA78C : SubtargetFeature<"a78c", "ARMProcFamily",
158160
FeatureCmpBccFusion,
159161
FeatureFuseAES,
160162
FeatureFuseAdrpAdd,
163+
FeatureFuseCCSelect,
161164
FeatureAddrLSLSlow14,
162165
FeatureALULSLFast,
163166
FeaturePostRAScheduler,
@@ -169,6 +172,7 @@ def TuneA710 : SubtargetFeature<"a710", "ARMProcFamily", "CortexA710",
169172
FeatureCmpBccFusion,
170173
FeatureFuseAES,
171174
FeatureFuseAdrpAdd,
175+
FeatureFuseCCSelect,
172176
FeatureALULSLFast,
173177
FeaturePostRAScheduler,
174178
FeatureEnableSelectOptimize,
@@ -181,6 +185,7 @@ def TuneA715 : SubtargetFeature<"a715", "ARMProcFamily", "CortexA715",
181185
FeatureCmpBccFusion,
182186
FeatureALULSLFast,
183187
FeatureFuseAdrpAdd,
188+
FeatureFuseCCSelect,
184189
FeatureEnableSelectOptimize,
185190
FeaturePredictableSelectIsExpensive]>;
186191

@@ -191,6 +196,7 @@ def TuneA720 : SubtargetFeature<"a720", "ARMProcFamily", "CortexA720",
191196
FeatureCmpBccFusion,
192197
FeatureALULSLFast,
193198
FeatureFuseAdrpAdd,
199+
FeatureFuseCCSelect,
194200
FeatureEnableSelectOptimize,
195201
FeaturePredictableSelectIsExpensive]>;
196202

@@ -201,6 +207,7 @@ def TuneA720AE : SubtargetFeature<"a720ae", "ARMProcFamily", "CortexA720",
201207
FeatureCmpBccFusion,
202208
FeatureALULSLFast,
203209
FeatureFuseAdrpAdd,
210+
FeatureFuseCCSelect,
204211
FeatureEnableSelectOptimize,
205212
FeaturePredictableSelectIsExpensive]>;
206213

@@ -212,6 +219,7 @@ def TuneA725 : SubtargetFeature<"cortex-a725", "ARMProcFamily",
212219
FeatureCmpBccFusion,
213220
FeatureALULSLFast,
214221
FeatureFuseAdrpAdd,
222+
FeatureFuseCCSelect,
215223
FeatureEnableSelectOptimize,
216224
FeaturePredictableSelectIsExpensive]>;
217225

@@ -262,6 +270,7 @@ def TuneX4 : SubtargetFeature<"cortex-x4", "ARMProcFamily", "CortexX4",
262270
"Cortex-X4 ARM processors", [
263271
FeatureALULSLFast,
264272
FeatureFuseAdrpAdd,
273+
FeatureFuseCCSelect,
265274
FeatureFuseAES,
266275
FeaturePostRAScheduler,
267276
FeatureEnableSelectOptimize,
@@ -273,6 +282,7 @@ def TuneX925 : SubtargetFeature<"cortex-x925", "ARMProcFamily",
273282
"CortexX925", "Cortex-X925 ARM processors",[
274283
FeatureALULSLFast,
275284
FeatureFuseAdrpAdd,
285+
FeatureFuseCCSelect,
276286
FeatureFuseAES,
277287
FeaturePostRAScheduler,
278288
FeatureEnableSelectOptimize,
@@ -548,6 +558,7 @@ def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2
548558
"Neoverse N2 ARM processors", [
549559
FeatureFuseAES,
550560
FeatureFuseAdrpAdd,
561+
FeatureFuseCCSelect,
551562
FeatureALULSLFast,
552563
FeaturePostRAScheduler,
553564
FeatureEnableSelectOptimize,
@@ -559,6 +570,7 @@ def TuneNeoverseN3 : SubtargetFeature<"neoversen3", "ARMProcFamily", "NeoverseN3
559570
FeaturePostRAScheduler,
560571
FeatureALULSLFast,
561572
FeatureFuseAdrpAdd,
573+
FeatureFuseCCSelect,
562574
FeatureEnableSelectOptimize,
563575
FeaturePredictableSelectIsExpensive]>;
564576

@@ -575,6 +587,7 @@ def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1
575587
"Neoverse V1 ARM processors", [
576588
FeatureFuseAES,
577589
FeatureFuseAdrpAdd,
590+
FeatureFuseCCSelect,
578591
FeatureAddrLSLSlow14,
579592
FeatureALULSLFast,
580593
FeaturePostRAScheduler,
@@ -587,6 +600,7 @@ def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2
587600
FeatureFuseAES,
588601
FeatureCmpBccFusion,
589602
FeatureFuseAdrpAdd,
603+
FeatureFuseCCSelect,
590604
FeatureALULSLFast,
591605
FeaturePostRAScheduler,
592606
FeatureEnableSelectOptimize,
@@ -600,6 +614,7 @@ def TuneNeoverseV3 : SubtargetFeature<"neoversev3", "ARMProcFamily", "NeoverseV3
600614
FeatureFuseAES,
601615
FeatureALULSLFast,
602616
FeatureFuseAdrpAdd,
617+
FeatureFuseCCSelect,
603618
FeaturePostRAScheduler,
604619
FeatureEnableSelectOptimize,
605620
FeatureAvoidLDAPUR,
@@ -610,6 +625,7 @@ def TuneNeoverseV3AE : SubtargetFeature<"neoversev3AE", "ARMProcFamily", "Neover
610625
FeatureFuseAES,
611626
FeatureALULSLFast,
612627
FeatureFuseAdrpAdd,
628+
FeatureFuseCCSelect,
613629
FeaturePostRAScheduler,
614630
FeatureEnableSelectOptimize,
615631
FeatureAvoidLDAPUR,

llvm/test/CodeGen/AArch64/misched-fusion-csel.ll

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,42 @@
1-
; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=fuse-csel | FileCheck %s
2-
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s
3-
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 | FileCheck %s
4-
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m5 | FileCheck %s
1+
; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=fuse-csel -debug-only=machine-scheduler 2>&1 | FileCheck %s
2+
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 -debug-only=machine-scheduler 2>&1 | FileCheck %s
3+
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 -debug-only=machine-scheduler 2>&1 | FileCheck %s
4+
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m5 -debug-only=machine-scheduler 2>&1 | FileCheck %s
5+
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a78 -debug-only=machine-scheduler 2>&1 | FileCheck %s
6+
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a710 -debug-only=machine-scheduler 2>&1 | FileCheck %s
7+
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a715 -debug-only=machine-scheduler 2>&1 | FileCheck %s
8+
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a720 -debug-only=machine-scheduler 2>&1 | FileCheck %s
9+
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a725 -debug-only=machine-scheduler 2>&1 | FileCheck %s
10+
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-x4 -debug-only=machine-scheduler 2>&1 | FileCheck %s
11+
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-x925 -debug-only=machine-scheduler 2>&1 | FileCheck %s
12+
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-n2 -debug-only=machine-scheduler 2>&1 | FileCheck %s
13+
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-n3 -debug-only=machine-scheduler 2>&1 | FileCheck %s
14+
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-v1 -debug-only=machine-scheduler 2>&1 | FileCheck %s
15+
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-v2 -debug-only=machine-scheduler 2>&1 | FileCheck %s
16+
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-v3 -debug-only=machine-scheduler 2>&1 | FileCheck %s
17+
; REQUIRES: asserts
518

6-
target triple = "aarch64-unknown"
19+
; Check that the scheduling model has an edge between the SUBS and the CSEL.
20+
; CHECK-LABEL: test_sub_cselw:%bb.0
21+
; CHECK: SU(2): %3:gpr32common = ADDWri %1:gpr32common, 7, 0
22+
; CHECK: SU(3): dead $wzr = SUBSWri %0:gpr32common, 13, 0, implicit-def $nzcv
23+
; CHECK: Successors:
24+
; CHECK: SU(4): Ord Latency=0 Cluster
25+
; CHECK: SU(4): %5:gpr32 = CSELWr %0:gpr32common, %3:gpr32common, 0, implicit killed $nzcv
26+
; CHECK: Predecessors:
27+
; CHECK: SU(3): Ord Latency=0 Cluster
28+
; CHECK: SU(5): $w0 = COPY %5:gpr32
29+
30+
31+
; CHECK-LABEL: test_sub_cselx:%bb.0
32+
; CHECK: SU(2): %3:gpr64common = ADDXri %1:gpr64common, 7, 0
33+
; CHECK: SU(3): dead $xzr = SUBSXri %0:gpr64common, 13, 0, implicit-def $nzcv
34+
; CHECK: Successors:
35+
; CHECK: SU(4): Ord Latency=0 Cluster
36+
; CHECK: SU(4): %5:gpr64 = CSELXr %0:gpr64common, %3:gpr64common, 0, implicit killed $nzcv
37+
; CHECK: Predecessors:
38+
; CHECK: SU(3): Ord Latency=0 Cluster
39+
; CHECK: SU(5): $x0 = COPY %5:gpr64
740

841
define i32 @test_sub_cselw(i32 %a0, i32 %a1, i32 %a2) {
942
entry:

0 commit comments

Comments
 (0)