Skip to content

Commit fba17cd

Browse files
authored
[AArch64] Guard fptosi+sitofp patterns with one use checks. (#156407)
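Otherwise, when the integer result of the fp-to-int conversion has other uses, we can end up with more instructions: the conversion has to be emitted twice, once into a general-purpose register for the other users (`fcvtzu w0, s0`) and once into an FP register for the folded pattern (`fcvtzu s0, s0`).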
1 parent f1dcdaa commit fba17cd

File tree

2 files changed

+16
-18
lines changed

2 files changed

+16
-18
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6706,20 +6706,24 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
67066706
// Some float -> int -> float conversion patterns for which we want to keep the
67076707
// int values in FP registers using the corresponding NEON instructions to
67086708
// avoid more costly int <-> fp register transfers.
6709+
let HasOneUse = 1 in {
6710+
def any_fp_to_sint_oneuse: PatFrag<(ops node:$src0), (any_fp_to_sint $src0)>;
6711+
def any_fp_to_uint_oneuse: PatFrag<(ops node:$src0), (any_fp_to_uint $src0)>;
6712+
}
67096713
let Predicates = [HasNEONandIsSME2p2StreamingSafe] in {
6710-
def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
6714+
def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint_oneuse f64:$Rn)))),
67116715
(SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
6712-
def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
6716+
def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint_oneuse f32:$Rn)))),
67136717
(SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
6714-
def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
6718+
def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint_oneuse f64:$Rn)))),
67156719
(UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
6716-
def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
6720+
def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint_oneuse f32:$Rn)))),
67176721
(UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
67186722

67196723
let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
6720-
def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
6724+
def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint_oneuse f16:$Rn)))),
67216725
(SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
6722-
def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
6726+
def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint_oneuse f16:$Rn)))),
67236727
(UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
67246728
}
67256729

llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -122,9 +122,8 @@ entry:
122122
define i64 @testu_f64_multiuse(double %x) {
123123
; CHECK-LABEL: testu_f64_multiuse:
124124
; CHECK: // %bb.0: // %entry
125-
; CHECK-NEXT: fcvtzu d1, d0
126125
; CHECK-NEXT: fcvtzu x8, d0
127-
; CHECK-NEXT: ucvtf d1, d1
126+
; CHECK-NEXT: ucvtf d1, x8
128127
; CHECK-NEXT: fcmp d0, d1
129128
; CHECK-NEXT: csel x0, x8, xzr, eq
130129
; CHECK-NEXT: ret
@@ -139,9 +138,8 @@ entry:
139138
define i32 @testu_f32_multiuse(float %x) {
140139
; CHECK-LABEL: testu_f32_multiuse:
141140
; CHECK: // %bb.0: // %entry
142-
; CHECK-NEXT: fcvtzu s1, s0
143141
; CHECK-NEXT: fcvtzu w8, s0
144-
; CHECK-NEXT: ucvtf s1, s1
142+
; CHECK-NEXT: ucvtf s1, w8
145143
; CHECK-NEXT: fcmp s0, s1
146144
; CHECK-NEXT: csel w0, w8, wzr, eq
147145
; CHECK-NEXT: ret
@@ -156,9 +154,8 @@ entry:
156154
define i32 @testu_f16_multiuse(half %x) {
157155
; CHECK-LABEL: testu_f16_multiuse:
158156
; CHECK: // %bb.0: // %entry
159-
; CHECK-NEXT: fcvtzu h1, h0
160157
; CHECK-NEXT: fcvtzu w8, h0
161-
; CHECK-NEXT: ucvtf h1, h1
158+
; CHECK-NEXT: ucvtf h1, w8
162159
; CHECK-NEXT: fcmp h0, h1
163160
; CHECK-NEXT: csel w0, w8, wzr, eq
164161
; CHECK-NEXT: ret
@@ -173,9 +170,8 @@ entry:
173170
define i64 @tests_f64_multiuse(double %x) {
174171
; CHECK-LABEL: tests_f64_multiuse:
175172
; CHECK: // %bb.0: // %entry
176-
; CHECK-NEXT: fcvtzs d1, d0
177173
; CHECK-NEXT: fcvtzs x8, d0
178-
; CHECK-NEXT: scvtf d1, d1
174+
; CHECK-NEXT: scvtf d1, x8
179175
; CHECK-NEXT: fcmp d0, d1
180176
; CHECK-NEXT: csel x0, x8, xzr, eq
181177
; CHECK-NEXT: ret
@@ -190,9 +186,8 @@ entry:
190186
define i32 @tests_f32_multiuse(float %x) {
191187
; CHECK-LABEL: tests_f32_multiuse:
192188
; CHECK: // %bb.0: // %entry
193-
; CHECK-NEXT: fcvtzs s1, s0
194189
; CHECK-NEXT: fcvtzs w8, s0
195-
; CHECK-NEXT: scvtf s1, s1
190+
; CHECK-NEXT: scvtf s1, w8
196191
; CHECK-NEXT: fcmp s0, s1
197192
; CHECK-NEXT: csel w0, w8, wzr, eq
198193
; CHECK-NEXT: ret
@@ -207,9 +202,8 @@ entry:
207202
define i32 @tests_f16_multiuse(half %x) {
208203
; CHECK-LABEL: tests_f16_multiuse:
209204
; CHECK: // %bb.0: // %entry
210-
; CHECK-NEXT: fcvtzs h1, h0
211205
; CHECK-NEXT: fcvtzs w8, h0
212-
; CHECK-NEXT: scvtf h1, h1
206+
; CHECK-NEXT: scvtf h1, w8
213207
; CHECK-NEXT: fcmp h0, h1
214208
; CHECK-NEXT: csel w0, w8, wzr, eq
215209
; CHECK-NEXT: ret

0 commit comments

Comments
 (0)