Skip to content

Commit 1cc4f8d

Browse files
committed
[ARM] Expand vector reduction intrinsics on soft float
Follow-up to D73135. If the target doesn't have hard float (the default for ARM), we hit an assertion failure when trying to soften the result of vector reduction intrinsics. This patch marks these intrinsics for expansion as well. (It is a bit odd to use vectors on a target without hard float, but that's where you end up if you expose target-independent vector types.) Differential Revision: https://reviews.llvm.org/D73854
1 parent 9eb74f6 commit 1cc4f8d

File tree

2 files changed

+71
-1
lines changed

2 files changed

+71
-1
lines changed

llvm/lib/Target/ARM/ARMTargetTransformInfo.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,14 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
174174
case Intrinsic::experimental_vector_reduce_v2_fadd:
175175
case Intrinsic::experimental_vector_reduce_v2_fmul:
176176
// We don't have legalization support for ordered FP reductions.
177-
return !II->getFastMathFlags().allowReassoc();
177+
if (!II->getFastMathFlags().allowReassoc())
178+
return true;
179+
LLVM_FALLTHROUGH;
180+
181+
case Intrinsic::experimental_vector_reduce_fmin:
182+
case Intrinsic::experimental_vector_reduce_fmax:
183+
// Can't legalize reductions with soft floats.
184+
return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs();
178185

179186
default:
180187
// Don't expand anything else, let legalization deal with it.
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK
3+
4+
declare float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float, <4 x float>)
5+
declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double, <2 x double>)
6+
declare fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128, <2 x fp128>)
7+
8+
define float @test_v4f32(<4 x float> %a) nounwind {
9+
; CHECK-LABEL: test_v4f32:
10+
; CHECK: @ %bb.0:
11+
; CHECK-NEXT: .save {r4, r5, r6, lr}
12+
; CHECK-NEXT: push {r4, r5, r6, lr}
13+
; CHECK-NEXT: mov r5, r1
14+
; CHECK-NEXT: mov r1, r2
15+
; CHECK-NEXT: mov r4, r3
16+
; CHECK-NEXT: bl __aeabi_fadd
17+
; CHECK-NEXT: mov r6, r0
18+
; CHECK-NEXT: mov r0, r5
19+
; CHECK-NEXT: mov r1, r4
20+
; CHECK-NEXT: bl __aeabi_fadd
21+
; CHECK-NEXT: mov r1, r0
22+
; CHECK-NEXT: mov r0, r6
23+
; CHECK-NEXT: bl __aeabi_fadd
24+
; CHECK-NEXT: pop {r4, r5, r6, lr}
25+
; CHECK-NEXT: mov pc, lr
26+
%b = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.0, <4 x float> %a)
27+
ret float %b
28+
}
29+
30+
define double @test_v2f64(<2 x double> %a) nounwind {
31+
; CHECK-LABEL: test_v2f64:
32+
; CHECK: @ %bb.0:
33+
; CHECK-NEXT: .save {r11, lr}
34+
; CHECK-NEXT: push {r11, lr}
35+
; CHECK-NEXT: bl __aeabi_dadd
36+
; CHECK-NEXT: pop {r11, lr}
37+
; CHECK-NEXT: mov pc, lr
38+
%b = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v2f64(double zeroinitializer, <2 x double> %a)
39+
ret double %b
40+
}
41+
42+
define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
43+
; CHECK-LABEL: test_v2f128:
44+
; CHECK: @ %bb.0:
45+
; CHECK-NEXT: .save {r11, lr}
46+
; CHECK-NEXT: push {r11, lr}
47+
; CHECK-NEXT: .pad #16
48+
; CHECK-NEXT: sub sp, sp, #16
49+
; CHECK-NEXT: ldr r12, [sp, #36]
50+
; CHECK-NEXT: str r12, [sp, #12]
51+
; CHECK-NEXT: ldr r12, [sp, #32]
52+
; CHECK-NEXT: str r12, [sp, #8]
53+
; CHECK-NEXT: ldr r12, [sp, #28]
54+
; CHECK-NEXT: str r12, [sp, #4]
55+
; CHECK-NEXT: ldr r12, [sp, #24]
56+
; CHECK-NEXT: str r12, [sp]
57+
; CHECK-NEXT: bl __addtf3
58+
; CHECK-NEXT: add sp, sp, #16
59+
; CHECK-NEXT: pop {r11, lr}
60+
; CHECK-NEXT: mov pc, lr
61+
%b = call fast fp128 @llvm.experimental.vector.reduce.v2.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
62+
ret fp128 %b
63+
}

0 commit comments

Comments
 (0)