Skip to content

Commit b203c39

Browse files
committed
[ARM] Mark function calls as possibly changing FPSCR
1 parent 908a32d commit b203c39

File tree

3 files changed

+209
-0
lines changed

3 files changed

+209
-0
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22036,6 +22036,11 @@ bool ARMTargetLowering::isComplexDeinterleavingOperationSupported(
2203622036
ScalarTy->isIntegerTy(32));
2203722037
}
2203822038

22039+
/// Returns the list of rounding control registers for ARM: a single-element
/// list containing FPSCR.
ArrayRef<MCPhysReg> ARMTargetLowering::getRoundingControlRegisters() const {
22040+
// Static array, so the storage outlives this call and the returned ArrayRef
// does not refer to a dead local.
static const MCPhysReg RCRegs[] = {ARM::FPSCR};
22041+
return RCRegs;
22042+
}
22043+
2203922044
Value *ARMTargetLowering::createComplexDeinterleavingIR(
2204022045
IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
2204122046
ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,

llvm/lib/Target/ARM/ARMISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -992,6 +992,8 @@ class VectorType;
992992

993993
bool isUnsupportedFloatingType(EVT VT) const;
994994

995+
/// Overrides the base-class hook that lists the rounding control registers;
/// the ARM definition reports FPSCR.
ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
996+
995997
SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
996998
SDValue ARMcc, SDValue Flags, SelectionDAG &DAG) const;
997999
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple arm-- -mattr=+vfp4 %s -o - | FileCheck %s
3+
4+
5+
; Div whose result is unused should be removed unless we have strict exceptions
6+
7+
; Baseline: a plain fdiv whose result is never used. The expected output
; contains only the return, i.e. the division is deleted.
; NOTE(review): the value is named %add although it is an fdiv.
define void @unused_div(float %x, float %y) {
8+
; CHECK-LABEL: unused_div:
9+
; CHECK: @ %bb.0: @ %entry
10+
; CHECK-NEXT: mov pc, lr
11+
entry:
12+
%add = fdiv float %x, %y
13+
ret void
14+
}
15+
16+
; Constrained fdiv with fpexcept.strict whose result is never used. The
; expected output still contains the vdiv.f32: with strict exceptions the
; division is kept even though its value is dead.
define void @unused_div_fpexcept_strict(float %x, float %y) #0 {
17+
; CHECK-LABEL: unused_div_fpexcept_strict:
18+
; CHECK: @ %bb.0: @ %entry
19+
; CHECK-NEXT: vmov s0, r1
20+
; CHECK-NEXT: vmov s2, r0
21+
; CHECK-NEXT: vdiv.f32 s0, s2, s0
22+
; CHECK-NEXT: mov pc, lr
23+
entry:
24+
%add = call float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
25+
ret void
26+
}
27+
28+
; Constrained fdiv with round.dynamic and fpexcept.ignore whose result is never
; used. The expected output contains only the return: dynamic rounding alone
; does not keep a dead division alive.
define void @unused_div_round_dynamic(float %x, float %y) #0 {
29+
; CHECK-LABEL: unused_div_round_dynamic:
30+
; CHECK: @ %bb.0: @ %entry
31+
; CHECK-NEXT: mov pc, lr
32+
entry:
33+
%add = call float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
34+
ret void
35+
}
36+
37+
38+
; Machine CSE should eliminate the second add unless we have strict exceptions
39+
40+
; Baseline: the same fadd of %x and %y appears in the entry block and again in
; if.then. The expected output has a single vadd.f32, and the conditional
; multiply squares that one result (vmulne.f32 s0, s0, s0).
define float @add_twice(float %x, float %y, i32 %n) {
41+
; CHECK-LABEL: add_twice:
42+
; CHECK: @ %bb.0: @ %entry
43+
; CHECK-NEXT: vmov s0, r1
44+
; CHECK-NEXT: cmp r2, #0
45+
; CHECK-NEXT: vmov s2, r0
46+
; CHECK-NEXT: vadd.f32 s0, s2, s0
47+
; CHECK-NEXT: vmulne.f32 s0, s0, s0
48+
; CHECK-NEXT: vmov r0, s0
49+
; CHECK-NEXT: mov pc, lr
50+
entry:
51+
%add = fadd float %x, %y
52+
%tobool.not = icmp eq i32 %n, 0
53+
br i1 %tobool.not, label %if.end, label %if.then
54+
55+
if.then:
56+
%add1 = fadd float %x, %y
57+
%mul = fmul float %add, %add1
58+
br label %if.end
59+
60+
if.end:
61+
%a.0 = phi float [ %mul, %if.then ], [ %add, %entry ]
62+
ret float %a.0
63+
}
64+
65+
; Same control flow as add_twice, but every operation is a constrained
; intrinsic with fpexcept.strict. The expected output keeps both adds: an
; unconditional vadd.f32 followed by a predicated vaddne.f32.
define float @add_twice_fpexcept_strict(float %x, float %y, i32 %n) #0 {
66+
; CHECK-LABEL: add_twice_fpexcept_strict:
67+
; CHECK: @ %bb.0: @ %entry
68+
; CHECK-NEXT: vmov s2, r1
69+
; CHECK-NEXT: cmp r2, #0
70+
; CHECK-NEXT: vmov s4, r0
71+
; CHECK-NEXT: vadd.f32 s0, s4, s2
72+
; CHECK-NEXT: vaddne.f32 s2, s4, s2
73+
; CHECK-NEXT: vmulne.f32 s0, s0, s2
74+
; CHECK-NEXT: vmov r0, s0
75+
; CHECK-NEXT: mov pc, lr
76+
entry:
77+
%add = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
78+
%tobool.not = icmp eq i32 %n, 0
79+
br i1 %tobool.not, label %if.end, label %if.then
80+
81+
if.then:
82+
%add1 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
83+
%mul = call float @llvm.experimental.constrained.fmul.f32(float %add, float %add1, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
84+
br label %if.end
85+
86+
if.end:
87+
%a.0 = phi float [ %mul, %if.then ], [ %add, %entry ]
88+
ret float %a.0
89+
}
90+
91+
; Same control flow as add_twice, using constrained intrinsics with
; round.dynamic and fpexcept.ignore. Nothing between the two adds changes the
; rounding mode, and the expected output has a single vadd.f32, matching the
; unconstrained baseline.
define float @add_twice_round_dynamic(float %x, float %y, i32 %n) #0 {
92+
; CHECK-LABEL: add_twice_round_dynamic:
93+
; CHECK: @ %bb.0: @ %entry
94+
; CHECK-NEXT: vmov s0, r1
95+
; CHECK-NEXT: cmp r2, #0
96+
; CHECK-NEXT: vmov s2, r0
97+
; CHECK-NEXT: vadd.f32 s0, s2, s0
98+
; CHECK-NEXT: vmulne.f32 s0, s0, s0
99+
; CHECK-NEXT: vmov r0, s0
100+
; CHECK-NEXT: mov pc, lr
101+
entry:
102+
%add = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
103+
%tobool.not = icmp eq i32 %n, 0
104+
br i1 %tobool.not, label %if.end, label %if.then
105+
106+
if.then:
107+
%add1 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
108+
%mul = call float @llvm.experimental.constrained.fmul.f32(float %add, float %add1, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
109+
br label %if.end
110+
111+
if.end:
112+
%a.0 = phi float [ %mul, %if.then ], [ %add, %entry ]
113+
ret float %a.0
114+
}
115+
116+
; Two adds separated by llvm.set.rounding should be preserved when rounding is
117+
; dynamic (as they may give different results) or when we have strict exceptions
118+
; (the llvm.set.rounding is irrelevant, but both could trap).
119+
120+
; Baseline: two unconstrained fadds of the same operands with an
; llvm.set.rounding call between them. The expected output has a single
; vadd.f32 and computes the difference as vsub.f32 s0, s0, s0, i.e. the two adds
; are treated as identical despite the FPSCR writes.
define float @set_rounding(float %x, float %y) {
121+
; CHECK-LABEL: set_rounding:
122+
; CHECK: @ %bb.0: @ %entry
123+
; CHECK-NEXT: vmrs r2, fpscr
124+
; CHECK-NEXT: vmov s2, r0
125+
; CHECK-NEXT: vmov s0, r1
126+
; CHECK-NEXT: vadd.f32 s0, s2, s0
127+
; CHECK-NEXT: vsub.f32 s0, s0, s0
128+
; CHECK-NEXT: orr r0, r2, #12582912
129+
; CHECK-NEXT: vmsr fpscr, r0
130+
; CHECK-NEXT: vmov r0, s0
131+
; CHECK-NEXT: vmrs r1, fpscr
132+
; CHECK-NEXT: bic r1, r1, #12582912
133+
; CHECK-NEXT: vmsr fpscr, r1
134+
; CHECK-NEXT: mov pc, lr
135+
entry:
136+
%add1 = fadd float %x, %y
137+
call void @llvm.set.rounding(i32 0)
138+
%add2 = fadd float %x, %y
139+
call void @llvm.set.rounding(i32 1)
140+
%sub = fsub float %add1, %add2
141+
ret float %sub
142+
}
143+
144+
; fpexcept.strict variant of set_rounding. The expected output keeps both
; vadd.f32 instructions, one before and one after the first vmsr fpscr, and
; subtracts the two distinct results.
define float @set_rounding_fpexcept_strict(float %x, float %y) #0 {
145+
; CHECK-LABEL: set_rounding_fpexcept_strict:
146+
; CHECK: @ %bb.0: @ %entry
147+
; CHECK-NEXT: vmov s0, r1
148+
; CHECK-NEXT: vmov s2, r0
149+
; CHECK-NEXT: vadd.f32 s4, s2, s0
150+
; CHECK-NEXT: vmrs r0, fpscr
151+
; CHECK-NEXT: orr r0, r0, #12582912
152+
; CHECK-NEXT: vmsr fpscr, r0
153+
; CHECK-NEXT: vadd.f32 s0, s2, s0
154+
; CHECK-NEXT: vmrs r0, fpscr
155+
; CHECK-NEXT: bic r0, r0, #12582912
156+
; CHECK-NEXT: vmsr fpscr, r0
157+
; CHECK-NEXT: vsub.f32 s0, s4, s0
158+
; CHECK-NEXT: vmov r0, s0
159+
; CHECK-NEXT: mov pc, lr
160+
entry:
161+
%add1 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
162+
call void @llvm.set.rounding(i32 0) #0
163+
%add2 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
164+
call void @llvm.set.rounding(i32 1) #0
165+
%sub = call float @llvm.experimental.constrained.fsub.f32(float %add1, float %add2, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
166+
ret float %sub
167+
}
168+
169+
; round.dynamic variant of set_rounding. The expected output keeps both
; vadd.f32 instructions with the first vmsr fpscr between them, and subtracts
; the two distinct results.
define float @set_rounding_round_dynamic(float %x, float %y) #0 {
170+
; CHECK-LABEL: set_rounding_round_dynamic:
171+
; CHECK: @ %bb.0: @ %entry
172+
; CHECK-NEXT: vmov s2, r0
173+
; CHECK-NEXT: vmrs r0, fpscr
174+
; CHECK-NEXT: vmov s0, r1
175+
; CHECK-NEXT: vadd.f32 s4, s2, s0
176+
; CHECK-NEXT: orr r0, r0, #12582912
177+
; CHECK-NEXT: vmsr fpscr, r0
178+
; CHECK-NEXT: vmrs r0, fpscr
179+
; CHECK-NEXT: vadd.f32 s0, s2, s0
180+
; CHECK-NEXT: bic r0, r0, #12582912
181+
; CHECK-NEXT: vmsr fpscr, r0
182+
; CHECK-NEXT: vsub.f32 s0, s4, s0
183+
; CHECK-NEXT: vmov r0, s0
184+
; CHECK-NEXT: mov pc, lr
185+
entry:
186+
%add1 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
187+
call void @llvm.set.rounding(i32 0) #0
188+
%add2 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
189+
call void @llvm.set.rounding(i32 1) #0
190+
%sub = call float @llvm.experimental.constrained.fsub.f32(float %add1, float %add2, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
191+
ret float %sub
192+
}
193+
194+
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
195+
declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
196+
declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
197+
declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
198+
declare i32 @llvm.get.rounding()
199+
declare void @llvm.set.rounding(i32)
200+
201+
attributes #0 = { strictfp }
202+

0 commit comments

Comments
 (0)