-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[SelectionDAG] Remove UnsafeFPMath in LegalizeDAG
#146316
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
1eac26d
to
c386827
Compare
UnsafeFPMath in LegalizeDAG
UnsafeFPMath in LegalizeDAG
@llvm/pr-subscribers-backend-arm @llvm/pr-subscribers-llvm-selectiondag Author: None (paperchalice) ChangesFull diff: https://github.com/llvm/llvm-project/pull/146316.diff 4 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 74172b230361d..b7a96cb2dc826 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3853,7 +3853,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
case ISD::FP_TO_FP16:
LLVM_DEBUG(dbgs() << "Legalizing FP_TO_FP16\n");
- if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {
+ if (!TLI.useSoftFloat() && Node->getFlags().hasApproximateFuncs()) {
SDValue Op = Node->getOperand(0);
MVT SVT = Op.getSimpleValueType();
if ((SVT == MVT::f64 || SVT == MVT::f80) &&
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 163646513918d..6eca7b73a9d76 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3923,11 +3923,15 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) {
// FPTrunc is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
SDLoc dl = getCurSDLoc();
+ SDNodeFlags Flags;
+ if (auto *TruncInst = dyn_cast<FPMathOperator>(&I))
+ Flags.copyFMF(*TruncInst);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
DAG.getTargetConstant(
- 0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
+ 0, dl, TLI.getPointerTy(DAG.getDataLayout())),
+ Flags));
}
void SelectionDAGBuilder::visitFPExt(const User &I) {
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
index 0a900f904bec5..89ce0bda41f8e 100644
--- a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
@@ -196,7 +196,7 @@ define amdgpu_kernel void @fptrunc_f32_to_f16(
ptr addrspace(1) %a) {
entry:
%a.val = load float, ptr addrspace(1) %a
- %r.val = fptrunc float %a.val to half
+ %r.val = fptrunc afn float %a.val to half
store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -401,7 +401,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
ptr addrspace(1) %a) {
entry:
%a.val = load double, ptr addrspace(1) %a
- %r.val = fptrunc double %a.val to half
+ %r.val = fptrunc afn double %a.val to half
store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -863,7 +863,7 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
ptr addrspace(1) %a) {
entry:
%a.val = load <2 x double>, ptr addrspace(1) %a
- %r.val = fptrunc <2 x double> %a.val to <2 x half>
+ %r.val = fptrunc afn <2 x double> %a.val to <2 x half>
store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
diff --git a/llvm/test/CodeGen/ARM/fp16.ll b/llvm/test/CodeGen/ARM/fp16.ll
index dc35fa34f42c1..9ff701050ac7e 100644
--- a/llvm/test/CodeGen/ARM/fp16.ll
+++ b/llvm/test/CodeGen/ARM/fp16.ll
@@ -86,8 +86,8 @@ define i16 @test_to_fp16(double %in) {
; CHECK-FP16-SAFE: bl __aeabi_d2h
-; CHECK-FP16-UNSAFE: vcvt.f32.f64 s0, d0
-; CHECK-FP16-UNSAFE-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-FP16-UNSAFE: vmov r0, r1, d0
+; CHECK-FP16-UNSAFE-NEXT: bl __aeabi_d2h
; CHECK-ARMV8: vcvtb.f16.f64 [[TMP:s[0-9]+]], d0
; CHECK-ARMV8: vmov r0, [[TMP]]
|
@llvm/pr-subscribers-backend-amdgpu Author: None (paperchalice) ChangesFull diff: https://github.com/llvm/llvm-project/pull/146316.diff 4 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 74172b230361d..b7a96cb2dc826 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3853,7 +3853,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
case ISD::FP_TO_FP16:
LLVM_DEBUG(dbgs() << "Legalizing FP_TO_FP16\n");
- if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {
+ if (!TLI.useSoftFloat() && Node->getFlags().hasApproximateFuncs()) {
SDValue Op = Node->getOperand(0);
MVT SVT = Op.getSimpleValueType();
if ((SVT == MVT::f64 || SVT == MVT::f80) &&
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 163646513918d..6eca7b73a9d76 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3923,11 +3923,15 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) {
// FPTrunc is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
SDLoc dl = getCurSDLoc();
+ SDNodeFlags Flags;
+ if (auto *TruncInst = dyn_cast<FPMathOperator>(&I))
+ Flags.copyFMF(*TruncInst);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
DAG.getTargetConstant(
- 0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
+ 0, dl, TLI.getPointerTy(DAG.getDataLayout())),
+ Flags));
}
void SelectionDAGBuilder::visitFPExt(const User &I) {
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
index 0a900f904bec5..89ce0bda41f8e 100644
--- a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
@@ -196,7 +196,7 @@ define amdgpu_kernel void @fptrunc_f32_to_f16(
ptr addrspace(1) %a) {
entry:
%a.val = load float, ptr addrspace(1) %a
- %r.val = fptrunc float %a.val to half
+ %r.val = fptrunc afn float %a.val to half
store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -401,7 +401,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
ptr addrspace(1) %a) {
entry:
%a.val = load double, ptr addrspace(1) %a
- %r.val = fptrunc double %a.val to half
+ %r.val = fptrunc afn double %a.val to half
store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -863,7 +863,7 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
ptr addrspace(1) %a) {
entry:
%a.val = load <2 x double>, ptr addrspace(1) %a
- %r.val = fptrunc <2 x double> %a.val to <2 x half>
+ %r.val = fptrunc afn <2 x double> %a.val to <2 x half>
store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
diff --git a/llvm/test/CodeGen/ARM/fp16.ll b/llvm/test/CodeGen/ARM/fp16.ll
index dc35fa34f42c1..9ff701050ac7e 100644
--- a/llvm/test/CodeGen/ARM/fp16.ll
+++ b/llvm/test/CodeGen/ARM/fp16.ll
@@ -86,8 +86,8 @@ define i16 @test_to_fp16(double %in) {
; CHECK-FP16-SAFE: bl __aeabi_d2h
-; CHECK-FP16-UNSAFE: vcvt.f32.f64 s0, d0
-; CHECK-FP16-UNSAFE-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-FP16-UNSAFE: vmov r0, r1, d0
+; CHECK-FP16-UNSAFE-NEXT: bl __aeabi_d2h
; CHECK-ARMV8: vcvtb.f16.f64 [[TMP:s[0-9]+]], d0
; CHECK-ARMV8: vmov r0, [[TMP]]
|
Currently I couldn't fix the test |
I believe we do not use them any more because useFP16ConversionIntrinsics==false in clang. |
Just take the regressions; these intrinsics should be removed from the IR. |
@@ -3853,7 +3853,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { | |||
break; | |||
case ISD::FP_TO_FP16: | |||
LLVM_DEBUG(dbgs() << "Legalizing FP_TO_FP16\n"); | |||
if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) { | |||
if (!TLI.useSoftFloat() && Node->getFlags().hasApproximateFuncs()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These checks should be swapped
@@ -196,7 +196,7 @@ define amdgpu_kernel void @fptrunc_f32_to_f16( | |||
ptr addrspace(1) %a) { | |||
entry: | |||
%a.val = load float, ptr addrspace(1) %a | |||
%r.val = fptrunc float %a.val to half | |||
%r.val = fptrunc afn float %a.val to half |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you duplicate the functions to have a safe and an unsafe version, and remove the -enable-unsafe-fp-math command-line arguments?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Regenerated the SelectionDAG part; the GlobalISel part is a work in progress.
6179283
to
c033e05
Compare
This change is not the solution, due to the type of `llvm.convert.to.fp16.f64`.
c033e05
to
0a32e08
Compare
; GFX9-SDAG-NEXT: s_mov_b32 s1, s9 | ||
; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7e00 | ||
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) | ||
; GFX9-SDAG-NEXT: v_readfirstlane_b32 s5, v1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We probably should have separate s and v versions but I won't subject you to that
These global flags hinder further improvements, such as "[RFC] Honor pragmas with -ffp-contract=fast" and pass concurrency support, so remove them incrementally.