Skip to content

Conversation

paperchalice
Copy link
Contributor

@paperchalice paperchalice commented Jun 30, 2025

These global flags hinder further improvements like [RFC] Honor pragmas with -ffp-contract=fast and pass concurrency support. Remove them incrementally.

@paperchalice paperchalice force-pushed the dag-legalize branch 2 times, most recently from 1eac26d to c386827 Compare July 24, 2025 09:37
@paperchalice paperchalice marked this pull request as ready for review July 24, 2025 09:38
@paperchalice paperchalice changed the title WIP: [SelectionDAG] Remove UnsafeFPMath in LegalizeDAG [SelectionDAG] Remove UnsafeFPMath in LegalizeDAG Jul 24, 2025
@llvmbot
Copy link
Member

llvmbot commented Jul 24, 2025

@llvm/pr-subscribers-backend-arm

@llvm/pr-subscribers-llvm-selectiondag

Author: None (paperchalice)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/146316.diff

4 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (+1-1)
  • (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+5-1)
  • (modified) llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll (+3-3)
  • (modified) llvm/test/CodeGen/ARM/fp16.ll (+2-2)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 74172b230361d..b7a96cb2dc826 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3853,7 +3853,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     break;
   case ISD::FP_TO_FP16:
     LLVM_DEBUG(dbgs() << "Legalizing FP_TO_FP16\n");
-    if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {
+    if (!TLI.useSoftFloat() && Node->getFlags().hasApproximateFuncs()) {
       SDValue Op = Node->getOperand(0);
       MVT SVT = Op.getSimpleValueType();
       if ((SVT == MVT::f64 || SVT == MVT::f80) &&
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 163646513918d..6eca7b73a9d76 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3923,11 +3923,15 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) {
   // FPTrunc is never a no-op cast, no need to check
   SDValue N = getValue(I.getOperand(0));
   SDLoc dl = getCurSDLoc();
+  SDNodeFlags Flags;
+  if (auto *TruncInst = dyn_cast<FPMathOperator>(&I))
+    Flags.copyFMF(*TruncInst);
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
   setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
                            DAG.getTargetConstant(
-                               0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
+                               0, dl, TLI.getPointerTy(DAG.getDataLayout())),
+                           Flags));
 }
 
 void SelectionDAGBuilder::visitFPExt(const User &I) {
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
index 0a900f904bec5..89ce0bda41f8e 100644
--- a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
@@ -196,7 +196,7 @@ define amdgpu_kernel void @fptrunc_f32_to_f16(
     ptr addrspace(1) %a) {
 entry:
   %a.val = load float, ptr addrspace(1) %a
-  %r.val = fptrunc float %a.val to half
+  %r.val = fptrunc afn float %a.val to half
   store half %r.val, ptr addrspace(1) %r
   ret void
 }
@@ -401,7 +401,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
     ptr addrspace(1) %a) {
 entry:
   %a.val = load double, ptr addrspace(1) %a
-  %r.val = fptrunc double %a.val to half
+  %r.val = fptrunc afn double %a.val to half
   store half %r.val, ptr addrspace(1) %r
   ret void
 }
@@ -863,7 +863,7 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
     ptr addrspace(1) %a) {
 entry:
   %a.val = load <2 x double>, ptr addrspace(1) %a
-  %r.val = fptrunc <2 x double> %a.val to <2 x half>
+  %r.val = fptrunc afn <2 x double> %a.val to <2 x half>
   store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
diff --git a/llvm/test/CodeGen/ARM/fp16.ll b/llvm/test/CodeGen/ARM/fp16.ll
index dc35fa34f42c1..9ff701050ac7e 100644
--- a/llvm/test/CodeGen/ARM/fp16.ll
+++ b/llvm/test/CodeGen/ARM/fp16.ll
@@ -86,8 +86,8 @@ define i16 @test_to_fp16(double %in) {
 
 ; CHECK-FP16-SAFE: bl __aeabi_d2h
 
-; CHECK-FP16-UNSAFE:      vcvt.f32.f64 s0, d0
-; CHECK-FP16-UNSAFE-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-FP16-UNSAFE:      vmov r0, r1, d0
+; CHECK-FP16-UNSAFE-NEXT: bl __aeabi_d2h
 
 ; CHECK-ARMV8: vcvtb.f16.f64 [[TMP:s[0-9]+]], d0
 ; CHECK-ARMV8: vmov r0, [[TMP]]

@llvmbot
Copy link
Member

llvmbot commented Jul 24, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: None (paperchalice)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/146316.diff

4 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (+1-1)
  • (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+5-1)
  • (modified) llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll (+3-3)
  • (modified) llvm/test/CodeGen/ARM/fp16.ll (+2-2)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 74172b230361d..b7a96cb2dc826 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3853,7 +3853,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     break;
   case ISD::FP_TO_FP16:
     LLVM_DEBUG(dbgs() << "Legalizing FP_TO_FP16\n");
-    if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {
+    if (!TLI.useSoftFloat() && Node->getFlags().hasApproximateFuncs()) {
       SDValue Op = Node->getOperand(0);
       MVT SVT = Op.getSimpleValueType();
       if ((SVT == MVT::f64 || SVT == MVT::f80) &&
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 163646513918d..6eca7b73a9d76 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3923,11 +3923,15 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) {
   // FPTrunc is never a no-op cast, no need to check
   SDValue N = getValue(I.getOperand(0));
   SDLoc dl = getCurSDLoc();
+  SDNodeFlags Flags;
+  if (auto *TruncInst = dyn_cast<FPMathOperator>(&I))
+    Flags.copyFMF(*TruncInst);
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
   setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
                            DAG.getTargetConstant(
-                               0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
+                               0, dl, TLI.getPointerTy(DAG.getDataLayout())),
+                           Flags));
 }
 
 void SelectionDAGBuilder::visitFPExt(const User &I) {
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
index 0a900f904bec5..89ce0bda41f8e 100644
--- a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
@@ -196,7 +196,7 @@ define amdgpu_kernel void @fptrunc_f32_to_f16(
     ptr addrspace(1) %a) {
 entry:
   %a.val = load float, ptr addrspace(1) %a
-  %r.val = fptrunc float %a.val to half
+  %r.val = fptrunc afn float %a.val to half
   store half %r.val, ptr addrspace(1) %r
   ret void
 }
@@ -401,7 +401,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
     ptr addrspace(1) %a) {
 entry:
   %a.val = load double, ptr addrspace(1) %a
-  %r.val = fptrunc double %a.val to half
+  %r.val = fptrunc afn double %a.val to half
   store half %r.val, ptr addrspace(1) %r
   ret void
 }
@@ -863,7 +863,7 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
     ptr addrspace(1) %a) {
 entry:
   %a.val = load <2 x double>, ptr addrspace(1) %a
-  %r.val = fptrunc <2 x double> %a.val to <2 x half>
+  %r.val = fptrunc afn <2 x double> %a.val to <2 x half>
   store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
diff --git a/llvm/test/CodeGen/ARM/fp16.ll b/llvm/test/CodeGen/ARM/fp16.ll
index dc35fa34f42c1..9ff701050ac7e 100644
--- a/llvm/test/CodeGen/ARM/fp16.ll
+++ b/llvm/test/CodeGen/ARM/fp16.ll
@@ -86,8 +86,8 @@ define i16 @test_to_fp16(double %in) {
 
 ; CHECK-FP16-SAFE: bl __aeabi_d2h
 
-; CHECK-FP16-UNSAFE:      vcvt.f32.f64 s0, d0
-; CHECK-FP16-UNSAFE-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-FP16-UNSAFE:      vmov r0, r1, d0
+; CHECK-FP16-UNSAFE-NEXT: bl __aeabi_d2h
 
 ; CHECK-ARMV8: vcvtb.f16.f64 [[TMP:s[0-9]+]], d0
 ; CHECK-ARMV8: vmov r0, [[TMP]]

@paperchalice
Copy link
Contributor Author

Currently I couldn't fix the test CodeGen/ARM/fp16.ll, because llvm.convert.to.fp16.f64 in this test is incompatible with fast math flags.

@davemgreen
Copy link
Collaborator

Currently I couldn't fix the test CodeGen/ARM/fp16.ll, because llvm.convert.to.fp16.f64 in this test is incompatible with fast math flags.

I believe we do not use them any more because useFP16ConversionIntrinsics==false in clang.

@arsenm
Copy link
Contributor

arsenm commented Jul 25, 2025

Currently I couldn't fix the test CodeGen/ARM/fp16.ll, because llvm.convert.to.fp16.f64 in this test is incompatible with fast math flags.

Just take the regressions, these intrinsics should be removed from the IR

@@ -3853,7 +3853,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
case ISD::FP_TO_FP16:
LLVM_DEBUG(dbgs() << "Legalizing FP_TO_FP16\n");
if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {
if (!TLI.useSoftFloat() && Node->getFlags().hasApproximateFuncs()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These checks should be swapped

@@ -196,7 +196,7 @@ define amdgpu_kernel void @fptrunc_f32_to_f16(
ptr addrspace(1) %a) {
entry:
%a.val = load float, ptr addrspace(1) %a
%r.val = fptrunc float %a.val to half
%r.val = fptrunc afn float %a.val to half
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you duplicate the functions to have a safe and unsafe version, and remove the -enable-unsafe-fp-math command line arguments

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regenerate selection dag part, global isel part is a work in progress.

; GFX9-SDAG-NEXT: s_mov_b32 s1, s9
; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7e00
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: v_readfirstlane_b32 s5, v1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably should have separate s and v versions but I won't subject you to that

@paperchalice paperchalice merged commit 21836f4 into llvm:main Jul 29, 2025
9 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants