[NVPTX] Constant-folding for f2i, d2ui, f2ll etc. #118965

LewisCrawford · 2024-12-06T13:08:36Z

Add constant-folding support for the NVVM intrinsics for converting float/double to signed/unsigned int32/int64 types, including all rounding-modes and ftz modifiers.

llvmbot · 2024-12-06T13:09:13Z

@llvm/pr-subscribers-llvm-ir
@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-backend-nvptx

@llvm/pr-subscribers-llvm-analysis

Author: Lewis Crawford (LewisCrawford)

Changes

Add constant-folding support for the NVVM intrinsics for converting float/double to signed/unsigned int32/int64 types, including all rounding-modes and ftz modifiers.

Patch is 77.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/118965.diff

3 Files Affected:

(modified) llvm/lib/Analysis/ConstantFolding.cpp (+265)
(added) llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2i-d2i.ll (+1129)
(added) llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2ll-d2ll.ll (+1129)

diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index efbccee76f2c51..2806c29462fa31 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -45,6 +45,7 @@
 #include "llvm/IR/IntrinsicsAArch64.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsARM.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
 #include "llvm/IR/IntrinsicsWebAssembly.h"
 #include "llvm/IR/IntrinsicsX86.h"
 #include "llvm/IR/Operator.h"
@@ -1678,6 +1679,58 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::x86_avx512_cvttsd2usi64:
     return !Call->isStrictFP();
 
+  // NVVM float/double to int32/uint32 conversion intrinsics
+  case Intrinsic::nvvm_f2i_rm:
+  case Intrinsic::nvvm_f2i_rn:
+  case Intrinsic::nvvm_f2i_rp:
+  case Intrinsic::nvvm_f2i_rz:
+  case Intrinsic::nvvm_f2i_rm_ftz:
+  case Intrinsic::nvvm_f2i_rn_ftz:
+  case Intrinsic::nvvm_f2i_rp_ftz:
+  case Intrinsic::nvvm_f2i_rz_ftz:
+  case Intrinsic::nvvm_f2ui_rm:
+  case Intrinsic::nvvm_f2ui_rn:
+  case Intrinsic::nvvm_f2ui_rp:
+  case Intrinsic::nvvm_f2ui_rz:
+  case Intrinsic::nvvm_f2ui_rm_ftz:
+  case Intrinsic::nvvm_f2ui_rn_ftz:
+  case Intrinsic::nvvm_f2ui_rp_ftz:
+  case Intrinsic::nvvm_f2ui_rz_ftz:
+  case Intrinsic::nvvm_d2i_rm:
+  case Intrinsic::nvvm_d2i_rn:
+  case Intrinsic::nvvm_d2i_rp:
+  case Intrinsic::nvvm_d2i_rz:
+  case Intrinsic::nvvm_d2ui_rm:
+  case Intrinsic::nvvm_d2ui_rn:
+  case Intrinsic::nvvm_d2ui_rp:
+  case Intrinsic::nvvm_d2ui_rz:
+
+  // NVVM float/double to int64/uint64 conversion intrinsics
+  case Intrinsic::nvvm_f2ll_rm:
+  case Intrinsic::nvvm_f2ll_rn:
+  case Intrinsic::nvvm_f2ll_rp:
+  case Intrinsic::nvvm_f2ll_rz:
+  case Intrinsic::nvvm_f2ll_rm_ftz:
+  case Intrinsic::nvvm_f2ll_rn_ftz:
+  case Intrinsic::nvvm_f2ll_rp_ftz:
+  case Intrinsic::nvvm_f2ll_rz_ftz:
+  case Intrinsic::nvvm_f2ull_rm:
+  case Intrinsic::nvvm_f2ull_rn:
+  case Intrinsic::nvvm_f2ull_rp:
+  case Intrinsic::nvvm_f2ull_rz:
+  case Intrinsic::nvvm_f2ull_rm_ftz:
+  case Intrinsic::nvvm_f2ull_rn_ftz:
+  case Intrinsic::nvvm_f2ull_rp_ftz:
+  case Intrinsic::nvvm_f2ull_rz_ftz:
+  case Intrinsic::nvvm_d2ll_rm:
+  case Intrinsic::nvvm_d2ll_rn:
+  case Intrinsic::nvvm_d2ll_rp:
+  case Intrinsic::nvvm_d2ll_rz:
+  case Intrinsic::nvvm_d2ull_rm:
+  case Intrinsic::nvvm_d2ull_rn:
+  case Intrinsic::nvvm_d2ull_rp:
+  case Intrinsic::nvvm_d2ull_rz:
+
   // Sign operations are actually bitwise operations, they do not raise
   // exceptions even for SNANs.
   case Intrinsic::fabs:
@@ -1840,6 +1893,13 @@ inline bool llvm_fenv_testexcept() {
   return false;
 }
 
+// FTZ: a denormal V becomes a same-signed zero in Ty's semantics; else V.
+static APFloat FTZPreserveSign(Type *Ty, const APFloat &V) {
+  if (V.isDenormal())
+    return APFloat::getZero(Ty->getFltSemantics(), V.isNegative());
+  return V;
+}
+
 Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V,
                          Type *Ty) {
   llvm_fenv_clearexcept();
@@ -2300,6 +2360,211 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
       return ConstantFP::get(Ty->getContext(), U);
     }
 
+    // NVVM float/double to signed/unsigned int32/int64 conversions:
+    switch (IntrinsicID) {
+    // f2i
+    case Intrinsic::nvvm_f2i_rm:
+    case Intrinsic::nvvm_f2i_rn:
+    case Intrinsic::nvvm_f2i_rp:
+    case Intrinsic::nvvm_f2i_rz:
+    case Intrinsic::nvvm_f2i_rm_ftz:
+    case Intrinsic::nvvm_f2i_rn_ftz:
+    case Intrinsic::nvvm_f2i_rp_ftz:
+    case Intrinsic::nvvm_f2i_rz_ftz:
+    // f2ui
+    case Intrinsic::nvvm_f2ui_rm:
+    case Intrinsic::nvvm_f2ui_rn:
+    case Intrinsic::nvvm_f2ui_rp:
+    case Intrinsic::nvvm_f2ui_rz:
+    case Intrinsic::nvvm_f2ui_rm_ftz:
+    case Intrinsic::nvvm_f2ui_rn_ftz:
+    case Intrinsic::nvvm_f2ui_rp_ftz:
+    case Intrinsic::nvvm_f2ui_rz_ftz:
+    // d2i
+    case Intrinsic::nvvm_d2i_rm:
+    case Intrinsic::nvvm_d2i_rn:
+    case Intrinsic::nvvm_d2i_rp:
+    case Intrinsic::nvvm_d2i_rz:
+    // d2ui
+    case Intrinsic::nvvm_d2ui_rm:
+    case Intrinsic::nvvm_d2ui_rn:
+    case Intrinsic::nvvm_d2ui_rp:
+    case Intrinsic::nvvm_d2ui_rz:
+    // f2ll
+    case Intrinsic::nvvm_f2ll_rm:
+    case Intrinsic::nvvm_f2ll_rn:
+    case Intrinsic::nvvm_f2ll_rp:
+    case Intrinsic::nvvm_f2ll_rz:
+    case Intrinsic::nvvm_f2ll_rm_ftz:
+    case Intrinsic::nvvm_f2ll_rn_ftz:
+    case Intrinsic::nvvm_f2ll_rp_ftz:
+    case Intrinsic::nvvm_f2ll_rz_ftz:
+    // f2ull
+    case Intrinsic::nvvm_f2ull_rm:
+    case Intrinsic::nvvm_f2ull_rn:
+    case Intrinsic::nvvm_f2ull_rp:
+    case Intrinsic::nvvm_f2ull_rz:
+    case Intrinsic::nvvm_f2ull_rm_ftz:
+    case Intrinsic::nvvm_f2ull_rn_ftz:
+    case Intrinsic::nvvm_f2ull_rp_ftz:
+    case Intrinsic::nvvm_f2ull_rz_ftz:
+    // d2ll
+    case Intrinsic::nvvm_d2ll_rm:
+    case Intrinsic::nvvm_d2ll_rn:
+    case Intrinsic::nvvm_d2ll_rp:
+    case Intrinsic::nvvm_d2ll_rz:
+    // d2ull
+    case Intrinsic::nvvm_d2ull_rm:
+    case Intrinsic::nvvm_d2ull_rn:
+    case Intrinsic::nvvm_d2ull_rp:
+    case Intrinsic::nvvm_d2ull_rz: {
+      // In float-to-integer conversion, NaN inputs are converted to 0.
+      if (U.isNaN())
+        return ConstantInt::get(Ty, 0);
+
+      // Map the intrinsic's rounding suffix (rm/rn/rp/rz) onto the matching
+      // APFloat rounding mode used for the conversion below.
+      APFloat::roundingMode RMode = APFloat::roundingMode::Invalid;
+      switch (IntrinsicID) {
+      // i_rm: 32-bit results, round toward negative infinity
+      case Intrinsic::nvvm_f2i_rm:
+      case Intrinsic::nvvm_f2ui_rm:
+      case Intrinsic::nvvm_f2i_rm_ftz:
+      case Intrinsic::nvvm_f2ui_rm_ftz:
+      case Intrinsic::nvvm_d2i_rm:
+      case Intrinsic::nvvm_d2ui_rm:
+      // ll_rm: 64-bit results, round toward negative infinity
+      case Intrinsic::nvvm_f2ll_rm:
+      case Intrinsic::nvvm_f2ull_rm:
+      case Intrinsic::nvvm_f2ll_rm_ftz:
+      case Intrinsic::nvvm_f2ull_rm_ftz:
+      case Intrinsic::nvvm_d2ll_rm:
+      case Intrinsic::nvvm_d2ull_rm:
+        RMode = APFloat::rmTowardNegative;
+        break;
+      // i_rn: 32-bit results, round to nearest, ties to even
+      case Intrinsic::nvvm_f2i_rn:
+      case Intrinsic::nvvm_f2ui_rn:
+      case Intrinsic::nvvm_f2i_rn_ftz:
+      case Intrinsic::nvvm_f2ui_rn_ftz:
+      case Intrinsic::nvvm_d2i_rn:
+      case Intrinsic::nvvm_d2ui_rn:
+      // ll_rn: 64-bit results, round to nearest, ties to even
+      case Intrinsic::nvvm_f2ll_rn:
+      case Intrinsic::nvvm_f2ull_rn:
+      case Intrinsic::nvvm_f2ll_rn_ftz:
+      case Intrinsic::nvvm_f2ull_rn_ftz:
+      case Intrinsic::nvvm_d2ll_rn:
+      case Intrinsic::nvvm_d2ull_rn:
+        RMode = APFloat::rmNearestTiesToEven;
+        break;
+      // i_rp: 32-bit results, round toward positive infinity
+      case Intrinsic::nvvm_f2i_rp:
+      case Intrinsic::nvvm_f2ui_rp:
+      case Intrinsic::nvvm_f2i_rp_ftz:
+      case Intrinsic::nvvm_f2ui_rp_ftz:
+      case Intrinsic::nvvm_d2i_rp:
+      case Intrinsic::nvvm_d2ui_rp:
+      // ll_rp: 64-bit results, round toward positive infinity
+      case Intrinsic::nvvm_f2ll_rp:
+      case Intrinsic::nvvm_f2ull_rp:
+      case Intrinsic::nvvm_f2ll_rp_ftz:
+      case Intrinsic::nvvm_f2ull_rp_ftz:
+      case Intrinsic::nvvm_d2ll_rp:
+      case Intrinsic::nvvm_d2ull_rp:
+        RMode = APFloat::rmTowardPositive;
+        break;
+      // i_rz: 32-bit results, round toward zero
+      case Intrinsic::nvvm_f2i_rz:
+      case Intrinsic::nvvm_f2ui_rz:
+      case Intrinsic::nvvm_f2i_rz_ftz:
+      case Intrinsic::nvvm_f2ui_rz_ftz:
+      case Intrinsic::nvvm_d2i_rz:
+      case Intrinsic::nvvm_d2ui_rz:
+      // ll_rz: 64-bit results, round toward zero
+      case Intrinsic::nvvm_f2ll_rz:
+      case Intrinsic::nvvm_f2ull_rz:
+      case Intrinsic::nvvm_f2ll_rz_ftz:
+      case Intrinsic::nvvm_f2ull_rz_ftz:
+      case Intrinsic::nvvm_d2ll_rz:
+      case Intrinsic::nvvm_d2ull_rz:
+        RMode = APFloat::rmTowardZero;
+        break;
+      default:
+        llvm_unreachable("Invalid f2i/d2i rounding mode intrinsic");
+      }
+      // Every handled intrinsic must have selected a valid rounding mode.
+      assert(RMode != APFloat::roundingMode::Invalid);
+
+      bool IsFTZ = false;
+      switch (IntrinsicID) {
+      case Intrinsic::nvvm_f2i_rm_ftz:
+      case Intrinsic::nvvm_f2i_rn_ftz:
+      case Intrinsic::nvvm_f2i_rp_ftz:
+      case Intrinsic::nvvm_f2i_rz_ftz:
+      case Intrinsic::nvvm_f2ui_rm_ftz:
+      case Intrinsic::nvvm_f2ui_rn_ftz:
+      case Intrinsic::nvvm_f2ui_rp_ftz:
+      case Intrinsic::nvvm_f2ui_rz_ftz:
+      case Intrinsic::nvvm_f2ll_rm_ftz:
+      case Intrinsic::nvvm_f2ll_rn_ftz:
+      case Intrinsic::nvvm_f2ll_rp_ftz:
+      case Intrinsic::nvvm_f2ll_rz_ftz:
+      case Intrinsic::nvvm_f2ull_rm_ftz:
+      case Intrinsic::nvvm_f2ull_rn_ftz:
+      case Intrinsic::nvvm_f2ull_rp_ftz:
+      case Intrinsic::nvvm_f2ull_rz_ftz:
+        IsFTZ = true;
+        break;
+      }
+
+      bool IsSigned = false;
+      switch (IntrinsicID) {
+      // f2i
+      case Intrinsic::nvvm_f2i_rm:
+      case Intrinsic::nvvm_f2i_rm_ftz:
+      case Intrinsic::nvvm_f2i_rn:
+      case Intrinsic::nvvm_f2i_rn_ftz:
+      case Intrinsic::nvvm_f2i_rp:
+      case Intrinsic::nvvm_f2i_rp_ftz:
+      case Intrinsic::nvvm_f2i_rz:
+      case Intrinsic::nvvm_f2i_rz_ftz:
+      // d2i
+      case Intrinsic::nvvm_d2i_rm:
+      case Intrinsic::nvvm_d2i_rn:
+      case Intrinsic::nvvm_d2i_rp:
+      case Intrinsic::nvvm_d2i_rz:
+      // f2ll
+      case Intrinsic::nvvm_f2ll_rm:
+      case Intrinsic::nvvm_f2ll_rm_ftz:
+      case Intrinsic::nvvm_f2ll_rn:
+      case Intrinsic::nvvm_f2ll_rn_ftz:
+      case Intrinsic::nvvm_f2ll_rp:
+      case Intrinsic::nvvm_f2ll_rp_ftz:
+      case Intrinsic::nvvm_f2ll_rz:
+      case Intrinsic::nvvm_f2ll_rz_ftz:
+      // d2ll
+      case Intrinsic::nvvm_d2ll_rm:
+      case Intrinsic::nvvm_d2ll_rn:
+      case Intrinsic::nvvm_d2ll_rp:
+      case Intrinsic::nvvm_d2ll_rz:
+        IsSigned = true;
+        break;
+      }
+
+      APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned);
+      auto FloatToRound = IsFTZ ? FTZPreserveSign(Op->getType(), U) : U;
+
+      bool IsExact = false;
+      APFloat::opStatus Status =
+          FloatToRound.convertToInteger(ResInt, RMode, &IsExact);
+
+      if (Status != APFloat::opInvalidOp)
+        return ConstantInt::get(Ty, ResInt);
+      return nullptr;
+    }
+    }
+
     /// We only fold functions with finite arguments. Folding NaN and inf is
     /// likely to be aborted with an exception anyway, and some host libms
     /// have known errors raising exceptions.
diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2i-d2i.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2i-d2i.ll
new file mode 100644
index 00000000000000..543c73137c1b64
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2i-d2i.ll
@@ -0,0 +1,1129 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=instsimplify -march=nvptx64 -S | FileCheck %s
+
+; f2i/f2ui and d2i/d2ui - double/float to i32 tests
+
+;###############################################################
+;#               Tests with Positive 1.5                       #
+;###############################################################
+
+;+-------------------------------------------------------------+
+;|                        f2i                                  |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_f2i_rm() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rm() {
+; CHECK-NEXT:    ret i32 1
+;
+  %res = call i32 @llvm.nvvm.f2i.rm(float 1.5)
+  ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2i_rn() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rn() {
+; CHECK-NEXT:    ret i32 2
+;
+  %res = call i32 @llvm.nvvm.f2i.rn(float 1.5)
+  ret i32 %res
+}
+
+
+define i32 @test_pos_1_5_f2i_rp() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rp() {
+; CHECK-NEXT:    ret i32 2
+;
+  %res = call i32 @llvm.nvvm.f2i.rp(float 1.5)
+  ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2i_rz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rz() {
+; CHECK-NEXT:    ret i32 1
+;
+  %res = call i32 @llvm.nvvm.f2i.rz(float 1.5)
+  ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;|                      f2i_ftz                                |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_f2i_rm_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rm_ftz() {
+; CHECK-NEXT:    ret i32 1
+;
+  %res = call i32 @llvm.nvvm.f2i.rm.ftz(float 1.5)
+  ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2i_rn_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rn_ftz() {
+; CHECK-NEXT:    ret i32 2
+;
+  %res = call i32 @llvm.nvvm.f2i.rn.ftz(float 1.5)
+  ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2i_rp_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rp_ftz() {
+; CHECK-NEXT:    ret i32 2
+;
+  %res = call i32 @llvm.nvvm.f2i.rp.ftz(float 1.5)
+  ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2i_rz_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rz_ftz() {
+; CHECK-NEXT:    ret i32 1
+;
+  %res = call i32 @llvm.nvvm.f2i.rz.ftz(float 1.5)
+  ret i32 %res
+}
+;+-------------------------------------------------------------+
+;|                        d2i                                  |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_d2i_rm() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2i_rm() {
+; CHECK-NEXT:    ret i32 1
+;
+  %res = call i32 @llvm.nvvm.d2i.rm(double 1.5)
+  ret i32 %res
+}
+
+define i32 @test_pos_1_5_d2i_rn() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2i_rn() {
+; CHECK-NEXT:    ret i32 2
+;
+  %res = call i32 @llvm.nvvm.d2i.rn(double 1.5)
+  ret i32 %res
+}
+
+
+define i32 @test_pos_1_5_d2i_rp() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2i_rp() {
+; CHECK-NEXT:    ret i32 2
+;
+  %res = call i32 @llvm.nvvm.d2i.rp(double 1.5)
+  ret i32 %res
+}
+
+define i32 @test_pos_1_5_d2i_rz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2i_rz() {
+; CHECK-NEXT:    ret i32 1
+;
+  %res = call i32 @llvm.nvvm.d2i.rz(double 1.5)
+  ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;|                        f2ui                                  |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_f2ui_rm() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rm() {
+; CHECK-NEXT:    ret i32 1
+;
+  %res = call i32 @llvm.nvvm.f2ui.rm(float 1.5)
+  ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2ui_rn() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rn() {
+; CHECK-NEXT:    ret i32 2
+;
+  %res = call i32 @llvm.nvvm.f2ui.rn(float 1.5)
+  ret i32 %res
+}
+
+
+define i32 @test_pos_1_5_f2ui_rp() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rp() {
+; CHECK-NEXT:    ret i32 2
+;
+  %res = call i32 @llvm.nvvm.f2ui.rp(float 1.5)
+  ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2ui_rz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rz() {
+; CHECK-NEXT:    ret i32 1
+;
+  %res = call i32 @llvm.nvvm.f2ui.rz(float 1.5)
+  ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;|                      f2ui_ftz                                |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_f2ui_rm_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rm_ftz() {
+; CHECK-NEXT:    ret i32 1
+;
+  %res = call i32 @llvm.nvvm.f2ui.rm.ftz(float 1.5)
+  ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2ui_rn_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rn_ftz() {
+; CHECK-NEXT:    ret i32 2
+;
+  %res = call i32 @llvm.nvvm.f2ui.rn.ftz(float 1.5)
+  ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2ui_rp_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rp_ftz() {
+; CHECK-NEXT:    ret i32 2
+;
+  %res = call i32 @llvm.nvvm.f2ui.rp.ftz(float 1.5)
+  ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2ui_rz_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rz_ftz() {
+; CHECK-NEXT:    ret i32 1
+;
+  %res = call i32 @llvm.nvvm.f2ui.rz.ftz(float 1.5)
+  ret i32 %res
+}
+;+-------------------------------------------------------------+
+;|                        d2ui                                  |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_d2ui_rm() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2ui_rm() {
+; CHECK-NEXT:    ret i32 1
+;
+  %res = call i32 @llvm.nvvm.d2ui.rm(double 1.5)
+  ret i32 %res
+}
+
+define i32 @test_pos_1_5_d2ui_rn() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2ui_rn() {
+; CHECK-NEXT:    ret i32 2
+;
+  %res = call i32 @llvm.nvvm.d2ui.rn(double 1.5)
+  ret i32 %res
+}
+
+
+define i32 @test_pos_1_5_d2ui_rp() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2ui_rp() {
+; CHECK-NEXT:    ret i32 2
+;
+  %res = call i32 @llvm.nvvm.d2ui.rp(double 1.5)
+  ret i32 %res
+}
+
+define i32 @test_pos_1_5_d2ui_rz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2ui_rz() {
+; CHECK-NEXT:    ret i32 1
+;
+  %res = call i32 @llvm.nvvm.d2ui.rz(double 1.5)
+  ret i32 %res
+}
+
+;###############################################################
+;#               Tests with Negative 1.5                       #
+;###############################################################
+
+;+-------------------------------------------------------------+
+;|                        f2i                                  |
+;+-------------------------------------------------------------+
+define i32 @test_neg_1_5_f2i_rm() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rm() {
+; CHECK-NEXT:    ret i32 -2
+;
+  %res = call i32 @llvm.nvvm.f2i.rm(float -1.5)
+  ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2i_rn() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rn() {
+; CHECK-NEXT:    ret i32 -2
+;
+  %res = call i32 @llvm.nvvm.f2i.rn(float -1.5)
+  ret i32 %res
+}
+
+
+define i32 @test_neg_1_5_f2i_rp() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rp() {
+; CHECK-NEXT:    ret i32 -1
+;
+  %res = call i32 @llvm.nvvm.f2i.rp(float -1.5)
+  ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2i_rz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rz() {
+; CHECK-NEXT:    ret i32 -1
+;
+  %res = call i32 @llvm.nvvm.f2i.rz(float -1.5)
+  ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;|                      f2i_ftz                                |
+;+-------------------------------------------------------------+
+define i32 @test_neg_1_5_f2i_rm_ftz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rm_ftz() {
+; CHECK-NEXT:    ret i32 -2
+;
+  %res = call i32 @llvm.nvvm.f2i.rm.ftz(float -1.5)
+  ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2i_rn_ftz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rn_ftz() {
+; CHECK-NEXT:    ret i32 -2
+;
+  %res = call i32 @llvm.nvvm.f2i.rn.ftz(float -1.5)
+  ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2i_rp_ftz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rp_ftz() {
+; CHECK-NEXT:    ret i32 -1
+;
+  %res = call i32 @llvm.nvvm.f2i.rp.ftz(float -1.5)
+  ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2i_rz_ftz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rz_ftz() {
+; CHECK-NEXT:    ret i32 -1
+;
+  %res = call i32 @llvm.nvvm.f2i.rz.ftz(float -1.5)
+  ret i32 %res
+}
+;+-------------------------------------------------------------+
+;|                        d2i                                  |
+;+-------------------------------------------------------------+
+define i32 @test_neg_1_5_d2i_rm() {
+; CHECK-LABEL: define i32 @test_neg_1_5_d2i_rm() {
+; CHECK-NEXT:    ret i32 -2
+;
+  %res = call i32 @llvm.nvvm.d2i.rm(double -1.5)
+  ret i32 %res
+}
+
+define i32 @test_neg_1_5_d2i_rn() {
+; CHECK-LABEL: define i32 @test_neg_1_5_d2i_rn() {
+; CHECK-NEXT:    ret i32 -2
+;
+  %res = call i32 @llvm.nvvm.d2i.rn(double -1.5)
+  ret i32 %res
+}
+
+
+define i32 @test_neg_1_5_d2i_rp() {
+; CHECK-LABEL: define i32 @test_neg_1_5_d2i_rp() {
+; CHECK-NEXT:    ret i32 -1
+;
+  %res = call i32 @llvm.nvvm.d2i.rp(double -1.5)
+  ret i32 %res
+}
+
+define i32 @test_neg_1_5_d2i_rz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_d2i_rz() {
+; CHECK-NEXT:    ret i32 -1
+;
+  %res = call i32 @llvm.nvvm.d2i.rz(double -1.5)
+  ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;|                        f2ui                                  |
+;+-------------------------------------------------------------+
+define i32 @test_neg_1_5_f2ui_rm() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rm() {
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rm(float -1.500000e+00)
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %res = call i32 @llvm.nvvm.f2ui.rm(float -1.5)
+  ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2ui_rn() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rn() {
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rn(float...
[truncated]

AlexMaclean

Overall seems reasonable to me, I agree it seems good to constant fold these intrinsics. I'm not too familiar with ConstantFolding though so I can't say if it's okay to add so much target-specific code here.

llvm/lib/Analysis/ConstantFolding.cpp

LewisCrawford · 2024-12-13T11:55:46Z

> I can't say if it's okay to add so much target-specific code here.

Currently, ConstantFolding.cpp has target-specific code for WebAssembly, AArch64, x86, and AMDGPU intrinsics, so adding target-specific intrinsics to this file looks like the current status quo.

However, the NVPTX backend has an abnormally large number of constant-foldable target-specific intrinsics (the 48 float-to-int conversion intrinsics covered in this patch are just the tip of the iceberg), so it may be reasonable to split them into a separate NVVM-specific file if anyone objects to adding them to the main ConstantFolding.cpp file.

LewisCrawford · 2024-12-13T13:47:39Z

Adding @nikic and @MDevereau for feedback on whether ConstantFolding.cpp is still the right place to add target-specific intrinsic folding like this.

Is it ok to keep going as-is, or do we want to split out either all targets (or just the NVVM intrinsics) into separate target-specific constant-folding files?

I have several draft patches to add even more NVVM intrinsics here, so this patch is a proof-of-concept to get feedback about where it is best to put this code.

nikic · 2024-12-13T14:11:17Z

It's okay to add these to ConstantFolding for now.

AlexMaclean

LGTM with one minor nit you can ignore if you want

llvm/lib/Analysis/ConstantFolding.cpp

Artem-B

LGTM in principle.

Artem-B · 2024-12-13T19:38:03Z

llvm/include/llvm/IR/NVVMIntrinsicUtils.h

+
+bool IntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
+  switch (IntrinsicID) {
+  // Float to i32 / i64 conversion intrinsics:


Makes me wonder if we can add these FTZ, signedness, and rounding-mode properties as some sort of flag on the intrinsic in tablegen, where we define them, so we don't have to play whack-a-mole updating these switches every time we add/change an NVPTX intrinsic.

If that's too cumbersome, we could construct a static table/map of intrinsic->flags, populate it once, and then just look up an individual intrinsic's flags where we need them. That may be less cumbersome to update in the future.

Add constant-folding support for the NVVM intrinsics for converting float/double to signed/unsigned int32/int64 types, including all rounding-modes and ftz modifiers.

Also remove unnecessary parameter for FTZPreserveSign.

Move NVVM intrinsic helper functions into NVVMIntrinsicFlags.h and then rename it NVVMIntrinsicUtils.h.

Add the <stdint.h> header include back into NVVMIntrinsicUtils.h after rebasing the commit which renamed it to a point after the original NVVMIntrinsicFlags.h version was edited to include <stdint.h>. This extra include was originally added by: f33e236 [clang][Modules] Fixing Build Breaks When -DLLVM_ENABLE_MODULES=ON (llvm#119473)

Mark the new intrinsic helper functions as inline to avoid linker issues.

llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes backend:NVPTX llvm:analysis Includes value tracking, cost tables and constant folding llvm:transforms labels Dec 6, 2024

LewisCrawford requested review from Artem-B, AlexMaclean and durga4github December 6, 2024 13:10

LewisCrawford self-assigned this Dec 6, 2024

AlexMaclean reviewed Dec 12, 2024

View reviewed changes

llvm/lib/Analysis/ConstantFolding.cpp Outdated Show resolved Hide resolved

llvm/lib/Analysis/ConstantFolding.cpp Show resolved Hide resolved

LewisCrawford requested review from nikic and MDevereau December 13, 2024 13:39

AlexMaclean approved these changes Dec 13, 2024

View reviewed changes

llvm/lib/Analysis/ConstantFolding.cpp Outdated Show resolved Hide resolved

llvmbot added the llvm:ir label Dec 13, 2024

Artem-B approved these changes Dec 13, 2024

View reviewed changes

LewisCrawford added 5 commits December 13, 2024 20:34

[NVPTX] Constant-folding for f2i, d2ui, f2ll etc.

e520c47

Add constant-folding support for the NVVM intrinsics for converting float/double to signed/unsigned int32/int64 types, including all rounding-modes and ftz modifiers.

Move internal case statements into helper funcs

54524d3

Also remove unnecessary parameter for FTZPreserveSign.

Move helper functions into separate file

108265f

Move NVVM intrinsic helper functions into NVVMIntrinsicFlags.h and then rename it NVVMIntrinsicUtils.h.

Minor tidying

623215d

LewisCrawford force-pushed the fold_f2i_d2i branch from 2f89d4b to 216709f Compare December 13, 2024 20:44

LewisCrawford added 2 commits December 13, 2024 21:33

Mark helper functions as inline

0a51bdd

Mark the new intrinsic helper functions as inline to avoid linker issues.

Merge branch 'main' into fold_f2i_d2i

b72c862

LewisCrawford merged commit a629d9e into llvm:main Jan 7, 2025
5 of 7 checks passed

LewisCrawford mentioned this pull request Jan 8, 2025

[NVPTX] Constant fold NVVM fmin and fmax #121966

Merged

durga4github mentioned this pull request Jan 9, 2025

[NVPTX] Add float to tf32 conversion intrinsics #121507

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[NVPTX] Constant-folding for f2i, d2ui, f2ll etc. #118965

[NVPTX] Constant-folding for f2i, d2ui, f2ll etc. #118965

Uh oh!

LewisCrawford commented Dec 6, 2024

Uh oh!

llvmbot commented Dec 6, 2024 •

edited

Loading

Uh oh!

AlexMaclean left a comment

Uh oh!

Uh oh!

Uh oh!

LewisCrawford commented Dec 13, 2024 •

edited

Loading

Uh oh!

LewisCrawford commented Dec 13, 2024

Uh oh!

nikic commented Dec 13, 2024

Uh oh!

AlexMaclean left a comment

Uh oh!

Uh oh!

Artem-B left a comment

Uh oh!

Artem-B Dec 13, 2024

Uh oh!

Uh oh!

Uh oh!

[NVPTX] Constant-folding for f2i, d2ui, f2ll etc. #118965

[NVPTX] Constant-folding for f2i, d2ui, f2ll etc. #118965

Uh oh!

Conversation

LewisCrawford commented Dec 6, 2024

Uh oh!

llvmbot commented Dec 6, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

AlexMaclean left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

LewisCrawford commented Dec 13, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

LewisCrawford commented Dec 13, 2024

Uh oh!

nikic commented Dec 13, 2024

Uh oh!

AlexMaclean left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Artem-B left a comment

Choose a reason for hiding this comment

Uh oh!

Artem-B Dec 13, 2024

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

llvmbot commented Dec 6, 2024 •

edited

Loading

LewisCrawford commented Dec 13, 2024 •

edited

Loading