-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[NVPTX] Constant-folding for f2i, d2ui, f2ll etc. #118965
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-llvm-analysis Author: Lewis Crawford (LewisCrawford) ChangesAdd constant-folding support for the NVVM intrinsics for converting float/double to signed/unsigned int32/int64 types, including all rounding-modes and ftz modifiers. Patch is 77.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/118965.diff 3 Files Affected:
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index efbccee76f2c51..2806c29462fa31 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -45,6 +45,7 @@
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Operator.h"
@@ -1678,6 +1679,58 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::x86_avx512_cvttsd2usi64:
return !Call->isStrictFP();
+ // NVVM float/double to int32/uint32 conversion intrinsics
+ case Intrinsic::nvvm_f2i_rm:
+ case Intrinsic::nvvm_f2i_rn:
+ case Intrinsic::nvvm_f2i_rp:
+ case Intrinsic::nvvm_f2i_rz:
+ case Intrinsic::nvvm_f2i_rm_ftz:
+ case Intrinsic::nvvm_f2i_rn_ftz:
+ case Intrinsic::nvvm_f2i_rp_ftz:
+ case Intrinsic::nvvm_f2i_rz_ftz:
+ case Intrinsic::nvvm_f2ui_rm:
+ case Intrinsic::nvvm_f2ui_rn:
+ case Intrinsic::nvvm_f2ui_rp:
+ case Intrinsic::nvvm_f2ui_rz:
+ case Intrinsic::nvvm_f2ui_rm_ftz:
+ case Intrinsic::nvvm_f2ui_rn_ftz:
+ case Intrinsic::nvvm_f2ui_rp_ftz:
+ case Intrinsic::nvvm_f2ui_rz_ftz:
+ case Intrinsic::nvvm_d2i_rm:
+ case Intrinsic::nvvm_d2i_rn:
+ case Intrinsic::nvvm_d2i_rp:
+ case Intrinsic::nvvm_d2i_rz:
+ case Intrinsic::nvvm_d2ui_rm:
+ case Intrinsic::nvvm_d2ui_rn:
+ case Intrinsic::nvvm_d2ui_rp:
+ case Intrinsic::nvvm_d2ui_rz:
+
+ // NVVM float/double to int64/uint64 conversion intrinsics
+ case Intrinsic::nvvm_f2ll_rm:
+ case Intrinsic::nvvm_f2ll_rn:
+ case Intrinsic::nvvm_f2ll_rp:
+ case Intrinsic::nvvm_f2ll_rz:
+ case Intrinsic::nvvm_f2ll_rm_ftz:
+ case Intrinsic::nvvm_f2ll_rn_ftz:
+ case Intrinsic::nvvm_f2ll_rp_ftz:
+ case Intrinsic::nvvm_f2ll_rz_ftz:
+ case Intrinsic::nvvm_f2ull_rm:
+ case Intrinsic::nvvm_f2ull_rn:
+ case Intrinsic::nvvm_f2ull_rp:
+ case Intrinsic::nvvm_f2ull_rz:
+ case Intrinsic::nvvm_f2ull_rm_ftz:
+ case Intrinsic::nvvm_f2ull_rn_ftz:
+ case Intrinsic::nvvm_f2ull_rp_ftz:
+ case Intrinsic::nvvm_f2ull_rz_ftz:
+ case Intrinsic::nvvm_d2ll_rm:
+ case Intrinsic::nvvm_d2ll_rn:
+ case Intrinsic::nvvm_d2ll_rp:
+ case Intrinsic::nvvm_d2ll_rz:
+ case Intrinsic::nvvm_d2ull_rm:
+ case Intrinsic::nvvm_d2ull_rn:
+ case Intrinsic::nvvm_d2ull_rp:
+ case Intrinsic::nvvm_d2ull_rz:
+
// Sign operations are actually bitwise operations, they do not raise
// exceptions even for SNANs.
case Intrinsic::fabs:
@@ -1840,6 +1893,13 @@ inline bool llvm_fenv_testexcept() {
return false;
}
+static const APFloat FTZPreserveSign(Type *Ty, const APFloat &V) {
+ if (V.isDenormal())
+ return APFloat::getZero(Ty->getFltSemantics(), V.isNegative());
+
+ return V;
+}
+
Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V,
Type *Ty) {
llvm_fenv_clearexcept();
@@ -2300,6 +2360,211 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
return ConstantFP::get(Ty->getContext(), U);
}
+ // NVVM float/double to signed/unsigned int32/int64 conversions:
+ switch (IntrinsicID) {
+ // f2i
+ case Intrinsic::nvvm_f2i_rm:
+ case Intrinsic::nvvm_f2i_rn:
+ case Intrinsic::nvvm_f2i_rp:
+ case Intrinsic::nvvm_f2i_rz:
+ case Intrinsic::nvvm_f2i_rm_ftz:
+ case Intrinsic::nvvm_f2i_rn_ftz:
+ case Intrinsic::nvvm_f2i_rp_ftz:
+ case Intrinsic::nvvm_f2i_rz_ftz:
+ // f2ui
+ case Intrinsic::nvvm_f2ui_rm:
+ case Intrinsic::nvvm_f2ui_rn:
+ case Intrinsic::nvvm_f2ui_rp:
+ case Intrinsic::nvvm_f2ui_rz:
+ case Intrinsic::nvvm_f2ui_rm_ftz:
+ case Intrinsic::nvvm_f2ui_rn_ftz:
+ case Intrinsic::nvvm_f2ui_rp_ftz:
+ case Intrinsic::nvvm_f2ui_rz_ftz:
+ // d2i
+ case Intrinsic::nvvm_d2i_rm:
+ case Intrinsic::nvvm_d2i_rn:
+ case Intrinsic::nvvm_d2i_rp:
+ case Intrinsic::nvvm_d2i_rz:
+ // d2ui
+ case Intrinsic::nvvm_d2ui_rm:
+ case Intrinsic::nvvm_d2ui_rn:
+ case Intrinsic::nvvm_d2ui_rp:
+ case Intrinsic::nvvm_d2ui_rz:
+ // f2ll
+ case Intrinsic::nvvm_f2ll_rm:
+ case Intrinsic::nvvm_f2ll_rn:
+ case Intrinsic::nvvm_f2ll_rp:
+ case Intrinsic::nvvm_f2ll_rz:
+ case Intrinsic::nvvm_f2ll_rm_ftz:
+ case Intrinsic::nvvm_f2ll_rn_ftz:
+ case Intrinsic::nvvm_f2ll_rp_ftz:
+ case Intrinsic::nvvm_f2ll_rz_ftz:
+ // f2ull
+ case Intrinsic::nvvm_f2ull_rm:
+ case Intrinsic::nvvm_f2ull_rn:
+ case Intrinsic::nvvm_f2ull_rp:
+ case Intrinsic::nvvm_f2ull_rz:
+ case Intrinsic::nvvm_f2ull_rm_ftz:
+ case Intrinsic::nvvm_f2ull_rn_ftz:
+ case Intrinsic::nvvm_f2ull_rp_ftz:
+ case Intrinsic::nvvm_f2ull_rz_ftz:
+ // d2ll
+ case Intrinsic::nvvm_d2ll_rm:
+ case Intrinsic::nvvm_d2ll_rn:
+ case Intrinsic::nvvm_d2ll_rp:
+ case Intrinsic::nvvm_d2ll_rz:
+ // d2ull
+ case Intrinsic::nvvm_d2ull_rm:
+ case Intrinsic::nvvm_d2ull_rn:
+ case Intrinsic::nvvm_d2ull_rp:
+ case Intrinsic::nvvm_d2ull_rz: {
+ // In float-to-integer conversion, NaN inputs are converted to 0.
+ if (U.isNaN())
+ return ConstantInt::get(Ty, 0);
+
+ APFloat::roundingMode RMode = APFloat::roundingMode::Invalid;
+ switch (IntrinsicID) {
+ // i_rm
+ case Intrinsic::nvvm_f2i_rm:
+ case Intrinsic::nvvm_f2ui_rm:
+ case Intrinsic::nvvm_f2i_rm_ftz:
+ case Intrinsic::nvvm_f2ui_rm_ftz:
+ case Intrinsic::nvvm_d2i_rm:
+ case Intrinsic::nvvm_d2ui_rm:
+ // ll_rm
+ case Intrinsic::nvvm_f2ll_rm:
+ case Intrinsic::nvvm_f2ull_rm:
+ case Intrinsic::nvvm_f2ll_rm_ftz:
+ case Intrinsic::nvvm_f2ull_rm_ftz:
+ case Intrinsic::nvvm_d2ll_rm:
+ case Intrinsic::nvvm_d2ull_rm:
+ RMode = APFloat::rmTowardNegative;
+ break;
+
+ // i_rn
+ case Intrinsic::nvvm_f2i_rn:
+ case Intrinsic::nvvm_f2ui_rn:
+ case Intrinsic::nvvm_f2i_rn_ftz:
+ case Intrinsic::nvvm_f2ui_rn_ftz:
+ case Intrinsic::nvvm_d2i_rn:
+ case Intrinsic::nvvm_d2ui_rn:
+ // ll_rn
+ case Intrinsic::nvvm_f2ll_rn:
+ case Intrinsic::nvvm_f2ull_rn:
+ case Intrinsic::nvvm_f2ll_rn_ftz:
+ case Intrinsic::nvvm_f2ull_rn_ftz:
+ case Intrinsic::nvvm_d2ll_rn:
+ case Intrinsic::nvvm_d2ull_rn:
+ RMode = APFloat::rmNearestTiesToEven;
+ break;
+
+ // i_rp
+ case Intrinsic::nvvm_f2i_rp:
+ case Intrinsic::nvvm_f2ui_rp:
+ case Intrinsic::nvvm_f2i_rp_ftz:
+ case Intrinsic::nvvm_f2ui_rp_ftz:
+ case Intrinsic::nvvm_d2i_rp:
+ case Intrinsic::nvvm_d2ui_rp:
+ // ll_rp
+ case Intrinsic::nvvm_f2ll_rp:
+ case Intrinsic::nvvm_f2ull_rp:
+ case Intrinsic::nvvm_f2ll_rp_ftz:
+ case Intrinsic::nvvm_f2ull_rp_ftz:
+ case Intrinsic::nvvm_d2ll_rp:
+ case Intrinsic::nvvm_d2ull_rp:
+ RMode = APFloat::rmTowardPositive;
+ break;
+
+ // i_rz
+ case Intrinsic::nvvm_f2i_rz:
+ case Intrinsic::nvvm_f2ui_rz:
+ case Intrinsic::nvvm_f2i_rz_ftz:
+ case Intrinsic::nvvm_f2ui_rz_ftz:
+ case Intrinsic::nvvm_d2i_rz:
+ case Intrinsic::nvvm_d2ui_rz:
+ // ll_rz
+ case Intrinsic::nvvm_f2ll_rz:
+ case Intrinsic::nvvm_f2ull_rz:
+ case Intrinsic::nvvm_f2ll_rz_ftz:
+ case Intrinsic::nvvm_f2ull_rz_ftz:
+ case Intrinsic::nvvm_d2ll_rz:
+ case Intrinsic::nvvm_d2ull_rz:
+ RMode = APFloat::rmTowardZero;
+ break;
+ default:
+ llvm_unreachable("Invalid f2i/d2i rounding mode intrinsic");
+ }
+ assert(RM != APFloat::roundingMode::Invalid);
+
+ bool IsFTZ = false;
+ switch (IntrinsicID) {
+ case Intrinsic::nvvm_f2i_rm_ftz:
+ case Intrinsic::nvvm_f2i_rn_ftz:
+ case Intrinsic::nvvm_f2i_rp_ftz:
+ case Intrinsic::nvvm_f2i_rz_ftz:
+ case Intrinsic::nvvm_f2ui_rm_ftz:
+ case Intrinsic::nvvm_f2ui_rn_ftz:
+ case Intrinsic::nvvm_f2ui_rp_ftz:
+ case Intrinsic::nvvm_f2ui_rz_ftz:
+ case Intrinsic::nvvm_f2ll_rm_ftz:
+ case Intrinsic::nvvm_f2ll_rn_ftz:
+ case Intrinsic::nvvm_f2ll_rp_ftz:
+ case Intrinsic::nvvm_f2ll_rz_ftz:
+ case Intrinsic::nvvm_f2ull_rm_ftz:
+ case Intrinsic::nvvm_f2ull_rn_ftz:
+ case Intrinsic::nvvm_f2ull_rp_ftz:
+ case Intrinsic::nvvm_f2ull_rz_ftz:
+ IsFTZ = true;
+ break;
+ }
+
+ bool IsSigned = false;
+ switch (IntrinsicID) {
+ // f2i
+ case Intrinsic::nvvm_f2i_rm:
+ case Intrinsic::nvvm_f2i_rm_ftz:
+ case Intrinsic::nvvm_f2i_rn:
+ case Intrinsic::nvvm_f2i_rn_ftz:
+ case Intrinsic::nvvm_f2i_rp:
+ case Intrinsic::nvvm_f2i_rp_ftz:
+ case Intrinsic::nvvm_f2i_rz:
+ case Intrinsic::nvvm_f2i_rz_ftz:
+ // d2i
+ case Intrinsic::nvvm_d2i_rm:
+ case Intrinsic::nvvm_d2i_rn:
+ case Intrinsic::nvvm_d2i_rp:
+ case Intrinsic::nvvm_d2i_rz:
+ // f2ll
+ case Intrinsic::nvvm_f2ll_rm:
+ case Intrinsic::nvvm_f2ll_rm_ftz:
+ case Intrinsic::nvvm_f2ll_rn:
+ case Intrinsic::nvvm_f2ll_rn_ftz:
+ case Intrinsic::nvvm_f2ll_rp:
+ case Intrinsic::nvvm_f2ll_rp_ftz:
+ case Intrinsic::nvvm_f2ll_rz:
+ case Intrinsic::nvvm_f2ll_rz_ftz:
+ // d2ll
+ case Intrinsic::nvvm_d2ll_rm:
+ case Intrinsic::nvvm_d2ll_rn:
+ case Intrinsic::nvvm_d2ll_rp:
+ case Intrinsic::nvvm_d2ll_rz:
+ IsSigned = true;
+ break;
+ }
+
+ APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned);
+ auto FloatToRound = IsFTZ ? FTZPreserveSign(Op->getType(), U) : U;
+
+ bool IsExact = false;
+ APFloat::opStatus Status =
+ FloatToRound.convertToInteger(ResInt, RMode, &IsExact);
+
+ if (Status != APFloat::opInvalidOp)
+ return ConstantInt::get(Ty, ResInt);
+ return nullptr;
+ }
+ }
+
/// We only fold functions with finite arguments. Folding NaN and inf is
/// likely to be aborted with an exception anyway, and some host libms
/// have known errors raising exceptions.
diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2i-d2i.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2i-d2i.ll
new file mode 100644
index 00000000000000..543c73137c1b64
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2i-d2i.ll
@@ -0,0 +1,1129 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=instsimplify -march=nvptx64 -S | FileCheck %s
+
+; f2i/f2ui and d2i/d2ui - double/float to i32 tests
+
+;###############################################################
+;# Tests with Positive 1.5 #
+;###############################################################
+
+;+-------------------------------------------------------------+
+;| f2i |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_f2i_rm() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rm() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2i.rm(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2i_rn() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rn() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2i.rn(float 1.5)
+ ret i32 %res
+}
+
+
+define i32 @test_pos_1_5_f2i_rp() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rp() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2i.rp(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2i_rz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2i.rz(float 1.5)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2i_ftz |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_f2i_rm_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rm_ftz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2i.rm.ftz(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2i_rn_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rn_ftz() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2i.rn.ftz(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2i_rp_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rp_ftz() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2i.rp.ftz(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2i_rz_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2i_rz_ftz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2i.rz.ftz(float 1.5)
+ ret i32 %res
+}
+;+-------------------------------------------------------------+
+;| d2i |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_d2i_rm() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2i_rm() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.d2i.rm(double 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_d2i_rn() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2i_rn() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.d2i.rn(double 1.5)
+ ret i32 %res
+}
+
+
+define i32 @test_pos_1_5_d2i_rp() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2i_rp() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.d2i.rp(double 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_d2i_rz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2i_rz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.d2i.rz(double 1.5)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ui |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_f2ui_rm() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rm() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2ui.rm(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2ui_rn() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rn() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2ui.rn(float 1.5)
+ ret i32 %res
+}
+
+
+define i32 @test_pos_1_5_f2ui_rp() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rp() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2ui.rp(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2ui_rz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2ui.rz(float 1.5)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ui_ftz |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_f2ui_rm_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rm_ftz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2ui.rm.ftz(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2ui_rn_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rn_ftz() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2ui.rn.ftz(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2ui_rp_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rp_ftz() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.f2ui.rp.ftz(float 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_f2ui_rz_ftz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_f2ui_rz_ftz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.f2ui.rz.ftz(float 1.5)
+ ret i32 %res
+}
+;+-------------------------------------------------------------+
+;| d2ui |
+;+-------------------------------------------------------------+
+define i32 @test_pos_1_5_d2ui_rm() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2ui_rm() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.d2ui.rm(double 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_d2ui_rn() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2ui_rn() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.d2ui.rn(double 1.5)
+ ret i32 %res
+}
+
+
+define i32 @test_pos_1_5_d2ui_rp() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2ui_rp() {
+; CHECK-NEXT: ret i32 2
+;
+ %res = call i32 @llvm.nvvm.d2ui.rp(double 1.5)
+ ret i32 %res
+}
+
+define i32 @test_pos_1_5_d2ui_rz() {
+; CHECK-LABEL: define i32 @test_pos_1_5_d2ui_rz() {
+; CHECK-NEXT: ret i32 1
+;
+ %res = call i32 @llvm.nvvm.d2ui.rz(double 1.5)
+ ret i32 %res
+}
+
+;###############################################################
+;# Tests with Negative 1.5 #
+;###############################################################
+
+;+-------------------------------------------------------------+
+;| f2i |
+;+-------------------------------------------------------------+
+define i32 @test_neg_1_5_f2i_rm() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rm() {
+; CHECK-NEXT: ret i32 -2
+;
+ %res = call i32 @llvm.nvvm.f2i.rm(float -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2i_rn() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rn() {
+; CHECK-NEXT: ret i32 -2
+;
+ %res = call i32 @llvm.nvvm.f2i.rn(float -1.5)
+ ret i32 %res
+}
+
+
+define i32 @test_neg_1_5_f2i_rp() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rp() {
+; CHECK-NEXT: ret i32 -1
+;
+ %res = call i32 @llvm.nvvm.f2i.rp(float -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2i_rz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rz() {
+; CHECK-NEXT: ret i32 -1
+;
+ %res = call i32 @llvm.nvvm.f2i.rz(float -1.5)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2i_ftz |
+;+-------------------------------------------------------------+
+define i32 @test_neg_1_5_f2i_rm_ftz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rm_ftz() {
+; CHECK-NEXT: ret i32 -2
+;
+ %res = call i32 @llvm.nvvm.f2i.rm.ftz(float -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2i_rn_ftz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rn_ftz() {
+; CHECK-NEXT: ret i32 -2
+;
+ %res = call i32 @llvm.nvvm.f2i.rn.ftz(float -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2i_rp_ftz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rp_ftz() {
+; CHECK-NEXT: ret i32 -1
+;
+ %res = call i32 @llvm.nvvm.f2i.rp.ftz(float -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2i_rz_ftz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2i_rz_ftz() {
+; CHECK-NEXT: ret i32 -1
+;
+ %res = call i32 @llvm.nvvm.f2i.rz.ftz(float -1.5)
+ ret i32 %res
+}
+;+-------------------------------------------------------------+
+;| d2i |
+;+-------------------------------------------------------------+
+define i32 @test_neg_1_5_d2i_rm() {
+; CHECK-LABEL: define i32 @test_neg_1_5_d2i_rm() {
+; CHECK-NEXT: ret i32 -2
+;
+ %res = call i32 @llvm.nvvm.d2i.rm(double -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_d2i_rn() {
+; CHECK-LABEL: define i32 @test_neg_1_5_d2i_rn() {
+; CHECK-NEXT: ret i32 -2
+;
+ %res = call i32 @llvm.nvvm.d2i.rn(double -1.5)
+ ret i32 %res
+}
+
+
+define i32 @test_neg_1_5_d2i_rp() {
+; CHECK-LABEL: define i32 @test_neg_1_5_d2i_rp() {
+; CHECK-NEXT: ret i32 -1
+;
+ %res = call i32 @llvm.nvvm.d2i.rp(double -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_d2i_rz() {
+; CHECK-LABEL: define i32 @test_neg_1_5_d2i_rz() {
+; CHECK-NEXT: ret i32 -1
+;
+ %res = call i32 @llvm.nvvm.d2i.rz(double -1.5)
+ ret i32 %res
+}
+
+;+-------------------------------------------------------------+
+;| f2ui |
+;+-------------------------------------------------------------+
+define i32 @test_neg_1_5_f2ui_rm() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rm() {
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rm(float -1.500000e+00)
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.nvvm.f2ui.rm(float -1.5)
+ ret i32 %res
+}
+
+define i32 @test_neg_1_5_f2ui_rn() {
+; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rn() {
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rn(float...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Overall seems reasonable to me, I agree it seems good to constant fold these intrinsics. I'm not too familiar with ConstantFolding though so I can't say if it's okay to add so much target-specific code here.
Currently, ConstantFolding.cpp has target-specific code for WebAssembly, AArch64, x86, and AMDGPU intrinsics, so adding target-specific intrinsics to this file looks like the current status quo. However, the NVPTX backend has an abnormally large number of constant-foldable target-specific intrinsics (the 48 float-to-int conversion intrinsics covered in patch are just the tip of the ice-berg), so it may be reasonable to split them into a separate NVVM-specific file if anyone objects to adding them to the main ConstantFolding.cpp file. |
Adding @nikic and @MDevereau for feedback on whether ConstantFolding.cpp is still the right place to add target-specific intrinsic folding like this. Is it ok to keep going as-is, or do we want to split out either all targets (or just the NVVM intrinsics) into a separate target-specific constant-folding files? I have several draft patches to add even more NVVM intrinsics here, so this patch is a proof-of concept to get feedback about where is best to put this code. |
It's okay to add these to ConstantFolding for now. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with one minor nit you can ignore if you want
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM in principle.
|
||
bool IntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) { | ||
switch (IntrinsicID) { | ||
// Float to i32 / i64 conversion intrinsics: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Makes me wonder if we can add these FTZ, signedness, and rounding mode, properties as some sort of flag on the intrinsic in tablegen, where we define them, so we don't have to play a whack-a-mole updating these switches every time we add/change a NVPTX intrinsic.
If that's too cumbersome, we could construct a static table/map of intrinsic->flags, populate it once, and then just lookup individual intrinsic flag where we need it. That may be less cumbersome to update in the future.
Add constant-folding support for the NVVM intrinsics for converting float/double to signed/unsigned int32/int64 types, including all rounding-modes and ftz modifiers.
Also remove unnecessary parameter for FTZPreserveSign.
Move NVVM intrinsic helper functions into NVVMIntrinsicFlags.h and then rename it NVVMIntrinsicUtils.h.
Add the <stdint.h> header include back into NVVMIntrinsicUtils.h after rebasing the commit which renamed it to a point after the original NVVMIntrinsicFlags.h version was edited to include <stdint> This extra include was originally added by: f33e236 [clang][Modules] Fixing Build Breaks When -DLLVM_ENABLE_MODULES=ON (llvm#119473)
2f89d4b
to
216709f
Compare
Mark the new intrinsic helper functions as inline to avoid linker issues.
Add constant-folding support for the NVVM intrinsics for converting float/double to signed/unsigned int32/int64 types, including all rounding-modes and ftz modifiers.