Summary of this patch (free-form preamble placed ahead of the first "diff --git" header):
  * llvm/include/llvm/IR/IntrinsicsNVVM.td: adds four intrinsic records, int_nvvm_f2tf32_{rn,rz}{,_relu}_satfinite,
    each taking one llvm_float_ty, returning llvm_i32_ty, marked [IntrNoMem, IntrNoCallback], and bound to a
    ClangBuiltin of the matching __nvvm_f2tf32_* name.
  * llvm/lib/Target/NVPTX/NVPTXInstrInfo.td: adds four CVT_TO_TF32 instantiations, CVT_to_tf32_{rn,rz}{,_relu}_satf,
    with modifier strings "rn.satfinite", "rz.satfinite", "rn.relu.satfinite", "rz.relu.satfinite", each passed
    the predicate list [hasPTX<86>, hasSM<100>].
  * llvm/test/CodeGen/NVPTX/convert-sm100.ll: new test; runs llc with -mcpu=sm_100 -mattr=+ptx86 and checks that each
    new intrinsic call is emitted as the corresponding cvt.<modifier>.tf32.f32 instruction.
NOTE(review): no selection pattern linking the new intrinsics to the new instructions appears in this diff;
  presumably the CVT_TO_TF32 multiclass (not shown here) provides it - confirm before relying on the test.
NOTE(review): the hunk headers declare multi-line hunks (10->18, 6->11 and 0->68 lines), but the hunk bodies below
  have their line breaks collapsed into spaces, so this text will likely not apply until they are restored.

diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 68c2373a1a454..9a2f38d760e65 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -1444,10 +1444,18 @@ let TargetPrefix = "nvvm" in { Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; def int_nvvm_f2tf32_rn_relu : ClangBuiltin<"__nvvm_f2tf32_rn_relu">, Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + def int_nvvm_f2tf32_rn_satfinite : ClangBuiltin<"__nvvm_f2tf32_rn_satfinite">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + def int_nvvm_f2tf32_rn_relu_satfinite : ClangBuiltin<"__nvvm_f2tf32_rn_relu_satfinite">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; def int_nvvm_f2tf32_rz : ClangBuiltin<"__nvvm_f2tf32_rz">, Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; def int_nvvm_f2tf32_rz_relu : ClangBuiltin<"__nvvm_f2tf32_rz_relu">, Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + def int_nvvm_f2tf32_rz_satfinite : ClangBuiltin<"__nvvm_f2tf32_rz_satfinite">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; + def int_nvvm_f2tf32_rz_relu_satfinite : ClangBuiltin<"__nvvm_f2tf32_rz_relu_satfinite">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>; def int_nvvm_ff_to_e4m3x2_rn : ClangBuiltin<"__nvvm_ff_to_e4m3x2_rn">, Intrinsic<[llvm_i16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>; diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index f17799c130015..633a99d0fc1be 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -764,6 +764,11 @@ let hasSideEffects = false in { defm CVT_to_tf32_rz_relu : CVT_TO_TF32<"rz.relu">; defm CVT_to_tf32_rna : CVT_TO_TF32<"rna", [hasPTX<70>, hasSM<80>]>; defm CVT_to_tf32_rna_satf : 
CVT_TO_TF32<"rna.satfinite", [hasPTX<81>, hasSM<89>]>; + + defm CVT_to_tf32_rn_satf : CVT_TO_TF32<"rn.satfinite", [hasPTX<86>, hasSM<100>]>; + defm CVT_to_tf32_rz_satf : CVT_TO_TF32<"rz.satfinite", [hasPTX<86>, hasSM<100>]>; + defm CVT_to_tf32_rn_relu_satf : CVT_TO_TF32<"rn.relu.satfinite", [hasPTX<86>, hasSM<100>]>; + defm CVT_to_tf32_rz_relu_satf : CVT_TO_TF32<"rz.relu.satfinite", [hasPTX<86>, hasSM<100>]>; } def fpround_oneuse : PatFrag<(ops node:$a), (fpround node:$a), [{ diff --git a/llvm/test/CodeGen/NVPTX/convert-sm100.ll b/llvm/test/CodeGen/NVPTX/convert-sm100.ll new file mode 100644 index 0000000000000..f92822f7e0c16 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/convert-sm100.ll @@ -0,0 +1,68 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100 -mattr=+ptx86| FileCheck --check-prefixes=CHECK %s +; RUN: %if ptxas-12.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100 -mattr=+ptx86| %ptxas-verify -arch=sm_100 %} + +declare i32 @llvm.nvvm.f2tf32.rn.satfinite(float %f1) +declare i32 @llvm.nvvm.f2tf32.rn.relu.satfinite(float %f1) +declare i32 @llvm.nvvm.f2tf32.rz.satfinite(float %f1) +declare i32 @llvm.nvvm.f2tf32.rz.relu.satfinite(float %f1) + +define i32 @cvt_rn_satf_tf32_f32(float %f1) { +; CHECK-LABEL: cvt_rn_satf_tf32_f32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .f32 %f<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [cvt_rn_satf_tf32_f32_param_0]; +; CHECK-NEXT: cvt.rn.satfinite.tf32.f32 %r1, %f1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; + %val = call i32 @llvm.nvvm.f2tf32.rn.satfinite(float %f1) + ret i32 %val +} + +define i32 @cvt_rn_relu_satf_tf32_f32(float %f1) { +; CHECK-LABEL: cvt_rn_relu_satf_tf32_f32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .f32 %f<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, 
[cvt_rn_relu_satf_tf32_f32_param_0]; +; CHECK-NEXT: cvt.rn.relu.satfinite.tf32.f32 %r1, %f1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; + %val = call i32 @llvm.nvvm.f2tf32.rn.relu.satfinite(float %f1) + ret i32 %val +} + +define i32 @cvt_rz_satf_tf32_f32(float %f1) { +; CHECK-LABEL: cvt_rz_satf_tf32_f32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .f32 %f<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [cvt_rz_satf_tf32_f32_param_0]; +; CHECK-NEXT: cvt.rz.satfinite.tf32.f32 %r1, %f1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; + %val = call i32 @llvm.nvvm.f2tf32.rz.satfinite(float %f1) + ret i32 %val +} + +define i32 @cvt_rz_relu_satf_tf32_f32(float %f1) { +; CHECK-LABEL: cvt_rz_relu_satf_tf32_f32( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-NEXT: .reg .f32 %f<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.f32 %f1, [cvt_rz_relu_satf_tf32_f32_param_0]; +; CHECK-NEXT: cvt.rz.relu.satfinite.tf32.f32 %r1, %f1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; + %val = call i32 @llvm.nvvm.f2tf32.rz.relu.satfinite(float %f1) + ret i32 %val +}