From 8f9868d88549814a7f81bd75732cc3bd6e875bf9 Mon Sep 17 00:00:00 2001 From: Michael Klemm Date: Mon, 15 Sep 2025 16:02:18 +0200 Subject: [PATCH 01/15] Add first rough implementation of -ffast-real-mod --- flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 32 +++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index ce1376fd209cc..5f78fe6592dfc 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -7009,8 +7009,24 @@ mlir::Value IntrinsicLibrary::genMergeBits(mlir::Type resultType, } // MOD +static mlir::Value genFastMod(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value a, mlir::Value p) { + mlir::Value divResult = mlir::arith::DivFOp::create(builder, loc, a, p); + fprintf(stderr, "--> int type width: %d\n", a.getType().getIntOrFloatBitWidth()); + mlir::Type intType = builder.getIntegerType( + a.getType().getIntOrFloatBitWidth(), /*signed=*/true); + mlir::Value intResult = builder.createConvert(loc, intType, divResult); + mlir::Value cnvResult = builder.createConvert(loc, a.getType(), intResult); + mlir::Value mulResult = + mlir::arith::MulFOp::create(builder, loc, cnvResult, p); + mlir::Value subResult = + mlir::arith::SubFOp::create(builder, loc, a, mulResult); + return subResult; +} + mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType, llvm::ArrayRef args) { + bool useFastRealMod = true; assert(args.size() == 2); if (resultType.isUnsignedInteger()) { mlir::Type signlessType = mlir::IntegerType::get( @@ -7022,9 +7038,19 @@ mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType, if (mlir::isa(resultType)) return mlir::arith::RemSIOp::create(builder, loc, args[0], args[1]); - // Use runtime. 
- return builder.createConvert( - loc, resultType, fir::runtime::genMod(builder, loc, args[0], args[1])); + if (useFastRealMod) { + // If fast MOD for REAL has been requested, generate less precise, + // but faster code directly. + assert(resultType.isFloat() && + "non floating-point type hit for fast real MOD"); + fprintf(stderr, "--> emitting fast mod operation for MOD\n"); + return builder.createConvert(loc, resultType, + genFastMod(builder, loc, args[0], args[1])); + } else { + // Use runtime. + return builder.createConvert( + loc, resultType, fir::runtime::genMod(builder, loc, args[0], args[1])); + } } // MODULO From ecae88cd454a9bd82b5bdd990dca570ed24fa6f8 Mon Sep 17 00:00:00 2001 From: Michael Klemm Date: Mon, 15 Sep 2025 20:04:41 +0200 Subject: [PATCH 02/15] Add command line flag --- clang/include/clang/Driver/Options.td | 2 +- clang/lib/Driver/ToolChains/Flang.cpp | 5 +++++ flang/include/flang/Support/LangOptions.def | 3 ++- flang/lib/Frontend/CompilerInvocation.cpp | 6 ++++++ flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 7 +++++++ 5 files changed, 21 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a7c514e809aa9..3293a91d107b2 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2750,6 +2750,7 @@ def fno_unsafe_math_optimizations : Flag<["-"], "fno-unsafe-math-optimizations"> Group; def fassociative_math : Flag<["-"], "fassociative-math">, Visibility<[ClangOption, FlangOption]>, Group; def fno_associative_math : Flag<["-"], "fno-associative-math">, Visibility<[ClangOption, FlangOption]>, Group; +def ffast_real_mod : Flag<["-"], "ffast-real-mod">, Visibility<[FlangOption, FC1Option]>, Group; defm reciprocal_math : BoolFOption<"reciprocal-math", LangOpts<"AllowRecip">, DefaultFalse, PosFlag, Alias; def emit_hlfir : Flag<["-"], "emit-hlfir">, Group, HelpText<"Build the parse tree, then lower it to HLFIR">; - } // let Visibility = 
[FC1Option] //===----------------------------------------------------------------------===// diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 1535f4cebf436..1969bfd08e27c 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -766,6 +766,11 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args, if (ReciprocalMath) CmdArgs.push_back("-freciprocal-math"); + + if (Args.hasArg(options::OPT_ffast_real_mod)) { + fprintf(stderr, "##> -ffast-real-mod: %d\n", options::OPT_ffast_real_mod); + CmdArgs.push_back("-ffast-real-mod"); + } } static void renderRemarksOptions(const ArgList &Args, ArgStringList &CmdArgs, diff --git a/flang/include/flang/Support/LangOptions.def b/flang/include/flang/Support/LangOptions.def index ba72d7b4b7212..e310ecf37a52d 100644 --- a/flang/include/flang/Support/LangOptions.def +++ b/flang/include/flang/Support/LangOptions.def @@ -60,7 +60,8 @@ LANGOPT(OpenMPNoThreadState, 1, 0) LANGOPT(OpenMPNoNestedParallelism, 1, 0) /// Use SIMD only OpenMP support. 
LANGOPT(OpenMPSimd, 1, false) - +/// Enable fast MOD operations for REAL +LANGOPT(FastRealMod, 1, false) LANGOPT(VScaleMin, 32, 0) ///< Minimum vscale range value LANGOPT(VScaleMax, 32, 0) ///< Maximum vscale range value diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 6295a58b1bdad..e93af6d7b9c38 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -1424,6 +1424,12 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc, opts.setFPContractMode(Fortran::common::LangOptions::FPM_Fast); } + if (args.hasArg(clang::driver::options::OPT_ffast_real_mod)) { + fprintf(stderr, "$$> FC1: -ffast-real-mod: %d\n", (int) opts.FastRealMod); + opts.FastRealMod = true; + fprintf(stderr, "$$> FC1: -ffast-real-mod: %d\n", (int) opts.FastRealMod); + } + return true; } diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 5f78fe6592dfc..41d0ef830b2b1 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -46,6 +46,7 @@ #include "flang/Optimizer/Support/Utils.h" #include "flang/Runtime/entry-names.h" #include "flang/Runtime/iostat-consts.h" +#include "flang/Support/LangOptions.h" #include "mlir/Dialect/Complex/IR/Complex.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/LLVMIR/LLVMTypes.h" @@ -7027,6 +7028,11 @@ static mlir::Value genFastMod(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType, llvm::ArrayRef args) { bool useFastRealMod = true; + auto mod = builder.getModule(); + if (auto attr = mod->getAttrOfType("omp.version")) + fprintf(stderr, "omp version: %d\n", attr.getVersion()); + + fprintf(stderr, "--> -ffast-real-mod: %d\n", (int) useFastRealMod); assert(args.size() == 2); if (resultType.isUnsignedInteger()) { mlir::Type signlessType = mlir::IntegerType::get( @@ -7048,6 
+7054,7 @@ mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType, genFastMod(builder, loc, args[0], args[1])); } else { // Use runtime. + fprintf(stderr, "--> emitting slow path MOD\n"); return builder.createConvert( loc, resultType, fir::runtime::genMod(builder, loc, args[0], args[1])); } From 85b14eb1d8889f22487b482af4741bf27d6a9d2a Mon Sep 17 00:00:00 2001 From: Michael Klemm Date: Mon, 22 Sep 2025 19:34:48 +0200 Subject: [PATCH 03/15] Pass -ffast-real-mod via MLIR module attribute to code-gen --- flang/lib/Frontend/FrontendActions.cpp | 7 +++++++ flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 12 ++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 3bef6b1c31825..614d2edf606f1 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -277,6 +277,13 @@ bool CodeGenAction::beginSourceFileAction() { ci.getInvocation().getLangOpts().OpenMPVersion); } + if (ci.getInvocation().getLangOpts().FastRealMod) { + fprintf(stderr, "YAY!!!!\n"); + auto mod = lb.getModule(); + mod.getOperation()->setAttr(mlir::StringAttr::get(mod.getContext(), llvm::Twine{"fir.fast_real_mod"}), + mlir::BoolAttr::get(mod.getContext(), true)); + } + // Create a parse tree and lower it to FIR parseAndLowerTree(ci, lb); diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 41d0ef830b2b1..64dcebcf021e3 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -7027,12 +7027,16 @@ static mlir::Value genFastMod(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType, llvm::ArrayRef args) { - bool useFastRealMod = true; auto mod = builder.getModule(); - if (auto attr = mod->getAttrOfType("omp.version")) - fprintf(stderr, "omp version: %d\n", attr.getVersion()); - + bool useFastRealMod 
= false; + if (auto attr = mod->getAttrOfType("fir.fast_real_mod")) { + fprintf(stderr, "fir.fast_real_mod present: %d\n", (int) attr.getValue()); + useFastRealMod = attr.getValue(); + } else { + fprintf(stderr, "fir.fast_real_mod not present\n"); + } fprintf(stderr, "--> -ffast-real-mod: %d\n", (int) useFastRealMod); + assert(args.size() == 2); if (resultType.isUnsignedInteger()) { mlir::Type signlessType = mlir::IntegerType::get( From d32863a8e54308e07eb023a046f2d75eea61b77a Mon Sep 17 00:00:00 2001 From: Michael Klemm Date: Mon, 22 Sep 2025 20:06:46 +0200 Subject: [PATCH 04/15] Clean up code --- clang/lib/Driver/ToolChains/Flang.cpp | 4 +--- flang/lib/Frontend/CompilerInvocation.cpp | 2 -- flang/lib/Frontend/FrontendActions.cpp | 7 ++++--- flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 10 +--------- 4 files changed, 6 insertions(+), 17 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 1969bfd08e27c..fbaa083d204b8 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -767,10 +767,8 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args, if (ReciprocalMath) CmdArgs.push_back("-freciprocal-math"); - if (Args.hasArg(options::OPT_ffast_real_mod)) { - fprintf(stderr, "##> -ffast-real-mod: %d\n", options::OPT_ffast_real_mod); + if (Args.hasArg(options::OPT_ffast_real_mod)) CmdArgs.push_back("-ffast-real-mod"); - } } static void renderRemarksOptions(const ArgList &Args, ArgStringList &CmdArgs, diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index e93af6d7b9c38..5b3f64971013e 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -1425,9 +1425,7 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc, } if (args.hasArg(clang::driver::options::OPT_ffast_real_mod)) { - fprintf(stderr, "$$> FC1: -ffast-real-mod: %d\n", (int) 
opts.FastRealMod); opts.FastRealMod = true; - fprintf(stderr, "$$> FC1: -ffast-real-mod: %d\n", (int) opts.FastRealMod); } return true; diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 614d2edf606f1..d22124bc0bdeb 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -278,10 +278,11 @@ bool CodeGenAction::beginSourceFileAction() { } if (ci.getInvocation().getLangOpts().FastRealMod) { - fprintf(stderr, "YAY!!!!\n"); auto mod = lb.getModule(); - mod.getOperation()->setAttr(mlir::StringAttr::get(mod.getContext(), llvm::Twine{"fir.fast_real_mod"}), - mlir::BoolAttr::get(mod.getContext(), true)); + mod.getOperation()->setAttr( + mlir::StringAttr::get(mod.getContext(), + llvm::Twine{"fir.fast_real_mod"}), + mlir::BoolAttr::get(mod.getContext(), true)); } // Create a parse tree and lower it to FIR diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 64dcebcf021e3..fbb03bf9f0291 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -7013,7 +7013,6 @@ mlir::Value IntrinsicLibrary::genMergeBits(mlir::Type resultType, static mlir::Value genFastMod(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value a, mlir::Value p) { mlir::Value divResult = mlir::arith::DivFOp::create(builder, loc, a, p); - fprintf(stderr, "--> int type width: %d\n", a.getType().getIntOrFloatBitWidth()); mlir::Type intType = builder.getIntegerType( a.getType().getIntOrFloatBitWidth(), /*signed=*/true); mlir::Value intResult = builder.createConvert(loc, intType, divResult); @@ -7029,13 +7028,8 @@ mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType, llvm::ArrayRef args) { auto mod = builder.getModule(); bool useFastRealMod = false; - if (auto attr = mod->getAttrOfType("fir.fast_real_mod")) { - fprintf(stderr, "fir.fast_real_mod present: %d\n", (int) attr.getValue()); + if (auto attr 
= mod->getAttrOfType("fir.fast_real_mod")) useFastRealMod = attr.getValue(); - } else { - fprintf(stderr, "fir.fast_real_mod not present\n"); - } - fprintf(stderr, "--> -ffast-real-mod: %d\n", (int) useFastRealMod); assert(args.size() == 2); if (resultType.isUnsignedInteger()) { @@ -7053,12 +7047,10 @@ mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType, // but faster code directly. assert(resultType.isFloat() && "non floating-point type hit for fast real MOD"); - fprintf(stderr, "--> emitting fast mod operation for MOD\n"); return builder.createConvert(loc, resultType, genFastMod(builder, loc, args[0], args[1])); } else { // Use runtime. - fprintf(stderr, "--> emitting slow path MOD\n"); return builder.createConvert( loc, resultType, fir::runtime::genMod(builder, loc, args[0], args[1])); } From d06a1adbde034e502d08ad16c3e858de396cfbea Mon Sep 17 00:00:00 2001 From: Michael Klemm Date: Mon, 22 Sep 2025 20:48:21 +0200 Subject: [PATCH 05/15] Add test --- flang/test/Lower/Intrinsics/fast-real-mod.f90 | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 flang/test/Lower/Intrinsics/fast-real-mod.f90 diff --git a/flang/test/Lower/Intrinsics/fast-real-mod.f90 b/flang/test/Lower/Intrinsics/fast-real-mod.f90 new file mode 100644 index 0000000000000..26422e305cbe8 --- /dev/null +++ b/flang/test/Lower/Intrinsics/fast-real-mod.f90 @@ -0,0 +1,57 @@ +! RUN: %flang_fc1 -ffast-real-mod -emit-mlir -o - %s | FileCheck %s + +! CHECK: module attributes {{{.*}}fir.fast_real_mod = true{{.*}}} + +! CHECK-LABEL: @_QPmod_real4 +subroutine mod_real4(r, a, p) + implicit none + real(kind=4) :: r, a, p +! CHECK: %[[A:.*]] = fir.declare{{.*}}a" +! CHECK: %[[P:.*]] = fir.declare{{.*}}p" +! CHECK: %[[R:.*]] = fir.declare{{.*}}r" +! CHECK: %[[A_LOAD:.*]] = fir.load %[[A]] +! CHECK: %[[P_LOAD:.*]] = fir.load %[[P]] +! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath : f32 +! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f32) -> si32 +! 
CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si32) -> f32 +! CHECK: %[[MUL:.*]] = arith.mulf %8, %5 fastmath : f32 +! CHECK: %[[SUB:.*]] = arith.subf %4, %9 fastmath : f32 +! CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref + r = mod(a, p) +end subroutine mod_real4 + +! CHECK-LABEL: @_QPmod_real8 +subroutine mod_real8(r, a, p) + implicit none + real(kind=8) :: r, a, p +! CHECK: %[[A:.*]] = fir.declare{{.*}}a" +! CHECK: %[[P:.*]] = fir.declare{{.*}}p" +! CHECK: %[[R:.*]] = fir.declare{{.*}}r" +! CHECK: %[[A_LOAD:.*]] = fir.load %[[A]] +! CHECK: %[[P_LOAD:.*]] = fir.load %[[P]] +! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath : f64 +! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f64) -> si64 +! CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si64) -> f64 +! CHECK: %[[MUL:.*]] = arith.mulf %8, %5 fastmath : f64 +! CHECK: %[[SUB:.*]] = arith.subf %4, %9 fastmath : f64 +! CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref + r = mod(a, p) +end subroutine mod_real8 + +! CHECK-LABEL: @_QPmod_real10 +subroutine mod_real10(r, a, p) + implicit none + real(kind=10) :: r, a, p +! CHECK: %[[A:.*]] = fir.declare{{.*}}a" +! CHECK: %[[P:.*]] = fir.declare{{.*}}p" +! CHECK: %[[R:.*]] = fir.declare{{.*}}r" +! CHECK: %[[A_LOAD:.*]] = fir.load %[[A]] +! CHECK: %[[P_LOAD:.*]] = fir.load %[[P]] +! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath : f80 +! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f80) -> si80 +! CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si80) -> f80 +! CHECK: %[[MUL:.*]] = arith.mulf %8, %5 fastmath : f80 +! CHECK: %[[SUB:.*]] = arith.subf %4, %9 fastmath : f80 +! 
CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref + r = mod(a, p) +end subroutine mod_real10 From 313abd0266b4a643ce71905266ad0ad16a99a73b Mon Sep 17 00:00:00 2001 From: Michael Klemm Date: Mon, 22 Sep 2025 21:22:10 +0200 Subject: [PATCH 06/15] Improve test and add kind=16 test --- flang/test/Lower/Intrinsics/fast-real-mod.f90 | 42 +++++++++++++------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/flang/test/Lower/Intrinsics/fast-real-mod.f90 b/flang/test/Lower/Intrinsics/fast-real-mod.f90 index 26422e305cbe8..3bdd5930a1706 100644 --- a/flang/test/Lower/Intrinsics/fast-real-mod.f90 +++ b/flang/test/Lower/Intrinsics/fast-real-mod.f90 @@ -1,4 +1,4 @@ -! RUN: %flang_fc1 -ffast-real-mod -emit-mlir -o - %s | FileCheck %s +! RUN: %flang_fc1 -ffast-real-mod -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK%if target=x86_64{{.*}} %{,CHECK-KIND10%}%if flang-supports-f128-math %{,CHECK-KIND16%} ! CHECK: module attributes {{{.*}}fir.fast_real_mod = true{{.*}}} @@ -42,16 +42,34 @@ end subroutine mod_real8 subroutine mod_real10(r, a, p) implicit none real(kind=10) :: r, a, p -! CHECK: %[[A:.*]] = fir.declare{{.*}}a" -! CHECK: %[[P:.*]] = fir.declare{{.*}}p" -! CHECK: %[[R:.*]] = fir.declare{{.*}}r" -! CHECK: %[[A_LOAD:.*]] = fir.load %[[A]] -! CHECK: %[[P_LOAD:.*]] = fir.load %[[P]] -! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath : f80 -! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f80) -> si80 -! CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si80) -> f80 -! CHECK: %[[MUL:.*]] = arith.mulf %8, %5 fastmath : f80 -! CHECK: %[[SUB:.*]] = arith.subf %4, %9 fastmath : f80 -! CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref +! CHECK-KIND10: %[[A:.*]] = fir.declare{{.*}}a" +! CHECK-KIND10: %[[P:.*]] = fir.declare{{.*}}p" +! CHECK-KIND10: %[[R:.*]] = fir.declare{{.*}}r" +! CHECK-KIND10: %[[A_LOAD:.*]] = fir.load %[[A]] +! CHECK-KIND10: %[[P_LOAD:.*]] = fir.load %[[P]] +! 
CHECK-KIND10: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath : f80 +! CHECK-KIND10: %[[CV1:.*]] = fir.convert %[[DIV]] : (f80) -> si80 +! CHECK-KIND10: %[[CV2:.*]] = fir.convert %[[CV1]] : (si80) -> f80 +! CHECK-KIND10: %[[MUL:.*]] = arith.mulf %8, %5 fastmath : f80 +! CHECK-KIND10: %[[SUB:.*]] = arith.subf %4, %9 fastmath : f80 +! CHECK-KIND10: fir.store %[[SUB]] to %[[R]] : !fir.ref r = mod(a, p) end subroutine mod_real10 + +! CHECK-LABEL: @_QPmod_real16 +subroutine mod_real16(r, a, p) + implicit none + real(kind=16) :: r, a, p +! CHECK-KIND16: %[[A:.*]] = fir.declare{{.*}}a" +! CHECK-KIND16: %[[P:.*]] = fir.declare{{.*}}p" +! CHECK-KIND16: %[[R:.*]] = fir.declare{{.*}}r" +! CHECK-KIND16: %[[A_LOAD:.*]] = fir.load %[[A]] +! CHECK-KIND16: %[[P_LOAD:.*]] = fir.load %[[P]] +! CHECK-KIND16: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath : f128 +! CHECK-KIND16: %[[CV1:.*]] = fir.convert %[[DIV]] : (f128) -> si128 +! CHECK-KIND16: %[[CV2:.*]] = fir.convert %[[CV1]] : (si128) -> f128 +! CHECK-KIND16: %[[MUL:.*]] = arith.mulf %8, %5 fastmath : f128 +! CHECK-KIND16: %[[SUB:.*]] = arith.subf %4, %9 fastmath : f128 +! CHECK-KIND16: fir.store %[[SUB]] to %[[R]] : !fir.ref + r = mod(a, p) +end subroutine mod_real16 From 2572cc07389281d00ed0129d07e84bc9bf4d79b4 Mon Sep 17 00:00:00 2001 From: Michael Klemm Date: Tue, 23 Sep 2025 14:30:18 +0200 Subject: [PATCH 07/15] Don't use hard-coded register numbers --- flang/test/Lower/Intrinsics/fast-real-mod.f90 | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/flang/test/Lower/Intrinsics/fast-real-mod.f90 b/flang/test/Lower/Intrinsics/fast-real-mod.f90 index 3bdd5930a1706..00607fa5c30d1 100644 --- a/flang/test/Lower/Intrinsics/fast-real-mod.f90 +++ b/flang/test/Lower/Intrinsics/fast-real-mod.f90 @@ -14,8 +14,8 @@ subroutine mod_real4(r, a, p) ! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath : f32 ! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f32) -> si32 ! 
CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si32) -> f32 -! CHECK: %[[MUL:.*]] = arith.mulf %8, %5 fastmath : f32 -! CHECK: %[[SUB:.*]] = arith.subf %4, %9 fastmath : f32 +! CHECK: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath : f32 +! CHECK: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath : f32 ! CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref r = mod(a, p) end subroutine mod_real4 @@ -32,8 +32,8 @@ subroutine mod_real8(r, a, p) ! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath : f64 ! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f64) -> si64 ! CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si64) -> f64 -! CHECK: %[[MUL:.*]] = arith.mulf %8, %5 fastmath : f64 -! CHECK: %[[SUB:.*]] = arith.subf %4, %9 fastmath : f64 +! CHECK: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath : f64 +! CHECK: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath : f64 ! CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref r = mod(a, p) end subroutine mod_real8 @@ -50,8 +50,8 @@ subroutine mod_real10(r, a, p) ! CHECK-KIND10: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath : f80 ! CHECK-KIND10: %[[CV1:.*]] = fir.convert %[[DIV]] : (f80) -> si80 ! CHECK-KIND10: %[[CV2:.*]] = fir.convert %[[CV1]] : (si80) -> f80 -! CHECK-KIND10: %[[MUL:.*]] = arith.mulf %8, %5 fastmath : f80 -! CHECK-KIND10: %[[SUB:.*]] = arith.subf %4, %9 fastmath : f80 +! CHECK-KIND10: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath : f80 +! CHECK-KIND10: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath : f80 ! CHECK-KIND10: fir.store %[[SUB]] to %[[R]] : !fir.ref r = mod(a, p) end subroutine mod_real10 @@ -68,8 +68,8 @@ subroutine mod_real16(r, a, p) ! CHECK-KIND16: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath : f128 ! CHECK-KIND16: %[[CV1:.*]] = fir.convert %[[DIV]] : (f128) -> si128 ! CHECK-KIND16: %[[CV2:.*]] = fir.convert %[[CV1]] : (si128) -> f128 -! CHECK-KIND16: %[[MUL:.*]] = arith.mulf %8, %5 fastmath : f128 -! 
CHECK-KIND16: %[[SUB:.*]] = arith.subf %4, %9 fastmath : f128 +! CHECK-KIND16: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath : f128 +! CHECK-KIND16: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath : f128 ! CHECK-KIND16: fir.store %[[SUB]] to %[[R]] : !fir.ref r = mod(a, p) end subroutine mod_real16 From 7cc56df58bd4283639a9c62159cb57ecab8dd113 Mon Sep 17 00:00:00 2001 From: Michael Klemm Date: Tue, 23 Sep 2025 14:47:15 +0200 Subject: [PATCH 08/15] Honor -ffast-math when present --- flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index fbb03bf9f0291..1274164f25813 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -7012,15 +7012,18 @@ mlir::Value IntrinsicLibrary::genMergeBits(mlir::Type resultType, // MOD static mlir::Value genFastMod(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value a, mlir::Value p) { - mlir::Value divResult = mlir::arith::DivFOp::create(builder, loc, a, p); + auto fastmathFlags = mlir::arith::FastMathFlags::contract; + auto fastmathAttr = + mlir::arith::FastMathFlagsAttr::get(builder.getContext(), fastmathFlags); + mlir::Value divResult = mlir::arith::DivFOp::create(builder, loc, a, p, fastmathAttr); mlir::Type intType = builder.getIntegerType( a.getType().getIntOrFloatBitWidth(), /*signed=*/true); mlir::Value intResult = builder.createConvert(loc, intType, divResult); mlir::Value cnvResult = builder.createConvert(loc, a.getType(), intResult); mlir::Value mulResult = - mlir::arith::MulFOp::create(builder, loc, cnvResult, p); + mlir::arith::MulFOp::create(builder, loc, cnvResult, p, fastmathAttr); mlir::Value subResult = - mlir::arith::SubFOp::create(builder, loc, a, mulResult); + mlir::arith::SubFOp::create(builder, loc, a, mulResult, fastmathAttr); return subResult; } From 
ed6885752083177a33929b9ebbe27546819ecba9 Mon Sep 17 00:00:00 2001 From: Michael Klemm Date: Thu, 25 Sep 2025 08:45:51 +0200 Subject: [PATCH 09/15] Remove unwanted changes --- clang/include/clang/Driver/Options.td | 1 + flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 3293a91d107b2..4dc4acd5603cb 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -7374,6 +7374,7 @@ def emit_mlir : Flag<["-"], "emit-mlir">, Alias; def emit_hlfir : Flag<["-"], "emit-hlfir">, Group, HelpText<"Build the parse tree, then lower it to HLFIR">; + } // let Visibility = [FC1Option] //===----------------------------------------------------------------------===// diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 1274164f25813..5e0e4fbf81717 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -46,7 +46,6 @@ #include "flang/Optimizer/Support/Utils.h" #include "flang/Runtime/entry-names.h" #include "flang/Runtime/iostat-consts.h" -#include "flang/Support/LangOptions.h" #include "mlir/Dialect/Complex/IR/Complex.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/LLVMIR/LLVMTypes.h" From afc2063c71f9596dd03e6a1b95a1ced3d5d01561 Mon Sep 17 00:00:00 2001 From: Michael Klemm Date: Thu, 25 Sep 2025 13:49:37 +0200 Subject: [PATCH 10/15] Format code --- flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 5e0e4fbf81717..dfcf034ae21d6 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -7014,7 +7014,8 @@ static mlir::Value genFastMod(fir::FirOpBuilder 
&builder, mlir::Location loc, auto fastmathFlags = mlir::arith::FastMathFlags::contract; auto fastmathAttr = mlir::arith::FastMathFlagsAttr::get(builder.getContext(), fastmathFlags); - mlir::Value divResult = mlir::arith::DivFOp::create(builder, loc, a, p, fastmathAttr); + mlir::Value divResult = + mlir::arith::DivFOp::create(builder, loc, a, p, fastmathAttr); mlir::Type intType = builder.getIntegerType( a.getType().getIntOrFloatBitWidth(), /*signed=*/true); mlir::Value intResult = builder.createConvert(loc, intType, divResult); From 3b392c858592b8cf9f333c3c4cfb361112b528e3 Mon Sep 17 00:00:00 2001 From: Michael Klemm Date: Thu, 25 Sep 2025 15:49:08 +0200 Subject: [PATCH 11/15] Follow suit of the test in flang/Lower/Intrinsics/mod.f90 --- flang/test/Lower/Intrinsics/fast-real-mod.f90 | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/flang/test/Lower/Intrinsics/fast-real-mod.f90 b/flang/test/Lower/Intrinsics/fast-real-mod.f90 index 00607fa5c30d1..62b2c4d58af02 100644 --- a/flang/test/Lower/Intrinsics/fast-real-mod.f90 +++ b/flang/test/Lower/Intrinsics/fast-real-mod.f90 @@ -41,7 +41,8 @@ end subroutine mod_real8 ! CHECK-LABEL: @_QPmod_real10 subroutine mod_real10(r, a, p) implicit none - real(kind=10) :: r, a, p + integer, parameter :: kind10 = merge(10, 4, selected_real_kind(p=18).eq.10) + real(kind=kind10) :: r, a, p ! CHECK-KIND10: %[[A:.*]] = fir.declare{{.*}}a" ! CHECK-KIND10: %[[P:.*]] = fir.declare{{.*}}p" ! CHECK-KIND10: %[[R:.*]] = fir.declare{{.*}}r" @@ -59,7 +60,8 @@ end subroutine mod_real10 ! CHECK-LABEL: @_QPmod_real16 subroutine mod_real16(r, a, p) implicit none - real(kind=16) :: r, a, p + integer, parameter :: kind16 = merge(16, 4, selected_real_kind(p=33).eq.16) + real(kind=kind16) :: r, a, p ! CHECK-KIND16: %[[A:.*]] = fir.declare{{.*}}a" ! CHECK-KIND16: %[[P:.*]] = fir.declare{{.*}}p" ! 
CHECK-KIND16: %[[R:.*]] = fir.declare{{.*}}r" From 6d5836c0d0175d349d3a8e2926349c9151e9830a Mon Sep 17 00:00:00 2001 From: Michael Klemm Date: Thu, 25 Sep 2025 16:54:30 +0200 Subject: [PATCH 12/15] Address reviewer comments --- flang/lib/Frontend/CompilerInvocation.cpp | 3 +-- flang/lib/Frontend/FrontendActions.cpp | 2 +- flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 4 +--- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 5b3f64971013e..d876d6a741303 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -1424,9 +1424,8 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc, opts.setFPContractMode(Fortran::common::LangOptions::FPM_Fast); } - if (args.hasArg(clang::driver::options::OPT_ffast_real_mod)) { + if (args.hasArg(clang::driver::options::OPT_ffast_real_mod)) opts.FastRealMod = true; - } return true; } diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index d22124bc0bdeb..c5e5bc11547a8 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -278,7 +278,7 @@ bool CodeGenAction::beginSourceFileAction() { } if (ci.getInvocation().getLangOpts().FastRealMod) { - auto mod = lb.getModule(); + mlir::ModuleOp mod = lb.getModule(); mod.getOperation()->setAttr( mlir::StringAttr::get(mod.getContext(), llvm::Twine{"fir.fast_real_mod"}), diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index dfcf034ae21d6..591e194ed2891 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -7045,11 +7045,9 @@ mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType, if (mlir::isa(resultType)) return mlir::arith::RemSIOp::create(builder, loc, args[0], args[1]); - if (useFastRealMod) { + if (useFastRealMod && 
resultType.isFloat()) { // If fast MOD for REAL has been requested, generate less precise, // but faster code directly. - assert(resultType.isFloat() && - "non floating-point type hit for fast real MOD"); return builder.createConvert(loc, resultType, genFastMod(builder, loc, args[0], args[1])); } else { From 5c8304dbb8d72800b9d0ab6e5be9856eb2e0f24d Mon Sep 17 00:00:00 2001 From: Michael Klemm Date: Thu, 25 Sep 2025 18:25:34 +0200 Subject: [PATCH 13/15] Add Flang driver check for -ffast-real-mod --- flang/test/Lower/Intrinsics/fast-real-mod.f90 | 3 +++ 1 file changed, 3 insertions(+) diff --git a/flang/test/Lower/Intrinsics/fast-real-mod.f90 b/flang/test/Lower/Intrinsics/fast-real-mod.f90 index 62b2c4d58af02..6cb90fe9fe233 100644 --- a/flang/test/Lower/Intrinsics/fast-real-mod.f90 +++ b/flang/test/Lower/Intrinsics/fast-real-mod.f90 @@ -1,5 +1,8 @@ +! RUN: %flang -ffast-real-mod -### -c %s 2>&1 | FileCheck %s -check-prefix CHECK-FAST-REAL-MOD ! RUN: %flang_fc1 -ffast-real-mod -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK%if target=x86_64{{.*}} %{,CHECK-KIND10%}%if flang-supports-f128-math %{,CHECK-KIND16%} +! CHECK-FAST-REAL-MOD: "-ffast-real-mod" + ! CHECK: module attributes {{{.*}}fir.fast_real_mod = true{{.*}}} ! 
CHECK-LABEL: @_QPmod_real4 From 52c48db8b64010d8500d7a3041f2a5b518d520e0 Mon Sep 17 00:00:00 2001 From: Michael Klemm Date: Fri, 26 Sep 2025 16:09:20 +0200 Subject: [PATCH 14/15] Add -fno-fast-real-mod --- clang/include/clang/Driver/Options.td | 1 + clang/lib/Driver/ToolChains/Flang.cpp | 2 ++ flang/lib/Frontend/CompilerInvocation.cpp | 2 ++ flang/test/Driver/fast-real-mod.f90 | 9 +++++++++ flang/test/Lower/Intrinsics/fast-real-mod.f90 | 3 --- flang/test/Lower/Intrinsics/mod.f90 | 1 + 6 files changed, 15 insertions(+), 3 deletions(-) create mode 100644 flang/test/Driver/fast-real-mod.f90 diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 4dc4acd5603cb..32a36f4f788ac 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2751,6 +2751,7 @@ def fno_unsafe_math_optimizations : Flag<["-"], "fno-unsafe-math-optimizations"> def fassociative_math : Flag<["-"], "fassociative-math">, Visibility<[ClangOption, FlangOption]>, Group; def fno_associative_math : Flag<["-"], "fno-associative-math">, Visibility<[ClangOption, FlangOption]>, Group; def ffast_real_mod : Flag<["-"], "ffast-real-mod">, Visibility<[FlangOption, FC1Option]>, Group; +def fno_fast_real_mod : Flag<["-"], "fno-fast-real-mod">, Visibility<[FlangOption, FC1Option]>, Group; defm reciprocal_math : BoolFOption<"reciprocal-math", LangOpts<"AllowRecip">, DefaultFalse, PosFlag&1 | FileCheck %s -check-prefix CHECK-FAST-REAL-MOD +! RUN: %flang -fno-fast-real-mod -### -c %s 2>&1 | FileCheck %s -check-prefix CHECK-NO-FAST-REAL-MOD + +! CHECK-FAST-REAL-MOD: "-ffast-real-mod" +! CHECK-NO-FAST-REAL-MOD: "-fno-fast-real-mod" + +program test + ! 
nothing to be done in here +end program test diff --git a/flang/test/Lower/Intrinsics/fast-real-mod.f90 b/flang/test/Lower/Intrinsics/fast-real-mod.f90 index 6cb90fe9fe233..62b2c4d58af02 100644 --- a/flang/test/Lower/Intrinsics/fast-real-mod.f90 +++ b/flang/test/Lower/Intrinsics/fast-real-mod.f90 @@ -1,8 +1,5 @@ -! RUN: %flang -ffast-real-mod -### -c %s 2>&1 | FileCheck %s -check-prefix CHECK-FAST-REAL-MOD ! RUN: %flang_fc1 -ffast-real-mod -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK%if target=x86_64{{.*}} %{,CHECK-KIND10%}%if flang-supports-f128-math %{,CHECK-KIND16%} -! CHECK-FAST-REAL-MOD: "-ffast-real-mod" - ! CHECK: module attributes {{{.*}}fir.fast_real_mod = true{{.*}}} ! CHECK-LABEL: @_QPmod_real4 diff --git a/flang/test/Lower/Intrinsics/mod.f90 b/flang/test/Lower/Intrinsics/mod.f90 index 5bc81d923b800..0577168bfbf8e 100644 --- a/flang/test/Lower/Intrinsics/mod.f90 +++ b/flang/test/Lower/Intrinsics/mod.f90 @@ -1,4 +1,5 @@ ! RUN: bbc -emit-fir %s -o - | FileCheck %s --check-prefixes=CHECK%if target=x86_64{{.*}} %{,CHECK-KIND10%}%if flang-supports-f128-math %{,CHECK-KIND16%} +! RUN: %flang_fc1 -ffast-real-mod -fno-fast-real-mod -emit-fir %s -o - | FileCheck %s --check-prefixes=CHECK%if target=x86_64{{.*}} %{,CHECK-KIND10%}%if flang-supports-f128-math %{,CHECK-KIND16%} ! 
CHECK-LABEL: func @_QPmod_testr4( subroutine mod_testr4(r, a, p) From d7beb16c26818ec2330f5fc9340c826caab3c88c Mon Sep 17 00:00:00 2001 From: Michael Klemm Date: Tue, 30 Sep 2025 22:37:12 +0200 Subject: [PATCH 15/15] Put the MOD optimization under AFN and add -fno-fast-real-mod --- clang/include/clang/Driver/Options.td | 5 +-- clang/lib/Driver/ToolChains/Flang.cpp | 8 ++--- flang/include/flang/Support/LangOptions.def | 2 +- flang/lib/Frontend/CompilerInvocation.cpp | 4 +-- flang/lib/Frontend/FrontendActions.cpp | 4 +-- flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 14 ++++---- flang/test/Driver/fast-real-mod.f90 | 2 -- flang/test/Lower/Intrinsics/fast-real-mod.f90 | 34 +++++++++++-------- flang/test/Lower/Intrinsics/mod.f90 | 1 - 9 files changed, 38 insertions(+), 36 deletions(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 32a36f4f788ac..c86d17cfc4289 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2750,8 +2750,9 @@ def fno_unsafe_math_optimizations : Flag<["-"], "fno-unsafe-math-optimizations"> Group; def fassociative_math : Flag<["-"], "fassociative-math">, Visibility<[ClangOption, FlangOption]>, Group; def fno_associative_math : Flag<["-"], "fno-associative-math">, Visibility<[ClangOption, FlangOption]>, Group; -def ffast_real_mod : Flag<["-"], "ffast-real-mod">, Visibility<[FlangOption, FC1Option]>, Group; -def fno_fast_real_mod : Flag<["-"], "fno-fast-real-mod">, Visibility<[FlangOption, FC1Option]>, Group; +def fno_fast_real_mod : Flag<["-"], "fno-fast-real-mod">, + Group, Visibility<[FlangOption, FC1Option]>, + HelpText<"Disable optimization of MOD for REAL types in presence of -ffast-math">; defm reciprocal_math : BoolFOption<"reciprocal-math", LangOpts<"AllowRecip">, DefaultFalse, PosFlagsetAttr( mlir::StringAttr::get(mod.getContext(), - llvm::Twine{"fir.fast_real_mod"}), + llvm::Twine{"fir.no_fast_real_mod"}), mlir::BoolAttr::get(mod.getContext(), 
true)); } diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 591e194ed2891..4a326963db69c 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -7030,9 +7030,11 @@ static mlir::Value genFastMod(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType, llvm::ArrayRef<mlir::Value> args) { auto mod = builder.getModule(); - bool useFastRealMod = false; - if (auto attr = mod->getAttrOfType<mlir::BoolAttr>("fir.fast_real_mod")) - useFastRealMod = attr.getValue(); + bool dontUseFastRealMod = false; + bool canUseApprox = mlir::arith::bitEnumContainsAny( + builder.getFastMathFlags(), mlir::arith::FastMathFlags::afn); + if (auto attr = mod->getAttrOfType<mlir::BoolAttr>("fir.no_fast_real_mod")) + dontUseFastRealMod = attr.getValue(); assert(args.size() == 2); if (resultType.isUnsignedInteger()) { @@ -7045,9 +7047,9 @@ mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType, if (mlir::isa<mlir::IntegerType>(resultType)) return mlir::arith::RemSIOp::create(builder, loc, args[0], args[1]); - if (useFastRealMod && resultType.isFloat()) { - // If fast MOD for REAL has been requested, generate less precise, - // but faster code directly. + if (resultType.isFloat() && canUseApprox && !dontUseFastRealMod) { + // Treat MOD as an approximate function and code-gen inline code + // instead of calling into the Fortran runtime library. return builder.createConvert(loc, resultType, genFastMod(builder, loc, args[0], args[1])); } else { diff --git a/flang/test/Driver/fast-real-mod.f90 b/flang/test/Driver/fast-real-mod.f90 index 8184f334c3d85..4ea9b26e64753 100644 --- a/flang/test/Driver/fast-real-mod.f90 +++ b/flang/test/Driver/fast-real-mod.f90 @@ -1,7 +1,5 @@ -! RUN: %flang -ffast-real-mod -### -c %s 2>&1 | FileCheck %s -check-prefix CHECK-FAST-REAL-MOD ! RUN: %flang -fno-fast-real-mod -### -c %s 2>&1 | FileCheck %s -check-prefix CHECK-NO-FAST-REAL-MOD -! 
CHECK-FAST-REAL-MOD: "-ffast-real-mod" ! CHECK-NO-FAST-REAL-MOD: "-fno-fast-real-mod" program test diff --git a/flang/test/Lower/Intrinsics/fast-real-mod.f90 b/flang/test/Lower/Intrinsics/fast-real-mod.f90 index 62b2c4d58af02..f80f7203ad1a2 100644 --- a/flang/test/Lower/Intrinsics/fast-real-mod.f90 +++ b/flang/test/Lower/Intrinsics/fast-real-mod.f90 @@ -1,6 +1,8 @@ -! RUN: %flang_fc1 -ffast-real-mod -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK%if target=x86_64{{.*}} %{,CHECK-KIND10%}%if flang-supports-f128-math %{,CHECK-KIND16%} +! RUN: %flang_fc1 -ffast-math -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK%if target=x86_64{{.*}} %{,CHECK-KIND10%}%if flang-supports-f128-math %{,CHECK-KIND16%} +! RUN: %flang_fc1 -ffast-math -fno-fast-real-mod -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK-NFRM%if target=x86_64{{.*}} %{,CHECK-NFRM-KIND10%}%if flang-supports-f128-math %{,CHECK-NFRM-KIND16%} -! CHECK: module attributes {{{.*}}fir.fast_real_mod = true{{.*}}} +! TODO: check line that fir.fast_real_mod is not there +! CHECK-NFRM: module attributes {{{.*}}fir.no_fast_real_mod = true{{.*}}} ! CHECK-LABEL: @_QPmod_real4 subroutine mod_real4(r, a, p) @@ -11,12 +13,13 @@ subroutine mod_real4(r, a, p) ! CHECK: %[[R:.*]] = fir.declare{{.*}}r" ! CHECK: %[[A_LOAD:.*]] = fir.load %[[A]] ! CHECK: %[[P_LOAD:.*]] = fir.load %[[P]] -! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f32 +! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f32 ! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f32) -> si32 ! CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si32) -> f32 -! CHECK: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f32 -! CHECK: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f32 +! CHECK: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f32 +! CHECK: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f32 ! CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref<f32> +! 
CHECK-NFRM: fir.call @_FortranAModReal4(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f32, f32, !fir.ref<i8>, i32) -> f32 r = mod(a, p) end subroutine mod_real4 @@ -29,12 +32,13 @@ subroutine mod_real8(r, a, p) ! CHECK: %[[R:.*]] = fir.declare{{.*}}r" ! CHECK: %[[A_LOAD:.*]] = fir.load %[[A]] ! CHECK: %[[P_LOAD:.*]] = fir.load %[[P]] -! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f64 +! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f64 ! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f64) -> si64 ! CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si64) -> f64 -! CHECK: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f64 -! CHECK: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f64 +! CHECK: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f64 +! CHECK: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f64 ! CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref<f64> +! CHECK-NFRM: fir.call @_FortranAModReal8(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f64, f64, !fir.ref<i8>, i32) -> f64 r = mod(a, p) end subroutine mod_real8 @@ -48,12 +52,13 @@ subroutine mod_real10(r, a, p) ! CHECK-KIND10: %[[R:.*]] = fir.declare{{.*}}r" ! CHECK-KIND10: %[[A_LOAD:.*]] = fir.load %[[A]] ! CHECK-KIND10: %[[P_LOAD:.*]] = fir.load %[[P]] -! CHECK-KIND10: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f80 +! CHECK-KIND10: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f80 ! CHECK-KIND10: %[[CV1:.*]] = fir.convert %[[DIV]] : (f80) -> si80 ! CHECK-KIND10: %[[CV2:.*]] = fir.convert %[[CV1]] : (si80) -> f80 -! CHECK-KIND10: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f80 -! CHECK-KIND10: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f80 +! CHECK-KIND10: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f80 +! CHECK-KIND10: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f80 ! CHECK-KIND10: fir.store %[[SUB]] to %[[R]] : !fir.ref<f80> +! 
CHECK-NFRM-KIND10: fir.call @_FortranAModReal10(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f80, f80, !fir.ref<i8>, i32) -> f80 r = mod(a, p) end subroutine mod_real10 @@ -67,11 +72,12 @@ subroutine mod_real16(r, a, p) ! CHECK-KIND16: %[[R:.*]] = fir.declare{{.*}}r" ! CHECK-KIND16: %[[A_LOAD:.*]] = fir.load %[[A]] ! CHECK-KIND16: %[[P_LOAD:.*]] = fir.load %[[P]] -! CHECK-KIND16: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f128 +! CHECK-KIND16: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f128 ! CHECK-KIND16: %[[CV1:.*]] = fir.convert %[[DIV]] : (f128) -> si128 ! CHECK-KIND16: %[[CV2:.*]] = fir.convert %[[CV1]] : (si128) -> f128 -! CHECK-KIND16: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f128 -! CHECK-KIND16: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f128 +! CHECK-KIND16: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f128 +! CHECK-KIND16: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f128 ! CHECK-KIND16: fir.store %[[SUB]] to %[[R]] : !fir.ref<f128> +! CHECK-NFRM-KIND16: fir.call @_FortranAModReal16(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f128, f128, !fir.ref<i8>, i32) -> f128 r = mod(a, p) end subroutine mod_real16 diff --git a/flang/test/Lower/Intrinsics/mod.f90 b/flang/test/Lower/Intrinsics/mod.f90 index 0577168bfbf8e..5bc81d923b800 100644 --- a/flang/test/Lower/Intrinsics/mod.f90 +++ b/flang/test/Lower/Intrinsics/mod.f90 @@ -1,5 +1,4 @@ ! RUN: bbc -emit-fir %s -o - | FileCheck %s --check-prefixes=CHECK%if target=x86_64{{.*}} %{,CHECK-KIND10%}%if flang-supports-f128-math %{,CHECK-KIND16%} -! RUN: %flang_fc1 -ffast-real-mod -fno-fast-real-mod -emit-fir %s -o - | FileCheck %s --check-prefixes=CHECK%if target=x86_64{{.*}} %{,CHECK-KIND10%}%if flang-supports-f128-math %{,CHECK-KIND16%} ! CHECK-LABEL: func @_QPmod_testr4( subroutine mod_testr4(r, a, p)