diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h index 8aeb3e373298e..0a9a3ca5bd030 100644 --- a/flang/include/flang/Optimizer/Transforms/Passes.h +++ b/flang/include/flang/Optimizer/Transforms/Passes.h @@ -79,6 +79,7 @@ createOMPEarlyOutliningPass(); std::unique_ptr createOMPFunctionFilteringPass(); std::unique_ptr> createOMPMarkDeclareTargetPass(); +std::unique_ptr createOMPLoopIndexMemToRegPass(); // declarative passes #define GEN_PASS_REGISTRATION diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td index 9474edf13ce46..8304b882d525c 100644 --- a/flang/include/flang/Optimizer/Transforms/Passes.td +++ b/flang/include/flang/Optimizer/Transforms/Passes.td @@ -326,4 +326,13 @@ def OMPFunctionFiltering : Pass<"omp-function-filtering"> { ]; } +def OMPLoopIndexMemToReg : Pass<"omp-loop-index-mem2reg", "mlir::func::FuncOp"> { + let summary = "Pushes allocations for index variables of OpenMP loops into " + "the loop region and, if they are never passed by reference, " + "they are replaced by the corresponding entry block arguments, " + "removing all redundant allocations in the process."; + let constructor = "::fir::createOMPLoopIndexMemToRegPass()"; + let dependentDialects = ["fir::FIROpsDialect", "mlir::omp::OpenMPDialect"]; +} + #endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc index 616d9ddc066a7..0b5e8a0656804 100644 --- a/flang/include/flang/Tools/CLOptions.inc +++ b/flang/include/flang/Tools/CLOptions.inc @@ -270,6 +270,7 @@ inline void createOpenMPFIRPassPipeline( pm.addPass(fir::createOMPEarlyOutliningPass()); pm.addPass(fir::createOMPFunctionFilteringPass()); } + pm.addPass(fir::createOMPLoopIndexMemToRegPass()); } #if !defined(FLANG_EXCLUDE_CODEGEN) diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt 
b/flang/lib/Optimizer/Transforms/CMakeLists.txt index 3d2b7e5eaeade..306551b03ced1 100644 --- a/flang/lib/Optimizer/Transforms/CMakeLists.txt +++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt @@ -19,6 +19,7 @@ add_flang_library(FIRTransforms OMPEarlyOutlining.cpp OMPFunctionFiltering.cpp OMPMarkDeclareTarget.cpp + OMPLoopIndexMemToReg.cpp DEPENDS FIRDialect diff --git a/flang/lib/Optimizer/Transforms/OMPLoopIndexMemToReg.cpp b/flang/lib/Optimizer/Transforms/OMPLoopIndexMemToReg.cpp new file mode 100644 index 0000000000000..af117d625154b --- /dev/null +++ b/flang/lib/Optimizer/Transforms/OMPLoopIndexMemToReg.cpp @@ -0,0 +1,250 @@ +//===- OMPLoopIndexMemToReg.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements transforms to push allocations into an OpenMP loop +// operation region when they are used to store loop indices. Then, they are +// removed together with any associated load or store operations if their +// address is not needed, in which case uses of their values are replaced with +// the block argument from which they were originally initialized. 
+// +//===----------------------------------------------------------------------===// + +#include "flang/Optimizer/HLFIR/HLFIROps.h" +#include "flang/Optimizer/Transforms/Passes.h" + +#include "flang/Optimizer/Dialect/FIRDialect.h" +#include "flang/Optimizer/Dialect/FIROps.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h" +#include "mlir/IR/BuiltinOps.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace fir { +#define GEN_PASS_DEF_OMPLOOPINDEXMEMTOREG +#include "flang/Optimizer/Transforms/Passes.h.inc" +} // namespace fir + +using namespace mlir; + +template +class LoopProcessorHelper { + LoopOpTy loop; + + bool allUsesInLoop(ValueRange stores) { + for (Value store : stores) { + for (OpOperand &use : store.getUses()) { + Operation *owner = use.getOwner(); + if (owner->getParentOfType() != loop.getOperation()) + return false; + } + } + return true; + } + + /// Check whether a given hlfir.declare known to only be used inside of the + /// loop and initialized by a fir.alloca operation also only used inside of + /// the loop can be removed and replaced by the block argument representing + /// the corresponding loop index. + static bool isDeclareRemovable(hlfir::DeclareOp declareOp) { + fir::AllocaOp allocaOp = llvm::dyn_cast_if_present( + declareOp.getMemref().getDefiningOp()); + + // Check that the hlfir.declare is initialized by a fir.alloca that is only + // used as argument to that operation. + if (!allocaOp || !allocaOp.getResult().hasOneUse()) + return false; + + // Check that uses of the pointers can be replaced by the block argument. 
+ for (OpOperand &use : declareOp.getOriginalBase().getUses()) { + Operation *owner = use.getOwner(); + if (!isa(owner)) + return false; + } + for (OpOperand &use : declareOp.getBase().getUses()) { + Operation *owner = use.getOwner(); + if (!isa(owner)) + return false; + } + + return true; + } + + /// Check whether a given fir.alloca known to only be used inside of the loop + /// can be removed and replaced by the block argument representing the + /// corresponding loop index. + static bool isAllocaRemovable(fir::AllocaOp allocaOp) { + // Check that uses of the pointer are all fir.load and fir.store. + for (OpOperand &use : allocaOp.getResult().getUses()) { + Operation *owner = use.getOwner(); + if (!isa(owner) && !isa(owner)) + return false; + } + + return true; + } + + /// Try to push an hlfir.declare operation defined outside of the loop inside, + /// if all uses of that operation and the corresponding fir.alloca are + /// contained inside of the loop. + LogicalResult pushDeclareIntoLoop(hlfir::DeclareOp declareOp) { + // Check that all uses are inside of the loop. + if (!allUsesInLoop(declareOp->getResults())) + return failure(); + + // Push hlfir.declare into the beginning of the loop region. + Block &b = loop.getRegion().getBlocks().front(); + declareOp->moveBefore(&b, b.begin()); + + // Find associated fir.alloca and push into the beginning of the loop + // region. + fir::AllocaOp allocaOp = + cast(declareOp.getMemref().getDefiningOp()); + Value allocaVal = allocaOp.getResult(); + + if (!allUsesInLoop(allocaVal)) + return failure(); + + allocaOp->moveBefore(&b, b.begin()); + return success(); + } + + /// Try to push a fir.alloca operation defined outside of the loop inside, + /// if all uses of that operation are contained inside of the loop. + LogicalResult pushAllocaIntoLoop(fir::AllocaOp allocaOp) { + Value store = allocaOp.getResult(); + + // Check that all uses are inside of the loop. 
+ if (!allUsesInLoop(store)) + return failure(); + + // Push fir.alloca into the beginning of the loop region. + Block &b = loop.getRegion().getBlocks().front(); + allocaOp->moveBefore(&b, b.begin()); + return success(); + } + + void processLoopArg(BlockArgument arg, llvm::ArrayRef argStores, + SmallPtrSetImpl &opsToDelete) { + llvm::SmallPtrSet toDelete; + for (Value store : argStores) { + Operation *op = store.getDefiningOp(); + + // Skip argument if storage not defined by an operation. + if (!op) + return; + + // Support HLFIR flow as well as regular FIR flow. + if (auto declareOp = dyn_cast(op)) { + if (succeeded(pushDeclareIntoLoop(declareOp)) && + isDeclareRemovable(declareOp)) { + // Mark hlfir.declare, fir.alloca and related uses for deletion. + for (OpOperand &use : declareOp.getOriginalBase().getUses()) + toDelete.insert(use.getOwner()); + + for (OpOperand &use : declareOp.getBase().getUses()) + toDelete.insert(use.getOwner()); + + Operation *allocaOp = declareOp.getMemref().getDefiningOp(); + toDelete.insert(declareOp); + toDelete.insert(allocaOp); + } + } else if (auto allocaOp = dyn_cast(op)) { + if (succeeded(pushAllocaIntoLoop(allocaOp)) && + isAllocaRemovable(allocaOp)) { + // Do not make any further modifications if an address to the index + // is necessary. Otherwise, the values can be used directly from the + // loop region first block's arguments. + + // Mark fir.alloca and related uses for deletion. + for (OpOperand &use : allocaOp.getResult().getUses()) + toDelete.insert(use.getOwner()); + + // Delete now-unused fir.alloca. + toDelete.insert(allocaOp); + } + } else { + return; + } + } + + // Only consider marked operations if all load, store and allocation + // operations associated with the given loop index can be removed. + opsToDelete.insert(toDelete.begin(), toDelete.end()); + + for (Operation *op : toDelete) { + // Replace all fir.load operations with the index as returned by the + // OpenMP loop operation. 
+ if (isa(op)) + op->replaceAllUsesWith(ValueRange(arg)); + // Drop all uses of fir.alloca and hlfir.declare because their defining + // operations will be deleted as well. + else if (isa(op) || isa(op)) + op->dropAllUses(); + } + } + +public: + explicit LoopProcessorHelper(LoopOpTy loop) : loop(loop) {} + + void process() { + llvm::SmallPtrSet opsToDelete; + llvm::SmallVector> storeAddresses; + llvm::ArrayRef loopArgs = loop.getRegion().getArguments(); + + // Collect arguments of the loop operation. + for (BlockArgument arg : loopArgs) { + // Find fir.store uses of these indices and gather all addresses where + // they are stored. + llvm::SmallVector &argStores = storeAddresses.emplace_back(); + for (OpOperand &argUse : arg.getUses()) + if (auto storeOp = dyn_cast(argUse.getOwner())) + argStores.push_back(storeOp.getMemref()); + } + + // Process all loop indices and mark them for deletion independently of each + // other. + for (auto it : llvm::zip(loopArgs, storeAddresses)) + processLoopArg(std::get<0>(it), std::get<1>(it), opsToDelete); + + // Delete marked operations. + for (Operation *op : opsToDelete) + op->erase(); + } +}; + +namespace { +class OMPLoopIndexMemToRegPass + : public fir::impl::OMPLoopIndexMemToRegBase { +public: + void runOnOperation() override { + func::FuncOp func = getOperation(); + + func->walk( + [&](omp::WsLoopOp loop) { LoopProcessorHelper(loop).process(); }); + + func.walk( + [&](omp::SimdLoopOp loop) { LoopProcessorHelper(loop).process(); }); + + func.walk( + [&](omp::TaskLoopOp loop) { LoopProcessorHelper(loop).process(); }); + } +}; +} // namespace + +std::unique_ptr fir::createOMPLoopIndexMemToRegPass() { + return std::make_unique(); +} diff --git a/flang/test/Lower/OpenMP/FIR/copyin.f90 b/flang/test/Lower/OpenMP/FIR/copyin.f90 index ddfa0ea091462..3443b310074f5 100644 --- a/flang/test/Lower/OpenMP/FIR/copyin.f90 +++ b/flang/test/Lower/OpenMP/FIR/copyin.f90 @@ -138,17 +138,15 @@ subroutine copyin_derived_type() ! 
CHECK: %[[VAL_1:.*]] = fir.address_of(@_QFcombined_parallel_worksharing_loopEx6) : !fir.ref ! CHECK: %[[VAL_2:.*]] = omp.threadprivate %[[VAL_1]] : !fir.ref -> !fir.ref ! CHECK: omp.parallel { -! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -! CHECK: %[[VAL_4:.*]] = omp.threadprivate %[[VAL_1]] : !fir.ref -> !fir.ref -! CHECK: %[[VAL_5:.*]] = fir.load %[[VAL_2]] : !fir.ref -! CHECK: fir.store %[[VAL_5]] to %[[VAL_4]] : !fir.ref +! CHECK: %[[VAL_3:.*]] = omp.threadprivate %[[VAL_1]] : !fir.ref -> !fir.ref +! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_2]] : !fir.ref +! CHECK: fir.store %[[VAL_4]] to %[[VAL_3]] : !fir.ref ! CHECK: omp.barrier -! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_4]] : !fir.ref -! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_9:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { -! CHECK: fir.store %[[VAL_9]] to %[[VAL_3]] : !fir.ref -! CHECK: fir.call @_QPsub4(%[[VAL_4]]) {{.*}}: (!fir.ref) -> () +! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_3]] : !fir.ref +! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop for (%[[VAL_9:.*]]) : i32 = (%[[VAL_5]]) to (%[[VAL_6]]) inclusive step (%[[VAL_7]]) { +! CHECK: fir.call @_QPsub4(%[[VAL_3]]) {{.*}}: (!fir.ref) -> () ! CHECK: omp.yield ! CHECK: } ! 
CHECK: omp.terminator @@ -269,30 +267,27 @@ subroutine common_1() !CHECK: %[[val_7:.*]] = fir.coordinate_of %[[val_6]], %[[val_c4]] : (!fir.ref>, index) -> !fir.ref !CHECK: %[[val_8:.*]] = fir.convert %[[val_7]] : (!fir.ref) -> !fir.ref !CHECK: omp.parallel { -!CHECK: %[[val_9:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[val_10:.*]] = omp.threadprivate %[[val_1]] : !fir.ref> -> !fir.ref> -!CHECK: %[[val_11:.*]] = fir.convert %[[val_10]] : (!fir.ref>) -> !fir.ref> +!CHECK: %[[val_9:.*]] = omp.threadprivate %[[val_1]] : !fir.ref> -> !fir.ref> +!CHECK: %[[val_10:.*]] = fir.convert %[[val_9]] : (!fir.ref>) -> !fir.ref> !CHECK: %[[val_c0_0:.*]] = arith.constant 0 : index -!CHECK: %[[val_12:.*]] = fir.coordinate_of %[[val_11]], %[[val_c0_0]] : (!fir.ref>, index) -> !fir.ref -!CHECK: %[[val_13:.*]] = fir.convert %[[val_12]] : (!fir.ref) -> !fir.ref -!CHECK: %[[val_14:.*]] = fir.convert %[[val_10]] : (!fir.ref>) -> !fir.ref> +!CHECK: %[[val_11:.*]] = fir.coordinate_of %[[val_10]], %[[val_c0_0]] : (!fir.ref>, index) -> !fir.ref +!CHECK: %[[val_12:.*]] = fir.convert %[[val_11]] : (!fir.ref) -> !fir.ref +!CHECK: %[[val_13:.*]] = fir.convert %[[val_9]] : (!fir.ref>) -> !fir.ref> !CHECK: %[[val_c4_1:.*]] = arith.constant 4 : index -!CHECK: %[[val_15:.*]] = fir.coordinate_of %[[val_14]], %[[val_c4_1]] : (!fir.ref>, index) -> !fir.ref -!CHECK: %[[val_16:.*]] = fir.convert %[[val_15]] : (!fir.ref) -> !fir.ref -!CHECK: %[[val_17:.*]] = fir.load %[[val_5]] : !fir.ref -!CHECK: fir.store %[[val_17]] to %[[val_13]] : !fir.ref -!CHECK: %[[val_18:.*]] = fir.load %[[val_8]] : !fir.ref -!CHECK: fir.store %[[val_18]] to %[[val_16]] : !fir.ref +!CHECK: %[[val_14:.*]] = fir.coordinate_of %[[val_13]], %[[val_c4_1]] : (!fir.ref>, index) -> !fir.ref +!CHECK: %[[val_15:.*]] = fir.convert %[[val_14]] : (!fir.ref) -> !fir.ref +!CHECK: %[[val_16:.*]] = fir.load %[[val_5]] : !fir.ref +!CHECK: fir.store %[[val_16]] to %[[val_12]] : !fir.ref +!CHECK: %[[val_17:.*]] = fir.load %[[val_8]] 
: !fir.ref +!CHECK: fir.store %[[val_17]] to %[[val_15]] : !fir.ref !CHECK: omp.barrier !CHECK: %[[val_c1_i32:.*]] = arith.constant 1 : i32 -!CHECK: %[[val_19:.*]] = fir.load %[[val_13]] : !fir.ref +!CHECK: %[[val_18:.*]] = fir.load %[[val_12]] : !fir.ref !CHECK: %[[val_c1_i32_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop for (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_19]]) inclusive step (%[[val_c1_i32_2]]) { -!CHECK: fir.store %[[arg]] to %[[val_9]] : !fir.ref -!CHECK: %[[val_20:.*]] = fir.load %[[val_16]] : !fir.ref -!CHECK: %[[val_21:.*]] = fir.load %[[val_9]] : !fir.ref -!CHECK: %[[val_22:.*]] = arith.addi %[[val_20]], %[[val_21]] : i32 -!CHECK: fir.store %[[val_22]] to %[[val_16]] : !fir.ref +!CHECK: omp.wsloop for (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_18]]) inclusive step (%[[val_c1_i32_2]]) { +!CHECK: %[[val_19:.*]] = fir.load %[[val_15]] : !fir.ref +!CHECK: %[[val_20:.*]] = arith.addi %[[val_19]], %[[arg]] : i32 +!CHECK: fir.store %[[val_20]] to %[[val_15]] : !fir.ref !CHECK: omp.yield !CHECK: } !CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 b/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 index 06f3e1ca82234..bba9dbc4fc4cb 100644 --- a/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 +++ b/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 @@ -1,7 +1,6 @@ ! 
RUN: %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s !CHECK: func.func @_QPlastprivate_common() { -!CHECK: %[[val_0:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[val_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFlastprivate_commonEi"} !CHECK: %[[val_2:.*]] = fir.address_of(@c_) : !fir.ref> !CHECK: %[[val_3:.*]] = fir.convert %[[val_2]] : (!fir.ref>) -> !fir.ref> @@ -18,7 +17,6 @@ !CHECK: %[[val_c100_i32:.*]] = arith.constant 100 : i32 !CHECK: %[[val_c1_i32_0:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop for (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_c100_i32]]) inclusive step (%[[val_c1_i32_0]]) { -!CHECK: fir.store %[[arg]] to %[[val_0]] : !fir.ref !CHECK: %[[val_11:.*]] = arith.cmpi eq, %[[arg]], %[[val_c100_i32]] : i32 !CHECK: fir.if %[[val_11]] { !CHECK: %[[val_12:.*]] = fir.load %[[val_9]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 b/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 index 3152f9c44d0c6..8cf216361bcb6 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 @@ -8,34 +8,31 @@ ! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_private_fixEx"} ! CHECK: omp.parallel { ! CHECK: %[[PRIV_J:.*]] = fir.alloca i32 {bindc_name = "j", pinned -! CHECK: %[[PRIV_I:.*]] = fir.alloca i32 {adapt.valuebyref, pinned ! CHECK: %[[PRIV_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned ! CHECK: %[[ONE:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_4:.*]] : !fir.ref ! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) { -! CHECK: fir.store %[[VAL_6]] to %[[PRIV_I]] : !fir.ref -! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index -! 
CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_4]] : !fir.ref -! CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> index -! CHECK: %[[VAL_11:.*]] = arith.constant 1 : index -! CHECK: %[[LB:.*]] = fir.convert %[[VAL_8]] : (index) -> i32 -! CHECK: %[[VAL_12:.*]]:2 = fir.do_loop %[[VAL_13:[^ ]*]] = -! CHECK-SAME: %[[VAL_8]] to %[[VAL_10]] step %[[VAL_11]] -! CHECK-SAME: iter_args(%[[IV:.*]] = %[[LB]]) -> (index, i32) { -! CHECK: fir.store %[[IV]] to %[[PRIV_J]] : !fir.ref -! CHECK: %[[LOAD:.*]] = fir.load %[[PRIV_I]] : !fir.ref -! CHECK: %[[VAL_15:.*]] = fir.load %[[PRIV_J]] : !fir.ref -! CHECK: %[[VAL_16:.*]] = arith.addi %[[LOAD]], %[[VAL_15]] : i32 -! CHECK: fir.store %[[VAL_16]] to %[[PRIV_X]] : !fir.ref -! CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_13]], %[[VAL_11]] : index -! CHECK: %[[STEPCAST:.*]] = fir.convert %[[VAL_11]] : (index) -> i32 +! CHECK: omp.wsloop for (%[[IV_I:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) { +! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (i32) -> index +! CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_4]] : !fir.ref +! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i32) -> index +! CHECK: %[[VAL_10:.*]] = arith.constant 1 : index +! CHECK: %[[LB:.*]] = fir.convert %[[VAL_7]] : (index) -> i32 +! CHECK: %[[VAL_11:.*]]:2 = fir.do_loop %[[VAL_12:[^ ]*]] = +! CHECK-SAME: %[[VAL_7]] to %[[VAL_9]] step %[[VAL_10]] +! CHECK-SAME: iter_args(%[[IV_J:.*]] = %[[LB]]) -> (index, i32) { +! CHECK: fir.store %[[IV_J]] to %[[PRIV_J]] : !fir.ref +! CHECK: %[[VAL_13:.*]] = fir.load %[[PRIV_J]] : !fir.ref +! CHECK: %[[VAL_14:.*]] = arith.addi %[[IV_I]], %[[VAL_13]] : i32 +! CHECK: fir.store %[[VAL_14]] to %[[PRIV_X]] : !fir.ref +! CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_12]], %[[VAL_10]] : index +! CHECK: %[[STEPCAST:.*]] = fir.convert %[[VAL_10]] : (index) -> i32 ! CHECK: %[[IVLOAD:.*]] = fir.load %[[PRIV_J]] : !fir.ref ! 
CHECK: %[[IVINC:.*]] = arith.addi %[[IVLOAD]], %[[STEPCAST]] -! CHECK: fir.result %[[VAL_17]], %[[IVINC]] : index, i32 +! CHECK: fir.result %[[VAL_15]], %[[IVINC]] : index, i32 ! CHECK: } -! CHECK: fir.store %[[VAL_12]]#1 to %[[PRIV_J]] : !fir.ref +! CHECK: fir.store %[[VAL_11]]#1 to %[[PRIV_J]] : !fir.ref ! CHECK: omp.yield ! CHECK: } ! CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 b/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 index e9d9218702cc5..f341d0ccda423 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 @@ -249,8 +249,6 @@ subroutine simple_loop_1 real, allocatable :: r; ! FIRDialect: omp.parallel !$OMP PARALLEL PRIVATE(r) - ! FIRDialect: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} - ! FIRDialect: [[R:%.*]] = fir.alloca !fir.box> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"} ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> @@ -262,9 +260,7 @@ subroutine simple_loop_1 ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO do i=1, 9 - ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref - ! FIRDialect: %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref - ! FIRDialect: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! FIRDialect: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do ! FIRDialect: omp.yield @@ -285,8 +281,6 @@ subroutine simple_loop_2 real, allocatable :: r; ! FIRDialect: omp.parallel !$OMP PARALLEL - ! FIRDialect: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} - ! FIRDialect: [[R:%.*]] = fir.alloca !fir.box> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"} ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> ! 
FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> @@ -298,9 +292,7 @@ subroutine simple_loop_2 ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO PRIVATE(r) do i=1, 9 - ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref - ! FIRDialect: %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref - ! FIRDialect: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! FIRDialect: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do ! FIRDialect: omp.yield @@ -320,8 +312,6 @@ subroutine simple_loop_3 integer :: i real, allocatable :: r; ! FIRDialect: omp.parallel - ! FIRDialect: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} - ! FIRDialect: [[R:%.*]] = fir.alloca !fir.box> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"} ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> @@ -333,9 +323,7 @@ subroutine simple_loop_3 ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO PRIVATE(r) do i=1, 9 - ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref - ! FIRDialect: %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref - ! FIRDialect: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! FIRDialect: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do ! FIRDialect: omp.yield @@ -353,9 +341,9 @@ subroutine simple_loop_3 subroutine simd_loop_1 integer :: i real, allocatable :: r; - ! IRDialect: [[R:%.*]] = fir.alloca !fir.box> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"} - ! IRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> - ! IRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> + ! FIRDialect: [[R:%.*]] = fir.alloca !fir.box> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"} + ! 
FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> + ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> ! FIRDialect: %[[LB:.*]] = arith.constant 1 : i32 ! FIRDialect: %[[UB:.*]] = arith.constant 9 : i32 @@ -364,9 +352,7 @@ subroutine simd_loop_1 ! FIRDialect: omp.simdloop for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { !$OMP SIMD PRIVATE(r) do i=1, 9 - ! FIRDialect: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref - ! FIRDialect: %[[LOAD_IV:.*]] = fir.load %[[LOCAL]] : !fir.ref - ! FIRDialect: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! FIRDialect: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do !$OMP END SIMD diff --git a/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 b/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 index ac63c45677ffe..bcc08e59560ab 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 @@ -10,7 +10,6 @@ subroutine omp_do_firstprivate(a) n = a+1 !$omp parallel do firstprivate(a) ! CHECK: omp.parallel { - ! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK-NEXT: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "a", pinned ! CHECK-NEXT: %[[LD:.*]] = fir.load %[[ARG0]] : !fir.ref ! CHECK-NEXT: fir.store %[[LD]] to %[[CLONE]] : !fir.ref @@ -18,6 +17,7 @@ subroutine omp_do_firstprivate(a) ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[CLONE]] : !fir.ref ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: omp.wsloop for (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + ! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK-NEXT: fir.store %[[ARG1]] to %[[REF]] : !fir.ref ! CHECK-NEXT: fir.call @_QPfoo(%[[REF]], %[[CLONE]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! 
CHECK-NEXT: omp.yield @@ -36,19 +36,19 @@ subroutine omp_do_firstprivate2(a, n) n = a+1 !$omp parallel do firstprivate(a, n) ! CHECK: omp.parallel { - ! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK-NEXT: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "a", pinned ! CHECK-NEXT: %[[LD:.*]] = fir.load %[[ARG0]] : !fir.ref ! CHECK-NEXT: fir.store %[[LD]] to %[[CLONE]] : !fir.ref ! CHECK-NEXT: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "n", pinned ! CHECK-NEXT: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref ! CHECK-NEXT: fir.store %[[LD1]] to %[[CLONE1]] : !fir.ref - - + + ! CHECK: %[[LB:.*]] = fir.load %[[CLONE]] : !fir.ref ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[CLONE1]] : !fir.ref ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: omp.wsloop for (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + ! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK-NEXT: fir.store %[[ARG2]] to %[[REF]] : !fir.ref ! CHECK-NEXT: fir.call @_QPfoo(%[[REF]], %[[CLONE]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK-NEXT: omp.yield diff --git a/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 b/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 index c302b91be8e67..bc44a12c53f4d 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 @@ -12,9 +12,7 @@ subroutine simple_parallel_do ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO do i=1, 9 - ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref - ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do ! CHECK: omp.yield @@ -38,9 +36,7 @@ subroutine parallel_do_with_parallel_clauses(cond, nt) ! 
CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close) do i=1, 9 - ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref - ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do ! CHECK: omp.yield @@ -61,9 +57,7 @@ subroutine parallel_do_with_clauses(nt) ! CHECK: omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic) do i=1, 9 - ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref - ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do ! CHECK: omp.yield @@ -93,9 +87,7 @@ subroutine parallel_do_with_privatisation_clauses(cond,nt) ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt) do i=1, 9 - ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref - ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 ! CHECK: %[[PRIVATE_COND_VAL:.*]] = fir.load %[[PRIVATE_COND_REF]] : !fir.ref> ! CHECK: %[[PRIVATE_COND_VAL_CVT:.*]] = fir.convert %[[PRIVATE_COND_VAL]] : (!fir.logical<4>) -> i1 ! 
CHECK: fir.call @_FortranAioOutputLogical({{.*}}, %[[PRIVATE_COND_VAL_CVT]]) {{.*}}: (!fir.ref, i1) -> i1 @@ -132,7 +124,6 @@ end subroutine parallel_private_do ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "nt"}) { ! CHECK: %[[I:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFparallel_private_doEi"} ! CHECK: omp.parallel { -! CHECK: %[[I_PRIV:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: %[[COND_ADDR:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_private_doEcond"} ! CHECK: %[[NT_ADDR:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_private_doEnt"} ! CHECK: %[[NT:.*]] = fir.load %[[VAL_1]] : !fir.ref @@ -141,6 +132,7 @@ end subroutine parallel_private_do ! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: %[[I_PRIV:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: fir.store %[[I]] to %[[I_PRIV]] : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PRIV]], %[[COND_ADDR]], %[[NT_ADDR]]) {{.*}}: (!fir.ref, !fir.ref>, !fir.ref) -> () ! CHECK: omp.yield @@ -172,7 +164,6 @@ end subroutine omp_parallel_multiple_firstprivate_do ! CHECK-SAME: %[[B_ADDR:.*]]: !fir.ref {fir.bindc_name = "b"}) { ! CHECK: %[[I_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_parallel_multiple_firstprivate_doEi"} ! CHECK: omp.parallel { -! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: %[[A_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFomp_parallel_multiple_firstprivate_doEa"} ! CHECK: %[[A:.*]] = fir.load %[[A_ADDR]] : !fir.ref ! CHECK: fir.store %[[A]] to %[[A_PRIV_ADDR]] : !fir.ref @@ -183,6 +174,7 @@ end subroutine omp_parallel_multiple_firstprivate_do ! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 ! 
CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref ! CHECK: fir.call @_QPbar(%[[I_PRIV_ADDR]], %[[A_PRIV_ADDR]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield @@ -216,7 +208,6 @@ end subroutine parallel_do_private ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "nt"}) { ! CHECK: %[[I_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFparallel_do_privateEi"} ! CHECK: omp.parallel { -! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: %[[COND_ADDR:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_privateEcond"} ! CHECK: %[[NT_ADDR:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_privateEnt"} ! CHECK: %[[NT:.*]] = fir.load %[[VAL_1]] : !fir.ref @@ -225,6 +216,7 @@ end subroutine parallel_do_private ! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PRIV_ADDR]], %[[COND_ADDR]], %[[NT_ADDR]]) {{.*}}: (!fir.ref, !fir.ref>, !fir.ref) -> () ! CHECK: omp.yield @@ -256,7 +248,6 @@ end subroutine omp_parallel_do_multiple_firstprivate ! CHECK-SAME: %[[B_ADDR:.*]]: !fir.ref {fir.bindc_name = "b"}) { ! CHECK: %[[I_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_parallel_do_multiple_firstprivateEi"} ! CHECK: omp.parallel { -! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: %[[A_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFomp_parallel_do_multiple_firstprivateEa"} ! 
CHECK: %[[A:.*]] = fir.load %[[A_ADDR]] : !fir.ref ! CHECK: fir.store %[[A]] to %[[A_PRIV_ADDR]] : !fir.ref @@ -267,6 +258,7 @@ end subroutine omp_parallel_do_multiple_firstprivate ! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref ! CHECK: fir.call @_QPbar(%[[I_PRIV_ADDR]], %[[A_PRIV_ADDR]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/FIR/simd.f90 b/flang/test/Lower/OpenMP/FIR/simd.f90 index 47596cccdbc12..c0c8d7f32838a 100644 --- a/flang/test/Lower/OpenMP/FIR/simd.f90 +++ b/flang/test/Lower/OpenMP/FIR/simd.f90 @@ -11,9 +11,7 @@ subroutine simdloop ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: omp.simdloop for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i=1, 9 - ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref - ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do !$OMP END SIMD @@ -29,9 +27,7 @@ subroutine simdloop_with_if_clause(n, threshold) ! CHECK: %[[COND:.*]] = arith.cmpi sge ! CHECK: omp.simdloop if(%[[COND:.*]]) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i = 1, n - ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref - ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref, i32) -> i1 + ! 
CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do !$OMP END SIMD @@ -46,9 +42,7 @@ subroutine simdloop_with_simdlen_clause(n, threshold) ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 ! CHECK: omp.simdloop simdlen(2) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i = 1, n - ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref - ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do !$OMP END SIMD @@ -64,9 +58,7 @@ subroutine simdloop_with_simdlen_clause_from_param(n, threshold) ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 ! CHECK: omp.simdloop simdlen(2) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i = 1, n - ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref - ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do !$OMP END SIMD @@ -82,9 +74,7 @@ subroutine simdloop_with_simdlen_clause_from_expr_from_param(n, threshold) ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 ! CHECK: omp.simdloop simdlen(6) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i = 1, n - ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref - ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do !$OMP END SIMD @@ -99,9 +89,7 @@ subroutine simdloop_with_safelen_clause(n, threshold) ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 ! 
CHECK: omp.simdloop safelen(2) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i = 1, n - ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref - ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do !$OMP END SIMD @@ -117,9 +105,7 @@ subroutine simdloop_with_safelen_clause_from_expr_from_param(n, threshold) ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 ! CHECK: omp.simdloop safelen(6) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i = 1, n - ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref - ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do !$OMP END SIMD @@ -134,9 +120,7 @@ subroutine simdloop_with_simdlen_safelen_clause(n, threshold) ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 ! CHECK: omp.simdloop simdlen(1) safelen(2) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i = 1, n - ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref - ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do !$OMP END SIMD diff --git a/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 b/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 index 2f73fb31966ec..ab906e1dfba5b 100644 --- a/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 +++ b/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 @@ -71,14 +71,12 @@ subroutine test_stop_in_region3() end ! 
CHECK-LABEL: func.func @_QPtest_stop_in_region4() { -! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtest_stop_in_region4Ei"} ! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFtest_stop_in_region4Ex"} ! CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_4:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 ! CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { -! CHECK: fir.store %[[VAL_6]] to %[[VAL_0]] : !fir.ref ! CHECK: cf.br ^bb1 ! CHECK: ^bb1: ! CHECK: %[[VAL_7:.*]] = arith.constant 3 : i32 diff --git a/flang/test/Lower/OpenMP/FIR/target.f90 b/flang/test/Lower/OpenMP/FIR/target.f90 index 9b1fb5c15ac1d..90e4e8a058297 100644 --- a/flang/test/Lower/OpenMP/FIR/target.f90 +++ b/flang/test/Lower/OpenMP/FIR/target.f90 @@ -278,19 +278,16 @@ subroutine omp_target_parallel_do !CHECK: omp.target map_entries(%[[MAP]] : !fir.ref>) { !CHECK-NEXT: omp.parallel !$omp target parallel do map(tofrom: a) - !CHECK: %[[VAL_2:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} - !CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32 - !CHECK: %[[VAL_4:.*]] = arith.constant 1024 : i32 - !CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 - !CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { - !CHECK: fir.store %[[VAL_6]] to %[[VAL_2]] : !fir.ref - !CHECK: %[[VAL_7:.*]] = arith.constant 10 : i32 - !CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_2]] : !fir.ref - !CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i32) -> i64 - !CHECK: %[[VAL_10:.*]] = arith.constant 1 : i64 - !CHECK: %[[VAL_11:.*]] = arith.subi %[[VAL_9]], %[[VAL_10]] : i64 - !CHECK: %[[VAL_12:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_11]] : (!fir.ref>, i64) -> !fir.ref - !CHECK: fir.store %[[VAL_7]] to %[[VAL_12]] : !fir.ref + !CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32 + !CHECK: 
%[[VAL_3:.*]] = arith.constant 1024 : i32 + !CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 + !CHECK: omp.wsloop for (%[[ARG:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { + !CHECK: %[[VAL_5:.*]] = arith.constant 10 : i32 + !CHECK: %[[VAL_6:.*]] = fir.convert %[[ARG]] : (i32) -> i64 + !CHECK: %[[VAL_7:.*]] = arith.constant 1 : i64 + !CHECK: %[[VAL_8:.*]] = arith.subi %[[VAL_6]], %[[VAL_7]] : i64 + !CHECK: %[[VAL_9:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_8]] : (!fir.ref>, i64) -> !fir.ref + !CHECK: fir.store %[[VAL_5]] to %[[VAL_9]] : !fir.ref do i = 1, 1024 a(i) = 10 end do diff --git a/flang/test/Lower/OpenMP/FIR/unstructured.f90 b/flang/test/Lower/OpenMP/FIR/unstructured.f90 index e7d48bb269349..a6b572eaafecf 100644 --- a/flang/test/Lower/OpenMP/FIR/unstructured.f90 +++ b/flang/test/Lower/OpenMP/FIR/unstructured.f90 @@ -61,21 +61,16 @@ subroutine ss2(n) ! unstructured OpenMP construct; loop exit inside construct ! CHECK-LABEL: func @_QPss3{{.*}} { ! CHECK: omp.parallel { ! CHECK: %[[ALLOCA_K:.*]] = fir.alloca i32 {bindc_name = "k", pinned} -! CHECK: %[[ALLOCA_1:.*]] = fir.alloca i32 {{{.*}}, pinned} -! CHECK: %[[ALLOCA_2:.*]] = fir.alloca i32 {{{.*}}, pinned} ! CHECK: br ^bb1 ! CHECK: ^bb1: // 2 preds: ^bb0, ^bb3 ! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb4 ! CHECK: ^bb2: // pred: ^bb1 ! CHECK: omp.wsloop for (%[[ARG1:.*]]) : {{.*}} { -! CHECK: fir.store %[[ARG1]] to %[[ALLOCA_2]] : !fir.ref ! CHECK: @_FortranAioBeginExternalListOutput -! CHECK: %[[LOAD_1:.*]] = fir.load %[[ALLOCA_2]] : !fir.ref -! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_1]]) +! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[ARG1]]) ! CHECK: omp.yield ! CHECK: } ! CHECK: omp.wsloop for (%[[ARG2:.*]]) : {{.*}} { -! CHECK: fir.store %[[ARG2]] to %[[ALLOCA_1]] : !fir.ref ! CHECK: br ^bb1 ! CHECK: ^bb2: // 2 preds: ^bb1, ^bb5 ! CHECK: cond_br %{{[0-9]*}}, ^bb3, ^bb6 @@ -116,15 +111,12 @@ subroutine ss3(n) ! nested unstructured OpenMP constructs ! 
CHECK-LABEL: func @_QPss4{{.*}} { ! CHECK: omp.parallel { -! CHECK: %[[ALLOCA:.*]] = fir.alloca i32 {{{.*}}, pinned} ! CHECK: omp.wsloop for (%[[ARG:.*]]) : {{.*}} { -! CHECK: fir.store %[[ARG]] to %[[ALLOCA]] : !fir.ref ! CHECK: %[[COND:.*]] = arith.cmpi eq, %{{.*}}, %{{.*}} ! CHECK: %[[COND_XOR:.*]] = arith.xori %[[COND]], %{{.*}} ! CHECK: fir.if %[[COND_XOR]] { ! CHECK: @_FortranAioBeginExternalListOutput -! CHECK: %[[LOAD:.*]] = fir.load %[[ALLOCA]] : !fir.ref -! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD]]) +! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[ARG]]) ! CHECK: } else { ! CHECK: } ! CHECK-NEXT: omp.yield diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 index 99b0cf0f1298e..f1bd5459a2b61 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 @@ -20,9 +20,7 @@ program wsloop ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 4 : i32 ! CHECK: omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait for (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { -! CHECK: fir.store %[[ARG0]] to %[[STORE_IV:.*]] : !fir.ref -! CHECK: %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]] : !fir.ref -! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 +! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[ARG0]]) {{.*}}: (!fir.ref, i32) -> i1 ! CHECK: omp.yield ! CHECK: } @@ -38,10 +36,8 @@ program wsloop ! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_17:.*]] = arith.constant 4 : i32 ! CHECK: omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait for (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) { -! CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref ! CHECK: %[[VAL_24:.*]] = arith.constant 2 : i32 -! CHECK: %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]] : !fir.ref -! 
CHECK: %[[VAL_25:.*]] = arith.muli %[[VAL_24]], %[[LOAD_IV1]] : i32 +! CHECK: %[[VAL_25:.*]] = arith.muli %[[VAL_24]], %[[ARG1]] : i32 ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_25]]) {{.*}}: (!fir.ref, i32) -> i1 ! CHECK: omp.yield ! CHECK: } @@ -62,10 +58,8 @@ program wsloop ! CHECK: %[[VAL_31:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_0]] : !fir.ref ! CHECK: omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait for (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) { -! CHECK: fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref ! CHECK: %[[VAL_39:.*]] = arith.constant 3 : i32 -! CHECK: %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]] : !fir.ref -! CHECK: %[[VAL_40:.*]] = arith.muli %[[VAL_39]], %[[LOAD_IV2]] : i32 +! CHECK: %[[VAL_40:.*]] = arith.muli %[[VAL_39]], %[[ARG2]] : i32 ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_40]]) {{.*}}: (!fir.ref, i32) -> i1 ! CHECK: omp.yield ! CHECK: } diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 index a122a41ba8b8f..ba5860faac0e6 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 @@ -40,16 +40,10 @@ program wsloop_collapse do j= 1, b do k = 1, c ! CHECK: omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[VAL_20]], %[[VAL_23]], %[[VAL_26]]) to (%[[VAL_21]], %[[VAL_24]], %[[VAL_27]]) inclusive step (%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]) { -! CHECK: fir.store %[[ARG0]] to %[[STORE_IV0:.*]] : !fir.ref -! CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref -! CHECK: fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref ! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_6]] : !fir.ref -! CHECK: %[[LOAD_IV0:.*]] = fir.load %[[STORE_IV0]] : !fir.ref -! CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_12]], %[[LOAD_IV0]] : i32 -! 
CHECK: %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]] : !fir.ref -! CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_13]], %[[LOAD_IV1]] : i32 -! CHECK: %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]] : !fir.ref -! CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_14]], %[[LOAD_IV2]] : i32 +! CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_12]], %[[ARG0]] : i32 +! CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_13]], %[[ARG1]] : i32 +! CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_14]], %[[ARG2]] : i32 ! CHECK: fir.store %[[VAL_15]] to %[[VAL_6]] : !fir.ref ! CHECK: omp.yield ! CHECK: } diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 index 9509920c6ec1b..ac0021e2edf20 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 @@ -11,18 +11,15 @@ program wsloop_dynamic !CHECK: omp.parallel { !$OMP DO SCHEDULE(monotonic:dynamic) -!CHECK: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop schedule(dynamic, monotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) -!CHECK: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref do i=1, 9 print*, i !CHECK: %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput -!CHECK: %[[LOAD:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref -!CHECK: fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[LOAD]]) {{.*}}: (!fir.ref, i32) -> i1 +!CHECK: fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 !CHECK: fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) {{.*}}: (!fir.ref) -> i32 end do !CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 index 5e4e66c77b343..39215e8d31c92 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 @@ -12,18 +12,15 @@ program wsloop_dynamic !CHECK: omp.parallel { !$OMP DO SCHEDULE(nonmonotonic:dynamic) -!CHECK: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop schedule(dynamic, nonmonotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) -!CHECK: fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref do i=1, 9 print*, i !CHECK: %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput -!CHECK: %[[LOAD:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref -!CHECK: fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[LOAD]]) {{.*}}: (!fir.ref, i32) -> i1 +!CHECK: fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 !CHECK: fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) {{.*}}: (!fir.ref) -> i32 end do !CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 index 69d133d50ffa0..069b777d1cdb2 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 @@ -50,14 +50,11 @@ !CHECK: %[[C0_2:.*]] = arith.constant 0 : i32 !CHECK: fir.store %[[C0_2]] to %[[XREF]] : !fir.ref !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL]], %[[XREF]] : i32, !fir.ref 
+!CHECK: omp.reduction %[[IVAL]], %[[XREF]] : i32, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator !CHECK: return @@ -78,14 +75,11 @@ subroutine simple_int_reduction !CHECK: %[[C0_2:.*]] = arith.constant 0.000000e+00 : f32 !CHECK: fir.store %[[C0_2]] to %[[XREF]] : !fir.ref !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL_f32]], %[[XREF]] : f32, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator @@ -107,14 +101,11 @@ subroutine simple_real_reduction !CHECK: %[[C0_2:.*]] = arith.constant 0 : i32 !CHECK: fir.store %[[C0_2]] to %[[XREF]] : !fir.ref !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL]], %[[XREF]] : i32, !fir.ref +!CHECK: omp.reduction %[[IVAL]], %[[XREF]] : i32, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator !CHECK: return @@ -135,14 +126,11 @@ subroutine simple_int_reduction_switch_order !CHECK: %[[C0_2:.*]] = arith.constant 
0.000000e+00 : f32 !CHECK: fir.store %[[C0_2]] to %[[XREF]] : !fir.ref !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL_f32]], %[[XREF]] : f32, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator @@ -164,15 +152,10 @@ subroutine simple_real_reduction_switch_order !CHECK: %[[YREF:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_int_reductions_same_typeEy"} !CHECK: %[[ZREF:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_int_reductions_same_typeEz"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref, @[[RED_I32_NAME]] -> %[[YREF]] : !fir.ref, @[[RED_I32_NAME]] -> %[[ZREF]] : !fir.ref) for (%[[IVAL]]) : i32 -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL1]], %[[XREF]] : i32, !fir.ref -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL2]], %[[YREF]] : i32, !fir.ref -!CHECK: %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL3]], %[[ZREF]] : i32, !fir.ref +!CHECK: omp.reduction %[[IVAL]], %[[XREF]] : i32, !fir.ref +!CHECK: omp.reduction %[[IVAL]], %[[YREF]] : i32, !fir.ref +!CHECK: omp.reduction 
%[[IVAL]], %[[ZREF]] : i32, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator !CHECK: return @@ -197,17 +180,12 @@ subroutine multiple_int_reductions_same_type !CHECK: %[[YREF:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_real_reductions_same_typeEy"} !CHECK: %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_real_reductions_same_typeEz"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref, @[[RED_F32_NAME]] -> %[[YREF]] : !fir.ref, @[[RED_F32_NAME]] -> %[[ZREF]] : !fir.ref) for (%[[IVAL]]) : i32 -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1_F32:.*]] = fir.convert %[[I_PVT_VAL1_I32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL1_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL1_F32]], %[[XREF]] : f32, !fir.ref -!CHECK: %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL2_F32:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL2_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL2_F32]], %[[YREF]] : f32, !fir.ref -!CHECK: %[[I_PVT_VAL3_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[I_PVT_VAL3_I32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL3_F32]], %[[ZREF]] : f32, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator @@ -234,19 +212,13 @@ subroutine multiple_real_reductions_same_type !CHECK: %[[YREF:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"} !CHECK: %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_different_typeEz"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 
{adapt.valuebyref, pinned} !CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref, @[[RED_I64_NAME]] -> %[[YREF]] : !fir.ref, @[[RED_F32_NAME]] -> %[[ZREF]] : !fir.ref, @[[RED_F64_NAME]] -> %[[WREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL1_I32]], %[[XREF]] : i32, !fir.ref -!CHECK: %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL2_I64:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> i64 +!CHECK: omp.reduction %[[IVAL]], %[[XREF]] : i32, !fir.ref +!CHECK: %[[I_PVT_VAL2_I64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: omp.reduction %[[I_PVT_VAL2_I64]], %[[YREF]] : i64, !fir.ref -!CHECK: %[[I_PVT_VAL3_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[I_PVT_VAL3_I32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL3_F32]], %[[ZREF]] : f32, !fir.ref -!CHECK: %[[I_PVT_VAL4_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL4_F64:.*]] = fir.convert %[[I_PVT_VAL4_I32]] : (i32) -> f64 +!CHECK: %[[I_PVT_VAL4_F64:.*]] = fir.convert %[[IVAL]] : (i32) -> f64 !CHECK: omp.reduction %[[I_PVT_VAL4_F64]], %[[WREF]] : f64, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-and.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-and.f90 index 425d37398c571..03fbc2819659d 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-and.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-and.f90 @@ -21,14 +21,11 @@ !CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"} !CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} !CHECK: omp.parallel -!CHECK: 
%[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 +!CHECK: %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 !CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> @@ -54,14 +51,11 @@ subroutine simple_reduction(y) !CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"} !CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 +!CHECK: %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 !CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> @@ -89,28 +83,23 @@ subroutine 
simple_reduction_switch_order(y) !CHECK: %[[YREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"} !CHECK: %[[ZREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>, @[[RED_NAME]] -> %[[YREF]] : !fir.ref>, @[[RED_NAME]] -> %[[ZREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[I_PVT_VAL1]] : (i32) -> i64 +!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_1:.*]] = arith.subi %[[CONVI_64_1]], %[[C1_64]] : i64 !CHECK: %[[W_PVT_REF_1:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_1]] : (!fir.ref>>, i64) -> !fir.ref> !CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_1]] : !fir.ref> !CHECK: omp.reduction %[[WVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64 +!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_2:.*]] = arith.subi %[[CONVI_64_2]], %[[C1_64]] : i64 !CHECK: %[[W_PVT_REF_2:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_2]] : (!fir.ref>>, i64) -> !fir.ref> !CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_2]] : !fir.ref> !CHECK: omp.reduction %[[WVAL]], %[[YREF]] : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_3:.*]] = fir.convert 
%[[I_PVT_VAL3]] : (i32) -> i64 +!CHECK: %[[CONVI_64_3:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_3:.*]] = arith.subi %[[CONVI_64_3]], %[[C1_64]] : i64 !CHECK: %[[W_PVT_REF_3:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_3]] : (!fir.ref>>, i64) -> !fir.ref> diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 index e8cf46f8261c4..eaa627e6afd51 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 @@ -21,14 +21,11 @@ !CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"} !CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 +!CHECK: %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 !CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> @@ -54,14 +51,11 @@ subroutine simple_reduction(y) !CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"} !CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"} !CHECK: 
omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 +!CHECK: %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 !CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> @@ -89,28 +83,23 @@ subroutine simple_reduction_switch_order(y) !CHECK: %[[YREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"} !CHECK: %[[ZREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>, @[[RED_NAME]] -> %[[YREF]] : !fir.ref>, @[[RED_NAME]] -> %[[ZREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[I_PVT_VAL1]] : (i32) -> i64 +!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_1:.*]] = arith.subi %[[CONVI_64_1]], %[[C1_64]] : i64 !CHECK: 
%[[W_PVT_REF_1:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_1]] : (!fir.ref>>, i64) -> !fir.ref> !CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_1]] : !fir.ref> !CHECK: omp.reduction %[[WVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64 +!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_2:.*]] = arith.subi %[[CONVI_64_2]], %[[C1_64]] : i64 !CHECK: %[[W_PVT_REF_2:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_2]] : (!fir.ref>>, i64) -> !fir.ref> !CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_2]] : !fir.ref> !CHECK: omp.reduction %[[WVAL]], %[[YREF]] : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_3:.*]] = fir.convert %[[I_PVT_VAL3]] : (i32) -> i64 +!CHECK: %[[CONVI_64_3:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_3:.*]] = arith.subi %[[CONVI_64_3]], %[[C1_64]] : i64 !CHECK: %[[W_PVT_REF_3:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_3]] : (!fir.ref>>, i64) -> !fir.ref> diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 index 6e5d6c34cedc5..617198d01716f 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 @@ -21,14 +21,11 @@ !CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"} !CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: 
omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 +!CHECK: %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 !CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> @@ -54,14 +51,11 @@ subroutine simple_reduction(y) !CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"} !CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 +!CHECK: %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 !CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> @@ -89,28 +83,23 @@ subroutine simple_reduction_switch_order(y) !CHECK: %[[YREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"} !CHECK: %[[ZREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name 
= "_QFmultiple_reductionsEz"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>, @[[RED_NAME]] -> %[[YREF]] : !fir.ref>, @[[RED_NAME]] -> %[[ZREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[I_PVT_VAL1]] : (i32) -> i64 +!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_1:.*]] = arith.subi %[[CONVI_64_1]], %[[C1_64]] : i64 !CHECK: %[[W_PVT_REF_1:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_1]] : (!fir.ref>>, i64) -> !fir.ref> !CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_1]] : !fir.ref> !CHECK: omp.reduction %[[WVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64 +!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_2:.*]] = arith.subi %[[CONVI_64_2]], %[[C1_64]] : i64 !CHECK: %[[W_PVT_REF_2:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_2]] : (!fir.ref>>, i64) -> !fir.ref> !CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_2]] : !fir.ref> !CHECK: omp.reduction %[[WVAL]], %[[YREF]] : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_3:.*]] = fir.convert %[[I_PVT_VAL3]] : (i32) -> i64 +!CHECK: %[[CONVI_64_3:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_3:.*]] = arith.subi %[[CONVI_64_3]], %[[C1_64]] : i64 
!CHECK: %[[W_PVT_REF_3:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_3]] : (!fir.ref>>, i64) -> !fir.ref> diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-or.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-or.f90 index cdc12500e2c30..e3d691e347ba2 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-or.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-or.f90 @@ -21,14 +21,11 @@ !CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"} !CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 +!CHECK: %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 !CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> @@ -54,14 +51,11 @@ subroutine simple_reduction(y) !CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"} !CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 
!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 +!CHECK: %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 !CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> @@ -89,28 +83,23 @@ subroutine simple_reduction_switch_order(y) !CHECK: %[[YREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"} !CHECK: %[[ZREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>, @[[RED_NAME]] -> %[[YREF]] : !fir.ref>, @[[RED_NAME]] -> %[[ZREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[I_PVT_VAL1]] : (i32) -> i64 +!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_1:.*]] = arith.subi %[[CONVI_64_1]], %[[C1_64]] : i64 !CHECK: %[[W_PVT_REF_1:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_1]] : (!fir.ref>>, i64) -> !fir.ref> !CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_1]] : !fir.ref> !CHECK: omp.reduction %[[WVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref> -!CHECK: 
%[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64 +!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_2:.*]] = arith.subi %[[CONVI_64_2]], %[[C1_64]] : i64 !CHECK: %[[W_PVT_REF_2:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_2]] : (!fir.ref>>, i64) -> !fir.ref> !CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_2]] : !fir.ref> !CHECK: omp.reduction %[[WVAL]], %[[YREF]] : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_3:.*]] = fir.convert %[[I_PVT_VAL3]] : (i32) -> i64 +!CHECK: %[[CONVI_64_3:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_3:.*]] = arith.subi %[[CONVI_64_3]], %[[C1_64]] : i64 !CHECK: %[[W_PVT_REF_3:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_3]] : (!fir.ref>>, i64) -> !fir.ref> diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-mul.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-mul.f90 index c30cde66b5167..f5d2113d2e57a 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-mul.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-mul.f90 @@ -50,14 +50,11 @@ !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: fir.store %[[C1_2]] to %[[XREF]] : !fir.ref !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C10:.*]] = arith.constant 10 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C10]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL]], %[[XREF]] : i32, !fir.ref +!CHECK: omp.reduction %[[IVAL]], %[[XREF]] : i32, 
!fir.ref !CHECK: omp.yield !CHECK: omp.terminator !CHECK: return @@ -79,14 +76,11 @@ subroutine simple_int_reduction !CHECK: %[[C0_2:.*]] = arith.constant 1.000000e+00 : f32 !CHECK: fir.store %[[C0_2]] to %[[XREF]] : !fir.ref !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 10 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL_f32]], %[[XREF]] : f32, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator @@ -108,14 +102,11 @@ subroutine simple_real_reduction !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: fir.store %[[C1_2]] to %[[XREF]] : !fir.ref !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C10:.*]] = arith.constant 10 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C10]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL]], %[[XREF]] : i32, !fir.ref +!CHECK: omp.reduction %[[IVAL]], %[[XREF]] : i32, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator !CHECK: return @@ -136,14 +127,11 @@ subroutine simple_int_reduction_switch_order !CHECK: %[[C0_2:.*]] = arith.constant 1.000000e+00 : f32 !CHECK: fir.store %[[C0_2]] to %[[XREF]] : 
!fir.ref !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 10 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL_f32]], %[[XREF]] : f32, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator @@ -165,15 +153,10 @@ subroutine simple_real_reduction_switch_order !CHECK: %[[YREF:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_int_reductions_same_typeEy"} !CHECK: %[[ZREF:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_int_reductions_same_typeEz"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref, @[[RED_I32_NAME]] -> %[[YREF]] : !fir.ref, @[[RED_I32_NAME]] -> %[[ZREF]] : !fir.ref) for (%[[IVAL]]) : i32 -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL1]], %[[XREF]] : i32, !fir.ref -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL2]], %[[YREF]] : i32, !fir.ref -!CHECK: %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL3]], %[[ZREF]] : i32, !fir.ref +!CHECK: omp.reduction %[[IVAL]], %[[XREF]] : i32, !fir.ref +!CHECK: omp.reduction %[[IVAL]], %[[YREF]] : i32, !fir.ref +!CHECK: omp.reduction %[[IVAL]], %[[ZREF]] : i32, !fir.ref !CHECK: omp.yield !CHECK: 
omp.terminator !CHECK: return @@ -198,17 +181,12 @@ subroutine multiple_int_reductions_same_type !CHECK: %[[YREF:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_real_reductions_same_typeEy"} !CHECK: %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_real_reductions_same_typeEz"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref, @[[RED_F32_NAME]] -> %[[YREF]] : !fir.ref, @[[RED_F32_NAME]] -> %[[ZREF]] : !fir.ref) for (%[[IVAL]]) : i32 -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1_F32:.*]] = fir.convert %[[I_PVT_VAL1_I32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL1_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL1_F32]], %[[XREF]] : f32, !fir.ref -!CHECK: %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL2_F32:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL2_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL2_F32]], %[[YREF]] : f32, !fir.ref -!CHECK: %[[I_PVT_VAL3_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[I_PVT_VAL3_I32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL3_F32]], %[[ZREF]] : f32, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator @@ -235,19 +213,13 @@ subroutine multiple_real_reductions_same_type !CHECK: %[[YREF:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"} !CHECK: %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_different_typeEz"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %2 
: !fir.ref, @[[RED_I64_NAME]] -> %3 : !fir.ref, @[[RED_F32_NAME]] -> %4 : !fir.ref, @[[RED_F64_NAME]] -> %1 : !fir.ref) for (%[[IVAL:.*]]) : i32 -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL1_I32]], %[[XREF]] : i32, !fir.ref -!CHECK: %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL2_I64:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> i64 +!CHECK: omp.reduction %[[IVAL]], %[[XREF]] : i32, !fir.ref +!CHECK: %[[I_PVT_VAL2_I64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: omp.reduction %[[I_PVT_VAL2_I64]], %[[YREF]] : i64, !fir.ref -!CHECK: %[[I_PVT_VAL3_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[I_PVT_VAL3_I32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL3_F32]], %[[ZREF]] : f32, !fir.ref -!CHECK: %[[I_PVT_VAL4_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL4_F64:.*]] = fir.convert %[[I_PVT_VAL4_I32]] : (i32) -> f64 +!CHECK: %[[I_PVT_VAL4_F64:.*]] = fir.convert %[[IVAL]] : (i32) -> f64 !CHECK: omp.reduction %[[I_PVT_VAL4_F64]], %[[WREF]] : f64, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90 index 47f9d572a8653..95248e99b9c63 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90 @@ -15,13 +15,11 @@ program wsloop_dynamic !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop schedule(runtime, simd) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) -!CHECK: fir.store %[[I]] to %[[STORE:.*]] : !fir.ref do i=1, 9 print*, i !CHECK: %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput -!CHECK: %[[LOAD:.*]] = 
fir.load %[[STORE]] : !fir.ref -!CHECK: fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[LOAD]]) {{.*}}: (!fir.ref, i32) -> i1 +!CHECK: fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 !CHECK: fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) {{.*}}: (!fir.ref) -> i32 end do !CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90 index 466055868f1cc..c515ed10d0d5c 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90 @@ -25,11 +25,9 @@ program wsloop_variable !CHECK: omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16 !CHECK: fir.store %[[ARG0_I16]] to %[[STORE_IV0:.*]] : !fir.ref -!CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref !CHECK: %[[LOAD_IV0:.*]] = fir.load %[[STORE_IV0]] : !fir.ref !CHECK: %[[LOAD_IV0_I64:.*]] = fir.convert %[[LOAD_IV0]] : (i16) -> i64 -!CHECK: %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]] : !fir.ref -!CHECK: %[[TMP10:.*]] = arith.addi %[[LOAD_IV0_I64]], %[[LOAD_IV1]] : i64 +!CHECK: %[[TMP10:.*]] = arith.addi %[[LOAD_IV0_I64]], %[[ARG1]] : i64 !CHECK: %[[TMP11:.*]] = fir.convert %[[TMP10]] : (i64) -> f32 !CHECK: fir.store %[[TMP11]] to %{{.*}} : !fir.ref !CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/FIR/wsloop.f90 b/flang/test/Lower/OpenMP/FIR/wsloop.f90 index 2c00d1a9fddae..49b47a6307284 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop.f90 @@ -7,16 +7,13 @@ subroutine simple_loop integer :: i ! CHECK: omp.parallel !$OMP PARALLEL - ! CHECK: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 ! 
CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO do i=1, 9 - ! CHECK: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref - ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do ! CHECK: omp.yield @@ -30,16 +27,13 @@ subroutine simple_loop_with_step integer :: i ! CHECK: omp.parallel !$OMP PARALLEL - ! CHECK: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 2 : i32 ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) - ! CHECK: fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref - ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref !$OMP DO do i=1, 9, 2 - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do ! CHECK: omp.yield @@ -53,16 +47,13 @@ subroutine loop_with_schedule_nowait integer :: i ! CHECK: omp.parallel !$OMP PARALLEL - ! CHECK: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 ! CHECK: omp.wsloop schedule(runtime) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO SCHEDULE(runtime) do i=1, 9 - ! CHECK: fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref - ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! 
CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do ! CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 b/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 index fd56038231b19..ea39293ab78ff 100644 --- a/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 +++ b/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 @@ -8,16 +8,13 @@ ! CHECK: omp.parallel { ! EXPECTED: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFEy"} ! EXPECTED: %[[PRIVATE_Z:.*]] = fir.alloca i32 {bindc_name = "z", pinned, uniq_name = "_QFEz"} -! CHECK: %[[TEMP:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: %[[const_1:.*]] = arith.constant 1 : i32 ! CHECK: %[[const_2:.*]] = arith.constant 10 : i32 ! CHECK: %[[const_3:.*]] = arith.constant 1 : i32 ! CHECK: omp.wsloop for (%[[ARG:.*]]) : i32 = (%[[const_1]]) to (%[[const_2]]) inclusive step (%[[const_3]]) { -! CHECK: fir.store %[[ARG]] to %[[TEMP]] : !fir.ref ! EXPECTED: %[[temp_1:.*]] = fir.load %[[PRIVATE_Z]] : !fir.ref ! CHECK: %[[temp_1:.*]] = fir.load %{{.*}} : !fir.ref -! CHECK: %[[temp_2:.*]] = fir.load %[[TEMP]] : !fir.ref -! CHECK: %[[result:.*]] = arith.addi %[[temp_1]], %[[temp_2]] : i32 +! CHECK: %[[result:.*]] = arith.addi %[[temp_1]], %[[ARG]] : i32 ! EXPECTED: fir.store %[[result]] to %[[PRIVATE_Y]] : !fir.ref ! CHECK: fir.store %[[result]] to %{{.*}} : !fir.ref ! CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/hlfir-wsloop.f90 b/flang/test/Lower/OpenMP/hlfir-wsloop.f90 index b6be77fe3016d..8b3bee48890d2 100644 --- a/flang/test/Lower/OpenMP/hlfir-wsloop.f90 +++ b/flang/test/Lower/OpenMP/hlfir-wsloop.f90 @@ -6,19 +6,21 @@ !CHECK-LABEL: func @_QPsimple_loop() subroutine simple_loop integer :: i - ! CHECK-DAG: %[[WS_ST:.*]] = arith.constant 1 : i32 - ! CHECK-DAG: %[[WS_END:.*]] = arith.constant 9 : i32 - ! CHECK: omp.parallel + ! 
CHECK-DAG: %[[WS_ST:.*]] = arith.constant 1 : i32 + ! CHECK-DAG: %[[WS_END:.*]] = arith.constant 9 : i32 + ! CHECK: omp.parallel !$OMP PARALLEL - ! CHECK-DAG: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} - ! CHECK: %[[IV:.*]] = fir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loopEi"} : (!fir.ref) -> !fir.ref ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_ST]]) to (%[[WS_END]]) inclusive step (%[[WS_ST]]) !$OMP DO do i=1, 9 - ! CHECK: fir.store %[[I]] to %[[IV:.*]] : !fir.ref - ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK-DAG: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} + ! CHECK: %[[IV:.*]] = fir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loopEi"} : (!fir.ref) -> !fir.ref + ! CHECK: fir.store %[[I]] to %[[IV:.*]] : !fir.ref + ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV]] : !fir.ref + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i + ! CHECK: fir.call @_QPfoo(%[[IV]]) {{.*}}: (!fir.ref) -> () + call foo(i) end do ! 
CHECK: omp.yield !$OMP END DO diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 index 97ee665442e3a..97510745d0539 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 @@ -18,15 +18,11 @@ !CHECK: %[[C0_2:.*]] = arith.constant 0 : i32 !CHECK: hlfir.assign %[[C0_2]] to %[[XDECL]]#0 : i32, !fir.ref !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XDECL]]#0 : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL]], %[[XDECL]]#0 : i32, !fir.ref +!CHECK: omp.reduction %[[IVAL]], %[[XDECL]]#0 : i32, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator !CHECK: return diff --git a/flang/test/Transforms/omp-wsloop-index.mlir b/flang/test/Transforms/omp-wsloop-index.mlir new file mode 100644 index 0000000000000..9443209d90b9f --- /dev/null +++ b/flang/test/Transforms/omp-wsloop-index.mlir @@ -0,0 +1,247 @@ +// RUN: fir-opt --omp-loop-index-mem2reg %s | FileCheck %s + +func.func private @foo(%arg0 : !fir.ref) -> i32 + +// CHECK-LABEL: @wsloop_remove_alloca +func.func @wsloop_remove_alloca() { + // CHECK: %[[RESULT:.*]] = fir.alloca i32 + // CHECK: omp.parallel + %0 = fir.alloca i32 + omp.parallel { + // CHECK-NOT: fir.alloca + // CHECK-DAG: arith.constant 1 + // CHECK-DAG: arith.constant 10 + // CHECK-NEXT: omp.wsloop for (%[[INDEX:.*]]) : i32 + %1 = fir.alloca i32 
+    %c1_i32 = arith.constant 1 : i32
+    %c10_i32 = arith.constant 10 : i32
+    omp.wsloop for (%arg0) : i32 = (%c1_i32) to (%c10_i32) inclusive step (%c1_i32) {
+      // CHECK-NOT: fir.alloca
+      // CHECK: fir.store %[[INDEX]] to %[[RESULT]]
+      // CHECK: omp.yield
+      fir.store %arg0 to %1 : !fir.ref<i32>
+      %2 = fir.load %1 : !fir.ref<i32>
+      fir.store %2 to %0 : !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
+
+// FIR, omp.simdloop: the index alloca (%1) is only stored to from the loop
+// block argument and then loaded, never passed by reference. The expected
+// output has no alloca left inside omp.parallel and stores the block argument
+// straight into the result variable.
+// CHECK-LABEL: @simdloop_remove_alloca
+func.func @simdloop_remove_alloca() {
+  // CHECK: %[[RESULT:.*]] = fir.alloca i32
+  // CHECK: omp.parallel
+  %0 = fir.alloca i32
+  omp.parallel {
+    // CHECK-NOT: fir.alloca
+    // CHECK-DAG: arith.constant 1
+    // CHECK-DAG: arith.constant 10
+    // CHECK-NEXT: omp.simdloop for (%[[INDEX:.*]]) : i32
+    %1 = fir.alloca i32
+    %c1_i32 = arith.constant 1 : i32
+    %c10_i32 = arith.constant 10 : i32
+    omp.simdloop for (%arg0) : i32 = (%c1_i32) to (%c10_i32) inclusive step (%c1_i32) {
+      // CHECK-NOT: fir.alloca
+      // CHECK: fir.store %[[INDEX]] to %[[RESULT]]
+      // CHECK: omp.yield
+      fir.store %arg0 to %1 : !fir.ref<i32>
+      %2 = fir.load %1 : !fir.ref<i32>
+      fir.store %2 to %0 : !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
+
+// FIR, omp.wsloop: the index alloca (%1) is passed by reference to @foo, so
+// it cannot be removed. The expected output has the alloca moved out of the
+// omp.parallel region and into the loop body, where it is initialized from the
+// block argument. NOTE(review): @foo is declared outside this part of the file.
+// CHECK-LABEL: @wsloop_push_alloca
+func.func @wsloop_push_alloca() {
+  // CHECK: %[[RESULT:.*]] = fir.alloca i32
+  // CHECK: omp.parallel
+  %0 = fir.alloca i32
+  omp.parallel {
+    // CHECK-NOT: fir.alloca
+    // CHECK-DAG: arith.constant 1
+    // CHECK-DAG: arith.constant 10
+    // CHECK-NEXT: omp.wsloop for (%[[INDEX:.*]]) : i32
+    %1 = fir.alloca i32
+    %c1_i32 = arith.constant 1 : i32
+    %c10_i32 = arith.constant 10 : i32
+    omp.wsloop for (%arg0) : i32 = (%c1_i32) to (%c10_i32) inclusive step (%c1_i32) {
+      // CHECK: %[[ALLOCA:.*]] = fir.alloca i32
+      // CHECK: fir.store %[[INDEX]] to %[[ALLOCA]]
+      // CHECK: %[[RETURN:.*]] = func.call @foo(%[[ALLOCA]])
+      // CHECK: fir.store %[[RETURN]] to %[[RESULT]]
+      // CHECK: omp.yield
+      fir.store %arg0 to %1 : !fir.ref<i32>
+      %2 = func.call @foo(%1) : (!fir.ref<i32>) -> i32
+      fir.store %2 to %0 : !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
+
+// FIR, omp.simdloop: same scenario as the wsloop case above. The index alloca
+// is passed by reference to @foo, so it is expected to be kept and moved into
+// the loop body rather than removed.
+// CHECK-LABEL: @simdloop_push_alloca
+func.func @simdloop_push_alloca() {
+  // CHECK: %[[RESULT:.*]] = fir.alloca i32
+  // CHECK: omp.parallel
+  %0 = fir.alloca i32
+  omp.parallel {
+    // CHECK-NOT: fir.alloca
+    // CHECK-DAG: arith.constant 1
+    // CHECK-DAG: arith.constant 10
+    // CHECK-NEXT: omp.simdloop for (%[[INDEX:.*]]) : i32
+    %1 = fir.alloca i32
+    %c1_i32 = arith.constant 1 : i32
+    %c10_i32 = arith.constant 10 : i32
+    omp.simdloop for (%arg0) : i32 = (%c1_i32) to (%c10_i32) inclusive step (%c1_i32) {
+      // CHECK: %[[ALLOCA:.*]] = fir.alloca i32
+      // CHECK: fir.store %[[INDEX]] to %[[ALLOCA]]
+      // CHECK: %[[RETURN:.*]] = func.call @foo(%[[ALLOCA]])
+      // CHECK: fir.store %[[RETURN]] to %[[RESULT]]
+      // CHECK: omp.yield
+      fir.store %arg0 to %1 : !fir.ref<i32>
+      %2 = func.call @foo(%1) : (!fir.ref<i32>) -> i32
+      fir.store %2 to %0 : !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
+
+// HLFIR, omp.wsloop: the index variable is an alloca wrapped in hlfir.declare
+// and is only stored to and loaded from. The expected output has neither the
+// alloca nor its hlfir.declare inside omp.parallel, and assigns the loop block
+// argument directly to the result.
+// CHECK-LABEL: @hlfir_wsloop_remove_alloca
+func.func @hlfir_wsloop_remove_alloca() {
+  // CHECK: %[[RESULT_ALLOCA:.*]] = fir.alloca i32
+  // CHECK: %[[RESULT:.*]]:2 = hlfir.declare %[[RESULT_ALLOCA]]
+  // CHECK: omp.parallel
+  %0 = fir.alloca i32
+  %1:2 = hlfir.declare %0 {uniq_name = "result"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  omp.parallel {
+    // CHECK-NOT: fir.alloca
+    // CHECK-NOT: hlfir.declare
+    // CHECK-DAG: arith.constant 1
+    // CHECK-DAG: arith.constant 10
+    // CHECK-NEXT: omp.wsloop for (%[[INDEX:.*]]) : i32
+    %2 = fir.alloca i32
+    %3:2 = hlfir.declare %2 {uniq_name = "index"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+    %c1_i32 = arith.constant 1 : i32
+    %c10_i32 = arith.constant 10 : i32
+    omp.wsloop for (%arg0) : i32 = (%c1_i32) to (%c10_i32) inclusive step (%c1_i32) {
+      // CHECK-NOT: fir.alloca
+      // CHECK-NOT: hlfir.declare
+      // CHECK: hlfir.assign %[[INDEX]] to %[[RESULT]]#0
+      // CHECK: omp.yield
+      fir.store %arg0 to %3#1 : !fir.ref<i32>
+      %4 = fir.load %3#0 : !fir.ref<i32>
+      hlfir.assign %4 to %1#0 : i32, !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
+
+// HLFIR, omp.simdloop: same scenario as the HLFIR wsloop removal case above.
+// The declared index variable is never passed by reference, so both its alloca
+// and its hlfir.declare are expected to be removed.
+// CHECK-LABEL: @hlfir_simdloop_remove_alloca
+func.func @hlfir_simdloop_remove_alloca() {
+  // CHECK: %[[RESULT_ALLOCA:.*]] = fir.alloca i32
+  // CHECK: %[[RESULT:.*]]:2 = hlfir.declare %[[RESULT_ALLOCA]]
+  // CHECK: omp.parallel
+  %0 = fir.alloca i32
+  %1:2 = hlfir.declare %0 {uniq_name = "result"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  omp.parallel {
+    // CHECK-NOT: fir.alloca
+    // CHECK-NOT: hlfir.declare
+    // CHECK-DAG: arith.constant 1
+    // CHECK-DAG: arith.constant 10
+    // CHECK-NEXT: omp.simdloop for (%[[INDEX:.*]]) : i32
+    %2 = fir.alloca i32
+    %3:2 = hlfir.declare %2 {uniq_name = "index"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+    %c1_i32 = arith.constant 1 : i32
+    %c10_i32 = arith.constant 10 : i32
+    omp.simdloop for (%arg0) : i32 = (%c1_i32) to (%c10_i32) inclusive step (%c1_i32) {
+      // CHECK-NOT: fir.alloca
+      // CHECK-NOT: hlfir.declare
+      // CHECK: hlfir.assign %[[INDEX]] to %[[RESULT]]#0
+      // CHECK: omp.yield
+      fir.store %arg0 to %3#1 : !fir.ref<i32>
+      %4 = fir.load %3#0 : !fir.ref<i32>
+      hlfir.assign %4 to %1#0 : i32, !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
+
+// HLFIR, omp.wsloop: the declared index variable is passed by reference to
+// @foo, so it must be kept. The expected output has both the alloca and its
+// hlfir.declare moved from the omp.parallel region into the loop body.
+// NOTE(review): @foo is declared outside this part of the file.
+// CHECK-LABEL: @hlfir_wsloop_push_alloca
+func.func @hlfir_wsloop_push_alloca() {
+  // CHECK: %[[RESULT_ALLOCA:.*]] = fir.alloca i32
+  // CHECK: %[[RESULT:.*]]:2 = hlfir.declare %[[RESULT_ALLOCA]]
+  // CHECK: omp.parallel
+  %0 = fir.alloca i32
+  %1:2 = hlfir.declare %0 {uniq_name = "result"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  omp.parallel {
+    // CHECK-NOT: fir.alloca
+    // CHECK-NOT: hlfir.declare
+    // CHECK-DAG: arith.constant 1
+    // CHECK-DAG: arith.constant 10
+    // CHECK-NEXT: omp.wsloop for (%[[INDEX:.*]]) : i32
+    %2 = fir.alloca i32
+    %3:2 = hlfir.declare %2 {uniq_name = "index"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+    %c1_i32 = arith.constant 1 : i32
+    %c10_i32 = arith.constant 10 : i32
+    omp.wsloop for (%arg0) : i32 = (%c1_i32) to (%c10_i32) inclusive step (%c1_i32) {
+      // CHECK: %[[INDEX_ALLOCA:.*]] = fir.alloca i32
+      // CHECK: %[[INDEX_DECL:.*]]:2 = hlfir.declare %[[INDEX_ALLOCA]]
+      // CHECK: fir.store %[[INDEX]] to %[[INDEX_DECL]]#1
+      // CHECK: %[[RETURN:.*]] = fir.call @foo(%[[INDEX_DECL]]#1)
+      // CHECK: hlfir.assign %[[RETURN]] to %[[RESULT]]#0
+      // CHECK: omp.yield
+      fir.store %arg0 to %3#1 : !fir.ref<i32>
+      %4 = fir.call @foo(%3#1) : (!fir.ref<i32>) -> i32
+      hlfir.assign %4 to %1#0 : i32, !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
+
+// HLFIR, omp.simdloop: same scenario as the HLFIR wsloop case above. The
+// declared index variable escapes through the call to @foo, so its alloca and
+// hlfir.declare are expected to be moved into the loop body.
+// CHECK-LABEL: @hlfir_simdloop_push_alloca
+func.func @hlfir_simdloop_push_alloca() {
+  // CHECK: %[[RESULT_ALLOCA:.*]] = fir.alloca i32
+  // CHECK: %[[RESULT:.*]]:2 = hlfir.declare %[[RESULT_ALLOCA]]
+  // CHECK: omp.parallel
+  %0 = fir.alloca i32
+  %1:2 = hlfir.declare %0 {uniq_name = "result"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  omp.parallel {
+    // CHECK-NOT: fir.alloca
+    // CHECK-NOT: hlfir.declare
+    // CHECK-DAG: arith.constant 1
+    // CHECK-DAG: arith.constant 10
+    // CHECK-NEXT: omp.simdloop for (%[[INDEX:.*]]) : i32
+    %2 = fir.alloca i32
+    %3:2 = hlfir.declare %2 {uniq_name = "index"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+    %c1_i32 = arith.constant 1 : i32
+    %c10_i32 = arith.constant 10 : i32
+    omp.simdloop for (%arg0) : i32 = (%c1_i32) to (%c10_i32) inclusive step (%c1_i32) {
+      // CHECK: %[[INDEX_ALLOCA:.*]] = fir.alloca i32
+      // CHECK: %[[INDEX_DECL:.*]]:2 = hlfir.declare %[[INDEX_ALLOCA]]
+      // CHECK: fir.store %[[INDEX]] to %[[INDEX_DECL]]#1
+      // CHECK: %[[RETURN:.*]] = fir.call @foo(%[[INDEX_DECL]]#1)
+      // CHECK: hlfir.assign %[[RETURN]] to %[[RESULT]]#0
+      // CHECK: omp.yield
+      fir.store %arg0 to %3#1 : !fir.ref<i32>
+      %4 = fir.call @foo(%3#1) : (!fir.ref<i32>) -> i32
+      hlfir.assign %4 to %1#0 : i32, !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}