From bb5826b3c3468081b9a4aa39a5e91436648d2d1f Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Thu, 21 Sep 2023 13:36:48 +0100 Subject: [PATCH] [Flang][OpenMP] Create MLIR optimization pass to push index allocations into loop body and remove them if redundant This patch adds a Flang-only MLIR optimization pass that aims to remove redundant allocations of loop index variables related to OpenMP loops and improve LLVM IR code generation. The loop operations for which this is implemented and tested are `omp.wsloop` and `omp.simdloop`, and the main ways in which this is approached are to move allocations inside of the loop body (later this avoids having to pass these variables as arguments to an outlined function in LLVM IR) and to use block arguments representing loop indices on the loop region directly instead, if possible. This is done in two stages: 1. Push allocations (`fir.alloca` and `hlfir.declare`) inside of the loop operation's region. This is only done for allocations that are used to store loop index variables and only used inside of a single loop region. The result of this is that, during MLIR to LLVM IR translation, when the loop operation is lowered by creating a function the allocation does not need to be passed as an additional argument. 2. Remove allocations and related load and store operations, and access the index through the corresponding block argument. If the previous step is successful, this can also be done if all uses of the allocation are `fir.load` or `fir.store`, meaning that it's not passed as a reference to another function/subprocedure. The pass has been implemented to work with and without HLFIR support enabled, and multiple unit tests have been updated due to this pass running by default. 
--- .../flang/Optimizer/Transforms/Passes.h | 1 + .../flang/Optimizer/Transforms/Passes.td | 9 + flang/include/flang/Tools/CLOptions.inc | 1 + flang/lib/Optimizer/Transforms/CMakeLists.txt | 1 + .../Transforms/OMPLoopIndexMemToReg.cpp | 250 ++++++++++++++++++ flang/test/Lower/OpenMP/FIR/copyin.f90 | 53 ++-- .../OpenMP/FIR/lastprivate-commonblock.f90 | 2 - .../FIR/parallel-private-clause-fixes.f90 | 39 ++- .../OpenMP/FIR/parallel-private-clause.f90 | 28 +- .../OpenMP/FIR/parallel-wsloop-firstpriv.f90 | 8 +- .../test/Lower/OpenMP/FIR/parallel-wsloop.f90 | 24 +- flang/test/Lower/OpenMP/FIR/simd.f90 | 32 +-- .../Lower/OpenMP/FIR/stop-stmt-in-region.f90 | 2 - flang/test/Lower/OpenMP/FIR/target.f90 | 23 +- flang/test/Lower/OpenMP/FIR/unstructured.f90 | 12 +- flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 | 12 +- .../test/Lower/OpenMP/FIR/wsloop-collapse.f90 | 12 +- .../Lower/OpenMP/FIR/wsloop-monotonic.f90 | 5 +- .../Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 | 5 +- .../Lower/OpenMP/FIR/wsloop-reduction-add.f90 | 56 +--- .../FIR/wsloop-reduction-logical-and.f90 | 21 +- .../FIR/wsloop-reduction-logical-eqv.f90 | 21 +- .../FIR/wsloop-reduction-logical-neqv.f90 | 21 +- .../FIR/wsloop-reduction-logical-or.f90 | 21 +- .../Lower/OpenMP/FIR/wsloop-reduction-mul.f90 | 56 +--- flang/test/Lower/OpenMP/FIR/wsloop-simd.f90 | 4 +- .../test/Lower/OpenMP/FIR/wsloop-variable.f90 | 4 +- flang/test/Lower/OpenMP/FIR/wsloop.f90 | 15 +- .../Todo/omp-default-clause-inner-loop.f90 | 5 +- flang/test/Lower/OpenMP/hlfir-wsloop.f90 | 18 +- .../OpenMP/wsloop-reduction-add-hlfir.f90 | 6 +- flang/test/Transforms/omp-wsloop-index.mlir | 247 +++++++++++++++++ 32 files changed, 663 insertions(+), 351 deletions(-) create mode 100644 flang/lib/Optimizer/Transforms/OMPLoopIndexMemToReg.cpp create mode 100644 flang/test/Transforms/omp-wsloop-index.mlir diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h index 8aeb3e373298e..0a9a3ca5bd030 100644 --- 
a/flang/include/flang/Optimizer/Transforms/Passes.h +++ b/flang/include/flang/Optimizer/Transforms/Passes.h @@ -79,6 +79,7 @@ createOMPEarlyOutliningPass(); std::unique_ptr createOMPFunctionFilteringPass(); std::unique_ptr> createOMPMarkDeclareTargetPass(); +std::unique_ptr createOMPLoopIndexMemToRegPass(); // declarative passes #define GEN_PASS_REGISTRATION diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td index 9474edf13ce46..8304b882d525c 100644 --- a/flang/include/flang/Optimizer/Transforms/Passes.td +++ b/flang/include/flang/Optimizer/Transforms/Passes.td @@ -326,4 +326,13 @@ def OMPFunctionFiltering : Pass<"omp-function-filtering"> { ]; } +def OMPLoopIndexMemToReg : Pass<"omp-loop-index-mem2reg", "mlir::func::FuncOp"> { + let summary = "Pushes allocations for index variables of OpenMP loops into " + "the loop region and, if they are never passed by reference, " + "they are replaced by the corresponding entry block arguments, " + "removing all redundant allocations in the process."; + let constructor = "::fir::createOMPLoopIndexMemToRegPass()"; + let dependentDialects = ["fir::FIROpsDialect", "mlir::omp::OpenMPDialect"]; +} + #endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc index 616d9ddc066a7..0b5e8a0656804 100644 --- a/flang/include/flang/Tools/CLOptions.inc +++ b/flang/include/flang/Tools/CLOptions.inc @@ -270,6 +270,7 @@ inline void createOpenMPFIRPassPipeline( pm.addPass(fir::createOMPEarlyOutliningPass()); pm.addPass(fir::createOMPFunctionFilteringPass()); } + pm.addPass(fir::createOMPLoopIndexMemToRegPass()); } #if !defined(FLANG_EXCLUDE_CODEGEN) diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt index 3d2b7e5eaeade..306551b03ced1 100644 --- a/flang/lib/Optimizer/Transforms/CMakeLists.txt +++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt @@ 
-19,6 +19,7 @@ add_flang_library(FIRTransforms OMPEarlyOutlining.cpp OMPFunctionFiltering.cpp OMPMarkDeclareTarget.cpp + OMPLoopIndexMemToReg.cpp DEPENDS FIRDialect diff --git a/flang/lib/Optimizer/Transforms/OMPLoopIndexMemToReg.cpp b/flang/lib/Optimizer/Transforms/OMPLoopIndexMemToReg.cpp new file mode 100644 index 0000000000000..af117d625154b --- /dev/null +++ b/flang/lib/Optimizer/Transforms/OMPLoopIndexMemToReg.cpp @@ -0,0 +1,250 @@ +//===- OMPLoopIndexMemToReg.cpp -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements transforms to push allocations into an OpenMP loop +// operation region when they are used to store loop indices. Then, they are +// removed together with any associated load or store operations if their +// address is not needed, in which case uses of their values are replaced by +// the block argument from which they were originally initialized. 
+// +//===----------------------------------------------------------------------===// + +#include "flang/Optimizer/HLFIR/HLFIROps.h" +#include "flang/Optimizer/Transforms/Passes.h" + +#include "flang/Optimizer/Dialect/FIRDialect.h" +#include "flang/Optimizer/Dialect/FIROps.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h" +#include "mlir/IR/BuiltinOps.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace fir { +#define GEN_PASS_DEF_OMPLOOPINDEXMEMTOREG +#include "flang/Optimizer/Transforms/Passes.h.inc" +} // namespace fir + +using namespace mlir; + +template +class LoopProcessorHelper { + LoopOpTy loop; + + bool allUsesInLoop(ValueRange stores) { + for (Value store : stores) { + for (OpOperand &use : store.getUses()) { + Operation *owner = use.getOwner(); + if (owner->getParentOfType() != loop.getOperation()) + return false; + } + } + return true; + } + + /// Check whether a given hlfir.declare known to only be used inside of the + /// loop and initialized by a fir.alloca operation also only used inside of + /// the loop can be removed and replaced by the block argument representing + /// the corresponding loop index. + static bool isDeclareRemovable(hlfir::DeclareOp declareOp) { + fir::AllocaOp allocaOp = llvm::dyn_cast_if_present( + declareOp.getMemref().getDefiningOp()); + + // Check that the hlfir.declare is initialized by a fir.alloca that is only + // used as argument to that operation. + if (!allocaOp || !allocaOp.getResult().hasOneUse()) + return false; + + // Check that uses of the pointers can be replaced by the block argument. 
+ for (OpOperand &use : declareOp.getOriginalBase().getUses()) { + Operation *owner = use.getOwner(); + if (!isa(owner)) + return false; + } + for (OpOperand &use : declareOp.getBase().getUses()) { + Operation *owner = use.getOwner(); + if (!isa(owner)) + return false; + } + + return true; + } + + /// Check whether a given fir.alloca known to only be used inside of the loop + /// can be removed and replaced by the block argument representing the + /// corresponding loop index. + static bool isAllocaRemovable(fir::AllocaOp allocaOp) { + // Check that uses of the pointer are all fir.load and fir.store. + for (OpOperand &use : allocaOp.getResult().getUses()) { + Operation *owner = use.getOwner(); + if (!isa(owner) && !isa(owner)) + return false; + } + + return true; + } + + /// Try to push an hlfir.declare operation defined outside of the loop inside, + /// if all uses of that operation and the corresponding fir.alloca are + /// contained inside of the loop. + LogicalResult pushDeclareIntoLoop(hlfir::DeclareOp declareOp) { + // Check that all uses are inside of the loop. + if (!allUsesInLoop(declareOp->getResults())) + return failure(); + + // Push hlfir.declare into the beginning of the loop region. + Block &b = loop.getRegion().getBlocks().front(); + declareOp->moveBefore(&b, b.begin()); + + // Find associated fir.alloca and push into the beginning of the loop + // region. + fir::AllocaOp allocaOp = + cast(declareOp.getMemref().getDefiningOp()); + Value allocaVal = allocaOp.getResult(); + + if (!allUsesInLoop(allocaVal)) + return failure(); + + allocaOp->moveBefore(&b, b.begin()); + return success(); + } + + /// Try to push a fir.alloca operation defined outside of the loop inside, + /// if all uses of that operation are contained inside of the loop. + LogicalResult pushAllocaIntoLoop(fir::AllocaOp allocaOp) { + Value store = allocaOp.getResult(); + + // Check that all uses are inside of the loop. 
+ if (!allUsesInLoop(store)) + return failure(); + + // Push fir.alloca into the beginning of the loop region. + Block &b = loop.getRegion().getBlocks().front(); + allocaOp->moveBefore(&b, b.begin()); + return success(); + } + + void processLoopArg(BlockArgument arg, llvm::ArrayRef argStores, + SmallPtrSetImpl &opsToDelete) { + llvm::SmallPtrSet toDelete; + for (Value store : argStores) { + Operation *op = store.getDefiningOp(); + + // Skip argument if storage not defined by an operation. + if (!op) + return; + + // Support HLFIR flow as well as regular FIR flow. + if (auto declareOp = dyn_cast(op)) { + if (succeeded(pushDeclareIntoLoop(declareOp)) && + isDeclareRemovable(declareOp)) { + // Mark hlfir.declare, fir.alloca and related uses for deletion. + for (OpOperand &use : declareOp.getOriginalBase().getUses()) + toDelete.insert(use.getOwner()); + + for (OpOperand &use : declareOp.getBase().getUses()) + toDelete.insert(use.getOwner()); + + Operation *allocaOp = declareOp.getMemref().getDefiningOp(); + toDelete.insert(declareOp); + toDelete.insert(allocaOp); + } + } else if (auto allocaOp = dyn_cast(op)) { + if (succeeded(pushAllocaIntoLoop(allocaOp)) && + isAllocaRemovable(allocaOp)) { + // Do not make any further modifications if an address to the index + // is necessary. Otherwise, the values can be used directly from the + // loop region first block's arguments. + + // Mark fir.alloca and related uses for deletion. + for (OpOperand &use : allocaOp.getResult().getUses()) + toDelete.insert(use.getOwner()); + + // Delete now-unused fir.alloca. + toDelete.insert(allocaOp); + } + } else { + return; + } + } + + // Only consider marked operations if all load, store and allocation + // operations associated with the given loop index can be removed. + opsToDelete.insert(toDelete.begin(), toDelete.end()); + + for (Operation *op : toDelete) { + // Replace all fir.load operations with the index as returned by the + // OpenMP loop operation. 
+ if (isa(op)) + op->replaceAllUsesWith(ValueRange(arg)); + // Drop all uses of fir.alloca and hlfir.declare because their defining + // operations will be deleted as well. + else if (isa(op) || isa(op)) + op->dropAllUses(); + } + } + +public: + explicit LoopProcessorHelper(LoopOpTy loop) : loop(loop) {} + + void process() { + llvm::SmallPtrSet opsToDelete; + llvm::SmallVector> storeAddresses; + llvm::ArrayRef loopArgs = loop.getRegion().getArguments(); + + // Collect arguments of the loop operation. + for (BlockArgument arg : loopArgs) { + // Find fir.store uses of these indices and gather all addresses where + // they are stored. + llvm::SmallVector &argStores = storeAddresses.emplace_back(); + for (OpOperand &argUse : arg.getUses()) + if (auto storeOp = dyn_cast(argUse.getOwner())) + argStores.push_back(storeOp.getMemref()); + } + + // Process all loop indices and mark them for deletion independently of each + // other. + for (auto it : llvm::zip(loopArgs, storeAddresses)) + processLoopArg(std::get<0>(it), std::get<1>(it), opsToDelete); + + // Delete marked operations. + for (Operation *op : opsToDelete) + op->erase(); + } +}; + +namespace { +class OMPLoopIndexMemToRegPass + : public fir::impl::OMPLoopIndexMemToRegBase { +public: + void runOnOperation() override { + func::FuncOp func = getOperation(); + + func->walk( + [&](omp::WsLoopOp loop) { LoopProcessorHelper(loop).process(); }); + + func.walk( + [&](omp::SimdLoopOp loop) { LoopProcessorHelper(loop).process(); }); + + func.walk( + [&](omp::TaskLoopOp loop) { LoopProcessorHelper(loop).process(); }); + } +}; +} // namespace + +std::unique_ptr fir::createOMPLoopIndexMemToRegPass() { + return std::make_unique(); +} diff --git a/flang/test/Lower/OpenMP/FIR/copyin.f90 b/flang/test/Lower/OpenMP/FIR/copyin.f90 index ddfa0ea091462..3443b310074f5 100644 --- a/flang/test/Lower/OpenMP/FIR/copyin.f90 +++ b/flang/test/Lower/OpenMP/FIR/copyin.f90 @@ -138,17 +138,15 @@ subroutine copyin_derived_type() ! 
CHECK: %[[VAL_1:.*]] = fir.address_of(@_QFcombined_parallel_worksharing_loopEx6) : !fir.ref ! CHECK: %[[VAL_2:.*]] = omp.threadprivate %[[VAL_1]] : !fir.ref -> !fir.ref ! CHECK: omp.parallel { -! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -! CHECK: %[[VAL_4:.*]] = omp.threadprivate %[[VAL_1]] : !fir.ref -> !fir.ref -! CHECK: %[[VAL_5:.*]] = fir.load %[[VAL_2]] : !fir.ref -! CHECK: fir.store %[[VAL_5]] to %[[VAL_4]] : !fir.ref +! CHECK: %[[VAL_3:.*]] = omp.threadprivate %[[VAL_1]] : !fir.ref -> !fir.ref +! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_2]] : !fir.ref +! CHECK: fir.store %[[VAL_4]] to %[[VAL_3]] : !fir.ref ! CHECK: omp.barrier -! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_4]] : !fir.ref -! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_9:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { -! CHECK: fir.store %[[VAL_9]] to %[[VAL_3]] : !fir.ref -! CHECK: fir.call @_QPsub4(%[[VAL_4]]) {{.*}}: (!fir.ref) -> () +! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_3]] : !fir.ref +! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop for (%[[VAL_9:.*]]) : i32 = (%[[VAL_5]]) to (%[[VAL_6]]) inclusive step (%[[VAL_7]]) { +! CHECK: fir.call @_QPsub4(%[[VAL_3]]) {{.*}}: (!fir.ref) -> () ! CHECK: omp.yield ! CHECK: } ! 
CHECK: omp.terminator @@ -269,30 +267,27 @@ subroutine common_1() !CHECK: %[[val_7:.*]] = fir.coordinate_of %[[val_6]], %[[val_c4]] : (!fir.ref>, index) -> !fir.ref !CHECK: %[[val_8:.*]] = fir.convert %[[val_7]] : (!fir.ref) -> !fir.ref !CHECK: omp.parallel { -!CHECK: %[[val_9:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} -!CHECK: %[[val_10:.*]] = omp.threadprivate %[[val_1]] : !fir.ref> -> !fir.ref> -!CHECK: %[[val_11:.*]] = fir.convert %[[val_10]] : (!fir.ref>) -> !fir.ref> +!CHECK: %[[val_9:.*]] = omp.threadprivate %[[val_1]] : !fir.ref> -> !fir.ref> +!CHECK: %[[val_10:.*]] = fir.convert %[[val_9]] : (!fir.ref>) -> !fir.ref> !CHECK: %[[val_c0_0:.*]] = arith.constant 0 : index -!CHECK: %[[val_12:.*]] = fir.coordinate_of %[[val_11]], %[[val_c0_0]] : (!fir.ref>, index) -> !fir.ref -!CHECK: %[[val_13:.*]] = fir.convert %[[val_12]] : (!fir.ref) -> !fir.ref -!CHECK: %[[val_14:.*]] = fir.convert %[[val_10]] : (!fir.ref>) -> !fir.ref> +!CHECK: %[[val_11:.*]] = fir.coordinate_of %[[val_10]], %[[val_c0_0]] : (!fir.ref>, index) -> !fir.ref +!CHECK: %[[val_12:.*]] = fir.convert %[[val_11]] : (!fir.ref) -> !fir.ref +!CHECK: %[[val_13:.*]] = fir.convert %[[val_9]] : (!fir.ref>) -> !fir.ref> !CHECK: %[[val_c4_1:.*]] = arith.constant 4 : index -!CHECK: %[[val_15:.*]] = fir.coordinate_of %[[val_14]], %[[val_c4_1]] : (!fir.ref>, index) -> !fir.ref -!CHECK: %[[val_16:.*]] = fir.convert %[[val_15]] : (!fir.ref) -> !fir.ref -!CHECK: %[[val_17:.*]] = fir.load %[[val_5]] : !fir.ref -!CHECK: fir.store %[[val_17]] to %[[val_13]] : !fir.ref -!CHECK: %[[val_18:.*]] = fir.load %[[val_8]] : !fir.ref -!CHECK: fir.store %[[val_18]] to %[[val_16]] : !fir.ref +!CHECK: %[[val_14:.*]] = fir.coordinate_of %[[val_13]], %[[val_c4_1]] : (!fir.ref>, index) -> !fir.ref +!CHECK: %[[val_15:.*]] = fir.convert %[[val_14]] : (!fir.ref) -> !fir.ref +!CHECK: %[[val_16:.*]] = fir.load %[[val_5]] : !fir.ref +!CHECK: fir.store %[[val_16]] to %[[val_12]] : !fir.ref +!CHECK: %[[val_17:.*]] = fir.load %[[val_8]] 
: !fir.ref +!CHECK: fir.store %[[val_17]] to %[[val_15]] : !fir.ref !CHECK: omp.barrier !CHECK: %[[val_c1_i32:.*]] = arith.constant 1 : i32 -!CHECK: %[[val_19:.*]] = fir.load %[[val_13]] : !fir.ref +!CHECK: %[[val_18:.*]] = fir.load %[[val_12]] : !fir.ref !CHECK: %[[val_c1_i32_2:.*]] = arith.constant 1 : i32 -!CHECK: omp.wsloop for (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_19]]) inclusive step (%[[val_c1_i32_2]]) { -!CHECK: fir.store %[[arg]] to %[[val_9]] : !fir.ref -!CHECK: %[[val_20:.*]] = fir.load %[[val_16]] : !fir.ref -!CHECK: %[[val_21:.*]] = fir.load %[[val_9]] : !fir.ref -!CHECK: %[[val_22:.*]] = arith.addi %[[val_20]], %[[val_21]] : i32 -!CHECK: fir.store %[[val_22]] to %[[val_16]] : !fir.ref +!CHECK: omp.wsloop for (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_18]]) inclusive step (%[[val_c1_i32_2]]) { +!CHECK: %[[val_19:.*]] = fir.load %[[val_15]] : !fir.ref +!CHECK: %[[val_20:.*]] = arith.addi %[[val_19]], %[[arg]] : i32 +!CHECK: fir.store %[[val_20]] to %[[val_15]] : !fir.ref !CHECK: omp.yield !CHECK: } !CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 b/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 index 06f3e1ca82234..bba9dbc4fc4cb 100644 --- a/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 +++ b/flang/test/Lower/OpenMP/FIR/lastprivate-commonblock.f90 @@ -1,7 +1,6 @@ ! 
RUN: %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s !CHECK: func.func @_QPlastprivate_common() { -!CHECK: %[[val_0:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[val_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFlastprivate_commonEi"} !CHECK: %[[val_2:.*]] = fir.address_of(@c_) : !fir.ref> !CHECK: %[[val_3:.*]] = fir.convert %[[val_2]] : (!fir.ref>) -> !fir.ref> @@ -18,7 +17,6 @@ !CHECK: %[[val_c100_i32:.*]] = arith.constant 100 : i32 !CHECK: %[[val_c1_i32_0:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop for (%[[arg:.*]]) : i32 = (%[[val_c1_i32]]) to (%[[val_c100_i32]]) inclusive step (%[[val_c1_i32_0]]) { -!CHECK: fir.store %[[arg]] to %[[val_0]] : !fir.ref !CHECK: %[[val_11:.*]] = arith.cmpi eq, %[[arg]], %[[val_c100_i32]] : i32 !CHECK: fir.if %[[val_11]] { !CHECK: %[[val_12:.*]] = fir.load %[[val_9]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 b/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 index 3152f9c44d0c6..8cf216361bcb6 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-private-clause-fixes.f90 @@ -8,34 +8,31 @@ ! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_private_fixEx"} ! CHECK: omp.parallel { ! CHECK: %[[PRIV_J:.*]] = fir.alloca i32 {bindc_name = "j", pinned -! CHECK: %[[PRIV_I:.*]] = fir.alloca i32 {adapt.valuebyref, pinned ! CHECK: %[[PRIV_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned ! CHECK: %[[ONE:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_4:.*]] : !fir.ref ! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 -! CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) { -! CHECK: fir.store %[[VAL_6]] to %[[PRIV_I]] : !fir.ref -! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index -! 
CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_4]] : !fir.ref -! CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> index -! CHECK: %[[VAL_11:.*]] = arith.constant 1 : index -! CHECK: %[[LB:.*]] = fir.convert %[[VAL_8]] : (index) -> i32 -! CHECK: %[[VAL_12:.*]]:2 = fir.do_loop %[[VAL_13:[^ ]*]] = -! CHECK-SAME: %[[VAL_8]] to %[[VAL_10]] step %[[VAL_11]] -! CHECK-SAME: iter_args(%[[IV:.*]] = %[[LB]]) -> (index, i32) { -! CHECK: fir.store %[[IV]] to %[[PRIV_J]] : !fir.ref -! CHECK: %[[LOAD:.*]] = fir.load %[[PRIV_I]] : !fir.ref -! CHECK: %[[VAL_15:.*]] = fir.load %[[PRIV_J]] : !fir.ref -! CHECK: %[[VAL_16:.*]] = arith.addi %[[LOAD]], %[[VAL_15]] : i32 -! CHECK: fir.store %[[VAL_16]] to %[[PRIV_X]] : !fir.ref -! CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_13]], %[[VAL_11]] : index -! CHECK: %[[STEPCAST:.*]] = fir.convert %[[VAL_11]] : (index) -> i32 +! CHECK: omp.wsloop for (%[[IV_I:.*]]) : i32 = (%[[ONE]]) to (%[[VAL_3]]) inclusive step (%[[VAL_5]]) { +! CHECK: %[[VAL_6:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_6]] : (i32) -> index +! CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_4]] : !fir.ref +! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i32) -> index +! CHECK: %[[VAL_10:.*]] = arith.constant 1 : index +! CHECK: %[[LB:.*]] = fir.convert %[[VAL_7]] : (index) -> i32 +! CHECK: %[[VAL_11:.*]]:2 = fir.do_loop %[[VAL_12:[^ ]*]] = +! CHECK-SAME: %[[VAL_7]] to %[[VAL_9]] step %[[VAL_10]] +! CHECK-SAME: iter_args(%[[IV_J:.*]] = %[[LB]]) -> (index, i32) { +! CHECK: fir.store %[[IV_J]] to %[[PRIV_J]] : !fir.ref +! CHECK: %[[VAL_13:.*]] = fir.load %[[PRIV_J]] : !fir.ref +! CHECK: %[[VAL_14:.*]] = arith.addi %[[IV_I]], %[[VAL_13]] : i32 +! CHECK: fir.store %[[VAL_14]] to %[[PRIV_X]] : !fir.ref +! CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_12]], %[[VAL_10]] : index +! CHECK: %[[STEPCAST:.*]] = fir.convert %[[VAL_10]] : (index) -> i32 ! CHECK: %[[IVLOAD:.*]] = fir.load %[[PRIV_J]] : !fir.ref ! 
CHECK: %[[IVINC:.*]] = arith.addi %[[IVLOAD]], %[[STEPCAST]] -! CHECK: fir.result %[[VAL_17]], %[[IVINC]] : index, i32 +! CHECK: fir.result %[[VAL_15]], %[[IVINC]] : index, i32 ! CHECK: } -! CHECK: fir.store %[[VAL_12]]#1 to %[[PRIV_J]] : !fir.ref +! CHECK: fir.store %[[VAL_11]]#1 to %[[PRIV_J]] : !fir.ref ! CHECK: omp.yield ! CHECK: } ! CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 b/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 index e9d9218702cc5..f341d0ccda423 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-private-clause.f90 @@ -249,8 +249,6 @@ subroutine simple_loop_1 real, allocatable :: r; ! FIRDialect: omp.parallel !$OMP PARALLEL PRIVATE(r) - ! FIRDialect: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} - ! FIRDialect: [[R:%.*]] = fir.alloca !fir.box> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"} ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> @@ -262,9 +260,7 @@ subroutine simple_loop_1 ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO do i=1, 9 - ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref - ! FIRDialect: %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref - ! FIRDialect: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! FIRDialect: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do ! FIRDialect: omp.yield @@ -285,8 +281,6 @@ subroutine simple_loop_2 real, allocatable :: r; ! FIRDialect: omp.parallel !$OMP PARALLEL - ! FIRDialect: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} - ! FIRDialect: [[R:%.*]] = fir.alloca !fir.box> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"} ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> ! 
FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> @@ -298,9 +292,7 @@ subroutine simple_loop_2 ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO PRIVATE(r) do i=1, 9 - ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref - ! FIRDialect: %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref - ! FIRDialect: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! FIRDialect: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do ! FIRDialect: omp.yield @@ -320,8 +312,6 @@ subroutine simple_loop_3 integer :: i real, allocatable :: r; ! FIRDialect: omp.parallel - ! FIRDialect: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} - ! FIRDialect: [[R:%.*]] = fir.alloca !fir.box> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"} ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> @@ -333,9 +323,7 @@ subroutine simple_loop_3 ! FIRDialect: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO PRIVATE(r) do i=1, 9 - ! FIRDialect: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref - ! FIRDialect: %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref - ! FIRDialect: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! FIRDialect: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do ! FIRDialect: omp.yield @@ -353,9 +341,9 @@ subroutine simple_loop_3 subroutine simd_loop_1 integer :: i real, allocatable :: r; - ! IRDialect: [[R:%.*]] = fir.alloca !fir.box> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"} - ! IRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> - ! IRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> + ! FIRDialect: [[R:%.*]] = fir.alloca !fir.box> {bindc_name = "r", pinned, uniq_name = "{{.*}}Er"} + ! 
FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> + ! FIRDialect: fir.store {{%.*}} to [[R]] : !fir.ref>> ! FIRDialect: %[[LB:.*]] = arith.constant 1 : i32 ! FIRDialect: %[[UB:.*]] = arith.constant 9 : i32 @@ -364,9 +352,7 @@ subroutine simd_loop_1 ! FIRDialect: omp.simdloop for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { !$OMP SIMD PRIVATE(r) do i=1, 9 - ! FIRDialect: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref - ! FIRDialect: %[[LOAD_IV:.*]] = fir.load %[[LOCAL]] : !fir.ref - ! FIRDialect: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! FIRDialect: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do !$OMP END SIMD diff --git a/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 b/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 index ac63c45677ffe..bcc08e59560ab 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-wsloop-firstpriv.f90 @@ -10,7 +10,6 @@ subroutine omp_do_firstprivate(a) n = a+1 !$omp parallel do firstprivate(a) ! CHECK: omp.parallel { - ! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK-NEXT: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "a", pinned ! CHECK-NEXT: %[[LD:.*]] = fir.load %[[ARG0]] : !fir.ref ! CHECK-NEXT: fir.store %[[LD]] to %[[CLONE]] : !fir.ref @@ -18,6 +17,7 @@ subroutine omp_do_firstprivate(a) ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[CLONE]] : !fir.ref ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: omp.wsloop for (%[[ARG1:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + ! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK-NEXT: fir.store %[[ARG1]] to %[[REF]] : !fir.ref ! CHECK-NEXT: fir.call @_QPfoo(%[[REF]], %[[CLONE]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! 
CHECK-NEXT: omp.yield @@ -36,19 +36,19 @@ subroutine omp_do_firstprivate2(a, n) n = a+1 !$omp parallel do firstprivate(a, n) ! CHECK: omp.parallel { - ! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK-NEXT: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "a", pinned ! CHECK-NEXT: %[[LD:.*]] = fir.load %[[ARG0]] : !fir.ref ! CHECK-NEXT: fir.store %[[LD]] to %[[CLONE]] : !fir.ref ! CHECK-NEXT: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "n", pinned ! CHECK-NEXT: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref ! CHECK-NEXT: fir.store %[[LD1]] to %[[CLONE1]] : !fir.ref - - + + ! CHECK: %[[LB:.*]] = fir.load %[[CLONE]] : !fir.ref ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[CLONE1]] : !fir.ref ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: omp.wsloop for (%[[ARG2:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) + ! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK-NEXT: fir.store %[[ARG2]] to %[[REF]] : !fir.ref ! CHECK-NEXT: fir.call @_QPfoo(%[[REF]], %[[CLONE]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK-NEXT: omp.yield diff --git a/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 b/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 index c302b91be8e67..bc44a12c53f4d 100644 --- a/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 +++ b/flang/test/Lower/OpenMP/FIR/parallel-wsloop.f90 @@ -12,9 +12,7 @@ subroutine simple_parallel_do ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO do i=1, 9 - ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref - ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do ! CHECK: omp.yield @@ -38,9 +36,7 @@ subroutine parallel_do_with_parallel_clauses(cond, nt) ! 
CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO IF(cond) NUM_THREADS(nt) PROC_BIND(close) do i=1, 9 - ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref - ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do ! CHECK: omp.yield @@ -61,9 +57,7 @@ subroutine parallel_do_with_clauses(nt) ! CHECK: omp.wsloop schedule(dynamic) for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO NUM_THREADS(nt) SCHEDULE(dynamic) do i=1, 9 - ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref - ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do ! CHECK: omp.yield @@ -93,9 +87,7 @@ subroutine parallel_do_with_privatisation_clauses(cond,nt) ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP PARALLEL DO PRIVATE(cond) FIRSTPRIVATE(nt) do i=1, 9 - ! CHECK: fir.store %[[I]] to %[[IV_ADDR:.*]] : !fir.ref - ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV_ADDR]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 ! CHECK: %[[PRIVATE_COND_VAL:.*]] = fir.load %[[PRIVATE_COND_REF]] : !fir.ref> ! CHECK: %[[PRIVATE_COND_VAL_CVT:.*]] = fir.convert %[[PRIVATE_COND_VAL]] : (!fir.logical<4>) -> i1 ! 
CHECK: fir.call @_FortranAioOutputLogical({{.*}}, %[[PRIVATE_COND_VAL_CVT]]) {{.*}}: (!fir.ref, i1) -> i1 @@ -132,7 +124,6 @@ end subroutine parallel_private_do ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "nt"}) { ! CHECK: %[[I:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFparallel_private_doEi"} ! CHECK: omp.parallel { -! CHECK: %[[I_PRIV:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: %[[COND_ADDR:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_private_doEcond"} ! CHECK: %[[NT_ADDR:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_private_doEnt"} ! CHECK: %[[NT:.*]] = fir.load %[[VAL_1]] : !fir.ref @@ -141,6 +132,7 @@ end subroutine parallel_private_do ! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: %[[I_PRIV:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: fir.store %[[I]] to %[[I_PRIV]] : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PRIV]], %[[COND_ADDR]], %[[NT_ADDR]]) {{.*}}: (!fir.ref, !fir.ref>, !fir.ref) -> () ! CHECK: omp.yield @@ -172,7 +164,6 @@ end subroutine omp_parallel_multiple_firstprivate_do ! CHECK-SAME: %[[B_ADDR:.*]]: !fir.ref {fir.bindc_name = "b"}) { ! CHECK: %[[I_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_parallel_multiple_firstprivate_doEi"} ! CHECK: omp.parallel { -! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: %[[A_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFomp_parallel_multiple_firstprivate_doEa"} ! CHECK: %[[A:.*]] = fir.load %[[A_ADDR]] : !fir.ref ! CHECK: fir.store %[[A]] to %[[A_PRIV_ADDR]] : !fir.ref @@ -183,6 +174,7 @@ end subroutine omp_parallel_multiple_firstprivate_do ! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 ! 
CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref ! CHECK: fir.call @_QPbar(%[[I_PRIV_ADDR]], %[[A_PRIV_ADDR]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield @@ -216,7 +208,6 @@ end subroutine parallel_do_private ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "nt"}) { ! CHECK: %[[I_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFparallel_do_privateEi"} ! CHECK: omp.parallel { -! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: %[[COND_ADDR:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_privateEcond"} ! CHECK: %[[NT_ADDR:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_privateEnt"} ! CHECK: %[[NT:.*]] = fir.load %[[VAL_1]] : !fir.ref @@ -225,6 +216,7 @@ end subroutine parallel_do_private ! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_7]]) to (%[[VAL_8]]) inclusive step (%[[VAL_9]]) { +! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref ! CHECK: fir.call @_QPfoo(%[[I_PRIV_ADDR]], %[[COND_ADDR]], %[[NT_ADDR]]) {{.*}}: (!fir.ref, !fir.ref>, !fir.ref) -> () ! CHECK: omp.yield @@ -256,7 +248,6 @@ end subroutine omp_parallel_do_multiple_firstprivate ! CHECK-SAME: %[[B_ADDR:.*]]: !fir.ref {fir.bindc_name = "b"}) { ! CHECK: %[[I_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_parallel_do_multiple_firstprivateEi"} ! CHECK: omp.parallel { -! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: %[[A_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFomp_parallel_do_multiple_firstprivateEa"} ! 
CHECK: %[[A:.*]] = fir.load %[[A_ADDR]] : !fir.ref ! CHECK: fir.store %[[A]] to %[[A_PRIV_ADDR]] : !fir.ref @@ -267,6 +258,7 @@ end subroutine omp_parallel_do_multiple_firstprivate ! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { +! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: fir.store %[[I]] to %[[I_PRIV_ADDR]] : !fir.ref ! CHECK: fir.call @_QPbar(%[[I_PRIV_ADDR]], %[[A_PRIV_ADDR]]) {{.*}}: (!fir.ref, !fir.ref) -> () ! CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/FIR/simd.f90 b/flang/test/Lower/OpenMP/FIR/simd.f90 index 47596cccdbc12..c0c8d7f32838a 100644 --- a/flang/test/Lower/OpenMP/FIR/simd.f90 +++ b/flang/test/Lower/OpenMP/FIR/simd.f90 @@ -11,9 +11,7 @@ subroutine simdloop ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: omp.simdloop for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i=1, 9 - ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref - ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do !$OMP END SIMD @@ -29,9 +27,7 @@ subroutine simdloop_with_if_clause(n, threshold) ! CHECK: %[[COND:.*]] = arith.cmpi sge ! CHECK: omp.simdloop if(%[[COND:.*]]) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i = 1, n - ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref - ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref, i32) -> i1 + ! 
CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do !$OMP END SIMD @@ -46,9 +42,7 @@ subroutine simdloop_with_simdlen_clause(n, threshold) ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 ! CHECK: omp.simdloop simdlen(2) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i = 1, n - ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref - ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do !$OMP END SIMD @@ -64,9 +58,7 @@ subroutine simdloop_with_simdlen_clause_from_param(n, threshold) ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 ! CHECK: omp.simdloop simdlen(2) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i = 1, n - ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref - ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do !$OMP END SIMD @@ -82,9 +74,7 @@ subroutine simdloop_with_simdlen_clause_from_expr_from_param(n, threshold) ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 ! CHECK: omp.simdloop simdlen(6) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i = 1, n - ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref - ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do !$OMP END SIMD @@ -99,9 +89,7 @@ subroutine simdloop_with_safelen_clause(n, threshold) ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 ! 
CHECK: omp.simdloop safelen(2) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i = 1, n - ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref - ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do !$OMP END SIMD @@ -117,9 +105,7 @@ subroutine simdloop_with_safelen_clause_from_expr_from_param(n, threshold) ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 ! CHECK: omp.simdloop safelen(6) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i = 1, n - ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref - ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do !$OMP END SIMD @@ -134,9 +120,7 @@ subroutine simdloop_with_simdlen_safelen_clause(n, threshold) ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 ! CHECK: omp.simdloop simdlen(1) safelen(2) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { do i = 1, n - ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref - ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do !$OMP END SIMD diff --git a/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 b/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 index 2f73fb31966ec..ab906e1dfba5b 100644 --- a/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 +++ b/flang/test/Lower/OpenMP/FIR/stop-stmt-in-region.f90 @@ -71,14 +71,12 @@ subroutine test_stop_in_region3() end ! 
CHECK-LABEL: func.func @_QPtest_stop_in_region4() { -! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFtest_stop_in_region4Ei"} ! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFtest_stop_in_region4Ex"} ! CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_4:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 ! CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { -! CHECK: fir.store %[[VAL_6]] to %[[VAL_0]] : !fir.ref ! CHECK: cf.br ^bb1 ! CHECK: ^bb1: ! CHECK: %[[VAL_7:.*]] = arith.constant 3 : i32 diff --git a/flang/test/Lower/OpenMP/FIR/target.f90 b/flang/test/Lower/OpenMP/FIR/target.f90 index 9b1fb5c15ac1d..90e4e8a058297 100644 --- a/flang/test/Lower/OpenMP/FIR/target.f90 +++ b/flang/test/Lower/OpenMP/FIR/target.f90 @@ -278,19 +278,16 @@ subroutine omp_target_parallel_do !CHECK: omp.target map_entries(%[[MAP]] : !fir.ref>) { !CHECK-NEXT: omp.parallel !$omp target parallel do map(tofrom: a) - !CHECK: %[[VAL_2:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} - !CHECK: %[[VAL_3:.*]] = arith.constant 1 : i32 - !CHECK: %[[VAL_4:.*]] = arith.constant 1024 : i32 - !CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 - !CHECK: omp.wsloop for (%[[VAL_6:.*]]) : i32 = (%[[VAL_3]]) to (%[[VAL_4]]) inclusive step (%[[VAL_5]]) { - !CHECK: fir.store %[[VAL_6]] to %[[VAL_2]] : !fir.ref - !CHECK: %[[VAL_7:.*]] = arith.constant 10 : i32 - !CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_2]] : !fir.ref - !CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i32) -> i64 - !CHECK: %[[VAL_10:.*]] = arith.constant 1 : i64 - !CHECK: %[[VAL_11:.*]] = arith.subi %[[VAL_9]], %[[VAL_10]] : i64 - !CHECK: %[[VAL_12:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_11]] : (!fir.ref>, i64) -> !fir.ref - !CHECK: fir.store %[[VAL_7]] to %[[VAL_12]] : !fir.ref + !CHECK: %[[VAL_2:.*]] = arith.constant 1 : i32 + !CHECK: 
%[[VAL_3:.*]] = arith.constant 1024 : i32 + !CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 + !CHECK: omp.wsloop for (%[[ARG:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { + !CHECK: %[[VAL_5:.*]] = arith.constant 10 : i32 + !CHECK: %[[VAL_6:.*]] = fir.convert %[[ARG]] : (i32) -> i64 + !CHECK: %[[VAL_7:.*]] = arith.constant 1 : i64 + !CHECK: %[[VAL_8:.*]] = arith.subi %[[VAL_6]], %[[VAL_7]] : i64 + !CHECK: %[[VAL_9:.*]] = fir.coordinate_of %[[VAL_0]], %[[VAL_8]] : (!fir.ref>, i64) -> !fir.ref + !CHECK: fir.store %[[VAL_5]] to %[[VAL_9]] : !fir.ref do i = 1, 1024 a(i) = 10 end do diff --git a/flang/test/Lower/OpenMP/FIR/unstructured.f90 b/flang/test/Lower/OpenMP/FIR/unstructured.f90 index e7d48bb269349..a6b572eaafecf 100644 --- a/flang/test/Lower/OpenMP/FIR/unstructured.f90 +++ b/flang/test/Lower/OpenMP/FIR/unstructured.f90 @@ -61,21 +61,16 @@ subroutine ss2(n) ! unstructured OpenMP construct; loop exit inside construct ! CHECK-LABEL: func @_QPss3{{.*}} { ! CHECK: omp.parallel { ! CHECK: %[[ALLOCA_K:.*]] = fir.alloca i32 {bindc_name = "k", pinned} -! CHECK: %[[ALLOCA_1:.*]] = fir.alloca i32 {{{.*}}, pinned} -! CHECK: %[[ALLOCA_2:.*]] = fir.alloca i32 {{{.*}}, pinned} ! CHECK: br ^bb1 ! CHECK: ^bb1: // 2 preds: ^bb0, ^bb3 ! CHECK: cond_br %{{[0-9]*}}, ^bb2, ^bb4 ! CHECK: ^bb2: // pred: ^bb1 ! CHECK: omp.wsloop for (%[[ARG1:.*]]) : {{.*}} { -! CHECK: fir.store %[[ARG1]] to %[[ALLOCA_2]] : !fir.ref ! CHECK: @_FortranAioBeginExternalListOutput -! CHECK: %[[LOAD_1:.*]] = fir.load %[[ALLOCA_2]] : !fir.ref -! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD_1]]) +! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[ARG1]]) ! CHECK: omp.yield ! CHECK: } ! CHECK: omp.wsloop for (%[[ARG2:.*]]) : {{.*}} { -! CHECK: fir.store %[[ARG2]] to %[[ALLOCA_1]] : !fir.ref ! CHECK: br ^bb1 ! CHECK: ^bb2: // 2 preds: ^bb1, ^bb5 ! CHECK: cond_br %{{[0-9]*}}, ^bb3, ^bb6 @@ -116,15 +111,12 @@ subroutine ss3(n) ! nested unstructured OpenMP constructs ! 
CHECK-LABEL: func @_QPss4{{.*}} { ! CHECK: omp.parallel { -! CHECK: %[[ALLOCA:.*]] = fir.alloca i32 {{{.*}}, pinned} ! CHECK: omp.wsloop for (%[[ARG:.*]]) : {{.*}} { -! CHECK: fir.store %[[ARG]] to %[[ALLOCA]] : !fir.ref ! CHECK: %[[COND:.*]] = arith.cmpi eq, %{{.*}}, %{{.*}} ! CHECK: %[[COND_XOR:.*]] = arith.xori %[[COND]], %{{.*}} ! CHECK: fir.if %[[COND_XOR]] { ! CHECK: @_FortranAioBeginExternalListOutput -! CHECK: %[[LOAD:.*]] = fir.load %[[ALLOCA]] : !fir.ref -! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[LOAD]]) +! CHECK: @_FortranAioOutputInteger32(%{{.*}}, %[[ARG]]) ! CHECK: } else { ! CHECK: } ! CHECK-NEXT: omp.yield diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 index 99b0cf0f1298e..f1bd5459a2b61 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-chunks.f90 @@ -20,9 +20,7 @@ program wsloop ! CHECK: %[[VAL_4:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 4 : i32 ! CHECK: omp.wsloop schedule(static = %[[VAL_5]] : i32) nowait for (%[[ARG0:.*]]) : i32 = (%[[VAL_2]]) to (%[[VAL_3]]) inclusive step (%[[VAL_4]]) { -! CHECK: fir.store %[[ARG0]] to %[[STORE_IV:.*]] : !fir.ref -! CHECK: %[[LOAD_IV:.*]] = fir.load %[[STORE_IV]] : !fir.ref -! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 +! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[ARG0]]) {{.*}}: (!fir.ref, i32) -> i1 ! CHECK: omp.yield ! CHECK: } @@ -38,10 +36,8 @@ program wsloop ! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_17:.*]] = arith.constant 4 : i32 ! CHECK: omp.wsloop schedule(static = %[[VAL_17]] : i32) nowait for (%[[ARG1:.*]]) : i32 = (%[[VAL_14]]) to (%[[VAL_15]]) inclusive step (%[[VAL_16]]) { -! CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref ! CHECK: %[[VAL_24:.*]] = arith.constant 2 : i32 -! CHECK: %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]] : !fir.ref -! 
CHECK: %[[VAL_25:.*]] = arith.muli %[[VAL_24]], %[[LOAD_IV1]] : i32 +! CHECK: %[[VAL_25:.*]] = arith.muli %[[VAL_24]], %[[ARG1]] : i32 ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_25]]) {{.*}}: (!fir.ref, i32) -> i1 ! CHECK: omp.yield ! CHECK: } @@ -62,10 +58,8 @@ program wsloop ! CHECK: %[[VAL_31:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_32:.*]] = fir.load %[[VAL_0]] : !fir.ref ! CHECK: omp.wsloop schedule(static = %[[VAL_32]] : i32) nowait for (%[[ARG2:.*]]) : i32 = (%[[VAL_29]]) to (%[[VAL_30]]) inclusive step (%[[VAL_31]]) { -! CHECK: fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref ! CHECK: %[[VAL_39:.*]] = arith.constant 3 : i32 -! CHECK: %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]] : !fir.ref -! CHECK: %[[VAL_40:.*]] = arith.muli %[[VAL_39]], %[[LOAD_IV2]] : i32 +! CHECK: %[[VAL_40:.*]] = arith.muli %[[VAL_39]], %[[ARG2]] : i32 ! CHECK: {{.*}} = fir.call @_FortranAioOutputInteger32({{.*}}, %[[VAL_40]]) {{.*}}: (!fir.ref, i32) -> i1 ! CHECK: omp.yield ! CHECK: } diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 index a122a41ba8b8f..ba5860faac0e6 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-collapse.f90 @@ -40,16 +40,10 @@ program wsloop_collapse do j= 1, b do k = 1, c ! CHECK: omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[VAL_20]], %[[VAL_23]], %[[VAL_26]]) to (%[[VAL_21]], %[[VAL_24]], %[[VAL_27]]) inclusive step (%[[VAL_22]], %[[VAL_25]], %[[VAL_28]]) { -! CHECK: fir.store %[[ARG0]] to %[[STORE_IV0:.*]] : !fir.ref -! CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref -! CHECK: fir.store %[[ARG2]] to %[[STORE_IV2:.*]] : !fir.ref ! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_6]] : !fir.ref -! CHECK: %[[LOAD_IV0:.*]] = fir.load %[[STORE_IV0]] : !fir.ref -! CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_12]], %[[LOAD_IV0]] : i32 -! 
CHECK: %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]] : !fir.ref -! CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_13]], %[[LOAD_IV1]] : i32 -! CHECK: %[[LOAD_IV2:.*]] = fir.load %[[STORE_IV2]] : !fir.ref -! CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_14]], %[[LOAD_IV2]] : i32 +! CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_12]], %[[ARG0]] : i32 +! CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_13]], %[[ARG1]] : i32 +! CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_14]], %[[ARG2]] : i32 ! CHECK: fir.store %[[VAL_15]] to %[[VAL_6]] : !fir.ref ! CHECK: omp.yield ! CHECK: } diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 index 9509920c6ec1b..ac0021e2edf20 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-monotonic.f90 @@ -11,18 +11,15 @@ program wsloop_dynamic !CHECK: omp.parallel { !$OMP DO SCHEDULE(monotonic:dynamic) -!CHECK: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop schedule(dynamic, monotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) -!CHECK: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref do i=1, 9 print*, i !CHECK: %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput -!CHECK: %[[LOAD:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref -!CHECK: fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[LOAD]]) {{.*}}: (!fir.ref, i32) -> i1 +!CHECK: fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 !CHECK: fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) {{.*}}: (!fir.ref) -> i32 end do !CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 index 5e4e66c77b343..39215e8d31c92 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 
+++ b/flang/test/Lower/OpenMP/FIR/wsloop-nonmonotonic.f90 @@ -12,18 +12,15 @@ program wsloop_dynamic !CHECK: omp.parallel { !$OMP DO SCHEDULE(nonmonotonic:dynamic) -!CHECK: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} !CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop schedule(dynamic, nonmonotonic) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) -!CHECK: fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref do i=1, 9 print*, i !CHECK: %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput -!CHECK: %[[LOAD:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref -!CHECK: fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[LOAD]]) {{.*}}: (!fir.ref, i32) -> i1 +!CHECK: fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 !CHECK: fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) {{.*}}: (!fir.ref) -> i32 end do !CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 index 69d133d50ffa0..069b777d1cdb2 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-add.f90 @@ -50,14 +50,11 @@ !CHECK: %[[C0_2:.*]] = arith.constant 0 : i32 !CHECK: fir.store %[[C0_2]] to %[[XREF]] : !fir.ref !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL]], %[[XREF]] : i32, !fir.ref 
+!CHECK: omp.reduction %[[IVAL]], %[[XREF]] : i32, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator !CHECK: return @@ -78,14 +75,11 @@ subroutine simple_int_reduction !CHECK: %[[C0_2:.*]] = arith.constant 0.000000e+00 : f32 !CHECK: fir.store %[[C0_2]] to %[[XREF]] : !fir.ref !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL_f32]], %[[XREF]] : f32, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator @@ -107,14 +101,11 @@ subroutine simple_real_reduction !CHECK: %[[C0_2:.*]] = arith.constant 0 : i32 !CHECK: fir.store %[[C0_2]] to %[[XREF]] : !fir.ref !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL]], %[[XREF]] : i32, !fir.ref +!CHECK: omp.reduction %[[IVAL]], %[[XREF]] : i32, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator !CHECK: return @@ -135,14 +126,11 @@ subroutine simple_int_reduction_switch_order !CHECK: %[[C0_2:.*]] = arith.constant 
0.000000e+00 : f32 !CHECK: fir.store %[[C0_2]] to %[[XREF]] : !fir.ref !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL_f32]], %[[XREF]] : f32, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator @@ -164,15 +152,10 @@ subroutine simple_real_reduction_switch_order !CHECK: %[[YREF:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_int_reductions_same_typeEy"} !CHECK: %[[ZREF:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_int_reductions_same_typeEz"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref, @[[RED_I32_NAME]] -> %[[YREF]] : !fir.ref, @[[RED_I32_NAME]] -> %[[ZREF]] : !fir.ref) for (%[[IVAL]]) : i32 -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL1]], %[[XREF]] : i32, !fir.ref -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL2]], %[[YREF]] : i32, !fir.ref -!CHECK: %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL3]], %[[ZREF]] : i32, !fir.ref +!CHECK: omp.reduction %[[IVAL]], %[[XREF]] : i32, !fir.ref +!CHECK: omp.reduction %[[IVAL]], %[[YREF]] : i32, !fir.ref +!CHECK: omp.reduction 
%[[IVAL]], %[[ZREF]] : i32, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator !CHECK: return @@ -197,17 +180,12 @@ subroutine multiple_int_reductions_same_type !CHECK: %[[YREF:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_real_reductions_same_typeEy"} !CHECK: %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_real_reductions_same_typeEz"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref, @[[RED_F32_NAME]] -> %[[YREF]] : !fir.ref, @[[RED_F32_NAME]] -> %[[ZREF]] : !fir.ref) for (%[[IVAL]]) : i32 -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1_F32:.*]] = fir.convert %[[I_PVT_VAL1_I32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL1_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL1_F32]], %[[XREF]] : f32, !fir.ref -!CHECK: %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL2_F32:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL2_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL2_F32]], %[[YREF]] : f32, !fir.ref -!CHECK: %[[I_PVT_VAL3_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[I_PVT_VAL3_I32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL3_F32]], %[[ZREF]] : f32, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator @@ -234,19 +212,13 @@ subroutine multiple_real_reductions_same_type !CHECK: %[[YREF:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"} !CHECK: %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_different_typeEz"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 
{adapt.valuebyref, pinned} !CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref, @[[RED_I64_NAME]] -> %[[YREF]] : !fir.ref, @[[RED_F32_NAME]] -> %[[ZREF]] : !fir.ref, @[[RED_F64_NAME]] -> %[[WREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL1_I32]], %[[XREF]] : i32, !fir.ref -!CHECK: %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL2_I64:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> i64 +!CHECK: omp.reduction %[[IVAL]], %[[XREF]] : i32, !fir.ref +!CHECK: %[[I_PVT_VAL2_I64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: omp.reduction %[[I_PVT_VAL2_I64]], %[[YREF]] : i64, !fir.ref -!CHECK: %[[I_PVT_VAL3_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[I_PVT_VAL3_I32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL3_F32]], %[[ZREF]] : f32, !fir.ref -!CHECK: %[[I_PVT_VAL4_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL4_F64:.*]] = fir.convert %[[I_PVT_VAL4_I32]] : (i32) -> f64 +!CHECK: %[[I_PVT_VAL4_F64:.*]] = fir.convert %[[IVAL]] : (i32) -> f64 !CHECK: omp.reduction %[[I_PVT_VAL4_F64]], %[[WREF]] : f64, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-and.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-and.f90 index 425d37398c571..03fbc2819659d 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-and.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-and.f90 @@ -21,14 +21,11 @@ !CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"} !CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} !CHECK: omp.parallel -!CHECK: 
%[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 +!CHECK: %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 !CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> @@ -54,14 +51,11 @@ subroutine simple_reduction(y) !CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"} !CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 +!CHECK: %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 !CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> @@ -89,28 +83,23 @@ subroutine 
simple_reduction_switch_order(y) !CHECK: %[[YREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"} !CHECK: %[[ZREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>, @[[RED_NAME]] -> %[[YREF]] : !fir.ref>, @[[RED_NAME]] -> %[[ZREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[I_PVT_VAL1]] : (i32) -> i64 +!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_1:.*]] = arith.subi %[[CONVI_64_1]], %[[C1_64]] : i64 !CHECK: %[[W_PVT_REF_1:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_1]] : (!fir.ref>>, i64) -> !fir.ref> !CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_1]] : !fir.ref> !CHECK: omp.reduction %[[WVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64 +!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_2:.*]] = arith.subi %[[CONVI_64_2]], %[[C1_64]] : i64 !CHECK: %[[W_PVT_REF_2:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_2]] : (!fir.ref>>, i64) -> !fir.ref> !CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_2]] : !fir.ref> !CHECK: omp.reduction %[[WVAL]], %[[YREF]] : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_3:.*]] = fir.convert 
%[[I_PVT_VAL3]] : (i32) -> i64 +!CHECK: %[[CONVI_64_3:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_3:.*]] = arith.subi %[[CONVI_64_3]], %[[C1_64]] : i64 !CHECK: %[[W_PVT_REF_3:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_3]] : (!fir.ref>>, i64) -> !fir.ref> diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 index e8cf46f8261c4..eaa627e6afd51 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-eqv.f90 @@ -21,14 +21,11 @@ !CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"} !CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 +!CHECK: %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 !CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> @@ -54,14 +51,11 @@ subroutine simple_reduction(y) !CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"} !CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"} !CHECK: 
omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 +!CHECK: %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 !CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> @@ -89,28 +83,23 @@ subroutine simple_reduction_switch_order(y) !CHECK: %[[YREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"} !CHECK: %[[ZREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>, @[[RED_NAME]] -> %[[YREF]] : !fir.ref>, @[[RED_NAME]] -> %[[ZREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[I_PVT_VAL1]] : (i32) -> i64 +!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_1:.*]] = arith.subi %[[CONVI_64_1]], %[[C1_64]] : i64 !CHECK: 
%[[W_PVT_REF_1:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_1]] : (!fir.ref>>, i64) -> !fir.ref> !CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_1]] : !fir.ref> !CHECK: omp.reduction %[[WVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64 +!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_2:.*]] = arith.subi %[[CONVI_64_2]], %[[C1_64]] : i64 !CHECK: %[[W_PVT_REF_2:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_2]] : (!fir.ref>>, i64) -> !fir.ref> !CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_2]] : !fir.ref> !CHECK: omp.reduction %[[WVAL]], %[[YREF]] : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_3:.*]] = fir.convert %[[I_PVT_VAL3]] : (i32) -> i64 +!CHECK: %[[CONVI_64_3:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_3:.*]] = arith.subi %[[CONVI_64_3]], %[[C1_64]] : i64 !CHECK: %[[W_PVT_REF_3:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_3]] : (!fir.ref>>, i64) -> !fir.ref> diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 index 6e5d6c34cedc5..617198d01716f 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-neqv.f90 @@ -21,14 +21,11 @@ !CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"} !CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: 
omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 +!CHECK: %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 !CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> @@ -54,14 +51,11 @@ subroutine simple_reduction(y) !CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"} !CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 +!CHECK: %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 !CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> @@ -89,28 +83,23 @@ subroutine simple_reduction_switch_order(y) !CHECK: %[[YREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"} !CHECK: %[[ZREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name 
= "_QFmultiple_reductionsEz"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>, @[[RED_NAME]] -> %[[YREF]] : !fir.ref>, @[[RED_NAME]] -> %[[ZREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[I_PVT_VAL1]] : (i32) -> i64 +!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_1:.*]] = arith.subi %[[CONVI_64_1]], %[[C1_64]] : i64 !CHECK: %[[W_PVT_REF_1:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_1]] : (!fir.ref>>, i64) -> !fir.ref> !CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_1]] : !fir.ref> !CHECK: omp.reduction %[[WVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64 +!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_2:.*]] = arith.subi %[[CONVI_64_2]], %[[C1_64]] : i64 !CHECK: %[[W_PVT_REF_2:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_2]] : (!fir.ref>>, i64) -> !fir.ref> !CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_2]] : !fir.ref> !CHECK: omp.reduction %[[WVAL]], %[[YREF]] : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_3:.*]] = fir.convert %[[I_PVT_VAL3]] : (i32) -> i64 +!CHECK: %[[CONVI_64_3:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_3:.*]] = arith.subi %[[CONVI_64_3]], %[[C1_64]] : i64 
!CHECK: %[[W_PVT_REF_3:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_3]] : (!fir.ref>>, i64) -> !fir.ref> diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-or.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-or.f90 index cdc12500e2c30..e3d691e347ba2 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-or.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-logical-or.f90 @@ -21,14 +21,11 @@ !CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reductionEi"} !CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 +!CHECK: %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 !CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> @@ -54,14 +51,11 @@ subroutine simple_reduction(y) !CHECK: %[[IREF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_reduction_switch_orderEi"} !CHECK: %[[XREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 
!CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64:.*]] = fir.convert %[[I_PVT_VAL]] : (i32) -> i64 +!CHECK: %[[CONVI_64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI:.*]] = arith.subi %[[CONVI_64]], %[[C1_64]] : i64 !CHECK: %[[Y_PVT_REF:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI]] : (!fir.ref>>, i64) -> !fir.ref> @@ -89,28 +83,23 @@ subroutine simple_reduction_switch_order(y) !CHECK: %[[YREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "y", uniq_name = "_QFmultiple_reductionsEy"} !CHECK: %[[ZREF:.*]] = fir.alloca !fir.logical<4> {bindc_name = "z", uniq_name = "_QFmultiple_reductionsEz"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 100 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_NAME]] -> %[[XREF]] : !fir.ref>, @[[RED_NAME]] -> %[[YREF]] : !fir.ref>, @[[RED_NAME]] -> %[[ZREF]] : !fir.ref>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) { -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[I_PVT_VAL1]] : (i32) -> i64 +!CHECK: %[[CONVI_64_1:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_1:.*]] = arith.subi %[[CONVI_64_1]], %[[C1_64]] : i64 !CHECK: %[[W_PVT_REF_1:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_1]] : (!fir.ref>>, i64) -> !fir.ref> !CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_1]] : !fir.ref> !CHECK: omp.reduction %[[WVAL]], %[[XREF]] : !fir.logical<4>, !fir.ref> -!CHECK: 
%[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[I_PVT_VAL2]] : (i32) -> i64 +!CHECK: %[[CONVI_64_2:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_2:.*]] = arith.subi %[[CONVI_64_2]], %[[C1_64]] : i64 !CHECK: %[[W_PVT_REF_2:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_2]] : (!fir.ref>>, i64) -> !fir.ref> !CHECK: %[[WVAL:.*]] = fir.load %[[W_PVT_REF_2]] : !fir.ref> !CHECK: omp.reduction %[[WVAL]], %[[YREF]] : !fir.logical<4>, !fir.ref> -!CHECK: %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[CONVI_64_3:.*]] = fir.convert %[[I_PVT_VAL3]] : (i32) -> i64 +!CHECK: %[[CONVI_64_3:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: %[[C1_64:.*]] = arith.constant 1 : i64 !CHECK: %[[SUBI_3:.*]] = arith.subi %[[CONVI_64_3]], %[[C1_64]] : i64 !CHECK: %[[W_PVT_REF_3:.*]] = fir.coordinate_of %[[ARRAY]], %[[SUBI_3]] : (!fir.ref>>, i64) -> !fir.ref> diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-mul.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-mul.f90 index c30cde66b5167..f5d2113d2e57a 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-reduction-mul.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-reduction-mul.f90 @@ -50,14 +50,11 @@ !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: fir.store %[[C1_2]] to %[[XREF]] : !fir.ref !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C10:.*]] = arith.constant 10 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C10]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL]], %[[XREF]] : i32, !fir.ref +!CHECK: omp.reduction %[[IVAL]], %[[XREF]] : i32, 
!fir.ref !CHECK: omp.yield !CHECK: omp.terminator !CHECK: return @@ -79,14 +76,11 @@ subroutine simple_int_reduction !CHECK: %[[C0_2:.*]] = arith.constant 1.000000e+00 : f32 !CHECK: fir.store %[[C0_2]] to %[[XREF]] : !fir.ref !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 10 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL_f32]], %[[XREF]] : f32, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator @@ -108,14 +102,11 @@ subroutine simple_real_reduction !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: fir.store %[[C1_2]] to %[[XREF]] : !fir.ref !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C10:.*]] = arith.constant 10 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C10]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL]], %[[XREF]] : i32, !fir.ref +!CHECK: omp.reduction %[[IVAL]], %[[XREF]] : i32, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator !CHECK: return @@ -136,14 +127,11 @@ subroutine simple_int_reduction_switch_order !CHECK: %[[C0_2:.*]] = arith.constant 1.000000e+00 : f32 !CHECK: fir.store %[[C0_2]] to %[[XREF]] : 
!fir.ref !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32 !CHECK: %[[C100:.*]] = arith.constant 10 : i32 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]]) -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_i32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[I_PVT_VAL_i32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL_f32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL_f32]], %[[XREF]] : f32, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator @@ -165,15 +153,10 @@ subroutine simple_real_reduction_switch_order !CHECK: %[[YREF:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFmultiple_int_reductions_same_typeEy"} !CHECK: %[[ZREF:.*]] = fir.alloca i32 {bindc_name = "z", uniq_name = "_QFmultiple_int_reductions_same_typeEz"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref, @[[RED_I32_NAME]] -> %[[YREF]] : !fir.ref, @[[RED_I32_NAME]] -> %[[ZREF]] : !fir.ref) for (%[[IVAL]]) : i32 -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL1]], %[[XREF]] : i32, !fir.ref -!CHECK: %[[I_PVT_VAL2:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL2]], %[[YREF]] : i32, !fir.ref -!CHECK: %[[I_PVT_VAL3:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL3]], %[[ZREF]] : i32, !fir.ref +!CHECK: omp.reduction %[[IVAL]], %[[XREF]] : i32, !fir.ref +!CHECK: omp.reduction %[[IVAL]], %[[YREF]] : i32, !fir.ref +!CHECK: omp.reduction %[[IVAL]], %[[ZREF]] : i32, !fir.ref !CHECK: omp.yield !CHECK: 
omp.terminator !CHECK: return @@ -198,17 +181,12 @@ subroutine multiple_int_reductions_same_type !CHECK: %[[YREF:.*]] = fir.alloca f32 {bindc_name = "y", uniq_name = "_QFmultiple_real_reductions_same_typeEy"} !CHECK: %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_real_reductions_same_typeEz"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: omp.wsloop reduction(@[[RED_F32_NAME]] -> %[[XREF]] : !fir.ref, @[[RED_F32_NAME]] -> %[[YREF]] : !fir.ref, @[[RED_F32_NAME]] -> %[[ZREF]] : !fir.ref) for (%[[IVAL]]) : i32 -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1_F32:.*]] = fir.convert %[[I_PVT_VAL1_I32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL1_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL1_F32]], %[[XREF]] : f32, !fir.ref -!CHECK: %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL2_F32:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL2_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL2_F32]], %[[YREF]] : f32, !fir.ref -!CHECK: %[[I_PVT_VAL3_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[I_PVT_VAL3_I32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL3_F32]], %[[ZREF]] : f32, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator @@ -235,19 +213,13 @@ subroutine multiple_real_reductions_same_type !CHECK: %[[YREF:.*]] = fir.alloca i64 {bindc_name = "y", uniq_name = "_QFmultiple_reductions_different_typeEy"} !CHECK: %[[ZREF:.*]] = fir.alloca f32 {bindc_name = "z", uniq_name = "_QFmultiple_reductions_different_typeEz"} !CHECK: omp.parallel -!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} !CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %2 
: !fir.ref, @[[RED_I64_NAME]] -> %3 : !fir.ref, @[[RED_F32_NAME]] -> %4 : !fir.ref, @[[RED_F64_NAME]] -> %1 : !fir.ref) for (%[[IVAL:.*]]) : i32 -!CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL1_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: omp.reduction %[[I_PVT_VAL1_I32]], %[[XREF]] : i32, !fir.ref -!CHECK: %[[I_PVT_VAL2_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL2_I64:.*]] = fir.convert %[[I_PVT_VAL2_I32]] : (i32) -> i64 +!CHECK: omp.reduction %[[IVAL]], %[[XREF]] : i32, !fir.ref +!CHECK: %[[I_PVT_VAL2_I64:.*]] = fir.convert %[[IVAL]] : (i32) -> i64 !CHECK: omp.reduction %[[I_PVT_VAL2_I64]], %[[YREF]] : i64, !fir.ref -!CHECK: %[[I_PVT_VAL3_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[I_PVT_VAL3_I32]] : (i32) -> f32 +!CHECK: %[[I_PVT_VAL3_F32:.*]] = fir.convert %[[IVAL]] : (i32) -> f32 !CHECK: omp.reduction %[[I_PVT_VAL3_F32]], %[[ZREF]] : f32, !fir.ref -!CHECK: %[[I_PVT_VAL4_I32:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref -!CHECK: %[[I_PVT_VAL4_F64:.*]] = fir.convert %[[I_PVT_VAL4_I32]] : (i32) -> f64 +!CHECK: %[[I_PVT_VAL4_F64:.*]] = fir.convert %[[IVAL]] : (i32) -> f64 !CHECK: omp.reduction %[[I_PVT_VAL4_F64]], %[[WREF]] : f64, !fir.ref !CHECK: omp.yield !CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90 index 47f9d572a8653..95248e99b9c63 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-simd.f90 @@ -15,13 +15,11 @@ program wsloop_dynamic !CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 !CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop schedule(runtime, simd) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) -!CHECK: fir.store %[[I]] to %[[STORE:.*]] : !fir.ref do i=1, 9 print*, i !CHECK: %[[RTBEGIN:.*]] = fir.call @_FortranAioBeginExternalListOutput -!CHECK: %[[LOAD:.*]] = 
fir.load %[[STORE]] : !fir.ref -!CHECK: fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[LOAD]]) {{.*}}: (!fir.ref, i32) -> i1 +!CHECK: fir.call @_FortranAioOutputInteger32(%[[RTBEGIN]], %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 !CHECK: fir.call @_FortranAioEndIoStatement(%[[RTBEGIN]]) {{.*}}: (!fir.ref) -> i32 end do !CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90 b/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90 index 466055868f1cc..c515ed10d0d5c 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop-variable.f90 @@ -25,11 +25,9 @@ program wsloop_variable !CHECK: omp.wsloop for (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16 !CHECK: fir.store %[[ARG0_I16]] to %[[STORE_IV0:.*]] : !fir.ref -!CHECK: fir.store %[[ARG1]] to %[[STORE_IV1:.*]] : !fir.ref !CHECK: %[[LOAD_IV0:.*]] = fir.load %[[STORE_IV0]] : !fir.ref !CHECK: %[[LOAD_IV0_I64:.*]] = fir.convert %[[LOAD_IV0]] : (i16) -> i64 -!CHECK: %[[LOAD_IV1:.*]] = fir.load %[[STORE_IV1]] : !fir.ref -!CHECK: %[[TMP10:.*]] = arith.addi %[[LOAD_IV0_I64]], %[[LOAD_IV1]] : i64 +!CHECK: %[[TMP10:.*]] = arith.addi %[[LOAD_IV0_I64]], %[[ARG1]] : i64 !CHECK: %[[TMP11:.*]] = fir.convert %[[TMP10]] : (i64) -> f32 !CHECK: fir.store %[[TMP11]] to %{{.*}} : !fir.ref !CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/FIR/wsloop.f90 b/flang/test/Lower/OpenMP/FIR/wsloop.f90 index 2c00d1a9fddae..49b47a6307284 100644 --- a/flang/test/Lower/OpenMP/FIR/wsloop.f90 +++ b/flang/test/Lower/OpenMP/FIR/wsloop.f90 @@ -7,16 +7,13 @@ subroutine simple_loop integer :: i ! CHECK: omp.parallel !$OMP PARALLEL - ! CHECK: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 ! 
CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO do i=1, 9 - ! CHECK: fir.store %[[I]] to %[[ALLOCA_IV:.*]] : !fir.ref - ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do ! CHECK: omp.yield @@ -30,16 +27,13 @@ subroutine simple_loop_with_step integer :: i ! CHECK: omp.parallel !$OMP PARALLEL - ! CHECK: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 2 : i32 ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) - ! CHECK: fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref - ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref !$OMP DO do i=1, 9, 2 - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do ! CHECK: omp.yield @@ -53,16 +47,13 @@ subroutine loop_with_schedule_nowait integer :: i ! CHECK: omp.parallel !$OMP PARALLEL - ! CHECK: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} ! CHECK: %[[WS_LB:.*]] = arith.constant 1 : i32 ! CHECK: %[[WS_UB:.*]] = arith.constant 9 : i32 ! CHECK: %[[WS_STEP:.*]] = arith.constant 1 : i32 ! CHECK: omp.wsloop schedule(runtime) nowait for (%[[I:.*]]) : i32 = (%[[WS_LB]]) to (%[[WS_UB]]) inclusive step (%[[WS_STEP]]) !$OMP DO SCHEDULE(runtime) do i=1, 9 - ! CHECK: fir.store %[[I]] to %[[ALLOCA_IV]] : !fir.ref - ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[ALLOCA_IV]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! 
CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[I]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i end do ! CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 b/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 index fd56038231b19..ea39293ab78ff 100644 --- a/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 +++ b/flang/test/Lower/OpenMP/Todo/omp-default-clause-inner-loop.f90 @@ -8,16 +8,13 @@ ! CHECK: omp.parallel { ! EXPECTED: %[[PRIVATE_Y:.*]] = fir.alloca i32 {bindc_name = "y", pinned, uniq_name = "_QFEy"} ! EXPECTED: %[[PRIVATE_Z:.*]] = fir.alloca i32 {bindc_name = "z", pinned, uniq_name = "_QFEz"} -! CHECK: %[[TEMP:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: %[[const_1:.*]] = arith.constant 1 : i32 ! CHECK: %[[const_2:.*]] = arith.constant 10 : i32 ! CHECK: %[[const_3:.*]] = arith.constant 1 : i32 ! CHECK: omp.wsloop for (%[[ARG:.*]]) : i32 = (%[[const_1]]) to (%[[const_2]]) inclusive step (%[[const_3]]) { -! CHECK: fir.store %[[ARG]] to %[[TEMP]] : !fir.ref ! EXPECTED: %[[temp_1:.*]] = fir.load %[[PRIVATE_Z]] : !fir.ref ! CHECK: %[[temp_1:.*]] = fir.load %{{.*}} : !fir.ref -! CHECK: %[[temp_2:.*]] = fir.load %[[TEMP]] : !fir.ref -! CHECK: %[[result:.*]] = arith.addi %[[temp_1]], %[[temp_2]] : i32 +! CHECK: %[[result:.*]] = arith.addi %[[temp_1]], %[[ARG]] : i32 ! EXPECTED: fir.store %[[result]] to %[[PRIVATE_Y]] : !fir.ref ! CHECK: fir.store %[[result]] to %{{.*}} : !fir.ref ! CHECK: omp.yield diff --git a/flang/test/Lower/OpenMP/hlfir-wsloop.f90 b/flang/test/Lower/OpenMP/hlfir-wsloop.f90 index b6be77fe3016d..8b3bee48890d2 100644 --- a/flang/test/Lower/OpenMP/hlfir-wsloop.f90 +++ b/flang/test/Lower/OpenMP/hlfir-wsloop.f90 @@ -6,19 +6,21 @@ !CHECK-LABEL: func @_QPsimple_loop() subroutine simple_loop integer :: i - ! CHECK-DAG: %[[WS_ST:.*]] = arith.constant 1 : i32 - ! CHECK-DAG: %[[WS_END:.*]] = arith.constant 9 : i32 - ! CHECK: omp.parallel + ! 
CHECK-DAG: %[[WS_ST:.*]] = arith.constant 1 : i32 + ! CHECK-DAG: %[[WS_END:.*]] = arith.constant 9 : i32 + ! CHECK: omp.parallel !$OMP PARALLEL - ! CHECK-DAG: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} - ! CHECK: %[[IV:.*]] = fir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loopEi"} : (!fir.ref) -> !fir.ref ! CHECK: omp.wsloop for (%[[I:.*]]) : i32 = (%[[WS_ST]]) to (%[[WS_END]]) inclusive step (%[[WS_ST]]) !$OMP DO do i=1, 9 - ! CHECK: fir.store %[[I]] to %[[IV:.*]] : !fir.ref - ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV]] : !fir.ref - ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 + ! CHECK-DAG: %[[ALLOCA_IV:.*]] = fir.alloca i32 {{{.*}}, pinned} + ! CHECK: %[[IV:.*]] = fir.declare %[[ALLOCA_IV]] {uniq_name = "_QFsimple_loopEi"} : (!fir.ref) -> !fir.ref + ! CHECK: fir.store %[[I]] to %[[IV:.*]] : !fir.ref + ! CHECK: %[[LOAD_IV:.*]] = fir.load %[[IV]] : !fir.ref + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LOAD_IV]]) {{.*}}: (!fir.ref, i32) -> i1 print*, i + ! CHECK: fir.call @_QPfoo(%[[IV]]) {{.*}}: (!fir.ref) -> () + call foo(i) end do ! 
CHECK: omp.yield
 !$OMP END DO
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90
index 97ee665442e3a..97510745d0539 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-add-hlfir.f90
@@ -18,15 +18,11 @@
 !CHECK: %[[C0_2:.*]] = arith.constant 0 : i32
 !CHECK: hlfir.assign %[[C0_2]] to %[[XDECL]]#0 : i32, !fir.ref<i32>
 !CHECK: omp.parallel
-!CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
-!CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFsimple_int_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32
 !CHECK: %[[C100:.*]] = arith.constant 100 : i32
 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32
 !CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XDECL]]#0 : !fir.ref<i32>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]])
-!CHECK: fir.store %[[IVAL]] to %[[I_PVT_DECL]]#1 : !fir.ref<i32>
-!CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_DECL]]#0 : !fir.ref<i32>
-!CHECK: omp.reduction %[[I_PVT_VAL]], %[[XDECL]]#0 : i32, !fir.ref<i32>
+!CHECK: omp.reduction %[[IVAL]], %[[XDECL]]#0 : i32, !fir.ref<i32>
 !CHECK: omp.yield
 !CHECK: omp.terminator
 !CHECK: return
diff --git a/flang/test/Transforms/omp-wsloop-index.mlir b/flang/test/Transforms/omp-wsloop-index.mlir
new file mode 100644
index 0000000000000..9443209d90b9f
--- /dev/null
+++ b/flang/test/Transforms/omp-wsloop-index.mlir
@@ -0,0 +1,247 @@
+// RUN: fir-opt --omp-loop-index-mem2reg %s | FileCheck %s
+
+func.func private @foo(%arg0 : !fir.ref<i32>) -> i32
+
+// CHECK-LABEL: @wsloop_remove_alloca
+func.func @wsloop_remove_alloca() {
+  // CHECK: %[[RESULT:.*]] = fir.alloca i32
+  // CHECK: omp.parallel
+  %0 = fir.alloca i32
+  omp.parallel {
+    // CHECK-NOT: fir.alloca
+    // CHECK-DAG: arith.constant 1
+    // CHECK-DAG: arith.constant 10
+    // CHECK-NEXT: omp.wsloop for (%[[INDEX:.*]]) : i32
+    %1 = fir.alloca i32
+    %c1_i32 = arith.constant 1 : i32
+    %c10_i32 = arith.constant 10 : i32
+    omp.wsloop for (%arg0) : i32 = (%c1_i32) to (%c10_i32) inclusive step (%c1_i32) {
+      // CHECK-NOT: fir.alloca
+      // CHECK: fir.store %[[INDEX]] to %[[RESULT]]
+      // CHECK: omp.yield
+      fir.store %arg0 to %1 : !fir.ref<i32>
+      %2 = fir.load %1 : !fir.ref<i32>
+      fir.store %2 to %0 : !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
+
+// CHECK-LABEL: @simdloop_remove_alloca
+func.func @simdloop_remove_alloca() {
+  // CHECK: %[[RESULT:.*]] = fir.alloca i32
+  // CHECK: omp.parallel
+  %0 = fir.alloca i32
+  omp.parallel {
+    // CHECK-NOT: fir.alloca
+    // CHECK-DAG: arith.constant 1
+    // CHECK-DAG: arith.constant 10
+    // CHECK-NEXT: omp.simdloop for (%[[INDEX:.*]]) : i32
+    %1 = fir.alloca i32
+    %c1_i32 = arith.constant 1 : i32
+    %c10_i32 = arith.constant 10 : i32
+    omp.simdloop for (%arg0) : i32 = (%c1_i32) to (%c10_i32) inclusive step (%c1_i32) {
+      // CHECK-NOT: fir.alloca
+      // CHECK: fir.store %[[INDEX]] to %[[RESULT]]
+      // CHECK: omp.yield
+      fir.store %arg0 to %1 : !fir.ref<i32>
+      %2 = fir.load %1 : !fir.ref<i32>
+      fir.store %2 to %0 : !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
+
+// CHECK-LABEL: @wsloop_push_alloca
+func.func @wsloop_push_alloca() {
+  // CHECK: %[[RESULT:.*]] = fir.alloca i32
+  // CHECK: omp.parallel
+  %0 = fir.alloca i32
+  omp.parallel {
+    // CHECK-NOT: fir.alloca
+    // CHECK-DAG: arith.constant 1
+    // CHECK-DAG: arith.constant 10
+    // CHECK-NEXT: omp.wsloop for (%[[INDEX:.*]]) : i32
+    %1 = fir.alloca i32
+    %c1_i32 = arith.constant 1 : i32
+    %c10_i32 = arith.constant 10 : i32
+    omp.wsloop for (%arg0) : i32 = (%c1_i32) to (%c10_i32) inclusive step (%c1_i32) {
+      // CHECK: %[[ALLOCA:.*]] = fir.alloca i32
+      // CHECK: fir.store %[[INDEX]] to %[[ALLOCA]]
+      // CHECK: %[[RETURN:.*]] = func.call @foo(%[[ALLOCA]])
+      // CHECK: fir.store %[[RETURN]] to %[[RESULT]]
+      // CHECK: omp.yield
+      fir.store %arg0 to %1 : !fir.ref<i32>
+      %2 = func.call @foo(%1) : (!fir.ref<i32>) -> i32
+      fir.store %2 to %0 : !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
+
+// CHECK-LABEL: @simdloop_push_alloca
+func.func @simdloop_push_alloca() {
+  // CHECK: %[[RESULT:.*]] = fir.alloca i32
+  // CHECK: omp.parallel
+  %0 = fir.alloca i32
+  omp.parallel {
+    // CHECK-NOT: fir.alloca
+    // CHECK-DAG: arith.constant 1
+    // CHECK-DAG: arith.constant 10
+    // CHECK-NEXT: omp.simdloop for (%[[INDEX:.*]]) : i32
+    %1 = fir.alloca i32
+    %c1_i32 = arith.constant 1 : i32
+    %c10_i32 = arith.constant 10 : i32
+    omp.simdloop for (%arg0) : i32 = (%c1_i32) to (%c10_i32) inclusive step (%c1_i32) {
+      // CHECK: %[[ALLOCA:.*]] = fir.alloca i32
+      // CHECK: fir.store %[[INDEX]] to %[[ALLOCA]]
+      // CHECK: %[[RETURN:.*]] = func.call @foo(%[[ALLOCA]])
+      // CHECK: fir.store %[[RETURN]] to %[[RESULT]]
+      // CHECK: omp.yield
+      fir.store %arg0 to %1 : !fir.ref<i32>
+      %2 = func.call @foo(%1) : (!fir.ref<i32>) -> i32
+      fir.store %2 to %0 : !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
+
+// CHECK-LABEL: @hlfir_wsloop_remove_alloca
+func.func @hlfir_wsloop_remove_alloca() {
+  // CHECK: %[[RESULT_ALLOCA:.*]] = fir.alloca i32
+  // CHECK: %[[RESULT:.*]]:2 = hlfir.declare %[[RESULT_ALLOCA]]
+  // CHECK: omp.parallel
+  %0 = fir.alloca i32
+  %1:2 = hlfir.declare %0 {uniq_name = "result"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  omp.parallel {
+    // CHECK-NOT: fir.alloca
+    // CHECK-NOT: hlfir.declare
+    // CHECK-DAG: arith.constant 1
+    // CHECK-DAG: arith.constant 10
+    // CHECK-NEXT: omp.wsloop for (%[[INDEX:.*]]) : i32
+    %2 = fir.alloca i32
+    %3:2 = hlfir.declare %2 {uniq_name = "index"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+    %c1_i32 = arith.constant 1 : i32
+    %c10_i32 = arith.constant 10 : i32
+    omp.wsloop for (%arg0) : i32 = (%c1_i32) to (%c10_i32) inclusive step (%c1_i32) {
+      // CHECK-NOT: fir.alloca
+      // CHECK-NOT: hlfir.declare
+      // CHECK: hlfir.assign %[[INDEX]] to %[[RESULT]]#0
+      // CHECK: omp.yield
+      fir.store %arg0 to %3#1 : !fir.ref<i32>
+      %4 = fir.load %3#0 : !fir.ref<i32>
+      hlfir.assign %4 to %1#0 : i32, !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
+
+// CHECK-LABEL: @hlfir_simdloop_remove_alloca
+func.func @hlfir_simdloop_remove_alloca() {
+  // CHECK: %[[RESULT_ALLOCA:.*]] = fir.alloca i32
+  // CHECK: %[[RESULT:.*]]:2 = hlfir.declare %[[RESULT_ALLOCA]]
+  // CHECK: omp.parallel
+  %0 = fir.alloca i32
+  %1:2 = hlfir.declare %0 {uniq_name = "result"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  omp.parallel {
+    // CHECK-NOT: fir.alloca
+    // CHECK-NOT: hlfir.declare
+    // CHECK-DAG: arith.constant 1
+    // CHECK-DAG: arith.constant 10
+    // CHECK-NEXT: omp.simdloop for (%[[INDEX:.*]]) : i32
+    %2 = fir.alloca i32
+    %3:2 = hlfir.declare %2 {uniq_name = "index"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+    %c1_i32 = arith.constant 1 : i32
+    %c10_i32 = arith.constant 10 : i32
+    omp.simdloop for (%arg0) : i32 = (%c1_i32) to (%c10_i32) inclusive step (%c1_i32) {
+      // CHECK-NOT: fir.alloca
+      // CHECK-NOT: hlfir.declare
+      // CHECK: hlfir.assign %[[INDEX]] to %[[RESULT]]#0
+      // CHECK: omp.yield
+      fir.store %arg0 to %3#1 : !fir.ref<i32>
+      %4 = fir.load %3#0 : !fir.ref<i32>
+      hlfir.assign %4 to %1#0 : i32, !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
+
+// CHECK-LABEL: @hlfir_wsloop_push_alloca
+func.func @hlfir_wsloop_push_alloca() {
+  // CHECK: %[[RESULT_ALLOCA:.*]] = fir.alloca i32
+  // CHECK: %[[RESULT:.*]]:2 = hlfir.declare %[[RESULT_ALLOCA]]
+  // CHECK: omp.parallel
+  %0 = fir.alloca i32
+  %1:2 = hlfir.declare %0 {uniq_name = "result"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  omp.parallel {
+    // CHECK-NOT: fir.alloca
+    // CHECK-NOT: hlfir.declare
+    // CHECK-DAG: arith.constant 1
+    // CHECK-DAG: arith.constant 10
+    // CHECK-NEXT: omp.wsloop for (%[[INDEX:.*]]) : i32
+    %2 = fir.alloca i32
+    %3:2 = hlfir.declare %2 {uniq_name = "index"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+    %c1_i32 = arith.constant 1 : i32
+    %c10_i32 = arith.constant 10 : i32
+    omp.wsloop for (%arg0) : i32 = (%c1_i32) to (%c10_i32) inclusive step (%c1_i32) {
+      // CHECK: %[[INDEX_ALLOCA:.*]] = fir.alloca i32
+      // CHECK: %[[INDEX_DECL:.*]]:2 = hlfir.declare %[[INDEX_ALLOCA]]
+      // CHECK: fir.store %[[INDEX]] to %[[INDEX_DECL]]#1
+      // CHECK: %[[RETURN:.*]] = fir.call @foo(%[[INDEX_DECL]]#1)
+      // CHECK: hlfir.assign %[[RETURN]] to %[[RESULT]]#0
+      // CHECK: omp.yield
+      fir.store %arg0 to %3#1 : !fir.ref<i32>
+      %4 = fir.call @foo(%3#1) : (!fir.ref<i32>) -> i32
+      hlfir.assign %4 to %1#0 : i32, !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}
+
+// CHECK-LABEL: @hlfir_simdloop_push_alloca
+func.func @hlfir_simdloop_push_alloca() {
+  // CHECK: %[[RESULT_ALLOCA:.*]] = fir.alloca i32
+  // CHECK: %[[RESULT:.*]]:2 = hlfir.declare %[[RESULT_ALLOCA]]
+  // CHECK: omp.parallel
+  %0 = fir.alloca i32
+  %1:2 = hlfir.declare %0 {uniq_name = "result"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  omp.parallel {
+    // CHECK-NOT: fir.alloca
+    // CHECK-NOT: hlfir.declare
+    // CHECK-DAG: arith.constant 1
+    // CHECK-DAG: arith.constant 10
+    // CHECK-NEXT: omp.simdloop for (%[[INDEX:.*]]) : i32
+    %2 = fir.alloca i32
+    %3:2 = hlfir.declare %2 {uniq_name = "index"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+    %c1_i32 = arith.constant 1 : i32
+    %c10_i32 = arith.constant 10 : i32
+    omp.simdloop for (%arg0) : i32 = (%c1_i32) to (%c10_i32) inclusive step (%c1_i32) {
+      // CHECK: %[[INDEX_ALLOCA:.*]] = fir.alloca i32
+      // CHECK: %[[INDEX_DECL:.*]]:2 = hlfir.declare %[[INDEX_ALLOCA]]
+      // CHECK: fir.store %[[INDEX]] to %[[INDEX_DECL]]#1
+      // CHECK: %[[RETURN:.*]] = fir.call @foo(%[[INDEX_DECL]]#1)
+      // CHECK: hlfir.assign %[[RETURN]] to %[[RESULT]]#0
+      // CHECK: omp.yield
+      fir.store %arg0 to %3#1 : !fir.ref<i32>
+      %4 = fir.call @foo(%3#1) : (!fir.ref<i32>) -> i32
+      hlfir.assign %4 to %1#0 : i32, !fir.ref<i32>
+      omp.yield
+    }
+    omp.terminator
+  }
+  return
+}