diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h index e2af59e0aaa19..fa3fd9a8cc05e 100644 --- a/flang/include/flang/Lower/AbstractConverter.h +++ b/flang/include/flang/Lower/AbstractConverter.h @@ -16,6 +16,7 @@ #include "flang/Common/Fortran.h" #include "flang/Lower/LoweringOptions.h" #include "flang/Lower/PFTDefs.h" +#include "flang/Lower/SymbolMap.h" #include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Semantics/symbol.h" #include "mlir/IR/Builders.h" @@ -299,6 +300,11 @@ class AbstractConverter { return loweringOptions; } + /// Find the symbol in one level up of symbol map such as for host-association + /// in OpenMP code or return null. + virtual Fortran::lower::SymbolBox + lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) = 0; + private: /// Options controlling lowering behavior. const Fortran::lower::LoweringOptions &loweringOptions; diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index f865b53f74deb..0691753defceb 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -1000,6 +1000,17 @@ class FirConverter : public Fortran::lower::AbstractConverter { if (sym.detailsIf()) return symMap->lookupSymbol(sym); + // For symbols to be privatized in OMP, the symbol is mapped to an + // instance of `SymbolBox::Intrinsic` (i.e. a direct mapping to an MLIR + // SSA value). This MLIR SSA value is the block argument to the + // `omp.private`'s `alloc` block. If this is the case, we return this + // `SymbolBox::Intrinsic` value. + if (Fortran::lower::SymbolBox v = symMap->lookupSymbol(sym)) + return v.match( + [&](const Fortran::lower::SymbolBox::Intrinsic &) + -> Fortran::lower::SymbolBox { return v; }, + [](const auto &) -> Fortran::lower::SymbolBox { return {}; }); + return {}; } if (Fortran::lower::SymbolBox v = symMap->lookupSymbol(sym)) @@ -1018,7 +1029,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { /// Find the symbol in one level up of symbol map such as for host-association /// in OpenMP code or return null. Fortran::lower::SymbolBox - lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) { + lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) override { if (Fortran::lower::SymbolBox v = localSymbols.lookupOneLevelUpSymbol(sym)) return v; return {}; diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp index 136bda0b582ee..d6287cb4bc239 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp @@ -66,9 +66,10 @@ void DataSharingProcessor::cloneSymbol(const Fortran::semantics::Symbol *sym) { } void DataSharingProcessor::copyFirstPrivateSymbol( - const Fortran::semantics::Symbol *sym) { + const Fortran::semantics::Symbol *sym, + mlir::OpBuilder::InsertPoint *copyAssignIP) { if (sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) - converter.copyHostAssociateVar(*sym); + converter.copyHostAssociateVar(*sym, copyAssignIP); } void DataSharingProcessor::copyLastPrivateSymbol( @@ -307,14 +308,10 @@ void DataSharingProcessor::privatize() { for (const Fortran::semantics::Symbol *sym : privatizedSymbols) { if (const auto *commonDet = sym->detailsIf()) { - for (const auto &mem : commonDet->objects()) { - cloneSymbol(&*mem); - copyFirstPrivateSymbol(&*mem); - } - } else { - cloneSymbol(sym); - copyFirstPrivateSymbol(sym); - } + for (const auto &mem : commonDet->objects()) + doPrivatize(&*mem); + } else + doPrivatize(sym); } } @@ -338,11 +335,95 @@ void DataSharingProcessor::defaultPrivatize() { !sym->GetUltimate().has() && !symbolsInNestedRegions.contains(sym) && !symbolsInParentRegions.contains(sym) && - !privatizedSymbols.contains(sym)) { + !privatizedSymbols.contains(sym)) + doPrivatize(sym); + } +} + +void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) { + if (!useDelayedPrivatization) { + cloneSymbol(sym); + copyFirstPrivateSymbol(sym); + return; + } + + Fortran::lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym); + assert(hsb && "Host symbol box not found"); + + mlir::Type symType = hsb.getAddr().getType(); + mlir::Location symLoc = hsb.getAddr().getLoc(); + std::string privatizerName = sym->name().ToString() + ".privatizer"; + bool isFirstPrivate = + sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate); + + mlir::omp::PrivateClauseOp privatizerOp = [&]() { + auto moduleOp = firOpBuilder.getModule(); + auto uniquePrivatizerName = fir::getTypeAsString( + symType, converter.getKindMap(), + converter.mangleName(*sym) + + (isFirstPrivate ? "_firstprivate" : "_private")); + + if (auto existingPrivatizer = + moduleOp.lookupSymbol( + uniquePrivatizerName)) + return existingPrivatizer; + + auto ip = firOpBuilder.saveInsertionPoint(); + firOpBuilder.setInsertionPoint(&moduleOp.getBodyRegion().front(), + moduleOp.getBodyRegion().front().begin()); + auto result = firOpBuilder.create( + symLoc, uniquePrivatizerName, symType, + isFirstPrivate ? mlir::omp::DataSharingClauseType::FirstPrivate + : mlir::omp::DataSharingClauseType::Private); + + symTable->pushScope(); + + // Populate the `alloc` region. + { + mlir::Region &allocRegion = result.getAllocRegion(); + mlir::Block *allocEntryBlock = firOpBuilder.createBlock( + &allocRegion, /*insertPt=*/{}, symType, symLoc); + + firOpBuilder.setInsertionPointToEnd(allocEntryBlock); + symTable->addSymbol(*sym, allocRegion.getArgument(0)); + symTable->pushScope(); cloneSymbol(sym); - copyFirstPrivateSymbol(sym); + firOpBuilder.create( + hsb.getAddr().getLoc(), + symTable->shallowLookupSymbol(*sym).getAddr()); + symTable->popScope(); } - } + + // Populate the `copy` region if this is a `firstprivate`. + if (isFirstPrivate) { + mlir::Region ©Region = result.getCopyRegion(); + // First block argument corresponding to the original/host value while + // second block argument corresponding to the privatized value. + mlir::Block *copyEntryBlock = firOpBuilder.createBlock( + ©Region, /*insertPt=*/{}, {symType, symType}, {symLoc, symLoc}); + firOpBuilder.setInsertionPointToEnd(copyEntryBlock); + symTable->addSymbol(*sym, copyRegion.getArgument(0), + /*force=*/true); + symTable->pushScope(); + symTable->addSymbol(*sym, copyRegion.getArgument(1)); + auto ip = firOpBuilder.saveInsertionPoint(); + copyFirstPrivateSymbol(sym, &ip); + + firOpBuilder.create( + hsb.getAddr().getLoc(), + symTable->shallowLookupSymbol(*sym).getAddr()); + symTable->popScope(); + } + + symTable->popScope(); + firOpBuilder.restoreInsertionPoint(ip); + return result; + }(); + + delayedPrivatizationInfo.privatizers.push_back( + mlir::SymbolRefAttr::get(privatizerOp)); + delayedPrivatizationInfo.originalAddresses.push_back(hsb.getAddr()); + delayedPrivatizationInfo.symbols.push_back(sym); } } // namespace omp diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h index 10c0a30c09c39..9f7301df07598 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h @@ -23,6 +23,24 @@ namespace lower { namespace omp { class DataSharingProcessor { +public: + /// Collects all the information needed for delayed privatization. This can be + /// used by ops with data-sharing clauses to properly generate their regions + /// (e.g. add region arguments) and map the original SSA values to their + /// corresponding OMP region operands. + struct DelayedPrivatizationInfo { + // The list of symbols referring to delayed privatizer ops (i.e. + // `omp.private` ops). + llvm::SmallVector privatizers; + // SSA values that correspond to "original" values being privatized. + // "Original" here means the SSA value outside the OpenMP region from which + // a clone is created inside the region. + llvm::SmallVector originalAddresses; + // Fortran symbols corresponding to the above SSA values. + llvm::SmallVector symbols; + }; + +private: bool hasLastPrivateOp; mlir::OpBuilder::InsertPoint lastPrivIP; mlir::OpBuilder::InsertPoint insPt; @@ -36,6 +54,9 @@ class DataSharingProcessor { fir::FirOpBuilder &firOpBuilder; const Fortran::parser::OmpClauseList &opClauseList; Fortran::lower::pft::Evaluation &eval; + bool useDelayedPrivatization; + Fortran::lower::SymMap *symTable; + DelayedPrivatizationInfo delayedPrivatizationInfo; bool needBarrier(); void collectSymbols(Fortran::semantics::Symbol::Flag flag); @@ -47,10 +68,13 @@ class DataSharingProcessor { void collectDefaultSymbols(); void privatize(); void defaultPrivatize(); + void doPrivatize(const Fortran::semantics::Symbol *sym); void copyLastPrivatize(mlir::Operation *op); void insertLastPrivateCompare(mlir::Operation *op); void cloneSymbol(const Fortran::semantics::Symbol *sym); - void copyFirstPrivateSymbol(const Fortran::semantics::Symbol *sym); + void + copyFirstPrivateSymbol(const Fortran::semantics::Symbol *sym, + mlir::OpBuilder::InsertPoint *copyAssignIP = nullptr); void copyLastPrivateSymbol(const Fortran::semantics::Symbol *sym, mlir::OpBuilder::InsertPoint *lastPrivIP); void insertDeallocs(); @@ -58,10 +82,14 @@ class DataSharingProcessor { public: DataSharingProcessor(Fortran::lower::AbstractConverter &converter, const Fortran::parser::OmpClauseList &opClauseList, - Fortran::lower::pft::Evaluation &eval) + Fortran::lower::pft::Evaluation &eval, + bool useDelayedPrivatization = false, + Fortran::lower::SymMap *symTable = nullptr) : hasLastPrivateOp(false), converter(converter), firOpBuilder(converter.getFirOpBuilder()), opClauseList(opClauseList), - eval(eval) {} + eval(eval), useDelayedPrivatization(useDelayedPrivatization), + symTable(symTable) {} + // Privatisation is split into two steps. // Step1 performs cloning of all privatisation clauses and copying for // firstprivates. Step1 is performed at the place where process/processStep1 @@ -80,6 +108,10 @@ class DataSharingProcessor { assert(!loopIV && "Loop iteration variable already set"); loopIV = iv; } + + const DelayedPrivatizationInfo &getDelayedPrivatizationInfo() const { + return delayedPrivatizationInfo; + } }; } // namespace omp diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 7953bf83cba0f..21ad51c53d874 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -558,6 +558,7 @@ genOrderedRegionOp(Fortran::lower::AbstractConverter &converter, static mlir::omp::ParallelOp genParallelOp(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, bool genNested, mlir::Location currentLocation, @@ -590,8 +591,8 @@ genParallelOp(Fortran::lower::AbstractConverter &converter, auto reductionCallback = [&](mlir::Operation *op) { llvm::SmallVector locs(reductionVars.size(), currentLocation); - auto block = converter.getFirOpBuilder().createBlock(&op->getRegion(0), {}, - reductionTypes, locs); + auto *block = converter.getFirOpBuilder().createBlock(&op->getRegion(0), {}, + reductionTypes, locs); for (auto [arg, prv] : llvm::zip_equal(reductionSymbols, block->getArguments())) { converter.bindSymbol(*arg, prv); @@ -599,13 +600,78 @@ genParallelOp(Fortran::lower::AbstractConverter &converter, return reductionSymbols; }; - return genOpWithBody( + OpWithBodyGenInfo genInfo = OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) .setGenNested(genNested) .setOuterCombined(outerCombined) .setClauses(&clauseList) .setReductions(&reductionSymbols, &reductionTypes) - .setGenRegionEntryCb(reductionCallback), + .setGenRegionEntryCb(reductionCallback); + + if (!enableDelayedPrivatization) { + return genOpWithBody( + genInfo, + /*resultTypes=*/mlir::TypeRange(), ifClauseOperand, + numThreadsClauseOperand, allocateOperands, allocatorOperands, + reductionVars, + reductionDeclSymbols.empty() + ? nullptr + : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), + reductionDeclSymbols), + procBindKindAttr, /*private_vars=*/llvm::SmallVector{}, + /*privatizers=*/nullptr); + } + + bool privatize = !outerCombined; + DataSharingProcessor dsp(converter, clauseList, eval, + /*useDelayedPrivatization=*/true, &symTable); + + if (privatize) + dsp.processStep1(); + + const auto &delayedPrivatizationInfo = dsp.getDelayedPrivatizationInfo(); + + auto genRegionEntryCB = [&](mlir::Operation *op) { + auto parallelOp = llvm::cast(op); + + llvm::SmallVector reductionLocs(reductionVars.size(), + currentLocation); + + mlir::OperandRange privateVars = parallelOp.getPrivateVars(); + mlir::Region ®ion = parallelOp.getRegion(); + + llvm::SmallVector privateVarTypes = reductionTypes; + privateVarTypes.reserve(privateVarTypes.size() + privateVars.size()); + llvm::transform(privateVars, std::back_inserter(privateVarTypes), + [](mlir::Value v) { return v.getType(); }); + + llvm::SmallVector privateVarLocs = reductionLocs; + privateVarLocs.reserve(privateVarLocs.size() + privateVars.size()); + llvm::transform(privateVars, std::back_inserter(privateVarLocs), + [](mlir::Value v) { return v.getLoc(); }); + + converter.getFirOpBuilder().createBlock(®ion, /*insertPt=*/{}, + privateVarTypes, privateVarLocs); + + llvm::SmallVector allSymbols = + reductionSymbols; + allSymbols.append(delayedPrivatizationInfo.symbols); + for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) { + converter.bindSymbol(*arg, prv); + } + + return allSymbols; + }; + + // TODO Merge with the reduction CB. + genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp); + + llvm::SmallVector privatizers( + delayedPrivatizationInfo.privatizers.begin(), + delayedPrivatizationInfo.privatizers.end()); + + return genOpWithBody( + genInfo, /*resultTypes=*/mlir::TypeRange(), ifClauseOperand, numThreadsClauseOperand, allocateOperands, allocatorOperands, reductionVars, @@ -613,8 +679,11 @@ genParallelOp(Fortran::lower::AbstractConverter &converter, ? nullptr : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), reductionDeclSymbols), - procBindKindAttr, /*private_vars=*/llvm::SmallVector{}, - /*privatizers=*/nullptr); + procBindKindAttr, delayedPrivatizationInfo.originalAddresses, + delayedPrivatizationInfo.privatizers.empty() + ? nullptr + : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), + privatizers)); } static mlir::omp::SectionOp @@ -1621,7 +1690,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, if ((llvm::omp::allParallelSet & llvm::omp::loopConstructSet) .test(ompDirective)) { validDirective = true; - genParallelOp(converter, semaCtx, eval, /*genNested=*/false, + genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false, currentLocation, loopOpClauseList, /*outerCombined=*/true); } @@ -1711,8 +1780,8 @@ genOMP(Fortran::lower::AbstractConverter &converter, currentLocation); break; case llvm::omp::Directive::OMPD_parallel: - genParallelOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, - beginClauseList); + genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/true, + currentLocation, beginClauseList); break; case llvm::omp::Directive::OMPD_single: genSingleOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, @@ -1769,7 +1838,7 @@ genOMP(Fortran::lower::AbstractConverter &converter, .test(directive.v)) { bool outerCombined = directive.v != llvm::omp::Directive::OMPD_target_parallel; - genParallelOp(converter, semaCtx, eval, /*genNested=*/false, + genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false, currentLocation, beginClauseList, outerCombined); combinedDirective = true; } @@ -1852,7 +1921,7 @@ genOMP(Fortran::lower::AbstractConverter &converter, // Parallel wrapper of PARALLEL SECTIONS construct if (dir == llvm::omp::Directive::OMPD_parallel_sections) { - genParallelOp(converter, semaCtx, eval, + genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false, currentLocation, sectionsClauseList, /*outerCombined=*/true); } else { diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 31b15257d1868..49517f62895df 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -24,6 +24,12 @@ llvm::cl::opt treatIndexAsSection( llvm::cl::desc("In the OpenMP data clauses treat `a(N)` as `a(N:N)`."), llvm::cl::init(true)); +llvm::cl::opt enableDelayedPrivatization( + "openmp-enable-delayed-privatization", + llvm::cl::desc( + "Emit `[first]private` variables as clauses on the MLIR ops."), + llvm::cl::init(false)); + namespace Fortran { namespace lower { namespace omp { diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index c346f891f0797..f57cd7420ce4d 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -15,6 +15,7 @@ #include "llvm/Support/CommandLine.h" extern llvm::cl::opt treatIndexAsSection; +extern llvm::cl::opt enableDelayedPrivatization; namespace fir { class FirOpBuilder; diff --git a/flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90 b/flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90 new file mode 100644 index 0000000000000..122542345f104 --- /dev/null +++ b/flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90 @@ -0,0 +1,29 @@ +! Test delayed privatization for the `private` clause. + +! RUN: bbc -emit-fir -hlfir=false -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s + +subroutine delayed_privatization_firstprivate + implicit none + integer :: var1 + +!$OMP PARALLEL FIRSTPRIVATE(var1) + var1 = 10 +!$OMP END PARALLEL +end subroutine + +! CHECK-LABEL: omp.private {type = firstprivate} +! CHECK-SAME: @[[VAR1_PRIVATIZER_SYM:.*]] : !fir.ref alloc { +! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: !fir.ref): +! CHECK-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_firstprivateEvar1"} +! CHECK-NEXT: omp.yield(%[[PRIV_ALLOC]] : !fir.ref) +! CHECK: } copy { +! CHECK: ^bb0(%[[PRIV_ORIG_ARG:.*]]: !fir.ref, %[[PRIV_PRIV_ARG:.*]]: !fir.ref): +! CHECK: %[[ORIG_VAL:.*]] = fir.load %[[PRIV_ORIG_ARG]] : !fir.ref +! CHECK: fir.store %[[ORIG_VAL]] to %[[PRIV_PRIV_ARG]] : !fir.ref +! CHECK: omp.yield(%[[PRIV_PRIV_ARG]] : !fir.ref) +! CHECK: } + +! CHECK-LABEL: @_QPdelayed_privatization_firstprivate +! CHECK: omp.parallel private(@[[VAR1_PRIVATIZER_SYM]] %{{.*}} -> %{{.*}} : !fir.ref) { +! CHECK: omp.terminator + diff --git a/flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90 b/flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90 new file mode 100644 index 0000000000000..2e9995ea1fd4c --- /dev/null +++ b/flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90 @@ -0,0 +1,38 @@ +! Test delayed privatization for the `private` clause. + +! RUN: bbc -emit-fir -hlfir=false -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s + +subroutine delayed_privatization_private + implicit none + integer :: var1 + +!$OMP PARALLEL PRIVATE(var1) + var1 = 10 +!$OMP END PARALLEL + +!$OMP PARALLEL PRIVATE(var1) + var1 = 20 +!$OMP END PARALLEL + +end subroutine + +! CHECK-LABEL: omp.private {type = private} +! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : !fir.ref alloc { +! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: !fir.ref): +! CHECK-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_privateEvar1"} +! CHECK-NEXT: omp.yield(%[[PRIV_ALLOC]] : !fir.ref) +! CHECK-NOT: } copy { + +! CHECK-LABEL: @_QPdelayed_privatization_private +! CHECK: %[[ORIG_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1", uniq_name = "_QFdelayed_privatization_privateEvar1"} +! CHECK: omp.parallel private(@[[PRIVATIZER_SYM]] %[[ORIG_ALLOC]] -> %[[PAR_ARG:.*]] : !fir.ref) { +! CHECK: %[[C10:.*]] = arith.constant 10 : i32 +! CHECK: fir.store %[[C10]] to %[[PAR_ARG]] : !fir.ref +! CHECK: omp.terminator + +! Test that the same privatizer is used if the a variable with the same type and +! name was previously privatized. +! CHECK: omp.parallel private(@[[PRIVATIZER_SYM]] %[[ORIG_ALLOC]] -> %[[PAR_ARG:.*]] : !fir.ref) { +! CHECK: %[[C20:.*]] = arith.constant 20 : i32 +! CHECK: fir.store %[[C20]] to %[[PAR_ARG]] : !fir.ref +! CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/delayed-privatization-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-firstprivate.f90 new file mode 100644 index 0000000000000..e3d2a5a8af260 --- /dev/null +++ b/flang/test/Lower/OpenMP/delayed-privatization-firstprivate.f90 @@ -0,0 +1,29 @@ +! Test delayed privatization for the `firstprivate` clause. + +! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s + +subroutine delayed_privatization_firstprivate + implicit none + integer :: var1 + +!$omp parallel firstprivate(var1) + var1 = 10 +!$omp end parallel +end subroutine + +! CHECK-LABEL: omp.private {type = firstprivate} +! CHECK-SAME: @[[VAR1_PRIVATIZER_SYM:.*]] : !fir.ref alloc { +! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: !fir.ref): +! CHECK-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_firstprivateEvar1"} +! CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]] {uniq_name = "_QFdelayed_privatization_firstprivateEvar1"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK-NEXT: omp.yield(%[[PRIV_DECL]]#0 : !fir.ref) +! CHECK: } copy { +! CHECK: ^bb0(%[[PRIV_ORIG_ARG:.*]]: !fir.ref, %[[PRIV_PRIV_ARG:.*]]: !fir.ref): +! CHECK: %[[ORIG_VAL:.*]] = fir.load %[[PRIV_ORIG_ARG]] : !fir.ref +! CHECK: hlfir.assign %[[ORIG_VAL]] to %[[PRIV_PRIV_ARG]] temporary_lhs : i32, !fir.ref +! CHECK: omp.yield(%[[PRIV_PRIV_ARG]] : !fir.ref) +! CHECK: } + +! CHECK-LABEL: @_QPdelayed_privatization_firstprivate +! CHECK: omp.parallel private(@[[VAR1_PRIVATIZER_SYM]] %{{.*}} -> %{{.*}} : !fir.ref) { +! CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/delayed-privatization-private-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-private-firstprivate.f90 new file mode 100644 index 0000000000000..46eef6eb3bcf6 --- /dev/null +++ b/flang/test/Lower/OpenMP/delayed-privatization-private-firstprivate.f90 @@ -0,0 +1,34 @@ +! Test delayed privatization for both `private` and `firstprivate` clauses. + +! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s + +subroutine delayed_privatization_private_firstprivate + implicit none + integer :: var1 + integer :: var2 + +!$omp parallel private(var1) firstprivate(var2) + var1 = 10 + var2 = var1 + var2 +!$omp end parallel +end subroutine + +! CHECK-LABEL: omp.private {type = firstprivate} +! CHECK-SAME: @[[VAR2_PRIVATIZER_SYM:.*]] : !fir.ref alloc { +! CHECK: } copy { +! CHECK: } + +! CHECK-LABEL: omp.private {type = private} +! CHECK-SAME: @[[VAR1_PRIVATIZER_SYM:.*]] : !fir.ref alloc { +! CHECK: } + +! CHECK-LABEL: func.func @_QPdelayed_privatization_private_firstprivate() { +! CHECK: %[[VAR1_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1" +! CHECK: %[[VAR1_DECL:.*]]:2 = hlfir.declare %[[VAR1_ALLOC]] + +! CHECK: %[[VAR2_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var2" +! CHECK: %[[VAR2_DECL:.*]]:2 = hlfir.declare %[[VAR2_ALLOC]] + +! CHECK: omp.parallel private( +! CHECK-SAME: @[[VAR1_PRIVATIZER_SYM]] %[[VAR1_DECL]]#0 -> %{{.*}} : !fir.ref, +! CHECK-SAME: @[[VAR2_PRIVATIZER_SYM]] %[[VAR2_DECL]]#0 -> %{{.*}} : !fir.ref) { diff --git a/flang/test/Lower/OpenMP/delayed-privatization-private.f90 b/flang/test/Lower/OpenMP/delayed-privatization-private.f90 new file mode 100644 index 0000000000000..240e0e71bfcd1 --- /dev/null +++ b/flang/test/Lower/OpenMP/delayed-privatization-private.f90 @@ -0,0 +1,28 @@ +! Test delayed privatization for the `private` clause. + +! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s + +subroutine delayed_privatization_private + implicit none + integer :: var1 + +!$omp parallel private(var1) + var1 = 10 +!$omp end parallel +end subroutine + +! CHECK-LABEL: omp.private {type = private} +! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : !fir.ref alloc { +! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: !fir.ref): +! CHECK-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_privateEvar1"} +! CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]] {uniq_name = "_QFdelayed_privatization_privateEvar1"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK-NEXT: omp.yield(%[[PRIV_DECL]]#0 : !fir.ref) +! CHECK-NOT: } copy { + +! CHECK-LABEL: @_QPdelayed_privatization_private +! CHECK: %[[ORIG_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1", uniq_name = "_QFdelayed_privatization_privateEvar1"} +! CHECK: %[[ORIG_DECL:.*]]:2 = hlfir.declare %[[ORIG_ALLOC]] {uniq_name = "_QFdelayed_privatization_privateEvar1"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: omp.parallel private(@[[PRIVATIZER_SYM]] %[[ORIG_DECL]]#0 -> %[[PAR_ARG:.*]] : !fir.ref) { +! CHECK: %[[PAR_ARG_DECL:.*]]:2 = hlfir.declare %[[PAR_ARG]] {uniq_name = "_QFdelayed_privatization_privateEvar1"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: hlfir.assign %{{.*}} to %[[PAR_ARG_DECL]]#0 : i32, !fir.ref +! CHECK: omp.terminator diff --git a/flang/test/Lower/OpenMP/delayed-privatization-reduction.f90 b/flang/test/Lower/OpenMP/delayed-privatization-reduction.f90 new file mode 100644 index 0000000000000..c61f352b9b055 --- /dev/null +++ b/flang/test/Lower/OpenMP/delayed-privatization-reduction.f90 @@ -0,0 +1,30 @@ +! Test that reductions and delayed privatization work properly togehter. Since +! both types of clauses add block arguments to the OpenMP region, we make sure +! that the block arguments are added in the proper order (reductions first and +! then delayed privatization. + +! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s + +subroutine red_and_delayed_private + integer :: red + integer :: prv + + red = 0 + prv = 10 + + !$omp parallel reduction(+:red) private(prv) + red = red + 1 + prv = 20 + !$omp end parallel +end subroutine + +! CHECK-LABEL: omp.private {type = private} +! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : !fir.ref alloc { + +! CHECK-LABEL: omp.reduction.declare +! CHECK-SAME: @[[REDUCTION_SYM:.*]] : i32 init + +! CHECK-LABEL: _QPred_and_delayed_private +! CHECK: omp.parallel +! CHECK-SAME: reduction(@[[REDUCTION_SYM]] %{{.*}} -> %arg0 : !fir.ref) +! CHECK-SAME: private(@[[PRIVATIZER_SYM]] %{{.*}} -> %arg1 : !fir.ref) {