From ed35b6f6bb0fe4f3057aadc2343da466006faf13 Mon Sep 17 00:00:00 2001 From: ergawy Date: Wed, 14 Feb 2024 04:24:06 -0600 Subject: [PATCH 1/2] [flang][OpenMP][MLIR] Basic support for delayed privatization code-gen Adds basic support for emitting delayed privatizers from flang. So far, only types of symbols are supported (i.e. scalars), support for more complicated types will be added later. This also makes sure that reductio and delayed privatization work properly together by merging the body-gen callbacks for both in case both clauses are present on the parallel construct. --- flang/include/flang/Lower/AbstractConverter.h | 4 + flang/lib/Lower/Bridge.cpp | 2 +- flang/lib/Lower/OpenMP.cpp | 245 ++++++++++++++++-- .../delayed-privatization-firstprivate.f90 | 29 +++ .../FIR/delayed-privatization-private.f90 | 40 +++ .../OpenMP/FIR/parallel-reduction-private.f90 | 30 +++ 6 files changed, 322 insertions(+), 28 deletions(-) create mode 100644 flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90 create mode 100644 flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90 create mode 100644 flang/test/Lower/OpenMP/FIR/parallel-reduction-private.f90 diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h index 796933a4eb5f6..55bc33e76e5f6 100644 --- a/flang/include/flang/Lower/AbstractConverter.h +++ b/flang/include/flang/Lower/AbstractConverter.h @@ -16,6 +16,7 @@ #include "flang/Common/Fortran.h" #include "flang/Lower/LoweringOptions.h" #include "flang/Lower/PFTDefs.h" +#include "flang/Lower/SymbolMap.h" #include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Semantics/symbol.h" #include "mlir/IR/Builders.h" @@ -296,6 +297,9 @@ class AbstractConverter { return loweringOptions; } + virtual Fortran::lower::SymbolBox + lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) = 0; + private: /// Options controlling lowering behavior. const Fortran::lower::LoweringOptions &loweringOptions; diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 2d7f748cefa2d..a7a5b826ef7a9 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -1070,7 +1070,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { /// Find the symbol in one level up of symbol map such as for host-association /// in OpenMP code or return null. Fortran::lower::SymbolBox - lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) { + lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) override { if (Fortran::lower::SymbolBox v = localSymbols.lookupOneLevelUpSymbol(sym)) return v; return {}; diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index 9397af8b8bd05..ff3ad8ecadd42 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -40,6 +40,12 @@ static llvm::cl::opt treatIndexAsSection( llvm::cl::desc("In the OpenMP data clauses treat `a(N)` as `a(N:N)`."), llvm::cl::init(true)); +static llvm::cl::opt enableDelayedPrivatization( + "openmp-enable-delayed-privatization", + llvm::cl::desc( + "Emit `[first]private` variables as clauses on the MLIR ops."), + llvm::cl::init(false)); + using DeclareTargetCapturePair = std::pair; @@ -147,6 +153,24 @@ static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter, //===----------------------------------------------------------------------===// class DataSharingProcessor { +public: + /// Collects all the information needed for delayed privatization. This can be + /// used by ops with data-sharing clauses to proplery generate theirs regions + /// (e.g. add region arguments) and map the original SSA values to their + /// corresponding OMP region operands. + struct DelayedPrivatizationInfo { + // The list of symbols referring to delayed privatizer ops (i.e. + // `omp.private` ops). + llvm::SmallVector privatizers; + // SSA values that correspond to "original" values being privatized. + // "Original" here means the SSA value outside the OpenMP region from which + // a clone is created inside the region. + llvm::SmallVector originalAddresses; + // Fortran symbols corresponding to the above SSA values. + llvm::SmallVector symbols; + }; + +private: bool hasLastPrivateOp; mlir::OpBuilder::InsertPoint lastPrivIP; mlir::OpBuilder::InsertPoint insPt; @@ -161,6 +185,11 @@ class DataSharingProcessor { const Fortran::parser::OmpClauseList &opClauseList; Fortran::lower::pft::Evaluation &eval; + bool useDelayedPrivatization; + Fortran::lower::SymMap *symTable; + + DelayedPrivatizationInfo delayedPrivatizationInfo; + bool needBarrier(); void collectSymbols(Fortran::semantics::Symbol::Flag flag); void collectOmpObjectListSymbol( @@ -171,10 +200,13 @@ class DataSharingProcessor { void collectDefaultSymbols(); void privatize(); void defaultPrivatize(); + void doPrivatize(const Fortran::semantics::Symbol *sym); void copyLastPrivatize(mlir::Operation *op); void insertLastPrivateCompare(mlir::Operation *op); void cloneSymbol(const Fortran::semantics::Symbol *sym); - void copyFirstPrivateSymbol(const Fortran::semantics::Symbol *sym); + void + copyFirstPrivateSymbol(const Fortran::semantics::Symbol *sym, + mlir::OpBuilder::InsertPoint *copyAssignIP = nullptr); void copyLastPrivateSymbol(const Fortran::semantics::Symbol *sym, mlir::OpBuilder::InsertPoint *lastPrivIP); void insertDeallocs(); @@ -182,10 +214,14 @@ class DataSharingProcessor { public: DataSharingProcessor(Fortran::lower::AbstractConverter &converter, const Fortran::parser::OmpClauseList &opClauseList, - Fortran::lower::pft::Evaluation &eval) + Fortran::lower::pft::Evaluation &eval, + bool useDelayedPrivatization = false, + Fortran::lower::SymMap *symTable = nullptr) : hasLastPrivateOp(false), converter(converter), firOpBuilder(converter.getFirOpBuilder()), opClauseList(opClauseList), - eval(eval) {} + eval(eval), useDelayedPrivatization(useDelayedPrivatization), + symTable(symTable) {} + // Privatisation is split into two steps. // Step1 performs cloning of all privatisation clauses and copying for // firstprivates. Step1 is performed at the place where process/processStep1 @@ -204,6 +240,10 @@ class DataSharingProcessor { assert(!loopIV && "Loop iteration variable already set"); loopIV = iv; } + + const DelayedPrivatizationInfo &getDelayedPrivatizationInfo() const { + return delayedPrivatizationInfo; + } }; void DataSharingProcessor::processStep1() { @@ -250,9 +290,10 @@ void DataSharingProcessor::cloneSymbol(const Fortran::semantics::Symbol *sym) { } void DataSharingProcessor::copyFirstPrivateSymbol( - const Fortran::semantics::Symbol *sym) { + const Fortran::semantics::Symbol *sym, + mlir::OpBuilder::InsertPoint *copyAssignIP) { if (sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) - converter.copyHostAssociateVar(*sym); + converter.copyHostAssociateVar(*sym, copyAssignIP); } void DataSharingProcessor::copyLastPrivateSymbol( @@ -491,14 +532,10 @@ void DataSharingProcessor::privatize() { for (const Fortran::semantics::Symbol *sym : privatizedSymbols) { if (const auto *commonDet = sym->detailsIf()) { - for (const auto &mem : commonDet->objects()) { - cloneSymbol(&*mem); - copyFirstPrivateSymbol(&*mem); - } - } else { - cloneSymbol(sym); - copyFirstPrivateSymbol(sym); - } + for (const auto &mem : commonDet->objects()) + doPrivatize(&*mem); + } else + doPrivatize(sym); } } @@ -522,11 +559,96 @@ void DataSharingProcessor::defaultPrivatize() { !sym->GetUltimate().has() && !symbolsInNestedRegions.contains(sym) && !symbolsInParentRegions.contains(sym) && - !privatizedSymbols.contains(sym)) { + !privatizedSymbols.contains(sym)) + doPrivatize(sym); + } +} + +void DataSharingProcessor::doPrivatize(const Fortran::semantics::Symbol *sym) { + if (!useDelayedPrivatization) { + cloneSymbol(sym); + copyFirstPrivateSymbol(sym); + return; + } + + Fortran::lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym); + assert(hsb && "Host symbol box not found"); + + mlir::Type symType = hsb.getAddr().getType(); + mlir::Location symLoc = hsb.getAddr().getLoc(); + std::string privatizerName = sym->name().ToString() + ".privatizer"; + bool isFirstPrivate = + sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate); + + mlir::omp::PrivateClauseOp privatizerOp = [&]() { + auto moduleOp = firOpBuilder.getModule(); + + auto uniquePrivatizerName = fir::getTypeAsString( + symType, converter.getKindMap(), + sym->name().ToString() + + (isFirstPrivate ? "_firstprivate" : "_private")); + + if (auto existingPrivatizer = + moduleOp.lookupSymbol( + uniquePrivatizerName)) + return existingPrivatizer; + + auto ip = firOpBuilder.saveInsertionPoint(); + firOpBuilder.setInsertionPoint(&moduleOp.getBodyRegion().front(), + moduleOp.getBodyRegion().front().begin()); + auto result = firOpBuilder.create( + symLoc, uniquePrivatizerName, symType, + isFirstPrivate ? mlir::omp::DataSharingClauseType ::FirstPrivate + : mlir::omp::DataSharingClauseType::Private); + + symTable->pushScope(); + + // Populate the `alloc` region. + { + mlir::Region &allocRegion = result.getAllocRegion(); + mlir::Block *allocEntryBlock = firOpBuilder.createBlock( + &allocRegion, /*insertPt=*/{}, symType, symLoc); + + firOpBuilder.setInsertionPointToEnd(allocEntryBlock); + symTable->addSymbol(*sym, allocRegion.getArgument(0)); + symTable->pushScope(); cloneSymbol(sym); - copyFirstPrivateSymbol(sym); + firOpBuilder.create( + hsb.getAddr().getLoc(), + symTable->shallowLookupSymbol(*sym).getAddr()); + symTable->popScope(); } - } + + // Poplate the `copy` region if this is a `firstprivate`. + if (isFirstPrivate) { + mlir::Region ©Region = result.getCopyRegion(); + // First block argument corresponding to the original/host value while + // second block argument corresponding to the privatized value. + mlir::Block *copyEntryBlock = firOpBuilder.createBlock( + ©Region, /*insertPt=*/{}, {symType, symType}, {symLoc, symLoc}); + firOpBuilder.setInsertionPointToEnd(copyEntryBlock); + symTable->addSymbol(*sym, copyRegion.getArgument(0), + /*force=*/true); + symTable->pushScope(); + symTable->addSymbol(*sym, copyRegion.getArgument(1)); + auto ip = firOpBuilder.saveInsertionPoint(); + copyFirstPrivateSymbol(sym, &ip); + + firOpBuilder.create( + hsb.getAddr().getLoc(), + symTable->shallowLookupSymbol(*sym).getAddr()); + symTable->popScope(); + } + + symTable->popScope(); + firOpBuilder.restoreInsertionPoint(ip); + return result; + }(); + + delayedPrivatizationInfo.privatizers.push_back( + mlir::SymbolRefAttr::get(privatizerOp)); + delayedPrivatizationInfo.originalAddresses.push_back(hsb.getAddr()); + delayedPrivatizationInfo.symbols.push_back(sym); } //===----------------------------------------------------------------------===// @@ -2585,6 +2707,7 @@ genOrderedRegionOp(Fortran::lower::AbstractConverter &converter, static mlir::omp::ParallelOp genParallelOp(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, bool genNested, mlir::Location currentLocation, @@ -2617,8 +2740,8 @@ genParallelOp(Fortran::lower::AbstractConverter &converter, auto reductionCallback = [&](mlir::Operation *op) { llvm::SmallVector locs(reductionVars.size(), currentLocation); - auto block = converter.getFirOpBuilder().createBlock(&op->getRegion(0), {}, - reductionTypes, locs); + auto *block = converter.getFirOpBuilder().createBlock(&op->getRegion(0), {}, + reductionTypes, locs); for (auto [arg, prv] : llvm::zip_equal(reductionSymbols, block->getArguments())) { converter.bindSymbol(*arg, prv); @@ -2626,13 +2749,78 @@ genParallelOp(Fortran::lower::AbstractConverter &converter, return reductionSymbols; }; - return genOpWithBody( + OpWithBodyGenInfo genInfo = OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) .setGenNested(genNested) .setOuterCombined(outerCombined) .setClauses(&clauseList) .setReductions(&reductionSymbols, &reductionTypes) - .setGenRegionEntryCb(reductionCallback), + .setGenRegionEntryCb(reductionCallback); + + if (!enableDelayedPrivatization) { + return genOpWithBody( + genInfo, + /*resultTypes=*/mlir::TypeRange(), ifClauseOperand, + numThreadsClauseOperand, allocateOperands, allocatorOperands, + reductionVars, + reductionDeclSymbols.empty() + ? nullptr + : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), + reductionDeclSymbols), + procBindKindAttr, /*private_vars=*/llvm::SmallVector{}, + /*privatizers=*/nullptr); + } + + bool privatize = !outerCombined; + DataSharingProcessor dsp(converter, clauseList, eval, + /*useDelayedPrivatization=*/true, &symTable); + + if (privatize) + dsp.processStep1(); + + const auto &delayedPrivatizationInfo = dsp.getDelayedPrivatizationInfo(); + + auto genRegionEntryCB = [&](mlir::Operation *op) { + auto parallelOp = llvm::cast(op); + + llvm::SmallVector reductionLocs(reductionVars.size(), + currentLocation); + + auto privateVars = parallelOp.getPrivateVars(); + auto ®ion = parallelOp.getRegion(); + + llvm::SmallVector privateVarTypes = reductionTypes; + privateVarTypes.reserve(privateVarTypes.size() + privateVars.size()); + llvm::transform(privateVars, std::back_inserter(privateVarTypes), + [](mlir::Value v) { return v.getType(); }); + + llvm::SmallVector privateVarLocs = reductionLocs; + privateVarLocs.reserve(privateVarLocs.size() + privateVars.size()); + llvm::transform(privateVars, std::back_inserter(privateVarLocs), + [](mlir::Value v) { return v.getLoc(); }); + + converter.getFirOpBuilder().createBlock(®ion, /*insertPt=*/{}, + privateVarTypes, privateVarLocs); + + llvm::SmallVector allSymbols = + reductionSymbols; + allSymbols.append(delayedPrivatizationInfo.symbols); + for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) { + converter.bindSymbol(*arg, prv); + } + + return allSymbols; + }; + + // TODO Merge with the reduction CB. + genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp); + + llvm::SmallVector privatizers( + delayedPrivatizationInfo.privatizers.begin(), + delayedPrivatizationInfo.privatizers.end()); + + return genOpWithBody( + genInfo, /*resultTypes=*/mlir::TypeRange(), ifClauseOperand, numThreadsClauseOperand, allocateOperands, allocatorOperands, reductionVars, @@ -2640,8 +2828,11 @@ genParallelOp(Fortran::lower::AbstractConverter &converter, ? nullptr : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), reductionDeclSymbols), - procBindKindAttr, /*private_vars=*/llvm::SmallVector{}, - /*privatizers=*/nullptr); + procBindKindAttr, delayedPrivatizationInfo.originalAddresses, + delayedPrivatizationInfo.privatizers.empty() + ? nullptr + : mlir::ArrayAttr::get(converter.getFirOpBuilder().getContext(), + privatizers)); } static mlir::omp::SectionOp @@ -3635,7 +3826,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, if ((llvm::omp::allParallelSet & llvm::omp::loopConstructSet) .test(ompDirective)) { validDirective = true; - genParallelOp(converter, semaCtx, eval, /*genNested=*/false, + genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false, currentLocation, loopOpClauseList, /*outerCombined=*/true); } @@ -3724,8 +3915,8 @@ genOMP(Fortran::lower::AbstractConverter &converter, currentLocation); break; case llvm::omp::Directive::OMPD_parallel: - genParallelOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, - beginClauseList); + genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/true, + currentLocation, beginClauseList); break; case llvm::omp::Directive::OMPD_single: genSingleOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, @@ -3782,7 +3973,7 @@ genOMP(Fortran::lower::AbstractConverter &converter, .test(directive.v)) { bool outerCombined = directive.v != llvm::omp::Directive::OMPD_target_parallel; - genParallelOp(converter, semaCtx, eval, /*genNested=*/false, + genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false, currentLocation, beginClauseList, outerCombined); combinedDirective = true; } @@ -3865,7 +4056,7 @@ genOMP(Fortran::lower::AbstractConverter &converter, // Parallel wrapper of PARALLEL SECTIONS construct if (dir == llvm::omp::Directive::OMPD_parallel_sections) { - genParallelOp(converter, semaCtx, eval, + genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false, currentLocation, sectionsClauseList, /*outerCombined=*/true); } else { diff --git a/flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90 b/flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90 new file mode 100644 index 0000000000000..122542345f104 --- /dev/null +++ b/flang/test/Lower/OpenMP/FIR/delayed-privatization-firstprivate.f90 @@ -0,0 +1,29 @@ +! Test delayed privatization for the `private` clause. + +! RUN: bbc -emit-fir -hlfir=false -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s + +subroutine delayed_privatization_firstprivate + implicit none + integer :: var1 + +!$OMP PARALLEL FIRSTPRIVATE(var1) + var1 = 10 +!$OMP END PARALLEL +end subroutine + +! CHECK-LABEL: omp.private {type = firstprivate} +! CHECK-SAME: @[[VAR1_PRIVATIZER_SYM:.*]] : !fir.ref alloc { +! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: !fir.ref): +! CHECK-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_firstprivateEvar1"} +! CHECK-NEXT: omp.yield(%[[PRIV_ALLOC]] : !fir.ref) +! CHECK: } copy { +! CHECK: ^bb0(%[[PRIV_ORIG_ARG:.*]]: !fir.ref, %[[PRIV_PRIV_ARG:.*]]: !fir.ref): +! CHECK: %[[ORIG_VAL:.*]] = fir.load %[[PRIV_ORIG_ARG]] : !fir.ref +! CHECK: fir.store %[[ORIG_VAL]] to %[[PRIV_PRIV_ARG]] : !fir.ref +! CHECK: omp.yield(%[[PRIV_PRIV_ARG]] : !fir.ref) +! CHECK: } + +! CHECK-LABEL: @_QPdelayed_privatization_firstprivate +! CHECK: omp.parallel private(@[[VAR1_PRIVATIZER_SYM]] %{{.*}} -> %{{.*}} : !fir.ref) { +! CHECK: omp.terminator + diff --git a/flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90 b/flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90 new file mode 100644 index 0000000000000..2b1da9b0eea19 --- /dev/null +++ b/flang/test/Lower/OpenMP/FIR/delayed-privatization-private.f90 @@ -0,0 +1,40 @@ +! Test delayed privatization for the `private` clause. + +! RUN: bbc -emit-fir -hlfir=false -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s + +subroutine delayed_privatization_private + implicit none + integer :: var1 + +!$OMP PARALLEL PRIVATE(var1) + var1 = 10 +!$OMP END PARALLEL +end subroutine + +! CHECK-LABEL: omp.private {type = private} +! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : !fir.ref alloc { +! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: !fir.ref): +! CHECK-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_privateEvar1"} +! CHECK-NEXT: omp.yield(%[[PRIV_ALLOC]] : !fir.ref) +! CHECK-NOT: } copy { + +! CHECK-LABEL: @_QPdelayed_privatization_private +! CHECK: %[[ORIG_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1", uniq_name = "_QFdelayed_privatization_privateEvar1"} +! CHECK: omp.parallel private(@[[PRIVATIZER_SYM]] %[[ORIG_ALLOC]] -> %[[PAR_ARG:.*]] : !fir.ref) { +! CHECK: fir.store %{{.*}} to %[[PAR_ARG]] : !fir.ref +! CHECK: omp.terminator + +! Test that the same privatizer is used if the a variable with the same type and +! name was previously privatized. +subroutine delayed_privatization_private_same_type_and_name + implicit none + integer :: var1 + +!$OMP PARALLEL PRIVATE(var1) + var1 = 10 +!$OMP END PARALLEL +end subroutine + +! CHECK-LABEL: @_QPdelayed_privatization_private_same_type_and_name +! CHECK: %[[ORIG_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1", uniq_name = "_QFdelayed_privatization_private_same_type_and_nameEvar1"} +! CHECK: omp.parallel private(@[[PRIVATIZER_SYM]] %[[ORIG_ALLOC]] -> %[[PAR_ARG:.*]] : !fir.ref) { diff --git a/flang/test/Lower/OpenMP/FIR/parallel-reduction-private.f90 b/flang/test/Lower/OpenMP/FIR/parallel-reduction-private.f90 new file mode 100644 index 0000000000000..d72093c8eb73e --- /dev/null +++ b/flang/test/Lower/OpenMP/FIR/parallel-reduction-private.f90 @@ -0,0 +1,30 @@ +! Test that reductions and delayed privatization work properly togehter. Since +! both types of clauses add block arguments to the OpenMP region, we make sure +! that the block arguments are added in the proper order (reductions first and +! then delayed privatization. + +! RUN: bbc -emit-fir -hlfir=false -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s + +subroutine red_and_delayed_private + integer :: red + integer :: prv + + red = 0 + prv = 10 + + !$omp parallel reduction(+:red) private(prv) + red = red + 1 + prv = 20 + !$omp end parallel +end subroutine + +! CHECK-LABEL: omp.private {type = private} +! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : !fir.ref alloc { + +! CHECK-LABEL: omp.reduction.declare +! CHECK-SAME: @[[REDUCTION_SYM:.*]] : i32 init + +! CHECK-LABEL: _QPred_and_delayed_private +! CHECK: omp.parallel +! CHECK-SAME: reduction(@[[REDUCTION_SYM]] %{{.*}} -> %arg0 : !fir.ref) +! CHECK-SAME: private(@[[PRIVATIZER_SYM]] %{{.*}} -> %arg1 : !fir.ref) { From 89b8a47ad1fad459666b8749162e01dd5485cd7e Mon Sep 17 00:00:00 2001 From: ergawy Date: Tue, 20 Feb 2024 02:32:49 -0600 Subject: [PATCH 2/2] [HLFIR][flang][OpenMP][MLIR] Basic support for delayed privatization code-gen Similar to #81833 but for `hlfir` since we need different handling for the `fir` vs. `hlfir` dialect. --- flang/lib/Lower/Bridge.cpp | 11 +++++++ .../delayed-privatization-firstprivate.f90 | 30 +++++++++++++++++++ .../OpenMP/delayed-privatization-private.f90 | 28 +++++++++++++++++ 3 files changed, 69 insertions(+) create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-firstprivate.f90 create mode 100644 flang/test/Lower/OpenMP/delayed-privatization-private.f90 diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index a7a5b826ef7a9..e5e471958125c 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -1052,6 +1052,17 @@ class FirConverter : public Fortran::lower::AbstractConverter { if (sym.detailsIf()) return symMap->lookupSymbol(sym); + // For symbols to be privatized in OMP, the symbol is mapped to an + // instance of `SymbolBox::Intrinsic` (i.e. a direct mapping to an MLIR + // SSA value). This MLIR SSA value is the block argument to the + // `omp.private`'s `alloc` block. If this is the case, we return this + // `SymbolBox::Intrinsic` value. + if (Fortran::lower::SymbolBox v = symMap->lookupSymbol(sym)) + return v.match( + [&](const Fortran::lower::SymbolBox::Intrinsic &) + -> Fortran::lower::SymbolBox { return v; }, + [](const auto &) -> Fortran::lower::SymbolBox { return {}; }); + return {}; } if (Fortran::lower::SymbolBox v = symMap->lookupSymbol(sym)) diff --git a/flang/test/Lower/OpenMP/delayed-privatization-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-firstprivate.f90 new file mode 100644 index 0000000000000..3ae22b9861e67 --- /dev/null +++ b/flang/test/Lower/OpenMP/delayed-privatization-firstprivate.f90 @@ -0,0 +1,30 @@ +! Test delayed privatization for the `firstprivate` clause. + +! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s + +subroutine delayed_privatization_firstprivate + implicit none + integer :: var1 + +!$OMP PARALLEL FIRSTPRIVATE(var1) + var1 = 10 +!$OMP END PARALLEL +end subroutine + +! CHECK-LABEL: omp.private {type = firstprivate} +! CHECK-SAME: @[[VAR1_PRIVATIZER_SYM:.*]] : !fir.ref alloc { +! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: !fir.ref): +! CHECK-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_firstprivateEvar1"} +! CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]] {uniq_name = "_QFdelayed_privatization_firstprivateEvar1"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK-NEXT: omp.yield(%[[PRIV_DECL]]#0 : !fir.ref) +! CHECK: } copy { +! CHECK: ^bb0(%[[PRIV_ORIG_ARG:.*]]: !fir.ref, %[[PRIV_PRIV_ARG:.*]]: !fir.ref): +! CHECK: %[[ORIG_VAL:.*]] = fir.load %[[PRIV_ORIG_ARG]] : !fir.ref +! CHECK: hlfir.assign %[[ORIG_VAL]] to %[[PRIV_PRIV_ARG]] temporary_lhs : i32, !fir.ref +! CHECK: omp.yield(%[[PRIV_PRIV_ARG]] : !fir.ref) +! CHECK: } + +! CHECK-LABEL: @_QPdelayed_privatization_firstprivate +! CHECK: omp.parallel private(@[[VAR1_PRIVATIZER_SYM]] %{{.*}} -> %{{.*}} : !fir.ref) { +! CHECK: omp.terminator + diff --git a/flang/test/Lower/OpenMP/delayed-privatization-private.f90 b/flang/test/Lower/OpenMP/delayed-privatization-private.f90 new file mode 100644 index 0000000000000..413496668b3db --- /dev/null +++ b/flang/test/Lower/OpenMP/delayed-privatization-private.f90 @@ -0,0 +1,28 @@ +! Test delayed privatization for the `private` clause. + +! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization -o - %s 2>&1 | FileCheck %s + +subroutine delayed_privatization_private + implicit none + integer :: var1 + +!$OMP PARALLEL PRIVATE(var1) + var1 = 10 +!$OMP END PARALLEL +end subroutine + +! CHECK-LABEL: omp.private {type = private} +! CHECK-SAME: @[[PRIVATIZER_SYM:.*]] : !fir.ref alloc { +! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: !fir.ref): +! CHECK-NEXT: %[[PRIV_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1", pinned, uniq_name = "_QFdelayed_privatization_privateEvar1"} +! CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]] {uniq_name = "_QFdelayed_privatization_privateEvar1"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK-NEXT: omp.yield(%[[PRIV_DECL]]#0 : !fir.ref) +! CHECK-NOT: } copy { + +! CHECK-LABEL: @_QPdelayed_privatization_private +! CHECK: %[[ORIG_ALLOC:.*]] = fir.alloca i32 {bindc_name = "var1", uniq_name = "_QFdelayed_privatization_privateEvar1"} +! CHECK: %[[ORIG_DECL:.*]]:2 = hlfir.declare %[[ORIG_ALLOC]] {uniq_name = "_QFdelayed_privatization_privateEvar1"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: omp.parallel private(@[[PRIVATIZER_SYM]] %[[ORIG_DECL]]#0 -> %[[PAR_ARG:.*]] : !fir.ref) { +! CHECK: %[[PAR_ARG_DECL:.*]]:2 = hlfir.declare %[[PAR_ARG]] {uniq_name = "_QFdelayed_privatization_privateEvar1"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: hlfir.assign %{{.*}} to %[[PAR_ARG_DECL]]#0 : i32, !fir.ref +! CHECK: omp.terminator