From 548684f6aa82167920e755431581f0a03bfb7e46 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Fri, 14 Mar 2025 11:06:51 -0400 Subject: [PATCH 01/40] Initial implementation of tiling. --- flang/include/flang/Lower/OpenMP.h | 1 - flang/lib/Lower/OpenMP/OpenMP.cpp | 70 +++++-- flang/lib/Lower/OpenMP/Utils.cpp | 33 +++- flang/lib/Semantics/canonicalize-omp.cpp | 44 ++++- flang/lib/Semantics/resolve-directives.cpp | 176 ++++++++++++++---- .../Frontend/OpenMP/ConstructDecompositionT.h | 24 +++ .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 9 + llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 24 +++ llvm/lib/Transforms/Utils/CodeExtractor.cpp | 7 +- .../mlir/Dialect/OpenMP/OpenMPClauses.td | 32 ++++ mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 4 +- .../Conversion/SCFToOpenMP/SCFToOpenMP.cpp | 3 +- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 52 +++++- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 65 +++++-- 14 files changed, 458 insertions(+), 86 deletions(-) diff --git a/flang/include/flang/Lower/OpenMP.h b/flang/include/flang/Lower/OpenMP.h index 581c93f76d627..df01a7b82c66c 100644 --- a/flang/include/flang/Lower/OpenMP.h +++ b/flang/include/flang/Lower/OpenMP.h @@ -80,7 +80,6 @@ void genOpenMPDeclarativeConstruct(AbstractConverter &, void genOpenMPSymbolProperties(AbstractConverter &converter, const pft::Variable &var); -int64_t getCollapseValue(const Fortran::parser::OmpClauseList &clauseList); void genThreadprivateOp(AbstractConverter &, const pft::Variable &); void genDeclareTargetIntGlobal(AbstractConverter &, const pft::Variable &); bool isOpenMPTargetConstruct(const parser::OpenMPConstruct &); diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index def6cfff88231..f152416d6e69e 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -406,6 +406,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, return; const parser::OmpClauseList *beginClauseList = nullptr; + const parser::OmpClauseList *middleClauseList = nullptr; const parser::OmpClauseList *endClauseList = nullptr; common::visit( common::visitors{ @@ -420,6 +421,22 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, beginClauseList = &std::get(beginDirective.t); + // FIXME(JAN): For now we check if there is an inner + // OpenMPLoopConstruct, and extract the size clause from there + const auto &innerOptional = std::get>>( + ompConstruct.t); + if (innerOptional.has_value()) { + const auto &innerLoopDirective = innerOptional.value().value(); + const auto &innerBegin = + std::get(innerLoopDirective.t); + const auto &innerDirective = + std::get(innerBegin.t); + if (innerDirective.v == llvm::omp::Directive::OMPD_tile) { + middleClauseList = + &std::get(innerBegin.t); + } + } if (auto &endDirective = std::get>( ompConstruct.t)) { @@ -433,6 +450,9 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, assert(beginClauseList && "expected begin directive"); clauses.append(makeClauses(*beginClauseList, semaCtx)); + if (middleClauseList) + clauses.append(makeClauses(*middleClauseList, semaCtx)); + if (endClauseList) clauses.append(makeClauses(*endClauseList, semaCtx)); }; @@ -919,6 +939,7 @@ static void genLoopVars( storeOp = createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); } + firOpBuilder.setInsertionPointAfter(storeOp); } @@ -1572,6 +1593,23 @@ genLoopNestClauses(lower::AbstractConverter &converter, cp.processCollapse(loc, eval, clauseOps, iv); clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr(); + + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + for (auto &clause : clauses) { + if (clause.id == llvm::omp::Clause::OMPC_collapse) { + const auto &collapse = std::get(clause.u); + int64_t collapseValue = evaluate::ToInt64(collapse.v).value(); + clauseOps.numCollapse = firOpBuilder.getI64IntegerAttr(collapseValue); + } else if (clause.id == llvm::omp::Clause::OMPC_sizes) { + const auto &sizes = std::get(clause.u); + llvm::SmallVector sizeValues; + for (auto &size : sizes.v) { + int64_t sizeValue = evaluate::ToInt64(size).value(); + sizeValues.push_back(sizeValue); + } + clauseOps.tileSizes = sizeValues; + } + } } static void genLoopClauses( @@ -1948,9 +1986,9 @@ static mlir::omp::LoopNestOp genLoopNestOp( return llvm::SmallVector(iv); }; - auto *nestedEval = - getCollapsedLoopEval(eval, getCollapseValue(item->clauses)); - + uint64_t nestValue = getCollapseValue(item->clauses); + nestValue = nestValue < iv.size() ? iv.size() : nestValue; + auto *nestedEval = getCollapsedLoopEval(eval, nestValue); return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval, directive) @@ -3823,6 +3861,20 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, std::get(loopConstruct.t); List clauses = makeClauses( std::get(beginLoopDirective.t), semaCtx); + + const auto &innerOptional = std::get>>(loopConstruct.t); + if (innerOptional.has_value()) { + const auto &innerLoopDirective = innerOptional.value().value(); + const auto &innerBegin = + std::get(innerLoopDirective.t); + const auto &innerDirective = + std::get(innerBegin.t); + if (innerDirective.v == llvm::omp::Directive::OMPD_tile) { + clauses.append( + makeClauses(std::get(innerBegin.t), semaCtx)); + } + } + if (auto &endLoopDirective = std::get>( loopConstruct.t)) { @@ -3957,18 +4009,6 @@ void Fortran::lower::genOpenMPSymbolProperties( lower::genDeclareTargetIntGlobal(converter, var); } -int64_t -Fortran::lower::getCollapseValue(const parser::OmpClauseList &clauseList) { - for (const parser::OmpClause &clause : clauseList.v) { - if (const auto &collapseClause = - std::get_if(&clause.u)) { - const auto *expr = semantics::GetExpr(collapseClause->v); - return evaluate::ToInt64(*expr).value(); - } - } - return 1; -} - void Fortran::lower::genThreadprivateOp(lower::AbstractConverter &converter, const lower::pft::Variable &var) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index cb6dd57667824..1a300756bccd8 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -39,14 +39,22 @@ namespace lower { namespace omp { int64_t getCollapseValue(const List &clauses) { - auto iter = llvm::find_if(clauses, [](const Clause &clause) { - return clause.id == llvm::omp::Clause::OMPC_collapse; - }); - if (iter != clauses.end()) { - const auto &collapse = std::get(iter->u); - return evaluate::ToInt64(collapse.v).value(); + int64_t collapseValue = 1; + int64_t numTileSizes = 0; + for (auto &clause : clauses) { + if (clause.id == llvm::omp::Clause::OMPC_collapse) { + const auto &collapse = std::get(clause.u); + collapseValue = evaluate::ToInt64(collapse.v).value(); + } else if (clause.id == llvm::omp::Clause::OMPC_sizes) { + const auto &sizes = std::get(clause.u); + numTileSizes = sizes.v.size(); + } } - return 1; + + collapseValue = collapseValue - numTileSizes; + int64_t result = + collapseValue > numTileSizes ? collapseValue : numTileSizes; + return result; } void genObjectList(const ObjectList &objects, @@ -582,6 +590,7 @@ bool collectLoopRelatedInfo( lower::pft::Evaluation &eval, const omp::List &clauses, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv) { + bool found = false; fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); @@ -597,7 +606,16 @@ bool collectLoopRelatedInfo( collapseValue = evaluate::ToInt64(clause->v).value(); found = true; } + std::int64_t sizesLengthValue = 0l; + if (auto *clause = + ClauseFinder::findUniqueClause(clauses)) { + sizesLengthValue = clause->v.size(); + found = true; + } + collapseValue = collapseValue - sizesLengthValue; + collapseValue = + collapseValue < sizesLengthValue ? sizesLengthValue : collapseValue; std::size_t loopVarTypeSize = 0; do { lower::pft::Evaluation *doLoop = @@ -630,7 +648,6 @@ bool collectLoopRelatedInfo( } while (collapseValue > 0); convertLoopBounds(converter, currentLocation, result, loopVarTypeSize); - return found; } diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 9722eca19447d..fb0bb0f923574 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -11,6 +11,7 @@ #include "flang/Parser/parse-tree.h" #include "flang/Semantics/semantics.h" +# include // After Loop Canonicalization, rewrite OpenMP parse tree to make OpenMP // Constructs more structured which provide explicit scopes for later // structural checks and semantic analysis. @@ -117,15 +118,17 @@ class CanonicalizationOfOmp { // in the same iteration // // Original: - // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct - // OmpBeginLoopDirective + // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t-> + // OmpBeginLoopDirective t-> OmpLoopDirective + // [ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct u-> + /// OmpBeginLoopDirective t-> OmpLoopDirective t-> Tile v-> OMP_tile] // ExecutableConstruct -> DoConstruct + // [ExecutableConstruct -> OmpEndLoopDirective] // ExecutableConstruct -> OmpEndLoopDirective (if available) // // After rewriting: - // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct - // OmpBeginLoopDirective - // DoConstruct + // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t-> + // OmpBeginLoopDirective t -> OmpLoopDirective -> DoConstruct // OmpEndLoopDirective (if available) parser::Block::iterator nextIt; auto &beginDir{std::get(x.t)}; @@ -147,20 +150,41 @@ class CanonicalizationOfOmp { if (GetConstructIf(*nextIt)) continue; + // Keep track of the loops to handle the end loop directives + std::stack loops; + loops.push(&x); + while (auto *innerConstruct{ + GetConstructIf(*nextIt)}) { + if (auto *innerOmpLoop{ + std::get_if(&innerConstruct->u)}) { + std::get< + std::optional>>( + loops.top()->t) = std::move(*innerOmpLoop); + // Retrieveing the address so that DoConstruct or inner loop can be + // set later. + loops.push(&(std::get>>( + loops.top()->t) + .value() + .value())); + nextIt = block.erase(nextIt); + } + } if (auto *doCons{GetConstructIf(*nextIt)}) { if (doCons->GetLoopControl()) { // move DoConstruct std::get>>>(x.t) = - std::move(*doCons); + common::Indirection>>>( + loops.top()->t) = std::move(*doCons); nextIt = block.erase(nextIt); // try to match OmpEndLoopDirective - if (nextIt != block.end()) { + while (nextIt != block.end() && !loops.empty()) { if (auto *endDir{ GetConstructIf(*nextIt)}) { - std::get>(x.t) = - std::move(*endDir); + std::get>( + loops.top()->t) = std::move(*endDir); nextIt = block.erase(nextIt); + loops.pop(); } } } else { diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 43f12c2b14038..806ae00720ce8 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -856,7 +856,28 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { const parser::OmpClause *GetAssociatedClause() { return associatedClause; } private: + std::int64_t SetAssociatedMaxClause(llvm::SmallVector &, + llvm::SmallVector &); + std::int64_t GetAssociatedLoopLevelFromLoopConstruct( + const parser::OpenMPLoopConstruct &); std::int64_t GetAssociatedLoopLevelFromClauses(const parser::OmpClauseList &); + void CollectAssociatedLoopLevelsFromLoopConstruct( + const parser::OpenMPLoopConstruct &, llvm::SmallVector &, + llvm::SmallVector &); + void CollectAssociatedLoopLevelsFromInnerLoopContruct( + const parser::OpenMPLoopConstruct &, llvm::SmallVector &, + llvm::SmallVector &); + template + void CollectAssociatedLoopLevelFromClauseValue( + const parser::OmpClause &clause, llvm::SmallVector &, + llvm::SmallVector &); + template + void CollectAssociatedLoopLevelFromClauseSize(const parser::OmpClause &, + llvm::SmallVector &, + llvm::SmallVector &); + void CollectAssociatedLoopLevelsFromClauses(const parser::OmpClauseList &, + llvm::SmallVector &, + llvm::SmallVector &); Symbol::Flags dataSharingAttributeFlags{Symbol::Flag::OmpShared, Symbol::Flag::OmpPrivate, Symbol::Flag::OmpFirstPrivate, @@ -1868,7 +1889,6 @@ bool OmpAttributeVisitor::Pre( bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { const auto &beginLoopDir{std::get(x.t)}; const auto &beginDir{std::get(beginLoopDir.t)}; - const auto &clauseList{std::get(beginLoopDir.t)}; switch (beginDir.v) { case llvm::omp::Directive::OMPD_distribute: case llvm::omp::Directive::OMPD_distribute_parallel_do: @@ -1919,7 +1939,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { beginDir.v == llvm::omp::Directive::OMPD_target_loop) IssueNonConformanceWarning(beginDir.v, beginDir.source, 52); ClearDataSharingAttributeObjects(); - SetContextAssociatedLoopLevel(GetAssociatedLoopLevelFromClauses(clauseList)); + SetContextAssociatedLoopLevel(GetAssociatedLoopLevelFromLoopConstruct(x)); if (beginDir.v == llvm::omp::Directive::OMPD_do) { auto &optLoopCons = std::get>(x.t); @@ -1933,7 +1953,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { } } PrivatizeAssociatedLoopIndexAndCheckLoopLevel(x); - ordCollapseLevel = GetAssociatedLoopLevelFromClauses(clauseList) + 1; + ordCollapseLevel = GetAssociatedLoopLevelFromLoopConstruct(x) + 1; return true; } @@ -2021,44 +2041,124 @@ bool OmpAttributeVisitor::Pre(const parser::DoConstruct &x) { return true; } +static bool isSizesClause(const parser::OmpClause *clause) { + return std::holds_alternative(clause->u); +} + +std::int64_t OmpAttributeVisitor::SetAssociatedMaxClause( + llvm::SmallVector &levels, + llvm::SmallVector &clauses) { + + // Find the tile level to know how much to reduce the level for collapse + std::int64_t tileLevel = 0; + for (auto [level, clause] : llvm::zip_equal(levels, clauses)) { + if (isSizesClause(clause)) { + tileLevel = level; + } + } + + std::int64_t maxLevel = 1; + const parser::OmpClause *maxClause = nullptr; + for (auto [level, clause] : llvm::zip_equal(levels, clauses)) { + if (tileLevel > 0 && tileLevel < level) { + context_.Say(clause->source, + "The value of the parameter in the COLLAPSE clause must" + " not be larger than the number of the number of tiled loops" + " because collapse relies on independent loop iterations."_err_en_US); + return 1; + } + + if (!isSizesClause(clause)) { + level = level - tileLevel; + } + + if (level > maxLevel) { + maxLevel = level; + maxClause = clause; + } + } + if (maxClause) + SetAssociatedClause(maxClause); + return maxLevel; +} + +std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromLoopConstruct( + const parser::OpenMPLoopConstruct &x) { + llvm::SmallVector levels; + llvm::SmallVector clauses; + + CollectAssociatedLoopLevelsFromLoopConstruct(x, levels, clauses); + return SetAssociatedMaxClause(levels, clauses); +} + std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromClauses( const parser::OmpClauseList &x) { - std::int64_t orderedLevel{0}; - std::int64_t collapseLevel{0}; + llvm::SmallVector levels; + llvm::SmallVector clauses; - const parser::OmpClause *ordClause{nullptr}; - const parser::OmpClause *collClause{nullptr}; + CollectAssociatedLoopLevelsFromClauses(x, levels, clauses); + return SetAssociatedMaxClause(levels, clauses); +} - for (const auto &clause : x.v) { - if (const auto *orderedClause{ - std::get_if(&clause.u)}) { - if (const auto v{EvaluateInt64(context_, orderedClause->v)}) { - orderedLevel = *v; - } - ordClause = &clause; - } - if (const auto *collapseClause{ - std::get_if(&clause.u)}) { - if (const auto v{EvaluateInt64(context_, collapseClause->v)}) { - collapseLevel = *v; - } - collClause = &clause; +void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromLoopConstruct( + const parser::OpenMPLoopConstruct &x, + llvm::SmallVector &levels, + llvm::SmallVector &clauses) { + const auto &beginLoopDir{std::get(x.t)}; + const auto &clauseList{std::get(beginLoopDir.t)}; + + CollectAssociatedLoopLevelsFromClauses(clauseList, levels, clauses); + CollectAssociatedLoopLevelsFromInnerLoopContruct(x, levels, clauses); +} + +void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromInnerLoopContruct( + const parser::OpenMPLoopConstruct &x, + llvm::SmallVector &levels, + llvm::SmallVector &clauses) { + const auto &innerOptional = + std::get>>( + x.t); + if (innerOptional.has_value()) { + CollectAssociatedLoopLevelsFromLoopConstruct( + innerOptional.value().value(), levels, clauses); + } +} + +template +void OmpAttributeVisitor::CollectAssociatedLoopLevelFromClauseValue( + const parser::OmpClause &clause, llvm::SmallVector &levels, + llvm::SmallVector &clauses) { + if (const auto tclause{std::get_if(&clause.u)}) { + std::int64_t level = 0; + if (const auto v{EvaluateInt64(context_, tclause->v)}) { + level = *v; } + levels.push_back(level); + clauses.push_back(&clause); } +} - if (orderedLevel && (!collapseLevel || orderedLevel >= collapseLevel)) { - SetAssociatedClause(ordClause); - return orderedLevel; - } else if (!orderedLevel && collapseLevel) { - SetAssociatedClause(collClause); - return collapseLevel; - } else { - SetAssociatedClause(nullptr); +template +void OmpAttributeVisitor::CollectAssociatedLoopLevelFromClauseSize( + const parser::OmpClause &clause, llvm::SmallVector &levels, + llvm::SmallVector &clauses) { + if (const auto tclause{std::get_if(&clause.u)}) { + levels.push_back(tclause->v.size()); + clauses.push_back(&clause); } - // orderedLevel < collapseLevel is an error handled in structural - // checks +} - return 1; // default is outermost loop +void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromClauses( + const parser::OmpClauseList &x, llvm::SmallVector &levels, + llvm::SmallVector &clauses) { + for (const auto &clause : x.v) { + CollectAssociatedLoopLevelFromClauseValue( + clause, levels, clauses); + CollectAssociatedLoopLevelFromClauseValue( + clause, levels, clauses); + CollectAssociatedLoopLevelFromClauseSize( + clause, levels, clauses); + } } // 2.15.1.1 Data-sharing Attribute Rules - Predetermined @@ -2090,10 +2190,18 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( const parser::OmpClause *clause{GetAssociatedClause()}; bool hasCollapseClause{ clause ? (clause->Id() == llvm::omp::OMPC_collapse) : false}; + const parser::OpenMPLoopConstruct *innerMostLoop = &x; + + while (auto &optLoopCons{ + std::get>(x.t)}) { + if (const auto &innerLoop{ + std::get_if < parser::OpenMPLoopConstruct >>> (innerMostLoop->t)}) { + innerMostLoop = &innerLoop.value().value(); + } + } - auto &optLoopCons = std::get>(x.t); if (optLoopCons.has_value()) { - if (const auto &outer{std::get_if(&*optLoopCons)}) { + if (const auto &outer{std::get_if(innerMostLoop->t)}) { for (const parser::DoConstruct *loop{&*outer}; loop && level > 0; --level) { if (loop->IsDoConcurrent()) { diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index 047baa3a79f5d..83db78667c7f8 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -209,6 +209,8 @@ struct ConstructDecompositionT { bool applyClause(const tomp::clause::CollapseT &clause, const ClauseTy *); + bool applyClause(const tomp::clause::SizesT &clause, + const ClauseTy *); bool applyClause(const tomp::clause::PrivateT &clause, const ClauseTy *); bool @@ -482,6 +484,28 @@ bool ConstructDecompositionT::applyClause( return false; } +// FIXME(JAN): Do the correct thing, but for now we'll do the same as collapse +template +bool ConstructDecompositionT::applyClause( + const tomp::clause::SizesT &clause, + const ClauseTy *node) { + // Apply "sizes" to the innermost directive. If it's not one that + // allows it flag an error. + if (!leafs.empty()) { + auto &last = leafs.back(); + + if (llvm::omp::isAllowedClauseForDirective(last.id, node->id, version)) { + last.clauses.push_back(node); + return true; + } else { + llvm::errs() << "** OVERRIDING isAllowedClauseForDirective **\n"; + last.clauses.push_back(node); + return true; + } + } + + return false; +} // PRIVATE // [5.2:111:5-7] diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 1050e3d8b08dd..a994f23c1fbe2 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -2257,6 +2257,9 @@ class OpenMPIRBuilder { /// Return the function that contains the region to be outlined. Function *getFunction() const { return EntryBB->getParent(); } + + /// Dump the info in a somewhat readable way + void dump(); }; /// Collection of regions that need to be outlined during finalization. @@ -2277,6 +2280,9 @@ class OpenMPIRBuilder { /// Add a new region that will be outlined later. void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); } + /// Dump outline infos + void dumpOutlineInfos(); + /// An ordered map of auto-generated variables to their unique names. /// It stores variables with the following names: 1) ".gomp_critical_user_" + /// + ".var" for "omp critical" directives; 2) @@ -3910,6 +3916,9 @@ class CanonicalLoopInfo { /// Invalidate this loop. That is, the underlying IR does not fulfill the /// requirements of an OpenMP canonical loop anymore. LLVM_ABI void invalidate(); + + /// Dump the info in a somewhat readable way + void dump(); }; /// ScanInfo holds the information to assist in lowering of Scan reduction. diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 3d5e487c8990f..805723aef1348 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -9164,6 +9164,15 @@ Error OpenMPIRBuilder::emitOffloadingArrays( return Error::success(); } +void OpenMPIRBuilder::dumpOutlineInfos() { + errs() << "=== Outline Infos Begin ===\n"; + for (auto En : enumerate(OutlineInfos)) { + errs() << "[" << En.index() << "]: "; + En.value().dump(); + } + errs() << "=== Outline Infos End ===\n"; +} + void OpenMPIRBuilder::emitBranch(BasicBlock *Target) { BasicBlock *CurBB = Builder.GetInsertBlock(); @@ -10088,6 +10097,14 @@ void OpenMPIRBuilder::OutlineInfo::collectBlocks( } } +void OpenMPIRBuilder::OutlineInfo::dump() { + errs() << "=== OutilneInfo == " + << " EntryBB: " << (EntryBB ? EntryBB->getName() : "n\a") + << " ExitBB: " << (ExitBB ? ExitBB->getName() : "n\a") + << " OuterAllocaBB: " + << (OuterAllocaBB ? OuterAllocaBB->getName() : "n/a") << "\n"; +} + void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, @@ -10865,3 +10882,10 @@ void CanonicalLoopInfo::invalidate() { Latch = nullptr; Exit = nullptr; } + +void CanonicalLoopInfo::dump() { + errs() << "CanonicaLoop == Header: " << (Header ? Header->getName() : "n/a") + << " Cond: " << (Cond ? Cond->getName() : "n/a") + << " Latch: " << (Latch ? Latch->getName() : "n/a") + << " Exit: " << (Exit ? Exit->getName() : "n/a") << "\n"; +} diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index bbd1ed6a3ab2d..7ad1e70cb6e75 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -810,7 +810,11 @@ void CodeExtractor::severSplitPHINodesOfExits() { } void CodeExtractor::splitReturnBlocks() { - for (BasicBlock *Block : Blocks) + for (BasicBlock *Block : Blocks) { + if (!Block->getTerminator()) { + errs() << "====== No terminator in block: " << Block->getName() + << "======\n"; + } if (ReturnInst *RI = dyn_cast(Block->getTerminator())) { BasicBlock *New = Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret"); @@ -827,6 +831,7 @@ void CodeExtractor::splitReturnBlocks() { DT->changeImmediateDominator(I, NewNode); } } + } } Function *CodeExtractor::constructFunctionDeclaration( diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td index 311c57fb4446c..eb836db890738 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td @@ -317,6 +317,38 @@ class OpenMP_DeviceClauseSkip< def OpenMP_DeviceClause : OpenMP_DeviceClauseSkip<>; +//===----------------------------------------------------------------------===// +// V5.2: [XX.X] `collapse` clause +//===----------------------------------------------------------------------===// + +class OpenMP_CollapseClauseSkip< + bit traits = false, bit arguments = false, bit assemblyFormat = false, + bit description = false, bit extraClassDeclaration = false + > : OpenMP_Clause { + let arguments = (ins + DefaultValuedOptionalAttr:$num_collapse + ); +} + +def OpenMP_CollapseClause : OpenMP_CollapseClauseSkip<>; + +//===----------------------------------------------------------------------===// +// V5.2: [xx.x] `sizes` clause +//===----------------------------------------------------------------------===// + +class OpenMP_TileSizesClauseSkip< + bit traits = false, bit arguments = false, bit assemblyFormat = false, + bit description = false, bit extraClassDeclaration = false + > : OpenMP_Clause { + let arguments = (ins + OptionalAttr:$tile_sizes + ); +} + +def OpenMP_TileSizesClause : OpenMP_TileSizesClauseSkip<>; + //===----------------------------------------------------------------------===// // V5.2: [11.6.1] `dist_schedule` clause //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 2548a8ab4aac6..e17315d923317 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -614,7 +614,9 @@ def WorkshareLoopWrapperOp : OpenMP_Op<"workshare.loop_wrapper", traits = [ def LoopNestOp : OpenMP_Op<"loop_nest", traits = [ RecursiveMemoryEffects, SameVariadicOperandSize ], clauses = [ - OpenMP_LoopRelatedClause + OpenMP_LoopRelatedClause, + OpenMP_CollapseClause, + OpenMP_TileSizesClause ], singleRegion = true> { let summary = "rectangular loop nest"; let description = [{ diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp index c4a9fc2e556f1..aa5b532058737 100644 --- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp +++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp @@ -493,7 +493,8 @@ struct ParallelOpLowering : public OpRewritePattern { // Create loop nest and populate region with contents of scf.parallel. auto loopOp = omp::LoopNestOp::create( rewriter, parallelOp.getLoc(), parallelOp.getLowerBound(), - parallelOp.getUpperBound(), parallelOp.getStep()); + parallelOp.getUpperBound(), parallelOp.getStep(), false, 1, + nullptr); rewriter.inlineRegionBefore(parallelOp.getRegion(), loopOp.getRegion(), loopOp.getRegion().begin()); diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 6e43f28e8d93d..370aaaf4bf3a6 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -56,6 +56,11 @@ makeDenseBoolArrayAttr(MLIRContext *ctx, const ArrayRef boolArray) { return boolArray.empty() ? nullptr : DenseBoolArrayAttr::get(ctx, boolArray); } +static DenseI64ArrayAttr +makeDenseI64ArrayAttr(MLIRContext *ctx, const ArrayRef intArray) { + return intArray.empty() ? nullptr : DenseI64ArrayAttr::get(ctx, intArray); +} + namespace { struct MemRefPointerLikeModel : public PointerLikeType::ExternalModel steps; @@ -2967,6 +2972,38 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) { parser.parseOperandList(steps, ivs.size(), OpAsmParser::Delimiter::Paren)) return failure(); + // Parse collapse + int64_t value = 0; + if (!parser.parseOptionalKeyword("collapse") && + (parser.parseLParen() || parser.parseInteger(value) || + parser.parseRParen())) + return failure(); + if (value > 1) { + result.addAttribute( + "num_collapse", + IntegerAttr::get(parser.getBuilder().getI64Type(), value)); + } + + // Parse tiles + SmallVector tiles; + auto parseTiles = [&]() -> ParseResult { + int64_t tile; + if (parser.parseInteger(tile)) + return failure(); + tiles.push_back(tile); + return success(); + }; + + if (!parser.parseOptionalKeyword("tiles") && + (parser.parseLParen() || + parser.parseCommaSeparatedList(parseTiles) || + parser.parseRParen())) + return failure(); + + if (tiles.size() > 0) { + result.addAttribute("tile_sizes", DenseI64ArrayAttr::get(ctx, tiles)); + } + // Parse the body. Region *region = result.addRegion(); if (parser.parseRegion(*region, ivs)) @@ -2990,14 +3027,23 @@ void LoopNestOp::print(OpAsmPrinter &p) { if (getLoopInclusive()) p << "inclusive "; p << "step (" << getLoopSteps() << ") "; + if (int64_t numCollapse = getNumCollapse()) { + if (numCollapse > 1) + p << "collapse(" << numCollapse << ") "; + } + if (const auto tiles = getTileSizes()) { + p << "tiles(" << tiles.value() << ") "; + } p.printRegion(region, /*printEntryBlockArgs=*/false); } void LoopNestOp::build(OpBuilder &builder, OperationState &state, const LoopNestOperands &clauses) { + MLIRContext *ctx = builder.getContext(); LoopNestOp::build(builder, state, clauses.loopLowerBounds, clauses.loopUpperBounds, clauses.loopSteps, - clauses.loopInclusive); + clauses.loopInclusive, clauses.numCollapse, + makeDenseI64ArrayAttr(ctx, clauses.tileSizes)); } LogicalResult LoopNestOp::verify() { diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 4e26e65cf9718..f97b30f4d6ff0 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -2972,10 +2972,9 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, /// Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder. static LogicalResult convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) { + LLVM::ModuleTranslation &moduleTranslation) { llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); auto loopOp = cast(opInst); - // Set up the source location value for OpenMP runtime. llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); @@ -3041,18 +3040,60 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, loopInfos.push_back(*loopResult); } - // Collapse loops. Store the insertion point because LoopInfos may get - // invalidated. + // llvm::OpenMPIRBuilder::InsertPointTy afterIP = builder.saveIP(); llvm::OpenMPIRBuilder::InsertPointTy afterIP = - loopInfos.front()->getAfterIP(); + loopInfos.front()->getAfterIP(); - // Update the stack frame created for this loop to point to the resulting loop - // after applying transformations. - moduleTranslation.stackWalk( - [&](OpenMPLoopInfoStackFrame &frame) { - frame.loopInfo = ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); - return WalkResult::interrupt(); - }); + // Initialize the new loop info to the current one, in case there + // are no loop transformations done. + llvm::CanonicalLoopInfo *NewTopLoopInfo = nullptr; + + // Do tiling + if (const auto &tiles = loopOp.getTileSizes()) { + llvm::Type *IVType = loopInfos.front()->getIndVarType(); + SmallVector TileSizes; + + for (auto tile : tiles.value()) { + llvm::Value *TileVal = llvm::ConstantInt::get(IVType, tile); + TileSizes.push_back(TileVal); + } + + std::vector NewLoops = + ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes); + + // Collapse loops. Store the insertion point because LoopInfos may get + // invalidated. + auto AfterBB = NewLoops.front()->getAfter(); + auto AfterAfterBB = AfterBB->getSingleSuccessor(); + afterIP = {AfterAfterBB, AfterAfterBB->begin()}; + NewTopLoopInfo = NewLoops[0]; + + // Update the loop infos + loopInfos.clear(); + for (const auto& newLoop : NewLoops) { + loopInfos.push_back(newLoop); + } + } // Tiling done + + // Do collapse + if (const auto &numCollapse = loopOp.getNumCollapse()) { + SmallVector collapseLoopInfos( + loopInfos.begin(), loopInfos.begin() + (numCollapse)); + + auto newLoopInfo = + ompBuilder->collapseLoops(ompLoc.DL, collapseLoopInfos, {}); + NewTopLoopInfo = newLoopInfo; + } // Collapse done + + // Update the stack frame created for this loop to point to the resulting + // loop after applying transformations. + if (NewTopLoopInfo) { + moduleTranslation.stackWalk( + [&](OpenMPLoopInfoStackFrame &frame) { + frame.loopInfo = NewTopLoopInfo; + return WalkResult::interrupt(); + }); + } // Continue building IR after the loop. Note that the LoopInfo returned by // `collapseLoops` points inside the outermost loop and is intended for From 02fb537fb71591ad3cbbdaea2e01e39ceed3eed7 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Tue, 10 Jun 2025 09:51:02 -0400 Subject: [PATCH 02/40] Fix tests and limit the nesting of construct to only tiling. --- flang/lib/Semantics/canonicalize-omp.cpp | 34 ++++++++++++------- .../Lower/OpenMP/parallel-wsloop-lastpriv.f90 | 4 +-- flang/test/Lower/OpenMP/simd.f90 | 2 +- flang/test/Lower/OpenMP/wsloop-variable.f90 | 2 +- flang/test/Semantics/OpenMP/do-collapse.f90 | 1 + .../LLVMIR/omptarget-wsloop-collapsed.mlir | 2 +- mlir/test/Target/LLVMIR/openmp-llvm.mlir | 12 +++---- 7 files changed, 33 insertions(+), 24 deletions(-) diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index fb0bb0f923574..10eaaa83f5f4f 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -149,27 +149,32 @@ class CanonicalizationOfOmp { // Ignore compiler directives. if (GetConstructIf(*nextIt)) continue; - // Keep track of the loops to handle the end loop directives std::stack loops; loops.push(&x); - while (auto *innerConstruct{ + if (auto *innerConstruct{ GetConstructIf(*nextIt)}) { if (auto *innerOmpLoop{ std::get_if(&innerConstruct->u)}) { - std::get< - std::optional>>( - loops.top()->t) = std::move(*innerOmpLoop); - // Retrieveing the address so that DoConstruct or inner loop can be - // set later. - loops.push(&(std::get>>( - loops.top()->t) - .value() - .value())); - nextIt = block.erase(nextIt); + auto &innerBeginDir{ + std::get(innerOmpLoop->t)}; + auto &innerDir{std::get(innerBeginDir.t)}; + if (innerDir.v == llvm::omp::Directive::OMPD_tile) { + std::get>>( + loops.top()->t) = std::move(*innerOmpLoop); + // Retrieveing the address so that DoConstruct or inner loop can be + // set later. + loops.push(&(std::get>>( + loops.top()->t) + .value() + .value())); + nextIt = block.erase(nextIt); + } } } + if (auto *doCons{GetConstructIf(*nextIt)}) { if (doCons->GetLoopControl()) { // move DoConstruct @@ -185,6 +190,9 @@ class CanonicalizationOfOmp { loops.top()->t) = std::move(*endDir); nextIt = block.erase(nextIt); loops.pop(); + } else { + // If there is a mismatch bail out. + break; } } } else { diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 index 2890e78e9d17f..faf8f717f6308 100644 --- a/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 +++ b/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 @@ -108,7 +108,7 @@ subroutine omp_do_lastprivate_collapse2(a) ! CHECK-NEXT: %[[UB2:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref ! CHECK-NEXT: %[[STEP2:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[I_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[J_PVT_REF:.*]] : !fir.ref, !fir.ref, !fir.ref) { - ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[LB1]], %[[LB2]]) to (%[[UB1]], %[[UB2]]) inclusive step (%[[STEP1]], %[[STEP2]]) { + ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[LB1]], %[[LB2]]) to (%[[UB1]], %[[UB2]]) inclusive step (%[[STEP1]], %[[STEP2]]) collapse(2) { ! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ea"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -174,7 +174,7 @@ subroutine omp_do_lastprivate_collapse3(a) ! CHECK-NEXT: %[[UB3:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref ! CHECK-NEXT: %[[STEP3:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[I_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[J_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[K_PVT_REF:.*]] : !fir.ref, !fir.ref, !fir.ref, !fir.ref) { - ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) : i32 = (%[[LB1]], %[[LB2]], %[[LB3]]) to (%[[UB1]], %[[UB2]], %[[UB3]]) inclusive step (%[[STEP1]], %[[STEP2]], %[[STEP3]]) { + ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) : i32 = (%[[LB1]], %[[LB2]], %[[LB3]]) to (%[[UB1]], %[[UB2]], %[[UB3]]) inclusive step (%[[STEP1]], %[[STEP2]], %[[STEP3]]) collapse(3) { ! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ea"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) diff --git a/flang/test/Lower/OpenMP/simd.f90 b/flang/test/Lower/OpenMP/simd.f90 index 7655c786573e3..369b5eb072af9 100644 --- a/flang/test/Lower/OpenMP/simd.f90 +++ b/flang/test/Lower/OpenMP/simd.f90 @@ -175,7 +175,7 @@ subroutine simd_with_collapse_clause(n) ! CHECK-NEXT: omp.loop_nest (%[[ARG_0:.*]], %[[ARG_1:.*]]) : i32 = ( ! CHECK-SAME: %[[LOWER_I]], %[[LOWER_J]]) to ( ! CHECK-SAME: %[[UPPER_I]], %[[UPPER_J]]) inclusive step ( - ! CHECK-SAME: %[[STEP_I]], %[[STEP_J]]) { + ! CHECK-SAME: %[[STEP_I]], %[[STEP_J]]) collapse(2) { !$OMP SIMD COLLAPSE(2) do i = 1, n do j = 1, n diff --git a/flang/test/Lower/OpenMP/wsloop-variable.f90 b/flang/test/Lower/OpenMP/wsloop-variable.f90 index f998c84331ce4..0f4aafb10ded3 100644 --- a/flang/test/Lower/OpenMP/wsloop-variable.f90 +++ b/flang/test/Lower/OpenMP/wsloop-variable.f90 @@ -22,7 +22,7 @@ program wsloop_variable !CHECK: %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64 !CHECK: %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64 !CHECK: omp.wsloop private({{.*}}) { -!CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) { +!CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) collapse(2) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16 !CHECK: hlfir.assign %[[ARG0_I16]] to %[[STORE_IV0:.*]]#0 : i16, !fir.ref !CHECK: hlfir.assign %[[ARG1]] to %[[STORE_IV1:.*]]#0 : i64, !fir.ref diff --git a/flang/test/Semantics/OpenMP/do-collapse.f90 b/flang/test/Semantics/OpenMP/do-collapse.f90 index 480bd45b79b83..ec6a3bdad3686 100644 --- a/flang/test/Semantics/OpenMP/do-collapse.f90 +++ b/flang/test/Semantics/OpenMP/do-collapse.f90 @@ -31,6 +31,7 @@ program omp_doCollapse end do end do + !ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct. !ERROR: At most one COLLAPSE clause can appear on the SIMD directive !$omp simd collapse(2) collapse(1) do i = 1, 4 diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir index b42e387acbb11..d84641ff9c99b 100644 --- a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir @@ -9,7 +9,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo %loop_lb = llvm.mlir.constant(0 : i32) : i32 %loop_step = llvm.mlir.constant(1 : index) : i32 omp.wsloop { - omp.loop_nest (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) { + omp.loop_nest (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) collapse(2) { %1 = llvm.add %arg1, %arg2 : i32 %2 = llvm.mul %arg2, %loop_ub overflow : i32 %3 = llvm.add %arg1, %2 :i32 diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index 3f4dcd5e24c56..27210bc0890ce 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -698,7 +698,7 @@ llvm.func @simd_simple(%lb : i64, %ub : i64, %step : i64, %arg0: !llvm.ptr) { // CHECK-LABEL: @simd_simple_multiple llvm.func @simd_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { omp.simd { - omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) inclusive step (%step1, %step2) { + omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) inclusive step (%step1, %step2) collapse(2) { %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 // The form of the emitted IR is controlled by OpenMPIRBuilder and // tested there. Just check that the right metadata is added and collapsed @@ -736,7 +736,7 @@ llvm.func @simd_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64 // CHECK-LABEL: @simd_simple_multiple_simdlen llvm.func @simd_simple_multiple_simdlen(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { omp.simd simdlen(2) { - omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) { + omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) { %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 // The form of the emitted IR is controlled by OpenMPIRBuilder and // tested there. Just check that the right metadata is added. @@ -760,7 +760,7 @@ llvm.func @simd_simple_multiple_simdlen(%lb1 : i64, %ub1 : i64, %step1 : i64, %l // CHECK-LABEL: @simd_simple_multiple_safelen llvm.func @simd_simple_multiple_safelen(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { omp.simd safelen(2) { - omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) { + omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) { %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 %4 = llvm.getelementptr %arg0[%iv1] : (!llvm.ptr, i64) -> !llvm.ptr, f32 %5 = llvm.getelementptr %arg1[%iv2] : (!llvm.ptr, i64) -> !llvm.ptr, f32 @@ -779,7 +779,7 @@ llvm.func @simd_simple_multiple_safelen(%lb1 : i64, %ub1 : i64, %step1 : i64, %l // CHECK-LABEL: @simd_simple_multiple_simdlen_safelen llvm.func @simd_simple_multiple_simdlen_safelen(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { omp.simd simdlen(1) safelen(2) { - omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) { + omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) { %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 %4 = llvm.getelementptr %arg0[%iv1] : (!llvm.ptr, i64) -> !llvm.ptr, f32 %5 = llvm.getelementptr %arg1[%iv2] : (!llvm.ptr, i64) -> !llvm.ptr, f32 @@ -1177,7 +1177,7 @@ llvm.func @collapse_wsloop( // CHECK: store i32 %[[TOTAL_SUB_1]], ptr // CHECK: call void @__kmpc_for_static_init_4u omp.wsloop { - omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) { + omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) collapse(3) { %31 = llvm.load %20 : !llvm.ptr -> i32 %32 = llvm.add %31, %arg0 : i32 %33 = llvm.add %32, %arg1 : i32 @@ -1239,7 +1239,7 @@ llvm.func @collapse_wsloop_dynamic( // CHECK: store i32 %[[TOTAL]], ptr // CHECK: call void @__kmpc_dispatch_init_4u omp.wsloop schedule(dynamic) { - omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) { + omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) collapse(3) { %31 = llvm.load %20 : !llvm.ptr -> i32 %32 = llvm.add %31, %arg0 : i32 %33 = llvm.add %32, %arg1 : i32 From 3efaa7732ac83617437b197e3f78b0193d922d28 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Tue, 10 Jun 2025 10:18:32 -0400 Subject: [PATCH 03/40] Enable stand-alone tiling, but it gives a warning and converting to simd. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 42 ++++++++++++++++++--- flang/test/Lower/OpenMP/wsloop-collapse.f90 | 2 +- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index f152416d6e69e..798d7cb1d7694 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -2179,6 +2179,39 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter, // Apply unrolling to it auto cli = canonLoop.getCli(); mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli); + +static mlir::omp::LoopOp +genTiledLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::const_iterator item) { + mlir::omp::LoopOperands loopClauseOps; + llvm::SmallVector loopReductionSyms; + genLoopClauses(converter, semaCtx, item->clauses, loc, loopClauseOps, + loopReductionSyms); + + DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/true, symTable); + dsp.processStep1(&loopClauseOps); + + mlir::omp::LoopNestOperands loopNestClauseOps; + llvm::SmallVector iv; + genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, + loopNestClauseOps, iv); + + EntryBlockArgs loopArgs; + loopArgs.priv.syms = dsp.getDelayedPrivSymbols(); + loopArgs.priv.vars = loopClauseOps.privateVars; + loopArgs.reduction.syms = loopReductionSyms; + loopArgs.reduction.vars = loopClauseOps.reductionVars; + + auto loopOp = + genWrapperOp(converter, loc, loopClauseOps, loopArgs); + genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, + loopNestClauseOps, iv, {{loopOp, loopArgs}}, + llvm::omp::Directive::OMPD_loop, dsp); + return loopOp; } static mlir::omp::MaskedOp @@ -3410,13 +3443,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter, newOp = genTeamsOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; - case llvm::omp::Directive::OMPD_tile: { - unsigned version = semaCtx.langOptions().OpenMPVersion; - if (!semaCtx.langOptions().OpenMPSimd) - TODO(loc, "Unhandled loop directive (" + - llvm::omp::getOpenMPDirectiveName(dir, version) + ")"); + case llvm::omp::Directive::OMPD_tile: + newOp = + genTiledLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); break; - } case llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; diff --git a/flang/test/Lower/OpenMP/wsloop-collapse.f90 b/flang/test/Lower/OpenMP/wsloop-collapse.f90 index 7ec40ab4b2f43..677c7809c397f 100644 --- a/flang/test/Lower/OpenMP/wsloop-collapse.f90 +++ b/flang/test/Lower/OpenMP/wsloop-collapse.f90 @@ -57,7 +57,7 @@ program wsloop_collapse !CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref !CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_4:.*]], @{{.*}} %{{.*}}#0 -> %[[VAL_2:.*]], @{{.*}} %{{.*}}#0 -> %[[VAL_0:.*]] : !fir.ref, !fir.ref, !fir.ref) { -!CHECK-NEXT: omp.loop_nest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) { +!CHECK-NEXT: omp.loop_nest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) collapse(3) { !$omp do collapse(3) do i = 1, a do j= 1, b From b7f09a050abe25acf5b20b1a47fac07df819aefd Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Wed, 11 Jun 2025 10:25:00 -0400 Subject: [PATCH 04/40] Add minimal test, remove debug print. --- flang/test/Lower/OpenMP/wsloop-tile.f90 | 30 +++++++++++++++++++ .../Frontend/OpenMP/ConstructDecompositionT.h | 2 +- 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 flang/test/Lower/OpenMP/wsloop-tile.f90 diff --git a/flang/test/Lower/OpenMP/wsloop-tile.f90 b/flang/test/Lower/OpenMP/wsloop-tile.f90 new file mode 100644 index 0000000000000..f43b558ce46bb --- /dev/null +++ b/flang/test/Lower/OpenMP/wsloop-tile.f90 @@ -0,0 +1,30 @@ +! This test checks lowering of OpenMP DO Directive(Worksharing) with collapse. + +! RUN: bbc -fopenmp -fopenmp-version=51 -emit-hlfir %s -o - | FileCheck %s + +!CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "wsloop_tile"} { +program wsloop_tile + integer :: i, j, k + integer :: a, b, c + integer :: x + + a=30 + b=20 + c=50 + x=0 + + !CHECK: omp.loop_nest + !CHECK-SAME: tiles(2, 5, 10) + + !$omp do + !$omp tile sizes(2,5,10) + do i = 1, a + do j= 1, b + do k = 1, c + x = x + i + j + k + end do + end do + end do + !$omp end tile + !$omp end do +end program wsloop_tile diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index 83db78667c7f8..e1083b7ef2cd9 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -498,7 +498,7 @@ bool ConstructDecompositionT::applyClause( last.clauses.push_back(node); return true; } else { - llvm::errs() << "** OVERRIDING isAllowedClauseForDirective **\n"; + // llvm::errs() << "** OVERRIDING isAllowedClauseForDirective **\n"; last.clauses.push_back(node); return true; } From 75b0ab5321708b0cbbc3b42326413941148cc1b3 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Fri, 13 Jun 2025 09:53:10 -0400 Subject: [PATCH 05/40] Fix formatting --- flang/lib/Lower/OpenMP/OpenMP.cpp | 27 +++++++++++-------- flang/lib/Lower/OpenMP/Utils.cpp | 3 +-- flang/lib/Semantics/canonicalize-omp.cpp | 4 +-- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 3 +-- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 12 ++++----- 5 files changed, 26 insertions(+), 23 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 798d7cb1d7694..c4eac20aa06e1 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -429,9 +429,10 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, if (innerOptional.has_value()) { const auto &innerLoopDirective = innerOptional.value().value(); const auto &innerBegin = - std::get(innerLoopDirective.t); + std::get( + innerLoopDirective.t); const auto &innerDirective = - std::get(innerBegin.t); + std::get(innerBegin.t); if (innerDirective.v == llvm::omp::Directive::OMPD_tile) { middleClauseList = &std::get(innerBegin.t); @@ -2180,11 +2181,13 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter, auto cli = canonLoop.getCli(); mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli); -static mlir::omp::LoopOp -genTiledLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item) { +static mlir::omp::LoopOp genTiledLoopOp(lower::AbstractConverter &converter, + lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + mlir::Location loc, + const ConstructQueue &queue, + ConstructQueue::const_iterator item) { mlir::omp::LoopOperands loopClauseOps; llvm::SmallVector loopReductionSyms; genLoopClauses(converter, semaCtx, item->clauses, loc, loopClauseOps, @@ -3445,7 +3448,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, break; case llvm::omp::Directive::OMPD_tile: newOp = - genTiledLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); + genTiledLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); @@ -3892,13 +3895,15 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, List clauses = makeClauses( std::get(beginLoopDirective.t), semaCtx); - const auto &innerOptional = std::get>>(loopConstruct.t); + const auto &innerOptional = + std::get>>( + loopConstruct.t); if (innerOptional.has_value()) { const auto &innerLoopDirective = innerOptional.value().value(); const auto &innerBegin = - std::get(innerLoopDirective.t); + std::get(innerLoopDirective.t); const auto &innerDirective = - std::get(innerBegin.t); + std::get(innerBegin.t); if (innerDirective.v == llvm::omp::Directive::OMPD_tile) { clauses.append( makeClauses(std::get(innerBegin.t), semaCtx)); diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 1a300756bccd8..8c76873ae67be 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -52,8 +52,7 @@ int64_t getCollapseValue(const List &clauses) { } collapseValue = collapseValue - numTileSizes; - int64_t result = - collapseValue > numTileSizes ? collapseValue : numTileSizes; + int64_t result = collapseValue > numTileSizes ? collapseValue : numTileSizes; return result; } diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 10eaaa83f5f4f..c519cb43628ed 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -11,7 +11,7 @@ #include "flang/Parser/parse-tree.h" #include "flang/Semantics/semantics.h" -# include +#include // After Loop Canonicalization, rewrite OpenMP parse tree to make OpenMP // Constructs more structured which provide explicit scopes for later // structural checks and semantic analysis. @@ -153,7 +153,7 @@ class CanonicalizationOfOmp { std::stack loops; loops.push(&x); if (auto *innerConstruct{ - GetConstructIf(*nextIt)}) { + GetConstructIf(*nextIt)}) { if (auto *innerOmpLoop{ std::get_if(&innerConstruct->u)}) { auto &innerBeginDir{ diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 370aaaf4bf3a6..226ecf3eac9b5 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -2995,8 +2995,7 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) { }; if (!parser.parseOptionalKeyword("tiles") && - (parser.parseLParen() || - parser.parseCommaSeparatedList(parseTiles) || + (parser.parseLParen() || parser.parseCommaSeparatedList(parseTiles) || parser.parseRParen())) return failure(); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index f97b30f4d6ff0..0753ecf9d8c79 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -2972,7 +2972,7 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, /// Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder. static LogicalResult convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) { + LLVM::ModuleTranslation &moduleTranslation) { llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); auto loopOp = cast(opInst); // Set up the source location value for OpenMP runtime. @@ -3042,7 +3042,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, // llvm::OpenMPIRBuilder::InsertPointTy afterIP = builder.saveIP(); llvm::OpenMPIRBuilder::InsertPointTy afterIP = - loopInfos.front()->getAfterIP(); + loopInfos.front()->getAfterIP(); // Initialize the new loop info to the current one, in case there // are no loop transformations done. @@ -3054,12 +3054,12 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, SmallVector TileSizes; for (auto tile : tiles.value()) { - llvm::Value *TileVal = llvm::ConstantInt::get(IVType, tile); + llvm::Value *TileVal = llvm::ConstantInt::get(IVType, tile); TileSizes.push_back(TileVal); } - std::vector NewLoops = - ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes); + std::vector NewLoops = + ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes); // Collapse loops. Store the insertion point because LoopInfos may get // invalidated. @@ -3070,7 +3070,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, // Update the loop infos loopInfos.clear(); - for (const auto& newLoop : NewLoops) { + for (const auto &newLoop : NewLoops) { loopInfos.push_back(newLoop); } } // Tiling done From dccb7f48ec626e1a57af8ae7baf760fde85bce73 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Sat, 14 Jun 2025 06:29:58 -0400 Subject: [PATCH 06/40] Fix formatting --- flang/lib/Semantics/canonicalize-omp.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index c519cb43628ed..1d00bdaad777c 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -166,10 +166,10 @@ class CanonicalizationOfOmp { // Retrieveing the address so that DoConstruct or inner loop can be // set later. loops.push(&(std::get>>( + common::Indirection>>( loops.top()->t) - .value() - .value())); + .value() + .value())); nextIt = block.erase(nextIt); } } From 1e63230274873c5ded0b77e78ad2802fd5dac422 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Thu, 19 Jun 2025 15:52:55 -0400 Subject: [PATCH 07/40] Fix test. --- flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 | 1 + 1 file changed, 1 insertion(+) diff --git a/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 b/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 index bb1929249183b..355626f6e73b9 100644 --- a/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 +++ b/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 @@ -1,6 +1,7 @@ !RUN: %python %S/../test_errors.py %s %flang -fopenmp integer :: i, j +! ERROR: DO CONCURRENT loops cannot be used with the COLLAPSE clause. !$omp parallel do collapse(2) do i = 1, 1 ! ERROR: DO CONCURRENT loops cannot form part of a loop nest. From 4b5412ab43b3b8dcf3aee168fefe457854e1d761 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Fri, 20 Jun 2025 07:31:02 -0400 Subject: [PATCH 08/40] Add more mlir tests. Set collapse value when lowering from SCF to OpenMP. --- .../Conversion/SCFToOpenMP/SCFToOpenMP.cpp | 4 +- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 12 +++++ .../Conversion/SCFToOpenMP/scf-to-openmp.mlir | 2 +- mlir/test/Dialect/OpenMP/invalid.mlir | 23 ++++++++ mlir/test/Dialect/OpenMP/ops.mlir | 54 +++++++++++++++++++ 5 files changed, 92 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp index aa5b532058737..19fbefb48a378 100644 --- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp +++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp @@ -493,8 +493,8 @@ struct ParallelOpLowering : public OpRewritePattern { // Create loop nest and populate region with contents of scf.parallel. auto loopOp = omp::LoopNestOp::create( rewriter, parallelOp.getLoc(), parallelOp.getLowerBound(), - parallelOp.getUpperBound(), parallelOp.getStep(), false, 1, - nullptr); + parallelOp.getUpperBound(), parallelOp.getStep(), false, + parallelOp.getLowerBound().size(), nullptr); rewriter.inlineRegionBefore(parallelOp.getRegion(), loopOp.getRegion(), loopOp.getRegion().begin()); diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 226ecf3eac9b5..55b5f91a71a0b 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -3058,6 +3058,18 @@ LogicalResult LoopNestOp::verify() { << "range argument type does not match corresponding IV type"; } + uint64_t numIVs = getIVs().size(); + + if (const auto &numCollapse = getNumCollapse()) + if (numCollapse > numIVs) + return emitOpError() + << "collapse value is larger than the number of loops"; + + if (const auto &tiles = getTileSizes()) + if (tiles.value().size() > numIVs) + return emitOpError() + << "number of tilings is larger than the number of loops"; + if (!llvm::dyn_cast_if_present((*this)->getParentOp())) return emitOpError() << "expects parent op to be a loop wrapper"; diff --git a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir index a722acbf2c347..d362bb6092419 100644 --- a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir +++ b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir @@ -6,7 +6,7 @@ func.func @parallel(%arg0: index, %arg1: index, %arg2: index, // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32 // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) { // CHECK: omp.wsloop { - // CHECK: omp.loop_nest (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) { + // CHECK: omp.loop_nest (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) collapse(2) { // CHECK: memref.alloca_scope scf.parallel (%i, %j) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) { // CHECK: "test.payload"(%[[LVAR1]], %[[LVAR2]]) : (index, index) -> () diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 986c3844d0bb9..6e0bd03241d09 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -157,6 +157,29 @@ func.func @no_loops(%lb : index, %ub : index, %step : index) { } } +// ----- + +func.func @collapse_size(%lb : index, %ub : index, %step : index) { + omp.wsloop { + // expected-error@+1 {{collapse value is larger than the number of loops}} + omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) collapse(4) { + omp.yield + } + } +} + +// ----- + +func.func @tiles_length(%lb : index, %ub : index, %step : index) { + omp.wsloop { + // expected-error@+1 {{number of tilings is larger than the number of loops}} + omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) tiles(2, 4) { + omp.yield + } + } +} + + // ----- func.func @inclusive_not_a_clause(%lb : index, %ub : index, %step : index) { diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index 3c2e0a3b7cc15..60b1f61135ac2 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -376,6 +376,60 @@ func.func @omp_loop_nest_pretty_multiple(%lb1 : i32, %ub1 : i32, %step1 : i32, return } +// CHECK-LABEL: omp_loop_nest_pretty_multiple_collapse +func.func @omp_loop_nest_pretty_multiple_collapse(%lb1 : i32, %ub1 : i32, %step1 : i32, + %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref) -> () { + + omp.wsloop { + // CHECK: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) collapse(2) + omp.loop_nest (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) { + %1 = "test.payload"(%iv1) : (i32) -> (index) + %2 = "test.payload"(%iv2) : (i32) -> (index) + memref.store %iv1, %data1[%1] : memref + memref.store %iv2, %data1[%2] : memref + omp.yield + } + } + + return +} + +// CHECK-LABEL: omp_loop_nest_pretty_multiple_tiles +func.func @omp_loop_nest_pretty_multiple_tiles(%lb1 : i32, %ub1 : i32, %step1 : i32, + %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref) -> () { + + omp.wsloop { + // CHECK: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) tiles(5, 10) + omp.loop_nest (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) tiles(5, 10) { + %1 = "test.payload"(%iv1) : (i32) -> (index) + %2 = "test.payload"(%iv2) : (i32) -> (index) + memref.store %iv1, %data1[%1] : memref + memref.store %iv2, %data1[%2] : memref + omp.yield + } + } + + return +} + +// CHECK-LABEL: omp_loop_nest_pretty_multiple_collapse_tiles +func.func @omp_loop_nest_pretty_multiple_collapse_tiles(%lb1 : i32, %ub1 : i32, %step1 : i32, + %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref) -> () { + + omp.wsloop { + // CHECK: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) collapse(2) tiles(5, 10) + omp.loop_nest (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) tiles(5, 10) { + %1 = "test.payload"(%iv1) : (i32) -> (index) + %2 = "test.payload"(%iv2) : (i32) -> (index) + memref.store %iv1, %data1[%1] : memref + memref.store %iv2, %data1[%2] : memref + omp.yield + } + } + + return +} + // CHECK-LABEL: omp_wsloop func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memref, %linear_var : i32, %chunk_var : i32) -> () { From 411882bc57bc08e667c751772f8bd295bee3818c Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Fri, 20 Jun 2025 07:52:28 -0400 Subject: [PATCH 09/40] Use llvm::SmallVector instead of std::stack --- flang/lib/Semantics/canonicalize-omp.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 1d00bdaad777c..5264ec25fd80c 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -10,8 +10,6 @@ #include "flang/Parser/parse-tree-visitor.h" #include "flang/Parser/parse-tree.h" #include "flang/Semantics/semantics.h" - -#include // After Loop Canonicalization, rewrite OpenMP parse tree to make OpenMP // Constructs more structured which provide explicit scopes for later // structural checks and semantic analysis. @@ -150,8 +148,8 @@ class CanonicalizationOfOmp { if (GetConstructIf(*nextIt)) continue; // Keep track of the loops to handle the end loop directives - std::stack loops; - loops.push(&x); + llvm::SmallVector loops; + loops.push_back(&x); if (auto *innerConstruct{ GetConstructIf(*nextIt)}) { if (auto *innerOmpLoop{ @@ -162,12 +160,12 @@ class CanonicalizationOfOmp { if (innerDir.v == llvm::omp::Directive::OMPD_tile) { std::get>>( - loops.top()->t) = std::move(*innerOmpLoop); + loops.back()->t) = std::move(*innerOmpLoop); // Retrieveing the address so that DoConstruct or inner loop can be // set later. - loops.push(&(std::get>>( - loops.top()->t) + loops.back()->t) .value() .value())); nextIt = block.erase(nextIt); @@ -180,16 +178,16 @@ class CanonicalizationOfOmp { // move DoConstruct std::get>>>( - loops.top()->t) = std::move(*doCons); + loops.back()->t) = std::move(*doCons); nextIt = block.erase(nextIt); // try to match OmpEndLoopDirective while (nextIt != block.end() && !loops.empty()) { if (auto *endDir{ GetConstructIf(*nextIt)}) { std::get>( - loops.top()->t) = std::move(*endDir); + loops.back()->t) = std::move(*endDir); nextIt = block.erase(nextIt); - loops.pop(); + loops.pop_back(); } else { // If there is a mismatch bail out. break; From b39287ef71d681007d5674ccd2f7b4bf59235e7a Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Sat, 21 Jun 2025 09:11:58 -0400 Subject: [PATCH 10/40] Improve test a bit to make sure IVs are used as expected. --- flang/test/Lower/OpenMP/wsloop-tile.f90 | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/flang/test/Lower/OpenMP/wsloop-tile.f90 b/flang/test/Lower/OpenMP/wsloop-tile.f90 index f43b558ce46bb..c9bf18e3b278d 100644 --- a/flang/test/Lower/OpenMP/wsloop-tile.f90 +++ b/flang/test/Lower/OpenMP/wsloop-tile.f90 @@ -13,7 +13,7 @@ program wsloop_tile c=50 x=0 - !CHECK: omp.loop_nest + !CHECK: omp.loop_nest (%[[IV_0:.*]], %[[IV_1:.*]], %[[IV_2:.*]]) : i32 !CHECK-SAME: tiles(2, 5, 10) !$omp do @@ -21,6 +21,15 @@ program wsloop_tile do i = 1, a do j= 1, b do k = 1, c + !CHECK: hlfir.assign %[[IV_0]] to %[[IV_0A:.*]] : i32 + !CHECK: hlfir.assign %[[IV_1]] to %[[IV_1A:.*]] : i32 + !CHECK: hlfir.assign %[[IV_2]] to %[[IV_2A:.*]] : i32 + !CHECK: %[[IVV_0:.*]] = fir.load %[[IV_0A]] + !CHECK: %[[SUM0:.*]] = arith.addi %{{.*}}, %[[IVV_0]] : i32 + !CHECK: %[[IVV_1:.*]] = fir.load %[[IV_1A]] + !CHECK: %[[SUM1:.*]] = arith.addi %[[SUM0]], %[[IVV_1]] : i32 + !CHECK: %[[IVV_2:.*]] = fir.load %[[IV_2A]] + !CHECK: %[[SUM2:.*]] = arith.addi %[[SUM1]], %[[IVV_2]] : i32 x = x + i + j + k end do end do From 5f5156536e26ce13b9fe6b6ea70e93c6272c7c29 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Sat, 21 Jun 2025 09:30:57 -0400 Subject: [PATCH 11/40] Fix comments to clarify canonicalization. --- flang/lib/Semantics/canonicalize-omp.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 5264ec25fd80c..d5b5b14d22dc2 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -119,13 +119,15 @@ class CanonicalizationOfOmp { // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t-> // OmpBeginLoopDirective t-> OmpLoopDirective // [ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct u-> - /// OmpBeginLoopDirective t-> OmpLoopDirective t-> Tile v-> OMP_tile] + // OmpBeginLoopDirective t-> OmpLoopDirective t-> Tile v-> OMP_tile] // ExecutableConstruct -> DoConstruct - // [ExecutableConstruct -> OmpEndLoopDirective] + // [ExecutableConstruct -> OmpEndLoopDirective] (note: tile) // ExecutableConstruct -> OmpEndLoopDirective (if available) // // After rewriting: // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t-> + // [OpenMPLoopConstruct t -> OmpBeginLoopDirective -> OmpLoopDirective + // OmpEndLoopDirective] (note: tile) // OmpBeginLoopDirective t -> OmpLoopDirective -> DoConstruct // OmpEndLoopDirective (if available) parser::Block::iterator nextIt; From e1eaf9ac24cbe62895a10f8e64388a5b26541cd2 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Sat, 21 Jun 2025 12:05:40 -0400 Subject: [PATCH 12/40] Special handling of tile directive when dealing with start end end loop directives. --- flang/lib/Semantics/canonicalize-omp.cpp | 31 ++++++++++++++++-------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index d5b5b14d22dc2..a7749b5a81678 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -160,16 +160,13 @@ class CanonicalizationOfOmp { std::get(innerOmpLoop->t)}; auto &innerDir{std::get(innerBeginDir.t)}; if (innerDir.v == llvm::omp::Directive::OMPD_tile) { - std::get>>( - loops.back()->t) = std::move(*innerOmpLoop); + loops.back()->t); + innerLoop = std::move(*innerOmpLoop); // Retrieveing the address so that DoConstruct or inner loop can be // set later. - loops.push_back(&(std::get>>( - loops.back()->t) - .value() - .value())); + loops.push_back(&(innerLoop.value().value())); nextIt = block.erase(nextIt); } } @@ -186,9 +183,23 @@ class CanonicalizationOfOmp { while (nextIt != block.end() && !loops.empty()) { if (auto *endDir{ GetConstructIf(*nextIt)}) { - std::get>( - loops.back()->t) = std::move(*endDir); - nextIt = block.erase(nextIt); + auto &endOmpDirective{ + std::get(endDir->t)}; + auto &loopBegin{ + std::get(loops.back()->t)}; + auto &loopDir{std::get(loopBegin.t)}; + + // If the directive is a tile we try to match the corresponding + // end tile if it exsists. If it is not a tile directive we + // always assign the end loop directive and fall back on the + // existing directive structure checks. + if (loopDir.v != llvm::omp::Directive::OMPD_tile || + loopDir.v == endOmpDirective.v) { + std::get>( + loops.back()->t) = std::move(*endDir); + nextIt = block.erase(nextIt); + } + loops.pop_back(); } else { // If there is a mismatch bail out. From ac4149965663334afb1bac143f8c5e53790018a2 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Sat, 21 Jun 2025 12:20:33 -0400 Subject: [PATCH 13/40] Inline functions. --- flang/lib/Semantics/resolve-directives.cpp | 62 +++++++++------------- 1 file changed, 24 insertions(+), 38 deletions(-) diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 806ae00720ce8..e4849601073f7 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -867,14 +867,6 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { void CollectAssociatedLoopLevelsFromInnerLoopContruct( const parser::OpenMPLoopConstruct &, llvm::SmallVector &, llvm::SmallVector &); - template - void CollectAssociatedLoopLevelFromClauseValue( - const parser::OmpClause &clause, llvm::SmallVector &, - llvm::SmallVector &); - template - void CollectAssociatedLoopLevelFromClauseSize(const parser::OmpClause &, - llvm::SmallVector &, - llvm::SmallVector &); void CollectAssociatedLoopLevelsFromClauses(const parser::OmpClauseList &, llvm::SmallVector &, llvm::SmallVector &); @@ -2124,40 +2116,34 @@ void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromInnerLoopContruct( } } -template -void OmpAttributeVisitor::CollectAssociatedLoopLevelFromClauseValue( - const parser::OmpClause &clause, llvm::SmallVector &levels, - llvm::SmallVector &clauses) { - if (const auto tclause{std::get_if(&clause.u)}) { - std::int64_t level = 0; - if (const auto v{EvaluateInt64(context_, tclause->v)}) { - level = *v; - } - levels.push_back(level); - clauses.push_back(&clause); - } -} - -template -void OmpAttributeVisitor::CollectAssociatedLoopLevelFromClauseSize( - const parser::OmpClause &clause, llvm::SmallVector &levels, - llvm::SmallVector &clauses) { - if (const auto tclause{std::get_if(&clause.u)}) { - levels.push_back(tclause->v.size()); - clauses.push_back(&clause); - } -} - void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromClauses( const parser::OmpClauseList &x, llvm::SmallVector &levels, llvm::SmallVector &clauses) { for (const auto &clause : x.v) { - CollectAssociatedLoopLevelFromClauseValue( - clause, levels, clauses); - CollectAssociatedLoopLevelFromClauseValue( - clause, levels, clauses); - CollectAssociatedLoopLevelFromClauseSize( - clause, levels, clauses); + if (const auto oclause{ + std::get_if(&clause.u)}) { + std::int64_t level = 0; + if (const auto v{EvaluateInt64(context_, oclause->v)}) { + level = *v; + } + levels.push_back(level); + clauses.push_back(&clause); + } + + if (const auto cclause{ + std::get_if(&clause.u)}) { + std::int64_t level = 0; + if (const auto v{EvaluateInt64(context_, cclause->v)}) { + level = *v; + } + levels.push_back(level); + clauses.push_back(&clause); + } + + if (const auto tclause{std::get_if(&clause.u)}) { + levels.push_back(tclause->v.size()); + clauses.push_back(&clause); + } } } From 9f74cf109e565b3822a3eb9e8600c4b10b90d200 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Mon, 23 Jun 2025 11:03:50 -0400 Subject: [PATCH 14/40] Remove debug code. --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 7ad1e70cb6e75..bbd1ed6a3ab2d 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -810,11 +810,7 @@ void CodeExtractor::severSplitPHINodesOfExits() { } void CodeExtractor::splitReturnBlocks() { - for (BasicBlock *Block : Blocks) { - if (!Block->getTerminator()) { - errs() << "====== No terminator in block: " << Block->getName() - << "======\n"; - } + for (BasicBlock *Block : Blocks) if (ReturnInst *RI = dyn_cast(Block->getTerminator())) { BasicBlock *New = Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret"); @@ -831,7 +827,6 @@ void CodeExtractor::splitReturnBlocks() { DT->changeImmediateDominator(I, NewNode); } } - } } Function *CodeExtractor::constructFunctionDeclaration( From bb9132ca04143254150b77575b0dba535654b342 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Mon, 23 Jun 2025 11:08:42 -0400 Subject: [PATCH 15/40] Reuse loop op lowering, add comment. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index c4eac20aa06e1..c875db1642a25 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3448,7 +3448,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, break; case llvm::omp::Directive::OMPD_tile: newOp = - genTiledLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); + genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); From 6681b4ee7945bad3ae3d66c132a84bd6cf4d8ea7 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Mon, 23 Jun 2025 11:12:51 -0400 Subject: [PATCH 16/40] Fix formatting. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index c875db1642a25..4049253ce6cce 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3447,8 +3447,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, item); break; case llvm::omp::Directive::OMPD_tile: - newOp = - genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); + newOp = genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); From 54593af008dc713afdfbb7c1e10f5f8ea610b7c4 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Mon, 23 Jun 2025 11:21:03 -0400 Subject: [PATCH 17/40] Remove curly braces. --- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 55b5f91a71a0b..16821cc4935ff 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -2978,11 +2978,10 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) { (parser.parseLParen() || parser.parseInteger(value) || parser.parseRParen())) return failure(); - if (value > 1) { + if (value > 1) result.addAttribute( "num_collapse", IntegerAttr::get(parser.getBuilder().getI64Type(), value)); - } // Parse tiles SmallVector tiles; @@ -2999,9 +2998,8 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) { parser.parseRParen())) return failure(); - if (tiles.size() > 0) { + if (tiles.size() > 0) result.addAttribute("tile_sizes", DenseI64ArrayAttr::get(ctx, tiles)); - } // Parse the body. Region *region = result.addRegion(); @@ -3026,13 +3024,13 @@ void LoopNestOp::print(OpAsmPrinter &p) { if (getLoopInclusive()) p << "inclusive "; p << "step (" << getLoopSteps() << ") "; - if (int64_t numCollapse = getNumCollapse()) { + if (int64_t numCollapse = getNumCollapse()) if (numCollapse > 1) p << "collapse(" << numCollapse << ") "; - } - if (const auto tiles = getTileSizes()) { + + if (const auto tiles = getTileSizes()) p << "tiles(" << tiles.value() << ") "; - } + p.printRegion(region, /*printEntryBlockArgs=*/false); } From 93d9952f4ec0267cdab9cfde87817cd0bd2751b5 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Wed, 25 Jun 2025 10:11:36 -0400 Subject: [PATCH 18/40] Avoid attaching the sizes clause to the parent construct, instead find the tile sizes through the parse tree when getting the information needed to create the loop nest ops. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 23 ++--- flang/lib/Lower/OpenMP/Utils.cpp | 90 ++++++++++++++++++- flang/lib/Lower/OpenMP/Utils.h | 5 ++ .../Frontend/OpenMP/ConstructDecompositionT.h | 4 - 4 files changed, 99 insertions(+), 23 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 4049253ce6cce..674dae97fd72e 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -49,6 +49,7 @@ using namespace Fortran::lower::omp; using namespace Fortran::common::openmp; using namespace Fortran::utils::openmp; +using namespace Fortran::semantics; //===----------------------------------------------------------------------===// // Code generation helper functions @@ -1602,6 +1603,7 @@ genLoopNestClauses(lower::AbstractConverter &converter, int64_t collapseValue = evaluate::ToInt64(collapse.v).value(); clauseOps.numCollapse = firOpBuilder.getI64IntegerAttr(collapseValue); } else if (clause.id == llvm::omp::Clause::OMPC_sizes) { + // This case handles the stand-alone tiling construct const auto &sizes = std::get(clause.u); llvm::SmallVector sizeValues; for (auto &size : sizes.v) { @@ -1611,6 +1613,12 @@ genLoopNestClauses(lower::AbstractConverter &converter, clauseOps.tileSizes = sizeValues; } } + + llvm::SmallVector sizeValues; + auto *ompCons{eval.getIf()}; + collectTileSizesFromOpenMPConstruct (ompCons, sizeValues, semaCtx); + if (sizeValues.size() > 0) + clauseOps.tileSizes = sizeValues; } static void genLoopClauses( @@ -3894,21 +3902,6 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, List clauses = makeClauses( std::get(beginLoopDirective.t), semaCtx); - const auto &innerOptional = - std::get>>( - loopConstruct.t); - if (innerOptional.has_value()) { - const auto &innerLoopDirective = innerOptional.value().value(); - const auto &innerBegin = - std::get(innerLoopDirective.t); - const auto &innerDirective = - std::get(innerBegin.t); - if (innerDirective.v == llvm::omp::Directive::OMPD_tile) { - clauses.append( - makeClauses(std::get(innerBegin.t), semaCtx)); - } - } - if (auto &endLoopDirective = std::get>( loopConstruct.t)) { diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 8c76873ae67be..573d9bd598c93 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -14,6 +14,7 @@ #include "ClauseFinder.h" #include "flang/Lower/OpenMP/Clauses.h" +#include "flang/Evaluate/fold.h" #include #include #include @@ -25,10 +26,31 @@ #include #include #include +#include #include #include +using namespace Fortran::semantics; + +template +MaybeIntExpr +EvaluateIntExpr(SemanticsContext &context, const T &expr) { + if (MaybeExpr maybeExpr{ + Fold(context.foldingContext(), AnalyzeExpr(context, expr))}) { + if (auto *intExpr{Fortran::evaluate::UnwrapExpr(*maybeExpr)}) { + return std::move(*intExpr); + } + } + return std::nullopt; +} + +template +std::optional +EvaluateInt64(SemanticsContext &context, const T &expr) { + return Fortran::evaluate::ToInt64(EvaluateIntExpr(context, expr)); +} + llvm::cl::opt treatIndexAsSection( "openmp-treat-index-as-section", llvm::cl::desc("In the OpenMP data clauses treat `a(N)` as `a(N:N)`."), @@ -584,6 +606,43 @@ static void convertLoopBounds(lower::AbstractConverter &converter, } } +// Populates the sizes vector with values if the given OpenMPConstruct +// Contains a loop construct with an inner tiling construct. +void collectTileSizesFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + llvm::SmallVectorImpl &tileSizes, + SemanticsContext &semaCtx) { + if (!ompCons) + return; + + if (auto *ompLoop{std::get_if(&ompCons->u)}) { + const auto &innerOptional = std::get< + std::optional>>( + ompLoop->t); + if (innerOptional.has_value()) { + const auto &innerLoopDirective = innerOptional.value().value(); + const auto &innerBegin = + std::get(innerLoopDirective.t); + const auto &innerDirective = + std::get(innerBegin.t).v; + + if (innerDirective == llvm::omp::Directive::OMPD_tile) { + // Get the size values from parse tree and convert to a vector + const auto &innerClauseList{ + std::get(innerBegin.t)}; + for (const auto &clause : innerClauseList.v) + if (const auto tclause{ + std::get_if(&clause.u)}) { + for (auto &tval : tclause->v) { + if (const auto v{EvaluateInt64(semaCtx, tval)}) + tileSizes.push_back(*v); + } + } + } + } + } +} + bool collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, lower::pft::Evaluation &eval, const omp::List &clauses, @@ -605,11 +664,34 @@ bool collectLoopRelatedInfo( collapseValue = evaluate::ToInt64(clause->v).value(); found = true; } + + // Collect sizes from tile directive if present std::int64_t sizesLengthValue = 0l; - if (auto *clause = - ClauseFinder::findUniqueClause(clauses)) { - sizesLengthValue = clause->v.size(); - found = true; + if (auto *ompCons{eval.getIf()}) { + if (auto *ompLoop{std::get_if(&ompCons->u)}) { + const auto &innerOptional = std::get< + std::optional>>( + ompLoop->t); + if (innerOptional.has_value()) { + const auto &innerLoopDirective = innerOptional.value().value(); + const auto &innerBegin = + std::get(innerLoopDirective.t); + const auto &innerDirective = + std::get(innerBegin.t).v; + + if (innerDirective == llvm::omp::Directive::OMPD_tile) { + // Get the size values from parse tree and convert to a vector + const auto &innerClauseList{ + std::get(innerBegin.t)}; + for (const auto &clause : innerClauseList.v) + if (const auto tclause{ + std::get_if(&clause.u)}) { + sizesLengthValue = tclause->v.size(); + found = true; + } + } + } + } } collapseValue = collapseValue - sizesLengthValue; diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index 88371ab8bf969..e585eb463e9ce 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -165,6 +165,11 @@ bool collectLoopRelatedInfo( mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv); +void collectTileSizesFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + llvm::SmallVectorImpl &tileSizes, + Fortran::semantics::SemanticsContext &semaCtx); + } // namespace omp } // namespace lower } // namespace Fortran diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index e1083b7ef2cd9..5bb1f3f36b65e 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -497,10 +497,6 @@ bool ConstructDecompositionT::applyClause( if (llvm::omp::isAllowedClauseForDirective(last.id, node->id, version)) { last.clauses.push_back(node); return true; - } else { - // llvm::errs() << "** OVERRIDING isAllowedClauseForDirective **\n"; - last.clauses.push_back(node); - return true; } } From a738a56184a8376d387254a9fd19294db90ae3ce Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Wed, 25 Jun 2025 10:33:33 -0400 Subject: [PATCH 19/40] Fix formatting --- flang/lib/Lower/OpenMP/OpenMP.cpp | 2 +- flang/lib/Lower/OpenMP/Utils.cpp | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 674dae97fd72e..a0ecdd2149b14 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1616,7 +1616,7 @@ genLoopNestClauses(lower::AbstractConverter &converter, llvm::SmallVector sizeValues; auto *ompCons{eval.getIf()}; - collectTileSizesFromOpenMPConstruct (ompCons, sizeValues, semaCtx); + collectTileSizesFromOpenMPConstruct(ompCons, sizeValues, semaCtx); if (sizeValues.size() > 0) clauseOps.tileSizes = sizeValues; } diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 573d9bd598c93..26c209c37f2d2 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -34,8 +34,7 @@ using namespace Fortran::semantics; template -MaybeIntExpr -EvaluateIntExpr(SemanticsContext &context, const T &expr) { +MaybeIntExpr EvaluateIntExpr(SemanticsContext &context, const T &expr) { if (MaybeExpr maybeExpr{ Fold(context.foldingContext(), AnalyzeExpr(context, expr))}) { if (auto *intExpr{Fortran::evaluate::UnwrapExpr(*maybeExpr)}) { @@ -46,8 +45,8 @@ EvaluateIntExpr(SemanticsContext &context, const T &expr) { } template -std::optional -EvaluateInt64(SemanticsContext &context, const T &expr) { +std::optional EvaluateInt64(SemanticsContext &context, + const T &expr) { return Fortran::evaluate::ToInt64(EvaluateIntExpr(context, expr)); } @@ -610,8 +609,7 @@ static void convertLoopBounds(lower::AbstractConverter &converter, // Contains a loop construct with an inner tiling construct. void collectTileSizesFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, - llvm::SmallVectorImpl &tileSizes, - SemanticsContext &semaCtx) { + llvm::SmallVectorImpl &tileSizes, SemanticsContext &semaCtx) { if (!ompCons) return; From 97573607a10bc79e0ac8b55cec037f92921a3099 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Thu, 26 Jun 2025 10:03:13 -0400 Subject: [PATCH 20/40] Fix unparse and add a test for nested loop constructs. --- flang/test/Parser/OpenMP/do-tile-size.f90 | 29 +++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 flang/test/Parser/OpenMP/do-tile-size.f90 diff --git a/flang/test/Parser/OpenMP/do-tile-size.f90 b/flang/test/Parser/OpenMP/do-tile-size.f90 new file mode 100644 index 0000000000000..886ee4a2a680c --- /dev/null +++ b/flang/test/Parser/OpenMP/do-tile-size.f90 @@ -0,0 +1,29 @@ +! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck --ignore-case %s +! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck --check-prefix="PARSE-TREE" %s + +subroutine openmp_do_tiles(x) + + integer, intent(inout)::x + + +!CHECK: !$omp do +!CHECK: !$omp tile sizes +!$omp do +!$omp tile sizes(2) +!CHECK: do + do x = 1, 100 + call F1() +!CHECK: end do + end do +!CHECK: !$omp end tile +!$omp end tile +!$omp end do + +!PARSE-TREE:| | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct +!PARSE-TREE:| | | OmpBeginLoopDirective +!PARSE-TREE:| | | OpenMPLoopConstruct +!PARSE-TREE:| | | | OmpBeginLoopDirective +!PARSE-TREE:| | | | | OmpLoopDirective -> llvm::omp::Directive = tile +!PARSE-TREE:| | | | | OmpClauseList -> OmpClause -> Sizes -> Scalar -> Integer -> Expr = '2_4' +!PARSE-TREE: | | | | DoConstruct +END subroutine openmp_do_tiles From 9e35a6f6b7f1d3bbfc0c49ac0cf5df7e216bf023 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Thu, 26 Jun 2025 10:50:08 -0400 Subject: [PATCH 21/40] Use more convenient function to get OpenMPLoopConstruct. Fix comments. --- flang/lib/Semantics/canonicalize-omp.cpp | 30 ++++++++----------- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 9 +++--- 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index a7749b5a81678..79630b564e51a 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -152,23 +152,19 @@ class CanonicalizationOfOmp { // Keep track of the loops to handle the end loop directives llvm::SmallVector loops; loops.push_back(&x); - if (auto *innerConstruct{ - GetConstructIf(*nextIt)}) { - if (auto *innerOmpLoop{ - std::get_if(&innerConstruct->u)}) { - auto &innerBeginDir{ - std::get(innerOmpLoop->t)}; - auto &innerDir{std::get(innerBeginDir.t)}; - if (innerDir.v == llvm::omp::Directive::OMPD_tile) { - auto &innerLoop = std::get>>( - loops.back()->t); - innerLoop = std::move(*innerOmpLoop); - // Retrieveing the address so that DoConstruct or inner loop can be - // set later. - loops.push_back(&(innerLoop.value().value())); - nextIt = block.erase(nextIt); - } + if (auto *innerOmpLoop{GetOmpIf(*nextIt)}) { + auto &innerBeginDir{ + std::get(innerOmpLoop->t)}; + auto &innerDir{std::get(innerBeginDir.t)}; + if (innerDir.v == llvm::omp::Directive::OMPD_tile) { + auto &innerLoop = std::get< + std::optional>>( + loops.back()->t); + innerLoop = std::move(*innerOmpLoop); + // Retrieveing the address so that DoConstruct or inner loop can be + // set later. + loops.push_back(&(innerLoop.value().value())); + nextIt = block.erase(nextIt); } } diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 0753ecf9d8c79..7914c06fa6065 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3040,7 +3040,6 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, loopInfos.push_back(*loopResult); } - // llvm::OpenMPIRBuilder::InsertPointTy afterIP = builder.saveIP(); llvm::OpenMPIRBuilder::InsertPointTy afterIP = loopInfos.front()->getAfterIP(); @@ -3061,10 +3060,10 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, std::vector NewLoops = ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes); - // Collapse loops. Store the insertion point because LoopInfos may get - // invalidated. - auto AfterBB = NewLoops.front()->getAfter(); - auto AfterAfterBB = AfterBB->getSingleSuccessor(); + // Update afterIP to get the correct insertion point after + // tiling. + llvm::BasicBlock *AfterBB = NewLoops.front()->getAfter(); + llvm::BasicBlock *AfterAfterBB = AfterBB->getSingleSuccessor(); afterIP = {AfterAfterBB, AfterAfterBB->begin()}; NewTopLoopInfo = NewLoops[0]; From 8dfaf57880a0b7c68aff2da2ff3ab3d64867ce79 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Thu, 26 Jun 2025 10:54:46 -0400 Subject: [PATCH 22/40] Fix formatting. --- .../Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 7914c06fa6065..b8a993bb24c47 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3063,7 +3063,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, // Update afterIP to get the correct insertion point after // tiling. llvm::BasicBlock *AfterBB = NewLoops.front()->getAfter(); - llvm::BasicBlock *AfterAfterBB = AfterBB->getSingleSuccessor(); + llvm::BasicBlock *AfterAfterBB = AfterBB->getSingleSuccessor(); afterIP = {AfterAfterBB, AfterAfterBB->begin()}; NewTopLoopInfo = NewLoops[0]; From 84c5cc14d6f0da349392802899d3fc55fe4ed8f0 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Sat, 9 Aug 2025 10:53:23 -0400 Subject: [PATCH 23/40] Fix merge problems related to the different representations used for nested loop constructs. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 52 +++++----------------- flang/lib/Lower/OpenMP/Utils.cpp | 28 +++++++----- flang/lib/Semantics/canonicalize-omp.cpp | 10 +++-- flang/lib/Semantics/resolve-directives.cpp | 34 ++++++++------ 4 files changed, 56 insertions(+), 68 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index a0ecdd2149b14..4272169a5003d 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -424,14 +424,19 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, // FIXME(JAN): For now we check if there is an inner // OpenMPLoopConstruct, and extract the size clause from there - const auto &innerOptional = std::get>>( - ompConstruct.t); - if (innerOptional.has_value()) { - const auto &innerLoopDirective = innerOptional.value().value(); + const auto &nestedOptional = + std::get>( + ompConstruct.t); + assert(nestedOptional.has_value() && + "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = + std::get_if>( + &(nestedOptional.value())); + if (innerConstruct) { + const auto &innerLoopConstruct = innerConstruct->value(); const auto &innerBegin = std::get( - innerLoopDirective.t); + innerLoopConstruct.t); const auto &innerDirective = std::get(innerBegin.t); if (innerDirective.v == llvm::omp::Directive::OMPD_tile) { @@ -2188,41 +2193,6 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter, // Apply unrolling to it auto cli = canonLoop.getCli(); mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli); - -static mlir::omp::LoopOp genTiledLoopOp(lower::AbstractConverter &converter, - lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, - lower::pft::Evaluation &eval, - mlir::Location loc, - const ConstructQueue &queue, - ConstructQueue::const_iterator item) { - mlir::omp::LoopOperands loopClauseOps; - llvm::SmallVector loopReductionSyms; - genLoopClauses(converter, semaCtx, item->clauses, loc, loopClauseOps, - loopReductionSyms); - - DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, - /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/true, symTable); - dsp.processStep1(&loopClauseOps); - - mlir::omp::LoopNestOperands loopNestClauseOps; - llvm::SmallVector iv; - genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, - loopNestClauseOps, iv); - - EntryBlockArgs loopArgs; - loopArgs.priv.syms = dsp.getDelayedPrivSymbols(); - loopArgs.priv.vars = loopClauseOps.privateVars; - loopArgs.reduction.syms = loopReductionSyms; - loopArgs.reduction.vars = loopClauseOps.reductionVars; - - auto loopOp = - genWrapperOp(converter, loc, loopClauseOps, loopArgs); - genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, - loopNestClauseOps, iv, {{loopOp, loopArgs}}, - llvm::omp::Directive::OMPD_loop, dsp); - return loopOp; } static mlir::omp::MaskedOp diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 26c209c37f2d2..59db76be4cfbc 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -614,11 +614,15 @@ void collectTileSizesFromOpenMPConstruct( return; if (auto *ompLoop{std::get_if(&ompCons->u)}) { - const auto &innerOptional = std::get< - std::optional>>( - ompLoop->t); - if (innerOptional.has_value()) { - const auto &innerLoopDirective = innerOptional.value().value(); + const auto &nestedOptional = + std::get>(ompLoop->t); + assert(nestedOptional.has_value() && + "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = + std::get_if>( + &(nestedOptional.value())); + if (innerConstruct) { + const auto &innerLoopDirective = innerConstruct->value(); const auto &innerBegin = std::get(innerLoopDirective.t); const auto &innerDirective = @@ -667,11 +671,15 @@ bool collectLoopRelatedInfo( std::int64_t sizesLengthValue = 0l; if (auto *ompCons{eval.getIf()}) { if (auto *ompLoop{std::get_if(&ompCons->u)}) { - const auto &innerOptional = std::get< - std::optional>>( - ompLoop->t); - if (innerOptional.has_value()) { - const auto &innerLoopDirective = innerOptional.value().value(); + const auto &nestedOptional = + std::get>(ompLoop->t); + assert(nestedOptional.has_value() && + "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = + std::get_if>( + &(nestedOptional.value())); + if (innerConstruct) { + const auto &innerLoopDirective = innerConstruct->value(); const auto &innerBegin = std::get(innerLoopDirective.t); const auto &innerDirective = diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 79630b564e51a..4792bf2cb217c 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -157,13 +157,15 @@ class CanonicalizationOfOmp { std::get(innerOmpLoop->t)}; auto &innerDir{std::get(innerBeginDir.t)}; if (innerDir.v == llvm::omp::Directive::OMPD_tile) { - auto &innerLoop = std::get< - std::optional>>( - loops.back()->t); + auto &innerLoopVariant = + std::get>(loops.back()->t); + auto &innerLoop = + std::get>( + innerLoopVariant.value()); innerLoop = std::move(*innerOmpLoop); // Retrieveing the address so that DoConstruct or inner loop can be // set later. - loops.push_back(&(innerLoop.value().value())); + loops.push_back(&(innerLoop.value())); nextIt = block.erase(nextIt); } } diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index e4849601073f7..648fcb308d19d 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -2107,12 +2107,18 @@ void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromInnerLoopContruct( const parser::OpenMPLoopConstruct &x, llvm::SmallVector &levels, llvm::SmallVector &clauses) { - const auto &innerOptional = - std::get>>( - x.t); - if (innerOptional.has_value()) { + + const auto &nestedOptional = + std::get>(x.t); + assert(nestedOptional.has_value() && + "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = + std::get_if>( + &(nestedOptional.value())); + + if (innerConstruct) { CollectAssociatedLoopLevelsFromLoopConstruct( - innerOptional.value().value(), levels, clauses); + innerConstruct->value(), levels, clauses); } } @@ -2177,17 +2183,19 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( bool hasCollapseClause{ clause ? (clause->Id() == llvm::omp::OMPC_collapse) : false}; const parser::OpenMPLoopConstruct *innerMostLoop = &x; - + const parser::NestedConstruct *innerMostNest = nullptr; while (auto &optLoopCons{ - std::get>(x.t)}) { - if (const auto &innerLoop{ - std::get_if < parser::OpenMPLoopConstruct >>> (innerMostLoop->t)}) { - innerMostLoop = &innerLoop.value().value(); + std::get>(innerMostLoop->t)}) { + innerMostNest = &(optLoopCons.value()); + if (const auto *innerLoop{ + std::get_if>( + innerMostNest)}) { + innerMostLoop = &(innerLoop->value()); } } - if (optLoopCons.has_value()) { - if (const auto &outer{std::get_if(innerMostLoop->t)}) { + if (innerMostNest) { + if (const auto &outer{std::get_if(innerMostNest)}) { for (const parser::DoConstruct *loop{&*outer}; loop && level > 0; --level) { if (loop->IsDoConcurrent()) { @@ -2223,7 +2231,7 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( CheckAssocLoopLevel(level, GetAssociatedClause()); } else if (const auto &loop{std::get_if< common::Indirection>( - &*optLoopCons)}) { + innerMostNest)}) { auto &beginDirective = std::get(loop->value().t); auto &beginLoopDirective = From 53ab1950ae9fa2797dbefca6ed66b77f7764ceb5 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Sat, 9 Aug 2025 12:27:35 -0400 Subject: [PATCH 24/40] Fix bugs introduced when merging. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 4 ++-- flang/lib/Semantics/canonicalize-omp.cpp | 19 +++++++++--------- flang/lib/Semantics/resolve-directives.cpp | 3 ++- ...nested-loop-transformation-construct01.f90 | 20 ------------------- flang/test/Lower/OpenMP/wsloop-tile.f90 | 2 +- 5 files changed, 15 insertions(+), 33 deletions(-) delete mode 100644 flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 4272169a5003d..5a202fd74815e 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -422,8 +422,8 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, beginClauseList = &std::get(beginDirective.t); - // FIXME(JAN): For now we check if there is an inner - // OpenMPLoopConstruct, and extract the size clause from there + // For now we check if there is an inner OpenMPLoopConstruct, and + // extract the size clause from there const auto &nestedOptional = std::get>( ompConstruct.t); diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 4792bf2cb217c..c664171350d9e 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -143,7 +143,6 @@ class CanonicalizationOfOmp { "If a loop construct has been fully unrolled, it cannot then be tiled"_err_en_US, parser::ToUpperCaseLetters(dir.source.ToString())); }; - nextIt = it; while (++nextIt != block.end()) { // Ignore compiler directives. @@ -159,14 +158,16 @@ class CanonicalizationOfOmp { if (innerDir.v == llvm::omp::Directive::OMPD_tile) { auto &innerLoopVariant = std::get>(loops.back()->t); - auto &innerLoop = - std::get>( - innerLoopVariant.value()); - innerLoop = std::move(*innerOmpLoop); - // Retrieveing the address so that DoConstruct or inner loop can be - // set later. - loops.push_back(&(innerLoop.value())); - nextIt = block.erase(nextIt); + if (innerLoopVariant.has_value()) { + auto *innerLoop = + std::get_if>( + &(innerLoopVariant.value())); + *innerLoop = std::move(*innerOmpLoop); + // Retrieveing the address so that DoConstruct or inner loop can be + // set later. + loops.push_back(&(innerLoop->value())); + nextIt = block.erase(nextIt); + } } } diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 648fcb308d19d..540262f4ab906 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -2191,7 +2191,8 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( std::get_if>( innerMostNest)}) { innerMostLoop = &(innerLoop->value()); - } + } else + break; } if (innerMostNest) { diff --git a/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 b/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 deleted file mode 100644 index 17eba93a7405d..0000000000000 --- a/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 +++ /dev/null @@ -1,20 +0,0 @@ -! Test to ensure TODO message is emitted for tile OpenMP 5.1 Directives when they are nested. - -!RUN: not %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s 2>&1 | FileCheck %s - -subroutine loop_transformation_construct - implicit none - integer :: I = 10 - integer :: x - integer :: y(I) - - !$omp do - !$omp tile - do i = 1, I - y(i) = y(i) * 5 - end do - !$omp end tile - !$omp end do -end subroutine - -!CHECK: not yet implemented: Unhandled loop directive (tile) diff --git a/flang/test/Lower/OpenMP/wsloop-tile.f90 b/flang/test/Lower/OpenMP/wsloop-tile.f90 index c9bf18e3b278d..4c412b357f52e 100644 --- a/flang/test/Lower/OpenMP/wsloop-tile.f90 +++ b/flang/test/Lower/OpenMP/wsloop-tile.f90 @@ -2,7 +2,7 @@ ! RUN: bbc -fopenmp -fopenmp-version=51 -emit-hlfir %s -o - | FileCheck %s -!CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "wsloop_tile"} { +!CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "WSLOOP_TILE"} { program wsloop_tile integer :: i, j, k integer :: a, b, c From fa6e323f5f83d3b443070247fe7ef5c21e637a40 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Mon, 11 Aug 2025 07:23:15 -0400 Subject: [PATCH 25/40] Move include --- flang/lib/Lower/OpenMP/Utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 59db76be4cfbc..72dd8b1042780 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -13,8 +13,8 @@ #include "Utils.h" #include "ClauseFinder.h" -#include "flang/Lower/OpenMP/Clauses.h" #include "flang/Evaluate/fold.h" +#include "flang/Lower/OpenMP/Clauses.h" #include #include #include From bce250cd9f4398233b7af69b51008605c41c2a3b Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Tue, 19 Aug 2025 10:55:05 -0400 Subject: [PATCH 26/40] Remove unused code. Currently the canonicalize-omp can only handle a single nested loop construct, which is what we prefer. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 1 - flang/lib/Lower/OpenMP/Utils.cpp | 22 ++---- flang/lib/Semantics/canonicalize-omp.cpp | 68 ++++--------------- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 2 - 4 files changed, 20 insertions(+), 73 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 5a202fd74815e..41ae05c97d3ec 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -946,7 +946,6 @@ static void genLoopVars( storeOp = createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); } - firOpBuilder.setInsertionPointAfter(storeOp); } diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 72dd8b1042780..cbd8fc1abd466 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -60,21 +60,14 @@ namespace lower { namespace omp { int64_t getCollapseValue(const List &clauses) { - int64_t collapseValue = 1; - int64_t numTileSizes = 0; - for (auto &clause : clauses) { - if (clause.id == llvm::omp::Clause::OMPC_collapse) { - const auto &collapse = std::get(clause.u); - collapseValue = evaluate::ToInt64(collapse.v).value(); - } else if (clause.id == llvm::omp::Clause::OMPC_sizes) { - const auto &sizes = std::get(clause.u); - numTileSizes = sizes.v.size(); - } + auto iter = llvm::find_if(clauses, [](const Clause &clause) { + return clause.id == llvm::omp::Clause::OMPC_collapse; + }); + if (iter != clauses.end()) { + const auto &collapse = std::get(iter->u); + return evaluate::ToInt64(collapse.v).value(); } - - collapseValue = collapseValue - numTileSizes; - int64_t result = collapseValue > numTileSizes ? collapseValue : numTileSizes; - return result; + return 1; } void genObjectList(const ObjectList &objects, @@ -650,7 +643,6 @@ bool collectLoopRelatedInfo( lower::pft::Evaluation &eval, const omp::List &clauses, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv) { - bool found = false; fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index c664171350d9e..9722eca19447d 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -10,6 +10,7 @@ #include "flang/Parser/parse-tree-visitor.h" #include "flang/Parser/parse-tree.h" #include "flang/Semantics/semantics.h" + // After Loop Canonicalization, rewrite OpenMP parse tree to make OpenMP // Constructs more structured which provide explicit scopes for later // structural checks and semantic analysis. @@ -116,19 +117,15 @@ class CanonicalizationOfOmp { // in the same iteration // // Original: - // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t-> - // OmpBeginLoopDirective t-> OmpLoopDirective - // [ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct u-> - // OmpBeginLoopDirective t-> OmpLoopDirective t-> Tile v-> OMP_tile] + // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct + // OmpBeginLoopDirective // ExecutableConstruct -> DoConstruct - // [ExecutableConstruct -> OmpEndLoopDirective] (note: tile) // ExecutableConstruct -> OmpEndLoopDirective (if available) // // After rewriting: - // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t-> - // [OpenMPLoopConstruct t -> OmpBeginLoopDirective -> OmpLoopDirective - // OmpEndLoopDirective] (note: tile) - // OmpBeginLoopDirective t -> OmpLoopDirective -> DoConstruct + // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct + // OmpBeginLoopDirective + // DoConstruct // OmpEndLoopDirective (if available) parser::Block::iterator nextIt; auto &beginDir{std::get(x.t)}; @@ -143,66 +140,27 @@ class CanonicalizationOfOmp { "If a loop construct has been fully unrolled, it cannot then be tiled"_err_en_US, parser::ToUpperCaseLetters(dir.source.ToString())); }; + nextIt = it; while (++nextIt != block.end()) { // Ignore compiler directives. if (GetConstructIf(*nextIt)) continue; - // Keep track of the loops to handle the end loop directives - llvm::SmallVector loops; - loops.push_back(&x); - if (auto *innerOmpLoop{GetOmpIf(*nextIt)}) { - auto &innerBeginDir{ - std::get(innerOmpLoop->t)}; - auto &innerDir{std::get(innerBeginDir.t)}; - if (innerDir.v == llvm::omp::Directive::OMPD_tile) { - auto &innerLoopVariant = - std::get>(loops.back()->t); - if (innerLoopVariant.has_value()) { - auto *innerLoop = - std::get_if>( - &(innerLoopVariant.value())); - *innerLoop = std::move(*innerOmpLoop); - // Retrieveing the address so that DoConstruct or inner loop can be - // set later. - loops.push_back(&(innerLoop->value())); - nextIt = block.erase(nextIt); - } - } - } if (auto *doCons{GetConstructIf(*nextIt)}) { if (doCons->GetLoopControl()) { // move DoConstruct std::get>>>( - loops.back()->t) = std::move(*doCons); + common::Indirection>>>(x.t) = + std::move(*doCons); nextIt = block.erase(nextIt); // try to match OmpEndLoopDirective - while (nextIt != block.end() && !loops.empty()) { + if (nextIt != block.end()) { if (auto *endDir{ GetConstructIf(*nextIt)}) { - auto &endOmpDirective{ - std::get(endDir->t)}; - auto &loopBegin{ - std::get(loops.back()->t)}; - auto &loopDir{std::get(loopBegin.t)}; - - // If the directive is a tile we try to match the corresponding - // end tile if it exsists. If it is not a tile directive we - // always assign the end loop directive and fall back on the - // existing directive structure checks. - if (loopDir.v != llvm::omp::Directive::OMPD_tile || - loopDir.v == endOmpDirective.v) { - std::get>( - loops.back()->t) = std::move(*endDir); - nextIt = block.erase(nextIt); - } - - loops.pop_back(); - } else { - // If there is a mismatch bail out. - break; + std::get>(x.t) = + std::move(*endDir); + nextIt = block.erase(nextIt); } } } else { diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index b8a993bb24c47..1e86ee2835026 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3043,8 +3043,6 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy afterIP = loopInfos.front()->getAfterIP(); - // Initialize the new loop info to the current one, in case there - // are no loop transformations done. llvm::CanonicalLoopInfo *NewTopLoopInfo = nullptr; // Do tiling From 8ae7ff697e4b7fcd388b601b9200455059c493bf Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Thu, 21 Aug 2025 22:15:45 -0400 Subject: [PATCH 27/40] Address review comments. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 1 - flang/lib/Lower/OpenMP/Utils.cpp | 4 +- flang/lib/Semantics/resolve-directives.cpp | 54 +++++++++---------- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 9 ---- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 24 --------- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 5 +- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 3 +- mlir/test/Dialect/OpenMP/invalid.mlir | 2 +- 8 files changed, 33 insertions(+), 69 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 41ae05c97d3ec..5f06a09f4f79e 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3870,7 +3870,6 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, std::get(loopConstruct.t); List clauses = makeClauses( std::get(beginLoopDirective.t), semaCtx); - if (auto &endLoopDirective = std::get>( loopConstruct.t)) { diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index cbd8fc1abd466..1f846eae91f6d 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -598,8 +598,8 @@ static void convertLoopBounds(lower::AbstractConverter &converter, } } -// Populates the sizes vector with values if the given OpenMPConstruct -// Contains a loop construct with an inner tiling construct. +/// Populates the sizes vector with values if the given OpenMPConstruct +/// Contains a loop construct with an inner tiling construct. void collectTileSizesFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, llvm::SmallVectorImpl &tileSizes, SemanticsContext &semaCtx) { diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 540262f4ab906..f5e8e90696f3c 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -856,18 +856,22 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { const parser::OmpClause *GetAssociatedClause() { return associatedClause; } private: + /// Given a vector of loop levels and a vector of corresponding clauses find + /// the largest loop level and set the associated loop level to the found + /// maximum. This is used for error handling to ensure that the number of + /// affected loops is not larger that the number of available loops. std::int64_t SetAssociatedMaxClause(llvm::SmallVector &, llvm::SmallVector &); - std::int64_t GetAssociatedLoopLevelFromLoopConstruct( + std::int64_t GetNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &); - std::int64_t GetAssociatedLoopLevelFromClauses(const parser::OmpClauseList &); - void CollectAssociatedLoopLevelsFromLoopConstruct( + std::int64_t GetNumAffectedLoopsFromClauses(const parser::OmpClauseList &); + void CollectNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &, llvm::SmallVector &, llvm::SmallVector &); - void CollectAssociatedLoopLevelsFromInnerLoopContruct( + void CollectNumAffectedLoopsFromInnerLoopContruct( const parser::OpenMPLoopConstruct &, llvm::SmallVector &, llvm::SmallVector &); - void CollectAssociatedLoopLevelsFromClauses(const parser::OmpClauseList &, + void CollectNumAffectedLoopsFromClauses(const parser::OmpClauseList &, llvm::SmallVector &, llvm::SmallVector &); @@ -1931,7 +1935,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { beginDir.v == llvm::omp::Directive::OMPD_target_loop) IssueNonConformanceWarning(beginDir.v, beginDir.source, 52); ClearDataSharingAttributeObjects(); - SetContextAssociatedLoopLevel(GetAssociatedLoopLevelFromLoopConstruct(x)); + SetContextAssociatedLoopLevel(GetNumAffectedLoopsFromLoopConstruct(x)); if (beginDir.v == llvm::omp::Directive::OMPD_do) { auto &optLoopCons = std::get>(x.t); @@ -1945,7 +1949,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { } } PrivatizeAssociatedLoopIndexAndCheckLoopLevel(x); - ordCollapseLevel = GetAssociatedLoopLevelFromLoopConstruct(x) + 1; + ordCollapseLevel = GetNumAffectedLoopsFromLoopConstruct(x) + 1; return true; } @@ -2041,13 +2045,12 @@ std::int64_t OmpAttributeVisitor::SetAssociatedMaxClause( llvm::SmallVector &levels, llvm::SmallVector &clauses) { - // Find the tile level to know how much to reduce the level for collapse + // Find the tile level to ensure that the COLLAPSE clause value + // does not exeed the number of tiled loops. std::int64_t tileLevel = 0; - for (auto [level, clause] : llvm::zip_equal(levels, clauses)) { - if (isSizesClause(clause)) { + for (auto [level, clause] : llvm::zip_equal(levels, clauses)) + if (isSizesClause(clause)) tileLevel = level; - } - } std::int64_t maxLevel = 1; const parser::OmpClause *maxClause = nullptr; @@ -2056,14 +2059,11 @@ std::int64_t OmpAttributeVisitor::SetAssociatedMaxClause( context_.Say(clause->source, "The value of the parameter in the COLLAPSE clause must" " not be larger than the number of the number of tiled loops" - " because collapse relies on independent loop iterations."_err_en_US); + " because collapse currently is limited to independent loop" + " iterations."_err_en_US); return 1; } - if (!isSizesClause(clause)) { - level = level - tileLevel; - } - if (level > maxLevel) { maxLevel = level; maxClause = clause; @@ -2074,36 +2074,36 @@ std::int64_t OmpAttributeVisitor::SetAssociatedMaxClause( return maxLevel; } -std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromLoopConstruct( +std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &x) { llvm::SmallVector levels; llvm::SmallVector clauses; - CollectAssociatedLoopLevelsFromLoopConstruct(x, levels, clauses); + CollectNumAffectedLoopsFromLoopConstruct(x, levels, clauses); return SetAssociatedMaxClause(levels, clauses); } -std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromClauses( +std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromClauses( const parser::OmpClauseList &x) { llvm::SmallVector levels; llvm::SmallVector clauses; - CollectAssociatedLoopLevelsFromClauses(x, levels, clauses); + CollectNumAffectedLoopsFromClauses(x, levels, clauses); return SetAssociatedMaxClause(levels, clauses); } -void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromLoopConstruct( +void OmpAttributeVisitor::CollectNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &x, llvm::SmallVector &levels, llvm::SmallVector &clauses) { const auto &beginLoopDir{std::get(x.t)}; const auto &clauseList{std::get(beginLoopDir.t)}; - CollectAssociatedLoopLevelsFromClauses(clauseList, levels, clauses); - CollectAssociatedLoopLevelsFromInnerLoopContruct(x, levels, clauses); + CollectNumAffectedLoopsFromClauses(clauseList, levels, clauses); + CollectNumAffectedLoopsFromInnerLoopContruct(x, levels, clauses); } -void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromInnerLoopContruct( +void OmpAttributeVisitor::CollectNumAffectedLoopsFromInnerLoopContruct( const parser::OpenMPLoopConstruct &x, llvm::SmallVector &levels, llvm::SmallVector &clauses) { @@ -2117,12 +2117,12 @@ void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromInnerLoopContruct( &(nestedOptional.value())); if (innerConstruct) { - CollectAssociatedLoopLevelsFromLoopConstruct( + CollectNumAffectedLoopsFromLoopConstruct( innerConstruct->value(), levels, clauses); } } -void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromClauses( +void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( const parser::OmpClauseList &x, llvm::SmallVector &levels, llvm::SmallVector &clauses) { for (const auto &clause : x.v) { diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index a994f23c1fbe2..1050e3d8b08dd 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -2257,9 +2257,6 @@ class OpenMPIRBuilder { /// Return the function that contains the region to be outlined. Function *getFunction() const { return EntryBB->getParent(); } - - /// Dump the info in a somewhat readable way - void dump(); }; /// Collection of regions that need to be outlined during finalization. @@ -2280,9 +2277,6 @@ class OpenMPIRBuilder { /// Add a new region that will be outlined later. void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); } - /// Dump outline infos - void dumpOutlineInfos(); - /// An ordered map of auto-generated variables to their unique names. /// It stores variables with the following names: 1) ".gomp_critical_user_" + /// + ".var" for "omp critical" directives; 2) @@ -3916,9 +3910,6 @@ class CanonicalLoopInfo { /// Invalidate this loop. That is, the underlying IR does not fulfill the /// requirements of an OpenMP canonical loop anymore. LLVM_ABI void invalidate(); - - /// Dump the info in a somewhat readable way - void dump(); }; /// ScanInfo holds the information to assist in lowering of Scan reduction. diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 805723aef1348..3d5e487c8990f 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -9164,15 +9164,6 @@ Error OpenMPIRBuilder::emitOffloadingArrays( return Error::success(); } -void OpenMPIRBuilder::dumpOutlineInfos() { - errs() << "=== Outline Infos Begin ===\n"; - for (auto En : enumerate(OutlineInfos)) { - errs() << "[" << En.index() << "]: "; - En.value().dump(); - } - errs() << "=== Outline Infos End ===\n"; -} - void OpenMPIRBuilder::emitBranch(BasicBlock *Target) { BasicBlock *CurBB = Builder.GetInsertBlock(); @@ -10097,14 +10088,6 @@ void OpenMPIRBuilder::OutlineInfo::collectBlocks( } } -void OpenMPIRBuilder::OutlineInfo::dump() { - errs() << "=== OutilneInfo == " - << " EntryBB: " << (EntryBB ? EntryBB->getName() : "n\a") - << " ExitBB: " << (ExitBB ? ExitBB->getName() : "n\a") - << " OuterAllocaBB: " - << (OuterAllocaBB ? OuterAllocaBB->getName() : "n/a") << "\n"; -} - void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, @@ -10882,10 +10865,3 @@ void CanonicalLoopInfo::invalidate() { Latch = nullptr; Exit = nullptr; } - -void CanonicalLoopInfo::dump() { - errs() << "CanonicaLoop == Header: " << (Header ? Header->getName() : "n/a") - << " Cond: " << (Cond ? Cond->getName() : "n/a") - << " Latch: " << (Latch ? Latch->getName() : "n/a") - << " Exit: " << (Exit ? Exit->getName() : "n/a") << "\n"; -} diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 16821cc4935ff..8768eed13cf32 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -2961,7 +2961,7 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) { for (auto &iv : ivs) iv.type = loopVarType; - auto ctx = parser.getBuilder().getContext(); + auto *ctx = parser.getBuilder().getContext(); // Parse "inclusive" flag. if (succeeded(parser.parseOptionalKeyword("inclusive"))) result.addAttribute("loop_inclusive", UnitAttr::get(ctx)); @@ -3065,8 +3065,7 @@ LogicalResult LoopNestOp::verify() { if (const auto &tiles = getTileSizes()) if (tiles.value().size() > numIVs) - return emitOpError() - << "number of tilings is larger than the number of loops"; + return emitOpError() << "too few canonical loops for tile dimensions"; if (!llvm::dyn_cast_if_present((*this)->getParentOp())) return emitOpError() << "expects parent op to be a loop wrapper"; diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 1e86ee2835026..67d6ccc8a6a1d 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3067,9 +3067,8 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, // Update the loop infos loopInfos.clear(); - for (const auto &newLoop : NewLoops) { + for (const auto &newLoop : NewLoops) loopInfos.push_back(newLoop); - } } // Tiling done // Do collapse diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 6e0bd03241d09..763f41c5420b8 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -172,7 +172,7 @@ func.func @collapse_size(%lb : index, %ub : index, %step : index) { func.func @tiles_length(%lb : index, %ub : index, %step : index) { omp.wsloop { - // expected-error@+1 {{number of tilings is larger than the number of loops}} + // expected-error@+1 {{op too few canonical loops for tile dimensions}} omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) tiles(2, 4) { omp.yield } From a3d3db7545e6352f27afd7b3129504f03aade132 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Thu, 21 Aug 2025 23:00:42 -0400 Subject: [PATCH 28/40] Undo unrelated change. --- flang/lib/Lower/OpenMP/Utils.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 1f846eae91f6d..20a7b12f66d22 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -727,6 +727,7 @@ bool collectLoopRelatedInfo( } while (collapseValue > 0); convertLoopBounds(converter, currentLocation, result, loopVarTypeSize); + return found; } From 8510c40248b6fc76aa02b9e5a9c127c5ba190560 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Tue, 26 Aug 2025 08:45:57 -0400 Subject: [PATCH 29/40] Remove stand-alone tiling. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 8 +++- flang/lib/Semantics/resolve-directives.cpp | 10 ----- flang/test/Lower/OpenMP/wsloop-tile.f90 | 39 ------------------- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 1 + 4 files changed, 7 insertions(+), 51 deletions(-) delete mode 100644 flang/test/Lower/OpenMP/wsloop-tile.f90 diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 5f06a09f4f79e..157b919ae7d7d 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3423,9 +3423,13 @@ static void genOMPDispatch(lower::AbstractConverter &converter, newOp = genTeamsOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; - case llvm::omp::Directive::OMPD_tile: - newOp = genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); + case llvm::omp::Directive::OMPD_tile: { + unsigned version = semaCtx.langOptions().OpenMPVersion; + if (!semaCtx.langOptions().OpenMPSimd) + TODO(loc, "Unhandled loop directive (" + + llvm::omp::getOpenMPDirectiveName(dir, version) + ")"); break; + } case llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index f5e8e90696f3c..1b7718d1314d3 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -864,7 +864,6 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { llvm::SmallVector &); std::int64_t GetNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &); - std::int64_t GetNumAffectedLoopsFromClauses(const parser::OmpClauseList &); void CollectNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &, llvm::SmallVector &, llvm::SmallVector &); @@ -2083,15 +2082,6 @@ std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromLoopConstruct( return SetAssociatedMaxClause(levels, clauses); } -std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromClauses( - const parser::OmpClauseList &x) { - llvm::SmallVector levels; - llvm::SmallVector clauses; - - CollectNumAffectedLoopsFromClauses(x, levels, clauses); - return SetAssociatedMaxClause(levels, clauses); -} - void OmpAttributeVisitor::CollectNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &x, llvm::SmallVector &levels, diff --git a/flang/test/Lower/OpenMP/wsloop-tile.f90 b/flang/test/Lower/OpenMP/wsloop-tile.f90 deleted file mode 100644 index 4c412b357f52e..0000000000000 --- a/flang/test/Lower/OpenMP/wsloop-tile.f90 +++ /dev/null @@ -1,39 +0,0 @@ -! This test checks lowering of OpenMP DO Directive(Worksharing) with collapse. - -! RUN: bbc -fopenmp -fopenmp-version=51 -emit-hlfir %s -o - | FileCheck %s - -!CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "WSLOOP_TILE"} { -program wsloop_tile - integer :: i, j, k - integer :: a, b, c - integer :: x - - a=30 - b=20 - c=50 - x=0 - - !CHECK: omp.loop_nest (%[[IV_0:.*]], %[[IV_1:.*]], %[[IV_2:.*]]) : i32 - !CHECK-SAME: tiles(2, 5, 10) - - !$omp do - !$omp tile sizes(2,5,10) - do i = 1, a - do j= 1, b - do k = 1, c - !CHECK: hlfir.assign %[[IV_0]] to %[[IV_0A:.*]] : i32 - !CHECK: hlfir.assign %[[IV_1]] to %[[IV_1A:.*]] : i32 - !CHECK: hlfir.assign %[[IV_2]] to %[[IV_2A:.*]] : i32 - !CHECK: %[[IVV_0:.*]] = fir.load %[[IV_0A]] - !CHECK: %[[SUM0:.*]] = arith.addi %{{.*}}, %[[IVV_0]] : i32 - !CHECK: %[[IVV_1:.*]] = fir.load %[[IV_1A]] - !CHECK: %[[SUM1:.*]] = arith.addi %[[SUM0]], %[[IVV_1]] : i32 - !CHECK: %[[IVV_2:.*]] = fir.load %[[IV_2A]] - !CHECK: %[[SUM2:.*]] = arith.addi %[[SUM1]], %[[IVV_2]] : i32 - x = x + i + j + k - end do - end do - end do - !$omp end tile - !$omp end do -end program wsloop_tile diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 67d6ccc8a6a1d..c2017398bf264 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -2975,6 +2975,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); auto loopOp = cast(opInst); + // Set up the source location value for OpenMP runtime. llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); From d462a6c2bcb68637f74a732bdb15a5021f731f93 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Tue, 26 Aug 2025 10:27:14 -0400 Subject: [PATCH 30/40] Revert unused changes. --- .../Frontend/OpenMP/ConstructDecompositionT.h | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index 5bb1f3f36b65e..047baa3a79f5d 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -209,8 +209,6 @@ struct ConstructDecompositionT { bool applyClause(const tomp::clause::CollapseT &clause, const ClauseTy *); - bool applyClause(const tomp::clause::SizesT &clause, - const ClauseTy *); bool applyClause(const tomp::clause::PrivateT &clause, const ClauseTy *); bool @@ -484,24 +482,6 @@ bool ConstructDecompositionT::applyClause( return false; } -// FIXME(JAN): Do the correct thing, but for now we'll do the same as collapse -template -bool ConstructDecompositionT::applyClause( - const tomp::clause::SizesT &clause, - const ClauseTy *node) { - // Apply "sizes" to the innermost directive. If it's not one that - // allows it flag an error. - if (!leafs.empty()) { - auto &last = leafs.back(); - - if (llvm::omp::isAllowedClauseForDirective(last.id, node->id, version)) { - last.clauses.push_back(node); - return true; - } - } - - return false; -} // PRIVATE // [5.2:111:5-7] From f59c027f12bcbe927a8451741fa07045cf9c899e Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Tue, 26 Aug 2025 10:54:38 -0400 Subject: [PATCH 31/40] Don't do codegen for tiling if it is an inner construct. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 157b919ae7d7d..945e91d4e4790 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3894,8 +3894,8 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; switch (nestedDirective) { case llvm::omp::Directive::OMPD_tile: - // Emit the omp.loop_nest with annotation for tiling - genOMP(converter, symTable, semaCtx, eval, ompNestedLoopCons->value()); + // Skip OMPD_tile since the tile sizes will be retrieved when + // generating the omp.looop_nest op. break; default: { unsigned version = semaCtx.langOptions().OpenMPVersion; From b21af721f747a6f165e86f4bc846a82b3e98d496 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Wed, 27 Aug 2025 08:01:32 -0400 Subject: [PATCH 32/40] Remove unused stand-alone tiling code. Fix typo. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 945e91d4e4790..880cb6cf158b5 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1601,22 +1601,12 @@ genLoopNestClauses(lower::AbstractConverter &converter, clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr(); fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - for (auto &clause : clauses) { + for (auto &clause : clauses) if (clause.id == llvm::omp::Clause::OMPC_collapse) { const auto &collapse = std::get(clause.u); int64_t collapseValue = evaluate::ToInt64(collapse.v).value(); clauseOps.numCollapse = firOpBuilder.getI64IntegerAttr(collapseValue); - } else if (clause.id == llvm::omp::Clause::OMPC_sizes) { - // This case handles the stand-alone tiling construct - const auto &sizes = std::get(clause.u); - llvm::SmallVector sizeValues; - for (auto &size : sizes.v) { - int64_t sizeValue = evaluate::ToInt64(size).value(); - sizeValues.push_back(sizeValue); - } - clauseOps.tileSizes = sizeValues; } - } llvm::SmallVector sizeValues; auto *ompCons{eval.getIf()}; @@ -3895,7 +3885,7 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, switch (nestedDirective) { case llvm::omp::Directive::OMPD_tile: // Skip OMPD_tile since the tile sizes will be retrieved when - // generating the omp.looop_nest op. + // generating the omp.loop_nest op. break; default: { unsigned version = semaCtx.langOptions().OpenMPVersion; From 7820ef5a9a4564bbfebd2b88bb54473fa5916370 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Wed, 27 Aug 2025 08:33:54 -0400 Subject: [PATCH 33/40] Remove unused code. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 880cb6cf158b5..0e5ec02649219 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -407,7 +407,6 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, return; const parser::OmpClauseList *beginClauseList = nullptr; - const parser::OmpClauseList *middleClauseList = nullptr; const parser::OmpClauseList *endClauseList = nullptr; common::visit( common::visitors{ @@ -422,28 +421,6 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, beginClauseList = &std::get(beginDirective.t); - // For now we check if there is an inner OpenMPLoopConstruct, and - // extract the size clause from there - const auto &nestedOptional = - std::get>( - ompConstruct.t); - assert(nestedOptional.has_value() && - "Expected a DoConstruct or OpenMPLoopConstruct"); - const auto *innerConstruct = - std::get_if>( - &(nestedOptional.value())); - if (innerConstruct) { - const auto &innerLoopConstruct = innerConstruct->value(); - const auto &innerBegin = - std::get( - innerLoopConstruct.t); - const auto &innerDirective = - std::get(innerBegin.t); - if (innerDirective.v == llvm::omp::Directive::OMPD_tile) { - middleClauseList = - &std::get(innerBegin.t); - } - } if (auto &endDirective = std::get>( ompConstruct.t)) { @@ -457,9 +434,6 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, assert(beginClauseList && "expected begin directive"); clauses.append(makeClauses(*beginClauseList, semaCtx)); - if (middleClauseList) - clauses.append(makeClauses(*middleClauseList, semaCtx)); - if (endClauseList) clauses.append(makeClauses(*endClauseList, semaCtx)); }; From 995fc5350e10c72bd651b3c38f52ad108faba27b Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Wed, 27 Aug 2025 13:52:56 -0400 Subject: [PATCH 34/40] Address review comments --- flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 32 ++++++++- flang/lib/Lower/OpenMP/ClauseProcessor.h | 8 ++- flang/lib/Lower/OpenMP/OpenMP.cpp | 21 ++---- flang/lib/Lower/OpenMP/Utils.cpp | 9 ++- flang/lib/Lower/OpenMP/Utils.h | 2 +- .../mlir/Dialect/OpenMP/OpenMPClauses.td | 65 ++++++++++--------- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 14 ++-- .../Conversion/SCFToOpenMP/SCFToOpenMP.cpp | 7 +- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 12 ++-- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 54 +++++++-------- 10 files changed, 120 insertions(+), 104 deletions(-) diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 23f0ca14e931d..e253b7a6ce516 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -271,12 +271,25 @@ bool ClauseProcessor::processCancelDirectiveName( return true; } -bool ClauseProcessor::processCollapse( +bool ClauseProcessor::processLoopNests( mlir::Location currentLocation, lower::pft::Evaluation &eval, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv) const { - return collectLoopRelatedInfo(converter, currentLocation, eval, clauses, - result, iv); + int64_t numCollapse = collectLoopRelatedInfo(converter, currentLocation, eval, + clauses, result, iv); + return numCollapse > 1; +} + +bool ClauseProcessor::processCollapse( + mlir::Location currentLocation, lower::pft::Evaluation &eval, + mlir::omp::LoopNestOperands &result, + llvm::SmallVectorImpl &iv) const { + + int64_t numCollapse = collectLoopRelatedInfo(converter, currentLocation, eval, + clauses, result, iv); + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + result.collapseNumLoops = firOpBuilder.getI64IntegerAttr(numCollapse); + return numCollapse > 1; } bool ClauseProcessor::processDevice(lower::StatementContext &stmtCtx, @@ -522,6 +535,19 @@ bool ClauseProcessor::processProcBind( return false; } +bool ClauseProcessor::processTileSizes( + lower::pft::Evaluation &eval, mlir::omp::LoopNestOperands &result) const { + bool found = false; + llvm::SmallVector sizeValues; + auto *ompCons{eval.getIf()}; + collectTileSizesFromOpenMPConstruct(ompCons, sizeValues, semaCtx); + if (sizeValues.size() > 0) { + found = true; + result.tileSizes = sizeValues; + } + return found; +} + bool ClauseProcessor::processSafelen( mlir::omp::SafelenClauseOps &result) const { if (auto *clause = findUniqueClause()) { diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index c46bdb348a3ef..f88cad5c7820a 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -63,8 +63,12 @@ class ClauseProcessor { mlir::omp::CancelDirectiveNameClauseOps &result) const; bool processCollapse(mlir::Location currentLocation, lower::pft::Evaluation &eval, - mlir::omp::LoopRelatedClauseOps &result, + mlir::omp::LoopNestOperands &result, llvm::SmallVectorImpl &iv) const; + bool + processLoopNests(mlir::Location currentLocation, lower::pft::Evaluation &eval, + mlir::omp::LoopRelatedClauseOps &result, + llvm::SmallVectorImpl &iv) const; bool processDevice(lower::StatementContext &stmtCtx, mlir::omp::DeviceClauseOps &result) const; bool processDeviceType(mlir::omp::DeviceTypeClauseOps &result) const; @@ -98,6 +102,8 @@ class ClauseProcessor { bool processPriority(lower::StatementContext &stmtCtx, mlir::omp::PriorityClauseOps &result) const; bool processProcBind(mlir::omp::ProcBindClauseOps &result) const; + bool processTileSizes(lower::pft::Evaluation &eval, + mlir::omp::LoopNestOperands &result) const; bool processSafelen(mlir::omp::SafelenClauseOps &result) const; bool processSchedule(lower::StatementContext &stmtCtx, mlir::omp::ScheduleClauseOps &result) const; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 0e5ec02649219..d618c59031b54 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -504,7 +504,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_distribute: case OMPD_distribute_simd: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); + cp.processLoopNests(loc, eval, hostInfo->ops, hostInfo->iv); break; case OMPD_teams: @@ -523,7 +523,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); + cp.processLoopNests(loc, eval, hostInfo->ops, hostInfo->iv); cp.processNumTeams(stmtCtx, hostInfo->ops); break; @@ -534,7 +534,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, cp.processNumTeams(stmtCtx, hostInfo->ops); [[fallthrough]]; case OMPD_loop: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); + cp.processLoopNests(loc, eval, hostInfo->ops, hostInfo->iv); break; case OMPD_teams_workdistribute: @@ -1573,20 +1573,7 @@ genLoopNestClauses(lower::AbstractConverter &converter, cp.processCollapse(loc, eval, clauseOps, iv); clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr(); - - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - for (auto &clause : clauses) - if (clause.id == llvm::omp::Clause::OMPC_collapse) { - const auto &collapse = std::get(clause.u); - int64_t collapseValue = evaluate::ToInt64(collapse.v).value(); - clauseOps.numCollapse = firOpBuilder.getI64IntegerAttr(collapseValue); - } - - llvm::SmallVector sizeValues; - auto *ompCons{eval.getIf()}; - collectTileSizesFromOpenMPConstruct(ompCons, sizeValues, semaCtx); - if (sizeValues.size() > 0) - clauseOps.tileSizes = sizeValues; + cp.processTileSizes(eval, clauseOps); } static void genLoopClauses( diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 20a7b12f66d22..4b118e47fc3b3 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -638,12 +638,12 @@ void collectTileSizesFromOpenMPConstruct( } } -bool collectLoopRelatedInfo( +int64_t collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, lower::pft::Evaluation &eval, const omp::List &clauses, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv) { - bool found = false; + int64_t numCollapse = 1; fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); // Collect the loops to collapse. @@ -656,7 +656,7 @@ bool collectLoopRelatedInfo( if (auto *clause = ClauseFinder::findUniqueClause(clauses)) { collapseValue = evaluate::ToInt64(clause->v).value(); - found = true; + numCollapse = collapseValue; } // Collect sizes from tile directive if present @@ -685,7 +685,6 @@ bool collectLoopRelatedInfo( if (const auto tclause{ std::get_if(&clause.u)}) { sizesLengthValue = tclause->v.size(); - found = true; } } } @@ -728,7 +727,7 @@ bool collectLoopRelatedInfo( convertLoopBounds(converter, currentLocation, result, loopVarTypeSize); - return found; + return numCollapse; } } // namespace omp diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index e585eb463e9ce..5f191d89ae205 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -159,7 +159,7 @@ void genObjectList(const ObjectList &objects, void lastprivateModifierNotSupported(const omp::clause::Lastprivate &lastp, mlir::Location loc); -bool collectLoopRelatedInfo( +int64_t collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, lower::pft::Evaluation &eval, const omp::List &clauses, mlir::omp::LoopRelatedClauseOps &result, diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td index eb836db890738..d62fc84afc160 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td @@ -209,6 +209,23 @@ class OpenMP_BindClauseSkip< def OpenMP_BindClause : OpenMP_BindClauseSkip<>; +//===----------------------------------------------------------------------===// +// V5.2: [4.4.3] `collapse` clause +//===----------------------------------------------------------------------===// + +class OpenMP_CollapseClauseSkip< + bit traits = false, bit arguments = false, bit assemblyFormat = false, + bit description = false, bit extraClassDeclaration = false + > : OpenMP_Clause { + let arguments = (ins + ConfinedAttr, [IntMinValue<1>]> + :$collapse_num_loops + ); +} + +def OpenMP_CollapseClause : OpenMP_CollapseClauseSkip<>; + //===----------------------------------------------------------------------===// // V5.2: [5.7.2] `copyprivate` clause //===----------------------------------------------------------------------===// @@ -317,38 +334,6 @@ class OpenMP_DeviceClauseSkip< def OpenMP_DeviceClause : OpenMP_DeviceClauseSkip<>; -//===----------------------------------------------------------------------===// -// V5.2: [XX.X] `collapse` clause -//===----------------------------------------------------------------------===// - -class OpenMP_CollapseClauseSkip< - bit traits = false, bit arguments = false, bit assemblyFormat = false, - bit description = false, bit extraClassDeclaration = false - > : OpenMP_Clause { - let arguments = (ins - DefaultValuedOptionalAttr:$num_collapse - ); -} - -def OpenMP_CollapseClause : OpenMP_CollapseClauseSkip<>; - -//===----------------------------------------------------------------------===// -// V5.2: [xx.x] `sizes` clause -//===----------------------------------------------------------------------===// - -class OpenMP_TileSizesClauseSkip< - bit traits = false, bit arguments = false, bit assemblyFormat = false, - bit description = false, bit extraClassDeclaration = false - > : OpenMP_Clause { - let arguments = (ins - OptionalAttr:$tile_sizes - ); -} - -def OpenMP_TileSizesClause : OpenMP_TileSizesClauseSkip<>; - //===----------------------------------------------------------------------===// // V5.2: [11.6.1] `dist_schedule` clause //===----------------------------------------------------------------------===// @@ -1355,6 +1340,22 @@ class OpenMP_SimdlenClauseSkip< def OpenMP_SimdlenClause : OpenMP_SimdlenClauseSkip<>; +//===----------------------------------------------------------------------===// +// V5.2: [9.1.1] `sizes` clause +//===----------------------------------------------------------------------===// + +class OpenMP_TileSizesClauseSkip< + bit traits = false, bit arguments = false, bit assemblyFormat = false, + bit description = false, bit extraClassDeclaration = false + > : OpenMP_Clause { + let arguments = (ins + OptionalAttr:$tile_sizes + ); +} + +def OpenMP_TileSizesClause : OpenMP_TileSizesClauseSkip<>; + //===----------------------------------------------------------------------===// // V5.2: [5.5.9] `task_reduction` clause //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index e17315d923317..a3173be0ae871 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -614,15 +614,19 @@ def WorkshareLoopWrapperOp : OpenMP_Op<"workshare.loop_wrapper", traits = [ def LoopNestOp : OpenMP_Op<"loop_nest", traits = [ RecursiveMemoryEffects, SameVariadicOperandSize ], clauses = [ - OpenMP_LoopRelatedClause, OpenMP_CollapseClause, + OpenMP_LoopRelatedClause, OpenMP_TileSizesClause ], singleRegion = true> { let summary = "rectangular loop nest"; let description = [{ - This operation represents a collapsed rectangular loop nest. For each - rectangular loop of the nest represented by an instance of this operation, - lower and upper bounds, as well as a step variable, must be defined. + This operation represents a rectangular loop nest which may be collapsed + and/or tiled. For each rectangular loop of the nest represented by an + instance of this operation, lower and upper bounds, as well as a step + variable, must be defined. The collapse clause specifies how many loops + that should be collapsed (1 if no collapse is done) after any tiling is + performed. The tiling sizes is represented by the tile sizes clause. + The lower and upper bounds specify a half-open range: the range includes the lower bound but does not include the upper bound. If the `loop_inclusive` @@ -635,7 +639,7 @@ def LoopNestOp : OpenMP_Op<"loop_nest", traits = [ `loop_steps` arguments. ```mlir - omp.loop_nest (%i1, %i2) : i32 = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) { + omp.loop_nest (%i1, %i2) : i32 = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) collapse(2) tiles(5,5) { %a = load %arrA[%i1, %i2] : memref %b = load %arrB[%i1, %i2] : memref %sum = arith.addf %a, %b : f32 diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp index 19fbefb48a378..460595ba9f254 100644 --- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp +++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp @@ -492,9 +492,10 @@ struct ParallelOpLowering : public OpRewritePattern { // Create loop nest and populate region with contents of scf.parallel. auto loopOp = omp::LoopNestOp::create( - rewriter, parallelOp.getLoc(), parallelOp.getLowerBound(), - parallelOp.getUpperBound(), parallelOp.getStep(), false, - parallelOp.getLowerBound().size(), nullptr); + rewriter, parallelOp.getLoc(), parallelOp.getLowerBound().size(), + parallelOp.getLowerBound(), parallelOp.getUpperBound(), + parallelOp.getStep(), /*loop_inclusive=*/false, + /*tile_sizes=*/nullptr); rewriter.inlineRegionBefore(parallelOp.getRegion(), loopOp.getRegion(), loopOp.getRegion().begin()); diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 8768eed13cf32..aa88b9e8eef5a 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -2980,7 +2980,7 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) { return failure(); if (value > 1) result.addAttribute( - "num_collapse", + "collapse_num_loops", IntegerAttr::get(parser.getBuilder().getI64Type(), value)); // Parse tiles @@ -3024,7 +3024,7 @@ void LoopNestOp::print(OpAsmPrinter &p) { if (getLoopInclusive()) p << "inclusive "; p << "step (" << getLoopSteps() << ") "; - if (int64_t numCollapse = getNumCollapse()) + if (int64_t numCollapse = getCollapseNumLoops()) if (numCollapse > 1) p << "collapse(" << numCollapse << ") "; @@ -3037,9 +3037,9 @@ void LoopNestOp::print(OpAsmPrinter &p) { void LoopNestOp::build(OpBuilder &builder, OperationState &state, const LoopNestOperands &clauses) { MLIRContext *ctx = builder.getContext(); - LoopNestOp::build(builder, state, clauses.loopLowerBounds, - clauses.loopUpperBounds, clauses.loopSteps, - clauses.loopInclusive, clauses.numCollapse, + LoopNestOp::build(builder, state, clauses.collapseNumLoops, + clauses.loopLowerBounds, clauses.loopUpperBounds, + clauses.loopSteps, clauses.loopInclusive, makeDenseI64ArrayAttr(ctx, clauses.tileSizes)); } @@ -3058,7 +3058,7 @@ LogicalResult LoopNestOp::verify() { uint64_t numIVs = getIVs().size(); - if (const auto &numCollapse = getNumCollapse()) + if (const auto &numCollapse = getCollapseNumLoops()) if (numCollapse > numIVs) return emitOpError() << "collapse value is larger than the number of loops"; diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index c2017398bf264..31e019d4bf8d8 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3044,9 +3044,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy afterIP = loopInfos.front()->getAfterIP(); - llvm::CanonicalLoopInfo *NewTopLoopInfo = nullptr; - - // Do tiling + // Do tiling. if (const auto &tiles = loopOp.getTileSizes()) { llvm::Type *IVType = loopInfos.front()->getIndVarType(); SmallVector TileSizes; @@ -3056,41 +3054,35 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, TileSizes.push_back(TileVal); } - std::vector NewLoops = + std::vector newLoops = ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes); // Update afterIP to get the correct insertion point after // tiling. - llvm::BasicBlock *AfterBB = NewLoops.front()->getAfter(); - llvm::BasicBlock *AfterAfterBB = AfterBB->getSingleSuccessor(); - afterIP = {AfterAfterBB, AfterAfterBB->begin()}; - NewTopLoopInfo = NewLoops[0]; + llvm::BasicBlock *afterBB = newLoops.front()->getAfter(); + llvm::BasicBlock *afterAfterBB = afterBB->getSingleSuccessor(); + afterIP = {afterAfterBB, afterAfterBB->begin()}; - // Update the loop infos + // Update the loop infos. loopInfos.clear(); - for (const auto &newLoop : NewLoops) + for (const auto &newLoop : newLoops) loopInfos.push_back(newLoop); - } // Tiling done - - // Do collapse - if (const auto &numCollapse = loopOp.getNumCollapse()) { - SmallVector collapseLoopInfos( - loopInfos.begin(), loopInfos.begin() + (numCollapse)); - - auto newLoopInfo = - ompBuilder->collapseLoops(ompLoc.DL, collapseLoopInfos, {}); - NewTopLoopInfo = newLoopInfo; - } // Collapse done - - // Update the stack frame created for this loop to point to the resulting - // loop after applying transformations. - if (NewTopLoopInfo) { - moduleTranslation.stackWalk( - [&](OpenMPLoopInfoStackFrame &frame) { - frame.loopInfo = NewTopLoopInfo; - return WalkResult::interrupt(); - }); - } + } // Tiling done. + + // Do collapse. + const auto &numCollapse = loopOp.getCollapseNumLoops(); + SmallVector collapseLoopInfos( + loopInfos.begin(), loopInfos.begin() + (numCollapse)); + + auto newTopLoopInfo = + ompBuilder->collapseLoops(ompLoc.DL, collapseLoopInfos, {}); + + assert(newTopLoopInfo && "New top loop information is missing"); + moduleTranslation.stackWalk( + [&](OpenMPLoopInfoStackFrame &frame) { + frame.loopInfo = newTopLoopInfo; + return WalkResult::interrupt(); + }); // Continue building IR after the loop. Note that the LoopInfo returned by // `collapseLoops` points inside the outermost loop and is intended for From 62b3ed17dd11eb12ad9137d381cdd914ef70a335 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Wed, 27 Aug 2025 22:15:08 -0400 Subject: [PATCH 35/40] Move include line --- flang/lib/Lower/OpenMP/Utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 4b118e47fc3b3..42103a302838c 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -24,8 +24,8 @@ #include #include #include -#include #include +#include #include #include From 71ed212f1743fce20a57e7789053b38a9e959584 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Wed, 27 Aug 2025 22:28:25 -0400 Subject: [PATCH 36/40] Move include to correct location hopefully. --- flang/lib/Lower/OpenMP/Utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 42103a302838c..886a77b11c6bc 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -24,9 +24,9 @@ #include #include #include -#include #include #include +#include #include #include From a0435d014f8208d96f4952c1320258e01fe5b580 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Thu, 28 Aug 2025 11:05:24 -0400 Subject: [PATCH 37/40] Make sure collapse is progagated correctly on the host side. --- flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 16 ++++------------ flang/lib/Lower/OpenMP/ClauseProcessor.h | 7 ++----- flang/lib/Lower/OpenMP/OpenMP.cpp | 8 ++++---- .../mlir/Dialect/OpenMP/OpenMPClauseOperands.h | 2 +- 4 files changed, 11 insertions(+), 22 deletions(-) diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index e253b7a6ce516..598cb613c6765 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -271,24 +271,16 @@ bool ClauseProcessor::processCancelDirectiveName( return true; } -bool ClauseProcessor::processLoopNests( - mlir::Location currentLocation, lower::pft::Evaluation &eval, - mlir::omp::LoopRelatedClauseOps &result, - llvm::SmallVectorImpl &iv) const { - int64_t numCollapse = collectLoopRelatedInfo(converter, currentLocation, eval, - clauses, result, iv); - return numCollapse > 1; -} - bool ClauseProcessor::processCollapse( mlir::Location currentLocation, lower::pft::Evaluation &eval, - mlir::omp::LoopNestOperands &result, + mlir::omp::LoopRelatedClauseOps &loopResult, + mlir::omp::CollapseClauseOps &collapseResult, llvm::SmallVectorImpl &iv) const { int64_t numCollapse = collectLoopRelatedInfo(converter, currentLocation, eval, - clauses, result, iv); + clauses, loopResult, iv); fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - result.collapseNumLoops = firOpBuilder.getI64IntegerAttr(numCollapse); + collapseResult.collapseNumLoops = firOpBuilder.getI64IntegerAttr(numCollapse); return numCollapse > 1; } diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index f88cad5c7820a..324ea3c1047a5 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -63,12 +63,9 @@ class ClauseProcessor { mlir::omp::CancelDirectiveNameClauseOps &result) const; bool processCollapse(mlir::Location currentLocation, lower::pft::Evaluation &eval, - mlir::omp::LoopNestOperands &result, + mlir::omp::LoopRelatedClauseOps &loopResult, + mlir::omp::CollapseClauseOps &collapseResult, llvm::SmallVectorImpl &iv) const; - bool - processLoopNests(mlir::Location currentLocation, lower::pft::Evaluation &eval, - mlir::omp::LoopRelatedClauseOps &result, - llvm::SmallVectorImpl &iv) const; bool processDevice(lower::StatementContext &stmtCtx, mlir::omp::DeviceClauseOps &result) const; bool processDeviceType(mlir::omp::DeviceTypeClauseOps &result) const; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index d618c59031b54..4aca8026a1fc6 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -504,7 +504,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_distribute: case OMPD_distribute_simd: - cp.processLoopNests(loc, eval, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); break; case OMPD_teams: @@ -523,7 +523,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: - cp.processLoopNests(loc, eval, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); cp.processNumTeams(stmtCtx, hostInfo->ops); break; @@ -534,7 +534,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, cp.processNumTeams(stmtCtx, hostInfo->ops); [[fallthrough]]; case OMPD_loop: - cp.processLoopNests(loc, eval, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); break; case OMPD_teams_workdistribute: @@ -1570,7 +1570,7 @@ genLoopNestClauses(lower::AbstractConverter &converter, HostEvalInfo *hostEvalInfo = getHostEvalInfoStackTop(converter); if (!hostEvalInfo || !hostEvalInfo->apply(clauseOps, iv)) - cp.processCollapse(loc, eval, clauseOps, iv); + cp.processCollapse(loc, eval, clauseOps, clauseOps, iv); clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr(); cp.processTileSizes(eval, clauseOps); diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h index faf820dcfdb29..6a92b136ef51c 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h @@ -40,7 +40,7 @@ struct DeviceTypeClauseOps { /// Clauses that correspond to operations other than omp.target, but might have /// to be evaluated outside of a parent target region. using HostEvaluatedOperands = - detail::Clauses; // TODO: Add `indirect` clause. From 4f2f7eb25bb309c78a6cdacc2452ae10a6155d90 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Thu, 28 Aug 2025 11:36:34 -0400 Subject: [PATCH 38/40] Address more review comments. --- flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 10 ++---- flang/lib/Lower/OpenMP/Utils.cpp | 8 ++--- .../mlir/Dialect/OpenMP/OpenMPClauses.td | 32 +++++++++---------- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 3 +- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 4 +-- 5 files changed, 25 insertions(+), 32 deletions(-) diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 598cb613c6765..a96884f5680ba 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -529,15 +529,9 @@ bool ClauseProcessor::processProcBind( bool ClauseProcessor::processTileSizes( lower::pft::Evaluation &eval, mlir::omp::LoopNestOperands &result) const { - bool found = false; - llvm::SmallVector sizeValues; auto *ompCons{eval.getIf()}; - collectTileSizesFromOpenMPConstruct(ompCons, sizeValues, semaCtx); - if (sizeValues.size() > 0) { - found = true; - result.tileSizes = sizeValues; - } - return found; + collectTileSizesFromOpenMPConstruct(ompCons, result.tileSizes, semaCtx); + return !result.tileSizes.empty(); } bool ClauseProcessor::processSafelen( diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 886a77b11c6bc..671ed872eaaf2 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -599,7 +599,7 @@ static void convertLoopBounds(lower::AbstractConverter &converter, } /// Populates the sizes vector with values if the given OpenMPConstruct -/// Contains a loop construct with an inner tiling construct. +/// contains a loop construct with an inner tiling construct. void collectTileSizesFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, llvm::SmallVectorImpl &tileSizes, SemanticsContext &semaCtx) { @@ -632,6 +632,7 @@ void collectTileSizesFromOpenMPConstruct( if (const auto v{EvaluateInt64(semaCtx, tval)}) tileSizes.push_back(*v); } + break; } } } @@ -685,15 +686,14 @@ int64_t collectLoopRelatedInfo( if (const auto tclause{ std::get_if(&clause.u)}) { sizesLengthValue = tclause->v.size(); + break; } } } } } - collapseValue = collapseValue - sizesLengthValue; - collapseValue = - collapseValue < sizesLengthValue ? sizesLengthValue : collapseValue; + collapseValue = std::max(collapseValue, sizesLengthValue); std::size_t loopVarTypeSize = 0; do { lower::pft::Evaluation *doLoop = diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td index d62fc84afc160..5f40abe62a0f6 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td @@ -1340,22 +1340,6 @@ class OpenMP_SimdlenClauseSkip< def OpenMP_SimdlenClause : OpenMP_SimdlenClauseSkip<>; -//===----------------------------------------------------------------------===// -// V5.2: [9.1.1] `sizes` clause -//===----------------------------------------------------------------------===// - -class OpenMP_TileSizesClauseSkip< - bit traits = false, bit arguments = false, bit assemblyFormat = false, - bit description = false, bit extraClassDeclaration = false - > : OpenMP_Clause { - let arguments = (ins - OptionalAttr:$tile_sizes - ); -} - -def OpenMP_TileSizesClause : OpenMP_TileSizesClauseSkip<>; - //===----------------------------------------------------------------------===// // V5.2: [5.5.9] `task_reduction` clause //===----------------------------------------------------------------------===// @@ -1418,6 +1402,22 @@ class OpenMP_ThreadLimitClauseSkip< def OpenMP_ThreadLimitClause : OpenMP_ThreadLimitClauseSkip<>; +//===----------------------------------------------------------------------===// +// V5.2: [9.1.1] `sizes` clause +//===----------------------------------------------------------------------===// + +class OpenMP_TileSizesClauseSkip< + bit traits = false, bit arguments = false, bit assemblyFormat = false, + bit description = false, bit extraClassDeclaration = false + > : OpenMP_Clause { + let arguments = (ins + OptionalAttr:$tile_sizes + ); +} + +def OpenMP_TileSizesClause : OpenMP_TileSizesClauseSkip<>; + //===----------------------------------------------------------------------===// // V5.2: [12.1] `untied` clause //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index a3173be0ae871..830b36f440098 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -627,7 +627,6 @@ def LoopNestOp : OpenMP_Op<"loop_nest", traits = [ that should be collapsed (1 if no collapse is done) after any tiling is performed. The tiling sizes is represented by the tile sizes clause. - The lower and upper bounds specify a half-open range: the range includes the lower bound but does not include the upper bound. If the `loop_inclusive` attribute is specified then the upper bound is also included. @@ -639,7 +638,7 @@ def LoopNestOp : OpenMP_Op<"loop_nest", traits = [ `loop_steps` arguments. ```mlir - omp.loop_nest (%i1, %i2) : i32 = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) collapse(2) tiles(5,5) { + omp.loop_nest (%i1, %i2) : i32 = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) collapse(2) tiles(5,5) { %a = load %arrA[%i1, %i2] : memref %b = load %arrB[%i1, %i2] : memref %sum = arith.addf %a, %b : f32 diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 31e019d4bf8d8..95f513f955059 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3046,11 +3046,11 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, // Do tiling. if (const auto &tiles = loopOp.getTileSizes()) { - llvm::Type *IVType = loopInfos.front()->getIndVarType(); + llvm::Type *ivType = loopInfos.front()->getIndVarType(); SmallVector TileSizes; for (auto tile : tiles.value()) { - llvm::Value *TileVal = llvm::ConstantInt::get(IVType, tile); + llvm::Value *TileVal = llvm::ConstantInt::get(ivType, tile); TileSizes.push_back(TileVal); } From a687d9742a353a568f10de9ecde7e2dc87ee7caa Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Wed, 3 Sep 2025 10:53:26 -0400 Subject: [PATCH 39/40] Address review comments --- flang/lib/Lower/OpenMP/OpenMP.cpp | 1 - flang/lib/Lower/OpenMP/Utils.cpp | 21 +++++++++++-------- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 8 +++---- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 4aca8026a1fc6..0ec33e6b24dbf 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -49,7 +49,6 @@ using namespace Fortran::lower::omp; using namespace Fortran::common::openmp; using namespace Fortran::utils::openmp; -using namespace Fortran::semantics; //===----------------------------------------------------------------------===// // Code generation helper functions diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 671ed872eaaf2..06c07ee7130e0 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -31,13 +31,14 @@ #include -using namespace Fortran::semantics; - template -MaybeIntExpr EvaluateIntExpr(SemanticsContext &context, const T &expr) { - if (MaybeExpr maybeExpr{ +Fortran::semantics::MaybeIntExpr +EvaluateIntExpr(Fortran::semantics::SemanticsContext &context, const T &expr) { + if (Fortran::semantics::MaybeExpr maybeExpr{ Fold(context.foldingContext(), AnalyzeExpr(context, expr))}) { - if (auto *intExpr{Fortran::evaluate::UnwrapExpr(*maybeExpr)}) { + if (auto *intExpr{ + Fortran::evaluate::UnwrapExpr( + *maybeExpr)}) { return std::move(*intExpr); } } @@ -45,8 +46,8 @@ MaybeIntExpr EvaluateIntExpr(SemanticsContext &context, const T &expr) { } template -std::optional EvaluateInt64(SemanticsContext &context, - const T &expr) { +std::optional +EvaluateInt64(Fortran::semantics::SemanticsContext &context, const T &expr) { return Fortran::evaluate::ToInt64(EvaluateIntExpr(context, expr)); } @@ -602,7 +603,8 @@ static void convertLoopBounds(lower::AbstractConverter &converter, /// contains a loop construct with an inner tiling construct. void collectTileSizesFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, - llvm::SmallVectorImpl &tileSizes, SemanticsContext &semaCtx) { + llvm::SmallVectorImpl &tileSizes, + Fortran::semantics::SemanticsContext &semaCtx) { if (!ompCons) return; @@ -625,7 +627,7 @@ void collectTileSizesFromOpenMPConstruct( // Get the size values from parse tree and convert to a vector const auto &innerClauseList{ std::get(innerBegin.t)}; - for (const auto &clause : innerClauseList.v) + for (const auto &clause : innerClauseList.v) { if (const auto tclause{ std::get_if(&clause.u)}) { for (auto &tval : tclause->v) { @@ -634,6 +636,7 @@ void collectTileSizesFromOpenMPConstruct( } break; } + } } } } diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 95f513f955059..2ab6bb0a73200 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3047,15 +3047,15 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, // Do tiling. if (const auto &tiles = loopOp.getTileSizes()) { llvm::Type *ivType = loopInfos.front()->getIndVarType(); - SmallVector TileSizes; + SmallVector tileSizes; for (auto tile : tiles.value()) { - llvm::Value *TileVal = llvm::ConstantInt::get(ivType, tile); - TileSizes.push_back(TileVal); + llvm::Value *tileVal = llvm::ConstantInt::get(ivType, tile); + tileSizes.push_back(tileVal); } std::vector newLoops = - ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes); + ompBuilder->tileLoops(ompLoc.DL, loopInfos, tileSizes); // Update afterIP to get the correct insertion point after // tiling. From d1b69c966df71b39b1fa89100846f45883c91ed1 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Tue, 9 Sep 2025 10:51:37 -0400 Subject: [PATCH 40/40] Use shared helper function to find the sizes clause. --- flang/lib/Lower/OpenMP/Utils.cpp | 65 ++++++++++++-------------------- 1 file changed, 25 insertions(+), 40 deletions(-) diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 06c07ee7130e0..d1d1cd68a5b44 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -599,15 +599,13 @@ static void convertLoopBounds(lower::AbstractConverter &converter, } } -/// Populates the sizes vector with values if the given OpenMPConstruct -/// contains a loop construct with an inner tiling construct. -void collectTileSizesFromOpenMPConstruct( +// Helper function that finds the sizes clause in a inner OMPD_tile directive +// and passes the sizes clause to the callback function if found. +static void processTileSizesFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, - llvm::SmallVectorImpl &tileSizes, - Fortran::semantics::SemanticsContext &semaCtx) { + std::function processFun) { if (!ompCons) return; - if (auto *ompLoop{std::get_if(&ompCons->u)}) { const auto &nestedOptional = std::get>(ompLoop->t); @@ -624,16 +622,13 @@ void collectTileSizesFromOpenMPConstruct( std::get(innerBegin.t).v; if (innerDirective == llvm::omp::Directive::OMPD_tile) { - // Get the size values from parse tree and convert to a vector + // Get the size values from parse tree and convert to a vector. const auto &innerClauseList{ std::get(innerBegin.t)}; for (const auto &clause : innerClauseList.v) { if (const auto tclause{ std::get_if(&clause.u)}) { - for (auto &tval : tclause->v) { - if (const auto v{EvaluateInt64(semaCtx, tval)}) - tileSizes.push_back(*v); - } + processFun(tclause); break; } } @@ -642,6 +637,20 @@ void collectTileSizesFromOpenMPConstruct( } } +/// Populates the sizes vector with values if the given OpenMPConstruct +/// contains a loop construct with an inner tiling construct. +void collectTileSizesFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + llvm::SmallVectorImpl &tileSizes, + Fortran::semantics::SemanticsContext &semaCtx) { + processTileSizesFromOpenMPConstruct( + ompCons, [&](const parser::OmpClause::Sizes *tclause) { + for (auto &tval : tclause->v) + if (const auto v{EvaluateInt64(semaCtx, tval)}) + tileSizes.push_back(*v); + }); +} + int64_t collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, lower::pft::Evaluation &eval, const omp::List &clauses, @@ -663,37 +672,13 @@ int64_t collectLoopRelatedInfo( numCollapse = collapseValue; } - // Collect sizes from tile directive if present + // Collect sizes from tile directive if present. std::int64_t sizesLengthValue = 0l; if (auto *ompCons{eval.getIf()}) { - if (auto *ompLoop{std::get_if(&ompCons->u)}) { - const auto &nestedOptional = - std::get>(ompLoop->t); - assert(nestedOptional.has_value() && - "Expected a DoConstruct or OpenMPLoopConstruct"); - const auto *innerConstruct = - std::get_if>( - &(nestedOptional.value())); - if (innerConstruct) { - const auto &innerLoopDirective = innerConstruct->value(); - const auto &innerBegin = - std::get(innerLoopDirective.t); - const auto &innerDirective = - std::get(innerBegin.t).v; - - if (innerDirective == llvm::omp::Directive::OMPD_tile) { - // Get the size values from parse tree and convert to a vector - const auto &innerClauseList{ - std::get(innerBegin.t)}; - for (const auto &clause : innerClauseList.v) - if (const auto tclause{ - std::get_if(&clause.u)}) { - sizesLengthValue = tclause->v.size(); - break; - } - } - } - } + processTileSizesFromOpenMPConstruct( + ompCons, [&](const parser::OmpClause::Sizes *tclause) { + sizesLengthValue = tclause->v.size(); + }); } collapseValue = std::max(collapseValue, sizesLengthValue);