From 14d7115fdf75fd1cdbab38e9810976794ebf9eba Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 14 Aug 2025 12:05:43 +0200 Subject: [PATCH 01/57] [flang][OpenMP] MSVC buildbot fix PR #153488 caused the MSVC build (https://lab.llvm.org/buildbot/#/builders/166/builds/1397) to fail: ``` ..\llvm-project\flang\include\flang/Evaluate/rewrite.h(78): error C2668: 'Fortran::evaluate::rewrite::Identity::operator ()': ambiguous call to overloaded function ..\llvm-project\flang\include\flang/Evaluate/rewrite.h(43): note: could be 'Fortran::evaluate::Expr Fortran::evaluate::rewrite::Identity::operator ()(Fortran::evaluate::Expr &&,const U &)' with [ S=Fortran::evaluate::value::Integer<128,true,32,unsigned int,unsigned __int64,128>, U=Fortran::evaluate::value::Integer<128,true,32,unsigned int,unsigned __int64,128> ] ..\llvm-project\flang\lib\Semantics\check-omp-atomic.cpp(174): note: or 'Fortran::evaluate::Expr Fortran::semantics::ReassocRewriter::operator ()(Fortran::evaluate::Expr &&,const U &,Fortran::semantics::ReassocRewriter::NonIntegralTag)' with [ S=Fortran::evaluate::value::Integer<128,true,32,unsigned int,unsigned __int64,128>, U=Fortran::evaluate::value::Integer<128,true,32,unsigned int,unsigned __int64,128> ] ..\llvm-project\flang\include\flang/Evaluate/rewrite.h(78): note: while trying to match the argument list '(Fortran::evaluate::Expr, const S)' with [ S=Fortran::evaluate::value::Integer<128,true,32,unsigned int,unsigned __int64,128> ] ..\llvm-project\flang\include\flang/Evaluate/rewrite.h(78): note: the template instantiation context (the oldest one first) is ..\llvm-project\flang\lib\Semantics\check-omp-atomic.cpp(814): note: see reference to function template instantiation 'U Fortran::evaluate::rewrite::Mutator::operator ()&,Fortran::evaluate::Expr>(T)' being compiled with [ U=Fortran::evaluate::Expr, T=const Fortran::evaluate::Expr & ] ``` The reason is that there is an ambiguity between operator() of ReassocRewriter itself and operator() of the base 
class Identity through `using Id::operator();`. By the C++ specification, method declarations in ReassocRewriter hide methods with the same signature from a using declaration, but this does not apply to ``` evaluate::Expr operator()(..., NonIntegralTag = {}) ``` which has a different signature due to an additional tag parameter. Since it has a default value, it is ambiguous with operator() without tag parameter. GCC and Clang both accept this, but in my understanding MSVC is correct here. Since the overloads of ReassocRewriter cover all cases, remove the using-declaration to avoid the ambiguity. --- flang/lib/Semantics/check-omp-atomic.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/flang/lib/Semantics/check-omp-atomic.cpp b/flang/lib/Semantics/check-omp-atomic.cpp index 62bb2fdc6048e..50e63d356be02 100644 --- a/flang/lib/Semantics/check-omp-atomic.cpp +++ b/flang/lib/Semantics/check-omp-atomic.cpp @@ -86,7 +86,6 @@ ReassocOp reassocOp(const Op0 &op0, const Op1 &op1) { struct ReassocRewriter : public evaluate::rewrite::Identity { using Id = evaluate::rewrite::Identity; - using Id::operator(); struct NonIntegralTag {}; ReassocRewriter(const SomeExpr &atom) : atom_(atom) {} From 036ee24936a7d77f91a7690a3d63c932be7298a1 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Fri, 14 Mar 2025 11:06:51 -0400 Subject: [PATCH 02/57] Initial implementation of tiling. 
--- flang/include/flang/Lower/OpenMP.h | 1 - flang/lib/Lower/OpenMP/OpenMP.cpp | 70 +++++-- flang/lib/Lower/OpenMP/Utils.cpp | 33 +++- flang/lib/Semantics/canonicalize-omp.cpp | 44 ++++- flang/lib/Semantics/resolve-directives.cpp | 176 ++++++++++++++---- .../Frontend/OpenMP/ConstructDecompositionT.h | 24 +++ .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 9 + llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 24 +++ llvm/lib/Transforms/Utils/CodeExtractor.cpp | 7 +- .../mlir/Dialect/OpenMP/OpenMPClauses.td | 32 ++++ mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 4 +- .../Conversion/SCFToOpenMP/SCFToOpenMP.cpp | 3 +- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 52 +++++- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 65 +++++-- 14 files changed, 458 insertions(+), 86 deletions(-) diff --git a/flang/include/flang/Lower/OpenMP.h b/flang/include/flang/Lower/OpenMP.h index 581c93f76d627..df01a7b82c66c 100644 --- a/flang/include/flang/Lower/OpenMP.h +++ b/flang/include/flang/Lower/OpenMP.h @@ -80,7 +80,6 @@ void genOpenMPDeclarativeConstruct(AbstractConverter &, void genOpenMPSymbolProperties(AbstractConverter &converter, const pft::Variable &var); -int64_t getCollapseValue(const Fortran::parser::OmpClauseList &clauseList); void genThreadprivateOp(AbstractConverter &, const pft::Variable &); void genDeclareTargetIntGlobal(AbstractConverter &, const pft::Variable &); bool isOpenMPTargetConstruct(const parser::OpenMPConstruct &); diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index ec2ec37e623f8..a01bb371411ad 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -404,6 +404,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, return; const parser::OmpClauseList *beginClauseList = nullptr; + const parser::OmpClauseList *middleClauseList = nullptr; const parser::OmpClauseList *endClauseList = nullptr; common::visit( common::visitors{ @@ -418,6 +419,22 @@ static void 
processHostEvalClauses(lower::AbstractConverter &converter, beginClauseList = &std::get(beginDirective.t); + // FIXME(JAN): For now we check if there is an inner + // OpenMPLoopConstruct, and extract the size clause from there + const auto &innerOptional = std::get>>( + ompConstruct.t); + if (innerOptional.has_value()) { + const auto &innerLoopDirective = innerOptional.value().value(); + const auto &innerBegin = + std::get(innerLoopDirective.t); + const auto &innerDirective = + std::get(innerBegin.t); + if (innerDirective.v == llvm::omp::Directive::OMPD_tile) { + middleClauseList = + &std::get(innerBegin.t); + } + } if (auto &endDirective = std::get>( ompConstruct.t)) { @@ -431,6 +448,9 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, assert(beginClauseList && "expected begin directive"); clauses.append(makeClauses(*beginClauseList, semaCtx)); + if (middleClauseList) + clauses.append(makeClauses(*middleClauseList, semaCtx)); + if (endClauseList) clauses.append(makeClauses(*endClauseList, semaCtx)); }; @@ -910,6 +930,7 @@ static void genLoopVars( storeOp = createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); } + firOpBuilder.setInsertionPointAfter(storeOp); } @@ -1660,6 +1681,23 @@ genLoopNestClauses(lower::AbstractConverter &converter, cp.processCollapse(loc, eval, clauseOps, iv); clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr(); + + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + for (auto &clause : clauses) { + if (clause.id == llvm::omp::Clause::OMPC_collapse) { + const auto &collapse = std::get(clause.u); + int64_t collapseValue = evaluate::ToInt64(collapse.v).value(); + clauseOps.numCollapse = firOpBuilder.getI64IntegerAttr(collapseValue); + } else if (clause.id == llvm::omp::Clause::OMPC_sizes) { + const auto &sizes = std::get(clause.u); + llvm::SmallVector sizeValues; + for (auto &size : sizes.v) { + int64_t sizeValue = evaluate::ToInt64(size).value(); + 
sizeValues.push_back(sizeValue); + } + clauseOps.tileSizes = sizeValues; + } + } } static void genLoopClauses( @@ -2036,9 +2074,9 @@ static mlir::omp::LoopNestOp genLoopNestOp( return llvm::SmallVector(iv); }; - auto *nestedEval = - getCollapsedLoopEval(eval, getCollapseValue(item->clauses)); - + uint64_t nestValue = getCollapseValue(item->clauses); + nestValue = nestValue < iv.size() ? iv.size() : nestValue; + auto *nestedEval = getCollapsedLoopEval(eval, nestValue); return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval, directive) @@ -3897,6 +3935,20 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, std::get(loopConstruct.t); List clauses = makeClauses( std::get(beginLoopDirective.t), semaCtx); + + const auto &innerOptional = std::get>>(loopConstruct.t); + if (innerOptional.has_value()) { + const auto &innerLoopDirective = innerOptional.value().value(); + const auto &innerBegin = + std::get(innerLoopDirective.t); + const auto &innerDirective = + std::get(innerBegin.t); + if (innerDirective.v == llvm::omp::Directive::OMPD_tile) { + clauses.append( + makeClauses(std::get(innerBegin.t), semaCtx)); + } + } + if (auto &endLoopDirective = std::get>( loopConstruct.t)) { @@ -4028,18 +4080,6 @@ void Fortran::lower::genOpenMPSymbolProperties( lower::genDeclareTargetIntGlobal(converter, var); } -int64_t -Fortran::lower::getCollapseValue(const parser::OmpClauseList &clauseList) { - for (const parser::OmpClause &clause : clauseList.v) { - if (const auto &collapseClause = - std::get_if(&clause.u)) { - const auto *expr = semantics::GetExpr(collapseClause->v); - return evaluate::ToInt64(*expr).value(); - } - } - return 1; -} - void Fortran::lower::genThreadprivateOp(lower::AbstractConverter &converter, const lower::pft::Variable &var) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 77b1e39083aa6..11721d05001b0 
100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -38,14 +38,22 @@ namespace lower { namespace omp { int64_t getCollapseValue(const List &clauses) { - auto iter = llvm::find_if(clauses, [](const Clause &clause) { - return clause.id == llvm::omp::Clause::OMPC_collapse; - }); - if (iter != clauses.end()) { - const auto &collapse = std::get(iter->u); - return evaluate::ToInt64(collapse.v).value(); + int64_t collapseValue = 1; + int64_t numTileSizes = 0; + for (auto &clause : clauses) { + if (clause.id == llvm::omp::Clause::OMPC_collapse) { + const auto &collapse = std::get(clause.u); + collapseValue = evaluate::ToInt64(collapse.v).value(); + } else if (clause.id == llvm::omp::Clause::OMPC_sizes) { + const auto &sizes = std::get(clause.u); + numTileSizes = sizes.v.size(); + } } - return 1; + + collapseValue = collapseValue - numTileSizes; + int64_t result = + collapseValue > numTileSizes ? collapseValue : numTileSizes; + return result; } void genObjectList(const ObjectList &objects, @@ -613,6 +621,7 @@ bool collectLoopRelatedInfo( lower::pft::Evaluation &eval, const omp::List &clauses, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv) { + bool found = false; fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); @@ -628,7 +637,16 @@ bool collectLoopRelatedInfo( collapseValue = evaluate::ToInt64(clause->v).value(); found = true; } + std::int64_t sizesLengthValue = 0l; + if (auto *clause = + ClauseFinder::findUniqueClause(clauses)) { + sizesLengthValue = clause->v.size(); + found = true; + } + collapseValue = collapseValue - sizesLengthValue; + collapseValue = + collapseValue < sizesLengthValue ? 
sizesLengthValue : collapseValue; std::size_t loopVarTypeSize = 0; do { lower::pft::Evaluation *doLoop = @@ -661,7 +679,6 @@ bool collectLoopRelatedInfo( } while (collapseValue > 0); convertLoopBounds(converter, currentLocation, result, loopVarTypeSize); - return found; } diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 9722eca19447d..fb0bb0f923574 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -11,6 +11,7 @@ #include "flang/Parser/parse-tree.h" #include "flang/Semantics/semantics.h" +# include // After Loop Canonicalization, rewrite OpenMP parse tree to make OpenMP // Constructs more structured which provide explicit scopes for later // structural checks and semantic analysis. @@ -117,15 +118,17 @@ class CanonicalizationOfOmp { // in the same iteration // // Original: - // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct - // OmpBeginLoopDirective + // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t-> + // OmpBeginLoopDirective t-> OmpLoopDirective + // [ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct u-> + /// OmpBeginLoopDirective t-> OmpLoopDirective t-> Tile v-> OMP_tile] // ExecutableConstruct -> DoConstruct + // [ExecutableConstruct -> OmpEndLoopDirective] // ExecutableConstruct -> OmpEndLoopDirective (if available) // // After rewriting: - // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct - // OmpBeginLoopDirective - // DoConstruct + // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t-> + // OmpBeginLoopDirective t -> OmpLoopDirective -> DoConstruct // OmpEndLoopDirective (if available) parser::Block::iterator nextIt; auto &beginDir{std::get(x.t)}; @@ -147,20 +150,41 @@ class CanonicalizationOfOmp { if (GetConstructIf(*nextIt)) continue; + // Keep track of the loops to handle the end loop directives + std::stack loops; + loops.push(&x); + while (auto *innerConstruct{ + 
GetConstructIf(*nextIt)}) { + if (auto *innerOmpLoop{ + std::get_if(&innerConstruct->u)}) { + std::get< + std::optional>>( + loops.top()->t) = std::move(*innerOmpLoop); + // Retrieveing the address so that DoConstruct or inner loop can be + // set later. + loops.push(&(std::get>>( + loops.top()->t) + .value() + .value())); + nextIt = block.erase(nextIt); + } + } if (auto *doCons{GetConstructIf(*nextIt)}) { if (doCons->GetLoopControl()) { // move DoConstruct std::get>>>(x.t) = - std::move(*doCons); + common::Indirection>>>( + loops.top()->t) = std::move(*doCons); nextIt = block.erase(nextIt); // try to match OmpEndLoopDirective - if (nextIt != block.end()) { + while (nextIt != block.end() && !loops.empty()) { if (auto *endDir{ GetConstructIf(*nextIt)}) { - std::get>(x.t) = - std::move(*endDir); + std::get>( + loops.top()->t) = std::move(*endDir); nextIt = block.erase(nextIt); + loops.pop(); } } } else { diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 6a4660c9882ab..0e87dcfdbbbb8 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -817,7 +817,28 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { const parser::OmpClause *GetAssociatedClause() { return associatedClause; } private: + std::int64_t SetAssociatedMaxClause(llvm::SmallVector &, + llvm::SmallVector &); + std::int64_t GetAssociatedLoopLevelFromLoopConstruct( + const parser::OpenMPLoopConstruct &); std::int64_t GetAssociatedLoopLevelFromClauses(const parser::OmpClauseList &); + void CollectAssociatedLoopLevelsFromLoopConstruct( + const parser::OpenMPLoopConstruct &, llvm::SmallVector &, + llvm::SmallVector &); + void CollectAssociatedLoopLevelsFromInnerLoopContruct( + const parser::OpenMPLoopConstruct &, llvm::SmallVector &, + llvm::SmallVector &); + template + void CollectAssociatedLoopLevelFromClauseValue( + const parser::OmpClause &clause, llvm::SmallVector &, + llvm::SmallVector &); + 
template + void CollectAssociatedLoopLevelFromClauseSize(const parser::OmpClause &, + llvm::SmallVector &, + llvm::SmallVector &); + void CollectAssociatedLoopLevelsFromClauses(const parser::OmpClauseList &, + llvm::SmallVector &, + llvm::SmallVector &); Symbol::Flags dataSharingAttributeFlags{Symbol::Flag::OmpShared, Symbol::Flag::OmpPrivate, Symbol::Flag::OmpFirstPrivate, @@ -1822,7 +1843,6 @@ bool OmpAttributeVisitor::Pre( bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { const auto &beginLoopDir{std::get(x.t)}; const auto &beginDir{std::get(beginLoopDir.t)}; - const auto &clauseList{std::get(beginLoopDir.t)}; switch (beginDir.v) { case llvm::omp::Directive::OMPD_distribute: case llvm::omp::Directive::OMPD_distribute_parallel_do: @@ -1873,7 +1893,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { beginDir.v == llvm::omp::Directive::OMPD_target_loop) IssueNonConformanceWarning(beginDir.v, beginDir.source, 52); ClearDataSharingAttributeObjects(); - SetContextAssociatedLoopLevel(GetAssociatedLoopLevelFromClauses(clauseList)); + SetContextAssociatedLoopLevel(GetAssociatedLoopLevelFromLoopConstruct(x)); if (beginDir.v == llvm::omp::Directive::OMPD_do) { auto &optLoopCons = std::get>(x.t); @@ -1887,7 +1907,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { } } PrivatizeAssociatedLoopIndexAndCheckLoopLevel(x); - ordCollapseLevel = GetAssociatedLoopLevelFromClauses(clauseList) + 1; + ordCollapseLevel = GetAssociatedLoopLevelFromLoopConstruct(x) + 1; return true; } @@ -1975,44 +1995,124 @@ bool OmpAttributeVisitor::Pre(const parser::DoConstruct &x) { return true; } +static bool isSizesClause(const parser::OmpClause *clause) { + return std::holds_alternative(clause->u); +} + +std::int64_t OmpAttributeVisitor::SetAssociatedMaxClause( + llvm::SmallVector &levels, + llvm::SmallVector &clauses) { + + // Find the tile level to know how much to reduce the level for collapse + std::int64_t tileLevel = 0; + for 
(auto [level, clause] : llvm::zip_equal(levels, clauses)) { + if (isSizesClause(clause)) { + tileLevel = level; + } + } + + std::int64_t maxLevel = 1; + const parser::OmpClause *maxClause = nullptr; + for (auto [level, clause] : llvm::zip_equal(levels, clauses)) { + if (tileLevel > 0 && tileLevel < level) { + context_.Say(clause->source, + "The value of the parameter in the COLLAPSE clause must" + " not be larger than the number of the number of tiled loops" + " because collapse relies on independent loop iterations."_err_en_US); + return 1; + } + + if (!isSizesClause(clause)) { + level = level - tileLevel; + } + + if (level > maxLevel) { + maxLevel = level; + maxClause = clause; + } + } + if (maxClause) + SetAssociatedClause(maxClause); + return maxLevel; +} + +std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromLoopConstruct( + const parser::OpenMPLoopConstruct &x) { + llvm::SmallVector levels; + llvm::SmallVector clauses; + + CollectAssociatedLoopLevelsFromLoopConstruct(x, levels, clauses); + return SetAssociatedMaxClause(levels, clauses); +} + std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromClauses( const parser::OmpClauseList &x) { - std::int64_t orderedLevel{0}; - std::int64_t collapseLevel{0}; + llvm::SmallVector levels; + llvm::SmallVector clauses; - const parser::OmpClause *ordClause{nullptr}; - const parser::OmpClause *collClause{nullptr}; + CollectAssociatedLoopLevelsFromClauses(x, levels, clauses); + return SetAssociatedMaxClause(levels, clauses); +} - for (const auto &clause : x.v) { - if (const auto *orderedClause{ - std::get_if(&clause.u)}) { - if (const auto v{EvaluateInt64(context_, orderedClause->v)}) { - orderedLevel = *v; - } - ordClause = &clause; - } - if (const auto *collapseClause{ - std::get_if(&clause.u)}) { - if (const auto v{EvaluateInt64(context_, collapseClause->v)}) { - collapseLevel = *v; - } - collClause = &clause; +void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromLoopConstruct( + const 
parser::OpenMPLoopConstruct &x, + llvm::SmallVector &levels, + llvm::SmallVector &clauses) { + const auto &beginLoopDir{std::get(x.t)}; + const auto &clauseList{std::get(beginLoopDir.t)}; + + CollectAssociatedLoopLevelsFromClauses(clauseList, levels, clauses); + CollectAssociatedLoopLevelsFromInnerLoopContruct(x, levels, clauses); +} + +void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromInnerLoopContruct( + const parser::OpenMPLoopConstruct &x, + llvm::SmallVector &levels, + llvm::SmallVector &clauses) { + const auto &innerOptional = + std::get>>( + x.t); + if (innerOptional.has_value()) { + CollectAssociatedLoopLevelsFromLoopConstruct( + innerOptional.value().value(), levels, clauses); + } +} + +template +void OmpAttributeVisitor::CollectAssociatedLoopLevelFromClauseValue( + const parser::OmpClause &clause, llvm::SmallVector &levels, + llvm::SmallVector &clauses) { + if (const auto tclause{std::get_if(&clause.u)}) { + std::int64_t level = 0; + if (const auto v{EvaluateInt64(context_, tclause->v)}) { + level = *v; } + levels.push_back(level); + clauses.push_back(&clause); } +} - if (orderedLevel && (!collapseLevel || orderedLevel >= collapseLevel)) { - SetAssociatedClause(ordClause); - return orderedLevel; - } else if (!orderedLevel && collapseLevel) { - SetAssociatedClause(collClause); - return collapseLevel; - } else { - SetAssociatedClause(nullptr); +template +void OmpAttributeVisitor::CollectAssociatedLoopLevelFromClauseSize( + const parser::OmpClause &clause, llvm::SmallVector &levels, + llvm::SmallVector &clauses) { + if (const auto tclause{std::get_if(&clause.u)}) { + levels.push_back(tclause->v.size()); + clauses.push_back(&clause); } - // orderedLevel < collapseLevel is an error handled in structural - // checks +} - return 1; // default is outermost loop +void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromClauses( + const parser::OmpClauseList &x, llvm::SmallVector &levels, + llvm::SmallVector &clauses) { + for (const auto &clause : x.v) { + 
CollectAssociatedLoopLevelFromClauseValue( + clause, levels, clauses); + CollectAssociatedLoopLevelFromClauseValue( + clause, levels, clauses); + CollectAssociatedLoopLevelFromClauseSize( + clause, levels, clauses); + } } // 2.15.1.1 Data-sharing Attribute Rules - Predetermined @@ -2044,10 +2144,18 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( const parser::OmpClause *clause{GetAssociatedClause()}; bool hasCollapseClause{ clause ? (clause->Id() == llvm::omp::OMPC_collapse) : false}; + const parser::OpenMPLoopConstruct *innerMostLoop = &x; + + while (auto &optLoopCons{ + std::get>(x.t)}) { + if (const auto &innerLoop{ + std::get_if < parser::OpenMPLoopConstruct >>> (innerMostLoop->t)}) { + innerMostLoop = &innerLoop.value().value(); + } + } - auto &optLoopCons = std::get>(x.t); if (optLoopCons.has_value()) { - if (const auto &outer{std::get_if(&*optLoopCons)}) { + if (const auto &outer{std::get_if(innerMostLoop->t)}) { for (const parser::DoConstruct *loop{&*outer}; loop && level > 0; --level) { if (loop->IsDoConcurrent()) { diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index 047baa3a79f5d..83db78667c7f8 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -209,6 +209,8 @@ struct ConstructDecompositionT { bool applyClause(const tomp::clause::CollapseT &clause, const ClauseTy *); + bool applyClause(const tomp::clause::SizesT &clause, + const ClauseTy *); bool applyClause(const tomp::clause::PrivateT &clause, const ClauseTy *); bool @@ -482,6 +484,28 @@ bool ConstructDecompositionT::applyClause( return false; } +// FIXME(JAN): Do the correct thing, but for now we'll do the same as collapse +template +bool ConstructDecompositionT::applyClause( + const tomp::clause::SizesT &clause, + const ClauseTy *node) { + // Apply "sizes" to the innermost directive. 
If it's not one that + // allows it flag an error. + if (!leafs.empty()) { + auto &last = leafs.back(); + + if (llvm::omp::isAllowedClauseForDirective(last.id, node->id, version)) { + last.clauses.push_back(node); + return true; + } else { + llvm::errs() << "** OVERRIDING isAllowedClauseForDirective **\n"; + last.clauses.push_back(node); + return true; + } + } + + return false; +} // PRIVATE // [5.2:111:5-7] diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 1050e3d8b08dd..a994f23c1fbe2 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -2257,6 +2257,9 @@ class OpenMPIRBuilder { /// Return the function that contains the region to be outlined. Function *getFunction() const { return EntryBB->getParent(); } + + /// Dump the info in a somewhat readable way + void dump(); }; /// Collection of regions that need to be outlined during finalization. @@ -2277,6 +2280,9 @@ class OpenMPIRBuilder { /// Add a new region that will be outlined later. void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); } + /// Dump outline infos + void dumpOutlineInfos(); + /// An ordered map of auto-generated variables to their unique names. /// It stores variables with the following names: 1) ".gomp_critical_user_" + /// + ".var" for "omp critical" directives; 2) @@ -3910,6 +3916,9 @@ class CanonicalLoopInfo { /// Invalidate this loop. That is, the underlying IR does not fulfill the /// requirements of an OpenMP canonical loop anymore. LLVM_ABI void invalidate(); + + /// Dump the info in a somewhat readable way + void dump(); }; /// ScanInfo holds the information to assist in lowering of Scan reduction. 
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 989bcf45e0006..ff50dfbbd5259 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -9145,6 +9145,15 @@ Error OpenMPIRBuilder::emitOffloadingArrays( return Error::success(); } +void OpenMPIRBuilder::dumpOutlineInfos() { + errs() << "=== Outline Infos Begin ===\n"; + for (auto En : enumerate(OutlineInfos)) { + errs() << "[" << En.index() << "]: "; + En.value().dump(); + } + errs() << "=== Outline Infos End ===\n"; +} + void OpenMPIRBuilder::emitBranch(BasicBlock *Target) { BasicBlock *CurBB = Builder.GetInsertBlock(); @@ -10069,6 +10078,14 @@ void OpenMPIRBuilder::OutlineInfo::collectBlocks( } } +void OpenMPIRBuilder::OutlineInfo::dump() { + errs() << "=== OutilneInfo == " + << " EntryBB: " << (EntryBB ? EntryBB->getName() : "n\a") + << " ExitBB: " << (ExitBB ? ExitBB->getName() : "n\a") + << " OuterAllocaBB: " + << (OuterAllocaBB ? OuterAllocaBB->getName() : "n/a") << "\n"; +} + void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, @@ -10846,3 +10863,10 @@ void CanonicalLoopInfo::invalidate() { Latch = nullptr; Exit = nullptr; } + +void CanonicalLoopInfo::dump() { + errs() << "CanonicaLoop == Header: " << (Header ? Header->getName() : "n/a") + << " Cond: " << (Cond ? Cond->getName() : "n/a") + << " Latch: " << (Latch ? Latch->getName() : "n/a") + << " Exit: " << (Exit ? 
Exit->getName() : "n/a") << "\n"; +} diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index bbd1ed6a3ab2d..7ad1e70cb6e75 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -810,7 +810,11 @@ void CodeExtractor::severSplitPHINodesOfExits() { } void CodeExtractor::splitReturnBlocks() { - for (BasicBlock *Block : Blocks) + for (BasicBlock *Block : Blocks) { + if (!Block->getTerminator()) { + errs() << "====== No terminator in block: " << Block->getName() + << "======\n"; + } if (ReturnInst *RI = dyn_cast(Block->getTerminator())) { BasicBlock *New = Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret"); @@ -827,6 +831,7 @@ void CodeExtractor::splitReturnBlocks() { DT->changeImmediateDominator(I, NewNode); } } + } } Function *CodeExtractor::constructFunctionDeclaration( diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td index 311c57fb4446c..eb836db890738 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td @@ -317,6 +317,38 @@ class OpenMP_DeviceClauseSkip< def OpenMP_DeviceClause : OpenMP_DeviceClauseSkip<>; +//===----------------------------------------------------------------------===// +// V5.2: [XX.X] `collapse` clause +//===----------------------------------------------------------------------===// + +class OpenMP_CollapseClauseSkip< + bit traits = false, bit arguments = false, bit assemblyFormat = false, + bit description = false, bit extraClassDeclaration = false + > : OpenMP_Clause { + let arguments = (ins + DefaultValuedOptionalAttr:$num_collapse + ); +} + +def OpenMP_CollapseClause : OpenMP_CollapseClauseSkip<>; + +//===----------------------------------------------------------------------===// +// V5.2: [xx.x] `sizes` clause +//===----------------------------------------------------------------------===// 
+ +class OpenMP_TileSizesClauseSkip< + bit traits = false, bit arguments = false, bit assemblyFormat = false, + bit description = false, bit extraClassDeclaration = false + > : OpenMP_Clause { + let arguments = (ins + OptionalAttr:$tile_sizes + ); +} + +def OpenMP_TileSizesClause : OpenMP_TileSizesClauseSkip<>; + //===----------------------------------------------------------------------===// // V5.2: [11.6.1] `dist_schedule` clause //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index c956d69781b3d..ee555b695c2ad 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -614,7 +614,9 @@ def WorkshareLoopWrapperOp : OpenMP_Op<"workshare.loop_wrapper", traits = [ def LoopNestOp : OpenMP_Op<"loop_nest", traits = [ RecursiveMemoryEffects, SameVariadicOperandSize ], clauses = [ - OpenMP_LoopRelatedClause + OpenMP_LoopRelatedClause, + OpenMP_CollapseClause, + OpenMP_TileSizesClause ], singleRegion = true> { let summary = "rectangular loop nest"; let description = [{ diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp index 34f372af1e4b5..bec17258d058f 100644 --- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp +++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp @@ -493,7 +493,8 @@ struct ParallelOpLowering : public OpRewritePattern { // Create loop nest and populate region with contents of scf.parallel. 
auto loopOp = omp::LoopNestOp::create( rewriter, parallelOp.getLoc(), parallelOp.getLowerBound(), - parallelOp.getUpperBound(), parallelOp.getStep()); + parallelOp.getUpperBound(), parallelOp.getStep(), false, 1, + nullptr); rewriter.inlineRegionBefore(parallelOp.getRegion(), loopOp.getRegion(), loopOp.getRegion().begin()); diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index fa94219016c1e..d2d3362f23a2f 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -56,6 +56,11 @@ makeDenseBoolArrayAttr(MLIRContext *ctx, const ArrayRef boolArray) { return boolArray.empty() ? nullptr : DenseBoolArrayAttr::get(ctx, boolArray); } +static DenseI64ArrayAttr +makeDenseI64ArrayAttr(MLIRContext *ctx, const ArrayRef intArray) { + return intArray.empty() ? nullptr : DenseI64ArrayAttr::get(ctx, intArray); +} + namespace { struct MemRefPointerLikeModel : public PointerLikeType::ExternalModel steps; @@ -2967,6 +2972,38 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) { parser.parseOperandList(steps, ivs.size(), OpAsmParser::Delimiter::Paren)) return failure(); + // Parse collapse + int64_t value = 0; + if (!parser.parseOptionalKeyword("collapse") && + (parser.parseLParen() || parser.parseInteger(value) || + parser.parseRParen())) + return failure(); + if (value > 1) { + result.addAttribute( + "num_collapse", + IntegerAttr::get(parser.getBuilder().getI64Type(), value)); + } + + // Parse tiles + SmallVector tiles; + auto parseTiles = [&]() -> ParseResult { + int64_t tile; + if (parser.parseInteger(tile)) + return failure(); + tiles.push_back(tile); + return success(); + }; + + if (!parser.parseOptionalKeyword("tiles") && + (parser.parseLParen() || + parser.parseCommaSeparatedList(parseTiles) || + parser.parseRParen())) + return failure(); + + if (tiles.size() > 0) { + result.addAttribute("tile_sizes", DenseI64ArrayAttr::get(ctx, tiles)); + } + 
// Parse the body. Region *region = result.addRegion(); if (parser.parseRegion(*region, ivs)) @@ -2990,14 +3027,23 @@ void LoopNestOp::print(OpAsmPrinter &p) { if (getLoopInclusive()) p << "inclusive "; p << "step (" << getLoopSteps() << ") "; + if (int64_t numCollapse = getNumCollapse()) { + if (numCollapse > 1) + p << "collapse(" << numCollapse << ") "; + } + if (const auto tiles = getTileSizes()) { + p << "tiles(" << tiles.value() << ") "; + } p.printRegion(region, /*printEntryBlockArgs=*/false); } void LoopNestOp::build(OpBuilder &builder, OperationState &state, const LoopNestOperands &clauses) { + MLIRContext *ctx = builder.getContext(); LoopNestOp::build(builder, state, clauses.loopLowerBounds, clauses.loopUpperBounds, clauses.loopSteps, - clauses.loopInclusive); + clauses.loopInclusive, clauses.numCollapse, + makeDenseI64ArrayAttr(ctx, clauses.tileSizes)); } LogicalResult LoopNestOp::verify() { diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 6694de8383534..687688092f08e 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -2966,10 +2966,9 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, /// Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder. static LogicalResult convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) { + LLVM::ModuleTranslation &moduleTranslation) { llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); auto loopOp = cast(opInst); - // Set up the source location value for OpenMP runtime. llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); @@ -3035,18 +3034,60 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, loopInfos.push_back(*loopResult); } - // Collapse loops. 
Store the insertion point because LoopInfos may get - // invalidated. + // llvm::OpenMPIRBuilder::InsertPointTy afterIP = builder.saveIP(); llvm::OpenMPIRBuilder::InsertPointTy afterIP = - loopInfos.front()->getAfterIP(); + loopInfos.front()->getAfterIP(); - // Update the stack frame created for this loop to point to the resulting loop - // after applying transformations. - moduleTranslation.stackWalk( - [&](OpenMPLoopInfoStackFrame &frame) { - frame.loopInfo = ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); - return WalkResult::interrupt(); - }); + // Initialize the new loop info to the current one, in case there + // are no loop transformations done. + llvm::CanonicalLoopInfo *NewTopLoopInfo = nullptr; + + // Do tiling + if (const auto &tiles = loopOp.getTileSizes()) { + llvm::Type *IVType = loopInfos.front()->getIndVarType(); + SmallVector TileSizes; + + for (auto tile : tiles.value()) { + llvm::Value *TileVal = llvm::ConstantInt::get(IVType, tile); + TileSizes.push_back(TileVal); + } + + std::vector NewLoops = + ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes); + + // Collapse loops. Store the insertion point because LoopInfos may get + // invalidated. + auto AfterBB = NewLoops.front()->getAfter(); + auto AfterAfterBB = AfterBB->getSingleSuccessor(); + afterIP = {AfterAfterBB, AfterAfterBB->begin()}; + NewTopLoopInfo = NewLoops[0]; + + // Update the loop infos + loopInfos.clear(); + for (const auto& newLoop : NewLoops) { + loopInfos.push_back(newLoop); + } + } // Tiling done + + // Do collapse + if (const auto &numCollapse = loopOp.getNumCollapse()) { + SmallVector collapseLoopInfos( + loopInfos.begin(), loopInfos.begin() + (numCollapse)); + + auto newLoopInfo = + ompBuilder->collapseLoops(ompLoc.DL, collapseLoopInfos, {}); + NewTopLoopInfo = newLoopInfo; + } // Collapse done + + // Update the stack frame created for this loop to point to the resulting + // loop after applying transformations. 
+ if (NewTopLoopInfo) { + moduleTranslation.stackWalk( + [&](OpenMPLoopInfoStackFrame &frame) { + frame.loopInfo = NewTopLoopInfo; + return WalkResult::interrupt(); + }); + } // Continue building IR after the loop. Note that the LoopInfo returned by // `collapseLoops` points inside the outermost loop and is intended for From 69cfb0390e270d6ac1db824bb46e6b78230ada8e Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Tue, 10 Jun 2025 09:51:02 -0400 Subject: [PATCH 03/57] Fix tests and limit the nesting of construct to only tiling. --- flang/lib/Semantics/canonicalize-omp.cpp | 34 ++++++++++++------- .../Lower/OpenMP/parallel-wsloop-lastpriv.f90 | 4 +-- flang/test/Lower/OpenMP/simd.f90 | 2 +- flang/test/Lower/OpenMP/wsloop-variable.f90 | 2 +- flang/test/Semantics/OpenMP/do-collapse.f90 | 1 + .../LLVMIR/omptarget-wsloop-collapsed.mlir | 2 +- mlir/test/Target/LLVMIR/openmp-llvm.mlir | 12 +++---- 7 files changed, 33 insertions(+), 24 deletions(-) diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index fb0bb0f923574..10eaaa83f5f4f 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -149,27 +149,32 @@ class CanonicalizationOfOmp { // Ignore compiler directives. if (GetConstructIf(*nextIt)) continue; - // Keep track of the loops to handle the end loop directives std::stack loops; loops.push(&x); - while (auto *innerConstruct{ + if (auto *innerConstruct{ GetConstructIf(*nextIt)}) { if (auto *innerOmpLoop{ std::get_if(&innerConstruct->u)}) { - std::get< - std::optional>>( - loops.top()->t) = std::move(*innerOmpLoop); - // Retrieveing the address so that DoConstruct or inner loop can be - // set later. 
- loops.push(&(std::get>>( - loops.top()->t) - .value() - .value())); - nextIt = block.erase(nextIt); + auto &innerBeginDir{ + std::get(innerOmpLoop->t)}; + auto &innerDir{std::get(innerBeginDir.t)}; + if (innerDir.v == llvm::omp::Directive::OMPD_tile) { + std::get>>( + loops.top()->t) = std::move(*innerOmpLoop); + // Retrieveing the address so that DoConstruct or inner loop can be + // set later. + loops.push(&(std::get>>( + loops.top()->t) + .value() + .value())); + nextIt = block.erase(nextIt); + } } } + if (auto *doCons{GetConstructIf(*nextIt)}) { if (doCons->GetLoopControl()) { // move DoConstruct @@ -185,6 +190,9 @@ class CanonicalizationOfOmp { loops.top()->t) = std::move(*endDir); nextIt = block.erase(nextIt); loops.pop(); + } else { + // If there is a mismatch bail out. + break; } } } else { diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 index 2890e78e9d17f..faf8f717f6308 100644 --- a/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 +++ b/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 @@ -108,7 +108,7 @@ subroutine omp_do_lastprivate_collapse2(a) ! CHECK-NEXT: %[[UB2:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref ! CHECK-NEXT: %[[STEP2:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[I_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[J_PVT_REF:.*]] : !fir.ref, !fir.ref, !fir.ref) { - ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[LB1]], %[[LB2]]) to (%[[UB1]], %[[UB2]]) inclusive step (%[[STEP1]], %[[STEP2]]) { + ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[LB1]], %[[LB2]]) to (%[[UB1]], %[[UB2]]) inclusive step (%[[STEP1]], %[[STEP2]]) collapse(2) { ! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ea"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! 
CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -174,7 +174,7 @@ subroutine omp_do_lastprivate_collapse3(a) ! CHECK-NEXT: %[[UB3:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref ! CHECK-NEXT: %[[STEP3:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[I_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[J_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[K_PVT_REF:.*]] : !fir.ref, !fir.ref, !fir.ref, !fir.ref) { - ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) : i32 = (%[[LB1]], %[[LB2]], %[[LB3]]) to (%[[UB1]], %[[UB2]], %[[UB3]]) inclusive step (%[[STEP1]], %[[STEP2]], %[[STEP3]]) { + ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) : i32 = (%[[LB1]], %[[LB2]], %[[LB3]]) to (%[[UB1]], %[[UB2]], %[[UB3]]) inclusive step (%[[STEP1]], %[[STEP2]], %[[STEP3]]) collapse(3) { ! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ea"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) diff --git a/flang/test/Lower/OpenMP/simd.f90 b/flang/test/Lower/OpenMP/simd.f90 index d815474b84b31..3572b9baff00b 100644 --- a/flang/test/Lower/OpenMP/simd.f90 +++ b/flang/test/Lower/OpenMP/simd.f90 @@ -175,7 +175,7 @@ subroutine simd_with_collapse_clause(n) ! CHECK-NEXT: omp.loop_nest (%[[ARG_0:.*]], %[[ARG_1:.*]]) : i32 = ( ! CHECK-SAME: %[[LOWER_I]], %[[LOWER_J]]) to ( ! 
CHECK-SAME: %[[UPPER_I]], %[[UPPER_J]]) inclusive step ( - ! CHECK-SAME: %[[STEP_I]], %[[STEP_J]]) { + ! CHECK-SAME: %[[STEP_I]], %[[STEP_J]]) collapse(2) { !$OMP SIMD COLLAPSE(2) do i = 1, n do j = 1, n diff --git a/flang/test/Lower/OpenMP/wsloop-variable.f90 b/flang/test/Lower/OpenMP/wsloop-variable.f90 index a7fb5fb8936e7..cceb77b974fee 100644 --- a/flang/test/Lower/OpenMP/wsloop-variable.f90 +++ b/flang/test/Lower/OpenMP/wsloop-variable.f90 @@ -23,7 +23,7 @@ program wsloop_variable !CHECK: %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64 !CHECK: %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64 !CHECK: omp.wsloop private({{.*}}) { -!CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) { +!CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) collapse(2) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16 !CHECK: hlfir.assign %[[ARG0_I16]] to %[[STORE_IV0:.*]]#0 : i16, !fir.ref !CHECK: hlfir.assign %[[ARG1]] to %[[STORE_IV1:.*]]#0 : i64, !fir.ref diff --git a/flang/test/Semantics/OpenMP/do-collapse.f90 b/flang/test/Semantics/OpenMP/do-collapse.f90 index 480bd45b79b83..ec6a3bdad3686 100644 --- a/flang/test/Semantics/OpenMP/do-collapse.f90 +++ b/flang/test/Semantics/OpenMP/do-collapse.f90 @@ -31,6 +31,7 @@ program omp_doCollapse end do end do + !ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct. 
!ERROR: At most one COLLAPSE clause can appear on the SIMD directive !$omp simd collapse(2) collapse(1) do i = 1, 4 diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir index b42e387acbb11..d84641ff9c99b 100644 --- a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir @@ -9,7 +9,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo %loop_lb = llvm.mlir.constant(0 : i32) : i32 %loop_step = llvm.mlir.constant(1 : index) : i32 omp.wsloop { - omp.loop_nest (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) { + omp.loop_nest (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) collapse(2) { %1 = llvm.add %arg1, %arg2 : i32 %2 = llvm.mul %arg2, %loop_ub overflow : i32 %3 = llvm.add %arg1, %2 :i32 diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index 3f4dcd5e24c56..27210bc0890ce 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -698,7 +698,7 @@ llvm.func @simd_simple(%lb : i64, %ub : i64, %step : i64, %arg0: !llvm.ptr) { // CHECK-LABEL: @simd_simple_multiple llvm.func @simd_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { omp.simd { - omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) inclusive step (%step1, %step2) { + omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) inclusive step (%step1, %step2) collapse(2) { %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 // The form of the emitted IR is controlled by OpenMPIRBuilder and // tested there. 
Just check that the right metadata is added and collapsed @@ -736,7 +736,7 @@ llvm.func @simd_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64 // CHECK-LABEL: @simd_simple_multiple_simdlen llvm.func @simd_simple_multiple_simdlen(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { omp.simd simdlen(2) { - omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) { + omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) { %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 // The form of the emitted IR is controlled by OpenMPIRBuilder and // tested there. Just check that the right metadata is added. @@ -760,7 +760,7 @@ llvm.func @simd_simple_multiple_simdlen(%lb1 : i64, %ub1 : i64, %step1 : i64, %l // CHECK-LABEL: @simd_simple_multiple_safelen llvm.func @simd_simple_multiple_safelen(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { omp.simd safelen(2) { - omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) { + omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) { %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 %4 = llvm.getelementptr %arg0[%iv1] : (!llvm.ptr, i64) -> !llvm.ptr, f32 %5 = llvm.getelementptr %arg1[%iv2] : (!llvm.ptr, i64) -> !llvm.ptr, f32 @@ -779,7 +779,7 @@ llvm.func @simd_simple_multiple_safelen(%lb1 : i64, %ub1 : i64, %step1 : i64, %l // CHECK-LABEL: @simd_simple_multiple_simdlen_safelen llvm.func @simd_simple_multiple_simdlen_safelen(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { omp.simd simdlen(1) safelen(2) { - omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) { + omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) 
collapse(2) { %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 %4 = llvm.getelementptr %arg0[%iv1] : (!llvm.ptr, i64) -> !llvm.ptr, f32 %5 = llvm.getelementptr %arg1[%iv2] : (!llvm.ptr, i64) -> !llvm.ptr, f32 @@ -1177,7 +1177,7 @@ llvm.func @collapse_wsloop( // CHECK: store i32 %[[TOTAL_SUB_1]], ptr // CHECK: call void @__kmpc_for_static_init_4u omp.wsloop { - omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) { + omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) collapse(3) { %31 = llvm.load %20 : !llvm.ptr -> i32 %32 = llvm.add %31, %arg0 : i32 %33 = llvm.add %32, %arg1 : i32 @@ -1239,7 +1239,7 @@ llvm.func @collapse_wsloop_dynamic( // CHECK: store i32 %[[TOTAL]], ptr // CHECK: call void @__kmpc_dispatch_init_4u omp.wsloop schedule(dynamic) { - omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) { + omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) collapse(3) { %31 = llvm.load %20 : !llvm.ptr -> i32 %32 = llvm.add %31, %arg0 : i32 %33 = llvm.add %32, %arg1 : i32 From 34888b13ab38713b00ce64fe908286592d84434d Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Tue, 10 Jun 2025 10:18:32 -0400 Subject: [PATCH 04/57] Enable stand-alone tiling, but it gives a warning and is converted to simd.
--- flang/lib/Lower/OpenMP/OpenMP.cpp | 42 ++++++++++++++++++--- flang/test/Lower/OpenMP/wsloop-collapse.f90 | 2 +- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index a01bb371411ad..4cfb9414e26e6 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -2267,6 +2267,39 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter, // Apply unrolling to it auto cli = canonLoop.getCli(); mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli); + +static mlir::omp::LoopOp +genTiledLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::const_iterator item) { + mlir::omp::LoopOperands loopClauseOps; + llvm::SmallVector loopReductionSyms; + genLoopClauses(converter, semaCtx, item->clauses, loc, loopClauseOps, + loopReductionSyms); + + DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/true, symTable); + dsp.processStep1(&loopClauseOps); + + mlir::omp::LoopNestOperands loopNestClauseOps; + llvm::SmallVector iv; + genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, + loopNestClauseOps, iv); + + EntryBlockArgs loopArgs; + loopArgs.priv.syms = dsp.getDelayedPrivSymbols(); + loopArgs.priv.vars = loopClauseOps.privateVars; + loopArgs.reduction.syms = loopReductionSyms; + loopArgs.reduction.vars = loopClauseOps.reductionVars; + + auto loopOp = + genWrapperOp(converter, loc, loopClauseOps, loopArgs); + genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, + loopNestClauseOps, iv, {{loopOp, loopArgs}}, + llvm::omp::Directive::OMPD_loop, dsp); + return loopOp; } static mlir::omp::MaskedOp @@ -3487,13 +3520,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter, newOp 
= genTeamsOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; - case llvm::omp::Directive::OMPD_tile: { - unsigned version = semaCtx.langOptions().OpenMPVersion; - if (!semaCtx.langOptions().OpenMPSimd) - TODO(loc, "Unhandled loop directive (" + - llvm::omp::getOpenMPDirectiveName(dir, version) + ")"); + case llvm::omp::Directive::OMPD_tile: + newOp = + genTiledLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); break; - } case llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; diff --git a/flang/test/Lower/OpenMP/wsloop-collapse.f90 b/flang/test/Lower/OpenMP/wsloop-collapse.f90 index 7ec40ab4b2f43..677c7809c397f 100644 --- a/flang/test/Lower/OpenMP/wsloop-collapse.f90 +++ b/flang/test/Lower/OpenMP/wsloop-collapse.f90 @@ -57,7 +57,7 @@ program wsloop_collapse !CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref !CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_4:.*]], @{{.*}} %{{.*}}#0 -> %[[VAL_2:.*]], @{{.*}} %{{.*}}#0 -> %[[VAL_0:.*]] : !fir.ref, !fir.ref, !fir.ref) { -!CHECK-NEXT: omp.loop_nest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) { +!CHECK-NEXT: omp.loop_nest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) collapse(3) { !$omp do collapse(3) do i = 1, a do j= 1, b From 79270accfdaeca0dfcb6888d42c14834367ab2e8 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Wed, 11 Jun 2025 10:25:00 -0400 Subject: [PATCH 05/57] Add minimal test, remove debug print. 
--- flang/test/Lower/OpenMP/wsloop-tile.f90 | 30 +++++++++++++++++++ .../Frontend/OpenMP/ConstructDecompositionT.h | 2 +- 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 flang/test/Lower/OpenMP/wsloop-tile.f90 diff --git a/flang/test/Lower/OpenMP/wsloop-tile.f90 b/flang/test/Lower/OpenMP/wsloop-tile.f90 new file mode 100644 index 0000000000000..f43b558ce46bb --- /dev/null +++ b/flang/test/Lower/OpenMP/wsloop-tile.f90 @@ -0,0 +1,30 @@ +! This test checks lowering of OpenMP DO Directive(Worksharing) with collapse. + +! RUN: bbc -fopenmp -fopenmp-version=51 -emit-hlfir %s -o - | FileCheck %s + +!CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "wsloop_tile"} { +program wsloop_tile + integer :: i, j, k + integer :: a, b, c + integer :: x + + a=30 + b=20 + c=50 + x=0 + + !CHECK: omp.loop_nest + !CHECK-SAME: tiles(2, 5, 10) + + !$omp do + !$omp tile sizes(2,5,10) + do i = 1, a + do j= 1, b + do k = 1, c + x = x + i + j + k + end do + end do + end do + !$omp end tile + !$omp end do +end program wsloop_tile diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index 83db78667c7f8..e1083b7ef2cd9 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -498,7 +498,7 @@ bool ConstructDecompositionT::applyClause( last.clauses.push_back(node); return true; } else { - llvm::errs() << "** OVERRIDING isAllowedClauseForDirective **\n"; + // llvm::errs() << "** OVERRIDING isAllowedClauseForDirective **\n"; last.clauses.push_back(node); return true; } From dd943a84603be324af8cee2704565a05fe7fe297 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Fri, 13 Jun 2025 09:53:10 -0400 Subject: [PATCH 06/57] Fix formatting --- flang/lib/Lower/OpenMP/OpenMP.cpp | 27 +++++++++++-------- flang/lib/Lower/OpenMP/Utils.cpp | 3 +-- flang/lib/Semantics/canonicalize-omp.cpp | 4 +-- 
mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 3 +-- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 12 ++++----- 5 files changed, 26 insertions(+), 23 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 4cfb9414e26e6..a19283286df41 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -427,9 +427,10 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, if (innerOptional.has_value()) { const auto &innerLoopDirective = innerOptional.value().value(); const auto &innerBegin = - std::get(innerLoopDirective.t); + std::get( + innerLoopDirective.t); const auto &innerDirective = - std::get(innerBegin.t); + std::get(innerBegin.t); if (innerDirective.v == llvm::omp::Directive::OMPD_tile) { middleClauseList = &std::get(innerBegin.t); @@ -2268,11 +2269,13 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter, auto cli = canonLoop.getCli(); mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli); -static mlir::omp::LoopOp -genTiledLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item) { +static mlir::omp::LoopOp genTiledLoopOp(lower::AbstractConverter &converter, + lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + mlir::Location loc, + const ConstructQueue &queue, + ConstructQueue::const_iterator item) { mlir::omp::LoopOperands loopClauseOps; llvm::SmallVector loopReductionSyms; genLoopClauses(converter, semaCtx, item->clauses, loc, loopClauseOps, @@ -3522,7 +3525,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, break; case llvm::omp::Directive::OMPD_tile: newOp = - genTiledLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); + genTiledLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case 
llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); @@ -3966,13 +3969,15 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, List clauses = makeClauses( std::get(beginLoopDirective.t), semaCtx); - const auto &innerOptional = std::get>>(loopConstruct.t); + const auto &innerOptional = + std::get>>( + loopConstruct.t); if (innerOptional.has_value()) { const auto &innerLoopDirective = innerOptional.value().value(); const auto &innerBegin = - std::get(innerLoopDirective.t); + std::get(innerLoopDirective.t); const auto &innerDirective = - std::get(innerBegin.t); + std::get(innerBegin.t); if (innerDirective.v == llvm::omp::Directive::OMPD_tile) { clauses.append( makeClauses(std::get(innerBegin.t), semaCtx)); diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 11721d05001b0..69d74762ace6f 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -51,8 +51,7 @@ int64_t getCollapseValue(const List &clauses) { } collapseValue = collapseValue - numTileSizes; - int64_t result = - collapseValue > numTileSizes ? collapseValue : numTileSizes; + int64_t result = collapseValue > numTileSizes ? collapseValue : numTileSizes; return result; } diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 10eaaa83f5f4f..c519cb43628ed 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -11,7 +11,7 @@ #include "flang/Parser/parse-tree.h" #include "flang/Semantics/semantics.h" -# include +#include // After Loop Canonicalization, rewrite OpenMP parse tree to make OpenMP // Constructs more structured which provide explicit scopes for later // structural checks and semantic analysis. 
@@ -153,7 +153,7 @@ class CanonicalizationOfOmp { std::stack loops; loops.push(&x); if (auto *innerConstruct{ - GetConstructIf(*nextIt)}) { + GetConstructIf(*nextIt)}) { if (auto *innerOmpLoop{ std::get_if(&innerConstruct->u)}) { auto &innerBeginDir{ diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index d2d3362f23a2f..3c65ddadff019 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -2995,8 +2995,7 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) { }; if (!parser.parseOptionalKeyword("tiles") && - (parser.parseLParen() || - parser.parseCommaSeparatedList(parseTiles) || + (parser.parseLParen() || parser.parseCommaSeparatedList(parseTiles) || parser.parseRParen())) return failure(); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 687688092f08e..dccf241e919a7 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -2966,7 +2966,7 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, /// Converts an OpenMP loop nest into LLVM IR using OpenMPIRBuilder. static LogicalResult convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) { + LLVM::ModuleTranslation &moduleTranslation) { llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); auto loopOp = cast(opInst); // Set up the source location value for OpenMP runtime. 
@@ -3036,7 +3036,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, // llvm::OpenMPIRBuilder::InsertPointTy afterIP = builder.saveIP(); llvm::OpenMPIRBuilder::InsertPointTy afterIP = - loopInfos.front()->getAfterIP(); + loopInfos.front()->getAfterIP(); // Initialize the new loop info to the current one, in case there // are no loop transformations done. @@ -3048,12 +3048,12 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, SmallVector TileSizes; for (auto tile : tiles.value()) { - llvm::Value *TileVal = llvm::ConstantInt::get(IVType, tile); + llvm::Value *TileVal = llvm::ConstantInt::get(IVType, tile); TileSizes.push_back(TileVal); } - std::vector NewLoops = - ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes); + std::vector NewLoops = + ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes); // Collapse loops. Store the insertion point because LoopInfos may get // invalidated. @@ -3064,7 +3064,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, // Update the loop infos loopInfos.clear(); - for (const auto& newLoop : NewLoops) { + for (const auto &newLoop : NewLoops) { loopInfos.push_back(newLoop); } } // Tiling done From 822fd824e3cd71684544fdcfab1a2c73d3736333 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Sat, 14 Jun 2025 06:29:58 -0400 Subject: [PATCH 07/57] Fix formatting --- flang/lib/Semantics/canonicalize-omp.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index c519cb43628ed..1d00bdaad777c 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -166,10 +166,10 @@ class CanonicalizationOfOmp { // Retrieveing the address so that DoConstruct or inner loop can be // set later. 
loops.push(&(std::get>>( + common::Indirection>>( loops.top()->t) - .value() - .value())); + .value() + .value())); nextIt = block.erase(nextIt); } } From 48bcab418f34da713cd3234c6f0dbf177da78495 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Thu, 19 Jun 2025 15:52:55 -0400 Subject: [PATCH 08/57] Fix test. --- flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 | 1 + 1 file changed, 1 insertion(+) diff --git a/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 b/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 index bb1929249183b..355626f6e73b9 100644 --- a/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 +++ b/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 @@ -1,6 +1,7 @@ !RUN: %python %S/../test_errors.py %s %flang -fopenmp integer :: i, j +! ERROR: DO CONCURRENT loops cannot be used with the COLLAPSE clause. !$omp parallel do collapse(2) do i = 1, 1 ! ERROR: DO CONCURRENT loops cannot form part of a loop nest. From 90320a91c64e454171204143fcb999a1992a2bf8 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Fri, 20 Jun 2025 07:31:02 -0400 Subject: [PATCH 09/57] Add more mlir tests. Set collapse value when lowering from SCF to OpenMP. --- .../Conversion/SCFToOpenMP/SCFToOpenMP.cpp | 4 +- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 12 +++++ .../Conversion/SCFToOpenMP/scf-to-openmp.mlir | 2 +- mlir/test/Dialect/OpenMP/invalid.mlir | 23 ++++++++ mlir/test/Dialect/OpenMP/ops.mlir | 54 +++++++++++++++++++ 5 files changed, 92 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp index bec17258d058f..f056e72531bfc 100644 --- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp +++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp @@ -493,8 +493,8 @@ struct ParallelOpLowering : public OpRewritePattern { // Create loop nest and populate region with contents of scf.parallel. 
auto loopOp = omp::LoopNestOp::create( rewriter, parallelOp.getLoc(), parallelOp.getLowerBound(), - parallelOp.getUpperBound(), parallelOp.getStep(), false, 1, - nullptr); + parallelOp.getUpperBound(), parallelOp.getStep(), false, + parallelOp.getLowerBound().size(), nullptr); rewriter.inlineRegionBefore(parallelOp.getRegion(), loopOp.getRegion(), loopOp.getRegion().begin()); diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 3c65ddadff019..19bff545fb202 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -3058,6 +3058,18 @@ LogicalResult LoopNestOp::verify() { << "range argument type does not match corresponding IV type"; } + uint64_t numIVs = getIVs().size(); + + if (const auto &numCollapse = getNumCollapse()) + if (numCollapse > numIVs) + return emitOpError() + << "collapse value is larger than the number of loops"; + + if (const auto &tiles = getTileSizes()) + if (tiles.value().size() > numIVs) + return emitOpError() + << "number of tilings is larger than the number of loops"; + if (!llvm::dyn_cast_if_present((*this)->getParentOp())) return emitOpError() << "expects parent op to be a loop wrapper"; diff --git a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir index a722acbf2c347..d362bb6092419 100644 --- a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir +++ b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir @@ -6,7 +6,7 @@ func.func @parallel(%arg0: index, %arg1: index, %arg2: index, // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32 // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) { // CHECK: omp.wsloop { - // CHECK: omp.loop_nest (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) { + // CHECK: omp.loop_nest (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) collapse(2) { // CHECK: 
memref.alloca_scope scf.parallel (%i, %j) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) { // CHECK: "test.payload"(%[[LVAR1]], %[[LVAR2]]) : (index, index) -> () diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 5088f2dfa7d7a..c6b4ae02602d9 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -157,6 +157,29 @@ func.func @no_loops(%lb : index, %ub : index, %step : index) { } } +// ----- + +func.func @collapse_size(%lb : index, %ub : index, %step : index) { + omp.wsloop { + // expected-error@+1 {{collapse value is larger than the number of loops}} + omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) collapse(4) { + omp.yield + } + } +} + +// ----- + +func.func @tiles_length(%lb : index, %ub : index, %step : index) { + omp.wsloop { + // expected-error@+1 {{number of tilings is larger than the number of loops}} + omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) tiles(2, 4) { + omp.yield + } + } +} + + // ----- func.func @inclusive_not_a_clause(%lb : index, %ub : index, %step : index) { diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index 8c846cde1a3ca..e627a86e69185 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -376,6 +376,60 @@ func.func @omp_loop_nest_pretty_multiple(%lb1 : i32, %ub1 : i32, %step1 : i32, return } +// CHECK-LABEL: omp_loop_nest_pretty_multiple_collapse +func.func @omp_loop_nest_pretty_multiple_collapse(%lb1 : i32, %ub1 : i32, %step1 : i32, + %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref) -> () { + + omp.wsloop { + // CHECK: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) collapse(2) + omp.loop_nest (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) { + %1 = "test.payload"(%iv1) : (i32) -> (index) + %2 = "test.payload"(%iv2) : (i32) -> (index) + memref.store 
%iv1, %data1[%1] : memref + memref.store %iv2, %data1[%2] : memref + omp.yield + } + } + + return +} + +// CHECK-LABEL: omp_loop_nest_pretty_multiple_tiles +func.func @omp_loop_nest_pretty_multiple_tiles(%lb1 : i32, %ub1 : i32, %step1 : i32, + %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref) -> () { + + omp.wsloop { + // CHECK: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) tiles(5, 10) + omp.loop_nest (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) tiles(5, 10) { + %1 = "test.payload"(%iv1) : (i32) -> (index) + %2 = "test.payload"(%iv2) : (i32) -> (index) + memref.store %iv1, %data1[%1] : memref + memref.store %iv2, %data1[%2] : memref + omp.yield + } + } + + return +} + +// CHECK-LABEL: omp_loop_nest_pretty_multiple_collapse_tiles +func.func @omp_loop_nest_pretty_multiple_collapse_tiles(%lb1 : i32, %ub1 : i32, %step1 : i32, + %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref) -> () { + + omp.wsloop { + // CHECK: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) collapse(2) tiles(5, 10) + omp.loop_nest (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) tiles(5, 10) { + %1 = "test.payload"(%iv1) : (i32) -> (index) + %2 = "test.payload"(%iv2) : (i32) -> (index) + memref.store %iv1, %data1[%1] : memref + memref.store %iv2, %data1[%2] : memref + omp.yield + } + } + + return +} + // CHECK-LABEL: omp_wsloop func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memref, %linear_var : i32, %chunk_var : i32) -> () { From d8a976f4c7c9fb426c8e664e0b5962cfe036198b Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Fri, 20 Jun 2025 07:52:28 -0400 Subject: [PATCH 10/57] Use llvm::SmallVector instead of std::stack --- flang/lib/Semantics/canonicalize-omp.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git 
a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 1d00bdaad777c..5264ec25fd80c 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -10,8 +10,6 @@ #include "flang/Parser/parse-tree-visitor.h" #include "flang/Parser/parse-tree.h" #include "flang/Semantics/semantics.h" - -#include // After Loop Canonicalization, rewrite OpenMP parse tree to make OpenMP // Constructs more structured which provide explicit scopes for later // structural checks and semantic analysis. @@ -150,8 +148,8 @@ class CanonicalizationOfOmp { if (GetConstructIf(*nextIt)) continue; // Keep track of the loops to handle the end loop directives - std::stack loops; - loops.push(&x); + llvm::SmallVector loops; + loops.push_back(&x); if (auto *innerConstruct{ GetConstructIf(*nextIt)}) { if (auto *innerOmpLoop{ @@ -162,12 +160,12 @@ class CanonicalizationOfOmp { if (innerDir.v == llvm::omp::Directive::OMPD_tile) { std::get>>( - loops.top()->t) = std::move(*innerOmpLoop); + loops.back()->t) = std::move(*innerOmpLoop); // Retrieveing the address so that DoConstruct or inner loop can be // set later. - loops.push(&(std::get>>( - loops.top()->t) + loops.back()->t) .value() .value())); nextIt = block.erase(nextIt); @@ -180,16 +178,16 @@ class CanonicalizationOfOmp { // move DoConstruct std::get>>>( - loops.top()->t) = std::move(*doCons); + loops.back()->t) = std::move(*doCons); nextIt = block.erase(nextIt); // try to match OmpEndLoopDirective while (nextIt != block.end() && !loops.empty()) { if (auto *endDir{ GetConstructIf(*nextIt)}) { std::get>( - loops.top()->t) = std::move(*endDir); + loops.back()->t) = std::move(*endDir); nextIt = block.erase(nextIt); - loops.pop(); + loops.pop_back(); } else { // If there is a mismatch bail out. 
break; From 1d133e9b8158fd36e6e99cd9eeb8a9d37d4753b1 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Sat, 21 Jun 2025 09:11:58 -0400 Subject: [PATCH 11/57] Improve test a bit to make sure IVs are used as expected. --- flang/test/Lower/OpenMP/wsloop-tile.f90 | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/flang/test/Lower/OpenMP/wsloop-tile.f90 b/flang/test/Lower/OpenMP/wsloop-tile.f90 index f43b558ce46bb..c9bf18e3b278d 100644 --- a/flang/test/Lower/OpenMP/wsloop-tile.f90 +++ b/flang/test/Lower/OpenMP/wsloop-tile.f90 @@ -13,7 +13,7 @@ program wsloop_tile c=50 x=0 - !CHECK: omp.loop_nest + !CHECK: omp.loop_nest (%[[IV_0:.*]], %[[IV_1:.*]], %[[IV_2:.*]]) : i32 !CHECK-SAME: tiles(2, 5, 10) !$omp do @@ -21,6 +21,15 @@ program wsloop_tile do i = 1, a do j= 1, b do k = 1, c + !CHECK: hlfir.assign %[[IV_0]] to %[[IV_0A:.*]] : i32 + !CHECK: hlfir.assign %[[IV_1]] to %[[IV_1A:.*]] : i32 + !CHECK: hlfir.assign %[[IV_2]] to %[[IV_2A:.*]] : i32 + !CHECK: %[[IVV_0:.*]] = fir.load %[[IV_0A]] + !CHECK: %[[SUM0:.*]] = arith.addi %{{.*}}, %[[IVV_0]] : i32 + !CHECK: %[[IVV_1:.*]] = fir.load %[[IV_1A]] + !CHECK: %[[SUM1:.*]] = arith.addi %[[SUM0]], %[[IVV_1]] : i32 + !CHECK: %[[IVV_2:.*]] = fir.load %[[IV_2A]] + !CHECK: %[[SUM2:.*]] = arith.addi %[[SUM1]], %[[IVV_2]] : i32 x = x + i + j + k end do end do From 04e73de83538311995c59e016930b29de9c95735 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Sat, 21 Jun 2025 09:30:57 -0400 Subject: [PATCH 12/57] Fix comments to clarify canonicalization. 
--- flang/lib/Semantics/canonicalize-omp.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 5264ec25fd80c..d5b5b14d22dc2 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -119,13 +119,15 @@ class CanonicalizationOfOmp { // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t-> // OmpBeginLoopDirective t-> OmpLoopDirective // [ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct u-> - /// OmpBeginLoopDirective t-> OmpLoopDirective t-> Tile v-> OMP_tile] + // OmpBeginLoopDirective t-> OmpLoopDirective t-> Tile v-> OMP_tile] // ExecutableConstruct -> DoConstruct - // [ExecutableConstruct -> OmpEndLoopDirective] + // [ExecutableConstruct -> OmpEndLoopDirective] (note: tile) // ExecutableConstruct -> OmpEndLoopDirective (if available) // // After rewriting: // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t-> + // [OpenMPLoopConstruct t -> OmpBeginLoopDirective -> OmpLoopDirective + // OmpEndLoopDirective] (note: tile) // OmpBeginLoopDirective t -> OmpLoopDirective -> DoConstruct // OmpEndLoopDirective (if available) parser::Block::iterator nextIt; From e46d100c2724f785aa4910518902172c434a6087 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Sat, 21 Jun 2025 12:05:40 -0400 Subject: [PATCH 13/57] Special handling of tile directive when dealing with start and end loop directives.
--- flang/lib/Semantics/canonicalize-omp.cpp | 31 ++++++++++++++++-------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index d5b5b14d22dc2..a7749b5a81678 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -160,16 +160,13 @@ class CanonicalizationOfOmp { std::get(innerOmpLoop->t)}; auto &innerDir{std::get(innerBeginDir.t)}; if (innerDir.v == llvm::omp::Directive::OMPD_tile) { - std::get>>( - loops.back()->t) = std::move(*innerOmpLoop); + loops.back()->t); + innerLoop = std::move(*innerOmpLoop); // Retrieveing the address so that DoConstruct or inner loop can be // set later. - loops.push_back(&(std::get>>( - loops.back()->t) - .value() - .value())); + loops.push_back(&(innerLoop.value().value())); nextIt = block.erase(nextIt); } } @@ -186,9 +183,23 @@ class CanonicalizationOfOmp { while (nextIt != block.end() && !loops.empty()) { if (auto *endDir{ GetConstructIf(*nextIt)}) { - std::get>( - loops.back()->t) = std::move(*endDir); - nextIt = block.erase(nextIt); + auto &endOmpDirective{ + std::get(endDir->t)}; + auto &loopBegin{ + std::get(loops.back()->t)}; + auto &loopDir{std::get(loopBegin.t)}; + + // If the directive is a tile we try to match the corresponding + // end tile if it exsists. If it is not a tile directive we + // always assign the end loop directive and fall back on the + // existing directive structure checks. + if (loopDir.v != llvm::omp::Directive::OMPD_tile || + loopDir.v == endOmpDirective.v) { + std::get>( + loops.back()->t) = std::move(*endDir); + nextIt = block.erase(nextIt); + } + loops.pop_back(); } else { // If there is a mismatch bail out. From 9cacf3c0d04d5d8d5d5605f71def51da5d55b24b Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Sat, 21 Jun 2025 12:20:33 -0400 Subject: [PATCH 14/57] Inline functions. 
--- flang/lib/Semantics/resolve-directives.cpp | 62 +++++++++------------- 1 file changed, 24 insertions(+), 38 deletions(-) diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 0e87dcfdbbbb8..934a06b2aef33 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -828,14 +828,6 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { void CollectAssociatedLoopLevelsFromInnerLoopContruct( const parser::OpenMPLoopConstruct &, llvm::SmallVector &, llvm::SmallVector &); - template - void CollectAssociatedLoopLevelFromClauseValue( - const parser::OmpClause &clause, llvm::SmallVector &, - llvm::SmallVector &); - template - void CollectAssociatedLoopLevelFromClauseSize(const parser::OmpClause &, - llvm::SmallVector &, - llvm::SmallVector &); void CollectAssociatedLoopLevelsFromClauses(const parser::OmpClauseList &, llvm::SmallVector &, llvm::SmallVector &); @@ -2078,40 +2070,34 @@ void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromInnerLoopContruct( } } -template -void OmpAttributeVisitor::CollectAssociatedLoopLevelFromClauseValue( - const parser::OmpClause &clause, llvm::SmallVector &levels, - llvm::SmallVector &clauses) { - if (const auto tclause{std::get_if(&clause.u)}) { - std::int64_t level = 0; - if (const auto v{EvaluateInt64(context_, tclause->v)}) { - level = *v; - } - levels.push_back(level); - clauses.push_back(&clause); - } -} - -template -void OmpAttributeVisitor::CollectAssociatedLoopLevelFromClauseSize( - const parser::OmpClause &clause, llvm::SmallVector &levels, - llvm::SmallVector &clauses) { - if (const auto tclause{std::get_if(&clause.u)}) { - levels.push_back(tclause->v.size()); - clauses.push_back(&clause); - } -} - void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromClauses( const parser::OmpClauseList &x, llvm::SmallVector &levels, llvm::SmallVector &clauses) { for (const auto &clause : x.v) { - 
CollectAssociatedLoopLevelFromClauseValue( - clause, levels, clauses); - CollectAssociatedLoopLevelFromClauseValue( - clause, levels, clauses); - CollectAssociatedLoopLevelFromClauseSize( - clause, levels, clauses); + if (const auto oclause{ + std::get_if(&clause.u)}) { + std::int64_t level = 0; + if (const auto v{EvaluateInt64(context_, oclause->v)}) { + level = *v; + } + levels.push_back(level); + clauses.push_back(&clause); + } + + if (const auto cclause{ + std::get_if(&clause.u)}) { + std::int64_t level = 0; + if (const auto v{EvaluateInt64(context_, cclause->v)}) { + level = *v; + } + levels.push_back(level); + clauses.push_back(&clause); + } + + if (const auto tclause{std::get_if(&clause.u)}) { + levels.push_back(tclause->v.size()); + clauses.push_back(&clause); + } } } From 279ee72fcd9dde83ff44a2c001864f735f4cfa70 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Mon, 23 Jun 2025 11:03:50 -0400 Subject: [PATCH 15/57] Remove debug code. --- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 7ad1e70cb6e75..bbd1ed6a3ab2d 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -810,11 +810,7 @@ void CodeExtractor::severSplitPHINodesOfExits() { } void CodeExtractor::splitReturnBlocks() { - for (BasicBlock *Block : Blocks) { - if (!Block->getTerminator()) { - errs() << "====== No terminator in block: " << Block->getName() - << "======\n"; - } + for (BasicBlock *Block : Blocks) if (ReturnInst *RI = dyn_cast(Block->getTerminator())) { BasicBlock *New = Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret"); @@ -831,7 +827,6 @@ void CodeExtractor::splitReturnBlocks() { DT->changeImmediateDominator(I, NewNode); } } - } } Function *CodeExtractor::constructFunctionDeclaration( From ace5268bc9f65953607c703cbd9ea36ef05e63c5 Mon Sep 17 00:00:00 
2001 From: Jan Leyonberg Date: Mon, 23 Jun 2025 11:08:42 -0400 Subject: [PATCH 16/57] Reuse loop op lowering, add comment. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index a19283286df41..7de79d7333f25 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3525,7 +3525,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, break; case llvm::omp::Directive::OMPD_tile: newOp = - genTiledLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); + genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); From 57b37f053079696f884f791247382ee15b383750 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Mon, 23 Jun 2025 11:12:51 -0400 Subject: [PATCH 17/57] Fix formatting. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 7de79d7333f25..3d5162551778d 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3524,8 +3524,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, item); break; case llvm::omp::Directive::OMPD_tile: - newOp = - genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); + newOp = genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); From 9ac5ccea3ca8995f95711058fb3959d293bc4594 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Mon, 23 Jun 2025 11:21:03 -0400 Subject: [PATCH 18/57] Remove curly braces. 
--- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 19bff545fb202..bac07e1ac17d5 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -2978,11 +2978,10 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) { (parser.parseLParen() || parser.parseInteger(value) || parser.parseRParen())) return failure(); - if (value > 1) { + if (value > 1) result.addAttribute( "num_collapse", IntegerAttr::get(parser.getBuilder().getI64Type(), value)); - } // Parse tiles SmallVector tiles; @@ -2999,9 +2998,8 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) { parser.parseRParen())) return failure(); - if (tiles.size() > 0) { + if (tiles.size() > 0) result.addAttribute("tile_sizes", DenseI64ArrayAttr::get(ctx, tiles)); - } // Parse the body. Region *region = result.addRegion(); @@ -3026,13 +3024,13 @@ void LoopNestOp::print(OpAsmPrinter &p) { if (getLoopInclusive()) p << "inclusive "; p << "step (" << getLoopSteps() << ") "; - if (int64_t numCollapse = getNumCollapse()) { + if (int64_t numCollapse = getNumCollapse()) if (numCollapse > 1) p << "collapse(" << numCollapse << ") "; - } - if (const auto tiles = getTileSizes()) { + + if (const auto tiles = getTileSizes()) p << "tiles(" << tiles.value() << ") "; - } + p.printRegion(region, /*printEntryBlockArgs=*/false); } From 7447e373e13276f053515dd92d1e22cca010f2df Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Wed, 25 Jun 2025 10:11:36 -0400 Subject: [PATCH 19/57] Avoid attaching the sizes clause to the parent construct, instead find the tile sizes through the parse tree when getting the information needed to create the loop nest ops. 
--- flang/lib/Lower/OpenMP/OpenMP.cpp | 23 ++--- flang/lib/Lower/OpenMP/Utils.cpp | 90 ++++++++++++++++++- flang/lib/Lower/OpenMP/Utils.h | 5 ++ .../Frontend/OpenMP/ConstructDecompositionT.h | 4 - 4 files changed, 99 insertions(+), 23 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 3d5162551778d..646c515280c76 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -47,6 +47,7 @@ using namespace Fortran::lower::omp; using namespace Fortran::common::openmp; +using namespace Fortran::semantics; //===----------------------------------------------------------------------===// // Code generation helper functions @@ -1690,6 +1691,7 @@ genLoopNestClauses(lower::AbstractConverter &converter, int64_t collapseValue = evaluate::ToInt64(collapse.v).value(); clauseOps.numCollapse = firOpBuilder.getI64IntegerAttr(collapseValue); } else if (clause.id == llvm::omp::Clause::OMPC_sizes) { + // This case handles the stand-alone tiling construct const auto &sizes = std::get(clause.u); llvm::SmallVector sizeValues; for (auto &size : sizes.v) { @@ -1699,6 +1701,12 @@ genLoopNestClauses(lower::AbstractConverter &converter, clauseOps.tileSizes = sizeValues; } } + + llvm::SmallVector sizeValues; + auto *ompCons{eval.getIf()}; + collectTileSizesFromOpenMPConstruct (ompCons, sizeValues, semaCtx); + if (sizeValues.size() > 0) + clauseOps.tileSizes = sizeValues; } static void genLoopClauses( @@ -3968,21 +3976,6 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, List clauses = makeClauses( std::get(beginLoopDirective.t), semaCtx); - const auto &innerOptional = - std::get>>( - loopConstruct.t); - if (innerOptional.has_value()) { - const auto &innerLoopDirective = innerOptional.value().value(); - const auto &innerBegin = - std::get(innerLoopDirective.t); - const auto &innerDirective = - std::get(innerBegin.t); - if (innerDirective.v == llvm::omp::Directive::OMPD_tile) { - 
clauses.append( - makeClauses(std::get(innerBegin.t), semaCtx)); - } - } - if (auto &endLoopDirective = std::get>( loopConstruct.t)) { diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 69d74762ace6f..e7fa9063b7ae2 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -14,6 +14,7 @@ #include "ClauseFinder.h" #include "flang/Lower/OpenMP/Clauses.h" +#include "flang/Evaluate/fold.h" #include #include #include @@ -24,10 +25,31 @@ #include #include #include +#include #include #include +using namespace Fortran::semantics; + +template +MaybeIntExpr +EvaluateIntExpr(SemanticsContext &context, const T &expr) { + if (MaybeExpr maybeExpr{ + Fold(context.foldingContext(), AnalyzeExpr(context, expr))}) { + if (auto *intExpr{Fortran::evaluate::UnwrapExpr(*maybeExpr)}) { + return std::move(*intExpr); + } + } + return std::nullopt; +} + +template +std::optional +EvaluateInt64(SemanticsContext &context, const T &expr) { + return Fortran::evaluate::ToInt64(EvaluateIntExpr(context, expr)); +} + llvm::cl::opt treatIndexAsSection( "openmp-treat-index-as-section", llvm::cl::desc("In the OpenMP data clauses treat `a(N)` as `a(N:N)`."), @@ -615,6 +637,43 @@ static void convertLoopBounds(lower::AbstractConverter &converter, } } +// Populates the sizes vector with values if the given OpenMPConstruct +// Contains a loop construct with an inner tiling construct. 
+void collectTileSizesFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + llvm::SmallVectorImpl &tileSizes, + SemanticsContext &semaCtx) { + if (!ompCons) + return; + + if (auto *ompLoop{std::get_if(&ompCons->u)}) { + const auto &innerOptional = std::get< + std::optional>>( + ompLoop->t); + if (innerOptional.has_value()) { + const auto &innerLoopDirective = innerOptional.value().value(); + const auto &innerBegin = + std::get(innerLoopDirective.t); + const auto &innerDirective = + std::get(innerBegin.t).v; + + if (innerDirective == llvm::omp::Directive::OMPD_tile) { + // Get the size values from parse tree and convert to a vector + const auto &innerClauseList{ + std::get(innerBegin.t)}; + for (const auto &clause : innerClauseList.v) + if (const auto tclause{ + std::get_if(&clause.u)}) { + for (auto &tval : tclause->v) { + if (const auto v{EvaluateInt64(semaCtx, tval)}) + tileSizes.push_back(*v); + } + } + } + } + } +} + bool collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, lower::pft::Evaluation &eval, const omp::List &clauses, @@ -636,11 +695,34 @@ bool collectLoopRelatedInfo( collapseValue = evaluate::ToInt64(clause->v).value(); found = true; } + + // Collect sizes from tile directive if present std::int64_t sizesLengthValue = 0l; - if (auto *clause = - ClauseFinder::findUniqueClause(clauses)) { - sizesLengthValue = clause->v.size(); - found = true; + if (auto *ompCons{eval.getIf()}) { + if (auto *ompLoop{std::get_if(&ompCons->u)}) { + const auto &innerOptional = std::get< + std::optional>>( + ompLoop->t); + if (innerOptional.has_value()) { + const auto &innerLoopDirective = innerOptional.value().value(); + const auto &innerBegin = + std::get(innerLoopDirective.t); + const auto &innerDirective = + std::get(innerBegin.t).v; + + if (innerDirective == llvm::omp::Directive::OMPD_tile) { + // Get the size values from parse tree and convert to a vector + const auto &innerClauseList{ + std::get(innerBegin.t)}; + 
for (const auto &clause : innerClauseList.v) + if (const auto tclause{ + std::get_if(&clause.u)}) { + sizesLengthValue = tclause->v.size(); + found = true; + } + } + } + } } collapseValue = collapseValue - sizesLengthValue; diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index 60f44a7f0610c..bb42fb02efc09 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -175,6 +175,11 @@ bool collectLoopRelatedInfo( mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv); +void collectTileSizesFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + llvm::SmallVectorImpl &tileSizes, + Fortran::semantics::SemanticsContext &semaCtx); + } // namespace omp } // namespace lower } // namespace Fortran diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index e1083b7ef2cd9..5bb1f3f36b65e 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -497,10 +497,6 @@ bool ConstructDecompositionT::applyClause( if (llvm::omp::isAllowedClauseForDirective(last.id, node->id, version)) { last.clauses.push_back(node); return true; - } else { - // llvm::errs() << "** OVERRIDING isAllowedClauseForDirective **\n"; - last.clauses.push_back(node); - return true; } } From 7c7b6f101135d4c1768287f763bf6860533997ec Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Wed, 25 Jun 2025 10:33:33 -0400 Subject: [PATCH 20/57] Fix formatting --- flang/lib/Lower/OpenMP/OpenMP.cpp | 2 +- flang/lib/Lower/OpenMP/Utils.cpp | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 646c515280c76..4311c85eeb7d4 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1704,7 +1704,7 @@ genLoopNestClauses(lower::AbstractConverter &converter, 
llvm::SmallVector sizeValues; auto *ompCons{eval.getIf()}; - collectTileSizesFromOpenMPConstruct (ompCons, sizeValues, semaCtx); + collectTileSizesFromOpenMPConstruct(ompCons, sizeValues, semaCtx); if (sizeValues.size() > 0) clauseOps.tileSizes = sizeValues; } diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index e7fa9063b7ae2..e04a6eae98408 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -33,8 +33,7 @@ using namespace Fortran::semantics; template -MaybeIntExpr -EvaluateIntExpr(SemanticsContext &context, const T &expr) { +MaybeIntExpr EvaluateIntExpr(SemanticsContext &context, const T &expr) { if (MaybeExpr maybeExpr{ Fold(context.foldingContext(), AnalyzeExpr(context, expr))}) { if (auto *intExpr{Fortran::evaluate::UnwrapExpr(*maybeExpr)}) { @@ -45,8 +44,8 @@ EvaluateIntExpr(SemanticsContext &context, const T &expr) { } template -std::optional -EvaluateInt64(SemanticsContext &context, const T &expr) { +std::optional EvaluateInt64(SemanticsContext &context, + const T &expr) { return Fortran::evaluate::ToInt64(EvaluateIntExpr(context, expr)); } @@ -641,8 +640,7 @@ static void convertLoopBounds(lower::AbstractConverter &converter, // Contains a loop construct with an inner tiling construct. void collectTileSizesFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, - llvm::SmallVectorImpl &tileSizes, - SemanticsContext &semaCtx) { + llvm::SmallVectorImpl &tileSizes, SemanticsContext &semaCtx) { if (!ompCons) return; From 47b75c3947c82294cf467bdd61a86bf2e787c147 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Thu, 26 Jun 2025 10:03:13 -0400 Subject: [PATCH 21/57] Fix unparse and add a test for nested loop constructs. 
--- flang/test/Parser/OpenMP/do-tile-size.f90 | 29 +++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 flang/test/Parser/OpenMP/do-tile-size.f90 diff --git a/flang/test/Parser/OpenMP/do-tile-size.f90 b/flang/test/Parser/OpenMP/do-tile-size.f90 new file mode 100644 index 0000000000000..886ee4a2a680c --- /dev/null +++ b/flang/test/Parser/OpenMP/do-tile-size.f90 @@ -0,0 +1,29 @@ +! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck --ignore-case %s +! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck --check-prefix="PARSE-TREE" %s + +subroutine openmp_do_tiles(x) + + integer, intent(inout)::x + + +!CHECK: !$omp do +!CHECK: !$omp tile sizes +!$omp do +!$omp tile sizes(2) +!CHECK: do + do x = 1, 100 + call F1() +!CHECK: end do + end do +!CHECK: !$omp end tile +!$omp end tile +!$omp end do + +!PARSE-TREE:| | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct +!PARSE-TREE:| | | OmpBeginLoopDirective +!PARSE-TREE:| | | OpenMPLoopConstruct +!PARSE-TREE:| | | | OmpBeginLoopDirective +!PARSE-TREE:| | | | | OmpLoopDirective -> llvm::omp::Directive = tile +!PARSE-TREE:| | | | | OmpClauseList -> OmpClause -> Sizes -> Scalar -> Integer -> Expr = '2_4' +!PARSE-TREE: | | | | DoConstruct +END subroutine openmp_do_tiles From 2203a350d8605baceb889e48de703f7528a3ceac Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Thu, 26 Jun 2025 10:50:08 -0400 Subject: [PATCH 22/57] Use more convenient function to get OpenMPLoopConstruct. Fix comments. 
--- flang/lib/Semantics/canonicalize-omp.cpp | 30 ++++++++----------- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 9 +++--- 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index a7749b5a81678..79630b564e51a 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -152,23 +152,19 @@ class CanonicalizationOfOmp { // Keep track of the loops to handle the end loop directives llvm::SmallVector loops; loops.push_back(&x); - if (auto *innerConstruct{ - GetConstructIf(*nextIt)}) { - if (auto *innerOmpLoop{ - std::get_if(&innerConstruct->u)}) { - auto &innerBeginDir{ - std::get(innerOmpLoop->t)}; - auto &innerDir{std::get(innerBeginDir.t)}; - if (innerDir.v == llvm::omp::Directive::OMPD_tile) { - auto &innerLoop = std::get>>( - loops.back()->t); - innerLoop = std::move(*innerOmpLoop); - // Retrieveing the address so that DoConstruct or inner loop can be - // set later. - loops.push_back(&(innerLoop.value().value())); - nextIt = block.erase(nextIt); - } + if (auto *innerOmpLoop{GetOmpIf(*nextIt)}) { + auto &innerBeginDir{ + std::get(innerOmpLoop->t)}; + auto &innerDir{std::get(innerBeginDir.t)}; + if (innerDir.v == llvm::omp::Directive::OMPD_tile) { + auto &innerLoop = std::get< + std::optional>>( + loops.back()->t); + innerLoop = std::move(*innerOmpLoop); + // Retrieveing the address so that DoConstruct or inner loop can be + // set later. 
+ loops.push_back(&(innerLoop.value().value())); + nextIt = block.erase(nextIt); } } diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index dccf241e919a7..842fdc0854d67 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3034,7 +3034,6 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, loopInfos.push_back(*loopResult); } - // llvm::OpenMPIRBuilder::InsertPointTy afterIP = builder.saveIP(); llvm::OpenMPIRBuilder::InsertPointTy afterIP = loopInfos.front()->getAfterIP(); @@ -3055,10 +3054,10 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, std::vector NewLoops = ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes); - // Collapse loops. Store the insertion point because LoopInfos may get - // invalidated. - auto AfterBB = NewLoops.front()->getAfter(); - auto AfterAfterBB = AfterBB->getSingleSuccessor(); + // Update afterIP to get the correct insertion point after + // tiling. + llvm::BasicBlock *AfterBB = NewLoops.front()->getAfter(); + llvm::BasicBlock *AfterAfterBB = AfterBB->getSingleSuccessor(); afterIP = {AfterAfterBB, AfterAfterBB->begin()}; NewTopLoopInfo = NewLoops[0]; From 99cb790de98302ff254c699e2bc5e5db51497e13 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Thu, 26 Jun 2025 10:54:46 -0400 Subject: [PATCH 23/57] Fix formatting. 
--- .../Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 842fdc0854d67..183964372cadf 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3057,7 +3057,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, // Update afterIP to get the correct insertion point after // tiling. llvm::BasicBlock *AfterBB = NewLoops.front()->getAfter(); - llvm::BasicBlock *AfterAfterBB = AfterBB->getSingleSuccessor(); + llvm::BasicBlock *AfterAfterBB = AfterBB->getSingleSuccessor(); afterIP = {AfterAfterBB, AfterAfterBB->begin()}; NewTopLoopInfo = NewLoops[0]; From 1ff74efa64434d7d98868bb5f3a3df5aff62c781 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Sat, 9 Aug 2025 10:53:23 -0400 Subject: [PATCH 24/57] Fix merge problems related to the different representations used for nested loop constructs. 
--- flang/lib/Lower/OpenMP/OpenMP.cpp | 52 +++++----------------- flang/lib/Lower/OpenMP/Utils.cpp | 28 +++++++----- flang/lib/Semantics/canonicalize-omp.cpp | 10 +++-- flang/lib/Semantics/resolve-directives.cpp | 34 ++++++++------ 4 files changed, 56 insertions(+), 68 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 4311c85eeb7d4..e6a8ac1e3b1f3 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -422,14 +422,19 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, // FIXME(JAN): For now we check if there is an inner // OpenMPLoopConstruct, and extract the size clause from there - const auto &innerOptional = std::get>>( - ompConstruct.t); - if (innerOptional.has_value()) { - const auto &innerLoopDirective = innerOptional.value().value(); + const auto &nestedOptional = + std::get>( + ompConstruct.t); + assert(nestedOptional.has_value() && + "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = + std::get_if>( + &(nestedOptional.value())); + if (innerConstruct) { + const auto &innerLoopConstruct = innerConstruct->value(); const auto &innerBegin = std::get( - innerLoopDirective.t); + innerLoopConstruct.t); const auto &innerDirective = std::get(innerBegin.t); if (innerDirective.v == llvm::omp::Directive::OMPD_tile) { @@ -2276,41 +2281,6 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter, // Apply unrolling to it auto cli = canonLoop.getCli(); mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli); - -static mlir::omp::LoopOp genTiledLoopOp(lower::AbstractConverter &converter, - lower::SymMap &symTable, - semantics::SemanticsContext &semaCtx, - lower::pft::Evaluation &eval, - mlir::Location loc, - const ConstructQueue &queue, - ConstructQueue::const_iterator item) { - mlir::omp::LoopOperands loopClauseOps; - llvm::SmallVector loopReductionSyms; - genLoopClauses(converter, semaCtx, item->clauses, loc, 
loopClauseOps, - loopReductionSyms); - - DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval, - /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/true, symTable); - dsp.processStep1(&loopClauseOps); - - mlir::omp::LoopNestOperands loopNestClauseOps; - llvm::SmallVector iv; - genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, - loopNestClauseOps, iv); - - EntryBlockArgs loopArgs; - loopArgs.priv.syms = dsp.getDelayedPrivSymbols(); - loopArgs.priv.vars = loopClauseOps.privateVars; - loopArgs.reduction.syms = loopReductionSyms; - loopArgs.reduction.vars = loopClauseOps.reductionVars; - - auto loopOp = - genWrapperOp(converter, loc, loopClauseOps, loopArgs); - genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, - loopNestClauseOps, iv, {{loopOp, loopArgs}}, - llvm::omp::Directive::OMPD_loop, dsp); - return loopOp; } static mlir::omp::MaskedOp diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index e04a6eae98408..dc58eecae7759 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -645,11 +645,15 @@ void collectTileSizesFromOpenMPConstruct( return; if (auto *ompLoop{std::get_if(&ompCons->u)}) { - const auto &innerOptional = std::get< - std::optional>>( - ompLoop->t); - if (innerOptional.has_value()) { - const auto &innerLoopDirective = innerOptional.value().value(); + const auto &nestedOptional = + std::get>(ompLoop->t); + assert(nestedOptional.has_value() && + "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = + std::get_if>( + &(nestedOptional.value())); + if (innerConstruct) { + const auto &innerLoopDirective = innerConstruct->value(); const auto &innerBegin = std::get(innerLoopDirective.t); const auto &innerDirective = @@ -698,11 +702,15 @@ bool collectLoopRelatedInfo( std::int64_t sizesLengthValue = 0l; if (auto *ompCons{eval.getIf()}) { if (auto *ompLoop{std::get_if(&ompCons->u)}) { - const auto 
&innerOptional = std::get< - std::optional>>( - ompLoop->t); - if (innerOptional.has_value()) { - const auto &innerLoopDirective = innerOptional.value().value(); + const auto &nestedOptional = + std::get>(ompLoop->t); + assert(nestedOptional.has_value() && + "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = + std::get_if>( + &(nestedOptional.value())); + if (innerConstruct) { + const auto &innerLoopDirective = innerConstruct->value(); const auto &innerBegin = std::get(innerLoopDirective.t); const auto &innerDirective = diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 79630b564e51a..4792bf2cb217c 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -157,13 +157,15 @@ class CanonicalizationOfOmp { std::get(innerOmpLoop->t)}; auto &innerDir{std::get(innerBeginDir.t)}; if (innerDir.v == llvm::omp::Directive::OMPD_tile) { - auto &innerLoop = std::get< - std::optional>>( - loops.back()->t); + auto &innerLoopVariant = + std::get>(loops.back()->t); + auto &innerLoop = + std::get>( + innerLoopVariant.value()); innerLoop = std::move(*innerOmpLoop); // Retrieveing the address so that DoConstruct or inner loop can be // set later. 
- loops.push_back(&(innerLoop.value().value())); + loops.push_back(&(innerLoop.value())); nextIt = block.erase(nextIt); } } diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 934a06b2aef33..9a2caef34bd67 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -2061,12 +2061,18 @@ void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromInnerLoopContruct( const parser::OpenMPLoopConstruct &x, llvm::SmallVector &levels, llvm::SmallVector &clauses) { - const auto &innerOptional = - std::get>>( - x.t); - if (innerOptional.has_value()) { + + const auto &nestedOptional = + std::get>(x.t); + assert(nestedOptional.has_value() && + "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = + std::get_if>( + &(nestedOptional.value())); + + if (innerConstruct) { CollectAssociatedLoopLevelsFromLoopConstruct( - innerOptional.value().value(), levels, clauses); + innerConstruct->value(), levels, clauses); } } @@ -2131,17 +2137,19 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( bool hasCollapseClause{ clause ? 
(clause->Id() == llvm::omp::OMPC_collapse) : false}; const parser::OpenMPLoopConstruct *innerMostLoop = &x; - + const parser::NestedConstruct *innerMostNest = nullptr; while (auto &optLoopCons{ - std::get>(x.t)}) { - if (const auto &innerLoop{ - std::get_if < parser::OpenMPLoopConstruct >>> (innerMostLoop->t)}) { - innerMostLoop = &innerLoop.value().value(); + std::get>(innerMostLoop->t)}) { + innerMostNest = &(optLoopCons.value()); + if (const auto *innerLoop{ + std::get_if>( + innerMostNest)}) { + innerMostLoop = &(innerLoop->value()); } } - if (optLoopCons.has_value()) { - if (const auto &outer{std::get_if(innerMostLoop->t)}) { + if (innerMostNest) { + if (const auto &outer{std::get_if(innerMostNest)}) { for (const parser::DoConstruct *loop{&*outer}; loop && level > 0; --level) { if (loop->IsDoConcurrent()) { @@ -2177,7 +2185,7 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( CheckAssocLoopLevel(level, GetAssociatedClause()); } else if (const auto &loop{std::get_if< common::Indirection>( - &*optLoopCons)}) { + innerMostNest)}) { auto &beginDirective = std::get(loop->value().t); auto &beginLoopDirective = From e49a3012d0178dd8bb3d601fa4bbf7bb67983645 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Sat, 9 Aug 2025 12:27:35 -0400 Subject: [PATCH 25/57] Fix bugs introduced when merging. 
--- flang/lib/Lower/OpenMP/OpenMP.cpp | 4 ++-- flang/lib/Semantics/canonicalize-omp.cpp | 19 +++++++++--------- flang/lib/Semantics/resolve-directives.cpp | 3 ++- ...nested-loop-transformation-construct01.f90 | 20 ------------------- flang/test/Lower/OpenMP/wsloop-tile.f90 | 2 +- 5 files changed, 15 insertions(+), 33 deletions(-) delete mode 100644 flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index e6a8ac1e3b1f3..108d9a40c8d45 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -420,8 +420,8 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, beginClauseList = &std::get(beginDirective.t); - // FIXME(JAN): For now we check if there is an inner - // OpenMPLoopConstruct, and extract the size clause from there + // For now we check if there is an inner OpenMPLoopConstruct, and + // extract the size clause from there const auto &nestedOptional = std::get>( ompConstruct.t); diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 4792bf2cb217c..c664171350d9e 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -143,7 +143,6 @@ class CanonicalizationOfOmp { "If a loop construct has been fully unrolled, it cannot then be tiled"_err_en_US, parser::ToUpperCaseLetters(dir.source.ToString())); }; - nextIt = it; while (++nextIt != block.end()) { // Ignore compiler directives. @@ -159,14 +158,16 @@ class CanonicalizationOfOmp { if (innerDir.v == llvm::omp::Directive::OMPD_tile) { auto &innerLoopVariant = std::get>(loops.back()->t); - auto &innerLoop = - std::get>( - innerLoopVariant.value()); - innerLoop = std::move(*innerOmpLoop); - // Retrieveing the address so that DoConstruct or inner loop can be - // set later. 
- loops.push_back(&(innerLoop.value())); - nextIt = block.erase(nextIt); + if (innerLoopVariant.has_value()) { + auto *innerLoop = + std::get_if>( + &(innerLoopVariant.value())); + *innerLoop = std::move(*innerOmpLoop); + // Retrieveing the address so that DoConstruct or inner loop can be + // set later. + loops.push_back(&(innerLoop->value())); + nextIt = block.erase(nextIt); + } } } diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 9a2caef34bd67..1694da571c55e 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -2145,7 +2145,8 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( std::get_if>( innerMostNest)}) { innerMostLoop = &(innerLoop->value()); - } + } else + break; } if (innerMostNest) { diff --git a/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 b/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 deleted file mode 100644 index 17eba93a7405d..0000000000000 --- a/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 +++ /dev/null @@ -1,20 +0,0 @@ -! Test to ensure TODO message is emitted for tile OpenMP 5.1 Directives when they are nested. - -!RUN: not %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s 2>&1 | FileCheck %s - -subroutine loop_transformation_construct - implicit none - integer :: I = 10 - integer :: x - integer :: y(I) - - !$omp do - !$omp tile - do i = 1, I - y(i) = y(i) * 5 - end do - !$omp end tile - !$omp end do -end subroutine - -!CHECK: not yet implemented: Unhandled loop directive (tile) diff --git a/flang/test/Lower/OpenMP/wsloop-tile.f90 b/flang/test/Lower/OpenMP/wsloop-tile.f90 index c9bf18e3b278d..4c412b357f52e 100644 --- a/flang/test/Lower/OpenMP/wsloop-tile.f90 +++ b/flang/test/Lower/OpenMP/wsloop-tile.f90 @@ -2,7 +2,7 @@ ! 
RUN: bbc -fopenmp -fopenmp-version=51 -emit-hlfir %s -o - | FileCheck %s -!CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "wsloop_tile"} { +!CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "WSLOOP_TILE"} { program wsloop_tile integer :: i, j, k integer :: a, b, c From 9b770ab3caceb48b44221bce71ebaaa7a51114f0 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Mon, 11 Aug 2025 07:23:15 -0400 Subject: [PATCH 26/57] Move include --- flang/lib/Lower/OpenMP/Utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index dc58eecae7759..07f562fa6a4b1 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -13,8 +13,8 @@ #include "Utils.h" #include "ClauseFinder.h" -#include "flang/Lower/OpenMP/Clauses.h" #include "flang/Evaluate/fold.h" +#include "flang/Lower/OpenMP/Clauses.h" #include #include #include From d6ceeb0b1be2f4c0638e1430f90cfa2fbfbfffc1 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Tue, 19 Aug 2025 10:55:05 -0400 Subject: [PATCH 27/57] Remove unused code. Currently the canonicalize-omp can only handle a single nested loop construct, which is what we prefer. 
--- flang/lib/Lower/OpenMP/OpenMP.cpp | 1 - flang/lib/Lower/OpenMP/Utils.cpp | 22 ++---- flang/lib/Semantics/canonicalize-omp.cpp | 68 ++++--------------- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 2 - 4 files changed, 20 insertions(+), 73 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 108d9a40c8d45..5ec7cb632159a 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -937,7 +937,6 @@ static void genLoopVars( storeOp = createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); } - firOpBuilder.setInsertionPointAfter(storeOp); } diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 07f562fa6a4b1..6c9763e5a37ab 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -59,21 +59,14 @@ namespace lower { namespace omp { int64_t getCollapseValue(const List &clauses) { - int64_t collapseValue = 1; - int64_t numTileSizes = 0; - for (auto &clause : clauses) { - if (clause.id == llvm::omp::Clause::OMPC_collapse) { - const auto &collapse = std::get(clause.u); - collapseValue = evaluate::ToInt64(collapse.v).value(); - } else if (clause.id == llvm::omp::Clause::OMPC_sizes) { - const auto &sizes = std::get(clause.u); - numTileSizes = sizes.v.size(); - } + auto iter = llvm::find_if(clauses, [](const Clause &clause) { + return clause.id == llvm::omp::Clause::OMPC_collapse; + }); + if (iter != clauses.end()) { + const auto &collapse = std::get(iter->u); + return evaluate::ToInt64(collapse.v).value(); } - - collapseValue = collapseValue - numTileSizes; - int64_t result = collapseValue > numTileSizes ? 
collapseValue : numTileSizes; - return result; + return 1; } void genObjectList(const ObjectList &objects, @@ -681,7 +674,6 @@ bool collectLoopRelatedInfo( lower::pft::Evaluation &eval, const omp::List &clauses, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv) { - bool found = false; fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index c664171350d9e..9722eca19447d 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -10,6 +10,7 @@ #include "flang/Parser/parse-tree-visitor.h" #include "flang/Parser/parse-tree.h" #include "flang/Semantics/semantics.h" + // After Loop Canonicalization, rewrite OpenMP parse tree to make OpenMP // Constructs more structured which provide explicit scopes for later // structural checks and semantic analysis. @@ -116,19 +117,15 @@ class CanonicalizationOfOmp { // in the same iteration // // Original: - // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t-> - // OmpBeginLoopDirective t-> OmpLoopDirective - // [ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct u-> - // OmpBeginLoopDirective t-> OmpLoopDirective t-> Tile v-> OMP_tile] + // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct + // OmpBeginLoopDirective // ExecutableConstruct -> DoConstruct - // [ExecutableConstruct -> OmpEndLoopDirective] (note: tile) // ExecutableConstruct -> OmpEndLoopDirective (if available) // // After rewriting: - // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct t-> - // [OpenMPLoopConstruct t -> OmpBeginLoopDirective -> OmpLoopDirective - // OmpEndLoopDirective] (note: tile) - // OmpBeginLoopDirective t -> OmpLoopDirective -> DoConstruct + // ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct + // OmpBeginLoopDirective + // DoConstruct // OmpEndLoopDirective (if available) parser::Block::iterator nextIt; auto 
&beginDir{std::get(x.t)}; @@ -143,66 +140,27 @@ class CanonicalizationOfOmp { "If a loop construct has been fully unrolled, it cannot then be tiled"_err_en_US, parser::ToUpperCaseLetters(dir.source.ToString())); }; + nextIt = it; while (++nextIt != block.end()) { // Ignore compiler directives. if (GetConstructIf(*nextIt)) continue; - // Keep track of the loops to handle the end loop directives - llvm::SmallVector loops; - loops.push_back(&x); - if (auto *innerOmpLoop{GetOmpIf(*nextIt)}) { - auto &innerBeginDir{ - std::get(innerOmpLoop->t)}; - auto &innerDir{std::get(innerBeginDir.t)}; - if (innerDir.v == llvm::omp::Directive::OMPD_tile) { - auto &innerLoopVariant = - std::get>(loops.back()->t); - if (innerLoopVariant.has_value()) { - auto *innerLoop = - std::get_if>( - &(innerLoopVariant.value())); - *innerLoop = std::move(*innerOmpLoop); - // Retrieveing the address so that DoConstruct or inner loop can be - // set later. - loops.push_back(&(innerLoop->value())); - nextIt = block.erase(nextIt); - } - } - } if (auto *doCons{GetConstructIf(*nextIt)}) { if (doCons->GetLoopControl()) { // move DoConstruct std::get>>>( - loops.back()->t) = std::move(*doCons); + common::Indirection>>>(x.t) = + std::move(*doCons); nextIt = block.erase(nextIt); // try to match OmpEndLoopDirective - while (nextIt != block.end() && !loops.empty()) { + if (nextIt != block.end()) { if (auto *endDir{ GetConstructIf(*nextIt)}) { - auto &endOmpDirective{ - std::get(endDir->t)}; - auto &loopBegin{ - std::get(loops.back()->t)}; - auto &loopDir{std::get(loopBegin.t)}; - - // If the directive is a tile we try to match the corresponding - // end tile if it exsists. If it is not a tile directive we - // always assign the end loop directive and fall back on the - // existing directive structure checks. 
- if (loopDir.v != llvm::omp::Directive::OMPD_tile || - loopDir.v == endOmpDirective.v) { - std::get>( - loops.back()->t) = std::move(*endDir); - nextIt = block.erase(nextIt); - } - - loops.pop_back(); - } else { - // If there is a mismatch bail out. - break; + std::get>(x.t) = + std::move(*endDir); + nextIt = block.erase(nextIt); } } } else { diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 183964372cadf..03d3cc57895c7 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3037,8 +3037,6 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy afterIP = loopInfos.front()->getAfterIP(); - // Initialize the new loop info to the current one, in case there - // are no loop transformations done. llvm::CanonicalLoopInfo *NewTopLoopInfo = nullptr; // Do tiling From dd74eac78a3e153ee8434d5f6c341a277e7718bd Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Thu, 21 Aug 2025 22:15:45 -0400 Subject: [PATCH 28/57] Address review comments. 
--- flang/lib/Lower/OpenMP/OpenMP.cpp | 1 - flang/lib/Lower/OpenMP/Utils.cpp | 4 +- flang/lib/Semantics/resolve-directives.cpp | 54 +++++++++---------- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 9 ---- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 24 --------- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 5 +- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 3 +- mlir/test/Dialect/OpenMP/invalid.mlir | 2 +- 8 files changed, 33 insertions(+), 69 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 5ec7cb632159a..355ba9428995e 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3944,7 +3944,6 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, std::get(loopConstruct.t); List clauses = makeClauses( std::get(beginLoopDirective.t), semaCtx); - if (auto &endLoopDirective = std::get>( loopConstruct.t)) { diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 6c9763e5a37ab..2a6cb0d2d4a74 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -629,8 +629,8 @@ static void convertLoopBounds(lower::AbstractConverter &converter, } } -// Populates the sizes vector with values if the given OpenMPConstruct -// Contains a loop construct with an inner tiling construct. +/// Populates the sizes vector with values if the given OpenMPConstruct +/// Contains a loop construct with an inner tiling construct. 
void collectTileSizesFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, llvm::SmallVectorImpl &tileSizes, SemanticsContext &semaCtx) { diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 1694da571c55e..20df9d01de0c0 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -817,18 +817,22 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { const parser::OmpClause *GetAssociatedClause() { return associatedClause; } private: + /// Given a vector of loop levels and a vector of corresponding clauses find + /// the largest loop level and set the associated loop level to the found + /// maximum. This is used for error handling to ensure that the number of + /// affected loops is not larger that the number of available loops. std::int64_t SetAssociatedMaxClause(llvm::SmallVector &, llvm::SmallVector &); - std::int64_t GetAssociatedLoopLevelFromLoopConstruct( + std::int64_t GetNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &); - std::int64_t GetAssociatedLoopLevelFromClauses(const parser::OmpClauseList &); - void CollectAssociatedLoopLevelsFromLoopConstruct( + std::int64_t GetNumAffectedLoopsFromClauses(const parser::OmpClauseList &); + void CollectNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &, llvm::SmallVector &, llvm::SmallVector &); - void CollectAssociatedLoopLevelsFromInnerLoopContruct( + void CollectNumAffectedLoopsFromInnerLoopContruct( const parser::OpenMPLoopConstruct &, llvm::SmallVector &, llvm::SmallVector &); - void CollectAssociatedLoopLevelsFromClauses(const parser::OmpClauseList &, + void CollectNumAffectedLoopsFromClauses(const parser::OmpClauseList &, llvm::SmallVector &, llvm::SmallVector &); @@ -1885,7 +1889,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { beginDir.v == llvm::omp::Directive::OMPD_target_loop) IssueNonConformanceWarning(beginDir.v, 
beginDir.source, 52); ClearDataSharingAttributeObjects(); - SetContextAssociatedLoopLevel(GetAssociatedLoopLevelFromLoopConstruct(x)); + SetContextAssociatedLoopLevel(GetNumAffectedLoopsFromLoopConstruct(x)); if (beginDir.v == llvm::omp::Directive::OMPD_do) { auto &optLoopCons = std::get>(x.t); @@ -1899,7 +1903,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { } } PrivatizeAssociatedLoopIndexAndCheckLoopLevel(x); - ordCollapseLevel = GetAssociatedLoopLevelFromLoopConstruct(x) + 1; + ordCollapseLevel = GetNumAffectedLoopsFromLoopConstruct(x) + 1; return true; } @@ -1995,13 +1999,12 @@ std::int64_t OmpAttributeVisitor::SetAssociatedMaxClause( llvm::SmallVector &levels, llvm::SmallVector &clauses) { - // Find the tile level to know how much to reduce the level for collapse + // Find the tile level to ensure that the COLLAPSE clause value + // does not exeed the number of tiled loops. std::int64_t tileLevel = 0; - for (auto [level, clause] : llvm::zip_equal(levels, clauses)) { - if (isSizesClause(clause)) { + for (auto [level, clause] : llvm::zip_equal(levels, clauses)) + if (isSizesClause(clause)) tileLevel = level; - } - } std::int64_t maxLevel = 1; const parser::OmpClause *maxClause = nullptr; @@ -2010,14 +2013,11 @@ std::int64_t OmpAttributeVisitor::SetAssociatedMaxClause( context_.Say(clause->source, "The value of the parameter in the COLLAPSE clause must" " not be larger than the number of the number of tiled loops" - " because collapse relies on independent loop iterations."_err_en_US); + " because collapse currently is limited to independent loop" + " iterations."_err_en_US); return 1; } - if (!isSizesClause(clause)) { - level = level - tileLevel; - } - if (level > maxLevel) { maxLevel = level; maxClause = clause; @@ -2028,36 +2028,36 @@ std::int64_t OmpAttributeVisitor::SetAssociatedMaxClause( return maxLevel; } -std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromLoopConstruct( +std::int64_t 
OmpAttributeVisitor::GetNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &x) { llvm::SmallVector levels; llvm::SmallVector clauses; - CollectAssociatedLoopLevelsFromLoopConstruct(x, levels, clauses); + CollectNumAffectedLoopsFromLoopConstruct(x, levels, clauses); return SetAssociatedMaxClause(levels, clauses); } -std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromClauses( +std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromClauses( const parser::OmpClauseList &x) { llvm::SmallVector levels; llvm::SmallVector clauses; - CollectAssociatedLoopLevelsFromClauses(x, levels, clauses); + CollectNumAffectedLoopsFromClauses(x, levels, clauses); return SetAssociatedMaxClause(levels, clauses); } -void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromLoopConstruct( +void OmpAttributeVisitor::CollectNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &x, llvm::SmallVector &levels, llvm::SmallVector &clauses) { const auto &beginLoopDir{std::get(x.t)}; const auto &clauseList{std::get(beginLoopDir.t)}; - CollectAssociatedLoopLevelsFromClauses(clauseList, levels, clauses); - CollectAssociatedLoopLevelsFromInnerLoopContruct(x, levels, clauses); + CollectNumAffectedLoopsFromClauses(clauseList, levels, clauses); + CollectNumAffectedLoopsFromInnerLoopContruct(x, levels, clauses); } -void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromInnerLoopContruct( +void OmpAttributeVisitor::CollectNumAffectedLoopsFromInnerLoopContruct( const parser::OpenMPLoopConstruct &x, llvm::SmallVector &levels, llvm::SmallVector &clauses) { @@ -2071,12 +2071,12 @@ void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromInnerLoopContruct( &(nestedOptional.value())); if (innerConstruct) { - CollectAssociatedLoopLevelsFromLoopConstruct( + CollectNumAffectedLoopsFromLoopConstruct( innerConstruct->value(), levels, clauses); } } -void OmpAttributeVisitor::CollectAssociatedLoopLevelsFromClauses( +void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( 
const parser::OmpClauseList &x, llvm::SmallVector &levels, llvm::SmallVector &clauses) { for (const auto &clause : x.v) { diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index a994f23c1fbe2..1050e3d8b08dd 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -2257,9 +2257,6 @@ class OpenMPIRBuilder { /// Return the function that contains the region to be outlined. Function *getFunction() const { return EntryBB->getParent(); } - - /// Dump the info in a somewhat readable way - void dump(); }; /// Collection of regions that need to be outlined during finalization. @@ -2280,9 +2277,6 @@ class OpenMPIRBuilder { /// Add a new region that will be outlined later. void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); } - /// Dump outline infos - void dumpOutlineInfos(); - /// An ordered map of auto-generated variables to their unique names. /// It stores variables with the following names: 1) ".gomp_critical_user_" + /// + ".var" for "omp critical" directives; 2) @@ -3916,9 +3910,6 @@ class CanonicalLoopInfo { /// Invalidate this loop. That is, the underlying IR does not fulfill the /// requirements of an OpenMP canonical loop anymore. LLVM_ABI void invalidate(); - - /// Dump the info in a somewhat readable way - void dump(); }; /// ScanInfo holds the information to assist in lowering of Scan reduction. 
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index ff50dfbbd5259..989bcf45e0006 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -9145,15 +9145,6 @@ Error OpenMPIRBuilder::emitOffloadingArrays( return Error::success(); } -void OpenMPIRBuilder::dumpOutlineInfos() { - errs() << "=== Outline Infos Begin ===\n"; - for (auto En : enumerate(OutlineInfos)) { - errs() << "[" << En.index() << "]: "; - En.value().dump(); - } - errs() << "=== Outline Infos End ===\n"; -} - void OpenMPIRBuilder::emitBranch(BasicBlock *Target) { BasicBlock *CurBB = Builder.GetInsertBlock(); @@ -10078,14 +10069,6 @@ void OpenMPIRBuilder::OutlineInfo::collectBlocks( } } -void OpenMPIRBuilder::OutlineInfo::dump() { - errs() << "=== OutilneInfo == " - << " EntryBB: " << (EntryBB ? EntryBB->getName() : "n\a") - << " ExitBB: " << (ExitBB ? ExitBB->getName() : "n\a") - << " OuterAllocaBB: " - << (OuterAllocaBB ? OuterAllocaBB->getName() : "n/a") << "\n"; -} - void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, @@ -10863,10 +10846,3 @@ void CanonicalLoopInfo::invalidate() { Latch = nullptr; Exit = nullptr; } - -void CanonicalLoopInfo::dump() { - errs() << "CanonicaLoop == Header: " << (Header ? Header->getName() : "n/a") - << " Cond: " << (Cond ? Cond->getName() : "n/a") - << " Latch: " << (Latch ? Latch->getName() : "n/a") - << " Exit: " << (Exit ? 
Exit->getName() : "n/a") << "\n"; -} diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index bac07e1ac17d5..5f9b6e29375d4 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -2961,7 +2961,7 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) { for (auto &iv : ivs) iv.type = loopVarType; - auto ctx = parser.getBuilder().getContext(); + auto *ctx = parser.getBuilder().getContext(); // Parse "inclusive" flag. if (succeeded(parser.parseOptionalKeyword("inclusive"))) result.addAttribute("loop_inclusive", UnitAttr::get(ctx)); @@ -3065,8 +3065,7 @@ LogicalResult LoopNestOp::verify() { if (const auto &tiles = getTileSizes()) if (tiles.value().size() > numIVs) - return emitOpError() - << "number of tilings is larger than the number of loops"; + return emitOpError() << "too few canonical loops for tile dimensions"; if (!llvm::dyn_cast_if_present((*this)->getParentOp())) return emitOpError() << "expects parent op to be a loop wrapper"; diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 03d3cc57895c7..a8f4b3e585f22 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3061,9 +3061,8 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, // Update the loop infos loopInfos.clear(); - for (const auto &newLoop : NewLoops) { + for (const auto &newLoop : NewLoops) loopInfos.push_back(newLoop); - } } // Tiling done // Do collapse diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index c6b4ae02602d9..8072354d02ccd 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -172,7 +172,7 @@ func.func @collapse_size(%lb : index, %ub : index, %step 
: index) { func.func @tiles_length(%lb : index, %ub : index, %step : index) { omp.wsloop { - // expected-error@+1 {{number of tilings is larger than the number of loops}} + // expected-error@+1 {{op too few canonical loops for tile dimensions}} omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) tiles(2, 4) { omp.yield } From 432273a6d1419655d14bc5ed6d884354fca6cc63 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Thu, 21 Aug 2025 23:00:42 -0400 Subject: [PATCH 29/57] Undo unrelated change. --- flang/lib/Lower/OpenMP/Utils.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 2a6cb0d2d4a74..9d280985a27a4 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -758,6 +758,7 @@ bool collectLoopRelatedInfo( } while (collapseValue > 0); convertLoopBounds(converter, currentLocation, result, loopVarTypeSize); + return found; } From 5dafa147f3e41584935b73b7538bed8953a2b4b4 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 26 Aug 2025 00:51:50 +0200 Subject: [PATCH 30/57] Proof-of-concept implementation of loop interchange --- flang/lib/Lower/OpenMP/OpenMP.cpp | 70 ++++++++++++++++--- flang/lib/Lower/OpenMP/Utils.cpp | 68 +++++++++++++++++- flang/lib/Lower/OpenMP/Utils.h | 7 ++ flang/lib/Parser/openmp-parsers.cpp | 1 + flang/lib/Parser/unparse.cpp | 3 + flang/lib/Semantics/CMakeLists.txt | 42 ++++++----- flang/lib/Semantics/canonicalize-omp.cpp | 12 ++-- flang/lib/Semantics/resolve-directives.cpp | 52 +++++++++++--- .../Frontend/OpenMP/ConstructDecompositionT.h | 20 ++++++ .../mlir/Dialect/OpenMP/OpenMPClauses.td | 20 ++++++ mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 3 +- .../Conversion/SCFToOpenMP/SCFToOpenMP.cpp | 2 +- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 9 ++- 13 files changed, 261 insertions(+), 48 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 355ba9428995e..154361e28734b 100644 
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -440,6 +440,10 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, if (innerDirective.v == llvm::omp::Directive::OMPD_tile) { middleClauseList = &std::get(innerBegin.t); + } + if (innerDirective.v == llvm::omp::Directive::OMPD_interchange) { + llvm_unreachable("MK: Handle this"); + middleClauseList = &std::get(innerBegin.t); } } if (auto &endDirective = @@ -1191,7 +1195,10 @@ struct OpWithBodyGenInfo { /// \param [in] item - item in the queue to generate body for. static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info, const ConstructQueue &queue, - ConstructQueue::const_iterator item) { + ConstructQueue::const_iterator item) { int a = 0; + if (a) { + op.dump(); + } fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder(); auto insertMarker = [](fir::FirOpBuilder &builder) { @@ -1330,7 +1337,10 @@ static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info, // present). Otherwise, these operations will be inserted within a // wrapper region. 
mlir::Operation *privatizationBottomLevelOp = &op; - if (auto loopNest = llvm::dyn_cast(op)) { + if (auto loopNest = llvm::dyn_cast(op)) { int b = 0; + if (b) { + loopNest.dump(); + } llvm::SmallVector wrappers; loopNest.gatherWrappers(wrappers); if (!wrappers.empty()) @@ -1679,7 +1689,7 @@ genLoopNestClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const List &clauses, mlir::Location loc, mlir::omp::LoopNestOperands &clauseOps, - llvm::SmallVectorImpl &iv) { + llvm::SmallVectorImpl &iv, bool enableInterchange = false) { ClauseProcessor cp(converter, semaCtx, clauses); HostEvalInfo *hostEvalInfo = getHostEvalInfoStackTop(converter); @@ -1703,7 +1713,9 @@ genLoopNestClauses(lower::AbstractConverter &converter, sizeValues.push_back(sizeValue); } clauseOps.tileSizes = sizeValues; - } + } else if (clause.id == llvm::omp::Clause::OMPC_permutation) { +llvm_unreachable("MK: To handle standalone interchange construct"); + } } llvm::SmallVector sizeValues; @@ -1711,6 +1723,13 @@ genLoopNestClauses(lower::AbstractConverter &converter, collectTileSizesFromOpenMPConstruct(ompCons, sizeValues, semaCtx); if (sizeValues.size() > 0) clauseOps.tileSizes = sizeValues; + + llvm::SmallVector permutationValues; collectPermutationFromOpenMPConstruct(ompCons, permutationValues, semaCtx); + if (enableInterchange) { + permutationValues.append({2,1}); + } + clauseOps.interchangeEnabled = mlir:: BoolAttr::get( firOpBuilder.getContext() , enableInterchange); + clauseOps.permutation = permutationValues; } static void genLoopClauses( @@ -2103,7 +2122,7 @@ static mlir::omp::LoopOp genLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item) { + ConstructQueue::const_iterator item, llvm::omp::Directive dir = llvm::omp::Directive::OMPD_loop , bool enableInterchange = 
false) { mlir::omp::LoopOperands loopClauseOps; llvm::SmallVector loopReductionSyms; genLoopClauses(converter, semaCtx, item->clauses, loc, loopClauseOps, @@ -2117,7 +2136,7 @@ genLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, mlir::omp::LoopNestOperands loopNestClauseOps; llvm::SmallVector iv; genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, - loopNestClauseOps, iv); + loopNestClauseOps, iv, enableInterchange); EntryBlockArgs loopArgs; loopArgs.priv.syms = dsp.getDelayedPrivSymbols(); @@ -2125,11 +2144,35 @@ genLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, loopArgs.reduction.syms = loopReductionSyms; loopArgs.reduction.vars = loopClauseOps.reductionVars; + // Applying interchange clause + // tiling assumed to be applied after interchange + if (loopNestClauseOps.permutation.size() >=1) { + assert(loopNestClauseOps.permutation.size() == iv.size() && "TODO: if permutation is smaller than number of associated loops, permute only the first loops"); + llvm::SmallVector newIVs; + llvm::SmallVector newLBs; + llvm::SmallVector newUBs; + llvm::SmallVector newINCs; llvm::SmallVector newSizes; + + // TODO: Assert this is a valid permution + for (auto perm :loopNestClauseOps.permutation) { + newIVs.push_back(iv[perm-1]); + newLBs.push_back( loopNestClauseOps.loopLowerBounds[perm-1] ); + newUBs.push_back( loopNestClauseOps.loopUpperBounds[perm-1] ); + newINCs.push_back( loopNestClauseOps.loopSteps [perm-1] ); if (! 
loopNestClauseOps.tileSizes.empty()) newSizes.push_back( loopNestClauseOps.tileSizes[perm-1]); + } + + iv = newIVs; + loopNestClauseOps.loopLowerBounds = newLBs; + loopNestClauseOps.loopUpperBounds = newUBs; + loopNestClauseOps.loopSteps = newINCs; loopNestClauseOps.tileSizes = newSizes; + } + + //if (dir == llvm::omp::Directive::OMPD_loop) { auto loopOp = genWrapperOp(converter, loc, loopClauseOps, loopArgs); + // } genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, - loopNestClauseOps, iv, {{loopOp, loopArgs}}, - llvm::omp::Directive::OMPD_loop, dsp); + loopNestClauseOps, iv, {{loopOp, loopArgs}}, dir, dsp); return loopOp; } @@ -3506,6 +3549,11 @@ static void genOMPDispatch(lower::AbstractConverter &converter, case llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; + case llvm::omp::Directive::OMPD_interchange: + newOp = genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item , llvm::omp::Directive::OMPD_interchange, /*Interchange=*/true); +//llvm_unreachable("MK: implement interchange"); +//genInterchangeOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); + break; // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: newOp = genWorkshareOp(converter, symTable, stmtCtx, semaCtx, eval, loc, @@ -3967,6 +4015,10 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, // Emit the omp.loop_nest with annotation for tiling genOMP(converter, symTable, semaCtx, eval, ompNestedLoopCons->value()); break; + case llvm::omp::Directive::OMPD_interchange: + genOMP(converter, symTable, semaCtx, eval, ompNestedLoopCons->value()); + // llvm_unreachable("MK: implement nested interchange"); + break; default: { unsigned version = semaCtx.langOptions().OpenMPVersion; TODO(currentLocation, @@ -3978,7 +4030,7 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, } } - 
llvm::omp::Directive directive = + llvm::omp::Directive directive = parser::omp::GetOmpDirectiveName(beginLoopDirective).v; const parser::CharBlock &source = std::get(beginLoopDirective.t).source; diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 9d280985a27a4..ccfecabcf8a50 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -669,6 +669,47 @@ void collectTileSizesFromOpenMPConstruct( } } + +/// Populates the sizes vector with values if the given OpenMPConstruct +/// Contains a loop construct with an inner tiling construct. +void collectPermutationFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + llvm::SmallVectorImpl &permutation, SemanticsContext &semaCtx) { + if (!ompCons) + return; + + if (auto *ompLoop{std::get_if(&ompCons->u)}) { + const auto &nestedOptional = + std::get>(ompLoop->t); + assert(nestedOptional.has_value() && + "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = + std::get_if>( + &(nestedOptional.value())); + if (innerConstruct) { + const auto &innerLoopDirective = innerConstruct->value(); + const auto &innerBegin = + std::get(innerLoopDirective.t); + const auto &innerDirective = + std::get(innerBegin.t).v; + + if (innerDirective == llvm::omp::Directive::OMPD_interchange) { + // Get the size values from parse tree and convert to a vector + const auto &innerClauseList{ + std::get(innerBegin.t)}; + for (const auto &clause : innerClauseList.v) + if (const auto tclause{ + std::get_if(&clause.u)}) { + for (auto &tval : tclause->v) { + if (const auto v{EvaluateInt64(semaCtx, tval)}) + permutation.push_back(*v); + } + } + } + } + } +} + bool collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, lower::pft::Evaluation &eval, const omp::List &clauses, @@ -692,6 +733,7 @@ bool collectLoopRelatedInfo( // Collect sizes from tile directive if present std::int64_t sizesLengthValue = 0l; + std::int64_t 
permutationLengthValue = 0l; if (auto *ompCons{eval.getIf()}) { if (auto *ompLoop{std::get_if(&ompCons->u)}) { const auto &nestedOptional = @@ -719,13 +761,33 @@ bool collectLoopRelatedInfo( found = true; } } + + if (innerDirective == llvm::omp::Directive::OMPD_interchange) { + // Get the size values from parse tree and convert to a vector + const auto &innerClauseList{std::get(innerBegin.t)}; + for (const auto &clause : innerClauseList.v){ + if (const auto tclause{ std::get_if(&clause.u)}) { + permutationLengthValue = tclause->v.size(); + found = true; + } + } + // default: permution(2,1) + if (permutationLengthValue == 0) + permutationLengthValue = 2; + } } } } - collapseValue = collapseValue - sizesLengthValue; - collapseValue = - collapseValue < sizesLengthValue ? sizesLengthValue : collapseValue; + + +collapseValue = collapseValue - sizesLengthValue; +if (sizesLengthValue > collapseValue) + collapseValue = sizesLengthValue; +if (permutationLengthValue > collapseValue) + collapseValue = permutationLengthValue; + + std::size_t loopVarTypeSize = 0; do { lower::pft::Evaluation *doLoop = diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index bb42fb02efc09..5362c667d0575 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -180,6 +180,13 @@ void collectTileSizesFromOpenMPConstruct( llvm::SmallVectorImpl &tileSizes, Fortran::semantics::SemanticsContext &semaCtx); + +void collectPermutationFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + llvm::SmallVectorImpl &permuation, + Fortran::semantics::SemanticsContext &semaCtx); + + } // namespace omp } // namespace lower } // namespace Fortran diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 56cee4ab38e9b..4625225fd61a3 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -1393,6 +1393,7 @@ TYPE_PARSER(sourced(construct(first( "TEAMS DISTRIBUTE" >> 
pure(llvm::omp::Directive::OMPD_teams_distribute), "TEAMS LOOP" >> pure(llvm::omp::Directive::OMPD_teams_loop), "TILE" >> pure(llvm::omp::Directive::OMPD_tile), + "INTERCHANGE" >> pure(llvm::omp::Directive::OMPD_interchange), "UNROLL" >> pure(llvm::omp::Directive::OMPD_unroll))))) TYPE_PARSER(sourced(construct( diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 09dcfe60a46bc..fca2bc5af4511 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2510,6 +2510,9 @@ class UnparseVisitor { case llvm::omp::Directive::OMPD_tile: Word("TILE "); break; + case llvm::omp::Directive::OMPD_interchange: + Word("INTERCHANGE "); + break; case llvm::omp::Directive::OMPD_unroll: Word("UNROLL "); break; diff --git a/flang/lib/Semantics/CMakeLists.txt b/flang/lib/Semantics/CMakeLists.txt index 109bc2dbb8569..414b59812aa72 100644 --- a/flang/lib/Semantics/CMakeLists.txt +++ b/flang/lib/Semantics/CMakeLists.txt @@ -1,10 +1,4 @@ -add_flang_library(FortranSemantics - assignment.cpp - attr.cpp - canonicalize-acc.cpp - canonicalize-directives.cpp - canonicalize-do.cpp - canonicalize-omp.cpp +add_flang_library(FortranSemanticsChecks PARTIAL_SOURCES_INTENDED check-acc-structure.cpp check-allocate.cpp check-arithmeticif.cpp @@ -29,6 +23,30 @@ add_flang_library(FortranSemantics check-select-rank.cpp check-select-type.cpp check-stop.cpp + + DEPENDS + acc_gen + omp_gen + + LINK_LIBS + FortranSupport + FortranParser + FortranEvaluate + + LINK_COMPONENTS + Support + FrontendOpenMP + FrontendOpenACC + TargetParser +) + +add_flang_library(FortranSemantics PARTIAL_SOURCES_INTENDED + assignment.cpp + attr.cpp + canonicalize-acc.cpp + canonicalize-directives.cpp + canonicalize-do.cpp + canonicalize-omp.cpp compute-offsets.cpp data-to-inits.cpp definable.cpp @@ -61,6 +79,7 @@ add_flang_library(FortranSemantics FortranSupport FortranParser FortranEvaluate + FortranSemanticsChecks LINK_COMPONENTS Support @@ -68,12 +87,3 @@ 
add_flang_library(FortranSemantics FrontendOpenACC TargetParser ) - -target_precompile_headers(FortranSemantics PRIVATE - [["flang/Semantics/semantics.h"]] - [["flang/Semantics/type.h"]] - [["flang/Semantics/openmp-modifiers.h"]] - [["flang/Semantics/expression.h"]] - [["flang/Semantics/tools.h"]] - [["flang/Semantics/symbol.h"]] -) diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 9722eca19447d..231c9eef9bfde 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -177,10 +177,9 @@ class CanonicalizationOfOmp { auto &nestedBeginLoopDirective = std::get(nestedBeginDirective.t); if ((nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_unroll || - nestedBeginLoopDirective.v == - llvm::omp::Directive::OMPD_tile) && - !(nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_unroll && - dir.v == llvm::omp::Directive::OMPD_tile)) { + nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_tile || + nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_interchange) && + !(nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_unroll && dir.v == llvm::omp::Directive::OMPD_tile)) { // iterate through the remaining block items to find the end directive // for the unroll/tile directive. 
parser::Block::iterator endIt; @@ -205,9 +204,8 @@ class CanonicalizationOfOmp { std::optional{parser::NestedConstruct{ common::Indirection{std::move(*ompLoopCons)}}}; nextIt = block.erase(nextIt); - } else if (nestedBeginLoopDirective.v == - llvm::omp::Directive::OMPD_unroll && - dir.v == llvm::omp::Directive::OMPD_tile) { + } else if (nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_unroll && + dir.v == llvm::omp::Directive::OMPD_tile) { // if a loop has been unrolled, the user can not then tile that loop // as it has been unrolled parser::OmpClauseList &unrollClauseList{ diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 94e1fbde24389..c6c4ce75d993f 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -828,14 +828,14 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { llvm::SmallVector &); std::int64_t GetNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &); - std::int64_t GetNumAffectedLoopsFromClauses(const parser::OmpClauseList &); + std::int64_t GetNumAffectedLoopsFromClauses( const parser::OpenMPLoopConstruct &y, const parser::OmpClauseList &); void CollectNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &, llvm::SmallVector &, llvm::SmallVector &); void CollectNumAffectedLoopsFromInnerLoopContruct( const parser::OpenMPLoopConstruct &, llvm::SmallVector &, llvm::SmallVector &); - void CollectNumAffectedLoopsFromClauses(const parser::OmpClauseList &, + void CollectNumAffectedLoopsFromClauses( const parser::OpenMPLoopConstruct &x, const parser::OmpClauseList &, llvm::SmallVector &, llvm::SmallVector &); @@ -1880,6 +1880,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { case llvm::omp::Directive::OMPD_teams_distribute_simd: case llvm::omp::Directive::OMPD_teams_loop: case llvm::omp::Directive::OMPD_tile: + case llvm::omp::Directive::OMPD_interchange: case 
llvm::omp::Directive::OMPD_unroll: PushContext(beginDir.source, beginDir.v); break; @@ -1996,7 +1997,7 @@ bool OmpAttributeVisitor::Pre(const parser::DoConstruct &x) { } static bool isSizesClause(const parser::OmpClause *clause) { - return std::holds_alternative(clause->u); + return clause && std::holds_alternative(clause->u); } std::int64_t OmpAttributeVisitor::SetAssociatedMaxClause( @@ -2041,15 +2042,21 @@ std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromLoopConstruct( return SetAssociatedMaxClause(levels, clauses); } -std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromClauses( + + + +std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromClauses( const parser::OpenMPLoopConstruct &y, const parser::OmpClauseList &x) { llvm::SmallVector levels; llvm::SmallVector clauses; - CollectNumAffectedLoopsFromClauses(x, levels, clauses); + CollectNumAffectedLoopsFromClauses( y, x, levels, clauses); return SetAssociatedMaxClause(levels, clauses); } + + + void OmpAttributeVisitor::CollectNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &x, llvm::SmallVector &levels, @@ -2057,7 +2064,8 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromLoopConstruct( const auto &beginLoopDir{std::get(x.t)}; const auto &clauseList{std::get(beginLoopDir.t)}; - CollectNumAffectedLoopsFromClauses(clauseList, levels, clauses); + + CollectNumAffectedLoopsFromClauses( x, clauseList, levels, clauses); CollectNumAffectedLoopsFromInnerLoopContruct(x, levels, clauses); } @@ -2080,9 +2088,19 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromInnerLoopContruct( } } -void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( +void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( const parser::OpenMPLoopConstruct &y, const parser::OmpClauseList &x, llvm::SmallVector &levels, llvm::SmallVector &clauses) { + const auto &beginLoopDir{std::get(y.t)}; + auto&& yt = std::get<0>(beginLoopDir.t); + + + + + const auto &beginDir{std::get(beginLoopDir.t)}; + 
const auto &dirClauses{std::get(beginLoopDir.t)}; + auto ytv = beginDir.v; + for (const auto &clause : x.v) { if (const auto oclause{ std::get_if(&clause.u)}) { @@ -2108,7 +2126,25 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( levels.push_back(tclause->v.size()); clauses.push_back(&clause); } + + } + + + if (ytv == llvm::omp::OMPD_interchange) { + for (const auto &clause : dirClauses.v) { + if (const auto tclause{std::get_if(&clause.u)}) { + levels.push_back(tclause->v.size()); + clauses.push_back(&clause); + llvm_unreachable("MK: fetch permute depth"); + return ; + } + } + + + levels.push_back(2); + clauses.push_back(nullptr); + } } // 2.15.1.1 Data-sharing Attribute Rules - Predetermined @@ -2566,7 +2602,7 @@ static bool IsTargetCaptureImplicitlyFirstprivatizeable(const Symbol &symbol, // It is default firstprivatizeable as far as the OpenMP specification is // concerned if it is a non-array scalar type that has been implicitly // captured in a target region - const auto *type{checkSym.GetType()}; + const auto *type{checkSym.GetType() }; if ((!checkSym.GetShape() || checkSym.GetShape()->empty()) && (type->category() == Fortran::semantics::DeclTypeSpec::Category::Numeric || diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index 5bb1f3f36b65e..357f4c6e54502 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -211,6 +211,7 @@ struct ConstructDecompositionT { const ClauseTy *); bool applyClause(const tomp::clause::SizesT &clause, const ClauseTy *); + bool applyClause(const tomp::clause::PermutationT &clause,const ClauseTy *); bool applyClause(const tomp::clause::PrivateT &clause, const ClauseTy *); bool @@ -484,6 +485,7 @@ bool ConstructDecompositionT::applyClause( return false; } + // FIXME(JAN): Do the correct thing, but for now we'll do the same as collapse template bool 
ConstructDecompositionT::applyClause( @@ -503,6 +505,24 @@ bool ConstructDecompositionT::applyClause( return false; } +#if 1 +template +bool ConstructDecompositionT::applyClause(const tomp::clause::PermutationT &clause,const ClauseTy *node) { + // Apply "permutation" to the innermost directive. If it's not one that + // allows it flag an error. + if (!leafs.empty()) { + auto &last = leafs.back(); + + if (llvm::omp::isAllowedClauseForDirective(last.id, node->id, version)) { + last.clauses.push_back(node); + return true; + } + } + + return false; +} +#endif + // PRIVATE // [5.2:111:5-7] // Directives: distribute, do, for, loop, parallel, scope, sections, simd, diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td index eb836db890738..4a3ae30c2a82f 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td @@ -349,6 +349,26 @@ class OpenMP_TileSizesClauseSkip< def OpenMP_TileSizesClause : OpenMP_TileSizesClauseSkip<>; + + +//===----------------------------------------------------------------------===// +// V6.0: [xx.x] `permutation` clause +//===----------------------------------------------------------------------===// + +class OpenMP_PermutationClauseSkip< + bit traits = false, bit arguments = false, bit assemblyFormat = false, + bit description = false, bit extraClassDeclaration = false + > : OpenMP_Clause { + let arguments = (ins + BoolAttr:$interchangeEnabled, + OptionalAttr:$permutation + ); +} + +def OpenMP_PermutationClause : OpenMP_PermutationClauseSkip<>; + + //===----------------------------------------------------------------------===// // V5.2: [11.6.1] `dist_schedule` clause //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index e17315d923317..7a251130db113 100644 --- 
a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -616,7 +616,8 @@ def LoopNestOp : OpenMP_Op<"loop_nest", traits = [ ], clauses = [ OpenMP_LoopRelatedClause, OpenMP_CollapseClause, - OpenMP_TileSizesClause + OpenMP_TileSizesClause, + OpenMP_PermutationClause ], singleRegion = true> { let summary = "rectangular loop nest"; let description = [{ diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp index f056e72531bfc..aefc86b783eaa 100644 --- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp +++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp @@ -494,7 +494,7 @@ struct ParallelOpLowering : public OpRewritePattern { auto loopOp = omp::LoopNestOp::create( rewriter, parallelOp.getLoc(), parallelOp.getLowerBound(), parallelOp.getUpperBound(), parallelOp.getStep(), false, - parallelOp.getLowerBound().size(), nullptr); + parallelOp.getLowerBound().size(), nullptr, false, nullptr); rewriter.inlineRegionBefore(parallelOp.getRegion(), loopOp.getRegion(), loopOp.getRegion().begin()); diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 8768eed13cf32..65157a04b3ae7 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -91,7 +91,7 @@ void OpenMPDialect::initialize() { #include "mlir/Dialect/OpenMP/OpenMPOpsTypes.cpp.inc" >(); - declarePromisedInterface(); + declarePromisedInterface(); MemRefType::attachInterface(*getContext()); LLVM::LLVMPointerType::attachInterface( @@ -3037,10 +3037,13 @@ void LoopNestOp::print(OpAsmPrinter &p) { void LoopNestOp::build(OpBuilder &builder, OperationState &state, const LoopNestOperands &clauses) { MLIRContext *ctx = builder.getContext(); + + auto perm = makeDenseI64ArrayAttr(ctx, clauses.permutation); LoopNestOp::build(builder, state, clauses.loopLowerBounds, clauses.loopUpperBounds, clauses.loopSteps, - 
clauses.loopInclusive, clauses.numCollapse, - makeDenseI64ArrayAttr(ctx, clauses.tileSizes)); + clauses.loopInclusive, clauses.numCollapse, + makeDenseI64ArrayAttr(ctx, clauses.tileSizes), + clauses.interchangeEnabled , perm ); } LogicalResult LoopNestOp::verify() { From 2d0918319edb6fc146973c279df5c7cea0636132 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Tue, 26 Aug 2025 08:45:57 -0400 Subject: [PATCH 31/57] Remove stand-alone tiling. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 8 +++- flang/lib/Semantics/resolve-directives.cpp | 10 ----- flang/test/Lower/OpenMP/wsloop-tile.f90 | 39 ------------------- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 1 + 4 files changed, 7 insertions(+), 51 deletions(-) delete mode 100644 flang/test/Lower/OpenMP/wsloop-tile.f90 diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 355ba9428995e..7dc46258cea70 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3500,9 +3500,13 @@ static void genOMPDispatch(lower::AbstractConverter &converter, newOp = genTeamsOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; - case llvm::omp::Directive::OMPD_tile: - newOp = genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item); + case llvm::omp::Directive::OMPD_tile: { + unsigned version = semaCtx.langOptions().OpenMPVersion; + if (!semaCtx.langOptions().OpenMPSimd) + TODO(loc, "Unhandled loop directive (" + + llvm::omp::getOpenMPDirectiveName(dir, version) + ")"); break; + } case llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 20df9d01de0c0..d53be2fea89f2 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -825,7 +825,6 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { llvm::SmallVector &); std::int64_t 
GetNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &); - std::int64_t GetNumAffectedLoopsFromClauses(const parser::OmpClauseList &); void CollectNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &, llvm::SmallVector &, llvm::SmallVector &); @@ -2037,15 +2036,6 @@ std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromLoopConstruct( return SetAssociatedMaxClause(levels, clauses); } -std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromClauses( - const parser::OmpClauseList &x) { - llvm::SmallVector levels; - llvm::SmallVector clauses; - - CollectNumAffectedLoopsFromClauses(x, levels, clauses); - return SetAssociatedMaxClause(levels, clauses); -} - void OmpAttributeVisitor::CollectNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &x, llvm::SmallVector &levels, diff --git a/flang/test/Lower/OpenMP/wsloop-tile.f90 b/flang/test/Lower/OpenMP/wsloop-tile.f90 deleted file mode 100644 index 4c412b357f52e..0000000000000 --- a/flang/test/Lower/OpenMP/wsloop-tile.f90 +++ /dev/null @@ -1,39 +0,0 @@ -! This test checks lowering of OpenMP DO Directive(Worksharing) with collapse. - -! 
RUN: bbc -fopenmp -fopenmp-version=51 -emit-hlfir %s -o - | FileCheck %s - -!CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "WSLOOP_TILE"} { -program wsloop_tile - integer :: i, j, k - integer :: a, b, c - integer :: x - - a=30 - b=20 - c=50 - x=0 - - !CHECK: omp.loop_nest (%[[IV_0:.*]], %[[IV_1:.*]], %[[IV_2:.*]]) : i32 - !CHECK-SAME: tiles(2, 5, 10) - - !$omp do - !$omp tile sizes(2,5,10) - do i = 1, a - do j= 1, b - do k = 1, c - !CHECK: hlfir.assign %[[IV_0]] to %[[IV_0A:.*]] : i32 - !CHECK: hlfir.assign %[[IV_1]] to %[[IV_1A:.*]] : i32 - !CHECK: hlfir.assign %[[IV_2]] to %[[IV_2A:.*]] : i32 - !CHECK: %[[IVV_0:.*]] = fir.load %[[IV_0A]] - !CHECK: %[[SUM0:.*]] = arith.addi %{{.*}}, %[[IVV_0]] : i32 - !CHECK: %[[IVV_1:.*]] = fir.load %[[IV_1A]] - !CHECK: %[[SUM1:.*]] = arith.addi %[[SUM0]], %[[IVV_1]] : i32 - !CHECK: %[[IVV_2:.*]] = fir.load %[[IV_2A]] - !CHECK: %[[SUM2:.*]] = arith.addi %[[SUM1]], %[[IVV_2]] : i32 - x = x + i + j + k - end do - end do - end do - !$omp end tile - !$omp end do -end program wsloop_tile diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index a8f4b3e585f22..8e11f60fdc886 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -2969,6 +2969,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); auto loopOp = cast(opInst); + // Set up the source location value for OpenMP runtime. llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); From 66818b328aaba4a89d856ce0d0f7d1edd36a4878 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Tue, 26 Aug 2025 10:27:14 -0400 Subject: [PATCH 32/57] Revert unused changes. 
--- .../Frontend/OpenMP/ConstructDecompositionT.h | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index 5bb1f3f36b65e..047baa3a79f5d 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -209,8 +209,6 @@ struct ConstructDecompositionT { bool applyClause(const tomp::clause::CollapseT &clause, const ClauseTy *); - bool applyClause(const tomp::clause::SizesT &clause, - const ClauseTy *); bool applyClause(const tomp::clause::PrivateT &clause, const ClauseTy *); bool @@ -484,24 +482,6 @@ bool ConstructDecompositionT::applyClause( return false; } -// FIXME(JAN): Do the correct thing, but for now we'll do the same as collapse -template -bool ConstructDecompositionT::applyClause( - const tomp::clause::SizesT &clause, - const ClauseTy *node) { - // Apply "sizes" to the innermost directive. If it's not one that - // allows it flag an error. - if (!leafs.empty()) { - auto &last = leafs.back(); - - if (llvm::omp::isAllowedClauseForDirective(last.id, node->id, version)) { - last.clauses.push_back(node); - return true; - } - } - - return false; -} // PRIVATE // [5.2:111:5-7] From f934fa6e2ad864e7b6be277ea2286810341c2095 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Tue, 26 Aug 2025 10:54:38 -0400 Subject: [PATCH 33/57] Don't do codegen for tiling if it is an inner construct. 
--- flang/lib/Lower/OpenMP/OpenMP.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 7dc46258cea70..e15e0773123c9 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3968,8 +3968,8 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; switch (nestedDirective) { case llvm::omp::Directive::OMPD_tile: - // Emit the omp.loop_nest with annotation for tiling - genOMP(converter, symTable, semaCtx, eval, ompNestedLoopCons->value()); + // Skip OMPD_tile since the tile sizes will be retrieved when + // generating the omp.loop_nest op. break; default: { unsigned version = semaCtx.langOptions().OpenMPVersion; From 8f793a6549a739a04f7a675550e71cc9da44b5d7 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 26 Aug 2025 19:02:53 +0200 Subject: [PATCH 34/57] successful interchange --- flang/lib/Lower/OpenMP/Decomposer.cpp | 29 ++- flang/lib/Lower/OpenMP/Decomposer.h | 4 + flang/lib/Lower/OpenMP/OpenMP.cpp | 225 ++++++++++++------ flang/lib/Lower/OpenMP/Utils.cpp | 28 +-- flang/lib/Lower/OpenMP/Utils.h | 2 - flang/lib/Semantics/canonicalize-omp.cpp | 13 +- flang/lib/Semantics/resolve-directives.cpp | 73 +++--- .../Frontend/OpenMP/ConstructDecompositionT.h | 10 +- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 10 +- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 1 + 10 files changed, 247 insertions(+), 148 deletions(-) diff --git a/flang/lib/Lower/OpenMP/Decomposer.cpp b/flang/lib/Lower/OpenMP/Decomposer.cpp index 9bfbf67bec88c..bf09bed395285 100644 --- a/flang/lib/Lower/OpenMP/Decomposer.cpp +++ b/flang/lib/Lower/OpenMP/Decomposer.cpp @@ -98,14 +98,39 @@ ConstructQueue buildConstructQueue( return decompose.output; } +// from clang +static bool isOpenMPLoopTransformationDirective(llvm::omp::Directive DKind) { + return DKind == llvm::omp::Directive::OMPD_tile || 
+ DKind == llvm::omp::Directive::OMPD_unroll || + DKind == llvm::omp::Directive::OMPD_reverse || + DKind == llvm::omp::Directive::OMPD_interchange || + DKind == llvm::omp::Directive::OMPD_stripe; +} + +llvm::iterator_range getNonTransformQueue( + llvm::iterator_range range) { + // remove trailing loop transformations + auto b = range.begin(); + auto e = range.end(); + while (e != b) { + auto e2 = e - 1; + if (!isOpenMPLoopTransformationDirective(e2->id)) + break; + e = e2; + } + + return llvm::make_range(b, e); +} + bool matchLeafSequence(ConstructQueue::const_iterator item, const ConstructQueue &queue, llvm::omp::Directive directive) { llvm::ArrayRef leafDirs = llvm::omp::getLeafConstructsOrSelf(directive); - for (auto [dir, leaf] : - llvm::zip_longest(leafDirs, llvm::make_range(item, queue.end()))) { + for (auto [dir, leaf] : llvm::zip_longest( + leafDirs, + getNonTransformQueue(llvm::make_range(item, queue.end())))) { if (!dir.has_value() || !leaf.has_value()) return false; diff --git a/flang/lib/Lower/OpenMP/Decomposer.h b/flang/lib/Lower/OpenMP/Decomposer.h index 65492bd76280d..f057009629efc 100644 --- a/flang/lib/Lower/OpenMP/Decomposer.h +++ b/flang/lib/Lower/OpenMP/Decomposer.h @@ -57,6 +57,10 @@ bool isLastItemInQueue(ConstructQueue::const_iterator item, bool matchLeafSequence(ConstructQueue::const_iterator item, const ConstructQueue &queue, llvm::omp::Directive directive); + +llvm::iterator_range getNonTransformQueue( + llvm::iterator_range range); + } // namespace Fortran::lower::omp #endif // FORTRAN_LOWER_OPENMP_DECOMPOSER_H diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 154361e28734b..3ce88a972639d 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -441,9 +441,11 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, middleClauseList = &std::get(innerBegin.t); } - if (innerDirective.v == llvm::omp::Directive::OMPD_interchange) { - llvm_unreachable("MK: 
Handle this"); - middleClauseList = &std::get(innerBegin.t); + if (innerDirective.v == + llvm::omp::Directive::OMPD_interchange) { + llvm_unreachable("MK: Handle this"); + middleClauseList = + &std::get(innerBegin.t); } } if (auto &endDirective = @@ -1195,10 +1197,11 @@ struct OpWithBodyGenInfo { /// \param [in] item - item in the queue to generate body for. static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info, const ConstructQueue &queue, - ConstructQueue::const_iterator item) { int a = 0; + ConstructQueue::const_iterator item) { + int a = 0; if (a) { op.dump(); - } + } fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder(); auto insertMarker = [](fir::FirOpBuilder &builder) { @@ -1269,8 +1272,11 @@ static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info, } if (!info.genSkeletonOnly) { + // Transforms already processed by getLoopNestOp + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); if (ConstructQueue::const_iterator next = std::next(item); - next != queue.end()) { + next != transforms.begin() && next != queue.end()) { genOMPDispatch(info.converter, info.symTable, info.semaCtx, info.eval, info.loc, queue, next); } else { @@ -1337,10 +1343,11 @@ static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info, // present). Otherwise, these operations will be inserted within a // wrapper region. 
mlir::Operation *privatizationBottomLevelOp = &op; - if (auto loopNest = llvm::dyn_cast(op)) { int b = 0; - if (b) { - loopNest.dump(); - } + if (auto loopNest = llvm::dyn_cast(op)) { + int b = 0; + if (b) { + loopNest.dump(); + } llvm::SmallVector wrappers; loopNest.gatherWrappers(wrappers); if (!wrappers.empty()) @@ -1689,7 +1696,8 @@ genLoopNestClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const List &clauses, mlir::Location loc, mlir::omp::LoopNestOperands &clauseOps, - llvm::SmallVectorImpl &iv, bool enableInterchange = false) { + llvm::SmallVectorImpl &iv, + bool enableInterchange = false) { ClauseProcessor cp(converter, semaCtx, clauses); HostEvalInfo *hostEvalInfo = getHostEvalInfoStackTop(converter); @@ -1714,8 +1722,8 @@ genLoopNestClauses(lower::AbstractConverter &converter, } clauseOps.tileSizes = sizeValues; } else if (clause.id == llvm::omp::Clause::OMPC_permutation) { -llvm_unreachable("MK: To handle standalone interchange construct"); - } + llvm_unreachable("MK: To handle standalone interchange construct"); + } } llvm::SmallVector sizeValues; @@ -1724,11 +1732,13 @@ llvm_unreachable("MK: To handle standalone interchange construct"); if (sizeValues.size() > 0) clauseOps.tileSizes = sizeValues; - llvm::SmallVector permutationValues; collectPermutationFromOpenMPConstruct(ompCons, permutationValues, semaCtx); + llvm::SmallVector permutationValues; + collectPermutationFromOpenMPConstruct(ompCons, permutationValues, semaCtx); if (enableInterchange) { - permutationValues.append({2,1}); + permutationValues.append({2, 1}); } - clauseOps.interchangeEnabled = mlir:: BoolAttr::get( firOpBuilder.getContext() , enableInterchange); + clauseOps.interchangeEnabled = + mlir::BoolAttr::get(firOpBuilder.getContext(), enableInterchange); clauseOps.permutation = permutationValues; } @@ -2100,15 +2110,78 @@ static mlir::omp::LoopNestOp genLoopNestOp( llvm::ArrayRef< std::pair> wrapperArgs, - 
llvm::omp::Directive directive, DataSharingProcessor &dsp) { + llvm::omp::Directive directive, DataSharingProcessor &dsp, + std::optional> + transforms = std::nullopt) { auto ivCallback = [&](mlir::Operation *op) { genLoopVars(op, converter, loc, iv, wrapperArgs); return llvm::SmallVector(iv); }; - uint64_t nestValue = getCollapseValue(item->clauses); + uint64_t nestValue = getCollapseValue( + item->clauses); // MK: Should be number of affected loops? nestValue = nestValue < iv.size() ? iv.size() : nestValue; auto *nestedEval = getCollapsedLoopEval(eval, nestValue); + + if (!transforms.has_value()) { + // This must be a standalone construct, assume all following actions are + // transformations + transforms = llvm::make_range(std::next(item), queue.end()); + } + + for (auto &&transform : llvm::reverse(*transforms)) { + auto d = transform.id; + auto clauses = transform.clauses; + + switch (d) { + case llvm::omp::OMPD_interchange: { + bool hasPermutationClause = false; + llvm::SmallVector permutation; + + auto &&permutationClause = ClauseFinder::findUniqueClause< + Fortran::lower::omp::clause::Permutation>(clauses); + if (permutationClause) { + permutation.reserve(permutationClause->v.size()); + for (auto &&ts : permutationClause->v) { + permutation.push_back(evaluate::ToInt64(ts).value()); + } + // llvm::append_range( permutation, permutationClause->v); + + } else { + permutation = {2, 1}; + } + + assert(permutation.size() == iv.size() && + "TODO: if permutation is smaller than number of associated loops, " + "permute only the first loops"); + llvm::SmallVector newIVs; + llvm::SmallVector newLBs; + llvm::SmallVector newUBs; + llvm::SmallVector newINCs; + llvm::SmallVector newSizes; + + // TODO: Assert this is a valid permutation + for (auto perm : permutation) { + newIVs.push_back(iv[perm - 1]); + newLBs.push_back(clauseOps.loopLowerBounds[perm - 1]); + newUBs.push_back(clauseOps.loopUpperBounds[perm - 1]); + newINCs.push_back(clauseOps.loopSteps[perm - 1]); + if 
(!clauseOps.tileSizes.empty()) + newSizes.push_back(clauseOps.tileSizes[perm - 1]); + } + + iv = newIVs; + clauseOps.loopLowerBounds = newLBs; + clauseOps.loopUpperBounds = newUBs; + clauseOps.loopSteps = newINCs; + clauseOps.tileSizes = newSizes; + + } break; + default: + llvm_unreachable("MK: loop transformation not yet implemented"); + } + } + return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval, directive) @@ -2122,7 +2195,7 @@ static mlir::omp::LoopOp genLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item, llvm::omp::Directive dir = llvm::omp::Directive::OMPD_loop , bool enableInterchange = false) { + ConstructQueue::const_iterator item) { mlir::omp::LoopOperands loopClauseOps; llvm::SmallVector loopReductionSyms; genLoopClauses(converter, semaCtx, item->clauses, loc, loopClauseOps, @@ -2136,7 +2209,7 @@ genLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, mlir::omp::LoopNestOperands loopNestClauseOps; llvm::SmallVector iv; genLoopNestClauses(converter, semaCtx, eval, item->clauses, loc, - loopNestClauseOps, iv, enableInterchange); + loopNestClauseOps, iv); EntryBlockArgs loopArgs; loopArgs.priv.syms = dsp.getDelayedPrivSymbols(); @@ -2144,35 +2217,11 @@ genLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable, loopArgs.reduction.syms = loopReductionSyms; loopArgs.reduction.vars = loopClauseOps.reductionVars; - // Applying interchange clause - // tiling assumed to be applied after interchange - if (loopNestClauseOps.permutation.size() >=1) { - assert(loopNestClauseOps.permutation.size() == iv.size() && "TODO: if permutation is smaller than number of associated loops, permute only the first loops"); - llvm::SmallVector newIVs; - llvm::SmallVector newLBs; - llvm::SmallVector newUBs; - llvm::SmallVector newINCs; 
llvm::SmallVector newSizes; - - // TODO: Assert this is a valid permution - for (auto perm :loopNestClauseOps.permutation) { - newIVs.push_back(iv[perm-1]); - newLBs.push_back( loopNestClauseOps.loopLowerBounds[perm-1] ); - newUBs.push_back( loopNestClauseOps.loopUpperBounds[perm-1] ); - newINCs.push_back( loopNestClauseOps.loopSteps [perm-1] ); if (! loopNestClauseOps.tileSizes.empty()) newSizes.push_back( loopNestClauseOps.tileSizes[perm-1]); - } - - iv = newIVs; - loopNestClauseOps.loopLowerBounds = newLBs; - loopNestClauseOps.loopUpperBounds = newUBs; - loopNestClauseOps.loopSteps = newINCs; loopNestClauseOps.tileSizes = newSizes; - } - - //if (dir == llvm::omp::Directive::OMPD_loop) { auto loopOp = genWrapperOp(converter, loc, loopClauseOps, loopArgs); - // } genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, - loopNestClauseOps, iv, {{loopOp, loopArgs}}, dir, dsp); + loopNestClauseOps, iv, {{loopOp, loopArgs}}, + llvm::omp::Directive::OMPD_loop, dsp); return loopOp; } @@ -3089,7 +3138,10 @@ static mlir::omp::DistributeOp genCompositeDistributeParallelDo( lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { - assert(std::distance(item, queue.end()) == 3 && "Invalid leaf constructs"); + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); + + assert(llvm::range_size(q) == 3 && "Invalid leaf constructs"); ConstructQueue::const_iterator distributeItem = item; ConstructQueue::const_iterator parallelItem = std::next(distributeItem); ConstructQueue::const_iterator doItem = std::next(parallelItem); @@ -3143,10 +3195,10 @@ static mlir::omp::DistributeOp genCompositeDistributeParallelDo( converter, loc, wsloopClauseOps, wsloopArgs); wsloopOp.setComposite(/*val=*/true); - genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, doItem, - 
loopNestClauseOps, iv, - {{distributeOp, distributeArgs}, {wsloopOp, wsloopArgs}}, - llvm::omp::Directive::OMPD_distribute_parallel_do, dsp); + genLoopNestOp( + converter, symTable, semaCtx, eval, loc, queue, doItem, loopNestClauseOps, + iv, {{distributeOp, distributeArgs}, {wsloopOp, wsloopArgs}}, + llvm::omp::Directive::OMPD_distribute_parallel_do, dsp, transforms); return distributeOp; } @@ -3155,7 +3207,11 @@ static mlir::omp::DistributeOp genCompositeDistributeParallelDoSimd( lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { - assert(std::distance(item, queue.end()) == 4 && "Invalid leaf constructs"); + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); + + assert(llvm::range_size(q) == 4 && "Invalid leaf constructs"); + ConstructQueue::const_iterator distributeItem = item; ConstructQueue::const_iterator parallelItem = std::next(distributeItem); ConstructQueue::const_iterator doItem = std::next(parallelItem); @@ -3237,7 +3293,7 @@ static mlir::omp::DistributeOp genCompositeDistributeParallelDoSimd( {wsloopOp, wsloopArgs}, {simdOp, simdArgs}}, llvm::omp::Directive::OMPD_distribute_parallel_do_simd, - simdItemDSP); + simdItemDSP, transforms); return distributeOp; } @@ -3246,7 +3302,11 @@ static mlir::omp::DistributeOp genCompositeDistributeSimd( lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { - assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); + + assert(llvm::range_size(q) == 2 && "Invalid leaf constructs"); + ConstructQueue::const_iterator distributeItem 
= item; ConstructQueue::const_iterator simdItem = std::next(distributeItem); @@ -3298,7 +3358,8 @@ static mlir::omp::DistributeOp genCompositeDistributeSimd( genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem, loopNestClauseOps, iv, {{distributeOp, distributeArgs}, {simdOp, simdArgs}}, - llvm::omp::Directive::OMPD_distribute_simd, simdItemDSP); + llvm::omp::Directive::OMPD_distribute_simd, simdItemDSP, + transforms); return distributeOp; } @@ -3307,7 +3368,11 @@ static mlir::omp::WsloopOp genCompositeDoSimd( lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { - assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); + + assert(llvm::range_size(q) == 2 && "Invalid leaf constructs"); + ConstructQueue::const_iterator doItem = item; ConstructQueue::const_iterator simdItem = std::next(doItem); @@ -3362,7 +3427,7 @@ static mlir::omp::WsloopOp genCompositeDoSimd( genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem, loopNestClauseOps, iv, {{wsloopOp, wsloopArgs}, {simdOp, simdArgs}}, - llvm::omp::Directive::OMPD_do_simd, simdItemDSP); + llvm::omp::Directive::OMPD_do_simd, simdItemDSP, transforms); return wsloopOp; } @@ -3371,7 +3436,11 @@ static mlir::omp::TaskloopOp genCompositeTaskloopSimd( lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { - assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); + + assert(llvm::range_size(q) == 2 && "Invalid leaf constructs"); + 
if (!semaCtx.langOptions().OpenMPSimd) TODO(loc, "Composite TASKLOOP SIMD"); return nullptr; @@ -3550,10 +3619,8 @@ static void genOMPDispatch(lower::AbstractConverter &converter, genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_interchange: - newOp = genLoopOp(converter, symTable, semaCtx, eval, loc, queue, item , llvm::omp::Directive::OMPD_interchange, /*Interchange=*/true); -//llvm_unreachable("MK: implement interchange"); -//genInterchangeOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); - break; + llvm_unreachable("MK: standalone interchange not implemented"); + break; // case llvm::omp::Directive::OMPD_workdistribute: case llvm::omp::Directive::OMPD_workshare: newOp = genWorkshareOp(converter, symTable, stmtCtx, semaCtx, eval, loc, @@ -4002,23 +4069,42 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, mlir::Location currentLocation = converter.genLocation(beginLoopDirective.source); + llvm::omp::Directive directive = + parser::omp::GetOmpDirectiveName(beginLoopDirective).v; + const parser::CharBlock &source = + std::get(beginLoopDirective.t).source; + ConstructQueue queue{ + buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, + eval, source, directive, clauses)}; + auto &optLoopCons = std::get>(loopConstruct.t); if (optLoopCons.has_value()) { if (auto *ompNestedLoopCons{ std::get_if>( &*optLoopCons)}) { + const Fortran::parser::OpenMPLoopConstruct &x = + ompNestedLoopCons->value(); + const Fortran::parser::OmpBeginLoopDirective &y = std::get<0>(x.t); + const Fortran::parser::OmpClauseList &clauseList = std::get<1>(y.t); llvm::omp::Directive nestedDirective = parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; + List nestedClauses = + makeClauses(std::get(y.t), semaCtx); + switch (nestedDirective) { case llvm::omp::Directive::OMPD_tile: // Emit the omp.loop_nest with annotation for tiling genOMP(converter, symTable, 
semaCtx, eval, ompNestedLoopCons->value()); break; - case llvm::omp::Directive::OMPD_interchange: - genOMP(converter, symTable, semaCtx, eval, ompNestedLoopCons->value()); - // llvm_unreachable("MK: implement nested interchange"); - break; + case llvm::omp::Directive::OMPD_interchange: { + ConstructQueue nestedQueue{buildConstructQueue( + converter.getFirOpBuilder().getModule(), semaCtx, eval, source, + nestedDirective, nestedClauses)}; + for (auto nl : nestedQueue) { + queue.push_back(nl); + } + } break; default: { unsigned version = semaCtx.langOptions().OpenMPVersion; TODO(currentLocation, @@ -4030,13 +4116,6 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, } } - llvm::omp::Directive directive = - parser::omp::GetOmpDirectiveName(beginLoopDirective).v; - const parser::CharBlock &source = - std::get(beginLoopDirective.t).source; - ConstructQueue queue{ - buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, - eval, source, directive, clauses)}; genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue, queue.begin()); } diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index ccfecabcf8a50..e79bc585f0872 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -669,7 +669,6 @@ void collectTileSizesFromOpenMPConstruct( } } - /// Populates the sizes vector with values if the given OpenMPConstruct /// Contains a loop construct with an inner tiling construct. 
void collectPermutationFromOpenMPConstruct( @@ -733,7 +732,7 @@ bool collectLoopRelatedInfo( // Collect sizes from tile directive if present std::int64_t sizesLengthValue = 0l; - std::int64_t permutationLengthValue = 0l; + std::int64_t permutationLengthValue = 0l; if (auto *ompCons{eval.getIf()}) { if (auto *ompLoop{std::get_if(&ompCons->u)}) { const auto &nestedOptional = @@ -764,29 +763,28 @@ bool collectLoopRelatedInfo( if (innerDirective == llvm::omp::Directive::OMPD_interchange) { // Get the size values from parse tree and convert to a vector - const auto &innerClauseList{std::get(innerBegin.t)}; - for (const auto &clause : innerClauseList.v){ - if (const auto tclause{ std::get_if(&clause.u)}) { + const auto &innerClauseList{ + std::get(innerBegin.t)}; + for (const auto &clause : innerClauseList.v) { + if (const auto tclause{ + std::get_if(&clause.u)}) { permutationLengthValue = tclause->v.size(); found = true; } - } + } // default: permution(2,1) if (permutationLengthValue == 0) - permutationLengthValue = 2; + permutationLengthValue = 2; } } } } - - -collapseValue = collapseValue - sizesLengthValue; -if (sizesLengthValue > collapseValue) - collapseValue = sizesLengthValue; -if (permutationLengthValue > collapseValue) - collapseValue = permutationLengthValue; - + collapseValue = collapseValue - sizesLengthValue; + if (sizesLengthValue > collapseValue) + collapseValue = sizesLengthValue; + if (permutationLengthValue > collapseValue) + collapseValue = permutationLengthValue; std::size_t loopVarTypeSize = 0; do { diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index 5362c667d0575..4c097dcb659fd 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -180,13 +180,11 @@ void collectTileSizesFromOpenMPConstruct( llvm::SmallVectorImpl &tileSizes, Fortran::semantics::SemanticsContext &semaCtx); - void collectPermutationFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, llvm::SmallVectorImpl 
&permuation, Fortran::semantics::SemanticsContext &semaCtx); - } // namespace omp } // namespace lower } // namespace Fortran diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index 231c9eef9bfde..df39770cac235 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -177,9 +177,11 @@ class CanonicalizationOfOmp { auto &nestedBeginLoopDirective = std::get(nestedBeginDirective.t); if ((nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_unroll || - nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_tile || - nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_interchange) && - !(nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_unroll && dir.v == llvm::omp::Directive::OMPD_tile)) { + nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_tile || + nestedBeginLoopDirective.v == + llvm::omp::Directive::OMPD_interchange) && + !(nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_unroll && + dir.v == llvm::omp::Directive::OMPD_tile)) { // iterate through the remaining block items to find the end directive // for the unroll/tile directive. 
parser::Block::iterator endIt; @@ -204,8 +206,9 @@ class CanonicalizationOfOmp { std::optional{parser::NestedConstruct{ common::Indirection{std::move(*ompLoopCons)}}}; nextIt = block.erase(nextIt); - } else if (nestedBeginLoopDirective.v == llvm::omp::Directive::OMPD_unroll && - dir.v == llvm::omp::Directive::OMPD_tile) { + } else if (nestedBeginLoopDirective.v == + llvm::omp::Directive::OMPD_unroll && + dir.v == llvm::omp::Directive::OMPD_tile) { // if a loop has been unrolled, the user can not then tile that loop // as it has been unrolled parser::OmpClauseList &unrollClauseList{ diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index c6c4ce75d993f..ea46aca3de117 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -828,15 +828,16 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { llvm::SmallVector &); std::int64_t GetNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &); - std::int64_t GetNumAffectedLoopsFromClauses( const parser::OpenMPLoopConstruct &y, const parser::OmpClauseList &); + std::int64_t GetNumAffectedLoopsFromClauses( + const parser::OpenMPLoopConstruct &y, const parser::OmpClauseList &); void CollectNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &, llvm::SmallVector &, llvm::SmallVector &); void CollectNumAffectedLoopsFromInnerLoopContruct( const parser::OpenMPLoopConstruct &, llvm::SmallVector &, llvm::SmallVector &); - void CollectNumAffectedLoopsFromClauses( const parser::OpenMPLoopConstruct &x, const parser::OmpClauseList &, - llvm::SmallVector &, + void CollectNumAffectedLoopsFromClauses(const parser::OpenMPLoopConstruct &x, + const parser::OmpClauseList &, llvm::SmallVector &, llvm::SmallVector &); Symbol::Flags dataSharingAttributeFlags{Symbol::Flag::OmpShared, @@ -1880,7 +1881,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { case 
llvm::omp::Directive::OMPD_teams_distribute_simd: case llvm::omp::Directive::OMPD_teams_loop: case llvm::omp::Directive::OMPD_tile: - case llvm::omp::Directive::OMPD_interchange: + case llvm::omp::Directive::OMPD_interchange: case llvm::omp::Directive::OMPD_unroll: PushContext(beginDir.source, beginDir.v); break; @@ -2042,21 +2043,15 @@ std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromLoopConstruct( return SetAssociatedMaxClause(levels, clauses); } - - - -std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromClauses( const parser::OpenMPLoopConstruct &y, - const parser::OmpClauseList &x) { +std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromClauses( + const parser::OpenMPLoopConstruct &y, const parser::OmpClauseList &x) { llvm::SmallVector levels; llvm::SmallVector clauses; - CollectNumAffectedLoopsFromClauses( y, x, levels, clauses); + CollectNumAffectedLoopsFromClauses(y, x, levels, clauses); return SetAssociatedMaxClause(levels, clauses); } - - - void OmpAttributeVisitor::CollectNumAffectedLoopsFromLoopConstruct( const parser::OpenMPLoopConstruct &x, llvm::SmallVector &levels, @@ -2064,8 +2059,7 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromLoopConstruct( const auto &beginLoopDir{std::get(x.t)}; const auto &clauseList{std::get(beginLoopDir.t)}; - - CollectNumAffectedLoopsFromClauses( x, clauseList, levels, clauses); + CollectNumAffectedLoopsFromClauses(x, clauseList, levels, clauses); CollectNumAffectedLoopsFromInnerLoopContruct(x, levels, clauses); } @@ -2088,18 +2082,16 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromInnerLoopContruct( } } -void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( const parser::OpenMPLoopConstruct &y, - const parser::OmpClauseList &x, llvm::SmallVector &levels, +void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( + const parser::OpenMPLoopConstruct &y, const parser::OmpClauseList &x, + llvm::SmallVector &levels, llvm::SmallVector &clauses) { - const auto 
&beginLoopDir{std::get(y.t)}; - auto&& yt = std::get<0>(beginLoopDir.t); - - - + const auto &beginLoopDir{std::get(y.t)}; + auto &&yt = std::get<0>(beginLoopDir.t); - const auto &beginDir{std::get(beginLoopDir.t)}; - const auto &dirClauses{std::get(beginLoopDir.t)}; - auto ytv = beginDir.v; + const auto &beginDir{std::get(beginLoopDir.t)}; + const auto &dirClauses{std::get(beginLoopDir.t)}; + auto ytv = beginDir.v; for (const auto &clause : x.v) { if (const auto oclause{ @@ -2126,25 +2118,22 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( const parser::O levels.push_back(tclause->v.size()); clauses.push_back(&clause); } - - } - - if (ytv == llvm::omp::OMPD_interchange) { - for (const auto &clause : dirClauses.v) { - if (const auto tclause{std::get_if(&clause.u)}) { - levels.push_back(tclause->v.size()); - clauses.push_back(&clause); - llvm_unreachable("MK: fetch permute depth"); - return ; - } - } - - - levels.push_back(2); - clauses.push_back(nullptr); + if (ytv == llvm::omp::OMPD_interchange) { + for (const auto &clause : dirClauses.v) { + if (const auto tclause{ + std::get_if(&clause.u)}) { + levels.push_back(tclause->v.size()); + clauses.push_back(&clause); + // llvm_unreachable("MK: fetch permute depth"); + return; + } } + + levels.push_back(2); + clauses.push_back(nullptr); + } } // 2.15.1.1 Data-sharing Attribute Rules - Predetermined @@ -2602,7 +2591,7 @@ static bool IsTargetCaptureImplicitlyFirstprivatizeable(const Symbol &symbol, // It is default firstprivatizeable as far as the OpenMP specification is // concerned if it is a non-array scalar type that has been implicitly // captured in a target region - const auto *type{checkSym.GetType() }; + const auto *type{checkSym.GetType()}; if ((!checkSym.GetShape() || checkSym.GetShape()->empty()) && (type->category() == Fortran::semantics::DeclTypeSpec::Category::Numeric || diff --git a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h 
b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h index 357f4c6e54502..a566cdaaebb0c 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ConstructDecompositionT.h @@ -211,7 +211,9 @@ struct ConstructDecompositionT { const ClauseTy *); bool applyClause(const tomp::clause::SizesT &clause, const ClauseTy *); - bool applyClause(const tomp::clause::PermutationT &clause,const ClauseTy *); + bool + applyClause(const tomp::clause::PermutationT &clause, + const ClauseTy *); bool applyClause(const tomp::clause::PrivateT &clause, const ClauseTy *); bool @@ -505,9 +507,10 @@ bool ConstructDecompositionT::applyClause( return false; } -#if 1 template -bool ConstructDecompositionT::applyClause(const tomp::clause::PermutationT &clause,const ClauseTy *node) { +bool ConstructDecompositionT::applyClause( + const tomp::clause::PermutationT &clause, + const ClauseTy *node) { // Apply "permutation" to the innermost directive. If it's not one that // allows it flag an error. 
if (!leafs.empty()) { @@ -521,7 +524,6 @@ bool ConstructDecompositionT::applyClause(const tomp::clause::PermutationT return false; } -#endif // PRIVATE // [5.2:111:5-7] diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 65157a04b3ae7..e08747a8218cc 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -91,7 +91,7 @@ void OpenMPDialect::initialize() { #include "mlir/Dialect/OpenMP/OpenMPOpsTypes.cpp.inc" >(); - declarePromisedInterface(); + declarePromisedInterface(); MemRefType::attachInterface(*getContext()); LLVM::LLVMPointerType::attachInterface( @@ -3038,12 +3038,12 @@ void LoopNestOp::build(OpBuilder &builder, OperationState &state, const LoopNestOperands &clauses) { MLIRContext *ctx = builder.getContext(); - auto perm = makeDenseI64ArrayAttr(ctx, clauses.permutation); + auto perm = makeDenseI64ArrayAttr(ctx, clauses.permutation); LoopNestOp::build(builder, state, clauses.loopLowerBounds, clauses.loopUpperBounds, clauses.loopSteps, - clauses.loopInclusive, clauses.numCollapse, - makeDenseI64ArrayAttr(ctx, clauses.tileSizes), - clauses.interchangeEnabled , perm ); + clauses.loopInclusive, clauses.numCollapse, + makeDenseI64ArrayAttr(ctx, clauses.tileSizes), + clauses.interchangeEnabled, perm); } LogicalResult LoopNestOp::verify() { diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index a8f4b3e585f22..8e11f60fdc886 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -2969,6 +2969,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); auto loopOp = cast(opInst); + // Set up the source location value for 
OpenMP runtime. llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); From b675870b4f3de7af06fe6f83c085e75668b77d8f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 26 Aug 2025 19:52:39 +0200 Subject: [PATCH 35/57] Remove in-development marker --- flang/lib/Lower/OpenMP/OpenMP.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 3ce88a972639d..0fb7003f36084 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -437,13 +437,9 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, innerLoopConstruct.t); const auto &innerDirective = std::get(innerBegin.t); - if (innerDirective.v == llvm::omp::Directive::OMPD_tile) { - middleClauseList = - &std::get(innerBegin.t); - } - if (innerDirective.v == - llvm::omp::Directive::OMPD_interchange) { - llvm_unreachable("MK: Handle this"); + if (innerDirective.v == llvm::omp::Directive::OMPD_tile || + innerDirective.v == + llvm::omp::Directive::OMPD_interchange) { middleClauseList = &std::get(innerBegin.t); } From 2c6fcf57e8417f31771dbb44b24b0d1ff5fbc3f2 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Tue, 26 Aug 2025 20:57:44 +0200 Subject: [PATCH 36/57] Reduce change noise --- flang/lib/Lower/OpenMP/OpenMP.cpp | 9 --------- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 3 +-- mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp | 2 +- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 4 +--- 4 files changed, 3 insertions(+), 15 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 8ca96a6658b69..57d5d48be0204 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1734,15 +1734,6 @@ genLoopNestClauses(lower::AbstractConverter &converter, collectTileSizesFromOpenMPConstruct(ompCons, sizeValues, semaCtx); if (sizeValues.size() > 0) clauseOps.tileSizes = sizeValues; - - llvm::SmallVector 
permutationValues; - collectPermutationFromOpenMPConstruct(ompCons, permutationValues, semaCtx); - if (enableInterchange) { - permutationValues.append({2, 1}); - } - clauseOps.interchangeEnabled = - mlir::BoolAttr::get(firOpBuilder.getContext(), enableInterchange); - clauseOps.permutation = permutationValues; } static void genLoopClauses( diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 7a251130db113..e17315d923317 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -616,8 +616,7 @@ def LoopNestOp : OpenMP_Op<"loop_nest", traits = [ ], clauses = [ OpenMP_LoopRelatedClause, OpenMP_CollapseClause, - OpenMP_TileSizesClause, - OpenMP_PermutationClause + OpenMP_TileSizesClause ], singleRegion = true> { let summary = "rectangular loop nest"; let description = [{ diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp index f3c7c8e0329e8..19fbefb48a378 100644 --- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp +++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp @@ -494,7 +494,7 @@ struct ParallelOpLowering : public OpRewritePattern { auto loopOp = omp::LoopNestOp::create( rewriter, parallelOp.getLoc(), parallelOp.getLowerBound(), parallelOp.getUpperBound(), parallelOp.getStep(), false, - parallelOp.getLowerBound().size(), nullptr, false, nullptr); + parallelOp.getLowerBound().size(), nullptr); rewriter.inlineRegionBefore(parallelOp.getRegion(), loopOp.getRegion(), loopOp.getRegion().begin()); diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index e08747a8218cc..c59c8889cadcb 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -3038,12 +3038,10 @@ void LoopNestOp::build(OpBuilder &builder, OperationState &state, const LoopNestOperands &clauses) { MLIRContext *ctx = 
builder.getContext(); - auto perm = makeDenseI64ArrayAttr(ctx, clauses.permutation); LoopNestOp::build(builder, state, clauses.loopLowerBounds, clauses.loopUpperBounds, clauses.loopSteps, clauses.loopInclusive, clauses.numCollapse, - makeDenseI64ArrayAttr(ctx, clauses.tileSizes), - clauses.interchangeEnabled, perm); + makeDenseI64ArrayAttr(ctx, clauses.tileSizes)); } LogicalResult LoopNestOp::verify() { From 70dbb33e91453bce1f5039ac63c538f02b07ce98 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 28 Aug 2025 12:14:51 +0200 Subject: [PATCH 37/57] Allow tests written in Fortran --- llvm/runtimes/CMakeLists.txt | 5 ++++ openmp/CMakeLists.txt | 15 ++++++----- openmp/README.rst | 2 +- openmp/cmake/OpenMPTesting.cmake | 4 +++ openmp/runtime/test/CMakeLists.txt | 8 +++++- openmp/runtime/test/lit.cfg | 15 +++++++++++ openmp/runtime/test/lit.site.cfg.in | 2 ++ .../test/transform/unroll/heuristic_intdo.f90 | 26 +++++++++++++++++++ 8 files changed, 68 insertions(+), 9 deletions(-) create mode 100644 openmp/runtime/test/transform/unroll/heuristic_intdo.f90 diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 86cfd0285aa84..d33b5af5756f5 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -524,6 +524,11 @@ if(build_runtimes) endif() endforeach() endif() + + if ("openmp" IN_LIST LLVM_ENABLE_RUNTIMES AND "flang" IN_LIST LLVM_ENABLE_PROJECTS) + # Allow openmp to see the Fortran compiler + list(APPEND extra_args ENABLE_FORTRAN) + endif() if("openmp" IN_LIST LLVM_ENABLE_RUNTIMES OR "offload" IN_LIST LLVM_ENABLE_RUNTIMES) if (${LLVM_TOOL_FLANG_BUILD}) message(STATUS "Configuring build of omp_lib.mod and omp_lib_kinds.mod via flang") diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt index f3de4bc4ee87b..1e446c8778934 100644 --- a/openmp/CMakeLists.txt +++ b/openmp/CMakeLists.txt @@ -78,13 +78,6 @@ else() set(OPENMP_TEST_CXX_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++.exe) endif() - # Check for flang - if (NOT 
MSVC) - set(OPENMP_TEST_Fortran_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang) - else() - set(OPENMP_TEST_Fortran_COMPILER ${LLVM_RUNTIME_OUTPUT_INTDIR}/flang.exe) - endif() - # Set fortran test compiler if flang is found if (EXISTS "${OPENMP_TEST_Fortran_COMPILER}") message("Using local flang build at ${OPENMP_TEST_Fortran_COMPILER}") @@ -103,6 +96,14 @@ endif() include(config-ix) include(HandleOpenMPOptions) +# Check for flang +set(OPENMP_TEST_Fortran_COMPILER_default "flang") +if (CMAKE_Fortran_COMPILER) + set(OPENMP_TEST_Fortran_COMPILER_default "${CMAKE_Fortran_COMPILER}") +endif () +set(OPENMP_TEST_Fortran_COMPILER "${OPENMP_TEST_Fortran_COMPILER_default}" CACHE STRING + "Fortran compiler to use for testing OpenMP runtime libraries.") + # Set up testing infrastructure. include(OpenMPTesting) diff --git a/openmp/README.rst b/openmp/README.rst index c34d3e8a40d7d..cc485f9a56ce0 100644 --- a/openmp/README.rst +++ b/openmp/README.rst @@ -121,7 +121,7 @@ Options for all Libraries **OPENMP_TEST_Fortran_COMPILER** = ``${CMAKE_Fortran_COMPILER}`` Compiler to use for testing. Defaults to the compiler that was also used for - building. Will default to flang if build is in-tree. + building. **OPENMP_LLVM_TOOLS_DIR** = ``/path/to/built/llvm/tools`` Additional path to search for LLVM tools needed by tests. 
diff --git a/openmp/cmake/OpenMPTesting.cmake b/openmp/cmake/OpenMPTesting.cmake index 85240aede728d..60280b7ed4893 100644 --- a/openmp/cmake/OpenMPTesting.cmake +++ b/openmp/cmake/OpenMPTesting.cmake @@ -238,6 +238,10 @@ function(add_openmp_testsuite target comment) ) endif() endif() + + if (TARGET flang-rt) + add_dependencies(${target} flang-rt) + endif () endfunction() function(construct_check_openmp_target) diff --git a/openmp/runtime/test/CMakeLists.txt b/openmp/runtime/test/CMakeLists.txt index a7790804542b7..9ee3be6939811 100644 --- a/openmp/runtime/test/CMakeLists.txt +++ b/openmp/runtime/test/CMakeLists.txt @@ -41,7 +41,13 @@ add_library(ompt-print-callback INTERFACE) target_include_directories(ompt-print-callback INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/ompt) -add_openmp_testsuite(check-libomp "Running libomp tests" ${CMAKE_CURRENT_BINARY_DIR} DEPENDS omp) +add_custom_target(libomp-test-depends) +add_dependencies(libomp-test-depends omp) +if (LLVM_RUNTIMES_BUILD AND OPENMP_TEST_Fortran_COMPILER AND "flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + add_dependencies(libomp-test-depends flang-rt) +endif () + +add_openmp_testsuite(check-libomp "Running libomp tests" ${CMAKE_CURRENT_BINARY_DIR} DEPENDS libomp-test-depends) # Add target check-ompt, but make sure to not add the tests twice to check-openmp. add_openmp_testsuite(check-ompt "Running OMPT tests" ${CMAKE_CURRENT_BINARY_DIR}/ompt EXCLUDE_FROM_CHECK_ALL DEPENDS omp) diff --git a/openmp/runtime/test/lit.cfg b/openmp/runtime/test/lit.cfg index 4a5aff241765c..72da1ba1411f8 100644 --- a/openmp/runtime/test/lit.cfg +++ b/openmp/runtime/test/lit.cfg @@ -5,6 +5,8 @@ import os import re import subprocess import lit.formats +from lit.llvm.subst import ToolSubst +from lit.llvm import llvm_config # Tell pylint that we know config and lit_config exist somewhere. if 'PYLINT_IMPORT' in os.environ: @@ -39,6 +41,19 @@ config.name = 'libomp' # suffixes: A list of file extensions to treat as test files. 
config.suffixes = ['.c', '.cpp'] +if config.test_fortran_compiler: + lit_config.note("OpenMP Fortran tests enabled") + config.suffixes += ['.f90', '.F90'] + llvm_config.add_tool_substitutions([ + ToolSubst( + "%flang", + command=config.test_fortran_compiler, + unresolved="fatal", + ), + ], [config.llvm_tools_dir]) +else: + lit_config.note("OpenMP Fortran tests disabled") + # test_source_root: The root path where tests are located. config.test_source_root = os.path.dirname(__file__) diff --git a/openmp/runtime/test/lit.site.cfg.in b/openmp/runtime/test/lit.site.cfg.in index fc65289e4ce64..cc8b3b252d7d1 100644 --- a/openmp/runtime/test/lit.site.cfg.in +++ b/openmp/runtime/test/lit.site.cfg.in @@ -2,6 +2,7 @@ config.test_c_compiler = "@OPENMP_TEST_C_COMPILER@" config.test_cxx_compiler = "@OPENMP_TEST_CXX_COMPILER@" +config.test_fortran_compiler = "@OPENMP_TEST_Fortran_COMPILER@" config.test_compiler_features = @OPENMP_TEST_COMPILER_FEATURES@ config.test_compiler_has_omp_h = @OPENMP_TEST_COMPILER_HAS_OMP_H@ config.test_filecheck = "@OPENMP_FILECHECK_EXECUTABLE@" @@ -24,6 +25,7 @@ config.has_omit_frame_pointer_flag = @OPENMP_TEST_COMPILER_HAS_OMIT_FRAME_POINTE config.target_arch = "@LIBOMP_ARCH@" config.compiler_frontend_variant = "@CMAKE_C_COMPILER_FRONTEND_VARIANT@" config.compiler_simulate_id = "@CMAKE_C_SIMULATE_ID@" +config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" import lit.llvm lit.llvm.initialize(lit_config, config) diff --git a/openmp/runtime/test/transform/unroll/heuristic_intdo.f90 b/openmp/runtime/test/transform/unroll/heuristic_intdo.f90 new file mode 100644 index 0000000000000..d0ef938dd3a8f --- /dev/null +++ b/openmp/runtime/test/transform/unroll/heuristic_intdo.f90 @@ -0,0 +1,26 @@ +! This test checks lowering of OpenMP unroll directive + +! RUN: %flang %flags %openmp_flags -fopenmp-version=51 %s -o %t.exe +! 
RUN: %t.exe | FileCheck %s --match-full-lines + + +program unroll_heuristic + integer :: i + print *, 'do' + + !$OMP UNROLL + do i=7, 18, 3 + print '("i=", I0)', i + end do + !$OMP END UNROLL + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 +! CHECK-NEXT: i=10 +! CHECK-NEXT: i=13 +! CHECK-NEXT: i=16 +! CHECK-NEXT: done From f3ec693f124c8940da024a2ae1cd3ae9ed23b18e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 11 Sep 2025 11:47:59 +0200 Subject: [PATCH 38/57] Revert "[flang][OpenMP] Enable tiling (#143715)" This reverts commit d452e67ee7b5d17aa040f71d8997abc1a47750e4. --- flang/include/flang/Lower/OpenMP.h | 1 + flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 18 +- flang/lib/Lower/OpenMP/ClauseProcessor.h | 5 +- flang/lib/Lower/OpenMP/OpenMP.cpp | 31 ++-- flang/lib/Lower/OpenMP/Utils.cpp | 92 +--------- flang/lib/Lower/OpenMP/Utils.h | 7 +- flang/lib/Semantics/resolve-directives.cpp | 163 ++++-------------- ...nested-loop-transformation-construct01.f90 | 20 +++ .../Lower/OpenMP/parallel-wsloop-lastpriv.f90 | 4 +- flang/test/Lower/OpenMP/simd.f90 | 2 +- flang/test/Lower/OpenMP/wsloop-collapse.f90 | 2 +- flang/test/Lower/OpenMP/wsloop-variable.f90 | 2 +- flang/test/Parser/OpenMP/do-tile-size.f90 | 29 ---- flang/test/Semantics/OpenMP/do-collapse.f90 | 1 - .../OpenMP/do-concurrent-collapse.f90 | 1 - .../Dialect/OpenMP/OpenMPClauseOperands.h | 2 +- .../mlir/Dialect/OpenMP/OpenMPClauses.td | 33 ---- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 15 +- .../Conversion/SCFToOpenMP/SCFToOpenMP.cpp | 6 +- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 64 +------ .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 40 +---- .../Conversion/SCFToOpenMP/scf-to-openmp.mlir | 2 +- mlir/test/Dialect/OpenMP/invalid.mlir | 23 --- mlir/test/Dialect/OpenMP/ops.mlir | 54 ------ .../LLVMIR/omptarget-wsloop-collapsed.mlir | 2 +- mlir/test/Target/LLVMIR/openmp-llvm.mlir | 12 +- 26 files changed, 117 insertions(+), 514 deletions(-) create mode 100644 
flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 delete mode 100644 flang/test/Parser/OpenMP/do-tile-size.f90 diff --git a/flang/include/flang/Lower/OpenMP.h b/flang/include/flang/Lower/OpenMP.h index df01a7b82c66c..581c93f76d627 100644 --- a/flang/include/flang/Lower/OpenMP.h +++ b/flang/include/flang/Lower/OpenMP.h @@ -80,6 +80,7 @@ void genOpenMPDeclarativeConstruct(AbstractConverter &, void genOpenMPSymbolProperties(AbstractConverter &converter, const pft::Variable &var); +int64_t getCollapseValue(const Fortran::parser::OmpClauseList &clauseList); void genThreadprivateOp(AbstractConverter &, const pft::Variable &); void genDeclareTargetIntGlobal(AbstractConverter &, const pft::Variable &); bool isOpenMPTargetConstruct(const parser::OpenMPConstruct &); diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index a96884f5680ba..23f0ca14e931d 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -273,15 +273,10 @@ bool ClauseProcessor::processCancelDirectiveName( bool ClauseProcessor::processCollapse( mlir::Location currentLocation, lower::pft::Evaluation &eval, - mlir::omp::LoopRelatedClauseOps &loopResult, - mlir::omp::CollapseClauseOps &collapseResult, + mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv) const { - - int64_t numCollapse = collectLoopRelatedInfo(converter, currentLocation, eval, - clauses, loopResult, iv); - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - collapseResult.collapseNumLoops = firOpBuilder.getI64IntegerAttr(numCollapse); - return numCollapse > 1; + return collectLoopRelatedInfo(converter, currentLocation, eval, clauses, + result, iv); } bool ClauseProcessor::processDevice(lower::StatementContext &stmtCtx, @@ -527,13 +522,6 @@ bool ClauseProcessor::processProcBind( return false; } -bool ClauseProcessor::processTileSizes( - lower::pft::Evaluation &eval, mlir::omp::LoopNestOperands 
&result) const { - auto *ompCons{eval.getIf()}; - collectTileSizesFromOpenMPConstruct(ompCons, result.tileSizes, semaCtx); - return !result.tileSizes.empty(); -} - bool ClauseProcessor::processSafelen( mlir::omp::SafelenClauseOps &result) const { if (auto *clause = findUniqueClause()) { diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index 324ea3c1047a5..c46bdb348a3ef 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -63,8 +63,7 @@ class ClauseProcessor { mlir::omp::CancelDirectiveNameClauseOps &result) const; bool processCollapse(mlir::Location currentLocation, lower::pft::Evaluation &eval, - mlir::omp::LoopRelatedClauseOps &loopResult, - mlir::omp::CollapseClauseOps &collapseResult, + mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv) const; bool processDevice(lower::StatementContext &stmtCtx, mlir::omp::DeviceClauseOps &result) const; @@ -99,8 +98,6 @@ class ClauseProcessor { bool processPriority(lower::StatementContext &stmtCtx, mlir::omp::PriorityClauseOps &result) const; bool processProcBind(mlir::omp::ProcBindClauseOps &result) const; - bool processTileSizes(lower::pft::Evaluation &eval, - mlir::omp::LoopNestOperands &result) const; bool processSafelen(mlir::omp::SafelenClauseOps &result) const; bool processSchedule(lower::StatementContext &stmtCtx, mlir::omp::ScheduleClauseOps &result) const; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 0ec33e6b24dbf..def6cfff88231 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -503,7 +503,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_distribute: case OMPD_distribute_simd: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); break; case OMPD_teams: @@ -522,7 +522,7 @@ static void 
processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); cp.processNumTeams(stmtCtx, hostInfo->ops); break; @@ -533,7 +533,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, cp.processNumTeams(stmtCtx, hostInfo->ops); [[fallthrough]]; case OMPD_loop: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); break; case OMPD_teams_workdistribute: @@ -1569,10 +1569,9 @@ genLoopNestClauses(lower::AbstractConverter &converter, HostEvalInfo *hostEvalInfo = getHostEvalInfoStackTop(converter); if (!hostEvalInfo || !hostEvalInfo->apply(clauseOps, iv)) - cp.processCollapse(loc, eval, clauseOps, clauseOps, iv); + cp.processCollapse(loc, eval, clauseOps, iv); clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr(); - cp.processTileSizes(eval, clauseOps); } static void genLoopClauses( @@ -1949,9 +1948,9 @@ static mlir::omp::LoopNestOp genLoopNestOp( return llvm::SmallVector(iv); }; - uint64_t nestValue = getCollapseValue(item->clauses); - nestValue = nestValue < iv.size() ? iv.size() : nestValue; - auto *nestedEval = getCollapsedLoopEval(eval, nestValue); + auto *nestedEval = + getCollapsedLoopEval(eval, getCollapseValue(item->clauses)); + return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval, directive) @@ -3844,8 +3843,8 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; switch (nestedDirective) { case llvm::omp::Directive::OMPD_tile: - // Skip OMPD_tile since the tile sizes will be retrieved when - // generating the omp.loop_nest op. 
+ // Emit the omp.loop_nest with annotation for tiling + genOMP(converter, symTable, semaCtx, eval, ompNestedLoopCons->value()); break; default: { unsigned version = semaCtx.langOptions().OpenMPVersion; @@ -3958,6 +3957,18 @@ void Fortran::lower::genOpenMPSymbolProperties( lower::genDeclareTargetIntGlobal(converter, var); } +int64_t +Fortran::lower::getCollapseValue(const parser::OmpClauseList &clauseList) { + for (const parser::OmpClause &clause : clauseList.v) { + if (const auto &collapseClause = + std::get_if(&clause.u)) { + const auto *expr = semantics::GetExpr(collapseClause->v); + return evaluate::ToInt64(*expr).value(); + } + } + return 1; +} + void Fortran::lower::genThreadprivateOp(lower::AbstractConverter &converter, const lower::pft::Variable &var) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index d1d1cd68a5b44..cb6dd57667824 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -13,7 +13,6 @@ #include "Utils.h" #include "ClauseFinder.h" -#include "flang/Evaluate/fold.h" #include "flang/Lower/OpenMP/Clauses.h" #include #include @@ -25,32 +24,11 @@ #include #include #include -#include #include #include #include -template -Fortran::semantics::MaybeIntExpr -EvaluateIntExpr(Fortran::semantics::SemanticsContext &context, const T &expr) { - if (Fortran::semantics::MaybeExpr maybeExpr{ - Fold(context.foldingContext(), AnalyzeExpr(context, expr))}) { - if (auto *intExpr{ - Fortran::evaluate::UnwrapExpr( - *maybeExpr)}) { - return std::move(*intExpr); - } - } - return std::nullopt; -} - -template -std::optional -EvaluateInt64(Fortran::semantics::SemanticsContext &context, const T &expr) { - return Fortran::evaluate::ToInt64(EvaluateIntExpr(context, expr)); -} - llvm::cl::opt treatIndexAsSection( "openmp-treat-index-as-section", llvm::cl::desc("In the OpenMP data clauses treat `a(N)` as `a(N:N)`."), @@ -599,64 +577,12 @@ static 
void convertLoopBounds(lower::AbstractConverter &converter, } } -// Helper function that finds the sizes clause in a inner OMPD_tile directive -// and passes the sizes clause to the callback function if found. -static void processTileSizesFromOpenMPConstruct( - const parser::OpenMPConstruct *ompCons, - std::function processFun) { - if (!ompCons) - return; - if (auto *ompLoop{std::get_if(&ompCons->u)}) { - const auto &nestedOptional = - std::get>(ompLoop->t); - assert(nestedOptional.has_value() && - "Expected a DoConstruct or OpenMPLoopConstruct"); - const auto *innerConstruct = - std::get_if>( - &(nestedOptional.value())); - if (innerConstruct) { - const auto &innerLoopDirective = innerConstruct->value(); - const auto &innerBegin = - std::get(innerLoopDirective.t); - const auto &innerDirective = - std::get(innerBegin.t).v; - - if (innerDirective == llvm::omp::Directive::OMPD_tile) { - // Get the size values from parse tree and convert to a vector. - const auto &innerClauseList{ - std::get(innerBegin.t)}; - for (const auto &clause : innerClauseList.v) { - if (const auto tclause{ - std::get_if(&clause.u)}) { - processFun(tclause); - break; - } - } - } - } - } -} - -/// Populates the sizes vector with values if the given OpenMPConstruct -/// contains a loop construct with an inner tiling construct. 
-void collectTileSizesFromOpenMPConstruct( - const parser::OpenMPConstruct *ompCons, - llvm::SmallVectorImpl &tileSizes, - Fortran::semantics::SemanticsContext &semaCtx) { - processTileSizesFromOpenMPConstruct( - ompCons, [&](const parser::OmpClause::Sizes *tclause) { - for (auto &tval : tclause->v) - if (const auto v{EvaluateInt64(semaCtx, tval)}) - tileSizes.push_back(*v); - }); -} - -int64_t collectLoopRelatedInfo( +bool collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, lower::pft::Evaluation &eval, const omp::List &clauses, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv) { - int64_t numCollapse = 1; + bool found = false; fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); // Collect the loops to collapse. @@ -669,19 +595,9 @@ int64_t collectLoopRelatedInfo( if (auto *clause = ClauseFinder::findUniqueClause(clauses)) { collapseValue = evaluate::ToInt64(clause->v).value(); - numCollapse = collapseValue; - } - - // Collect sizes from tile directive if present. 
- std::int64_t sizesLengthValue = 0l; - if (auto *ompCons{eval.getIf()}) { - processTileSizesFromOpenMPConstruct( - ompCons, [&](const parser::OmpClause::Sizes *tclause) { - sizesLengthValue = tclause->v.size(); - }); + found = true; } - collapseValue = std::max(collapseValue, sizesLengthValue); std::size_t loopVarTypeSize = 0; do { lower::pft::Evaluation *doLoop = @@ -715,7 +631,7 @@ int64_t collectLoopRelatedInfo( convertLoopBounds(converter, currentLocation, result, loopVarTypeSize); - return numCollapse; + return found; } } // namespace omp diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index 5f191d89ae205..88371ab8bf969 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -159,17 +159,12 @@ void genObjectList(const ObjectList &objects, void lastprivateModifierNotSupported(const omp::clause::Lastprivate &lastp, mlir::Location loc); -int64_t collectLoopRelatedInfo( +bool collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, lower::pft::Evaluation &eval, const omp::List &clauses, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv); -void collectTileSizesFromOpenMPConstruct( - const parser::OpenMPConstruct *ompCons, - llvm::SmallVectorImpl &tileSizes, - Fortran::semantics::SemanticsContext &semaCtx); - } // namespace omp } // namespace lower } // namespace Fortran diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 1b7718d1314d3..43f12c2b14038 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -856,23 +856,7 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { const parser::OmpClause *GetAssociatedClause() { return associatedClause; } private: - /// Given a vector of loop levels and a vector of corresponding clauses find - /// the largest loop level and set the associated loop level to the found - /// maximum. 
This is used for error handling to ensure that the number of - /// affected loops is not larger that the number of available loops. - std::int64_t SetAssociatedMaxClause(llvm::SmallVector &, - llvm::SmallVector &); - std::int64_t GetNumAffectedLoopsFromLoopConstruct( - const parser::OpenMPLoopConstruct &); - void CollectNumAffectedLoopsFromLoopConstruct( - const parser::OpenMPLoopConstruct &, llvm::SmallVector &, - llvm::SmallVector &); - void CollectNumAffectedLoopsFromInnerLoopContruct( - const parser::OpenMPLoopConstruct &, llvm::SmallVector &, - llvm::SmallVector &); - void CollectNumAffectedLoopsFromClauses(const parser::OmpClauseList &, - llvm::SmallVector &, - llvm::SmallVector &); + std::int64_t GetAssociatedLoopLevelFromClauses(const parser::OmpClauseList &); Symbol::Flags dataSharingAttributeFlags{Symbol::Flag::OmpShared, Symbol::Flag::OmpPrivate, Symbol::Flag::OmpFirstPrivate, @@ -1884,6 +1868,7 @@ bool OmpAttributeVisitor::Pre( bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { const auto &beginLoopDir{std::get(x.t)}; const auto &beginDir{std::get(beginLoopDir.t)}; + const auto &clauseList{std::get(beginLoopDir.t)}; switch (beginDir.v) { case llvm::omp::Directive::OMPD_distribute: case llvm::omp::Directive::OMPD_distribute_parallel_do: @@ -1934,7 +1919,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { beginDir.v == llvm::omp::Directive::OMPD_target_loop) IssueNonConformanceWarning(beginDir.v, beginDir.source, 52); ClearDataSharingAttributeObjects(); - SetContextAssociatedLoopLevel(GetNumAffectedLoopsFromLoopConstruct(x)); + SetContextAssociatedLoopLevel(GetAssociatedLoopLevelFromClauses(clauseList)); if (beginDir.v == llvm::omp::Directive::OMPD_do) { auto &optLoopCons = std::get>(x.t); @@ -1948,7 +1933,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { } } PrivatizeAssociatedLoopIndexAndCheckLoopLevel(x); - ordCollapseLevel = GetNumAffectedLoopsFromLoopConstruct(x) + 1; + 
ordCollapseLevel = GetAssociatedLoopLevelFromClauses(clauseList) + 1; return true; } @@ -2036,111 +2021,44 @@ bool OmpAttributeVisitor::Pre(const parser::DoConstruct &x) { return true; } -static bool isSizesClause(const parser::OmpClause *clause) { - return std::holds_alternative(clause->u); -} - -std::int64_t OmpAttributeVisitor::SetAssociatedMaxClause( - llvm::SmallVector &levels, - llvm::SmallVector &clauses) { - - // Find the tile level to ensure that the COLLAPSE clause value - // does not exeed the number of tiled loops. - std::int64_t tileLevel = 0; - for (auto [level, clause] : llvm::zip_equal(levels, clauses)) - if (isSizesClause(clause)) - tileLevel = level; - - std::int64_t maxLevel = 1; - const parser::OmpClause *maxClause = nullptr; - for (auto [level, clause] : llvm::zip_equal(levels, clauses)) { - if (tileLevel > 0 && tileLevel < level) { - context_.Say(clause->source, - "The value of the parameter in the COLLAPSE clause must" - " not be larger than the number of the number of tiled loops" - " because collapse currently is limited to independent loop" - " iterations."_err_en_US); - return 1; - } - - if (level > maxLevel) { - maxLevel = level; - maxClause = clause; - } - } - if (maxClause) - SetAssociatedClause(maxClause); - return maxLevel; -} - -std::int64_t OmpAttributeVisitor::GetNumAffectedLoopsFromLoopConstruct( - const parser::OpenMPLoopConstruct &x) { - llvm::SmallVector levels; - llvm::SmallVector clauses; - - CollectNumAffectedLoopsFromLoopConstruct(x, levels, clauses); - return SetAssociatedMaxClause(levels, clauses); -} - -void OmpAttributeVisitor::CollectNumAffectedLoopsFromLoopConstruct( - const parser::OpenMPLoopConstruct &x, - llvm::SmallVector &levels, - llvm::SmallVector &clauses) { - const auto &beginLoopDir{std::get(x.t)}; - const auto &clauseList{std::get(beginLoopDir.t)}; - - CollectNumAffectedLoopsFromClauses(clauseList, levels, clauses); - CollectNumAffectedLoopsFromInnerLoopContruct(x, levels, clauses); -} - -void 
OmpAttributeVisitor::CollectNumAffectedLoopsFromInnerLoopContruct( - const parser::OpenMPLoopConstruct &x, - llvm::SmallVector &levels, - llvm::SmallVector &clauses) { +std::int64_t OmpAttributeVisitor::GetAssociatedLoopLevelFromClauses( + const parser::OmpClauseList &x) { + std::int64_t orderedLevel{0}; + std::int64_t collapseLevel{0}; - const auto &nestedOptional = - std::get>(x.t); - assert(nestedOptional.has_value() && - "Expected a DoConstruct or OpenMPLoopConstruct"); - const auto *innerConstruct = - std::get_if>( - &(nestedOptional.value())); + const parser::OmpClause *ordClause{nullptr}; + const parser::OmpClause *collClause{nullptr}; - if (innerConstruct) { - CollectNumAffectedLoopsFromLoopConstruct( - innerConstruct->value(), levels, clauses); - } -} - -void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( - const parser::OmpClauseList &x, llvm::SmallVector &levels, - llvm::SmallVector &clauses) { for (const auto &clause : x.v) { - if (const auto oclause{ + if (const auto *orderedClause{ std::get_if(&clause.u)}) { - std::int64_t level = 0; - if (const auto v{EvaluateInt64(context_, oclause->v)}) { - level = *v; + if (const auto v{EvaluateInt64(context_, orderedClause->v)}) { + orderedLevel = *v; } - levels.push_back(level); - clauses.push_back(&clause); + ordClause = &clause; } - - if (const auto cclause{ + if (const auto *collapseClause{ std::get_if(&clause.u)}) { - std::int64_t level = 0; - if (const auto v{EvaluateInt64(context_, cclause->v)}) { - level = *v; + if (const auto v{EvaluateInt64(context_, collapseClause->v)}) { + collapseLevel = *v; } - levels.push_back(level); - clauses.push_back(&clause); + collClause = &clause; } + } - if (const auto tclause{std::get_if(&clause.u)}) { - levels.push_back(tclause->v.size()); - clauses.push_back(&clause); - } + if (orderedLevel && (!collapseLevel || orderedLevel >= collapseLevel)) { + SetAssociatedClause(ordClause); + return orderedLevel; + } else if (!orderedLevel && collapseLevel) { + 
SetAssociatedClause(collClause); + return collapseLevel; + } else { + SetAssociatedClause(nullptr); } + // orderedLevel < collapseLevel is an error handled in structural + // checks + + return 1; // default is outermost loop } // 2.15.1.1 Data-sharing Attribute Rules - Predetermined @@ -2172,21 +2090,10 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( const parser::OmpClause *clause{GetAssociatedClause()}; bool hasCollapseClause{ clause ? (clause->Id() == llvm::omp::OMPC_collapse) : false}; - const parser::OpenMPLoopConstruct *innerMostLoop = &x; - const parser::NestedConstruct *innerMostNest = nullptr; - while (auto &optLoopCons{ - std::get>(innerMostLoop->t)}) { - innerMostNest = &(optLoopCons.value()); - if (const auto *innerLoop{ - std::get_if>( - innerMostNest)}) { - innerMostLoop = &(innerLoop->value()); - } else - break; - } - if (innerMostNest) { - if (const auto &outer{std::get_if(innerMostNest)}) { + auto &optLoopCons = std::get>(x.t); + if (optLoopCons.has_value()) { + if (const auto &outer{std::get_if(&*optLoopCons)}) { for (const parser::DoConstruct *loop{&*outer}; loop && level > 0; --level) { if (loop->IsDoConcurrent()) { @@ -2222,7 +2129,7 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel( CheckAssocLoopLevel(level, GetAssociatedClause()); } else if (const auto &loop{std::get_if< common::Indirection>( - innerMostNest)}) { + &*optLoopCons)}) { auto &beginDirective = std::get(loop->value().t); auto &beginLoopDirective = diff --git a/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 b/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 new file mode 100644 index 0000000000000..17eba93a7405d --- /dev/null +++ b/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 @@ -0,0 +1,20 @@ +! Test to ensure TODO message is emitted for tile OpenMP 5.1 Directives when they are nested. 
+ +!RUN: not %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s 2>&1 | FileCheck %s + +subroutine loop_transformation_construct + implicit none + integer :: I = 10 + integer :: x + integer :: y(I) + + !$omp do + !$omp tile + do i = 1, I + y(i) = y(i) * 5 + end do + !$omp end tile + !$omp end do +end subroutine + +!CHECK: not yet implemented: Unhandled loop directive (tile) diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 index faf8f717f6308..2890e78e9d17f 100644 --- a/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 +++ b/flang/test/Lower/OpenMP/parallel-wsloop-lastpriv.f90 @@ -108,7 +108,7 @@ subroutine omp_do_lastprivate_collapse2(a) ! CHECK-NEXT: %[[UB2:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref ! CHECK-NEXT: %[[STEP2:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[I_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[J_PVT_REF:.*]] : !fir.ref, !fir.ref, !fir.ref) { - ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[LB1]], %[[LB2]]) to (%[[UB1]], %[[UB2]]) inclusive step (%[[STEP1]], %[[STEP2]]) collapse(2) { + ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]]) : i32 = (%[[LB1]], %[[LB2]]) to (%[[UB1]], %[[UB2]]) inclusive step (%[[STEP1]], %[[STEP2]]) { ! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ea"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse2Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -174,7 +174,7 @@ subroutine omp_do_lastprivate_collapse3(a) ! CHECK-NEXT: %[[UB3:.*]] = fir.load %[[ARG0_DECL]]#0 : !fir.ref ! CHECK-NEXT: %[[STEP3:.*]] = arith.constant 1 : i32 ! 
CHECK-NEXT: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[A_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[I_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[J_PVT_REF:.*]], @{{.*}} %{{.*}}#0 -> %[[K_PVT_REF:.*]] : !fir.ref, !fir.ref, !fir.ref, !fir.ref) { - ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) : i32 = (%[[LB1]], %[[LB2]], %[[LB3]]) to (%[[UB1]], %[[UB2]], %[[UB3]]) inclusive step (%[[STEP1]], %[[STEP2]], %[[STEP3]]) collapse(3) { + ! CHECK-NEXT: omp.loop_nest (%[[ARG1:.*]], %[[ARG2:.*]], %[[ARG3:.*]]) : i32 = (%[[LB1]], %[[LB2]], %[[LB3]]) to (%[[UB1]], %[[UB2]], %[[UB3]]) inclusive step (%[[STEP1]], %[[STEP2]], %[[STEP3]]) { ! CHECK: %[[A_PVT_DECL:.*]]:2 = hlfir.declare %[[A_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ea"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[I_PVT_DECL:.*]]:2 = hlfir.declare %[[I_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ei"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: %[[J_PVT_DECL:.*]]:2 = hlfir.declare %[[J_PVT_REF]] {uniq_name = "_QFomp_do_lastprivate_collapse3Ej"} : (!fir.ref) -> (!fir.ref, !fir.ref) diff --git a/flang/test/Lower/OpenMP/simd.f90 b/flang/test/Lower/OpenMP/simd.f90 index 369b5eb072af9..7655c786573e3 100644 --- a/flang/test/Lower/OpenMP/simd.f90 +++ b/flang/test/Lower/OpenMP/simd.f90 @@ -175,7 +175,7 @@ subroutine simd_with_collapse_clause(n) ! CHECK-NEXT: omp.loop_nest (%[[ARG_0:.*]], %[[ARG_1:.*]]) : i32 = ( ! CHECK-SAME: %[[LOWER_I]], %[[LOWER_J]]) to ( ! CHECK-SAME: %[[UPPER_I]], %[[UPPER_J]]) inclusive step ( - ! CHECK-SAME: %[[STEP_I]], %[[STEP_J]]) collapse(2) { + ! 
CHECK-SAME: %[[STEP_I]], %[[STEP_J]]) { !$OMP SIMD COLLAPSE(2) do i = 1, n do j = 1, n diff --git a/flang/test/Lower/OpenMP/wsloop-collapse.f90 b/flang/test/Lower/OpenMP/wsloop-collapse.f90 index 677c7809c397f..7ec40ab4b2f43 100644 --- a/flang/test/Lower/OpenMP/wsloop-collapse.f90 +++ b/flang/test/Lower/OpenMP/wsloop-collapse.f90 @@ -57,7 +57,7 @@ program wsloop_collapse !CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_11]]#0 : !fir.ref !CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32 !CHECK: omp.wsloop private(@{{.*}} %{{.*}}#0 -> %[[VAL_4:.*]], @{{.*}} %{{.*}}#0 -> %[[VAL_2:.*]], @{{.*}} %{{.*}}#0 -> %[[VAL_0:.*]] : !fir.ref, !fir.ref, !fir.ref) { -!CHECK-NEXT: omp.loop_nest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) collapse(3) { +!CHECK-NEXT: omp.loop_nest (%[[VAL_33:.*]], %[[VAL_34:.*]], %[[VAL_35:.*]]) : i32 = (%[[VAL_24]], %[[VAL_27]], %[[VAL_30]]) to (%[[VAL_25]], %[[VAL_28]], %[[VAL_31]]) inclusive step (%[[VAL_26]], %[[VAL_29]], %[[VAL_32]]) { !$omp do collapse(3) do i = 1, a do j= 1, b diff --git a/flang/test/Lower/OpenMP/wsloop-variable.f90 b/flang/test/Lower/OpenMP/wsloop-variable.f90 index 0f4aafb10ded3..f998c84331ce4 100644 --- a/flang/test/Lower/OpenMP/wsloop-variable.f90 +++ b/flang/test/Lower/OpenMP/wsloop-variable.f90 @@ -22,7 +22,7 @@ program wsloop_variable !CHECK: %[[TMP6:.*]] = fir.convert %[[TMP1]] : (i32) -> i64 !CHECK: %[[TMP7:.*]] = fir.convert %{{.*}} : (i32) -> i64 !CHECK: omp.wsloop private({{.*}}) { -!CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) collapse(2) { +!CHECK-NEXT: omp.loop_nest (%[[ARG0:.*]], %[[ARG1:.*]]) : i64 = (%[[TMP2]], %[[TMP5]]) to (%[[TMP3]], %[[TMP6]]) inclusive step (%[[TMP4]], %[[TMP7]]) { !CHECK: %[[ARG0_I16:.*]] = fir.convert %[[ARG0]] : (i64) -> i16 !CHECK: 
hlfir.assign %[[ARG0_I16]] to %[[STORE_IV0:.*]]#0 : i16, !fir.ref !CHECK: hlfir.assign %[[ARG1]] to %[[STORE_IV1:.*]]#0 : i64, !fir.ref diff --git a/flang/test/Parser/OpenMP/do-tile-size.f90 b/flang/test/Parser/OpenMP/do-tile-size.f90 deleted file mode 100644 index 886ee4a2a680c..0000000000000 --- a/flang/test/Parser/OpenMP/do-tile-size.f90 +++ /dev/null @@ -1,29 +0,0 @@ -! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck --ignore-case %s -! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck --check-prefix="PARSE-TREE" %s - -subroutine openmp_do_tiles(x) - - integer, intent(inout)::x - - -!CHECK: !$omp do -!CHECK: !$omp tile sizes -!$omp do -!$omp tile sizes(2) -!CHECK: do - do x = 1, 100 - call F1() -!CHECK: end do - end do -!CHECK: !$omp end tile -!$omp end tile -!$omp end do - -!PARSE-TREE:| | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPLoopConstruct -!PARSE-TREE:| | | OmpBeginLoopDirective -!PARSE-TREE:| | | OpenMPLoopConstruct -!PARSE-TREE:| | | | OmpBeginLoopDirective -!PARSE-TREE:| | | | | OmpLoopDirective -> llvm::omp::Directive = tile -!PARSE-TREE:| | | | | OmpClauseList -> OmpClause -> Sizes -> Scalar -> Integer -> Expr = '2_4' -!PARSE-TREE: | | | | DoConstruct -END subroutine openmp_do_tiles diff --git a/flang/test/Semantics/OpenMP/do-collapse.f90 b/flang/test/Semantics/OpenMP/do-collapse.f90 index ec6a3bdad3686..480bd45b79b83 100644 --- a/flang/test/Semantics/OpenMP/do-collapse.f90 +++ b/flang/test/Semantics/OpenMP/do-collapse.f90 @@ -31,7 +31,6 @@ program omp_doCollapse end do end do - !ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct. 
!ERROR: At most one COLLAPSE clause can appear on the SIMD directive !$omp simd collapse(2) collapse(1) do i = 1, 4 diff --git a/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 b/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 index 355626f6e73b9..bb1929249183b 100644 --- a/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 +++ b/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 @@ -1,7 +1,6 @@ !RUN: %python %S/../test_errors.py %s %flang -fopenmp integer :: i, j -! ERROR: DO CONCURRENT loops cannot be used with the COLLAPSE clause. !$omp parallel do collapse(2) do i = 1, 1 ! ERROR: DO CONCURRENT loops cannot form part of a loop nest. diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h index 6a92b136ef51c..faf820dcfdb29 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h @@ -40,7 +40,7 @@ struct DeviceTypeClauseOps { /// Clauses that correspond to operations other than omp.target, but might have /// to be evaluated outside of a parent target region. using HostEvaluatedOperands = - detail::Clauses; // TODO: Add `indirect` clause. 
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td index 5f40abe62a0f6..311c57fb4446c 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td @@ -209,23 +209,6 @@ class OpenMP_BindClauseSkip< def OpenMP_BindClause : OpenMP_BindClauseSkip<>; -//===----------------------------------------------------------------------===// -// V5.2: [4.4.3] `collapse` clause -//===----------------------------------------------------------------------===// - -class OpenMP_CollapseClauseSkip< - bit traits = false, bit arguments = false, bit assemblyFormat = false, - bit description = false, bit extraClassDeclaration = false - > : OpenMP_Clause { - let arguments = (ins - ConfinedAttr, [IntMinValue<1>]> - :$collapse_num_loops - ); -} - -def OpenMP_CollapseClause : OpenMP_CollapseClauseSkip<>; - //===----------------------------------------------------------------------===// // V5.2: [5.7.2] `copyprivate` clause //===----------------------------------------------------------------------===// @@ -1402,22 +1385,6 @@ class OpenMP_ThreadLimitClauseSkip< def OpenMP_ThreadLimitClause : OpenMP_ThreadLimitClauseSkip<>; -//===----------------------------------------------------------------------===// -// V5.2: [9.1.1] `sizes` clause -//===----------------------------------------------------------------------===// - -class OpenMP_TileSizesClauseSkip< - bit traits = false, bit arguments = false, bit assemblyFormat = false, - bit description = false, bit extraClassDeclaration = false - > : OpenMP_Clause { - let arguments = (ins - OptionalAttr:$tile_sizes - ); -} - -def OpenMP_TileSizesClause : OpenMP_TileSizesClauseSkip<>; - //===----------------------------------------------------------------------===// // V5.2: [12.1] `untied` clause //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 830b36f440098..2548a8ab4aac6 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -614,18 +614,13 @@ def WorkshareLoopWrapperOp : OpenMP_Op<"workshare.loop_wrapper", traits = [ def LoopNestOp : OpenMP_Op<"loop_nest", traits = [ RecursiveMemoryEffects, SameVariadicOperandSize ], clauses = [ - OpenMP_CollapseClause, - OpenMP_LoopRelatedClause, - OpenMP_TileSizesClause + OpenMP_LoopRelatedClause ], singleRegion = true> { let summary = "rectangular loop nest"; let description = [{ - This operation represents a rectangular loop nest which may be collapsed - and/or tiled. For each rectangular loop of the nest represented by an - instance of this operation, lower and upper bounds, as well as a step - variable, must be defined. The collapse clause specifies how many loops - that should be collapsed (1 if no collapse is done) after any tiling is - performed. The tiling sizes is represented by the tile sizes clause. + This operation represents a collapsed rectangular loop nest. For each + rectangular loop of the nest represented by an instance of this operation, + lower and upper bounds, as well as a step variable, must be defined. The lower and upper bounds specify a half-open range: the range includes the lower bound but does not include the upper bound. If the `loop_inclusive` @@ -638,7 +633,7 @@ def LoopNestOp : OpenMP_Op<"loop_nest", traits = [ `loop_steps` arguments. 
```mlir - omp.loop_nest (%i1, %i2) : i32 = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) collapse(2) tiles(5,5) { + omp.loop_nest (%i1, %i2) : i32 = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) { %a = load %arrA[%i1, %i2] : memref %b = load %arrB[%i1, %i2] : memref %sum = arith.addf %a, %b : f32 diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp index 460595ba9f254..c4a9fc2e556f1 100644 --- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp +++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp @@ -492,10 +492,8 @@ struct ParallelOpLowering : public OpRewritePattern { // Create loop nest and populate region with contents of scf.parallel. auto loopOp = omp::LoopNestOp::create( - rewriter, parallelOp.getLoc(), parallelOp.getLowerBound().size(), - parallelOp.getLowerBound(), parallelOp.getUpperBound(), - parallelOp.getStep(), /*loop_inclusive=*/false, - /*tile_sizes=*/nullptr); + rewriter, parallelOp.getLoc(), parallelOp.getLowerBound(), + parallelOp.getUpperBound(), parallelOp.getStep()); rewriter.inlineRegionBefore(parallelOp.getRegion(), loopOp.getRegion(), loopOp.getRegion().begin()); diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index aa88b9e8eef5a..6e43f28e8d93d 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -56,11 +56,6 @@ makeDenseBoolArrayAttr(MLIRContext *ctx, const ArrayRef boolArray) { return boolArray.empty() ? nullptr : DenseBoolArrayAttr::get(ctx, boolArray); } -static DenseI64ArrayAttr -makeDenseI64ArrayAttr(MLIRContext *ctx, const ArrayRef intArray) { - return intArray.empty() ? 
nullptr : DenseI64ArrayAttr::get(ctx, intArray); -} - namespace { struct MemRefPointerLikeModel : public PointerLikeType::ExternalModel steps; @@ -2972,35 +2967,6 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) { parser.parseOperandList(steps, ivs.size(), OpAsmParser::Delimiter::Paren)) return failure(); - // Parse collapse - int64_t value = 0; - if (!parser.parseOptionalKeyword("collapse") && - (parser.parseLParen() || parser.parseInteger(value) || - parser.parseRParen())) - return failure(); - if (value > 1) - result.addAttribute( - "collapse_num_loops", - IntegerAttr::get(parser.getBuilder().getI64Type(), value)); - - // Parse tiles - SmallVector tiles; - auto parseTiles = [&]() -> ParseResult { - int64_t tile; - if (parser.parseInteger(tile)) - return failure(); - tiles.push_back(tile); - return success(); - }; - - if (!parser.parseOptionalKeyword("tiles") && - (parser.parseLParen() || parser.parseCommaSeparatedList(parseTiles) || - parser.parseRParen())) - return failure(); - - if (tiles.size() > 0) - result.addAttribute("tile_sizes", DenseI64ArrayAttr::get(ctx, tiles)); - // Parse the body. 
Region *region = result.addRegion(); if (parser.parseRegion(*region, ivs)) @@ -3024,23 +2990,14 @@ void LoopNestOp::print(OpAsmPrinter &p) { if (getLoopInclusive()) p << "inclusive "; p << "step (" << getLoopSteps() << ") "; - if (int64_t numCollapse = getCollapseNumLoops()) - if (numCollapse > 1) - p << "collapse(" << numCollapse << ") "; - - if (const auto tiles = getTileSizes()) - p << "tiles(" << tiles.value() << ") "; - p.printRegion(region, /*printEntryBlockArgs=*/false); } void LoopNestOp::build(OpBuilder &builder, OperationState &state, const LoopNestOperands &clauses) { - MLIRContext *ctx = builder.getContext(); - LoopNestOp::build(builder, state, clauses.collapseNumLoops, - clauses.loopLowerBounds, clauses.loopUpperBounds, - clauses.loopSteps, clauses.loopInclusive, - makeDenseI64ArrayAttr(ctx, clauses.tileSizes)); + LoopNestOp::build(builder, state, clauses.loopLowerBounds, + clauses.loopUpperBounds, clauses.loopSteps, + clauses.loopInclusive); } LogicalResult LoopNestOp::verify() { @@ -3056,17 +3013,6 @@ LogicalResult LoopNestOp::verify() { << "range argument type does not match corresponding IV type"; } - uint64_t numIVs = getIVs().size(); - - if (const auto &numCollapse = getCollapseNumLoops()) - if (numCollapse > numIVs) - return emitOpError() - << "collapse value is larger than the number of loops"; - - if (const auto &tiles = getTileSizes()) - if (tiles.value().size() > numIVs) - return emitOpError() << "too few canonical loops for tile dimensions"; - if (!llvm::dyn_cast_if_present((*this)->getParentOp())) return emitOpError() << "expects parent op to be a loop wrapper"; diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 2ab6bb0a73200..4e26e65cf9718 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3041,46 +3041,16 @@ 
convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, loopInfos.push_back(*loopResult); } + // Collapse loops. Store the insertion point because LoopInfos may get + // invalidated. llvm::OpenMPIRBuilder::InsertPointTy afterIP = loopInfos.front()->getAfterIP(); - // Do tiling. - if (const auto &tiles = loopOp.getTileSizes()) { - llvm::Type *ivType = loopInfos.front()->getIndVarType(); - SmallVector tileSizes; - - for (auto tile : tiles.value()) { - llvm::Value *tileVal = llvm::ConstantInt::get(ivType, tile); - tileSizes.push_back(tileVal); - } - - std::vector newLoops = - ompBuilder->tileLoops(ompLoc.DL, loopInfos, tileSizes); - - // Update afterIP to get the correct insertion point after - // tiling. - llvm::BasicBlock *afterBB = newLoops.front()->getAfter(); - llvm::BasicBlock *afterAfterBB = afterBB->getSingleSuccessor(); - afterIP = {afterAfterBB, afterAfterBB->begin()}; - - // Update the loop infos. - loopInfos.clear(); - for (const auto &newLoop : newLoops) - loopInfos.push_back(newLoop); - } // Tiling done. - - // Do collapse. - const auto &numCollapse = loopOp.getCollapseNumLoops(); - SmallVector collapseLoopInfos( - loopInfos.begin(), loopInfos.begin() + (numCollapse)); - - auto newTopLoopInfo = - ompBuilder->collapseLoops(ompLoc.DL, collapseLoopInfos, {}); - - assert(newTopLoopInfo && "New top loop information is missing"); + // Update the stack frame created for this loop to point to the resulting loop + // after applying transformations. 
moduleTranslation.stackWalk( [&](OpenMPLoopInfoStackFrame &frame) { - frame.loopInfo = newTopLoopInfo; + frame.loopInfo = ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); return WalkResult::interrupt(); }); diff --git a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir index d362bb6092419..a722acbf2c347 100644 --- a/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir +++ b/mlir/test/Conversion/SCFToOpenMP/scf-to-openmp.mlir @@ -6,7 +6,7 @@ func.func @parallel(%arg0: index, %arg1: index, %arg2: index, // CHECK: %[[FOUR:.+]] = llvm.mlir.constant(4 : i32) : i32 // CHECK: omp.parallel num_threads(%[[FOUR]] : i32) { // CHECK: omp.wsloop { - // CHECK: omp.loop_nest (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) collapse(2) { + // CHECK: omp.loop_nest (%[[LVAR1:.*]], %[[LVAR2:.*]]) : index = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) { // CHECK: memref.alloca_scope scf.parallel (%i, %j) = (%arg0, %arg1) to (%arg2, %arg3) step (%arg4, %arg5) { // CHECK: "test.payload"(%[[LVAR1]], %[[LVAR2]]) : (index, index) -> () diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 763f41c5420b8..986c3844d0bb9 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -157,29 +157,6 @@ func.func @no_loops(%lb : index, %ub : index, %step : index) { } } -// ----- - -func.func @collapse_size(%lb : index, %ub : index, %step : index) { - omp.wsloop { - // expected-error@+1 {{collapse value is larger than the number of loops}} - omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) collapse(4) { - omp.yield - } - } -} - -// ----- - -func.func @tiles_length(%lb : index, %ub : index, %step : index) { - omp.wsloop { - // expected-error@+1 {{op too few canonical loops for tile dimensions}} - omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) tiles(2, 4) { - omp.yield - } - } -} - - // ----- 
func.func @inclusive_not_a_clause(%lb : index, %ub : index, %step : index) { diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index 60b1f61135ac2..3c2e0a3b7cc15 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -376,60 +376,6 @@ func.func @omp_loop_nest_pretty_multiple(%lb1 : i32, %ub1 : i32, %step1 : i32, return } -// CHECK-LABEL: omp_loop_nest_pretty_multiple_collapse -func.func @omp_loop_nest_pretty_multiple_collapse(%lb1 : i32, %ub1 : i32, %step1 : i32, - %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref) -> () { - - omp.wsloop { - // CHECK: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) collapse(2) - omp.loop_nest (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) { - %1 = "test.payload"(%iv1) : (i32) -> (index) - %2 = "test.payload"(%iv2) : (i32) -> (index) - memref.store %iv1, %data1[%1] : memref - memref.store %iv2, %data1[%2] : memref - omp.yield - } - } - - return -} - -// CHECK-LABEL: omp_loop_nest_pretty_multiple_tiles -func.func @omp_loop_nest_pretty_multiple_tiles(%lb1 : i32, %ub1 : i32, %step1 : i32, - %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref) -> () { - - omp.wsloop { - // CHECK: omp.loop_nest (%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) tiles(5, 10) - omp.loop_nest (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) tiles(5, 10) { - %1 = "test.payload"(%iv1) : (i32) -> (index) - %2 = "test.payload"(%iv2) : (i32) -> (index) - memref.store %iv1, %data1[%1] : memref - memref.store %iv2, %data1[%2] : memref - omp.yield - } - } - - return -} - -// CHECK-LABEL: omp_loop_nest_pretty_multiple_collapse_tiles -func.func @omp_loop_nest_pretty_multiple_collapse_tiles(%lb1 : i32, %ub1 : i32, %step1 : i32, - %lb2 : i32, %ub2 : i32, %step2 : i32, %data1 : memref) -> () { - - omp.wsloop { - // CHECK: omp.loop_nest 
(%{{.*}}, %{{.*}}) : i32 = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) collapse(2) tiles(5, 10) - omp.loop_nest (%iv1, %iv2) : i32 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) tiles(5, 10) { - %1 = "test.payload"(%iv1) : (i32) -> (index) - %2 = "test.payload"(%iv2) : (i32) -> (index) - memref.store %iv1, %data1[%1] : memref - memref.store %iv2, %data1[%2] : memref - omp.yield - } - } - - return -} - // CHECK-LABEL: omp_wsloop func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memref, %linear_var : i32, %chunk_var : i32) -> () { diff --git a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir index d84641ff9c99b..b42e387acbb11 100644 --- a/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-wsloop-collapsed.mlir @@ -9,7 +9,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo %loop_lb = llvm.mlir.constant(0 : i32) : i32 %loop_step = llvm.mlir.constant(1 : index) : i32 omp.wsloop { - omp.loop_nest (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) collapse(2) { + omp.loop_nest (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) { %1 = llvm.add %arg1, %arg2 : i32 %2 = llvm.mul %arg2, %loop_ub overflow : i32 %3 = llvm.add %arg1, %2 :i32 diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index 27210bc0890ce..3f4dcd5e24c56 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -698,7 +698,7 @@ llvm.func @simd_simple(%lb : i64, %ub : i64, %step : i64, %arg0: !llvm.ptr) { // CHECK-LABEL: @simd_simple_multiple llvm.func @simd_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { omp.simd { - 
omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) inclusive step (%step1, %step2) collapse(2) { + omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) inclusive step (%step1, %step2) { %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 // The form of the emitted IR is controlled by OpenMPIRBuilder and // tested there. Just check that the right metadata is added and collapsed @@ -736,7 +736,7 @@ llvm.func @simd_simple_multiple(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64 // CHECK-LABEL: @simd_simple_multiple_simdlen llvm.func @simd_simple_multiple_simdlen(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { omp.simd simdlen(2) { - omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) { + omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) { %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 // The form of the emitted IR is controlled by OpenMPIRBuilder and // tested there. Just check that the right metadata is added. 
@@ -760,7 +760,7 @@ llvm.func @simd_simple_multiple_simdlen(%lb1 : i64, %ub1 : i64, %step1 : i64, %l // CHECK-LABEL: @simd_simple_multiple_safelen llvm.func @simd_simple_multiple_safelen(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { omp.simd safelen(2) { - omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) { + omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) { %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 %4 = llvm.getelementptr %arg0[%iv1] : (!llvm.ptr, i64) -> !llvm.ptr, f32 %5 = llvm.getelementptr %arg1[%iv2] : (!llvm.ptr, i64) -> !llvm.ptr, f32 @@ -779,7 +779,7 @@ llvm.func @simd_simple_multiple_safelen(%lb1 : i64, %ub1 : i64, %step1 : i64, %l // CHECK-LABEL: @simd_simple_multiple_simdlen_safelen llvm.func @simd_simple_multiple_simdlen_safelen(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { omp.simd simdlen(1) safelen(2) { - omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) collapse(2) { + omp.loop_nest (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) { %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 %4 = llvm.getelementptr %arg0[%iv1] : (!llvm.ptr, i64) -> !llvm.ptr, f32 %5 = llvm.getelementptr %arg1[%iv2] : (!llvm.ptr, i64) -> !llvm.ptr, f32 @@ -1177,7 +1177,7 @@ llvm.func @collapse_wsloop( // CHECK: store i32 %[[TOTAL_SUB_1]], ptr // CHECK: call void @__kmpc_for_static_init_4u omp.wsloop { - omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) collapse(3) { + omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) { %31 = llvm.load %20 : !llvm.ptr -> i32 %32 = llvm.add %31, %arg0 : i32 %33 = llvm.add %32, %arg1 : i32 @@ -1239,7 +1239,7 @@ llvm.func @collapse_wsloop_dynamic( // CHECK: store i32 %[[TOTAL]], ptr 
// CHECK: call void @__kmpc_dispatch_init_4u omp.wsloop schedule(dynamic) { - omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) collapse(3) { + omp.loop_nest (%arg0, %arg1, %arg2) : i32 = (%0, %1, %2) to (%3, %4, %5) step (%6, %7, %8) { %31 = llvm.load %20 : !llvm.ptr -> i32 %32 = llvm.add %31, %arg0 : i32 %33 = llvm.add %32, %arg1 : i32 From 2510e0f30c0af970fd436af776836cef2b377958 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 11 Sep 2025 12:18:46 +0200 Subject: [PATCH 39/57] Backport tblgen changes --- flang/lib/Lower/OpenMP/OpenMP.cpp | 3 +- flang/lib/Lower/OpenMP/Utils.cpp | 2 +- .../mlir/Dialect/OpenMP/OpenMPClauses.td | 104 +++++++++--------- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 13 ++- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 11 +- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 2 +- 6 files changed, 69 insertions(+), 66 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 4cf2d032170d7..7b4fb649ab383 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1619,7 +1619,8 @@ genLoopNestClauses(lower::AbstractConverter &converter, if (clause.id == llvm::omp::Clause::OMPC_collapse) { const auto &collapse = std::get(clause.u); int64_t collapseValue = evaluate::ToInt64(collapse.v).value(); - clauseOps.numCollapse = firOpBuilder.getI64IntegerAttr(collapseValue); + clauseOps.collapseNumLoops = + firOpBuilder.getI64IntegerAttr(collapseValue); } else if (clause.id == llvm::omp::Clause::OMPC_sizes) { // This case handles the stand-alone tiling construct const auto &sizes = std::get(clause.u); diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 91e40f6a0f5e6..00f1baa5094d1 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -699,7 +699,7 @@ bool collectLoopRelatedInfo( found = true; } - // Collect sizes from tile directive if present + // Collect sizes 
from tile directive if presentOpenMPToLLVMIRTranslation.cpp: std::int64_t sizesLengthValue = 0l; std::int64_t permutationLengthValue = 0l; if (auto *ompCons{eval.getIf()}) { diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td index 4a3ae30c2a82f..9102a4320c578 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td @@ -209,6 +209,23 @@ class OpenMP_BindClauseSkip< def OpenMP_BindClause : OpenMP_BindClauseSkip<>; +//===----------------------------------------------------------------------===// +// V5.2: [4.4.3] `collapse` clause +//===----------------------------------------------------------------------===// + +class OpenMP_CollapseClauseSkip< + bit traits = false, bit arguments = false, bit assemblyFormat = false, + bit description = false, bit extraClassDeclaration = false + > : OpenMP_Clause { + let arguments = (ins + ConfinedAttr, [IntMinValue<1>]> + :$collapse_num_loops + ); +} + +def OpenMP_CollapseClause : OpenMP_CollapseClauseSkip<>; + //===----------------------------------------------------------------------===// // V5.2: [5.7.2] `copyprivate` clause //===----------------------------------------------------------------------===// @@ -317,58 +334,6 @@ class OpenMP_DeviceClauseSkip< def OpenMP_DeviceClause : OpenMP_DeviceClauseSkip<>; -//===----------------------------------------------------------------------===// -// V5.2: [XX.X] `collapse` clause -//===----------------------------------------------------------------------===// - -class OpenMP_CollapseClauseSkip< - bit traits = false, bit arguments = false, bit assemblyFormat = false, - bit description = false, bit extraClassDeclaration = false - > : OpenMP_Clause { - let arguments = (ins - DefaultValuedOptionalAttr:$num_collapse - ); -} - -def OpenMP_CollapseClause : OpenMP_CollapseClauseSkip<>; - -//===----------------------------------------------------------------------===// 
-// V5.2: [xx.x] `sizes` clause -//===----------------------------------------------------------------------===// - -class OpenMP_TileSizesClauseSkip< - bit traits = false, bit arguments = false, bit assemblyFormat = false, - bit description = false, bit extraClassDeclaration = false - > : OpenMP_Clause { - let arguments = (ins - OptionalAttr:$tile_sizes - ); -} - -def OpenMP_TileSizesClause : OpenMP_TileSizesClauseSkip<>; - - - -//===----------------------------------------------------------------------===// -// V6.0: [xx.x] `permutation` clause -//===----------------------------------------------------------------------===// - -class OpenMP_PermutationClauseSkip< - bit traits = false, bit arguments = false, bit assemblyFormat = false, - bit description = false, bit extraClassDeclaration = false - > : OpenMP_Clause { - let arguments = (ins - BoolAttr:$interchangeEnabled, - OptionalAttr:$permutation - ); -} - -def OpenMP_PermutationClause : OpenMP_PermutationClauseSkip<>; - - //===----------------------------------------------------------------------===// // V5.2: [11.6.1] `dist_schedule` clause //===----------------------------------------------------------------------===// @@ -1437,6 +1402,41 @@ class OpenMP_ThreadLimitClauseSkip< def OpenMP_ThreadLimitClause : OpenMP_ThreadLimitClauseSkip<>; +//===----------------------------------------------------------------------===// +// V5.2: [9.1.1] `sizes` clause +//===----------------------------------------------------------------------===// + +class OpenMP_TileSizesClauseSkip< + bit traits = false, bit arguments = false, bit assemblyFormat = false, + bit description = false, bit extraClassDeclaration = false + > : OpenMP_Clause { + let arguments = (ins + OptionalAttr:$tile_sizes + ); +} + +def OpenMP_TileSizesClause : OpenMP_TileSizesClauseSkip<>; + + +//===----------------------------------------------------------------------===// +// V6.0: [xx.x] `permutation` clause 
+//===----------------------------------------------------------------------===// + +class OpenMP_PermutationClauseSkip< + bit traits = false, bit arguments = false, bit assemblyFormat = false, + bit description = false, bit extraClassDeclaration = false + > : OpenMP_Clause { + let arguments = (ins + BoolAttr:$interchangeEnabled, + OptionalAttr:$permutation + ); +} + +def OpenMP_PermutationClause : OpenMP_PermutationClauseSkip<>; + + //===----------------------------------------------------------------------===// // V5.2: [12.1] `untied` clause //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index e17315d923317..830b36f440098 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -614,15 +614,18 @@ def WorkshareLoopWrapperOp : OpenMP_Op<"workshare.loop_wrapper", traits = [ def LoopNestOp : OpenMP_Op<"loop_nest", traits = [ RecursiveMemoryEffects, SameVariadicOperandSize ], clauses = [ - OpenMP_LoopRelatedClause, OpenMP_CollapseClause, + OpenMP_LoopRelatedClause, OpenMP_TileSizesClause ], singleRegion = true> { let summary = "rectangular loop nest"; let description = [{ - This operation represents a collapsed rectangular loop nest. For each - rectangular loop of the nest represented by an instance of this operation, - lower and upper bounds, as well as a step variable, must be defined. + This operation represents a rectangular loop nest which may be collapsed + and/or tiled. For each rectangular loop of the nest represented by an + instance of this operation, lower and upper bounds, as well as a step + variable, must be defined. The collapse clause specifies how many loops + that should be collapsed (1 if no collapse is done) after any tiling is + performed. The tiling sizes is represented by the tile sizes clause. 
The lower and upper bounds specify a half-open range: the range includes the lower bound but does not include the upper bound. If the `loop_inclusive` @@ -635,7 +638,7 @@ def LoopNestOp : OpenMP_Op<"loop_nest", traits = [ `loop_steps` arguments. ```mlir - omp.loop_nest (%i1, %i2) : i32 = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) { + omp.loop_nest (%i1, %i2) : i32 = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) collapse(2) tiles(5,5) { %a = load %arrA[%i1, %i2] : memref %b = load %arrB[%i1, %i2] : memref %sum = arith.addf %a, %b : f32 diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index c59c8889cadcb..cc57cf66dc158 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -3024,7 +3024,7 @@ void LoopNestOp::print(OpAsmPrinter &p) { if (getLoopInclusive()) p << "inclusive "; p << "step (" << getLoopSteps() << ") "; - if (int64_t numCollapse = getNumCollapse()) + if (int64_t numCollapse = getCollapseNumLoops()) if (numCollapse > 1) p << "collapse(" << numCollapse << ") "; @@ -3037,10 +3037,9 @@ void LoopNestOp::print(OpAsmPrinter &p) { void LoopNestOp::build(OpBuilder &builder, OperationState &state, const LoopNestOperands &clauses) { MLIRContext *ctx = builder.getContext(); - - LoopNestOp::build(builder, state, clauses.loopLowerBounds, - clauses.loopUpperBounds, clauses.loopSteps, - clauses.loopInclusive, clauses.numCollapse, + LoopNestOp::build(builder, state, clauses.collapseNumLoops, + clauses.loopLowerBounds, clauses.loopUpperBounds, + clauses.loopSteps, clauses.loopInclusive, makeDenseI64ArrayAttr(ctx, clauses.tileSizes)); } @@ -3059,7 +3058,7 @@ LogicalResult LoopNestOp::verify() { uint64_t numIVs = getIVs().size(); - if (const auto &numCollapse = getNumCollapse()) + if (const auto &numCollapse = getCollapseNumLoops()) if (numCollapse > numIVs) return emitOpError() << "collapse value is larger than the number of loops"; diff --git 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index c2017398bf264..6600747ad85e8 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3073,7 +3073,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, } // Tiling done // Do collapse - if (const auto &numCollapse = loopOp.getNumCollapse()) { + if (const auto &numCollapse = loopOp.getCollapseNumLoops()) { SmallVector collapseLoopInfos( loopInfos.begin(), loopInfos.begin() + (numCollapse)); From de3c360886e1c5882b955570060d3749f4dd1458 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 11 Sep 2025 15:48:51 +0200 Subject: [PATCH 40/57] add Fortran testing --- flang/lib/Lower/OpenMP/Utils.cpp | 2 +- .../Dialect/OpenMP/OpenMPClauseOperands.h | 2 +- .../Conversion/SCFToOpenMP/SCFToOpenMP.cpp | 7 ++- .../parallel-wsloop-collapse-intdo.f90 | 33 ++++++++++ .../parallel-wsloop-collapse-intdo.o | Bin 0 -> 3179 bytes openmp/runtime/test/transform/tile/intdo.f90 | 58 +++++++++++++++++ .../tile/parallel-wsloop-collapse-intdo.f90 | 59 ++++++++++++++++++ 7 files changed, 156 insertions(+), 5 deletions(-) create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.f90 create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.o create mode 100644 openmp/runtime/test/transform/tile/intdo.f90 create mode 100644 openmp/runtime/test/transform/tile/parallel-wsloop-collapse-intdo.f90 diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 00f1baa5094d1..91e40f6a0f5e6 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -699,7 +699,7 @@ bool collectLoopRelatedInfo( found = true; } - // Collect sizes from tile directive if presentOpenMPToLLVMIRTranslation.cpp: + // Collect sizes from tile 
directive if present std::int64_t sizesLengthValue = 0l; std::int64_t permutationLengthValue = 0l; if (auto *ompCons{eval.getIf()}) { diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h index faf820dcfdb29..6a92b136ef51c 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h @@ -40,7 +40,7 @@ struct DeviceTypeClauseOps { /// Clauses that correspond to operations other than omp.target, but might have /// to be evaluated outside of a parent target region. using HostEvaluatedOperands = - detail::Clauses; // TODO: Add `indirect` clause. diff --git a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp index 19fbefb48a378..460595ba9f254 100644 --- a/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp +++ b/mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp @@ -492,9 +492,10 @@ struct ParallelOpLowering : public OpRewritePattern { // Create loop nest and populate region with contents of scf.parallel. auto loopOp = omp::LoopNestOp::create( - rewriter, parallelOp.getLoc(), parallelOp.getLowerBound(), - parallelOp.getUpperBound(), parallelOp.getStep(), false, - parallelOp.getLowerBound().size(), nullptr); + rewriter, parallelOp.getLoc(), parallelOp.getLowerBound().size(), + parallelOp.getLowerBound(), parallelOp.getUpperBound(), + parallelOp.getStep(), /*loop_inclusive=*/false, + /*tile_sizes=*/nullptr); rewriter.inlineRegionBefore(parallelOp.getRegion(), loopOp.getRegion(), loopOp.getRegion().begin()); diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.f90 new file mode 100644 index 0000000000000..498534374ea30 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.f90 @@ -0,0 +1,33 @@ + +! 
RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_collapse_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO COLLAPSE(2) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.o b/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.o new file mode 100644 index 0000000000000000000000000000000000000000..a0abcfdf74fdaf7e6ab474ce4421da0c0e191065 GIT binary patch literal 3179 zcmZ`*UyKt)7@uVecbtb-MMC^XJ&PX7mHxZtj*Fp%+h#Ae@eZ%Vhn~x}-5$$rcbnZU zpbrY3lB`V=jW23^QeTYvs3Os%CnQGFh?aXg} z^ZRDL@0;D3R!-hWu;_h+j8H^K#8!^kF2?5MIzrBKgzOJ6G7L?KOdpI?WLsvGU!Pv@ zLkiL)+K!YgtDEiZ9Y@`_6s9N6c;=`g(Egy20rl9fU!$8@ks{2ZKGjM?fO> z=z8bFbj=m`d3`qCl6HIjDbJGanfUl1RNfUd+p*uwQX_ zu%HR#Ib9GJZ>GhT7EXVUl-R;RNahsERuGELlIY}b`2*+(8))WlG0@tCvs3K+B(h%S z6l}gLWK6C_(;d4=h25(?>}%<2EBbJ&!*?jJh1I;7$T%_Ks<5tLjYpy ztMGAXo17yAZjjEuWg!RyWLX%*b(V#Gbj|hC_1Y=i^c^39(OfIPIoFyC=30e8u_g4! 
z{?0kYa46@@hKEIGG8|o=43FV7(59hbPB!yFNL5o9+;MKP4Sw8?wH@J3Q@G;@D^2*% z-yT0_rkeg*EV8xfi+a;}FswII!CzH6s|3_`NejhBV4{g zn!**Ycd02{YUVFH`MZvQv7(Qsac^VojML7z>8Cq_@q{3bVH7c!=q_An7W~`;zRQN1 z`3n@FbKnq-^Un3HtE;QxiLLyaBPg0bHdNV~YLcNWmS)Y>tg(z3`xf-CSD2LxZZgPMe zhnlV11jN?;2oSy}0rEN2Y~8N`v32hPV(W6S{cPP$fEYIg=xJ*En$NufC``F`eC}O9 z%=QDH`w-A}s#^hsZ)||90%HC>2;bc^lnViRmZF_L8U@7ss{`6WM;m}1qv&-&&r!q; z@WKGU-sGkU0lXb>9Pz&0#e$`nRb7E*56SwxQmo6StZ9lia@5icqdrnLG)=Bs$_R)mMx;6( zB^6_MNPS^wXJ{(AGc*)B%;1iSp(_&&eL**l>V6xYh)zsE1W+IrTDG{Y*X(scCqE1v zZVX4#J__xc6tbB@ZZZza=?lp3mLtGX4@)KZrM`sJG4MO8f`HxCiZmTk3+ zrmzY*g%y;8;3?jMgRDzy9`=+Kp7aCQ01xjgc)II2&QrJ9Xy?reuLI#ZoE6`wA9DBq zh78=k9fv&(f`-y=g$!sqZd4xn-5O*-({Y=kr#y!zJC>h6+qIGT-g%Sa`N4DeJz?R_ z5B$E~-=OzhA5gQ7bXw84IOY{0hcA9|d4q42b*)g-ZG|@y&(Z4%(Cn70U-tP{Gp-eC zx~(wFJm+QoyFF^G-?w_pwL(p|)kbP1QXcMqv2_3$%=5k57r#rM!}}M@@}JdFWI)q# z^WyI$mP3r^n~xJoWC%%D71@qN@uVlKtDqs$%uG#IbtK?*4Y@wzH|-UcuU)Rs$TH?l zPvA3u^)TP|Fn<8kh6mSTd*9bHvzIkAW@~Nj(fopgZef~?*M*ib^rwKY@xnX^gZCm; zd#vyr_6qDyRn-)?`wwJnp36?{&l{D7&l2)0cqd5ka#VC16>TZCk0V)lHkmyeZdSwq>)+TF@&~2C%kL zQ}m7zR}aG-gUsW4!#X7CjhbX1G8MUEd6AT4(^M7HdqE-*qgI#Tc=g8THKQbJt`$z; z4L}MOq%xes9fX&YDkP;dS{7HvDyu-x%#^iPQpr@-h4Ha?JT;z}jK|~q;?d-IB8n~a zqNz+5JKkZN5>lgXcRW=}R8om(Dxp9XE5|GHSTZ>_D#t6;ST&mLbz}-|l6l2U#L2%1 Co=emK literal 0 HcmV?d00001 diff --git a/openmp/runtime/test/transform/tile/intdo.f90 b/openmp/runtime/test/transform/tile/intdo.f90 new file mode 100644 index 0000000000000..40e4c2c53f89c --- /dev/null +++ b/openmp/runtime/test/transform/tile/intdo.f90 @@ -0,0 +1,58 @@ +! This test checks lowering of OpenMP tile directive +! XFAIL: * + +! RUN: %flang %flags %openmp_flags -fopenmp-version=51 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + + +program tile_intdo + integer i, j + print *, 'do' + + !$OMP TILE SIZES(2,2) + do i=7, 18, 3 + do j=7, 19, 3 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END TILE + + print *, 'done' +end program + + +! CHECK: do + +! Complete tile +! 
CHECK-NEXT: i=7 j=7 +! CHECK-NEXT: i=7 j=10 +! CHECK-NEXT: i=10 j=7 +! CHECK-NEXT: i=10 j=10 + +! Complete tile +! CHECK-NEXT: i=7 j=13 +! CHECK-NEXT: i=7 j=16 +! CHECK-NEXT: i=10 j=13 +! CHECK-NEXT: i=10 j=16 + +! Partial tile +! CHECK-NEXT: i=7 j=19 +! CHECK-NEXT: i=10 j=19 + +! Complete tile +! CHECK-NEXT: i=13 j=7 +! CHECK-NEXT: i=13 j=10 +! CHECK-NEXT: i=16 j=7 +! CHECK-NEXT: i=16 j=10 + +! Complete tile +! CHECK-NEXT: i=13 j=13 +! CHECK-NEXT: i=13 j=16 +! CHECK-NEXT: i=16 j=13 +! CHECK-NEXT: i=16 j=16 + +! Partial tile +! CHECK-NEXT: i=13 j=19 +! CHECK-NEXT: i=16 j=19 + +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/tile/parallel-wsloop-collapse-intdo.f90 b/openmp/runtime/test/transform/tile/parallel-wsloop-collapse-intdo.f90 new file mode 100644 index 0000000000000..66bddf30e045a --- /dev/null +++ b/openmp/runtime/test/transform/tile/parallel-wsloop-collapse-intdo.f90 @@ -0,0 +1,59 @@ +! This test checks lowering of OpenMP tile directive + +! RUN: %flang %flags %openmp_flags -fopenmp-version=51 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + + +program tile_wsloop_collapse_intdo + integer i, j + print *, 'do' + + !$OMP PARALLEL DO COLLAPSE(2) NUM_THREADS(1) + !$OMP TILE SIZES(2,2) + do i=7, 18, 3 + do j=7, 19, 3 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END TILE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do + +! Complete tile +! CHECK-NEXT: i=7 j=7 +! CHECK-NEXT: i=7 j=10 +! CHECK-NEXT: i=10 j=7 +! CHECK-NEXT: i=10 j=10 + +! Complete tile +! CHECK-NEXT: i=7 j=13 +! CHECK-NEXT: i=7 j=16 +! CHECK-NEXT: i=10 j=13 +! CHECK-NEXT: i=10 j=16 + +! Partial tile +! CHECK-NEXT: i=7 j=19 +! CHECK-NEXT: i=10 j=19 + +! Complete tile +! CHECK-NEXT: i=13 j=7 +! CHECK-NEXT: i=13 j=10 +! CHECK-NEXT: i=16 j=7 +! CHECK-NEXT: i=16 j=10 + +! Complete tile +! CHECK-NEXT: i=13 j=13 +! CHECK-NEXT: i=13 j=16 +! CHECK-NEXT: i=16 j=13 +! CHECK-NEXT: i=16 j=16 + +! Partial tile +! CHECK-NEXT: i=13 j=19 +! 
CHECK-NEXT: i=16 j=19 + +! CHECK-NEXT: done From 0a5a05cf503fd1dde25ba91ca49dbeb55be80884 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 11 Sep 2025 17:26:40 +0200 Subject: [PATCH 41/57] systematic testing --- .../distribute-parallel-wsloop-intdo.f90 | 37 +++++++++++++++++++ .../distribute-parallel-wsloop-simd-intdo.f90 | 37 +++++++++++++++++++ .../interchange/distribute-simd-intdo.f90 | 37 +++++++++++++++++++ .../test/transform/interchange/intdo.f90 | 31 ++++++++++++++++ .../interchange/parallel-wsloop-intdo.f90 | 33 +++++++++++++++++ .../interchange/taskloop-simd-intdo.f90 | 33 +++++++++++++++++ .../transform/interchange/wsloop-intdo.f90 | 33 +++++++++++++++++ .../interchange/wsloop-simd-intdo.f90 | 33 +++++++++++++++++ 8 files changed, 274 insertions(+) create mode 100644 openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-intdo.f90 create mode 100644 openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-simd-intdo.f90 create mode 100644 openmp/runtime/test/transform/interchange/distribute-simd-intdo.f90 create mode 100644 openmp/runtime/test/transform/interchange/intdo.f90 create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-intdo.f90 create mode 100644 openmp/runtime/test/transform/interchange/taskloop-simd-intdo.f90 create mode 100644 openmp/runtime/test/transform/interchange/wsloop-intdo.f90 create mode 100644 openmp/runtime/test/transform/interchange/wsloop-simd-intdo.f90 diff --git a/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-intdo.f90 b/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-intdo.f90 new file mode 100644 index 0000000000000..b5ef5214f5064 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-intdo.f90 @@ -0,0 +1,37 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! 
RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP TEAMS NUM_TEAMS(1) + + !$OMP DISTRIBUTE PARALLEL DO SCHEDULE(static,2) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END DISTRIBUTE PARALLEL DO + + !$OMP END TEAMS + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-simd-intdo.f90 b/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-simd-intdo.f90 new file mode 100644 index 0000000000000..2a192cad017a6 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-simd-intdo.f90 @@ -0,0 +1,37 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP TEAMS NUM_TEAMS(1) + + !$OMP DISTRIBUTE PARALLEL DO SIMD SCHEDULE(static,2) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END DISTRIBUTE PARALLEL DO SIMD + + !$OMP END TEAMS + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! 
CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/distribute-simd-intdo.f90 b/openmp/runtime/test/transform/interchange/distribute-simd-intdo.f90 new file mode 100644 index 0000000000000..fce62b7f3ccda --- /dev/null +++ b/openmp/runtime/test/transform/interchange/distribute-simd-intdo.f90 @@ -0,0 +1,37 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP TEAMS NUM_TEAMS(1) + + !$OMP DISTRIBUTE SIMD + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END DISTRIBUTE SIMD + + !$OMP END TEAMS + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/intdo.f90 b/openmp/runtime/test/transform/interchange/intdo.f90 new file mode 100644 index 0000000000000..fe6820f41dba6 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/intdo.f90 @@ -0,0 +1,31 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_intdo + integer :: i, j + print *, 'do' + + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! 
CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo.f90 new file mode 100644 index 0000000000000..cfa3bddf5c8d5 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/taskloop-simd-intdo.f90 b/openmp/runtime/test/transform/interchange/taskloop-simd-intdo.f90 new file mode 100644 index 0000000000000..2e8293dd6bec6 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/taskloop-simd-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP TASKLOOP SIMD SCHEDULE(static,2) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END TASKLOOP SIMD + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! 
CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/wsloop-intdo.f90 b/openmp/runtime/test/transform/interchange/wsloop-intdo.f90 new file mode 100644 index 0000000000000..32b1b87a9e859 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/wsloop-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP DO SCHEDULE(static,2) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/wsloop-simd-intdo.f90 b/openmp/runtime/test/transform/interchange/wsloop-simd-intdo.f90 new file mode 100644 index 0000000000000..56ed14b165fa3 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/wsloop-simd-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP DO SIMD SCHEDULE(static,2) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END DO SIMD + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! 
CHECK-NEXT: done From e84423057ba7fdf2cb8ae9c4c13a00c31d5eaba7 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 11 Sep 2025 23:17:32 +0200 Subject: [PATCH 42/57] Implement standalone interchange --- flang/include/flang/Lower/AbstractConverter.h | 5 + flang/lib/Lower/Bridge.cpp | 114 ++++ flang/lib/Lower/OpenMP/OpenMP.cpp | 568 +++++++++++++++++- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 5 + .../interchange/intdo-permutation.f90 | 42 ++ .../parallel-wsloop-intdo-firstprivate.f90 | 35 ++ .../parallel-wsloop-intdo-private.f90 | 34 ++ .../parallel-wsloop-intdo-reduction.f90 | 27 + ...teams-distribute-parallel-wsloop-intdo.f90 | 33 + .../transform/interchange/taskloop-intdo.f90 | 35 ++ .../interchange/taskloop-simd-intdo.f90 | 3 +- ...teams-distribute-parallel-wsloop-intdo.f90 | 33 + 12 files changed, 932 insertions(+), 2 deletions(-) create mode 100644 openmp/runtime/test/transform/interchange/intdo-permutation.f90 create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-firstprivate.f90 create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private.f90 create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-reduction.f90 create mode 100644 openmp/runtime/test/transform/interchange/target-teams-distribute-parallel-wsloop-intdo.f90 create mode 100644 openmp/runtime/test/transform/interchange/taskloop-intdo.f90 create mode 100644 openmp/runtime/test/transform/interchange/teams-distribute-parallel-wsloop-intdo.f90 diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h index 8e9de418e1b7e..e7c1cb92c445e 100644 --- a/flang/include/flang/Lower/AbstractConverter.h +++ b/flang/include/flang/Lower/AbstractConverter.h @@ -193,6 +193,9 @@ class AbstractConverter { std::unique_ptr expression, mlir::Type eleTy) = 0; + + + //===--------------------------------------------------------------------===// // Expressions 
//===--------------------------------------------------------------------===// @@ -381,6 +384,8 @@ class AbstractConverter { virtual mlir::StateStack &getStateStack() = 0; + virtual void genPermutatedLoops( llvm:: ArrayRef doStmts, Fortran:: lower::pft::Evaluation *innermostDo) = 0; + private: /// Options controlling lowering behavior. const Fortran::lower::LoweringOptions &loweringOptions; diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 6125ea9153662..0f21235872edb 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -2187,6 +2187,120 @@ class FirConverter : public Fortran::lower::AbstractConverter { // so no clean-up needs to be generated for these entities. } + void genPermutatedLoops( llvm:: ArrayRef doStmts, Fortran:: lower::pft::Evaluation *innermostDo) override { + // Fortran::lower::pft::Evaluation &eval = getEval(); + // bool unstructuredContext = eval.lowerAsUnstructured(); + + llvm:: SmallVector< mlir::Block *> headerBlocks; + llvm:: SmallVector loopInfos; + + auto enterLoop = [&](Fortran:: lower::pft::Evaluation &eval) { eval.dump(); + bool unstructuredContext = eval.lowerAsUnstructured(); + + // Collect loop nest information. + // Generate begin loop code directly for infinite and while loops. + Fortran::lower::pft::Evaluation &doStmtEval = eval.getFirstNestedEvaluation(); + auto *doStmt = doStmtEval.getIf(); + const auto &loopControl = std::get>(doStmt->t); + mlir::Block *preheaderBlock = doStmtEval.block; + mlir::Block *beginBlock = preheaderBlock ? preheaderBlock : builder->getBlock(); + auto createNextBeginBlock = [&]() { + // Step beginBlock through unstructured preheader, header, and mask + // blocks, created in outermost to innermost order. + return beginBlock = beginBlock->splitBlock(beginBlock->end()); + }; + mlir::Block *headerBlock = unstructuredContext ? 
createNextBeginBlock() : nullptr; + headerBlocks.push_back(headerBlock); + mlir::Block *bodyBlock = doStmtEval.lexicalSuccessor->block; + mlir::Block *exitBlock = doStmtEval.parentConstruct->constructExit->block; + IncrementLoopNestInfo &incrementLoopNestInfo = loopInfos.emplace_back(); + const Fortran::parser::ScalarLogicalExpr *whileCondition = nullptr; + bool infiniteLoop = !loopControl.has_value(); + if (infiniteLoop) { + assert(unstructuredContext && "infinite loop must be unstructured"); + startBlock(headerBlock); + } else if ((whileCondition = + std::get_if( + &loopControl->u))) { + assert(unstructuredContext && "while loop must be unstructured"); + maybeStartBlock(preheaderBlock); // no block or empty block + startBlock(headerBlock); + genConditionalBranch(*whileCondition, bodyBlock, exitBlock); + } else if (const auto *bounds = + std::get_if( + &loopControl->u)) { + // Non-concurrent increment loop. + IncrementLoopInfo &info = incrementLoopNestInfo.emplace_back( + *bounds->name.thing.symbol, bounds->lower, bounds->upper, + bounds->step); + if (unstructuredContext) { + maybeStartBlock(preheaderBlock); + info.hasRealControl = info.loopVariableSym->GetType()->IsNumeric( + Fortran::common::TypeCategory::Real); + info.headerBlock = headerBlock; + info.bodyBlock = bodyBlock; + info.exitBlock = exitBlock; + } + } else { + llvm_unreachable("Cannot permute DO CONCURRENT"); + } + + + + // Increment loop begin code. (Infinite/while code was already generated.) 
+ if (!infiniteLoop && !whileCondition) + genFIRIncrementLoopBegin(incrementLoopNestInfo, doStmtEval.dirs); + }; + + + + auto leaveLoop = [&](Fortran:: lower::pft::Evaluation &eval, mlir::Block *headerBlock , IncrementLoopNestInfo &incrementLoopNestInfo) { eval.dump(); + bool unstructuredContext = eval.lowerAsUnstructured(); + + Fortran::lower::pft::Evaluation &doStmtEval = eval.getFirstNestedEvaluation(); + auto *doStmt = doStmtEval.getIf(); + + const auto &loopControl = std::get>(doStmt->t); + bool infiniteLoop = !loopControl.has_value(); + const Fortran::parser::ScalarLogicalExpr *whileCondition = std::get_if( &loopControl->u); + + auto iter = std::prev( eval.getNestedEvaluations().end()); + + // An EndDoStmt in unstructured code may start a new block. + Fortran::lower::pft::Evaluation &endDoEval = *iter; + assert(endDoEval.getIf() && "no enddo stmt"); + if (unstructuredContext) + maybeStartBlock(endDoEval.block); + + // Loop end code. + if (infiniteLoop || whileCondition) + genBranch(headerBlock); + else + genFIRIncrementLoopEnd(incrementLoopNestInfo); + + // This call may generate a branch in some contexts. + genFIR(endDoEval, unstructuredContext); + }; + + + + + + for (auto l : doStmts) enterLoop(*l); + + + // Loop body code. + // Fortran:: lower::pft::Evaluation *innermostEval = doStmts.back(); innermostEval->dump(); + bool innermostUnstructuredContext = innermostDo->lowerAsUnstructured(); + + auto iter = innermostDo->getNestedEvaluations().begin(); + for (auto end = --innermostDo->getNestedEvaluations().end(); iter != end; ++iter) + genFIR(*iter, innermostUnstructuredContext); + + for (auto &&[l,headerBlock,li] : llvm::zip_equal( doStmts,headerBlocks,loopInfos)) + leaveLoop(*l,headerBlock,li); +} + /// Generate FIR for a DO construct. 
There are six variants: /// - unstructured infinite and while loops /// - structured and unstructured increment loops diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 7b4fb649ab383..7adedd934d582 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -45,6 +45,11 @@ #include "mlir/Transforms/RegionUtils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" using namespace Fortran::lower::omp; using namespace Fortran::common::openmp; @@ -2274,6 +2279,567 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter, mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli); } +static +void collectLoops( lower::pft::Evaluation &eval, + llvm::SmallVectorImpl< lower::pft::Evaluation* > &result, + int numLoops) { + + + + + + std::size_t loopVarTypeSize = 0; + lower::pft::Evaluation *doConstructEval = &eval.getFirstNestedEvaluation(); + for (auto i : llvm::seq(numLoops)) { + lower::pft::Evaluation *doLoop = &doConstructEval->getFirstNestedEvaluation(); + auto *doStmt = doLoop->getIf(); + assert(doStmt && "Expected do loop to be in the nested evaluation"); + const auto &loopControl = std::get>(doStmt->t); + const parser::LoopControl::Bounds *bounds = std::get_if(&loopControl->u); + assert(bounds && "Expected bounds for worksharing do loop"); + lower::StatementContext stmtCtx; + + + result.push_back(doConstructEval); + + + doConstructEval = &*std::next(doConstructEval->getNestedEvaluations().begin()); + }; +} + + +static void enterDoLoop( lower::pft::Evaluation * doStmt) { +} + + + +static void leaveDoLoop( lower::pft::Evaluation *doStmt) { +} + + + #if 0 + +// copied from Bridge.cpp +namespace { +struct IncrementLoopInfo { + template + explicit 
IncrementLoopInfo(Fortran::semantics::Symbol &sym, const T &lower, + const T &upper, const std::optional &step, + bool isConcurrent = false) + : loopVariableSym{&sym}, lowerExpr{Fortran::semantics::GetExpr(lower)}, + upperExpr{Fortran::semantics::GetExpr(upper)}, + stepExpr{Fortran::semantics::GetExpr(step)}, + isConcurrent{isConcurrent} {} + + IncrementLoopInfo(IncrementLoopInfo &&) = default; + IncrementLoopInfo &operator=(IncrementLoopInfo &&x) = default; + + bool isStructured() const { return !headerBlock; } + + mlir::Type getLoopVariableType() const { + assert(loopVariable && "must be set"); + return fir::unwrapRefType(loopVariable.getType()); + } + + bool hasLocalitySpecs() const { + return !localSymList.empty() || !localInitSymList.empty() || + !reduceSymList.empty() || !sharedSymList.empty(); + } + + // Data members common to both structured and unstructured loops. + const Fortran::semantics::Symbol *loopVariableSym; + const Fortran::lower::SomeExpr *lowerExpr; + const Fortran::lower::SomeExpr *upperExpr; + const Fortran::lower::SomeExpr *stepExpr; + const Fortran::lower::SomeExpr *maskExpr = nullptr; + bool isConcurrent; + llvm::SmallVector localSymList; + llvm::SmallVector localInitSymList; + llvm::SmallVector reduceSymList; + llvm::SmallVector reduceOperatorList; + llvm::SmallVector sharedSymList; + mlir::Value loopVariable = nullptr; + + // Data members for structured loops. + mlir::Operation *loopOp = nullptr; + + // Data members for unstructured loops. 
+ bool hasRealControl = false; + mlir::Value tripVariable = nullptr; + mlir::Value stepVariable = nullptr; + mlir::Block *headerBlock = nullptr; // loop entry and test block + mlir::Block *maskBlock = nullptr; // concurrent loop mask block + mlir::Block *bodyBlock = nullptr; // first loop body block + mlir::Block *exitBlock = nullptr; // loop exit target block +}; + +using IncrementLoopNestInfo = llvm::SmallVector; + +struct MyFirConverter : public Fortran::lower::AbstractConverter { + fir::FirOpBuilder *builder = nullptr; + Fortran::parser::CharBlock currentPosition; + + /// Return the predicate: "current block does not have a terminator branch". + bool blockIsUnterminated() { + mlir::Block *currentBlock = builder->getBlock(); + return currentBlock->empty() || !currentBlock->back().hasTrait(); + } + + /// Convert a parser CharBlock to a Location + mlir::Location toLocation(const Fortran::parser::CharBlock &cb) { + return genLocation(cb); + } + + mlir::Location toLocation() { return toLocation(currentPosition); } + + +#if 0 + static mlir::Location genLocation(Fortran::parser::SourcePosition pos, mlir::MLIRContext &ctx) { + llvm::SmallString<256> path(*pos.path); + llvm::sys::fs::make_absolute(path); + llvm::sys::path::remove_dots(path); + return mlir::FileLineColLoc::get(&ctx, path.str(), pos.line, pos.column); + } +#endif + + + void genBranch(mlir::Block *targetBlock) { + assert(targetBlock && "missing unconditional target block"); + mlir::cf::BranchOp::create(*builder, toLocation(), targetBlock); + } + + /// Unconditionally switch code insertion to a new block. + void startBlock(mlir::Block *newBlock) { + assert(newBlock && "missing block"); + // Default termination for the current block is a fallthrough branch to + // the new block. + if (blockIsUnterminated()) + genBranch(newBlock); + // Some blocks may be re/started more than once, and might not be empty. 
+ // If the new block already has (only) a terminator, set the insertion + // point to the start of the block. Otherwise set it to the end. + builder->setInsertionPointToStart(newBlock); + if (blockIsUnterminated()) + builder->setInsertionPointToEnd(newBlock); + } + + + /// Conditionally switch code insertion to a new block. + void maybeStartBlock(mlir::Block *newBlock) { + if (newBlock) + startBlock(newBlock); + } + + + void genConditionalBranch(mlir::Value cond, mlir::Block *trueTarget, + mlir::Block *falseTarget) { + assert(trueTarget && "missing conditional branch true block"); + assert(falseTarget && "missing conditional branch false block"); + mlir::Location loc = toLocation(); + mlir::Value bcc = builder->createConvert(loc, builder->getI1Type(), cond); + mlir::cf::CondBranchOp::create(*builder, loc, bcc, trueTarget, + mlir::ValueRange{}, falseTarget, + mlir::ValueRange{}); + } + + + void genConditionalBranch(mlir::Value cond, + Fortran::lower::pft::Evaluation *trueTarget, + Fortran::lower::pft::Evaluation *falseTarget) { + genConditionalBranch(cond, trueTarget->block, falseTarget->block); + } + + mlir::Value createFIRExpr(mlir::Location loc, + const Fortran::lower::SomeExpr *expr, + Fortran::lower::StatementContext &stmtCtx) { + return fir::getBase(genExprValue(*expr, stmtCtx, &loc)); + } + + + void genConditionalBranch(const Fortran::parser::ScalarLogicalExpr &expr, + mlir::Block *trueTarget, mlir::Block *falseTarget) { + Fortran::lower::StatementContext stmtCtx; + mlir::Value cond = createFIRExpr(toLocation(), Fortran::semantics::GetExpr(expr), stmtCtx); + stmtCtx.finalizeAndReset(); + genConditionalBranch(cond, trueTarget, falseTarget); + } + + + void genConditionalBranch(const Fortran::parser::ScalarLogicalExpr &expr, + Fortran::lower::pft::Evaluation *trueTarget, + Fortran::lower::pft::Evaluation *falseTarget) { + Fortran::lower::StatementContext stmtCtx; + mlir::Value cond = createFIRExpr(toLocation(), Fortran::semantics::GetExpr(expr), stmtCtx); + 
stmtCtx.finalizeAndReset(); + genConditionalBranch(cond, trueTarget->block, falseTarget->block); + } + + /// Generate the address of loop variable \p sym. + /// If \p sym is not mapped yet, allocate local storage for it. + mlir::Value genLoopVariableAddress(mlir::Location loc, + const Fortran::semantics::Symbol &sym, + bool isUnordered) { + if (!shallowLookupSymbol(sym) && + (isUnordered || + GetSymbolDSA(sym).test(Fortran::semantics::Symbol::Flag::OmpPrivate) || + GetSymbolDSA(sym).test( + Fortran::semantics::Symbol::Flag::OmpFirstPrivate) || + GetSymbolDSA(sym).test( + Fortran::semantics::Symbol::Flag::OmpLastPrivate) || + GetSymbolDSA(sym).test(Fortran::semantics::Symbol::Flag::OmpLinear))) { + // Do concurrent loop variables are not mapped yet since they are + // local to the Do concurrent scope (same for OpenMP loops). + mlir::OpBuilder::InsertPoint insPt = builder->saveInsertionPoint(); + builder->setInsertionPointToStart(builder->getAllocaBlock()); + mlir::Type tempTy = genType(sym); + mlir::Value temp = + builder->createTemporaryAlloc(loc, tempTy, toStringRef(sym.name())); + bindIfNewSymbol(sym, temp); + builder->restoreInsertionPoint(insPt); + } + auto entry = lookupSymbol(sym); + (void)entry; + assert(entry && "loop control variable must already be in map"); + Fortran::lower::StatementContext stmtCtx; + return fir::getBase( + genExprAddr(Fortran::evaluate::AsGenericExpr(sym).value(), stmtCtx)); + } + + /// Generate FIR to begin a structured or unstructured increment loop nest. 
+ void genFIRIncrementLoopBegin( + IncrementLoopNestInfo &incrementLoopNestInfo, + llvm::SmallVectorImpl &dirs) { + assert(!incrementLoopNestInfo.empty() && "empty loop nest"); + mlir::Location loc = toLocation(); + mlir::arith::IntegerOverflowFlags iofBackup{}; + + llvm::SmallVector nestLBs; + llvm::SmallVector nestUBs; + llvm::SmallVector nestSts; + llvm::SmallVector nestReduceOperands; + llvm::SmallVector nestReduceAttrs; + bool genDoConcurrent = false; + + for (IncrementLoopInfo &info : incrementLoopNestInfo) { + genDoConcurrent = info.isStructured() && info.isConcurrent; + + if (!genDoConcurrent) + info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym, info.isConcurrent); + + if (!getLoweringOptions().getIntegerWrapAround()) { + iofBackup = builder->getIntegerOverflowFlags(); + builder->setIntegerOverflowFlags( + mlir::arith::IntegerOverflowFlags::nsw); + } + + nestLBs.push_back(genControlValue(info.lowerExpr, info)); + nestUBs.push_back(genControlValue(info.upperExpr, info)); + bool isConst = true; + nestSts.push_back(genControlValue( + info.stepExpr, info, info.isStructured() ? nullptr : &isConst)); + + if (!getLoweringOptions().getIntegerWrapAround()) + builder->setIntegerOverflowFlags(iofBackup); + + // Use a temp variable for unstructured loops with non-const step. + if (!isConst) { + mlir::Value stepValue = nestSts.back(); + info.stepVariable = builder->createTemporary(loc, stepValue.getType()); + fir::StoreOp::create(*builder, loc, stepValue, info.stepVariable); + } + } + + for (auto [info, lowerValue, upperValue, stepValue] : + llvm::zip_equal(incrementLoopNestInfo, nestLBs, nestUBs, nestSts)) { + // Structured loop - generate fir.do_loop. + if (info.isStructured()) { + if (genDoConcurrent) + continue; + + // The loop variable is a doLoop op argument. 
+ mlir::Type loopVarType = info.getLoopVariableType(); + auto loopOp = fir::DoLoopOp::create( + *builder, loc, lowerValue, upperValue, stepValue, + /*unordered=*/false, + /*finalCountValue=*/true, + builder->createConvert(loc, loopVarType, lowerValue)); + info.loopOp = loopOp; + builder->setInsertionPointToStart(loopOp.getBody()); + mlir::Value loopValue = loopOp.getRegionIterArgs()[0]; + + // Update the loop variable value in case it has non-index references. + fir::StoreOp::create(*builder, loc, loopValue, info.loopVariable); + addLoopAnnotationAttr(info, dirs); + continue; + } + + // Unstructured loop preheader - initialize tripVariable and loopVariable. + mlir::Value tripCount; + if (info.hasRealControl) { + auto diff1 = + mlir::arith::SubFOp::create(*builder, loc, upperValue, lowerValue); + auto diff2 = + mlir::arith::AddFOp::create(*builder, loc, diff1, stepValue); + tripCount = + mlir::arith::DivFOp::create(*builder, loc, diff2, stepValue); + tripCount = + builder->createConvert(loc, builder->getIndexType(), tripCount); + } else { + auto diff1 = + mlir::arith::SubIOp::create(*builder, loc, upperValue, lowerValue); + auto diff2 = + mlir::arith::AddIOp::create(*builder, loc, diff1, stepValue); + tripCount = + mlir::arith::DivSIOp::create(*builder, loc, diff2, stepValue); + } + if (forceLoopToExecuteOnce) { // minimum tripCount is 1 + mlir::Value one = + builder->createIntegerConstant(loc, tripCount.getType(), 1); + auto cond = mlir::arith::CmpIOp::create( + *builder, loc, mlir::arith::CmpIPredicate::slt, tripCount, one); + tripCount = + mlir::arith::SelectOp::create(*builder, loc, cond, one, tripCount); + } + info.tripVariable = builder->createTemporary(loc, tripCount.getType()); + fir::StoreOp::create(*builder, loc, tripCount, info.tripVariable); + fir::StoreOp::create(*builder, loc, lowerValue, info.loopVariable); + + // Unstructured loop header - generate loop condition and mask. + // Note - Currently there is no way to tag a loop as a concurrent loop. 
+ startBlock(info.headerBlock); + tripCount = fir::LoadOp::create(*builder, loc, info.tripVariable); + mlir::Value zero = + builder->createIntegerConstant(loc, tripCount.getType(), 0); + auto cond = mlir::arith::CmpIOp::create( + *builder, loc, mlir::arith::CmpIPredicate::sgt, tripCount, zero); + if (info.maskExpr) { + genConditionalBranch(cond, info.maskBlock, info.exitBlock); + startBlock(info.maskBlock); + mlir::Block *latchBlock = getEval().getLastNestedEvaluation().block; + assert(latchBlock && "missing masked concurrent loop latch block"); + Fortran::lower::StatementContext stmtCtx; + mlir::Value maskCond = createFIRExpr(loc, info.maskExpr, stmtCtx); + stmtCtx.finalizeAndReset(); + genConditionalBranch(maskCond, info.bodyBlock, latchBlock); + } else { + genConditionalBranch(cond, info.bodyBlock, info.exitBlock); + if (&info != &incrementLoopNestInfo.back()) // not innermost + startBlock(info.bodyBlock); // preheader block of enclosed dimension + } + if (info.hasLocalitySpecs()) { + mlir::OpBuilder::InsertPoint insertPt = builder->saveInsertionPoint(); + builder->setInsertionPointToStart(info.bodyBlock); + handleLocalitySpecs(info); + builder->restoreInsertionPoint(insertPt); + } + } + + if (genDoConcurrent) { + auto loopWrapperOp = fir::DoConcurrentOp::create(*builder, loc); + builder->setInsertionPointToStart( + builder->createBlock(&loopWrapperOp.getRegion())); + + for (IncrementLoopInfo &info : llvm::reverse(incrementLoopNestInfo)) { + info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym, + info.isConcurrent); + } + + builder->setInsertionPointToEnd(loopWrapperOp.getBody()); + auto loopOp = fir::DoConcurrentLoopOp::create( + *builder, loc, nestLBs, nestUBs, nestSts, /*loopAnnotation=*/nullptr, + /*local_vars=*/mlir::ValueRange{}, + /*local_syms=*/nullptr, /*reduce_vars=*/mlir::ValueRange{}, + /*reduce_byref=*/nullptr, /*reduce_syms=*/nullptr, + /*reduce_attrs=*/nullptr); + + llvm::SmallVector loopBlockArgTypes( + 
incrementLoopNestInfo.size(), builder->getIndexType()); + llvm::SmallVector loopBlockArgLocs( + incrementLoopNestInfo.size(), loc); + mlir::Region &loopRegion = loopOp.getRegion(); + mlir::Block *loopBlock = builder->createBlock( + &loopRegion, loopRegion.begin(), loopBlockArgTypes, loopBlockArgLocs); + builder->setInsertionPointToStart(loopBlock); + + for (auto [info, blockArg] : + llvm::zip_equal(incrementLoopNestInfo, loopBlock->getArguments())) { + info.loopOp = loopOp; + mlir::Value loopValue = + builder->createConvert(loc, info.getLoopVariableType(), blockArg); + fir::StoreOp::create(*builder, loc, loopValue, info.loopVariable); + + if (info.maskExpr) { + Fortran::lower::StatementContext stmtCtx; + mlir::Value maskCond = createFIRExpr(loc, info.maskExpr, stmtCtx); + stmtCtx.finalizeAndReset(); + mlir::Value maskCondCast = + builder->createConvert(loc, builder->getI1Type(), maskCond); + auto ifOp = fir::IfOp::create(*builder, loc, maskCondCast, + /*withElseRegion=*/false); + builder->setInsertionPointToStart(&ifOp.getThenRegion().front()); + } + } + + IncrementLoopInfo &innermostInfo = incrementLoopNestInfo.back(); + + if (innermostInfo.hasLocalitySpecs()) + handleLocalitySpecs(innermostInfo); + + addLoopAnnotationAttr(innermostInfo, dirs); + } + } + + + + void genFIR( fir::FirOpBuilder *builder , lower::pft::Evaluation &eval, llvm:: ArrayRef< lower::pft::Evaluation *> doStmts) { + // setCurrentPositionAt(doConstruct); + // Fortran::lower::pft::Evaluation &eval = getEval(); + bool unstructuredContext = eval.lowerAsUnstructured(); + + + // Collect loop nest information. + // Generate begin loop code directly for infinite and while loops. + Fortran::lower::pft::Evaluation &doStmtEval = eval.getFirstNestedEvaluation(); + auto *doStmt = doStmtEval.getIf(); + const auto &loopControl = + std::get>(doStmt->t); + mlir::Block *preheaderBlock = doStmtEval.block; + mlir::Block *beginBlock = preheaderBlock ? 
preheaderBlock : builder->getBlock(); + auto createNextBeginBlock = [&]() { + // Step beginBlock through unstructured preheader, header, and mask + // blocks, created in outermost to innermost order. + return beginBlock = beginBlock->splitBlock(beginBlock->end()); + }; + mlir::Block *headerBlock = unstructuredContext ? createNextBeginBlock() : nullptr; + mlir::Block *bodyBlock = doStmtEval.lexicalSuccessor->block; + mlir::Block *exitBlock = doStmtEval.parentConstruct->constructExit->block; + IncrementLoopNestInfo incrementLoopNestInfo; + const Fortran::parser::ScalarLogicalExpr *whileCondition = nullptr; + bool infiniteLoop = !loopControl.has_value(); + if (infiniteLoop) { + assert(unstructuredContext && "infinite loop must be unstructured"); + startBlock(headerBlock); + } else if ((whileCondition = + std::get_if( + &loopControl->u))) { + assert(unstructuredContext && "while loop must be unstructured"); + maybeStartBlock(preheaderBlock); // no block or empty block + startBlock(headerBlock); + genConditionalBranch(*whileCondition, bodyBlock, exitBlock); + } else if (const auto *bounds = + std::get_if( + &loopControl->u)) { + // Non-concurrent increment loop. + IncrementLoopInfo &info = incrementLoopNestInfo.emplace_back( + *bounds->name.thing.symbol, bounds->lower, bounds->upper, + bounds->step); + if (unstructuredContext) { + maybeStartBlock(preheaderBlock); + info.hasRealControl = info.loopVariableSym->GetType()->IsNumeric( + Fortran::common::TypeCategory::Real); + info.headerBlock = headerBlock; + info.bodyBlock = bodyBlock; + info.exitBlock = exitBlock; + } + } else { + llvm_unreachable("DO CONCURRENT unsupported"); + } + + + + // Increment loop begin code. (Infinite/while code was already generated.) + if (!infiniteLoop && !whileCondition) + genFIRIncrementLoopBegin(incrementLoopNestInfo, doStmtEval.dirs); + + // Loop body code. 
+ auto iter = eval.getNestedEvaluations().begin(); + for (auto end = --eval.getNestedEvaluations().end(); iter != end; ++iter) + genFIR(*iter, unstructuredContext); + + // An EndDoStmt in unstructured code may start a new block. + Fortran::lower::pft::Evaluation &endDoEval = *iter; + assert(endDoEval.getIf() && "no enddo stmt"); + if (unstructuredContext) + maybeStartBlock(endDoEval.block); + + // Loop end code. + if (infiniteLoop || whileCondition) + genBranch(headerBlock); + else + genFIRIncrementLoopEnd(incrementLoopNestInfo); + + // This call may generate a branch in some contexts. + genFIR(endDoEval, unstructuredContext); + + + } + + + }; + } + + #endif + + + +static void genStandaloneInterchangeOp(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + lower::StatementContext &stmtCtx, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + mlir::Location loc, const ConstructQueue &queue, + ConstructQueue::const_iterator item) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); + assert(llvm::range_size(transforms)==1 ); + auto &&transform = *transforms.begin(); + auto d = transform.id; + assert(transform.id == llvm::omp::OMPD_interchange); + auto clauses = transform.clauses; + + bool hasPermutationClause = false; + llvm::SmallVector permutation; + auto &&permutationClause = ClauseFinder::findUniqueClause< Fortran::lower::omp::clause::Permutation>(clauses); + if (permutationClause) { + permutation.reserve(permutationClause->v.size()); + for (auto &&ts : permutationClause->v) { + permutation.push_back(evaluate::ToInt64(ts).value()); + } + } else { + permutation = {2, 1}; + } + + llvm::SmallVector< lower::pft::Evaluation* > loops; + collectLoops(eval,loops, permutation.size()); + // auto innermostDo = loops.back(); + // auto innermostBody = 
&*std::next(innermostDo->getNestedEvaluations().begin()); + + // TODO: Assert this is a valid permution + llvm::SmallVector< lower::pft::Evaluation* > newLoops; + for (auto perm : permutation) { + newLoops.push_back(loops[perm - 1]); + } + + converter.genPermutatedLoops(newLoops, loops.back()); + +#if 0 +MyFirConverter converter; +converter.builder = &firOpBuilder; +converter.genFir(eval, newLoops); +#endif + + + +#if 0 + mlir::omp::LoopRelatedClauseOps loopInfo; + llvm::SmallVector iv; + collectLoopRelatedInfo(converter, loc, eval, item->clauses, loopInfo, iv); +#endif +} + + static mlir::omp::MaskedOp genMaskedOp(lower::AbstractConverter &converter, lower::SymMap &symTable, lower::StatementContext &stmtCtx, @@ -3534,7 +4100,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_interchange: - llvm_unreachable("MK: standalone interchange not implemented"); + genStandaloneInterchangeOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_workdistribute: newOp = genWorkdistributeOp(converter, symTable, semaCtx, eval, loc, queue, diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index cc57cf66dc158..6abff7c6ddc41 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -33,6 +33,11 @@ #include "llvm/ADT/TypeSwitch.h" #include "llvm/ADT/bit.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" #include #include #include diff --git a/openmp/runtime/test/transform/interchange/intdo-permutation.f90 b/openmp/runtime/test/transform/interchange/intdo-permutation.f90 new file mode 100644 index 
0000000000000..a8a8e7f35d018 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/intdo-permutation.f90 @@ -0,0 +1,42 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_intdo + integer :: i, j + print *, 'do' + + !$OMP INTERCHANGE PERMUTATION(2,3,1) + do i = 7, 15, 3 + do j = -1, 1, 2 + do k = 3, 1, -1 + print '("i=", I0, " j=", I0, " k=", I0)', i, j, k + end do + end do + end do + !$OMP END INTERCHANGE + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 k=3 +! CHECK-NEXT: i=10 j=-1 k=3 +! CHECK-NEXT: i=13 j=-1 k=3 +! CHECK-NEXT: i=7 j=-1 k=2 +! CHECK-NEXT: i=10 j=-1 k=2 +! CHECK-NEXT: i=13 j=-1 k=2 +! CHECK-NEXT: i=7 j=-1 k=1 +! CHECK-NEXT: i=10 j=-1 k=1 +! CHECK-NEXT: i=13 j=-1 k=1 +! CHECK-NEXT: i=7 j=1 k=3 +! CHECK-NEXT: i=10 j=1 k=3 +! CHECK-NEXT: i=13 j=1 k=3 +! CHECK-NEXT: i=7 j=1 k=2 +! CHECK-NEXT: i=10 j=1 k=2 +! CHECK-NEXT: i=13 j=1 k=2 +! CHECK-NEXT: i=7 j=1 k=1 +! CHECK-NEXT: i=10 j=1 k=1 +! CHECK-NEXT: i=13 j=1 k=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-firstprivate.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-firstprivate.f90 new file mode 100644 index 0000000000000..e53bb107bad2b --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-firstprivate.f90 @@ -0,0 +1,35 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j, k + print *, 'do' + + k = 1 + !$OMP PARALLEL DO NUM_THREADS(3) FIRSTPRIVATE(k) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + k = k + 1 + print '("i=", I0, " j=", I0, " k=", I0)', i, j, k + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-DAG: i=7 j=-1 k=2 +! CHECK-DAG: i=10 j=-1 k=3 +! 
CHECK-DAG: i=13 j=-1 k=4 +! CHECK-DAG: i=7 j=0 k=2 +! CHECK-DAG: i=10 j=0 k=3 +! CHECK-DAG: i=13 j=0 k=4 +! CHECK-DAG: i=7 j=1 k=2 +! CHECK-DAG: i=10 j=1 k=3 +! CHECK-DAG: i=13 j=1 k=4 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private.f90 new file mode 100644 index 0000000000000..372ff573a10d2 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private.f90 @@ -0,0 +1,34 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j, k + print *, 'do' + + !$OMP PARALLEL DO NUM_THREADS(4) PRIVATE(k) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + k = i + j + print '("i=", I0, " j=", I0, " k=", I0)', i, j, k + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-DAG: i=7 j=-1 k=6 +! CHECK-DAG: i=10 j=-1 k=9 +! CHECK-DAG: i=13 j=-1 k=12 +! CHECK-DAG: i=7 j=0 k=7 +! CHECK-DAG: i=10 j=0 k=10 +! CHECK-DAG: i=13 j=0 k=13 +! CHECK-DAG: i=7 j=1 k=8 +! CHECK-DAG: i=10 j=1 k=11 +! CHECK-DAG: i=13 j=1 k=14 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-reduction.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-reduction.f90 new file mode 100644 index 0000000000000..8d313becef862 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-reduction.f90 @@ -0,0 +1,27 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! 
RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j, k + print *, 'do' + + k = 1 + !$OMP PARALLEL DO REDUCTION(+:k) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + k = k + 1 + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' + print '("k=", I0)', k +end program + + +! CHECK: do +! CHECK-NEXT: done +! CHECK-NEXT: k=10 diff --git a/openmp/runtime/test/transform/interchange/target-teams-distribute-parallel-wsloop-intdo.f90 b/openmp/runtime/test/transform/interchange/target-teams-distribute-parallel-wsloop-intdo.f90 new file mode 100644 index 0000000000000..81e681b55eb1d --- /dev/null +++ b/openmp/runtime/test/transform/interchange/target-teams-distribute-parallel-wsloop-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program target_teams_distribute_parallel_do + integer :: i, j + print *, 'do' + + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO SCHEDULE(static,2) NUM_TEAMS(1) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/taskloop-intdo.f90 b/openmp/runtime/test/transform/interchange/taskloop-intdo.f90 new file mode 100644 index 0000000000000..d79f92d2ad074 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/taskloop-intdo.f90 @@ -0,0 +1,35 @@ + +! XFAIL: * +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! 
RUN: %t.exe | FileCheck %s --match-full-lines + + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP TASKLOOP + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END TASKLOOP + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/taskloop-simd-intdo.f90 b/openmp/runtime/test/transform/interchange/taskloop-simd-intdo.f90 index 2e8293dd6bec6..d84be9d1d7a96 100644 --- a/openmp/runtime/test/transform/interchange/taskloop-simd-intdo.f90 +++ b/openmp/runtime/test/transform/interchange/taskloop-simd-intdo.f90 @@ -1,12 +1,13 @@ ! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe ! RUN: %t.exe | FileCheck %s --match-full-lines +! XFAIL: * program interchange_wsloop_intdo integer :: i, j print *, 'do' - !$OMP TASKLOOP SIMD SCHEDULE(static,2) + !$OMP TASKLOOP SIMD !$OMP INTERCHANGE do i = 7, 15, 3 do j = -1, 1 diff --git a/openmp/runtime/test/transform/interchange/teams-distribute-parallel-wsloop-intdo.f90 b/openmp/runtime/test/transform/interchange/teams-distribute-parallel-wsloop-intdo.f90 new file mode 100644 index 0000000000000..6d7fe1afdcdd5 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/teams-distribute-parallel-wsloop-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! 
RUN: %t.exe | FileCheck %s --match-full-lines + +program teams_distribute_parallel_do + integer :: i, j + print *, 'do' + + !$OMP TEAMS DISTRIBUTE PARALLEL DO SCHEDULE(static,2) NUM_TEAMS(1) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END TEAMS DISTRIBUTE PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done From 0f6ab3997cc0275f04cf12cdad5a63cc88030fe9 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 11 Sep 2025 23:20:19 +0200 Subject: [PATCH 43/57] clang-format --- flang/include/flang/Lower/AbstractConverter.h | 7 +- flang/lib/Lower/Bridge.cpp | 192 +++--- flang/lib/Lower/OpenMP/OpenMP.cpp | 586 ++---------------- 3 files changed, 150 insertions(+), 635 deletions(-) diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h index e7c1cb92c445e..396123b1e1938 100644 --- a/flang/include/flang/Lower/AbstractConverter.h +++ b/flang/include/flang/Lower/AbstractConverter.h @@ -193,9 +193,6 @@ class AbstractConverter { std::unique_ptr expression, mlir::Type eleTy) = 0; - - - //===--------------------------------------------------------------------===// // Expressions //===--------------------------------------------------------------------===// @@ -384,7 +381,9 @@ class AbstractConverter { virtual mlir::StateStack &getStateStack() = 0; - virtual void genPermutatedLoops( llvm:: ArrayRef doStmts, Fortran:: lower::pft::Evaluation *innermostDo) = 0; + virtual void + genPermutatedLoops(llvm::ArrayRef doStmts, + Fortran::lower::pft::Evaluation *innermostDo) = 0; private: /// Options controlling lowering behavior. 
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 0f21235872edb..f54fc773bff6c 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -2187,119 +2187,127 @@ class FirConverter : public Fortran::lower::AbstractConverter { // so no clean-up needs to be generated for these entities. } - void genPermutatedLoops( llvm:: ArrayRef doStmts, Fortran:: lower::pft::Evaluation *innermostDo) override { - // Fortran::lower::pft::Evaluation &eval = getEval(); - // bool unstructuredContext = eval.lowerAsUnstructured(); + void + genPermutatedLoops(llvm::ArrayRef doStmts, + Fortran::lower::pft::Evaluation *innermostDo) override { + // Fortran::lower::pft::Evaluation &eval = getEval(); + // bool unstructuredContext = eval.lowerAsUnstructured(); - llvm:: SmallVector< mlir::Block *> headerBlocks; - llvm:: SmallVector loopInfos; + llvm::SmallVector headerBlocks; + llvm::SmallVector loopInfos; - auto enterLoop = [&](Fortran:: lower::pft::Evaluation &eval) { eval.dump(); + auto enterLoop = [&](Fortran::lower::pft::Evaluation &eval) { + eval.dump(); bool unstructuredContext = eval.lowerAsUnstructured(); - // Collect loop nest information. - // Generate begin loop code directly for infinite and while loops. - Fortran::lower::pft::Evaluation &doStmtEval = eval.getFirstNestedEvaluation(); - auto *doStmt = doStmtEval.getIf(); - const auto &loopControl = std::get>(doStmt->t); - mlir::Block *preheaderBlock = doStmtEval.block; - mlir::Block *beginBlock = preheaderBlock ? preheaderBlock : builder->getBlock(); - auto createNextBeginBlock = [&]() { - // Step beginBlock through unstructured preheader, header, and mask - // blocks, created in outermost to innermost order. - return beginBlock = beginBlock->splitBlock(beginBlock->end()); - }; - mlir::Block *headerBlock = unstructuredContext ? 
createNextBeginBlock() : nullptr; - headerBlocks.push_back(headerBlock); - mlir::Block *bodyBlock = doStmtEval.lexicalSuccessor->block; - mlir::Block *exitBlock = doStmtEval.parentConstruct->constructExit->block; - IncrementLoopNestInfo &incrementLoopNestInfo = loopInfos.emplace_back(); - const Fortran::parser::ScalarLogicalExpr *whileCondition = nullptr; - bool infiniteLoop = !loopControl.has_value(); - if (infiniteLoop) { - assert(unstructuredContext && "infinite loop must be unstructured"); - startBlock(headerBlock); - } else if ((whileCondition = - std::get_if( - &loopControl->u))) { - assert(unstructuredContext && "while loop must be unstructured"); - maybeStartBlock(preheaderBlock); // no block or empty block - startBlock(headerBlock); - genConditionalBranch(*whileCondition, bodyBlock, exitBlock); - } else if (const auto *bounds = - std::get_if( - &loopControl->u)) { - // Non-concurrent increment loop. - IncrementLoopInfo &info = incrementLoopNestInfo.emplace_back( - *bounds->name.thing.symbol, bounds->lower, bounds->upper, - bounds->step); - if (unstructuredContext) { - maybeStartBlock(preheaderBlock); - info.hasRealControl = info.loopVariableSym->GetType()->IsNumeric( - Fortran::common::TypeCategory::Real); - info.headerBlock = headerBlock; - info.bodyBlock = bodyBlock; - info.exitBlock = exitBlock; + // Collect loop nest information. + // Generate begin loop code directly for infinite and while loops. + Fortran::lower::pft::Evaluation &doStmtEval = + eval.getFirstNestedEvaluation(); + auto *doStmt = doStmtEval.getIf(); + const auto &loopControl = + std::get>(doStmt->t); + mlir::Block *preheaderBlock = doStmtEval.block; + mlir::Block *beginBlock = + preheaderBlock ? preheaderBlock : builder->getBlock(); + auto createNextBeginBlock = [&]() { + // Step beginBlock through unstructured preheader, header, and mask + // blocks, created in outermost to innermost order. 
+ return beginBlock = beginBlock->splitBlock(beginBlock->end()); + }; + mlir::Block *headerBlock = + unstructuredContext ? createNextBeginBlock() : nullptr; + headerBlocks.push_back(headerBlock); + mlir::Block *bodyBlock = doStmtEval.lexicalSuccessor->block; + mlir::Block *exitBlock = doStmtEval.parentConstruct->constructExit->block; + IncrementLoopNestInfo &incrementLoopNestInfo = loopInfos.emplace_back(); + const Fortran::parser::ScalarLogicalExpr *whileCondition = nullptr; + bool infiniteLoop = !loopControl.has_value(); + if (infiniteLoop) { + assert(unstructuredContext && "infinite loop must be unstructured"); + startBlock(headerBlock); + } else if ((whileCondition = + std::get_if( + &loopControl->u))) { + assert(unstructuredContext && "while loop must be unstructured"); + maybeStartBlock(preheaderBlock); // no block or empty block + startBlock(headerBlock); + genConditionalBranch(*whileCondition, bodyBlock, exitBlock); + } else if (const auto *bounds = + std::get_if( + &loopControl->u)) { + // Non-concurrent increment loop. + IncrementLoopInfo &info = incrementLoopNestInfo.emplace_back( + *bounds->name.thing.symbol, bounds->lower, bounds->upper, + bounds->step); + if (unstructuredContext) { + maybeStartBlock(preheaderBlock); + info.hasRealControl = info.loopVariableSym->GetType()->IsNumeric( + Fortran::common::TypeCategory::Real); + info.headerBlock = headerBlock; + info.bodyBlock = bodyBlock; + info.exitBlock = exitBlock; + } + } else { + llvm_unreachable("Cannot permute DO CONCURRENT"); } - } else { - llvm_unreachable("Cannot permute DO CONCURRENT"); - } - - - // Increment loop begin code. (Infinite/while code was already generated.) - if (!infiniteLoop && !whileCondition) - genFIRIncrementLoopBegin(incrementLoopNestInfo, doStmtEval.dirs); + // Increment loop begin code. (Infinite/while code was already generated.) 
+ if (!infiniteLoop && !whileCondition) + genFIRIncrementLoopBegin(incrementLoopNestInfo, doStmtEval.dirs); }; + auto leaveLoop = [&](Fortran::lower::pft::Evaluation &eval, + mlir::Block *headerBlock, + IncrementLoopNestInfo &incrementLoopNestInfo) { + eval.dump(); + bool unstructuredContext = eval.lowerAsUnstructured(); + Fortran::lower::pft::Evaluation &doStmtEval = + eval.getFirstNestedEvaluation(); + auto *doStmt = doStmtEval.getIf(); - auto leaveLoop = [&](Fortran:: lower::pft::Evaluation &eval, mlir::Block *headerBlock , IncrementLoopNestInfo &incrementLoopNestInfo) { eval.dump(); - bool unstructuredContext = eval.lowerAsUnstructured(); - - Fortran::lower::pft::Evaluation &doStmtEval = eval.getFirstNestedEvaluation(); - auto *doStmt = doStmtEval.getIf(); - - const auto &loopControl = std::get>(doStmt->t); - bool infiniteLoop = !loopControl.has_value(); - const Fortran::parser::ScalarLogicalExpr *whileCondition = std::get_if( &loopControl->u); - - auto iter = std::prev( eval.getNestedEvaluations().end()); - - // An EndDoStmt in unstructured code may start a new block. - Fortran::lower::pft::Evaluation &endDoEval = *iter; - assert(endDoEval.getIf() && "no enddo stmt"); - if (unstructuredContext) - maybeStartBlock(endDoEval.block); - - // Loop end code. - if (infiniteLoop || whileCondition) - genBranch(headerBlock); - else - genFIRIncrementLoopEnd(incrementLoopNestInfo); - - // This call may generate a branch in some contexts. - genFIR(endDoEval, unstructuredContext); - }; + const auto &loopControl = + std::get>(doStmt->t); + bool infiniteLoop = !loopControl.has_value(); + const Fortran::parser::ScalarLogicalExpr *whileCondition = + std::get_if(&loopControl->u); + auto iter = std::prev(eval.getNestedEvaluations().end()); + // An EndDoStmt in unstructured code may start a new block. 
+ Fortran::lower::pft::Evaluation &endDoEval = *iter; + assert(endDoEval.getIf() && "no enddo stmt"); + if (unstructuredContext) + maybeStartBlock(endDoEval.block); + // Loop end code. + if (infiniteLoop || whileCondition) + genBranch(headerBlock); + else + genFIRIncrementLoopEnd(incrementLoopNestInfo); + // This call may generate a branch in some contexts. + genFIR(endDoEval, unstructuredContext); + }; - for (auto l : doStmts) enterLoop(*l); - + for (auto l : doStmts) + enterLoop(*l); // Loop body code. - // Fortran:: lower::pft::Evaluation *innermostEval = doStmts.back(); innermostEval->dump(); - bool innermostUnstructuredContext = innermostDo->lowerAsUnstructured(); + // Fortran:: lower::pft::Evaluation *innermostEval = doStmts.back(); + // innermostEval->dump(); + bool innermostUnstructuredContext = innermostDo->lowerAsUnstructured(); auto iter = innermostDo->getNestedEvaluations().begin(); - for (auto end = --innermostDo->getNestedEvaluations().end(); iter != end; ++iter) + for (auto end = --innermostDo->getNestedEvaluations().end(); iter != end; + ++iter) genFIR(*iter, innermostUnstructuredContext); - for (auto &&[l,headerBlock,li] : llvm::zip_equal( doStmts,headerBlocks,loopInfos)) - leaveLoop(*l,headerBlock,li); -} + for (auto &&[l, headerBlock, li] : + llvm::zip_equal(doStmts, headerBlocks, loopInfos)) + leaveLoop(*l, headerBlock, li); + } /// Generate FIR for a DO construct. 
There are six variants: /// - unstructured infinite and while loops diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 7adedd934d582..01387cbc4dc60 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -2279,567 +2279,74 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter, mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli); } -static -void collectLoops( lower::pft::Evaluation &eval, - llvm::SmallVectorImpl< lower::pft::Evaluation* > &result, - int numLoops) { - - - - +static void +collectLoops(lower::pft::Evaluation &eval, + llvm::SmallVectorImpl &result, + int numLoops) { std::size_t loopVarTypeSize = 0; - lower::pft::Evaluation *doConstructEval = &eval.getFirstNestedEvaluation(); + lower::pft::Evaluation *doConstructEval = &eval.getFirstNestedEvaluation(); for (auto i : llvm::seq(numLoops)) { - lower::pft::Evaluation *doLoop = &doConstructEval->getFirstNestedEvaluation(); + lower::pft::Evaluation *doLoop = + &doConstructEval->getFirstNestedEvaluation(); auto *doStmt = doLoop->getIf(); assert(doStmt && "Expected do loop to be in the nested evaluation"); - const auto &loopControl = std::get>(doStmt->t); - const parser::LoopControl::Bounds *bounds = std::get_if(&loopControl->u); + const auto &loopControl = + std::get>(doStmt->t); + const parser::LoopControl::Bounds *bounds = + std::get_if(&loopControl->u); assert(bounds && "Expected bounds for worksharing do loop"); lower::StatementContext stmtCtx; - result.push_back(doConstructEval); - - doConstructEval = &*std::next(doConstructEval->getNestedEvaluations().begin()); + doConstructEval = + &*std::next(doConstructEval->getNestedEvaluations().begin()); }; } +static void genStandaloneInterchangeOp( + Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, lower::StatementContext &stmtCtx, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, mlir::Location 
loc, + const ConstructQueue &queue, ConstructQueue::const_iterator item) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); -static void enterDoLoop( lower::pft::Evaluation * doStmt) { -} - - - -static void leaveDoLoop( lower::pft::Evaluation *doStmt) { -} - - - #if 0 - -// copied from Bridge.cpp -namespace { -struct IncrementLoopInfo { - template - explicit IncrementLoopInfo(Fortran::semantics::Symbol &sym, const T &lower, - const T &upper, const std::optional &step, - bool isConcurrent = false) - : loopVariableSym{&sym}, lowerExpr{Fortran::semantics::GetExpr(lower)}, - upperExpr{Fortran::semantics::GetExpr(upper)}, - stepExpr{Fortran::semantics::GetExpr(step)}, - isConcurrent{isConcurrent} {} - - IncrementLoopInfo(IncrementLoopInfo &&) = default; - IncrementLoopInfo &operator=(IncrementLoopInfo &&x) = default; - - bool isStructured() const { return !headerBlock; } - - mlir::Type getLoopVariableType() const { - assert(loopVariable && "must be set"); - return fir::unwrapRefType(loopVariable.getType()); - } - - bool hasLocalitySpecs() const { - return !localSymList.empty() || !localInitSymList.empty() || - !reduceSymList.empty() || !sharedSymList.empty(); - } - - // Data members common to both structured and unstructured loops. - const Fortran::semantics::Symbol *loopVariableSym; - const Fortran::lower::SomeExpr *lowerExpr; - const Fortran::lower::SomeExpr *upperExpr; - const Fortran::lower::SomeExpr *stepExpr; - const Fortran::lower::SomeExpr *maskExpr = nullptr; - bool isConcurrent; - llvm::SmallVector localSymList; - llvm::SmallVector localInitSymList; - llvm::SmallVector reduceSymList; - llvm::SmallVector reduceOperatorList; - llvm::SmallVector sharedSymList; - mlir::Value loopVariable = nullptr; - - // Data members for structured loops. - mlir::Operation *loopOp = nullptr; - - // Data members for unstructured loops. 
- bool hasRealControl = false; - mlir::Value tripVariable = nullptr; - mlir::Value stepVariable = nullptr; - mlir::Block *headerBlock = nullptr; // loop entry and test block - mlir::Block *maskBlock = nullptr; // concurrent loop mask block - mlir::Block *bodyBlock = nullptr; // first loop body block - mlir::Block *exitBlock = nullptr; // loop exit target block -}; - -using IncrementLoopNestInfo = llvm::SmallVector; - -struct MyFirConverter : public Fortran::lower::AbstractConverter { - fir::FirOpBuilder *builder = nullptr; - Fortran::parser::CharBlock currentPosition; - - /// Return the predicate: "current block does not have a terminator branch". - bool blockIsUnterminated() { - mlir::Block *currentBlock = builder->getBlock(); - return currentBlock->empty() || !currentBlock->back().hasTrait(); - } - - /// Convert a parser CharBlock to a Location - mlir::Location toLocation(const Fortran::parser::CharBlock &cb) { - return genLocation(cb); - } - - mlir::Location toLocation() { return toLocation(currentPosition); } - - -#if 0 - static mlir::Location genLocation(Fortran::parser::SourcePosition pos, mlir::MLIRContext &ctx) { - llvm::SmallString<256> path(*pos.path); - llvm::sys::fs::make_absolute(path); - llvm::sys::path::remove_dots(path); - return mlir::FileLineColLoc::get(&ctx, path.str(), pos.line, pos.column); - } -#endif - - - void genBranch(mlir::Block *targetBlock) { - assert(targetBlock && "missing unconditional target block"); - mlir::cf::BranchOp::create(*builder, toLocation(), targetBlock); - } - - /// Unconditionally switch code insertion to a new block. - void startBlock(mlir::Block *newBlock) { - assert(newBlock && "missing block"); - // Default termination for the current block is a fallthrough branch to - // the new block. - if (blockIsUnterminated()) - genBranch(newBlock); - // Some blocks may be re/started more than once, and might not be empty. 
- // If the new block already has (only) a terminator, set the insertion - // point to the start of the block. Otherwise set it to the end. - builder->setInsertionPointToStart(newBlock); - if (blockIsUnterminated()) - builder->setInsertionPointToEnd(newBlock); - } - - - /// Conditionally switch code insertion to a new block. - void maybeStartBlock(mlir::Block *newBlock) { - if (newBlock) - startBlock(newBlock); - } - - - void genConditionalBranch(mlir::Value cond, mlir::Block *trueTarget, - mlir::Block *falseTarget) { - assert(trueTarget && "missing conditional branch true block"); - assert(falseTarget && "missing conditional branch false block"); - mlir::Location loc = toLocation(); - mlir::Value bcc = builder->createConvert(loc, builder->getI1Type(), cond); - mlir::cf::CondBranchOp::create(*builder, loc, bcc, trueTarget, - mlir::ValueRange{}, falseTarget, - mlir::ValueRange{}); - } - - - void genConditionalBranch(mlir::Value cond, - Fortran::lower::pft::Evaluation *trueTarget, - Fortran::lower::pft::Evaluation *falseTarget) { - genConditionalBranch(cond, trueTarget->block, falseTarget->block); - } - - mlir::Value createFIRExpr(mlir::Location loc, - const Fortran::lower::SomeExpr *expr, - Fortran::lower::StatementContext &stmtCtx) { - return fir::getBase(genExprValue(*expr, stmtCtx, &loc)); - } - - - void genConditionalBranch(const Fortran::parser::ScalarLogicalExpr &expr, - mlir::Block *trueTarget, mlir::Block *falseTarget) { - Fortran::lower::StatementContext stmtCtx; - mlir::Value cond = createFIRExpr(toLocation(), Fortran::semantics::GetExpr(expr), stmtCtx); - stmtCtx.finalizeAndReset(); - genConditionalBranch(cond, trueTarget, falseTarget); - } - - - void genConditionalBranch(const Fortran::parser::ScalarLogicalExpr &expr, - Fortran::lower::pft::Evaluation *trueTarget, - Fortran::lower::pft::Evaluation *falseTarget) { - Fortran::lower::StatementContext stmtCtx; - mlir::Value cond = createFIRExpr(toLocation(), Fortran::semantics::GetExpr(expr), stmtCtx); - 
stmtCtx.finalizeAndReset(); - genConditionalBranch(cond, trueTarget->block, falseTarget->block); - } - - /// Generate the address of loop variable \p sym. - /// If \p sym is not mapped yet, allocate local storage for it. - mlir::Value genLoopVariableAddress(mlir::Location loc, - const Fortran::semantics::Symbol &sym, - bool isUnordered) { - if (!shallowLookupSymbol(sym) && - (isUnordered || - GetSymbolDSA(sym).test(Fortran::semantics::Symbol::Flag::OmpPrivate) || - GetSymbolDSA(sym).test( - Fortran::semantics::Symbol::Flag::OmpFirstPrivate) || - GetSymbolDSA(sym).test( - Fortran::semantics::Symbol::Flag::OmpLastPrivate) || - GetSymbolDSA(sym).test(Fortran::semantics::Symbol::Flag::OmpLinear))) { - // Do concurrent loop variables are not mapped yet since they are - // local to the Do concurrent scope (same for OpenMP loops). - mlir::OpBuilder::InsertPoint insPt = builder->saveInsertionPoint(); - builder->setInsertionPointToStart(builder->getAllocaBlock()); - mlir::Type tempTy = genType(sym); - mlir::Value temp = - builder->createTemporaryAlloc(loc, tempTy, toStringRef(sym.name())); - bindIfNewSymbol(sym, temp); - builder->restoreInsertionPoint(insPt); - } - auto entry = lookupSymbol(sym); - (void)entry; - assert(entry && "loop control variable must already be in map"); - Fortran::lower::StatementContext stmtCtx; - return fir::getBase( - genExprAddr(Fortran::evaluate::AsGenericExpr(sym).value(), stmtCtx)); - } - - /// Generate FIR to begin a structured or unstructured increment loop nest. 
- void genFIRIncrementLoopBegin( - IncrementLoopNestInfo &incrementLoopNestInfo, - llvm::SmallVectorImpl &dirs) { - assert(!incrementLoopNestInfo.empty() && "empty loop nest"); - mlir::Location loc = toLocation(); - mlir::arith::IntegerOverflowFlags iofBackup{}; - - llvm::SmallVector nestLBs; - llvm::SmallVector nestUBs; - llvm::SmallVector nestSts; - llvm::SmallVector nestReduceOperands; - llvm::SmallVector nestReduceAttrs; - bool genDoConcurrent = false; - - for (IncrementLoopInfo &info : incrementLoopNestInfo) { - genDoConcurrent = info.isStructured() && info.isConcurrent; - - if (!genDoConcurrent) - info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym, info.isConcurrent); - - if (!getLoweringOptions().getIntegerWrapAround()) { - iofBackup = builder->getIntegerOverflowFlags(); - builder->setIntegerOverflowFlags( - mlir::arith::IntegerOverflowFlags::nsw); - } - - nestLBs.push_back(genControlValue(info.lowerExpr, info)); - nestUBs.push_back(genControlValue(info.upperExpr, info)); - bool isConst = true; - nestSts.push_back(genControlValue( - info.stepExpr, info, info.isStructured() ? nullptr : &isConst)); - - if (!getLoweringOptions().getIntegerWrapAround()) - builder->setIntegerOverflowFlags(iofBackup); - - // Use a temp variable for unstructured loops with non-const step. - if (!isConst) { - mlir::Value stepValue = nestSts.back(); - info.stepVariable = builder->createTemporary(loc, stepValue.getType()); - fir::StoreOp::create(*builder, loc, stepValue, info.stepVariable); - } - } - - for (auto [info, lowerValue, upperValue, stepValue] : - llvm::zip_equal(incrementLoopNestInfo, nestLBs, nestUBs, nestSts)) { - // Structured loop - generate fir.do_loop. - if (info.isStructured()) { - if (genDoConcurrent) - continue; - - // The loop variable is a doLoop op argument. 
- mlir::Type loopVarType = info.getLoopVariableType(); - auto loopOp = fir::DoLoopOp::create( - *builder, loc, lowerValue, upperValue, stepValue, - /*unordered=*/false, - /*finalCountValue=*/true, - builder->createConvert(loc, loopVarType, lowerValue)); - info.loopOp = loopOp; - builder->setInsertionPointToStart(loopOp.getBody()); - mlir::Value loopValue = loopOp.getRegionIterArgs()[0]; - - // Update the loop variable value in case it has non-index references. - fir::StoreOp::create(*builder, loc, loopValue, info.loopVariable); - addLoopAnnotationAttr(info, dirs); - continue; - } - - // Unstructured loop preheader - initialize tripVariable and loopVariable. - mlir::Value tripCount; - if (info.hasRealControl) { - auto diff1 = - mlir::arith::SubFOp::create(*builder, loc, upperValue, lowerValue); - auto diff2 = - mlir::arith::AddFOp::create(*builder, loc, diff1, stepValue); - tripCount = - mlir::arith::DivFOp::create(*builder, loc, diff2, stepValue); - tripCount = - builder->createConvert(loc, builder->getIndexType(), tripCount); - } else { - auto diff1 = - mlir::arith::SubIOp::create(*builder, loc, upperValue, lowerValue); - auto diff2 = - mlir::arith::AddIOp::create(*builder, loc, diff1, stepValue); - tripCount = - mlir::arith::DivSIOp::create(*builder, loc, diff2, stepValue); - } - if (forceLoopToExecuteOnce) { // minimum tripCount is 1 - mlir::Value one = - builder->createIntegerConstant(loc, tripCount.getType(), 1); - auto cond = mlir::arith::CmpIOp::create( - *builder, loc, mlir::arith::CmpIPredicate::slt, tripCount, one); - tripCount = - mlir::arith::SelectOp::create(*builder, loc, cond, one, tripCount); - } - info.tripVariable = builder->createTemporary(loc, tripCount.getType()); - fir::StoreOp::create(*builder, loc, tripCount, info.tripVariable); - fir::StoreOp::create(*builder, loc, lowerValue, info.loopVariable); - - // Unstructured loop header - generate loop condition and mask. - // Note - Currently there is no way to tag a loop as a concurrent loop. 
- startBlock(info.headerBlock); - tripCount = fir::LoadOp::create(*builder, loc, info.tripVariable); - mlir::Value zero = - builder->createIntegerConstant(loc, tripCount.getType(), 0); - auto cond = mlir::arith::CmpIOp::create( - *builder, loc, mlir::arith::CmpIPredicate::sgt, tripCount, zero); - if (info.maskExpr) { - genConditionalBranch(cond, info.maskBlock, info.exitBlock); - startBlock(info.maskBlock); - mlir::Block *latchBlock = getEval().getLastNestedEvaluation().block; - assert(latchBlock && "missing masked concurrent loop latch block"); - Fortran::lower::StatementContext stmtCtx; - mlir::Value maskCond = createFIRExpr(loc, info.maskExpr, stmtCtx); - stmtCtx.finalizeAndReset(); - genConditionalBranch(maskCond, info.bodyBlock, latchBlock); - } else { - genConditionalBranch(cond, info.bodyBlock, info.exitBlock); - if (&info != &incrementLoopNestInfo.back()) // not innermost - startBlock(info.bodyBlock); // preheader block of enclosed dimension - } - if (info.hasLocalitySpecs()) { - mlir::OpBuilder::InsertPoint insertPt = builder->saveInsertionPoint(); - builder->setInsertionPointToStart(info.bodyBlock); - handleLocalitySpecs(info); - builder->restoreInsertionPoint(insertPt); - } - } - - if (genDoConcurrent) { - auto loopWrapperOp = fir::DoConcurrentOp::create(*builder, loc); - builder->setInsertionPointToStart( - builder->createBlock(&loopWrapperOp.getRegion())); - - for (IncrementLoopInfo &info : llvm::reverse(incrementLoopNestInfo)) { - info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym, - info.isConcurrent); - } - - builder->setInsertionPointToEnd(loopWrapperOp.getBody()); - auto loopOp = fir::DoConcurrentLoopOp::create( - *builder, loc, nestLBs, nestUBs, nestSts, /*loopAnnotation=*/nullptr, - /*local_vars=*/mlir::ValueRange{}, - /*local_syms=*/nullptr, /*reduce_vars=*/mlir::ValueRange{}, - /*reduce_byref=*/nullptr, /*reduce_syms=*/nullptr, - /*reduce_attrs=*/nullptr); - - llvm::SmallVector loopBlockArgTypes( - 
incrementLoopNestInfo.size(), builder->getIndexType()); - llvm::SmallVector loopBlockArgLocs( - incrementLoopNestInfo.size(), loc); - mlir::Region &loopRegion = loopOp.getRegion(); - mlir::Block *loopBlock = builder->createBlock( - &loopRegion, loopRegion.begin(), loopBlockArgTypes, loopBlockArgLocs); - builder->setInsertionPointToStart(loopBlock); - - for (auto [info, blockArg] : - llvm::zip_equal(incrementLoopNestInfo, loopBlock->getArguments())) { - info.loopOp = loopOp; - mlir::Value loopValue = - builder->createConvert(loc, info.getLoopVariableType(), blockArg); - fir::StoreOp::create(*builder, loc, loopValue, info.loopVariable); - - if (info.maskExpr) { - Fortran::lower::StatementContext stmtCtx; - mlir::Value maskCond = createFIRExpr(loc, info.maskExpr, stmtCtx); - stmtCtx.finalizeAndReset(); - mlir::Value maskCondCast = - builder->createConvert(loc, builder->getI1Type(), maskCond); - auto ifOp = fir::IfOp::create(*builder, loc, maskCondCast, - /*withElseRegion=*/false); - builder->setInsertionPointToStart(&ifOp.getThenRegion().front()); - } - } - - IncrementLoopInfo &innermostInfo = incrementLoopNestInfo.back(); - - if (innermostInfo.hasLocalitySpecs()) - handleLocalitySpecs(innermostInfo); + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); + assert(llvm::range_size(transforms) == 1); + auto &&transform = *transforms.begin(); + auto d = transform.id; + assert(transform.id == llvm::omp::OMPD_interchange); + auto clauses = transform.clauses; - addLoopAnnotationAttr(innermostInfo, dirs); + bool hasPermutationClause = false; + llvm::SmallVector permutation; + auto &&permutationClause = + ClauseFinder::findUniqueClause( + clauses); + if (permutationClause) { + permutation.reserve(permutationClause->v.size()); + for (auto &&ts : permutationClause->v) { + permutation.push_back(evaluate::ToInt64(ts).value()); } + } else { + permutation = {2, 1}; } + llvm::SmallVector loops; + 
collectLoops(eval, loops, permutation.size()); - - void genFIR( fir::FirOpBuilder *builder , lower::pft::Evaluation &eval, llvm:: ArrayRef< lower::pft::Evaluation *> doStmts) { - // setCurrentPositionAt(doConstruct); - // Fortran::lower::pft::Evaluation &eval = getEval(); - bool unstructuredContext = eval.lowerAsUnstructured(); - - - // Collect loop nest information. - // Generate begin loop code directly for infinite and while loops. - Fortran::lower::pft::Evaluation &doStmtEval = eval.getFirstNestedEvaluation(); - auto *doStmt = doStmtEval.getIf(); - const auto &loopControl = - std::get>(doStmt->t); - mlir::Block *preheaderBlock = doStmtEval.block; - mlir::Block *beginBlock = preheaderBlock ? preheaderBlock : builder->getBlock(); - auto createNextBeginBlock = [&]() { - // Step beginBlock through unstructured preheader, header, and mask - // blocks, created in outermost to innermost order. - return beginBlock = beginBlock->splitBlock(beginBlock->end()); - }; - mlir::Block *headerBlock = unstructuredContext ? createNextBeginBlock() : nullptr; - mlir::Block *bodyBlock = doStmtEval.lexicalSuccessor->block; - mlir::Block *exitBlock = doStmtEval.parentConstruct->constructExit->block; - IncrementLoopNestInfo incrementLoopNestInfo; - const Fortran::parser::ScalarLogicalExpr *whileCondition = nullptr; - bool infiniteLoop = !loopControl.has_value(); - if (infiniteLoop) { - assert(unstructuredContext && "infinite loop must be unstructured"); - startBlock(headerBlock); - } else if ((whileCondition = - std::get_if( - &loopControl->u))) { - assert(unstructuredContext && "while loop must be unstructured"); - maybeStartBlock(preheaderBlock); // no block or empty block - startBlock(headerBlock); - genConditionalBranch(*whileCondition, bodyBlock, exitBlock); - } else if (const auto *bounds = - std::get_if( - &loopControl->u)) { - // Non-concurrent increment loop. 
- IncrementLoopInfo &info = incrementLoopNestInfo.emplace_back( - *bounds->name.thing.symbol, bounds->lower, bounds->upper, - bounds->step); - if (unstructuredContext) { - maybeStartBlock(preheaderBlock); - info.hasRealControl = info.loopVariableSym->GetType()->IsNumeric( - Fortran::common::TypeCategory::Real); - info.headerBlock = headerBlock; - info.bodyBlock = bodyBlock; - info.exitBlock = exitBlock; - } - } else { - llvm_unreachable("DO CONCURRENT unsupported"); - } - - - - // Increment loop begin code. (Infinite/while code was already generated.) - if (!infiniteLoop && !whileCondition) - genFIRIncrementLoopBegin(incrementLoopNestInfo, doStmtEval.dirs); - - // Loop body code. - auto iter = eval.getNestedEvaluations().begin(); - for (auto end = --eval.getNestedEvaluations().end(); iter != end; ++iter) - genFIR(*iter, unstructuredContext); - - // An EndDoStmt in unstructured code may start a new block. - Fortran::lower::pft::Evaluation &endDoEval = *iter; - assert(endDoEval.getIf() && "no enddo stmt"); - if (unstructuredContext) - maybeStartBlock(endDoEval.block); - - // Loop end code. - if (infiniteLoop || whileCondition) - genBranch(headerBlock); - else - genFIRIncrementLoopEnd(incrementLoopNestInfo); - - // This call may generate a branch in some contexts. 
- genFIR(endDoEval, unstructuredContext); - - + // TODO: Assert this is a valid permution + llvm::SmallVector newLoops; + for (auto perm : permutation) { + newLoops.push_back(loops[perm - 1]); } - - }; - } - - #endif - - - -static void genStandaloneInterchangeOp(Fortran::lower::AbstractConverter &converter, - Fortran::lower::SymMap &symTable, - lower::StatementContext &stmtCtx, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - mlir::Location loc, const ConstructQueue &queue, - ConstructQueue::const_iterator item) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - - auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); - auto transforms = llvm::make_range(q.end(), queue.end()); - assert(llvm::range_size(transforms)==1 ); - auto &&transform = *transforms.begin(); - auto d = transform.id; - assert(transform.id == llvm::omp::OMPD_interchange); - auto clauses = transform.clauses; - - bool hasPermutationClause = false; - llvm::SmallVector permutation; - auto &&permutationClause = ClauseFinder::findUniqueClause< Fortran::lower::omp::clause::Permutation>(clauses); - if (permutationClause) { - permutation.reserve(permutationClause->v.size()); - for (auto &&ts : permutationClause->v) { - permutation.push_back(evaluate::ToInt64(ts).value()); - } - } else { - permutation = {2, 1}; - } - - llvm::SmallVector< lower::pft::Evaluation* > loops; - collectLoops(eval,loops, permutation.size()); - // auto innermostDo = loops.back(); - // auto innermostBody = &*std::next(innermostDo->getNestedEvaluations().begin()); - - // TODO: Assert this is a valid permution - llvm::SmallVector< lower::pft::Evaluation* > newLoops; - for (auto perm : permutation) { - newLoops.push_back(loops[perm - 1]); - } - - converter.genPermutatedLoops(newLoops, loops.back()); - -#if 0 -MyFirConverter converter; -converter.builder = &firOpBuilder; -converter.genFir(eval, newLoops); -#endif - - - -#if 0 - mlir::omp::LoopRelatedClauseOps 
loopInfo; - llvm::SmallVector iv; - collectLoopRelatedInfo(converter, loc, eval, item->clauses, loopInfo, iv); -#endif + converter.genPermutatedLoops(newLoops, loops.back()); } - static mlir::omp::MaskedOp genMaskedOp(lower::AbstractConverter &converter, lower::SymMap &symTable, lower::StatementContext &stmtCtx, @@ -4100,7 +3607,8 @@ static void genOMPDispatch(lower::AbstractConverter &converter, genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; case llvm::omp::Directive::OMPD_interchange: - genStandaloneInterchangeOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); + genStandaloneInterchangeOp(converter, symTable, stmtCtx, semaCtx, eval, loc, + queue, item); break; case llvm::omp::Directive::OMPD_workdistribute: newOp = genWorkdistributeOp(converter, symTable, semaCtx, eval, loc, queue, From d77da88cc6f786e62d6ad074a62f09db9ead32ee Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 11 Sep 2025 23:32:46 +0200 Subject: [PATCH 44/57] Add tests --- .../parallel-wsloop-intdo-lastprivate.f90 | 28 ++++++++++++++++ .../parallel-wsloop-intdo-private-i.f90 | 33 +++++++++++++++++++ .../parallel-wsloop-intdo-private-j.f90 | 33 +++++++++++++++++++ 3 files changed, 94 insertions(+) create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-lastprivate.f90 create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-i.f90 create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-j.f90 diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-lastprivate.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-lastprivate.f90 new file mode 100644 index 0000000000000..42d7032bd2184 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-lastprivate.f90 @@ -0,0 +1,28 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! 
RUN: %t.exe | FileCheck %s --match-full-lines +! XFAIL: * + +program interchange_wsloop_intdo + integer :: i, j, k + print *, 'do' + + + !$OMP PARALLEL DO LASTPRIVATE(k) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + k = i*10 + j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' + print '("k=", I0)', k +end program + + +! CHECK: do +! CHECK-NEXT: done +! CHECK-NEXT: k=131 diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-i.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-i.f90 new file mode 100644 index 0000000000000..76928ce93577e --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-i.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO NUM_THREADS(1) PRIVATE(i) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-j.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-j.f90 new file mode 100644 index 0000000000000..a679c921e9660 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-j.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! 
RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO NUM_THREADS(1) PRIVATE(j) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done From 23778317aa35a031cdf65ad337e73c3565cd2a00 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 11 Sep 2025 23:46:18 +0200 Subject: [PATCH 45/57] Reduce diff --- flang/lib/Lower/OpenMP/OpenMP.cpp | 6 ------ flang/lib/Lower/OpenMP/Utils.cpp | 16 +++++++--------- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 01387cbc4dc60..37d2a7dc38cb7 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -45,16 +45,10 @@ #include "mlir/Transforms/RegionUtils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Path.h" using namespace Fortran::lower::omp; using namespace Fortran::common::openmp; using namespace Fortran::utils::openmp; -using namespace Fortran::semantics; //===----------------------------------------------------------------------===// // Code generation helper functions diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 91e40f6a0f5e6..e6559d27c84ba 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -25,19 +25,17 @@ #include #include #include -#include #include 
+#include #include #include -using namespace Fortran::semantics; - template -MaybeIntExpr EvaluateIntExpr(SemanticsContext &context, const T &expr) { - if (MaybeExpr maybeExpr{ +Fortran::semantics::MaybeIntExpr EvaluateIntExpr(Fortran::semantics::SemanticsContext &context, const T &expr) { + if (Fortran::semantics::MaybeExpr maybeExpr{ Fold(context.foldingContext(), AnalyzeExpr(context, expr))}) { - if (auto *intExpr{Fortran::evaluate::UnwrapExpr(*maybeExpr)}) { + if (auto *intExpr{Fortran::evaluate::UnwrapExpr(*maybeExpr)}) { return std::move(*intExpr); } } @@ -45,7 +43,7 @@ MaybeIntExpr EvaluateIntExpr(SemanticsContext &context, const T &expr) { } template -std::optional EvaluateInt64(SemanticsContext &context, +std::optional EvaluateInt64(Fortran::semantics::SemanticsContext &context, const T &expr) { return Fortran::evaluate::ToInt64(EvaluateIntExpr(context, expr)); } @@ -602,7 +600,7 @@ static void convertLoopBounds(lower::AbstractConverter &converter, /// Contains a loop construct with an inner tiling construct. void collectTileSizesFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, - llvm::SmallVectorImpl &tileSizes, SemanticsContext &semaCtx) { + llvm::SmallVectorImpl &tileSizes,Fortran::semantics:: SemanticsContext &semaCtx) { if (!ompCons) return; @@ -642,7 +640,7 @@ void collectTileSizesFromOpenMPConstruct( /// Contains a loop construct with an inner tiling construct. 
void collectPermutationFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, - llvm::SmallVectorImpl &permutation, SemanticsContext &semaCtx) { + llvm::SmallVectorImpl &permutation,Fortran::semantics:: SemanticsContext &semaCtx) { if (!ompCons) return; From 8ebe17b48f9611b4730c974a29814156cc05f03f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 11 Sep 2025 23:47:32 +0200 Subject: [PATCH 46/57] Reduce diff from trunk --- flang/lib/Lower/Bridge.cpp | 4 -- flang/lib/Lower/OpenMP/OpenMP.cpp | 20 +----- flang/lib/Lower/OpenMP/Utils.cpp | 17 +++-- .../mlir/Dialect/OpenMP/OpenMPClauses.td | 19 ------ mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 7 +- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 64 ++++++++----------- 6 files changed, 42 insertions(+), 89 deletions(-) diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index f54fc773bff6c..0e057d928d345 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -2197,7 +2197,6 @@ class FirConverter : public Fortran::lower::AbstractConverter { llvm::SmallVector loopInfos; auto enterLoop = [&](Fortran::lower::pft::Evaluation &eval) { - eval.dump(); bool unstructuredContext = eval.lowerAsUnstructured(); // Collect loop nest information. @@ -2260,7 +2259,6 @@ class FirConverter : public Fortran::lower::AbstractConverter { auto leaveLoop = [&](Fortran::lower::pft::Evaluation &eval, mlir::Block *headerBlock, IncrementLoopNestInfo &incrementLoopNestInfo) { - eval.dump(); bool unstructuredContext = eval.lowerAsUnstructured(); Fortran::lower::pft::Evaluation &doStmtEval = @@ -2295,8 +2293,6 @@ class FirConverter : public Fortran::lower::AbstractConverter { enterLoop(*l); // Loop body code. 
- // Fortran:: lower::pft::Evaluation *innermostEval = doStmts.back(); - // innermostEval->dump(); bool innermostUnstructuredContext = innermostDo->lowerAsUnstructured(); auto iter = innermostDo->getNestedEvaluations().begin(); diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 37d2a7dc38cb7..a1caabbe8cf55 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1202,10 +1202,6 @@ struct OpWithBodyGenInfo { static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info, const ConstructQueue &queue, ConstructQueue::const_iterator item) { - int a = 0; - if (a) { - op.dump(); - } fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder(); auto insertMarker = [](fir::FirOpBuilder &builder) { @@ -1348,10 +1344,6 @@ static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info, // wrapper region. mlir::Operation *privatizationBottomLevelOp = &op; if (auto loopNest = llvm::dyn_cast(op)) { - int b = 0; - if (b) { - loopNest.dump(); - } llvm::SmallVector wrappers; loopNest.gatherWrappers(wrappers); if (!wrappers.empty()) @@ -2034,7 +2026,6 @@ static mlir::omp::LoopNestOp genLoopNestOp( switch (d) { case llvm::omp::OMPD_interchange: { - bool hasPermutationClause = false; llvm::SmallVector permutation; auto &&permutationClause = ClauseFinder::findUniqueClause< @@ -2277,10 +2268,8 @@ static void collectLoops(lower::pft::Evaluation &eval, llvm::SmallVectorImpl &result, int numLoops) { - - std::size_t loopVarTypeSize = 0; lower::pft::Evaluation *doConstructEval = &eval.getFirstNestedEvaluation(); - for (auto i : llvm::seq(numLoops)) { + for ([[maybe_unused]] auto i : llvm::seq(numLoops)) { lower::pft::Evaluation *doLoop = &doConstructEval->getFirstNestedEvaluation(); auto *doStmt = doLoop->getIf(); @@ -2305,17 +2294,13 @@ static void genStandaloneInterchangeOp( Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, mlir::Location loc, 
const ConstructQueue &queue, ConstructQueue::const_iterator item) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); auto transforms = llvm::make_range(q.end(), queue.end()); assert(llvm::range_size(transforms) == 1); auto &&transform = *transforms.begin(); - auto d = transform.id; assert(transform.id == llvm::omp::OMPD_interchange); auto clauses = transform.clauses; - bool hasPermutationClause = false; llvm::SmallVector permutation; auto &&permutationClause = ClauseFinder::findUniqueClause( @@ -3415,7 +3400,6 @@ static mlir::omp::TaskloopOp genCompositeTaskloopSimd( lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); - auto transforms = llvm::make_range(q.end(), queue.end()); assert(llvm::range_size(q) == 2 && "Invalid leaf constructs"); @@ -4072,7 +4056,7 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, const Fortran::parser::OpenMPLoopConstruct &x = ompNestedLoopCons->value(); const Fortran::parser::OmpBeginLoopDirective &y = std::get<0>(x.t); - const Fortran::parser::OmpClauseList &clauseList = std::get<1>(y.t); + // const Fortran::parser::OmpClauseList &clauseList = std::get<1>(y.t); llvm::omp::Directive nestedDirective = parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; List nestedClauses = diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index e6559d27c84ba..30fff48181d73 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -32,10 +32,13 @@ #include template -Fortran::semantics::MaybeIntExpr EvaluateIntExpr(Fortran::semantics::SemanticsContext &context, const T &expr) { +Fortran::semantics::MaybeIntExpr +EvaluateIntExpr(Fortran::semantics::SemanticsContext &context, const T &expr) { if (Fortran::semantics::MaybeExpr maybeExpr{ 
Fold(context.foldingContext(), AnalyzeExpr(context, expr))}) { - if (auto *intExpr{Fortran::evaluate::UnwrapExpr(*maybeExpr)}) { + if (auto *intExpr{ + Fortran::evaluate::UnwrapExpr( + *maybeExpr)}) { return std::move(*intExpr); } } @@ -43,8 +46,8 @@ Fortran::semantics::MaybeIntExpr EvaluateIntExpr(Fortran::semantics::SemanticsCo } template -std::optional EvaluateInt64(Fortran::semantics::SemanticsContext &context, - const T &expr) { +std::optional +EvaluateInt64(Fortran::semantics::SemanticsContext &context, const T &expr) { return Fortran::evaluate::ToInt64(EvaluateIntExpr(context, expr)); } @@ -600,7 +603,8 @@ static void convertLoopBounds(lower::AbstractConverter &converter, /// Contains a loop construct with an inner tiling construct. void collectTileSizesFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, - llvm::SmallVectorImpl &tileSizes,Fortran::semantics:: SemanticsContext &semaCtx) { + llvm::SmallVectorImpl &tileSizes, + Fortran::semantics::SemanticsContext &semaCtx) { if (!ompCons) return; @@ -640,7 +644,8 @@ void collectTileSizesFromOpenMPConstruct( /// Contains a loop construct with an inner tiling construct. 
void collectPermutationFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, - llvm::SmallVectorImpl &permutation,Fortran::semantics:: SemanticsContext &semaCtx) { + llvm::SmallVectorImpl &permutation, + Fortran::semantics::SemanticsContext &semaCtx) { if (!ompCons) return; diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td index 9102a4320c578..5f40abe62a0f6 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td @@ -1418,25 +1418,6 @@ class OpenMP_TileSizesClauseSkip< def OpenMP_TileSizesClause : OpenMP_TileSizesClauseSkip<>; - -//===----------------------------------------------------------------------===// -// V6.0: [xx.x] `permutation` clause -//===----------------------------------------------------------------------===// - -class OpenMP_PermutationClauseSkip< - bit traits = false, bit arguments = false, bit assemblyFormat = false, - bit description = false, bit extraClassDeclaration = false - > : OpenMP_Clause { - let arguments = (ins - BoolAttr:$interchangeEnabled, - OptionalAttr:$permutation - ); -} - -def OpenMP_PermutationClause : OpenMP_PermutationClauseSkip<>; - - //===----------------------------------------------------------------------===// // V5.2: [12.1] `untied` clause //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 6abff7c6ddc41..aa88b9e8eef5a 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -33,11 +33,6 @@ #include "llvm/ADT/TypeSwitch.h" #include "llvm/ADT/bit.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Path.h" #include #include 
#include @@ -2985,7 +2980,7 @@ ParseResult LoopNestOp::parse(OpAsmParser &parser, OperationState &result) { return failure(); if (value > 1) result.addAttribute( - "num_collapse", + "collapse_num_loops", IntegerAttr::get(parser.getBuilder().getI64Type(), value)); // Parse tiles diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 6600747ad85e8..2ab6bb0a73200 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -3044,53 +3044,45 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy afterIP = loopInfos.front()->getAfterIP(); - llvm::CanonicalLoopInfo *NewTopLoopInfo = nullptr; - - // Do tiling + // Do tiling. if (const auto &tiles = loopOp.getTileSizes()) { - llvm::Type *IVType = loopInfos.front()->getIndVarType(); - SmallVector TileSizes; + llvm::Type *ivType = loopInfos.front()->getIndVarType(); + SmallVector tileSizes; for (auto tile : tiles.value()) { - llvm::Value *TileVal = llvm::ConstantInt::get(IVType, tile); - TileSizes.push_back(TileVal); + llvm::Value *tileVal = llvm::ConstantInt::get(ivType, tile); + tileSizes.push_back(tileVal); } - std::vector NewLoops = - ompBuilder->tileLoops(ompLoc.DL, loopInfos, TileSizes); + std::vector newLoops = + ompBuilder->tileLoops(ompLoc.DL, loopInfos, tileSizes); // Update afterIP to get the correct insertion point after // tiling. - llvm::BasicBlock *AfterBB = NewLoops.front()->getAfter(); - llvm::BasicBlock *AfterAfterBB = AfterBB->getSingleSuccessor(); - afterIP = {AfterAfterBB, AfterAfterBB->begin()}; - NewTopLoopInfo = NewLoops[0]; + llvm::BasicBlock *afterBB = newLoops.front()->getAfter(); + llvm::BasicBlock *afterAfterBB = afterBB->getSingleSuccessor(); + afterIP = {afterAfterBB, afterAfterBB->begin()}; - // Update the loop infos + // Update the loop infos. 
loopInfos.clear(); - for (const auto &newLoop : NewLoops) + for (const auto &newLoop : newLoops) loopInfos.push_back(newLoop); - } // Tiling done - - // Do collapse - if (const auto &numCollapse = loopOp.getCollapseNumLoops()) { - SmallVector collapseLoopInfos( - loopInfos.begin(), loopInfos.begin() + (numCollapse)); - - auto newLoopInfo = - ompBuilder->collapseLoops(ompLoc.DL, collapseLoopInfos, {}); - NewTopLoopInfo = newLoopInfo; - } // Collapse done - - // Update the stack frame created for this loop to point to the resulting - // loop after applying transformations. - if (NewTopLoopInfo) { - moduleTranslation.stackWalk( - [&](OpenMPLoopInfoStackFrame &frame) { - frame.loopInfo = NewTopLoopInfo; - return WalkResult::interrupt(); - }); - } + } // Tiling done. + + // Do collapse. + const auto &numCollapse = loopOp.getCollapseNumLoops(); + SmallVector collapseLoopInfos( + loopInfos.begin(), loopInfos.begin() + (numCollapse)); + + auto newTopLoopInfo = + ompBuilder->collapseLoops(ompLoc.DL, collapseLoopInfos, {}); + + assert(newTopLoopInfo && "New top loop information is missing"); + moduleTranslation.stackWalk( + [&](OpenMPLoopInfoStackFrame &frame) { + frame.loopInfo = newTopLoopInfo; + return WalkResult::interrupt(); + }); // Continue building IR after the loop. 
Note that the LoopInfo returned by // `collapseLoops` points inside the outermost loop and is intended for From a646a1d795267d4bf759f1f711851ead3e029ec9 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 12 Sep 2025 00:23:12 +0200 Subject: [PATCH 47/57] Add tests --- flang/lib/Lower/OpenMP/Utils.cpp | 1 - .../parallel-wsloop-intdo-collapse.f90 | 33 +++++++++++++++++++ .../parallel-wsloop-intdo-permutation.f90 | 33 +++++++++++++++++++ 3 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-collapse.f90 create mode 100644 openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-permutation.f90 diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 30fff48181d73..7aa8c30ebd679 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -607,7 +607,6 @@ void collectTileSizesFromOpenMPConstruct( Fortran::semantics::SemanticsContext &semaCtx) { if (!ompCons) return; - if (auto *ompLoop{std::get_if(&ompCons->u)}) { const auto &nestedOptional = std::get>(ompLoop->t); diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-collapse.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-collapse.f90 new file mode 100644 index 0000000000000..4285edaa775b8 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-collapse.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO COLLAPSE(2) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! 
CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-permutation.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-permutation.f90 new file mode 100644 index 0000000000000..e52389f2448e4 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-permutation.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO NUM_THREADS(1) + !$OMP INTERCHANGE PERMUTATION(2,1) + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! 
CHECK-NEXT: done From 9c7155ae7a8471ba27a039bf4f4a2f7faed32052 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 12 Sep 2025 00:29:26 +0200 Subject: [PATCH 48/57] Reduce diff --- flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 7 ++++ flang/lib/Lower/OpenMP/ClauseProcessor.h | 2 ++ flang/lib/Lower/OpenMP/OpenMP.cpp | 39 ++-------------------- flang/lib/Lower/OpenMP/Utils.cpp | 3 +- flang/lib/Semantics/resolve-directives.cpp | 1 - 5 files changed, 14 insertions(+), 38 deletions(-) diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 23f0ca14e931d..96e21872e4643 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -522,6 +522,13 @@ bool ClauseProcessor::processProcBind( return false; } +bool ClauseProcessor::processTileSizes( + lower::pft::Evaluation &eval, mlir::omp::LoopNestOperands &result) const { + auto *ompCons{eval.getIf()}; + collectTileSizesFromOpenMPConstruct(ompCons, result.tileSizes, semaCtx); + return !result.tileSizes.empty(); +} + bool ClauseProcessor::processSafelen( mlir::omp::SafelenClauseOps &result) const { if (auto *clause = findUniqueClause()) { diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index c46bdb348a3ef..01ac15a1ffc71 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -98,6 +98,8 @@ class ClauseProcessor { bool processPriority(lower::StatementContext &stmtCtx, mlir::omp::PriorityClauseOps &result) const; bool processProcBind(mlir::omp::ProcBindClauseOps &result) const; + bool processTileSizes(lower::pft::Evaluation &eval, + mlir::omp::LoopNestOperands &result) const; bool processSafelen(mlir::omp::SafelenClauseOps &result) const; bool processSchedule(lower::StatementContext &stmtCtx, mlir::omp::ScheduleClauseOps &result) const; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 
a1caabbe8cf55..d301ceec555e0 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -406,7 +406,6 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, return; const parser::OmpClauseList *beginClauseList = nullptr; - const parser::OmpClauseList *middleClauseList = nullptr; const parser::OmpClauseList *endClauseList = nullptr; common::visit( common::visitors{ @@ -421,30 +420,6 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, beginClauseList = &std::get(beginDirective.t); - // For now we check if there is an inner OpenMPLoopConstruct, and - // extract the size clause from there - const auto &nestedOptional = - std::get>( - ompConstruct.t); - assert(nestedOptional.has_value() && - "Expected a DoConstruct or OpenMPLoopConstruct"); - const auto *innerConstruct = - std::get_if>( - &(nestedOptional.value())); - if (innerConstruct) { - const auto &innerLoopConstruct = innerConstruct->value(); - const auto &innerBegin = - std::get( - innerLoopConstruct.t); - const auto &innerDirective = - std::get(innerBegin.t); - if (innerDirective.v == llvm::omp::Directive::OMPD_tile || - innerDirective.v == - llvm::omp::Directive::OMPD_interchange) { - middleClauseList = - &std::get(innerBegin.t); - } - } if (auto &endDirective = std::get>( ompConstruct.t)) { @@ -458,9 +433,6 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, assert(beginClauseList && "expected begin directive"); clauses.append(makeClauses(*beginClauseList, semaCtx)); - if (middleClauseList) - clauses.append(makeClauses(*middleClauseList, semaCtx)); - if (endClauseList) clauses.append(makeClauses(*endClauseList, semaCtx)); }; @@ -1626,11 +1598,7 @@ genLoopNestClauses(lower::AbstractConverter &converter, } } - llvm::SmallVector sizeValues; - auto *ompCons{eval.getIf()}; - collectTileSizesFromOpenMPConstruct(ompCons, sizeValues, semaCtx); - if (sizeValues.size() > 0) - clauseOps.tileSizes = sizeValues; + 
cp.processTileSizes(eval, clauseOps); } static void genLoopClauses( @@ -2009,8 +1977,7 @@ static mlir::omp::LoopNestOp genLoopNestOp( return llvm::SmallVector(iv); }; - uint64_t nestValue = getCollapseValue( - item->clauses); // MK: Should be number of affected loops? + uint64_t nestValue = getCollapseValue(item->clauses); nestValue = nestValue < iv.size() ? iv.size() : nestValue; auto *nestedEval = getCollapsedLoopEval(eval, nestValue); @@ -4065,7 +4032,7 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, switch (nestedDirective) { case llvm::omp::Directive::OMPD_tile: // Skip OMPD_tile since the tile sizes will be retrieved when - // generating the omp.looop_nest op. + // generating the omp.loop_nest op. break; case llvm::omp::Directive::OMPD_interchange: { ConstructQueue nestedQueue{buildConstructQueue( diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 7aa8c30ebd679..464cb46e59cf8 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -626,7 +626,7 @@ void collectTileSizesFromOpenMPConstruct( // Get the size values from parse tree and convert to a vector const auto &innerClauseList{ std::get(innerBegin.t)}; - for (const auto &clause : innerClauseList.v) + for (const auto &clause : innerClauseList.v) { if (const auto tclause{ std::get_if(&clause.u)}) { for (auto &tval : tclause->v) { @@ -634,6 +634,7 @@ void collectTileSizesFromOpenMPConstruct( tileSizes.push_back(*v); } } + } } } } diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 00d960914de31..649d97e081ac5 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -2157,7 +2157,6 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( std::get_if(&clause.u)}) { levels.push_back(tclause->v.size()); clauses.push_back(&clause); - // llvm_unreachable("MK: fetch permute depth"); return; } } From 
ac411583a86ae5dc1216da470eaf9365b03a35f4 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 12 Sep 2025 00:59:38 +0200 Subject: [PATCH 49/57] Reduce diff size --- flang/lib/Lower/OpenMP/Utils.cpp | 12 +++++------- flang/lib/Lower/OpenMP/Utils.h | 2 +- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 464cb46e59cf8..035ffdaced2e7 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -681,12 +681,12 @@ void collectPermutationFromOpenMPConstruct( } } -bool collectLoopRelatedInfo( +int64_t collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, lower::pft::Evaluation &eval, const omp::List &clauses, mlir::omp::LoopRelatedClauseOps &result, llvm::SmallVectorImpl &iv) { - bool found = false; + int64_t numCollapse = 1; fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); // Collect the loops to collapse. @@ -699,10 +699,10 @@ bool collectLoopRelatedInfo( if (auto *clause = ClauseFinder::findUniqueClause(clauses)) { collapseValue = evaluate::ToInt64(clause->v).value(); - found = true; + numCollapse = collapseValue; } - // Collect sizes from tile directive if present + // Collect sizes from tile directive if present. 
std::int64_t sizesLengthValue = 0l; std::int64_t permutationLengthValue = 0l; if (auto *ompCons{eval.getIf()}) { @@ -729,7 +729,6 @@ bool collectLoopRelatedInfo( if (const auto tclause{ std::get_if(&clause.u)}) { sizesLengthValue = tclause->v.size(); - found = true; } } @@ -741,7 +740,6 @@ bool collectLoopRelatedInfo( if (const auto tclause{ std::get_if(&clause.u)}) { permutationLengthValue = tclause->v.size(); - found = true; } } // default: permution(2,1) @@ -791,7 +789,7 @@ bool collectLoopRelatedInfo( convertLoopBounds(converter, currentLocation, result, loopVarTypeSize); - return found; + return numCollapse; } } // namespace omp diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index aea882dfb7dc6..a6b6e16bbbc8b 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -159,7 +159,7 @@ void genObjectList(const ObjectList &objects, void lastprivateModifierNotSupported(const omp::clause::Lastprivate &lastp, mlir::Location loc); -bool collectLoopRelatedInfo( +int64_t collectLoopRelatedInfo( lower::AbstractConverter &converter, mlir::Location currentLocation, lower::pft::Evaluation &eval, const omp::List &clauses, mlir::omp::LoopRelatedClauseOps &result, From 7a3c46c4ac200e6d4228f62d8603612e26522873 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 12 Sep 2025 01:05:49 +0200 Subject: [PATCH 50/57] Reduce diff size --- flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 11 ++++++++--- flang/lib/Lower/OpenMP/ClauseProcessor.h | 3 ++- flang/lib/Lower/OpenMP/OpenMP.cpp | 8 ++++---- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 96e21872e4643..a96884f5680ba 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -273,10 +273,15 @@ bool ClauseProcessor::processCancelDirectiveName( bool ClauseProcessor::processCollapse( mlir::Location currentLocation, 
lower::pft::Evaluation &eval, - mlir::omp::LoopRelatedClauseOps &result, + mlir::omp::LoopRelatedClauseOps &loopResult, + mlir::omp::CollapseClauseOps &collapseResult, llvm::SmallVectorImpl &iv) const { - return collectLoopRelatedInfo(converter, currentLocation, eval, clauses, - result, iv); + + int64_t numCollapse = collectLoopRelatedInfo(converter, currentLocation, eval, + clauses, loopResult, iv); + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + collapseResult.collapseNumLoops = firOpBuilder.getI64IntegerAttr(numCollapse); + return numCollapse > 1; } bool ClauseProcessor::processDevice(lower::StatementContext &stmtCtx, diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index 01ac15a1ffc71..324ea3c1047a5 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -63,7 +63,8 @@ class ClauseProcessor { mlir::omp::CancelDirectiveNameClauseOps &result) const; bool processCollapse(mlir::Location currentLocation, lower::pft::Evaluation &eval, - mlir::omp::LoopRelatedClauseOps &result, + mlir::omp::LoopRelatedClauseOps &loopResult, + mlir::omp::CollapseClauseOps &collapseResult, llvm::SmallVectorImpl &iv) const; bool processDevice(lower::StatementContext &stmtCtx, mlir::omp::DeviceClauseOps &result) const; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index d301ceec555e0..529b375005c92 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -503,7 +503,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_distribute: case OMPD_distribute_simd: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); break; case OMPD_teams: @@ -522,7 +522,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, [[fallthrough]]; case OMPD_target_teams_distribute: 
case OMPD_target_teams_distribute_simd: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); cp.processNumTeams(stmtCtx, hostInfo->ops); break; @@ -533,7 +533,7 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, cp.processNumTeams(stmtCtx, hostInfo->ops); [[fallthrough]]; case OMPD_loop: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->iv); + cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); break; case OMPD_teams_workdistribute: @@ -1573,7 +1573,7 @@ genLoopNestClauses(lower::AbstractConverter &converter, HostEvalInfo *hostEvalInfo = getHostEvalInfoStackTop(converter); if (!hostEvalInfo || !hostEvalInfo->apply(clauseOps, iv)) - cp.processCollapse(loc, eval, clauseOps, iv); + cp.processCollapse(loc, eval, clauseOps, clauseOps, iv); clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr(); From 6ed3cea30effab123d08bd8203ef5543edaf202d Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 12 Sep 2025 01:13:57 +0200 Subject: [PATCH 51/57] Reduce diff size --- flang/lib/Lower/OpenMP/Utils.cpp | 117 ++++++++++++++++--------------- 1 file changed, 60 insertions(+), 57 deletions(-) diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 035ffdaced2e7..6882d95ce3daf 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -599,12 +599,11 @@ static void convertLoopBounds(lower::AbstractConverter &converter, } } -/// Populates the sizes vector with values if the given OpenMPConstruct -/// Contains a loop construct with an inner tiling construct. -void collectTileSizesFromOpenMPConstruct( +// Helper function that finds the sizes clause in a inner OMPD_tile directive +// and passes the sizes clause to the callback function if found. 
+static void processTileSizesFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, - llvm::SmallVectorImpl &tileSizes, - Fortran::semantics::SemanticsContext &semaCtx) { + std::function processFun) { if (!ompCons) return; if (auto *ompLoop{std::get_if(&ompCons->u)}) { @@ -623,16 +622,14 @@ void collectTileSizesFromOpenMPConstruct( std::get(innerBegin.t).v; if (innerDirective == llvm::omp::Directive::OMPD_tile) { - // Get the size values from parse tree and convert to a vector + // Get the size values from parse tree and convert to a vector. const auto &innerClauseList{ std::get(innerBegin.t)}; for (const auto &clause : innerClauseList.v) { if (const auto tclause{ std::get_if(&clause.u)}) { - for (auto &tval : tclause->v) { - if (const auto v{EvaluateInt64(semaCtx, tval)}) - tileSizes.push_back(*v); - } + processFun(tclause); + break; } } } @@ -641,44 +638,17 @@ void collectTileSizesFromOpenMPConstruct( } /// Populates the sizes vector with values if the given OpenMPConstruct -/// Contains a loop construct with an inner tiling construct. -void collectPermutationFromOpenMPConstruct( +/// contains a loop construct with an inner tiling construct. 
+void collectTileSizesFromOpenMPConstruct( const parser::OpenMPConstruct *ompCons, - llvm::SmallVectorImpl &permutation, + llvm::SmallVectorImpl &tileSizes, Fortran::semantics::SemanticsContext &semaCtx) { - if (!ompCons) - return; - - if (auto *ompLoop{std::get_if(&ompCons->u)}) { - const auto &nestedOptional = - std::get>(ompLoop->t); - assert(nestedOptional.has_value() && - "Expected a DoConstruct or OpenMPLoopConstruct"); - const auto *innerConstruct = - std::get_if>( - &(nestedOptional.value())); - if (innerConstruct) { - const auto &innerLoopDirective = innerConstruct->value(); - const auto &innerBegin = - std::get(innerLoopDirective.t); - const auto &innerDirective = - std::get(innerBegin.t).v; - - if (innerDirective == llvm::omp::Directive::OMPD_interchange) { - // Get the size values from parse tree and convert to a vector - const auto &innerClauseList{ - std::get(innerBegin.t)}; - for (const auto &clause : innerClauseList.v) - if (const auto tclause{ - std::get_if(&clause.u)}) { - for (auto &tval : tclause->v) { - if (const auto v{EvaluateInt64(semaCtx, tval)}) - permutation.push_back(*v); - } - } - } - } - } + processTileSizesFromOpenMPConstruct( + ompCons, [&](const parser::OmpClause::Sizes *tclause) { + for (auto &tval : tclause->v) + if (const auto v{EvaluateInt64(semaCtx, tval)}) + tileSizes.push_back(*v); + }); } int64_t collectLoopRelatedInfo( @@ -706,6 +676,11 @@ int64_t collectLoopRelatedInfo( std::int64_t sizesLengthValue = 0l; std::int64_t permutationLengthValue = 0l; if (auto *ompCons{eval.getIf()}) { + processTileSizesFromOpenMPConstruct( + ompCons, [&](const parser::OmpClause::Sizes *tclause) { + sizesLengthValue = tclause->v.size(); + }); + if (auto *ompLoop{std::get_if(&ompCons->u)}) { const auto &nestedOptional = std::get>(ompLoop->t); @@ -721,17 +696,6 @@ int64_t collectLoopRelatedInfo( const auto &innerDirective = std::get(innerBegin.t).v; - if (innerDirective == llvm::omp::Directive::OMPD_tile) { - // Get the size values from parse 
tree and convert to a vector - const auto &innerClauseList{ - std::get(innerBegin.t)}; - for (const auto &clause : innerClauseList.v) - if (const auto tclause{ - std::get_if(&clause.u)}) { - sizesLengthValue = tclause->v.size(); - } - } - if (innerDirective == llvm::omp::Directive::OMPD_interchange) { // Get the size values from parse tree and convert to a vector const auto &innerClauseList{ @@ -792,6 +756,45 @@ int64_t collectLoopRelatedInfo( return numCollapse; } +void collectPermutationFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + llvm::SmallVectorImpl &permutation, + Fortran::semantics::SemanticsContext &semaCtx) { + if (!ompCons) + return; + + if (auto *ompLoop{std::get_if(&ompCons->u)}) { + const auto &nestedOptional = + std::get>(ompLoop->t); + assert(nestedOptional.has_value() && + "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = + std::get_if>( + &(nestedOptional.value())); + if (innerConstruct) { + const auto &innerLoopDirective = innerConstruct->value(); + const auto &innerBegin = + std::get(innerLoopDirective.t); + const auto &innerDirective = + std::get(innerBegin.t).v; + + if (innerDirective == llvm::omp::Directive::OMPD_interchange) { + // Get the size values from parse tree and convert to a vector + const auto &innerClauseList{ + std::get(innerBegin.t)}; + for (const auto &clause : innerClauseList.v) + if (const auto tclause{ + std::get_if(&clause.u)}) { + for (auto &tval : tclause->v) { + if (const auto v{EvaluateInt64(semaCtx, tval)}) + permutation.push_back(*v); + } + } + } + } + } +} + } // namespace omp } // namespace lower } // namespace Fortran From 3660ee420684e2b108228bf8c5ffeb5f07a0c948 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 20 Sep 2025 03:37:56 +0200 Subject: [PATCH 52/57] Post-merge fixes --- flang/lib/Lower/OpenMP/OpenMP.cpp | 40 ++++++++-------------- flang/lib/Lower/OpenMP/Utils.cpp | 33 ++++++++---------- flang/lib/Parser/openmp-parsers.cpp | 4 +-- 
flang/lib/Parser/unparse.cpp | 3 -- flang/lib/Semantics/canonicalize-omp.cpp | 10 ++---- flang/lib/Semantics/resolve-directives.cpp | 11 +++--- 6 files changed, 39 insertions(+), 62 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index ebe49553811d1..2ded09f0c51fd 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3983,35 +3983,27 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, const parser::OpenMPLoopConstruct &loopConstruct) { const parser::OmpDirectiveSpecification &beginSpec = loopConstruct.BeginDir(); List clauses = makeClauses(beginSpec.Clauses(), semaCtx); - if (auto &endSpec = loopConstruct.EndDir()) - clauses.append(makeClauses(endSpec->Clauses(), semaCtx)); + if (auto &endSpec = loopConstruct.EndDir()) clauses.append(makeClauses(endSpec->Clauses(), semaCtx)); mlir::Location currentLocation = converter.genLocation(beginSpec.source); - llvm::omp::Directive directive = - parser::omp::GetOmpDirectiveName(beginLoopDirective).v; - const parser::CharBlock &source = - std::get(beginLoopDirective.t).source; +// llvm::omp::Directive directive = Fortran::parser::omp::GetOmpDirectiveName(loopConstruct).v; + // parser::omp::GetOmpDirectiveName(beginLoopDirective).v; + //const parser::CharBlock &source = std::get(beginLoopDirective.t).source; - const parser::OmpDirectiveName &beginName = beginSpec.DirName(); - ConstructQueue queue{ - buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, - eval, beginName.source, beginName.v, clauses)}; + const parser::OmpDirectiveName &beginName = beginSpec.DirName(); + ConstructQueue queue{ buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, eval, beginName.source, beginName.v, clauses)}; - auto &optLoopCons = - std::get>(loopConstruct.t); + auto &optLoopCons = std::get>(loopConstruct.t); if (optLoopCons.has_value()) { - if (auto *ompNestedLoopCons{ - std::get_if>( - 
&*optLoopCons)}) { - const Fortran::parser::OpenMPLoopConstruct &x = - ompNestedLoopCons->value(); - const Fortran::parser::OmpBeginLoopDirective &y = std::get<0>(x.t); + if (auto *ompNestedLoopCons{ std::get_if>( &*optLoopCons)}) { + const Fortran::parser::OpenMPLoopConstruct &x = ompNestedLoopCons->value(); + // const Fortran::parser::OmpBeginLoopDirective &y = std::get<0>(x.t); // const Fortran::parser::OmpClauseList &clauseList = std::get<1>(y.t); - llvm::omp::Directive nestedDirective = - parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; - List nestedClauses = - makeClauses(std::get(y.t), semaCtx); + llvm::omp::Directive nestedDirective = parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; + + List nestedClauses = makeClauses(x.BeginDir().Clauses(), semaCtx); + // makeClauses(std::get(y.t), semaCtx); switch (nestedDirective) { case llvm::omp::Directive::OMPD_tile: @@ -4019,9 +4011,7 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, // generating the omp.loop_nest op. 
break; case llvm::omp::Directive::OMPD_interchange: { - ConstructQueue nestedQueue{buildConstructQueue( - converter.getFirOpBuilder().getModule(), semaCtx, eval, source, - nestedDirective, nestedClauses)}; + ConstructQueue nestedQueue{buildConstructQueue( converter.getFirOpBuilder().getModule(), semaCtx, eval, beginName.source, nestedDirective, nestedClauses)}; for (auto nl : nestedQueue) { queue.push_back(nl); } diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index a5e9060734cdb..d4f3eefdeaf2e 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -677,24 +677,19 @@ int64_t collectLoopRelatedInfo( }); if (auto *ompLoop{std::get_if(&ompCons->u)}) { - const auto &nestedOptional = - std::get>(ompLoop->t); - assert(nestedOptional.has_value() && - "Expected a DoConstruct or OpenMPLoopConstruct"); - const auto *innerConstruct = - std::get_if>( - &(nestedOptional.value())); + const auto &nestedOptional = std::get>(ompLoop->t); + assert(nestedOptional.has_value() && "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = std::get_if>( &(nestedOptional.value())); if (innerConstruct) { const auto &innerLoopDirective = innerConstruct->value(); - const auto &innerBegin = - std::get(innerLoopDirective.t); - const auto &innerDirective = - std::get(innerBegin.t).v; + const auto &innerBegin = std::get(innerLoopDirective.t); + const auto &innerDirective = Fortran::parser::omp::GetOmpDirectiveName(innerBegin).v; + //std::get(innerBegin.t).v; if (innerDirective == llvm::omp::Directive::OMPD_interchange) { // Get the size values from parse tree and convert to a vector - const auto &innerClauseList{ - std::get(innerBegin.t)}; + const auto &innerClauseList { innerBegin.Clauses() }; + // const auto &innerClauseList{ std::get(innerBegin.t)}; for (const auto &clause : innerClauseList.v) { if (const auto tclause{ std::get_if(&clause.u)}) { @@ -768,15 +763,15 @@ void 
collectPermutationFromOpenMPConstruct( &(nestedOptional.value())); if (innerConstruct) { const auto &innerLoopDirective = innerConstruct->value(); - const auto &innerBegin = - std::get(innerLoopDirective.t); - const auto &innerDirective = - std::get(innerBegin.t).v; + const auto &innerBegin = innerLoopDirective.BeginDir(); + //std::get(innerLoopDirective.t); + const auto &innerDirective = Fortran::parser::omp::GetOmpDirectiveName(innerLoopDirective).v; + //std::get(innerBegin.t).v; if (innerDirective == llvm::omp::Directive::OMPD_interchange) { // Get the size values from parse tree and convert to a vector - const auto &innerClauseList{ - std::get(innerBegin.t)}; + const auto &innerClauseList{ innerBegin.Clauses() }; + //std::get(innerBegin.t)}; for (const auto &clause : innerClauseList.v) if (const auto tclause{ std::get_if(&clause.u)}) { diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index b2ab9ce8551ac..cfe42cb34653f 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -1366,7 +1366,6 @@ TYPE_PARSER(sourced(construct( TYPE_PARSER(sourced(construct( verbatim("METADIRECTIVE"_tok), Parser{}))) - "INTERCHANGE" >> pure(llvm::omp::Directive::OMPD_interchange), static inline constexpr auto IsDirective(llvm::omp::Directive dir) { return [dir](const OmpDirectiveName &name) -> bool { return dir == name.v; }; } @@ -1954,7 +1953,8 @@ static constexpr DirectiveSet GetLoopDirectives() { unsigned(Directive::OMPD_teams_distribute_simd), unsigned(Directive::OMPD_teams_loop), unsigned(Directive::OMPD_tile), - unsigned(Directive::OMPD_unroll), + unsigned(Directive::OMPD_unroll), + unsigned(Directive::OMPD_interchange), }; return loopDirectives; } diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 02dfc3bf4955d..73bbbc04f46b1 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2403,9 +2403,6 @@ class UnparseVisitor { } #define 
GEN_FLANG_CLAUSE_UNPARSE #include "llvm/Frontend/OpenMP/OMP.inc" - case llvm::omp::Directive::OMPD_interchange: - Word("INTERCHANGE "); - break; void Unparse(const OmpObjectList &x) { Walk(x.v, ","); } void Unparse(const common::OmpMemoryOrderType &x) { diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index bb5bbb3c3be8d..b34296271f79a 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -177,12 +177,8 @@ class CanonicalizationOfOmp { // OpenMP Loop Construct and the DO loop itself auto &nestedBeginDirective = ompLoopCons->BeginDir(); auto &nestedBeginName = nestedBeginDirective.DirName(); - if ((nestedBeginName.v == llvm::omp::Directive::OMPD_unroll || - nestedBeginName.v == llvm::omp::Directive::OMPD_tile) && - !(nestedBeginName.v == llvm::omp::Directive::OMPD_unroll && - beginName.v == llvm::omp::Directive::OMPD_tile|| - beginName.v == - llvm::omp::Directive::OMPD_interchange)) { + if ((nestedBeginName.v == llvm::omp::Directive::OMPD_unroll || nestedBeginName.v == llvm::omp::Directive::OMPD_tile || nestedBeginName.v == llvm::omp::Directive::OMPD_interchange) && + !(nestedBeginName.v == llvm::omp::Directive::OMPD_unroll &&beginName.v == llvm::omp::Directive::OMPD_tile)) { // iterate through the remaining block items to find the end directive // for the unroll/tile directive. 
parser::Block::iterator endIt; @@ -207,7 +203,7 @@ class CanonicalizationOfOmp { common::Indirection{std::move(*ompLoopCons)}}}; nextIt = block.erase(nextIt); } else if (nestedBeginName.v == llvm::omp::Directive::OMPD_unroll && - beginName.v == llvm::omp::Directive::OMPD_tile) { + beginName.v == llvm::omp::Directive::OMPD_tile ) { // if a loop has been unrolled, the user can not then tile that loop // as it has been unrolled const parser::OmpClauseList &unrollClauseList{ diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 2961fdf52709f..254e4a2f47b4d 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -2126,12 +2126,11 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( const parser::OpenMPLoopConstruct &y, const parser::OmpClauseList &x, llvm::SmallVector &levels, llvm::SmallVector &clauses) { - const auto &beginLoopDir{std::get(y.t)}; - auto &&yt = std::get<0>(beginLoopDir.t); - - const auto &beginDir{std::get(beginLoopDir.t)}; - const auto &dirClauses{std::get(beginLoopDir.t)}; - auto ytv = beginDir.v; + const auto &beginLoopDir{ y.BeginDir() }; + const auto &dirClauses{ beginLoopDir.Clauses()}; +// const auto &beginLoopDir{std::get(y.t)}; +// const auto &dirClauses{std::get(beginLoopDir.t)}; + auto ytv = Fortran::parser::omp::GetOmpDirectiveName (y).v; for (const auto &clause : x.v) { if (const auto oclause{ From 82ea715367651dc7670b5a727bcae4e72e677f23 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 20 Sep 2025 03:44:27 +0200 Subject: [PATCH 53/57] cleanup --- flang/lib/Lower/OpenMP/OpenMP.cpp | 36 +++++++++---------- flang/lib/Lower/OpenMP/Utils.cpp | 31 ++++++++-------- flang/lib/Parser/openmp-parsers.cpp | 2 +- flang/lib/Semantics/CMakeLists.txt | 42 +++++++++------------- flang/lib/Semantics/canonicalize-omp.cpp | 9 +++-- flang/lib/Semantics/resolve-directives.cpp | 10 +++--- 6 files changed, 63 insertions(+), 67 deletions(-) 
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 2ded09f0c51fd..de2dea9bbecbe 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -3983,27 +3983,28 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, const parser::OpenMPLoopConstruct &loopConstruct) { const parser::OmpDirectiveSpecification &beginSpec = loopConstruct.BeginDir(); List clauses = makeClauses(beginSpec.Clauses(), semaCtx); - if (auto &endSpec = loopConstruct.EndDir()) clauses.append(makeClauses(endSpec->Clauses(), semaCtx)); + if (auto &endSpec = loopConstruct.EndDir()) + clauses.append(makeClauses(endSpec->Clauses(), semaCtx)); mlir::Location currentLocation = converter.genLocation(beginSpec.source); -// llvm::omp::Directive directive = Fortran::parser::omp::GetOmpDirectiveName(loopConstruct).v; - // parser::omp::GetOmpDirectiveName(beginLoopDirective).v; - //const parser::CharBlock &source = std::get(beginLoopDirective.t).source; - const parser::OmpDirectiveName &beginName = beginSpec.DirName(); - ConstructQueue queue{ buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, eval, beginName.source, beginName.v, clauses)}; + ConstructQueue queue{ + buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, + eval, beginName.source, beginName.v, clauses)}; - auto &optLoopCons = std::get>(loopConstruct.t); + auto &optLoopCons = + std::get>(loopConstruct.t); if (optLoopCons.has_value()) { - if (auto *ompNestedLoopCons{ std::get_if>( &*optLoopCons)}) { - const Fortran::parser::OpenMPLoopConstruct &x = ompNestedLoopCons->value(); - // const Fortran::parser::OmpBeginLoopDirective &y = std::get<0>(x.t); - // const Fortran::parser::OmpClauseList &clauseList = std::get<1>(y.t); - llvm::omp::Directive nestedDirective = parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; - + if (auto *ompNestedLoopCons{ + std::get_if>( + &*optLoopCons)}) { + const 
Fortran::parser::OpenMPLoopConstruct &x = + ompNestedLoopCons->value(); + llvm::omp::Directive nestedDirective = + parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; + List nestedClauses = makeClauses(x.BeginDir().Clauses(), semaCtx); - // makeClauses(std::get(y.t), semaCtx); switch (nestedDirective) { case llvm::omp::Directive::OMPD_tile: @@ -4011,7 +4012,9 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, // generating the omp.loop_nest op. break; case llvm::omp::Directive::OMPD_interchange: { - ConstructQueue nestedQueue{buildConstructQueue( converter.getFirOpBuilder().getModule(), semaCtx, eval, beginName.source, nestedDirective, nestedClauses)}; + ConstructQueue nestedQueue{buildConstructQueue( + converter.getFirOpBuilder().getModule(), semaCtx, eval, + beginName.source, nestedDirective, nestedClauses)}; for (auto nl : nestedQueue) { queue.push_back(nl); } @@ -4026,9 +4029,6 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, } } } - - - genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue, queue.begin()); diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index d4f3eefdeaf2e..e4d2aeef6ba59 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -677,19 +677,23 @@ int64_t collectLoopRelatedInfo( }); if (auto *ompLoop{std::get_if(&ompCons->u)}) { - const auto &nestedOptional = std::get>(ompLoop->t); - assert(nestedOptional.has_value() && "Expected a DoConstruct or OpenMPLoopConstruct"); - const auto *innerConstruct = std::get_if>( &(nestedOptional.value())); + const auto &nestedOptional = + std::get>(ompLoop->t); + assert(nestedOptional.has_value() && + "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = + std::get_if>( + &(nestedOptional.value())); if (innerConstruct) { const auto &innerLoopDirective = innerConstruct->value(); - const auto &innerBegin = 
std::get(innerLoopDirective.t); - const auto &innerDirective = Fortran::parser::omp::GetOmpDirectiveName(innerBegin).v; - //std::get(innerBegin.t).v; + const auto &innerBegin = + std::get(innerLoopDirective.t); + const auto &innerDirective = + Fortran::parser::omp::GetOmpDirectiveName(innerBegin).v; if (innerDirective == llvm::omp::Directive::OMPD_interchange) { // Get the size values from parse tree and convert to a vector - const auto &innerClauseList { innerBegin.Clauses() }; - // const auto &innerClauseList{ std::get(innerBegin.t)}; + const auto &innerClauseList{innerBegin.Clauses()}; for (const auto &clause : innerClauseList.v) { if (const auto tclause{ std::get_if(&clause.u)}) { @@ -763,15 +767,14 @@ void collectPermutationFromOpenMPConstruct( &(nestedOptional.value())); if (innerConstruct) { const auto &innerLoopDirective = innerConstruct->value(); - const auto &innerBegin = innerLoopDirective.BeginDir(); - //std::get(innerLoopDirective.t); - const auto &innerDirective = Fortran::parser::omp::GetOmpDirectiveName(innerLoopDirective).v; - //std::get(innerBegin.t).v; + const auto &innerBegin = innerLoopDirective.BeginDir(); + const auto &innerDirective = + Fortran::parser::omp::GetOmpDirectiveName(innerLoopDirective).v; if (innerDirective == llvm::omp::Directive::OMPD_interchange) { // Get the size values from parse tree and convert to a vector - const auto &innerClauseList{ innerBegin.Clauses() }; - //std::get(innerBegin.t)}; + const auto &innerClauseList{innerBegin.Clauses()}; + // std::get(innerBegin.t)}; for (const auto &clause : innerClauseList.v) if (const auto tclause{ std::get_if(&clause.u)}) { diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index cfe42cb34653f..ca45bb0386ef2 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -1953,7 +1953,7 @@ static constexpr DirectiveSet GetLoopDirectives() { unsigned(Directive::OMPD_teams_distribute_simd), 
unsigned(Directive::OMPD_teams_loop), unsigned(Directive::OMPD_tile), - unsigned(Directive::OMPD_unroll), + unsigned(Directive::OMPD_unroll), unsigned(Directive::OMPD_interchange), }; return loopDirectives; diff --git a/flang/lib/Semantics/CMakeLists.txt b/flang/lib/Semantics/CMakeLists.txt index 414b59812aa72..109bc2dbb8569 100644 --- a/flang/lib/Semantics/CMakeLists.txt +++ b/flang/lib/Semantics/CMakeLists.txt @@ -1,4 +1,10 @@ -add_flang_library(FortranSemanticsChecks PARTIAL_SOURCES_INTENDED +add_flang_library(FortranSemantics + assignment.cpp + attr.cpp + canonicalize-acc.cpp + canonicalize-directives.cpp + canonicalize-do.cpp + canonicalize-omp.cpp check-acc-structure.cpp check-allocate.cpp check-arithmeticif.cpp @@ -23,30 +29,6 @@ add_flang_library(FortranSemanticsChecks PARTIAL_SOURCES_INTENDED check-select-rank.cpp check-select-type.cpp check-stop.cpp - - DEPENDS - acc_gen - omp_gen - - LINK_LIBS - FortranSupport - FortranParser - FortranEvaluate - - LINK_COMPONENTS - Support - FrontendOpenMP - FrontendOpenACC - TargetParser -) - -add_flang_library(FortranSemantics PARTIAL_SOURCES_INTENDED - assignment.cpp - attr.cpp - canonicalize-acc.cpp - canonicalize-directives.cpp - canonicalize-do.cpp - canonicalize-omp.cpp compute-offsets.cpp data-to-inits.cpp definable.cpp @@ -79,7 +61,6 @@ add_flang_library(FortranSemantics PARTIAL_SOURCES_INTENDED FortranSupport FortranParser FortranEvaluate - FortranSemanticsChecks LINK_COMPONENTS Support @@ -87,3 +68,12 @@ add_flang_library(FortranSemantics PARTIAL_SOURCES_INTENDED FrontendOpenACC TargetParser ) + +target_precompile_headers(FortranSemantics PRIVATE + [["flang/Semantics/semantics.h"]] + [["flang/Semantics/type.h"]] + [["flang/Semantics/openmp-modifiers.h"]] + [["flang/Semantics/expression.h"]] + [["flang/Semantics/tools.h"]] + [["flang/Semantics/symbol.h"]] +) diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index b34296271f79a..087a8c53f2afc 100644 --- 
a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -177,8 +177,11 @@ class CanonicalizationOfOmp { // OpenMP Loop Construct and the DO loop itself auto &nestedBeginDirective = ompLoopCons->BeginDir(); auto &nestedBeginName = nestedBeginDirective.DirName(); - if ((nestedBeginName.v == llvm::omp::Directive::OMPD_unroll || nestedBeginName.v == llvm::omp::Directive::OMPD_tile || nestedBeginName.v == llvm::omp::Directive::OMPD_interchange) && - !(nestedBeginName.v == llvm::omp::Directive::OMPD_unroll &&beginName.v == llvm::omp::Directive::OMPD_tile)) { + if ((nestedBeginName.v == llvm::omp::Directive::OMPD_unroll || + nestedBeginName.v == llvm::omp::Directive::OMPD_tile || + nestedBeginName.v == llvm::omp::Directive::OMPD_interchange) && + !(nestedBeginName.v == llvm::omp::Directive::OMPD_unroll && + beginName.v == llvm::omp::Directive::OMPD_tile)) { // iterate through the remaining block items to find the end directive // for the unroll/tile directive. 
parser::Block::iterator endIt; @@ -203,7 +206,7 @@ class CanonicalizationOfOmp { common::Indirection{std::move(*ompLoopCons)}}}; nextIt = block.erase(nextIt); } else if (nestedBeginName.v == llvm::omp::Directive::OMPD_unroll && - beginName.v == llvm::omp::Directive::OMPD_tile ) { + beginName.v == llvm::omp::Directive::OMPD_tile) { // if a loop has been unrolled, the user can not then tile that loop // as it has been unrolled const parser::OmpClauseList &unrollClauseList{ diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 254e4a2f47b4d..5a29f3245b4db 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -2126,11 +2126,11 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( const parser::OpenMPLoopConstruct &y, const parser::OmpClauseList &x, llvm::SmallVector &levels, llvm::SmallVector &clauses) { - const auto &beginLoopDir{ y.BeginDir() }; - const auto &dirClauses{ beginLoopDir.Clauses()}; -// const auto &beginLoopDir{std::get(y.t)}; -// const auto &dirClauses{std::get(beginLoopDir.t)}; - auto ytv = Fortran::parser::omp::GetOmpDirectiveName (y).v; + const auto &beginLoopDir{y.BeginDir()}; + const auto &dirClauses{beginLoopDir.Clauses()}; + // const auto &beginLoopDir{std::get(y.t)}; + // const auto &dirClauses{std::get(beginLoopDir.t)}; + auto ytv = Fortran::parser::omp::GetOmpDirectiveName(y).v; for (const auto &clause : x.v) { if (const auto oclause{ From 6a8b6cc372b8469c558ba3f81dd9017cb97224c3 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 20 Sep 2025 15:14:21 +0200 Subject: [PATCH 54/57] cleanup --- flang/lib/Semantics/resolve-directives.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 5a29f3245b4db..f9dde64c4038e 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -2128,8 
+2128,6 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( llvm::SmallVector &clauses) { const auto &beginLoopDir{y.BeginDir()}; const auto &dirClauses{beginLoopDir.Clauses()}; - // const auto &beginLoopDir{std::get(y.t)}; - // const auto &dirClauses{std::get(beginLoopDir.t)}; auto ytv = Fortran::parser::omp::GetOmpDirectiveName(y).v; for (const auto &clause : x.v) { From eb98bbe2f4f4f1628ff76d405e26a3c578e00125 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Oct 2025 00:21:26 +0200 Subject: [PATCH 55/57] [OpenMP][test] .f90 -> .F90 The test makes use of the preprocessor, which requires a .F90 suffix --- openmp/runtime/test/transform/tile/{intfor.f90 => intfor.F90} | 1 + 1 file changed, 1 insertion(+) rename openmp/runtime/test/transform/tile/{intfor.f90 => intfor.F90} (98%) diff --git a/openmp/runtime/test/transform/tile/intfor.f90 b/openmp/runtime/test/transform/tile/intfor.F90 similarity index 98% rename from openmp/runtime/test/transform/tile/intfor.f90 rename to openmp/runtime/test/transform/tile/intfor.F90 index dac0de6a99021..4ca9f14fdae9f 100644 --- a/openmp/runtime/test/transform/tile/intfor.f90 +++ b/openmp/runtime/test/transform/tile/intfor.F90 @@ -10,6 +10,7 @@ ! 
RUN: %t-ub18.exe | FileCheck %s --match-full-lines program tile_intfor_1d + implicit none integer i print *, 'do' From 0d7030f641c2de155fe9736006735a3de448b885 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Oct 2025 18:39:29 +0200 Subject: [PATCH 56/57] post-merge fix --- flang/lib/Lower/OpenMP/Utils.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 73cf26431233a..10e5114bcb880 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -726,12 +726,7 @@ void collectLoopRelatedInfo( } } - int64_t collapseValue = collapseValue - sizesLengthValue; - if (sizesLengthValue > collapseValue) - collapseValue = sizesLengthValue; - if (permutationLengthValue > collapseValue) - collapseValue = permutationLengthValue; - +std::int64_t collapseValue = std::max({numCollapse, sizesLengthValue, permutationLengthValue}); std::size_t loopVarTypeSize = 0; do { lower::pft::Evaluation *doLoop = From 947f513be5d4c9d45393ba4abcecfe253eee0c1f Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 4 Oct 2025 19:19:12 +0200 Subject: [PATCH 57/57] Don't XFAIL intdo --- openmp/runtime/test/transform/tile/intdo.f90 | 1 - 1 file changed, 1 deletion(-) diff --git a/openmp/runtime/test/transform/tile/intdo.f90 b/openmp/runtime/test/transform/tile/intdo.f90 index 40e4c2c53f89c..27cd383a69fea 100644 --- a/openmp/runtime/test/transform/tile/intdo.f90 +++ b/openmp/runtime/test/transform/tile/intdo.f90 @@ -1,5 +1,4 @@ ! This test checks lowering of OpenMP tile directive -! XFAIL: * ! RUN: %flang %flags %openmp_flags -fopenmp-version=51 %s -o %t.exe ! RUN: %t.exe | FileCheck %s --match-full-lines