diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h index 0ffe27ea038e8..f0fe74af555f3 100644 --- a/flang/include/flang/Lower/AbstractConverter.h +++ b/flang/include/flang/Lower/AbstractConverter.h @@ -384,6 +384,10 @@ class AbstractConverter { virtual mlir::StateStack &getStateStack() = 0; + virtual void + genPermutatedLoops(llvm::ArrayRef doStmts, + Fortran::lower::pft::Evaluation *innermostDo) = 0; + private: /// Options controlling lowering behavior. const Fortran::lower::LoweringOptions &loweringOptions; diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 780d56f085f69..b7b4d6c73b9ee 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -2197,6 +2197,124 @@ class FirConverter : public Fortran::lower::AbstractConverter { // so no clean-up needs to be generated for these entities. } + void + genPermutatedLoops(llvm::ArrayRef doStmts, + Fortran::lower::pft::Evaluation *innermostDo) override { + // Fortran::lower::pft::Evaluation &eval = getEval(); + // bool unstructuredContext = eval.lowerAsUnstructured(); + + llvm::SmallVector headerBlocks; + llvm::SmallVector loopInfos; + + auto enterLoop = [&](Fortran::lower::pft::Evaluation &eval) { + bool unstructuredContext = eval.lowerAsUnstructured(); + + // Collect loop nest information. + // Generate begin loop code directly for infinite and while loops. + Fortran::lower::pft::Evaluation &doStmtEval = + eval.getFirstNestedEvaluation(); + auto *doStmt = doStmtEval.getIf(); + const auto &loopControl = + std::get>(doStmt->t); + mlir::Block *preheaderBlock = doStmtEval.block; + mlir::Block *beginBlock = + preheaderBlock ? preheaderBlock : builder->getBlock(); + auto createNextBeginBlock = [&]() { + // Step beginBlock through unstructured preheader, header, and mask + // blocks, created in outermost to innermost order. + return beginBlock = beginBlock->splitBlock(beginBlock->end()); + }; + mlir::Block *headerBlock = + unstructuredContext ? createNextBeginBlock() : nullptr; + headerBlocks.push_back(headerBlock); + mlir::Block *bodyBlock = doStmtEval.lexicalSuccessor->block; + mlir::Block *exitBlock = doStmtEval.parentConstruct->constructExit->block; + IncrementLoopNestInfo &incrementLoopNestInfo = loopInfos.emplace_back(); + const Fortran::parser::ScalarLogicalExpr *whileCondition = nullptr; + bool infiniteLoop = !loopControl.has_value(); + if (infiniteLoop) { + assert(unstructuredContext && "infinite loop must be unstructured"); + startBlock(headerBlock); + } else if ((whileCondition = + std::get_if( + &loopControl->u))) { + assert(unstructuredContext && "while loop must be unstructured"); + maybeStartBlock(preheaderBlock); // no block or empty block + startBlock(headerBlock); + genConditionalBranch(*whileCondition, bodyBlock, exitBlock); + } else if (const auto *bounds = + std::get_if( + &loopControl->u)) { + // Non-concurrent increment loop. + IncrementLoopInfo &info = incrementLoopNestInfo.emplace_back( + *bounds->name.thing.symbol, bounds->lower, bounds->upper, + bounds->step); + if (unstructuredContext) { + maybeStartBlock(preheaderBlock); + info.hasRealControl = info.loopVariableSym->GetType()->IsNumeric( + Fortran::common::TypeCategory::Real); + info.headerBlock = headerBlock; + info.bodyBlock = bodyBlock; + info.exitBlock = exitBlock; + } + } else { + llvm_unreachable("Cannot permute DO CONCURRENT"); + } + + // Increment loop begin code. (Infinite/while code was already generated.) + if (!infiniteLoop && !whileCondition) + genFIRIncrementLoopBegin(incrementLoopNestInfo, doStmtEval.dirs); + }; + + auto leaveLoop = [&](Fortran::lower::pft::Evaluation &eval, + mlir::Block *headerBlock, + IncrementLoopNestInfo &incrementLoopNestInfo) { + bool unstructuredContext = eval.lowerAsUnstructured(); + + Fortran::lower::pft::Evaluation &doStmtEval = + eval.getFirstNestedEvaluation(); + auto *doStmt = doStmtEval.getIf(); + + const auto &loopControl = + std::get>(doStmt->t); + bool infiniteLoop = !loopControl.has_value(); + const Fortran::parser::ScalarLogicalExpr *whileCondition = + std::get_if(&loopControl->u); + + auto iter = std::prev(eval.getNestedEvaluations().end()); + + // An EndDoStmt in unstructured code may start a new block. + Fortran::lower::pft::Evaluation &endDoEval = *iter; + assert(endDoEval.getIf() && "no enddo stmt"); + if (unstructuredContext) + maybeStartBlock(endDoEval.block); + + // Loop end code. + if (infiniteLoop || whileCondition) + genBranch(headerBlock); + else + genFIRIncrementLoopEnd(incrementLoopNestInfo); + + // This call may generate a branch in some contexts. + genFIR(endDoEval, unstructuredContext); + }; + + for (auto l : doStmts) + enterLoop(*l); + + // Loop body code. + bool innermostUnstructuredContext = innermostDo->lowerAsUnstructured(); + + auto iter = innermostDo->getNestedEvaluations().begin(); + for (auto end = --innermostDo->getNestedEvaluations().end(); iter != end; + ++iter) + genFIR(*iter, innermostUnstructuredContext); + + for (auto &&[l, headerBlock, li] : + llvm::zip_equal(doStmts, headerBlocks, loopInfos)) + leaveLoop(*l, headerBlock, li); + } + /// Generate FIR for a DO construct. There are six variants: /// - unstructured infinite and while loops /// - structured and unstructured increment loops diff --git a/flang/lib/Lower/OpenMP/Decomposer.cpp b/flang/lib/Lower/OpenMP/Decomposer.cpp index 9bfbf67bec88c..bf09bed395285 100644 --- a/flang/lib/Lower/OpenMP/Decomposer.cpp +++ b/flang/lib/Lower/OpenMP/Decomposer.cpp @@ -98,14 +98,39 @@ ConstructQueue buildConstructQueue( return decompose.output; } +// from clang +static bool isOpenMPLoopTransformationDirective(llvm::omp::Directive DKind) { + return DKind == llvm::omp::Directive::OMPD_tile || + DKind == llvm::omp::Directive::OMPD_unroll || + DKind == llvm::omp::Directive::OMPD_reverse || + DKind == llvm::omp::Directive::OMPD_interchange || + DKind == llvm::omp::Directive::OMPD_stripe; +} + +llvm::iterator_range getNonTransformQueue( + llvm::iterator_range range) { + // remove trailing loop transformations + auto b = range.begin(); + auto e = range.end(); + while (e != b) { + auto e2 = e - 1; + if (!isOpenMPLoopTransformationDirective(e2->id)) + break; + e = e2; + } + + return llvm::make_range(b, e); +} + bool matchLeafSequence(ConstructQueue::const_iterator item, const ConstructQueue &queue, llvm::omp::Directive directive) { llvm::ArrayRef leafDirs = llvm::omp::getLeafConstructsOrSelf(directive); - for (auto [dir, leaf] : - llvm::zip_longest(leafDirs, llvm::make_range(item, queue.end()))) { + for (auto [dir, leaf] : llvm::zip_longest( + leafDirs, + getNonTransformQueue(llvm::make_range(item, queue.end())))) { if (!dir.has_value() || !leaf.has_value()) return false; diff --git a/flang/lib/Lower/OpenMP/Decomposer.h b/flang/lib/Lower/OpenMP/Decomposer.h index 65492bd76280d..f057009629efc 100644 --- a/flang/lib/Lower/OpenMP/Decomposer.h +++ b/flang/lib/Lower/OpenMP/Decomposer.h @@ -57,6 +57,10 @@ bool isLastItemInQueue(ConstructQueue::const_iterator item, bool matchLeafSequence(ConstructQueue::const_iterator item, const ConstructQueue &queue, llvm::omp::Directive directive); + +llvm::iterator_range getNonTransformQueue( + llvm::iterator_range range); + } // namespace Fortran::lower::omp #endif // FORTRAN_LOWER_OPENMP_DECOMPOSER_H diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 9e56c2bfb7e25..805e3dd8aad5e 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1231,8 +1231,11 @@ static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info, } if (!info.genSkeletonOnly) { + // Transforms already processed by getLoopNestOp + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); if (ConstructQueue::const_iterator next = std::next(item); - next != queue.end()) { + next != transforms.begin() && next != queue.end()) { genOMPDispatch(info.converter, info.symTable, info.semaCtx, info.eval, info.loc, queue, next); } else { @@ -1551,7 +1554,8 @@ genLoopNestClauses(lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const List &clauses, mlir::Location loc, mlir::omp::LoopNestOperands &clauseOps, - llvm::SmallVectorImpl &iv) { + llvm::SmallVectorImpl &iv, + bool enableInterchange = false) { ClauseProcessor cp(converter, semaCtx, clauses); HostEvalInfo *hostEvalInfo = getHostEvalInfoStackTop(converter); @@ -1559,6 +1563,28 @@ genLoopNestClauses(lower::AbstractConverter &converter, cp.processCollapse(loc, eval, clauseOps, clauseOps, iv); clauseOps.loopInclusive = converter.getFirOpBuilder().getUnitAttr(); + + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + for (auto &clause : clauses) { + if (clause.id == llvm::omp::Clause::OMPC_collapse) { + const auto &collapse = std::get(clause.u); + int64_t collapseValue = evaluate::ToInt64(collapse.v).value(); + clauseOps.collapseNumLoops = + firOpBuilder.getI64IntegerAttr(collapseValue); + } else if (clause.id == llvm::omp::Clause::OMPC_sizes) { + // This case handles the stand-alone tiling construct + const auto &sizes = std::get(clause.u); + llvm::SmallVector sizeValues; + for (auto &size : sizes.v) { + int64_t sizeValue = evaluate::ToInt64(size).value(); + sizeValues.push_back(sizeValue); + } + clauseOps.tileSizes = sizeValues; + } else if (clause.id == llvm::omp::Clause::OMPC_permutation) { + llvm_unreachable("MK: To handle standalone interchange construct"); + } + } + cp.processTileSizes(eval, clauseOps); } @@ -1930,7 +1956,9 @@ static mlir::omp::LoopNestOp genLoopNestOp( llvm::ArrayRef< std::pair> wrapperArgs, - llvm::omp::Directive directive, DataSharingProcessor &dsp) { + llvm::omp::Directive directive, DataSharingProcessor &dsp, + std::optional> + transforms = std::nullopt) { auto ivCallback = [&](mlir::Operation *op) { genLoopVars(op, converter, loc, iv, wrapperArgs); return llvm::SmallVector(iv); @@ -1939,6 +1967,65 @@ static mlir::omp::LoopNestOp genLoopNestOp( uint64_t nestValue = getCollapseValue(item->clauses); nestValue = nestValue < iv.size() ? iv.size() : nestValue; auto *nestedEval = getCollapsedLoopEval(eval, nestValue); + + if (!transforms.has_value()) { + // This must be a standalone construct, assume all following actions are + // transformations + transforms = llvm::make_range(std::next(item), queue.end()); + } + + for (auto &&transform : llvm::reverse(*transforms)) { + auto d = transform.id; + auto clauses = transform.clauses; + + switch (d) { + case llvm::omp::OMPD_interchange: { + llvm::SmallVector permutation; + + auto &&permutationClause = ClauseFinder::findUniqueClause< + Fortran::lower::omp::clause::Permutation>(clauses); + if (permutationClause) { + permutation.reserve(permutationClause->v.size()); + for (auto &&ts : permutationClause->v) { + permutation.push_back(evaluate::ToInt64(ts).value()); + } + // llvm::append_range( permutation, permutationClause->v); + + } else { + permutation = {2, 1}; + } + + assert(permutation.size() == iv.size() && + "TODO: if permutation is smaller than number of associated loops, " + "permute only the first loops"); + llvm::SmallVector newIVs; + llvm::SmallVector newLBs; + llvm::SmallVector newUBs; + llvm::SmallVector newINCs; + llvm::SmallVector newSizes; + + // TODO: Assert this is a valid permution + for (auto perm : permutation) { + newIVs.push_back(iv[perm - 1]); + newLBs.push_back(clauseOps.loopLowerBounds[perm - 1]); + newUBs.push_back(clauseOps.loopUpperBounds[perm - 1]); + newINCs.push_back(clauseOps.loopSteps[perm - 1]); + if (!clauseOps.tileSizes.empty()) + newSizes.push_back(clauseOps.tileSizes[perm - 1]); + } + + iv = newIVs; + clauseOps.loopLowerBounds = newLBs; + clauseOps.loopUpperBounds = newUBs; + clauseOps.loopSteps = newINCs; + clauseOps.tileSizes = newSizes; + + } break; + default: + llvm_unreachable("MK: loop transformation not yet implemented"); + } + } + return genOpWithBody( OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *nestedEval, directive) @@ -2247,6 +2334,68 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter, mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli); } +static void +collectLoops(lower::pft::Evaluation &eval, + llvm::SmallVectorImpl &result, + int numLoops) { + lower::pft::Evaluation *doConstructEval = &eval.getFirstNestedEvaluation(); + for ([[maybe_unused]] auto i : llvm::seq(numLoops)) { + lower::pft::Evaluation *doLoop = + &doConstructEval->getFirstNestedEvaluation(); + auto *doStmt = doLoop->getIf(); + assert(doStmt && "Expected do loop to be in the nested evaluation"); + const auto &loopControl = + std::get>(doStmt->t); + const parser::LoopControl::Bounds *bounds = + std::get_if(&loopControl->u); + assert(bounds && "Expected bounds for worksharing do loop"); + lower::StatementContext stmtCtx; + + result.push_back(doConstructEval); + + doConstructEval = + &*std::next(doConstructEval->getNestedEvaluations().begin()); + }; +} + +static void genStandaloneInterchangeOp( + Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, lower::StatementContext &stmtCtx, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, mlir::Location loc, + const ConstructQueue &queue, ConstructQueue::const_iterator item) { + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); + assert(llvm::range_size(transforms) == 1); + auto &&transform = *transforms.begin(); + assert(transform.id == llvm::omp::OMPD_interchange); + auto clauses = transform.clauses; + + llvm::SmallVector permutation; + auto &&permutationClause = + ClauseFinder::findUniqueClause( + clauses); + if (permutationClause) { + permutation.reserve(permutationClause->v.size()); + for (auto &&ts : permutationClause->v) { + permutation.push_back(evaluate::ToInt64(ts).value()); + } + } else { + permutation = {2, 1}; + } + + llvm::SmallVector loops; + collectLoops(eval, loops, permutation.size()); + + // TODO: Assert this is a valid permution + llvm::SmallVector newLoops; + for (auto perm : permutation) { + newLoops.push_back(loops[perm - 1]); + } + + converter.genPermutatedLoops(newLoops, loops.back()); +} + static mlir::omp::MaskedOp genMaskedOp(lower::AbstractConverter &converter, lower::SymMap &symTable, lower::StatementContext &stmtCtx, @@ -3022,7 +3171,10 @@ static mlir::omp::DistributeOp genCompositeDistributeParallelDo( lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { - assert(std::distance(item, queue.end()) == 3 && "Invalid leaf constructs"); + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); + + assert(llvm::range_size(q) == 3 && "Invalid leaf constructs"); ConstructQueue::const_iterator distributeItem = item; ConstructQueue::const_iterator parallelItem = std::next(distributeItem); ConstructQueue::const_iterator doItem = std::next(parallelItem); @@ -3076,10 +3228,10 @@ static mlir::omp::DistributeOp genCompositeDistributeParallelDo( converter, loc, wsloopClauseOps, wsloopArgs); wsloopOp.setComposite(/*val=*/true); - genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, doItem, - loopNestClauseOps, iv, - {{distributeOp, distributeArgs}, {wsloopOp, wsloopArgs}}, - llvm::omp::Directive::OMPD_distribute_parallel_do, dsp); + genLoopNestOp( + converter, symTable, semaCtx, eval, loc, queue, doItem, loopNestClauseOps, + iv, {{distributeOp, distributeArgs}, {wsloopOp, wsloopArgs}}, + llvm::omp::Directive::OMPD_distribute_parallel_do, dsp, transforms); return distributeOp; } @@ -3088,7 +3240,11 @@ static mlir::omp::DistributeOp genCompositeDistributeParallelDoSimd( lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { - assert(std::distance(item, queue.end()) == 4 && "Invalid leaf constructs"); + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); + + assert(llvm::range_size(q) == 4 && "Invalid leaf constructs"); + ConstructQueue::const_iterator distributeItem = item; ConstructQueue::const_iterator parallelItem = std::next(distributeItem); ConstructQueue::const_iterator doItem = std::next(parallelItem); @@ -3170,7 +3326,7 @@ static mlir::omp::DistributeOp genCompositeDistributeParallelDoSimd( {wsloopOp, wsloopArgs}, {simdOp, simdArgs}}, llvm::omp::Directive::OMPD_distribute_parallel_do_simd, - simdItemDSP); + simdItemDSP, transforms); return distributeOp; } @@ -3179,7 +3335,11 @@ static mlir::omp::DistributeOp genCompositeDistributeSimd( lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { - assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); + + assert(llvm::range_size(q) == 2 && "Invalid leaf constructs"); + ConstructQueue::const_iterator distributeItem = item; ConstructQueue::const_iterator simdItem = std::next(distributeItem); @@ -3231,7 +3391,8 @@ static mlir::omp::DistributeOp genCompositeDistributeSimd( genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem, loopNestClauseOps, iv, {{distributeOp, distributeArgs}, {simdOp, simdArgs}}, - llvm::omp::Directive::OMPD_distribute_simd, simdItemDSP); + llvm::omp::Directive::OMPD_distribute_simd, simdItemDSP, + transforms); return distributeOp; } @@ -3240,7 +3401,11 @@ static mlir::omp::WsloopOp genCompositeDoSimd( lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { - assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + auto transforms = llvm::make_range(q.end(), queue.end()); + + assert(llvm::range_size(q) == 2 && "Invalid leaf constructs"); + ConstructQueue::const_iterator doItem = item; ConstructQueue::const_iterator simdItem = std::next(doItem); @@ -3295,7 +3460,7 @@ static mlir::omp::WsloopOp genCompositeDoSimd( genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem, loopNestClauseOps, iv, {{wsloopOp, wsloopArgs}, {simdOp, simdArgs}}, - llvm::omp::Directive::OMPD_do_simd, simdItemDSP); + llvm::omp::Directive::OMPD_do_simd, simdItemDSP, transforms); return wsloopOp; } @@ -3304,7 +3469,10 @@ static mlir::omp::TaskloopOp genCompositeTaskloopSimd( lower::StatementContext &stmtCtx, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, mlir::Location loc, const ConstructQueue &queue, ConstructQueue::const_iterator item) { - assert(std::distance(item, queue.end()) == 2 && "Invalid leaf constructs"); + auto q = getNonTransformQueue(llvm::make_range(item, queue.end())); + + assert(llvm::range_size(q) == 2 && "Invalid leaf constructs"); + if (!semaCtx.langOptions().OpenMPSimd) TODO(loc, "Composite TASKLOOP SIMD"); return nullptr; @@ -3482,6 +3650,10 @@ static void genOMPDispatch(lower::AbstractConverter &converter, case llvm::omp::Directive::OMPD_unroll: genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item); break; + case llvm::omp::Directive::OMPD_interchange: + genStandaloneInterchangeOp(converter, symTable, stmtCtx, semaCtx, eval, loc, + queue, item); + break; case llvm::omp::Directive::OMPD_workdistribute: newOp = genWorkdistributeOp(converter, symTable, semaCtx, eval, loc, queue, item); @@ -3926,19 +4098,37 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, mlir::Location currentLocation = converter.genLocation(beginSpec.source); + const parser::OmpDirectiveName &beginName = beginSpec.DirName(); + ConstructQueue queue{ + buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, + eval, beginName.source, beginName.v, clauses)}; + auto &optLoopCons = std::get>(loopConstruct.t); if (optLoopCons.has_value()) { if (auto *ompNestedLoopCons{ std::get_if>( &*optLoopCons)}) { + const Fortran::parser::OpenMPLoopConstruct &x = + ompNestedLoopCons->value(); llvm::omp::Directive nestedDirective = parser::omp::GetOmpDirectiveName(*ompNestedLoopCons).v; + + List nestedClauses = makeClauses(x.BeginDir().Clauses(), semaCtx); + switch (nestedDirective) { case llvm::omp::Directive::OMPD_tile: // Skip OMPD_tile since the tile sizes will be retrieved when // generating the omp.loop_nest op. break; + case llvm::omp::Directive::OMPD_interchange: { + ConstructQueue nestedQueue{buildConstructQueue( + converter.getFirOpBuilder().getModule(), semaCtx, eval, + beginName.source, nestedDirective, nestedClauses)}; + for (auto nl : nestedQueue) { + queue.push_back(nl); + } + } break; default: { unsigned version = semaCtx.langOptions().OpenMPVersion; TODO(currentLocation, @@ -3950,10 +4140,6 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, } } - const parser::OmpDirectiveName &beginName = beginSpec.DirName(); - ConstructQueue queue{ - buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, - eval, beginName.source, beginName.v, clauses)}; genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue, queue.begin()); } diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 29cccbd1bfe5a..10e5114bcb880 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -687,14 +687,46 @@ void collectLoopRelatedInfo( // Collect sizes from tile directive if present. std::int64_t sizesLengthValue = 0l; + std::int64_t permutationLengthValue = 0l; if (auto *ompCons{eval.getIf()}) { processTileSizesFromOpenMPConstruct( ompCons, [&](const parser::OmpClause::Sizes *tclause) { sizesLengthValue = tclause->v.size(); }); + + if (auto *ompLoop{std::get_if(&ompCons->u)}) { + const auto &nestedOptional = + std::get>(ompLoop->t); + assert(nestedOptional.has_value() && + "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = + std::get_if>( + &(nestedOptional.value())); + if (innerConstruct) { + const auto &innerLoopDirective = innerConstruct->value(); + const auto &innerBegin = + std::get(innerLoopDirective.t); + const auto &innerDirective = + Fortran::parser::omp::GetOmpDirectiveName(innerBegin).v; + + if (innerDirective == llvm::omp::Directive::OMPD_interchange) { + // Get the size values from parse tree and convert to a vector + const auto &innerClauseList{innerBegin.Clauses()}; + for (const auto &clause : innerClauseList.v) { + if (const auto tclause{ + std::get_if(&clause.u)}) { + permutationLengthValue = tclause->v.size(); + } + } + // default: permution(2,1) + if (permutationLengthValue == 0) + permutationLengthValue = 2; + } + } + } } - std::int64_t collapseValue = std::max(numCollapse, sizesLengthValue); +std::int64_t collapseValue = std::max({numCollapse, sizesLengthValue, permutationLengthValue}); std::size_t loopVarTypeSize = 0; do { lower::pft::Evaluation *doLoop = @@ -729,6 +761,44 @@ void collectLoopRelatedInfo( convertLoopBounds(converter, currentLocation, result, loopVarTypeSize); } +void collectPermutationFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + llvm::SmallVectorImpl &permutation, + Fortran::semantics::SemanticsContext &semaCtx) { + if (!ompCons) + return; + + if (auto *ompLoop{std::get_if(&ompCons->u)}) { + const auto &nestedOptional = + std::get>(ompLoop->t); + assert(nestedOptional.has_value() && + "Expected a DoConstruct or OpenMPLoopConstruct"); + const auto *innerConstruct = + std::get_if>( + &(nestedOptional.value())); + if (innerConstruct) { + const auto &innerLoopDirective = innerConstruct->value(); + const auto &innerBegin = innerLoopDirective.BeginDir(); + const auto &innerDirective = + Fortran::parser::omp::GetOmpDirectiveName(innerLoopDirective).v; + + if (innerDirective == llvm::omp::Directive::OMPD_interchange) { + // Get the size values from parse tree and convert to a vector + const auto &innerClauseList{innerBegin.Clauses()}; + // std::get(innerBegin.t)}; + for (const auto &clause : innerClauseList.v) + if (const auto tclause{ + std::get_if(&clause.u)}) { + for (auto &tval : tclause->v) { + if (const auto v{EvaluateInt64(semaCtx, tval)}) + permutation.push_back(*v); + } + } + } + } + } +} + } // namespace omp } // namespace lower } // namespace Fortran diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index 69499f9c7b621..03751dea0e7c8 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -177,6 +177,11 @@ void collectTileSizesFromOpenMPConstruct( llvm::SmallVectorImpl &tileSizes, Fortran::semantics::SemanticsContext &semaCtx); +void collectPermutationFromOpenMPConstruct( + const parser::OpenMPConstruct *ompCons, + llvm::SmallVectorImpl &permuation, + Fortran::semantics::SemanticsContext &semaCtx); + } // namespace omp } // namespace lower } // namespace Fortran diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 9507021057476..cd55b8a296254 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -2021,6 +2021,7 @@ static constexpr DirectiveSet GetLoopDirectives() { unsigned(Directive::OMPD_teams_loop), unsigned(Directive::OMPD_tile), unsigned(Directive::OMPD_unroll), + unsigned(Directive::OMPD_interchange), }; return loopDirectives; } diff --git a/flang/lib/Semantics/canonicalize-omp.cpp b/flang/lib/Semantics/canonicalize-omp.cpp index c884658bf464a..087a8c53f2afc 100644 --- a/flang/lib/Semantics/canonicalize-omp.cpp +++ b/flang/lib/Semantics/canonicalize-omp.cpp @@ -178,7 +178,8 @@ class CanonicalizationOfOmp { auto &nestedBeginDirective = ompLoopCons->BeginDir(); auto &nestedBeginName = nestedBeginDirective.DirName(); if ((nestedBeginName.v == llvm::omp::Directive::OMPD_unroll || - nestedBeginName.v == llvm::omp::Directive::OMPD_tile) && + nestedBeginName.v == llvm::omp::Directive::OMPD_tile || + nestedBeginName.v == llvm::omp::Directive::OMPD_interchange) && !(nestedBeginName.v == llvm::omp::Directive::OMPD_unroll && beginName.v == llvm::omp::Directive::OMPD_tile)) { // iterate through the remaining block items to find the end directive diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 18fc63814d973..3b9e972771ff8 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -901,8 +901,8 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { void CollectNumAffectedLoopsFromInnerLoopContruct( const parser::OpenMPLoopConstruct &, llvm::SmallVector &, llvm::SmallVector &); - void CollectNumAffectedLoopsFromClauses(const parser::OmpClauseList &, - llvm::SmallVector &, + void CollectNumAffectedLoopsFromClauses(const parser::OpenMPLoopConstruct &x, + const parser::OmpClauseList &, llvm::SmallVector &, llvm::SmallVector &); Symbol::Flags dataSharingAttributeFlags{Symbol::Flag::OmpShared, @@ -1975,6 +1975,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { case llvm::omp::Directive::OMPD_teams_distribute_simd: case llvm::omp::Directive::OMPD_teams_loop: case llvm::omp::Directive::OMPD_tile: + case llvm::omp::Directive::OMPD_interchange: case llvm::omp::Directive::OMPD_unroll: PushContext(beginName.source, beginName.v); break; @@ -2097,7 +2098,7 @@ bool OmpAttributeVisitor::Pre(const parser::DoConstruct &x) { } static bool isSizesClause(const parser::OmpClause *clause) { - return std::holds_alternative(clause->u); + return clause && std::holds_alternative(clause->u); } std::int64_t OmpAttributeVisitor::SetAssociatedMaxClause( @@ -2148,7 +2149,7 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromLoopConstruct( llvm::SmallVector &clauses) { const auto &clauseList{x.BeginDir().Clauses()}; - CollectNumAffectedLoopsFromClauses(clauseList, levels, clauses); + CollectNumAffectedLoopsFromClauses(x, clauseList, levels, clauses); CollectNumAffectedLoopsFromInnerLoopContruct(x, levels, clauses); } @@ -2172,8 +2173,13 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromInnerLoopContruct( } void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( - const parser::OmpClauseList &x, llvm::SmallVector &levels, + const parser::OpenMPLoopConstruct &y, const parser::OmpClauseList &x, + llvm::SmallVector &levels, llvm::SmallVector &clauses) { + const auto &beginLoopDir{y.BeginDir()}; + const auto &dirClauses{beginLoopDir.Clauses()}; + auto ytv = Fortran::parser::omp::GetOmpDirectiveName(y).v; + for (const auto &clause : x.v) { if (const auto oclause{ std::get_if(&clause.u)}) { @@ -2200,6 +2206,20 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses( clauses.push_back(&clause); } } + + if (ytv == llvm::omp::OMPD_interchange) { + for (const auto &clause : dirClauses.v) { + if (const auto tclause{ + std::get_if(&clause.u)}) { + levels.push_back(tclause->v.size()); + clauses.push_back(&clause); + return; + } + } + + levels.push_back(2); + clauses.push_back(nullptr); + } } void OmpAttributeVisitor::CheckPerfectNestAndRectangularLoop( diff --git a/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-intdo.f90 b/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-intdo.f90 new file mode 100644 index 0000000000000..b5ef5214f5064 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-intdo.f90 @@ -0,0 +1,37 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP TEAMS NUM_TEAMS(1) + + !$OMP DISTRIBUTE PARALLEL DO SCHEDULE(static,2) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END DISTRIBUTE PARALLEL DO + + !$OMP END TEAMS + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-simd-intdo.f90 b/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-simd-intdo.f90 new file mode 100644 index 0000000000000..2a192cad017a6 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/distribute-parallel-wsloop-simd-intdo.f90 @@ -0,0 +1,37 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP TEAMS NUM_TEAMS(1) + + !$OMP DISTRIBUTE PARALLEL DO SIMD SCHEDULE(static,2) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END DISTRIBUTE PARALLEL DO SIMD + + !$OMP END TEAMS + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/distribute-simd-intdo.f90 b/openmp/runtime/test/transform/interchange/distribute-simd-intdo.f90 new file mode 100644 index 0000000000000..fce62b7f3ccda --- /dev/null +++ b/openmp/runtime/test/transform/interchange/distribute-simd-intdo.f90 @@ -0,0 +1,37 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP TEAMS NUM_TEAMS(1) + + !$OMP DISTRIBUTE SIMD + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END DISTRIBUTE SIMD + + !$OMP END TEAMS + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/intdo-permutation.f90 b/openmp/runtime/test/transform/interchange/intdo-permutation.f90 new file mode 100644 index 0000000000000..a8a8e7f35d018 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/intdo-permutation.f90 @@ -0,0 +1,42 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_intdo + integer :: i, j + print *, 'do' + + !$OMP INTERCHANGE PERMUTATION(2,3,1) + do i = 7, 15, 3 + do j = -1, 1, 2 + do k = 3, 1, -1 + print '("i=", I0, " j=", I0, " k=", I0)', i, j, k + end do + end do + end do + !$OMP END INTERCHANGE + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 k=3 +! CHECK-NEXT: i=10 j=-1 k=3 +! CHECK-NEXT: i=13 j=-1 k=3 +! CHECK-NEXT: i=7 j=-1 k=2 +! CHECK-NEXT: i=10 j=-1 k=2 +! CHECK-NEXT: i=13 j=-1 k=2 +! CHECK-NEXT: i=7 j=-1 k=1 +! CHECK-NEXT: i=10 j=-1 k=1 +! CHECK-NEXT: i=13 j=-1 k=1 +! CHECK-NEXT: i=7 j=1 k=3 +! CHECK-NEXT: i=10 j=1 k=3 +! CHECK-NEXT: i=13 j=1 k=3 +! CHECK-NEXT: i=7 j=1 k=2 +! CHECK-NEXT: i=10 j=1 k=2 +! CHECK-NEXT: i=13 j=1 k=2 +! CHECK-NEXT: i=7 j=1 k=1 +! CHECK-NEXT: i=10 j=1 k=1 +! CHECK-NEXT: i=13 j=1 k=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/intdo.f90 b/openmp/runtime/test/transform/interchange/intdo.f90 new file mode 100644 index 0000000000000..fe6820f41dba6 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/intdo.f90 @@ -0,0 +1,31 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_intdo + integer :: i, j + print *, 'do' + + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.f90 new file mode 100644 index 0000000000000..498534374ea30 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_collapse_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO COLLAPSE(2) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.o b/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.o new file mode 100644 index 0000000000000..a0abcfdf74fda Binary files /dev/null and b/openmp/runtime/test/transform/interchange/parallel-wsloop-collapse-intdo.o differ diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-collapse.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-collapse.f90 new file mode 100644 index 0000000000000..4285edaa775b8 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-collapse.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO COLLAPSE(2) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-firstprivate.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-firstprivate.f90 new file mode 100644 index 0000000000000..e53bb107bad2b --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-firstprivate.f90 @@ -0,0 +1,35 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j, k + print *, 'do' + + k = 1 + !$OMP PARALLEL DO NUM_THREADS(3) FIRSTPRIVATE(k) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + k = k + 1 + print '("i=", I0, " j=", I0, " k=", I0)', i, j, k + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-DAG: i=7 j=-1 k=2 +! CHECK-DAG: i=10 j=-1 k=3 +! CHECK-DAG: i=13 j=-1 k=4 +! CHECK-DAG: i=7 j=0 k=2 +! CHECK-DAG: i=10 j=0 k=3 +! CHECK-DAG: i=13 j=0 k=4 +! CHECK-DAG: i=7 j=1 k=2 +! CHECK-DAG: i=10 j=1 k=3 +! CHECK-DAG: i=13 j=1 k=4 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-lastprivate.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-lastprivate.f90 new file mode 100644 index 0000000000000..42d7032bd2184 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-lastprivate.f90 @@ -0,0 +1,28 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines +! XFAIL: * + +program interchange_wsloop_intdo + integer :: i, j, k + print *, 'do' + + + !$OMP PARALLEL DO LASTPRIVATE(k) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + k = i*10 + j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' + print '("k=", I0)', k +end program + + +! CHECK: do +! CHECK-NEXT: done +! CHECK-NEXT: k=131 diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-permutation.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-permutation.f90 new file mode 100644 index 0000000000000..e52389f2448e4 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-permutation.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO NUM_THREADS(1) + !$OMP INTERCHANGE PERMUTATION(2,1) + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-i.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-i.f90 new file mode 100644 index 0000000000000..76928ce93577e --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-i.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO NUM_THREADS(1) PRIVATE(i) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-j.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-j.f90 new file mode 100644 index 0000000000000..a679c921e9660 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private-j.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO NUM_THREADS(1) PRIVATE(j) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private.f90 new file mode 100644 index 0000000000000..372ff573a10d2 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-private.f90 @@ -0,0 +1,34 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j, k + print *, 'do' + + !$OMP PARALLEL DO NUM_THREADS(4) PRIVATE(k) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + k = i + j + print '("i=", I0, " j=", I0, " k=", I0)', i, j, k + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-DAG: i=7 j=-1 k=6 +! CHECK-DAG: i=10 j=-1 k=9 +! CHECK-DAG: i=13 j=-1 k=12 +! CHECK-DAG: i=7 j=0 k=7 +! CHECK-DAG: i=10 j=0 k=10 +! CHECK-DAG: i=13 j=0 k=13 +! CHECK-DAG: i=7 j=1 k=8 +! CHECK-DAG: i=10 j=1 k=11 +! CHECK-DAG: i=13 j=1 k=14 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-reduction.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-reduction.f90 new file mode 100644 index 0000000000000..8d313becef862 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo-reduction.f90 @@ -0,0 +1,27 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j, k + print *, 'do' + + k = 1 + !$OMP PARALLEL DO REDUCTION(+:k) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + k = k + 1 + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' + print '("k=", I0)', k +end program + + +! CHECK: do +! CHECK-NEXT: done +! CHECK-NEXT: k=10 diff --git a/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo.f90 b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo.f90 new file mode 100644 index 0000000000000..cfa3bddf5c8d5 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/parallel-wsloop-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP PARALLEL DO NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/target-teams-distribute-parallel-wsloop-intdo.f90 b/openmp/runtime/test/transform/interchange/target-teams-distribute-parallel-wsloop-intdo.f90 new file mode 100644 index 0000000000000..81e681b55eb1d --- /dev/null +++ b/openmp/runtime/test/transform/interchange/target-teams-distribute-parallel-wsloop-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program target_teams_distribute_parallel_do + integer :: i, j + print *, 'do' + + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO SCHEDULE(static,2) NUM_TEAMS(1) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/taskloop-intdo.f90 b/openmp/runtime/test/transform/interchange/taskloop-intdo.f90 new file mode 100644 index 0000000000000..d79f92d2ad074 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/taskloop-intdo.f90 @@ -0,0 +1,35 @@ + +! XFAIL: * +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP TASKLOOP + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END TASKLOOP + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/taskloop-simd-intdo.f90 b/openmp/runtime/test/transform/interchange/taskloop-simd-intdo.f90 new file mode 100644 index 0000000000000..d84be9d1d7a96 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/taskloop-simd-intdo.f90 @@ -0,0 +1,34 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines +! XFAIL: * + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP TASKLOOP SIMD + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END TASKLOOP SIMD + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/teams-distribute-parallel-wsloop-intdo.f90 b/openmp/runtime/test/transform/interchange/teams-distribute-parallel-wsloop-intdo.f90 new file mode 100644 index 0000000000000..6d7fe1afdcdd5 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/teams-distribute-parallel-wsloop-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program teams_distribute_parallel_do + integer :: i, j + print *, 'do' + + !$OMP TEAMS DISTRIBUTE PARALLEL DO SCHEDULE(static,2) NUM_TEAMS(1) NUM_THREADS(1) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END TEAMS DISTRIBUTE PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/wsloop-intdo.f90 b/openmp/runtime/test/transform/interchange/wsloop-intdo.f90 new file mode 100644 index 0000000000000..32b1b87a9e859 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/wsloop-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP DO SCHEDULE(static,2) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END DO + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/interchange/wsloop-simd-intdo.f90 b/openmp/runtime/test/transform/interchange/wsloop-simd-intdo.f90 new file mode 100644 index 0000000000000..56ed14b165fa3 --- /dev/null +++ b/openmp/runtime/test/transform/interchange/wsloop-simd-intdo.f90 @@ -0,0 +1,33 @@ + +! RUN: %flang %flags %openmp_flags -fopenmp-version=60 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + +program interchange_wsloop_intdo + integer :: i, j + print *, 'do' + + !$OMP DO SIMD SCHEDULE(static,2) + !$OMP INTERCHANGE + do i = 7, 15, 3 + do j = -1, 1 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END INTERCHANGE + !$OMP END DO SIMD + + print *, 'done' +end program + + +! CHECK: do +! CHECK-NEXT: i=7 j=-1 +! CHECK-NEXT: i=10 j=-1 +! CHECK-NEXT: i=13 j=-1 +! CHECK-NEXT: i=7 j=0 +! CHECK-NEXT: i=10 j=0 +! CHECK-NEXT: i=13 j=0 +! CHECK-NEXT: i=7 j=1 +! CHECK-NEXT: i=10 j=1 +! CHECK-NEXT: i=13 j=1 +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/tile/intdo.f90 b/openmp/runtime/test/transform/tile/intdo.f90 new file mode 100644 index 0000000000000..27cd383a69fea --- /dev/null +++ b/openmp/runtime/test/transform/tile/intdo.f90 @@ -0,0 +1,57 @@ +! This test checks lowering of OpenMP tile directive + +! RUN: %flang %flags %openmp_flags -fopenmp-version=51 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + + +program tile_intdo + integer i, j + print *, 'do' + + !$OMP TILE SIZES(2,2) + do i=7, 18, 3 + do j=7, 19, 3 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END TILE + + print *, 'done' +end program + + +! CHECK: do + +! Complete tile +! CHECK-NEXT: i=7 j=7 +! CHECK-NEXT: i=7 j=10 +! CHECK-NEXT: i=10 j=7 +! CHECK-NEXT: i=10 j=10 + +! Complete tile +! CHECK-NEXT: i=7 j=13 +! CHECK-NEXT: i=7 j=16 +! CHECK-NEXT: i=10 j=13 +! CHECK-NEXT: i=10 j=16 + +! Partial tile +! CHECK-NEXT: i=7 j=19 +! CHECK-NEXT: i=10 j=19 + +! Complete tile +! CHECK-NEXT: i=13 j=7 +! CHECK-NEXT: i=13 j=10 +! CHECK-NEXT: i=16 j=7 +! CHECK-NEXT: i=16 j=10 + +! Complete tile +! CHECK-NEXT: i=13 j=13 +! CHECK-NEXT: i=13 j=16 +! CHECK-NEXT: i=16 j=13 +! CHECK-NEXT: i=16 j=16 + +! Partial tile +! CHECK-NEXT: i=13 j=19 +! CHECK-NEXT: i=16 j=19 + +! CHECK-NEXT: done diff --git a/openmp/runtime/test/transform/tile/parallel-wsloop-collapse-intdo.f90 b/openmp/runtime/test/transform/tile/parallel-wsloop-collapse-intdo.f90 new file mode 100644 index 0000000000000..66bddf30e045a --- /dev/null +++ b/openmp/runtime/test/transform/tile/parallel-wsloop-collapse-intdo.f90 @@ -0,0 +1,59 @@ +! This test checks lowering of OpenMP tile directive + +! RUN: %flang %flags %openmp_flags -fopenmp-version=51 %s -o %t.exe +! RUN: %t.exe | FileCheck %s --match-full-lines + + +program tile_wsloop_collapse_intdo + integer i, j + print *, 'do' + + !$OMP PARALLEL DO COLLAPSE(2) NUM_THREADS(1) + !$OMP TILE SIZES(2,2) + do i=7, 18, 3 + do j=7, 19, 3 + print '("i=", I0, " j=", I0)', i, j + end do + end do + !$OMP END TILE + !$OMP END PARALLEL DO + + print *, 'done' +end program + + +! CHECK: do + +! Complete tile +! CHECK-NEXT: i=7 j=7 +! CHECK-NEXT: i=7 j=10 +! CHECK-NEXT: i=10 j=7 +! CHECK-NEXT: i=10 j=10 + +! Complete tile +! CHECK-NEXT: i=7 j=13 +! CHECK-NEXT: i=7 j=16 +! CHECK-NEXT: i=10 j=13 +! CHECK-NEXT: i=10 j=16 + +! Partial tile +! CHECK-NEXT: i=7 j=19 +! CHECK-NEXT: i=10 j=19 + +! Complete tile +! CHECK-NEXT: i=13 j=7 +! CHECK-NEXT: i=13 j=10 +! CHECK-NEXT: i=16 j=7 +! CHECK-NEXT: i=16 j=10 + +! Complete tile +! CHECK-NEXT: i=13 j=13 +! CHECK-NEXT: i=13 j=16 +! CHECK-NEXT: i=16 j=13 +! CHECK-NEXT: i=16 j=16 + +! Partial tile +! CHECK-NEXT: i=13 j=19 +! CHECK-NEXT: i=16 j=19 + +! CHECK-NEXT: done