diff --git a/flang/lib/Optimizer/Transforms/OptimizeArrayRepacking.cpp b/flang/lib/Optimizer/Transforms/OptimizeArrayRepacking.cpp index 1688f2887a57a..173ce1da041ae 100644 --- a/flang/lib/Optimizer/Transforms/OptimizeArrayRepacking.cpp +++ b/flang/lib/Optimizer/Transforms/OptimizeArrayRepacking.cpp @@ -26,6 +26,8 @@ namespace fir { #include "flang/Optimizer/Transforms/Passes.h.inc" } // namespace fir +#define DEBUG_TYPE "optimize-array-repacking" + namespace { class OptimizeArrayRepackingPass : public fir::impl::OptimizeArrayRepackingBase { @@ -78,13 +80,19 @@ void OptimizeArrayRepackingPass::runOnOperation() { mlir::MLIRContext *context = &getContext(); mlir::RewritePatternSet patterns(context); mlir::GreedyRewriteConfig config; - config.setRegionSimplificationLevel( - mlir::GreedySimplifyRegionLevel::Disabled); + config + .setRegionSimplificationLevel(mlir::GreedySimplifyRegionLevel::Disabled) + // Traverse the operations top-down, so that fir.pack_array + // operations are optimized before the fir.pack_array + // operations that use them. This way the rewrite may converge faster. + .setUseTopDownTraversal(); patterns.insert(context); patterns.insert(context); if (mlir::failed( mlir::applyPatternsGreedily(funcOp, std::move(patterns), config))) { - mlir::emitError(funcOp.getLoc(), "failure in array repacking optimization"); - signalPassFailure(); + // Failure may happen if the rewriter does not converge soon enough. + // That is not an error, so just report a diagnostic under debug. 
+ LLVM_DEBUG(mlir::emitError(funcOp.getLoc(), + "failure in array repacking optimization")); } } diff --git a/flang/test/Transforms/optimize-array-repacking.fir b/flang/test/Transforms/optimize-array-repacking.fir index 6269fa441fe44..15a3e3941f44f 100644 --- a/flang/test/Transforms/optimize-array-repacking.fir +++ b/flang/test/Transforms/optimize-array-repacking.fir @@ -658,3 +658,136 @@ func.func @_QPneg_test_pointer(%arg0: !fir.ref> return } + +// Test a long chain of fir.pack_array operations. +// The rewriter used to use a bottom-up traversal that optimized +// fir.pack_array operations starting from the innermost one. +// The rewriter did not converge in 10 (default) iterations +// causing the pass to report a failure. +// A top-down traversal should fix this and allow optimizing +// all the repackings. +// CHECK-LABEL: func.func @test_long_chain( +// CHECK-NOT: fir.pack_array +// CHECK-NOT: fir.unpack_array +func.func @test_long_chain(%pred: i1) { + %c10 = arith.constant 10 : index + %3 = fir.dummy_scope : !fir.dscope + %4 = fir.address_of(@aaa) : !fir.ref> + %5 = fir.shape %c10, %c10 : (index, index) -> !fir.shape<2> + %6 = fir.declare %4(%5) {uniq_name = "aaa"} : (!fir.ref>, !fir.shape<2>) -> !fir.ref> + %9 = fir.embox %6(%5) : (!fir.ref>, !fir.shape<2>) -> !fir.box> + %10 = fir.convert %9 : (!fir.box>) -> !fir.box> + %11 = fir.dummy_scope : !fir.dscope + %12 = fir.pack_array %10 heap innermost : (!fir.box>) -> !fir.box> + %13 = fir.declare %12 dummy_scope %11 {uniq_name = "aaa"} : (!fir.box>, !fir.dscope) -> !fir.box> + %14 = fir.rebox %13 : (!fir.box>) -> !fir.box> + cf.cond_br %pred, ^bb17, ^bb1 +^bb1: // pred: ^bb0 + %20 = fir.dummy_scope : !fir.dscope + %21 = fir.pack_array %14 heap innermost : (!fir.box>) -> !fir.box> + %22 = fir.declare %21 dummy_scope %20 {uniq_name = "aaa"} : (!fir.box>, !fir.dscope) -> !fir.box> + %23 = fir.rebox %22 : (!fir.box>) -> !fir.box> + %28 = fir.dummy_scope : !fir.dscope + %29 = fir.pack_array %23 heap innermost : 
(!fir.box>) -> !fir.box> + %30 = fir.declare %29 dummy_scope %28 {uniq_name = "aaa"} : (!fir.box>, !fir.dscope) -> !fir.box> + %31 = fir.rebox %30 : (!fir.box>) -> !fir.box> + cf.cond_br %pred, ^bb16, ^bb2 +^bb2: // pred: ^bb1 + %37 = fir.dummy_scope : !fir.dscope + %38 = fir.pack_array %31 heap innermost : (!fir.box>) -> !fir.box> + %39 = fir.declare %38 dummy_scope %37 {uniq_name = "aaa"} : (!fir.box>, !fir.dscope) -> !fir.box> + %40 = fir.rebox %39 : (!fir.box>) -> !fir.box> + %45 = fir.dummy_scope : !fir.dscope + %46 = fir.pack_array %40 heap innermost : (!fir.box>) -> !fir.box> + %47 = fir.declare %46 dummy_scope %45 {uniq_name = "aaa"} : (!fir.box>, !fir.dscope) -> !fir.box> + %48 = fir.rebox %47 : (!fir.box>) -> !fir.box> + cf.cond_br %pred, ^bb15, ^bb3 +^bb3: // pred: ^bb2 + %54 = fir.dummy_scope : !fir.dscope + %55 = fir.pack_array %48 heap innermost : (!fir.box>) -> !fir.box> + %56 = fir.declare %55 dummy_scope %54 {uniq_name = "aaa"} : (!fir.box>, !fir.dscope) -> !fir.box> + %57 = fir.rebox %56 : (!fir.box>) -> !fir.box> + %62 = fir.dummy_scope : !fir.dscope + %63 = fir.pack_array %57 heap innermost : (!fir.box>) -> !fir.box> + %64 = fir.declare %63 dummy_scope %62 {uniq_name = "aaa"} : (!fir.box>, !fir.dscope) -> !fir.box> + %65 = fir.rebox %64 : (!fir.box>) -> !fir.box> + cf.cond_br %pred, ^bb14, ^bb4 +^bb4: // pred: ^bb3 + %71 = fir.dummy_scope : !fir.dscope + %72 = fir.pack_array %65 heap innermost : (!fir.box>) -> !fir.box> + %73 = fir.declare %72 dummy_scope %71 {uniq_name = "aaa"} : (!fir.box>, !fir.dscope) -> !fir.box> + %74 = fir.rebox %73 : (!fir.box>) -> !fir.box> + %79 = fir.dummy_scope : !fir.dscope + %80 = fir.pack_array %74 heap innermost : (!fir.box>) -> !fir.box> + %81 = fir.declare %80 dummy_scope %79 {uniq_name = "aaa"} : (!fir.box>, !fir.dscope) -> !fir.box> + %82 = fir.rebox %81 : (!fir.box>) -> !fir.box> + cf.cond_br %pred, ^bb13, ^bb5 +^bb5: // pred: ^bb4 + %88 = fir.dummy_scope : !fir.dscope + %89 = fir.pack_array %82 heap 
innermost : (!fir.box>) -> !fir.box> + %90 = fir.declare %89 dummy_scope %88 {uniq_name = "aaa"} : (!fir.box>, !fir.dscope) -> !fir.box> + %91 = fir.rebox %90 : (!fir.box>) -> !fir.box> + %96 = fir.dummy_scope : !fir.dscope + %97 = fir.pack_array %91 heap innermost : (!fir.box>) -> !fir.box> + %98 = fir.declare %97 dummy_scope %96 {uniq_name = "aaa"} : (!fir.box>, !fir.dscope) -> !fir.box> + %99 = fir.rebox %98 : (!fir.box>) -> !fir.box> + cf.cond_br %pred, ^bb12, ^bb6 +^bb6: // pred: ^bb5 + %105 = fir.dummy_scope : !fir.dscope + %106 = fir.pack_array %99 heap innermost : (!fir.box>) -> !fir.box> + %107 = fir.declare %106 dummy_scope %105 {uniq_name = "aaa"} : (!fir.box>, !fir.dscope) -> !fir.box> + %108 = fir.rebox %107 : (!fir.box>) -> !fir.box> + %113 = fir.dummy_scope : !fir.dscope + %114 = fir.pack_array %108 heap innermost : (!fir.box>) -> !fir.box> + %115 = fir.declare %114 dummy_scope %113 {uniq_name = "aaa"} : (!fir.box>, !fir.dscope) -> !fir.box> + %116 = fir.rebox %115 : (!fir.box>) -> !fir.box> + cf.cond_br %pred, ^bb11, ^bb7 +^bb7: // pred: ^bb6 + %122 = fir.dummy_scope : !fir.dscope + %123 = fir.pack_array %116 heap innermost : (!fir.box>) -> !fir.box> + %124 = fir.declare %123 dummy_scope %122 {uniq_name = "aaa"} : (!fir.box>, !fir.dscope) -> !fir.box> + %125 = fir.rebox %124 : (!fir.box>) -> !fir.box> + %130 = fir.dummy_scope : !fir.dscope + %131 = fir.pack_array %125 heap innermost : (!fir.box>) -> !fir.box> + %132 = fir.declare %131 dummy_scope %130 {uniq_name = "aaa"} : (!fir.box>, !fir.dscope) -> !fir.box> + %133 = fir.rebox %132 : (!fir.box>) -> !fir.box> + cf.cond_br %pred, ^bb9, ^bb8 +^bb8: // pred: ^bb7 + %139 = fir.dummy_scope : !fir.dscope + %140 = fir.pack_array %133 heap innermost : (!fir.box>) -> !fir.box> + fir.unpack_array %140 to %133 heap : !fir.box> + cf.br ^bb9 +^bb9: // 2 preds: ^bb7, ^bb8 + fir.unpack_array %131 to %125 heap : !fir.box> + cf.br ^bb10 +^bb10: // pred: ^bb9 + fir.unpack_array %123 to %116 heap : !fir.box> + cf.br 
^bb11 +^bb11: // 2 preds: ^bb6, ^bb10 + fir.unpack_array %114 to %108 heap : !fir.box> + fir.unpack_array %106 to %99 heap : !fir.box> + cf.br ^bb12 +^bb12: // 2 preds: ^bb5, ^bb11 + fir.unpack_array %97 to %91 heap : !fir.box> + fir.unpack_array %89 to %82 heap : !fir.box> + cf.br ^bb13 +^bb13: // 2 preds: ^bb4, ^bb12 + fir.unpack_array %80 to %74 heap : !fir.box> + fir.unpack_array %72 to %65 heap : !fir.box> + cf.br ^bb14 +^bb14: // 2 preds: ^bb3, ^bb13 + fir.unpack_array %63 to %57 heap : !fir.box> + fir.unpack_array %55 to %48 heap : !fir.box> + cf.br ^bb15 +^bb15: // 2 preds: ^bb2, ^bb14 + fir.unpack_array %46 to %40 heap : !fir.box> + fir.unpack_array %38 to %31 heap : !fir.box> + cf.br ^bb16 +^bb16: // 2 preds: ^bb1, ^bb15 + fir.unpack_array %29 to %23 heap : !fir.box> + fir.unpack_array %21 to %14 heap : !fir.box> + cf.br ^bb17 +^bb17: // 2 preds: ^bb0, ^bb16 + fir.unpack_array %12 to %10 heap : !fir.box> + return +}