Skip to content

Commit 4775b96

Browse files
authored
[flang] Optimize redundant array repacking. (#147881)
This patch allows optimizing redundant array repacking when the source array is statically known to be contiguous. This is part of the implementation plan for the array repacking feature, though it does not affect any real-life use case as long as FIR inlining is not a thing. I experimented with simple cases of FIR inlining using `-inline-all`, and I recorded these cases in the optimize-array-repacking.fir tests.
1 parent 9e56d0d commit 4775b96

File tree

16 files changed

+982
-189
lines changed

16 files changed

+982
-189
lines changed

flang/include/flang/Optimizer/Builder/HLFIRTools.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -542,6 +542,14 @@ Entity gen1DSection(mlir::Location loc, fir::FirOpBuilder &builder,
542542
/// contiguous.
543543
bool designatePreservesContinuity(hlfir::DesignateOp op);
544544

545+
/// Return true iff the given \p base describes an object
546+
/// that is contiguous. If \p checkWhole is true, then
547+
/// the object must be contiguous in all dimensions,
548+
/// otherwise, it must be contiguous in the innermost dimension.
549+
/// This function is an extension of hlfir::Entity::isSimplyContiguous(),
550+
/// and it can be used on pure FIR representation as well as on HLFIR.
551+
bool isSimplyContiguous(mlir::Value base, bool checkWhole = true);
552+
545553
} // namespace hlfir
546554

547555
#endif // FORTRAN_OPTIMIZER_BUILDER_HLFIRTOOLS_H

flang/include/flang/Optimizer/Dialect/FIROpsSupport.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,19 @@ std::optional<int64_t> getAllocaByteSize(fir::AllocaOp alloca,
238238
/// When \p checkWhole is false, then the checking is only done
239239
/// for continuity in the innermost dimension, otherwise,
240240
/// the checking is done for continuity of the whole result of rebox.
241-
bool reboxPreservesContinuity(fir::ReboxOp rebox, bool checkWhole = true);
241+
/// The caller may specify \p mayHaveNonDefaultLowerBounds, if it is known,
242+
/// to allow better handling of the rebox operations representing
243+
/// full array slices.
244+
bool reboxPreservesContinuity(fir::ReboxOp rebox,
245+
bool mayHaveNonDefaultLowerBounds = true,
246+
bool checkWhole = true);
247+
248+
/// Return true, if \p embox operation produces a contiguous
249+
/// entity.
250+
/// When \p checkWhole is false, then the checking is only done
251+
/// for continuity in the innermost dimension, otherwise,
252+
/// the checking is done for continuity of the whole result of embox.
253+
bool isContiguousEmbox(fir::EmboxOp embox, bool checkWhole = true);
242254

243255
} // namespace fir
244256

flang/include/flang/Optimizer/Transforms/Passes.h

Lines changed: 1 addition & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -31,39 +31,7 @@ namespace fir {
3131
// Passes defined in Passes.td
3232
//===----------------------------------------------------------------------===//
3333

34-
#define GEN_PASS_DECL_ABSTRACTRESULTOPT
35-
#define GEN_PASS_DECL_AFFINEDIALECTPROMOTION
36-
#define GEN_PASS_DECL_AFFINEDIALECTDEMOTION
37-
#define GEN_PASS_DECL_ANNOTATECONSTANTOPERANDS
38-
#define GEN_PASS_DECL_ARRAYVALUECOPY
39-
#define GEN_PASS_DECL_ASSUMEDRANKOPCONVERSION
40-
#define GEN_PASS_DECL_CHARACTERCONVERSION
41-
#define GEN_PASS_DECL_CFGCONVERSION
42-
#define GEN_PASS_DECL_CUFADDCONSTRUCTOR
43-
#define GEN_PASS_DECL_CUFDEVICEGLOBAL
44-
#define GEN_PASS_DECL_CUFGPUTOLLVMCONVERSION
45-
#define GEN_PASS_DECL_CUFOPCONVERSION
46-
#define GEN_PASS_DECL_CUFCOMPUTESHAREDMEMORYOFFSETSANDSIZE
47-
#define GEN_PASS_DECL_EXTERNALNAMECONVERSION
48-
#define GEN_PASS_DECL_MEMREFDATAFLOWOPT
49-
#define GEN_PASS_DECL_SIMPLIFYINTRINSICS
50-
#define GEN_PASS_DECL_MEMORYALLOCATIONOPT
51-
#define GEN_PASS_DECL_SIMPLIFYREGIONLITE
52-
#define GEN_PASS_DECL_ALGEBRAICSIMPLIFICATION
53-
#define GEN_PASS_DECL_POLYMORPHICOPCONVERSION
54-
#define GEN_PASS_DECL_OPENACCDATAOPERANDCONVERSION
55-
#define GEN_PASS_DECL_ADDDEBUGINFO
56-
#define GEN_PASS_DECL_STACKARRAYS
57-
#define GEN_PASS_DECL_STACKRECLAIM
58-
#define GEN_PASS_DECL_LOOPVERSIONING
59-
#define GEN_PASS_DECL_ADDALIASTAGS
60-
#define GEN_PASS_DECL_VSCALEATTR
61-
#define GEN_PASS_DECL_FUNCTIONATTR
62-
#define GEN_PASS_DECL_CONSTANTARGUMENTGLOBALISATIONOPT
63-
#define GEN_PASS_DECL_COMPILERGENERATEDNAMESCONVERSION
64-
#define GEN_PASS_DECL_SETRUNTIMECALLATTRIBUTES
65-
#define GEN_PASS_DECL_GENRUNTIMECALLSFORTEST
66-
#define GEN_PASS_DECL_SIMPLIFYFIROPERATIONS
34+
#define GEN_PASS_DECL
6735

6836
#include "flang/Optimizer/Transforms/Passes.h.inc"
6937

flang/include/flang/Optimizer/Transforms/Passes.td

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -551,4 +551,14 @@ def SimplifyFIROperations : Pass<"simplify-fir-operations", "mlir::ModuleOp"> {
551551
"Prefer expanding without using Fortran runtime calls.">];
552552
}
553553

554+
def OptimizeArrayRepacking
555+
: Pass<"optimize-array-repacking", "mlir::func::FuncOp"> {
556+
let summary = "Optimizes redundant array repacking operations";
557+
let description = [{
558+
If the source of fir.pack_array is known to be contiguous,
559+
then this pass erases such operations. The corresponding
560+
fir.unpack_array operations are also removed.
561+
}];
562+
}
563+
554564
#endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES

flang/lib/Optimizer/Builder/HLFIRTools.cpp

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -212,10 +212,17 @@ bool hlfir::Entity::mayHaveNonDefaultLowerBounds() const {
212212
if (auto varIface = getIfVariableInterface())
213213
return isShapeWithLowerBounds(varIface.getShape());
214214
// Go through chain of fir.box converts.
215-
if (auto convert = getDefiningOp<fir::ConvertOp>())
215+
if (auto convert = getDefiningOp<fir::ConvertOp>()) {
216216
return hlfir::Entity{convert.getValue()}.mayHaveNonDefaultLowerBounds();
217-
// TODO: Embox and Rebox do not have hlfir variable interface, but are
218-
// easy to reason about.
217+
} else if (auto rebox = getDefiningOp<fir::ReboxOp>()) {
218+
// If slicing is involved, then the resulting box has
219+
// default lower bounds. If there is no slicing,
220+
// then the result depends on the shape operand
221+
// (whether it has non default lower bounds or not).
222+
return !rebox.getSlice() && isShapeWithLowerBounds(rebox.getShape());
223+
} else if (auto embox = getDefiningOp<fir::EmboxOp>()) {
224+
return !embox.getSlice() && isShapeWithLowerBounds(embox.getShape());
225+
}
219226
return true;
220227
}
221228

@@ -1646,3 +1653,30 @@ bool hlfir::designatePreservesContinuity(hlfir::DesignateOp op) {
16461653
}
16471654
return true;
16481655
}
1656+
1657+
/// Return true iff \p base can be statically proven to describe a
/// contiguous object.
///
/// \param base       the value to analyze; may come from pure FIR or HLFIR.
/// \param checkWhole when true, contiguity is required in all dimensions;
///                   when false, only in the innermost dimension.
///
/// The check first defers to hlfir::Entity::isSimplyContiguous(). If that
/// does not succeed, the defining operation of \p base is inspected:
/// fir.embox, fir.rebox, fir.declare/hlfir.declare and fir.convert are
/// looked through. Any other producer (or a value without a defining
/// operation) yields the conservative answer 'false'.
bool hlfir::isSimplyContiguous(mlir::Value base, bool checkWhole) {
  hlfir::Entity entity{base};
  if (entity.isSimplyContiguous())
    return true;

  // Look at the definition.
  mlir::Operation *def = base.getDefiningOp();
  if (!def)
    return false;

  return mlir::TypeSwitch<mlir::Operation *, bool>(def)
      .Case<fir::EmboxOp>(
          [&](auto op) { return fir::isContiguousEmbox(op, checkWhole); })
      .Case<fir::ReboxOp>([&](auto op) {
        // The rebox result is contiguous only if the rebox itself keeps
        // the layout and the reboxed entity is contiguous.
        hlfir::Entity box{op.getBox()};
        return fir::reboxPreservesContinuity(
                   op, box.mayHaveNonDefaultLowerBounds(), checkWhole) &&
               isSimplyContiguous(box, checkWhole);
      })
      .Case<fir::DeclareOp, hlfir::DeclareOp>([&](auto op) {
        return isSimplyContiguous(op.getMemref(), checkWhole);
      })
      .Case<fir::ConvertOp>([&](auto op) {
        // Forward checkWhole like every other case does; otherwise
        // an innermost-only query would silently turn into a whole-array
        // query as soon as a convert is in the chain.
        return isSimplyContiguous(op.getValue(), checkWhole);
      })
      .Default([](auto &&) { return false; });
}

flang/lib/Optimizer/Dialect/FIROps.cpp

Lines changed: 129 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1944,6 +1944,128 @@ llvm::LogicalResult fir::EmboxOp::verify() {
19441944
return mlir::success();
19451945
}
19461946

1947+
/// Returns true if \p extent matches the extent of the \p box's
1948+
/// dimension \p dim.
1949+
static bool isBoxExtent(mlir::Value box, std::int64_t dim, mlir::Value extent) {
1950+
if (auto op = extent.getDefiningOp<fir::BoxDimsOp>())
1951+
if (op.getVal() == box && op.getExtent() == extent)
1952+
if (auto dimOperand = fir::getIntIfConstant(op.getDim()))
1953+
return *dimOperand == dim;
1954+
return false;
1955+
}
1956+
1957+
/// Returns true if \p lb matches the lower bound of the \p box's
1958+
/// dimension \p dim. If \p mayHaveNonDefaultLowerBounds is false,
1959+
/// then \p lb may be an integer constant 1.
1960+
static bool isBoxLb(mlir::Value box, std::int64_t dim, mlir::Value lb,
1961+
bool mayHaveNonDefaultLowerBounds = true) {
1962+
if (auto op = lb.getDefiningOp<fir::BoxDimsOp>()) {
1963+
if (op.getVal() == box && op.getLowerBound() == lb)
1964+
if (auto dimOperand = fir::getIntIfConstant(op.getDim()))
1965+
return *dimOperand == dim;
1966+
} else if (!mayHaveNonDefaultLowerBounds) {
1967+
if (auto constantLb = fir::getIntIfConstant(lb))
1968+
return *constantLb == 1;
1969+
}
1970+
return false;
1971+
}
1972+
1973+
/// Returns true if \p ub matches the upper bound of the \p box's
1974+
/// dimension \p dim. If \p mayHaveNonDefaultLowerBounds is false,
1975+
/// then the dimension's lower bound may be an integer constant 1.
1976+
/// Note that the upper bound is usually a result of computation
1977+
/// involving the lower bound and the extent, and the function
1978+
/// tries its best to recognize the computation pattern.
1979+
/// The conservative result 'false' does not necessarily mean
1980+
/// that \p ub is not an actual upper bound value.
1981+
static bool isBoxUb(mlir::Value box, std::int64_t dim, mlir::Value ub,
1982+
bool mayHaveNonDefaultLowerBounds = true) {
1983+
if (auto sub1 = ub.getDefiningOp<mlir::arith::SubIOp>()) {
1984+
auto one = fir::getIntIfConstant(sub1.getOperand(1));
1985+
if (!one || *one != 1)
1986+
return false;
1987+
if (auto add = sub1.getOperand(0).getDefiningOp<mlir::arith::AddIOp>())
1988+
if ((isBoxLb(box, dim, add.getOperand(0)) &&
1989+
isBoxExtent(box, dim, add.getOperand(1))) ||
1990+
(isBoxLb(box, dim, add.getOperand(1)) &&
1991+
isBoxExtent(box, dim, add.getOperand(0))))
1992+
return true;
1993+
} else if (!mayHaveNonDefaultLowerBounds) {
1994+
return isBoxExtent(box, dim, ub);
1995+
}
1996+
return false;
1997+
}
1998+
1999+
/// Checks if the given \p sliceOp specifies a contiguous
2000+
/// array slice. If \p checkWhole is true, then the check
2001+
/// is done for all dimensions, otherwise, only for the innermost
2002+
/// dimension.
2003+
/// The simplest way to prove that this is an contiguous slice
2004+
/// is to check whether the slice stride(s) is 1.
2005+
/// For more complex cases, extra information must be provided
2006+
/// by the caller:
2007+
/// * \p origBox - if not null, then the source array is represented
2008+
/// with this !fir.box value. The box is used to recognize
2009+
/// the full dimension slices, which are specified by the triplets
2010+
/// computed from the dimensions' lower bounds and extents.
2011+
/// * \p mayHaveNonDefaultLowerBounds may be set to false to indicate
2012+
/// that the source entity has default lower bounds, so the full
2013+
/// dimension slices computations may use 1 for the lower bound.
2014+
static bool isContiguousArraySlice(fir::SliceOp sliceOp, bool checkWhole = true,
2015+
mlir::Value origBox = nullptr,
2016+
bool mayHaveNonDefaultLowerBounds = true) {
2017+
if (sliceOp.getFields().empty() && sliceOp.getSubstr().empty()) {
2018+
// TODO: generalize code for the triples analysis with
2019+
// hlfir::designatePreservesContinuity, especially when
2020+
// recognition of the whole dimension slices is added.
2021+
auto triples = sliceOp.getTriples();
2022+
assert((triples.size() % 3) == 0 && "invalid triples size");
2023+
2024+
// A slice with step=1 in the innermost dimension preserves
2025+
// the continuity of the array in the innermost dimension.
2026+
// If checkWhole is false, then check only the innermost slice triples.
2027+
std::size_t checkUpTo = checkWhole ? triples.size() : 3;
2028+
checkUpTo = std::min(checkUpTo, triples.size());
2029+
for (std::size_t i = 0; i < checkUpTo; i += 3) {
2030+
if (triples[i] != triples[i + 1]) {
2031+
// This is a section of the dimension. Only allow it
2032+
// to be the first triple, if the source of the slice
2033+
// is a boxed array. If it is a raw pointer, then
2034+
// the result will still be contiguous, as long as
2035+
// the strides are all ones.
2036+
// When origBox is not null, we must prove that the triple
2037+
// covers the whole dimension and the stride is one,
2038+
// before claiming contiguity for this dimension.
2039+
if (i != 0 && origBox) {
2040+
std::int64_t dim = i / 3;
2041+
if (!isBoxLb(origBox, dim, triples[i],
2042+
mayHaveNonDefaultLowerBounds) ||
2043+
!isBoxUb(origBox, dim, triples[i + 1],
2044+
mayHaveNonDefaultLowerBounds))
2045+
return false;
2046+
}
2047+
auto constantStep = fir::getIntIfConstant(triples[i + 2]);
2048+
if (!constantStep || *constantStep != 1)
2049+
return false;
2050+
}
2051+
}
2052+
return true;
2053+
}
2054+
return false;
2055+
}
2056+
2057+
/// Return true if \p embox produces a contiguous entity.
/// An embox without a slice operand is always reported contiguous.
/// With a slice operand, the answer comes from analyzing the defining
/// fir.slice; if the slice is not produced by fir.slice, the result is
/// the conservative 'false'. \p checkWhole selects between checking
/// all dimensions (true) and only the innermost one (false).
bool fir::isContiguousEmbox(fir::EmboxOp embox, bool checkWhole) {
  auto slice = embox.getSlice();
  if (!slice)
    return true;

  auto sliceOp = mlir::dyn_cast_or_null<fir::SliceOp>(slice.getDefiningOp());
  return sliceOp && isContiguousArraySlice(sliceOp, checkWhole);
}
2068+
19472069
//===----------------------------------------------------------------------===//
19482070
// EmboxCharOp
19492071
//===----------------------------------------------------------------------===//
@@ -4794,41 +4916,20 @@ mlir::Type fir::applyPathToType(mlir::Type eleTy, mlir::ValueRange path) {
47944916
return eleTy;
47954917
}
47964918

4797-
bool fir::reboxPreservesContinuity(fir::ReboxOp rebox, bool checkWhole) {
4919+
bool fir::reboxPreservesContinuity(fir::ReboxOp rebox,
4920+
bool mayHaveNonDefaultLowerBounds,
4921+
bool checkWhole) {
47984922
// If slicing is not involved, then the rebox does not affect
47994923
// the continuity of the array.
48004924
auto sliceArg = rebox.getSlice();
48014925
if (!sliceArg)
48024926
return true;
48034927

48044928
if (auto sliceOp =
4805-
mlir::dyn_cast_or_null<fir::SliceOp>(sliceArg.getDefiningOp())) {
4806-
if (sliceOp.getFields().empty() && sliceOp.getSubstr().empty()) {
4807-
// TODO: generalize code for the triples analysis with
4808-
// hlfir::designatePreservesContinuity, especially when
4809-
// recognition of the whole dimension slices is added.
4810-
auto triples = sliceOp.getTriples();
4811-
assert((triples.size() % 3) == 0 && "invalid triples size");
4812-
4813-
// A slice with step=1 in the innermost dimension preserves
4814-
// the continuity of the array in the innermost dimension.
4815-
// If checkWhole is false, then check only the innermost slice triples.
4816-
std::size_t checkUpTo = checkWhole ? triples.size() : 3;
4817-
checkUpTo = std::min(checkUpTo, triples.size());
4818-
for (std::size_t i = 0; i < checkUpTo; i += 3) {
4819-
if (triples[i] != triples[i + 1]) {
4820-
// This is a section of the dimension. Only allow it
4821-
// to be the first triple.
4822-
if (i != 0)
4823-
return false;
4824-
auto constantStep = fir::getIntIfConstant(triples[i + 2]);
4825-
if (!constantStep || *constantStep != 1)
4826-
return false;
4827-
}
4828-
}
4829-
return true;
4830-
}
4831-
}
4929+
mlir::dyn_cast_or_null<fir::SliceOp>(sliceArg.getDefiningOp()))
4930+
return isContiguousArraySlice(sliceOp, checkWhole, rebox.getBox(),
4931+
mayHaveNonDefaultLowerBounds);
4932+
48324933
return false;
48334934
}
48344935

flang/lib/Optimizer/Passes/Pipelines.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,10 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
207207
pm.addPass(fir::createPolymorphicOpConversion());
208208
pm.addPass(fir::createAssumedRankOpConversion());
209209

210+
// Optimize redundant array repacking operations,
211+
// if the source is known to be contiguous.
212+
if (pc.OptLevel.isOptimizingForSpeed())
213+
pm.addPass(fir::createOptimizeArrayRepacking());
210214
pm.addPass(fir::createLowerRepackArraysPass());
211215
// Expand FIR operations that may use SCF dialect for their
212216
// implementation. This is a mandatory pass.

flang/lib/Optimizer/Transforms/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ add_flang_library(FIRTransforms
3434
SetRuntimeCallAttributes.cpp
3535
GenRuntimeCallsForTest.cpp
3636
SimplifyFIROperations.cpp
37+
OptimizeArrayRepacking.cpp
3738

3839
DEPENDS
3940
CUFAttrs

flang/lib/Optimizer/Transforms/LoopVersioning.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,9 @@ static mlir::Value unwrapPassThroughOps(mlir::Value val) {
214214
/// of the value, otherwise return the value
215215
static mlir::Value unwrapReboxOp(mlir::Value val) {
216216
while (fir::ReboxOp rebox = val.getDefiningOp<fir::ReboxOp>()) {
217-
if (!fir::reboxPreservesContinuity(rebox, /*checkWhole=*/false)) {
217+
if (!fir::reboxPreservesContinuity(rebox,
218+
/*mayHaveNonDefaultLowerBounds=*/true,
219+
/*checkWhole=*/false)) {
218220
LLVM_DEBUG(llvm::dbgs() << "REBOX may produce non-contiguous array: "
219221
<< rebox << '\n');
220222
break;

0 commit comments

Comments
 (0)