Skip to content

[MLIR][Affine] Fix/complete access index invariance, add isInvariantAccess #84602

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ std::optional<uint64_t> getConstantTripCount(AffineForOp forOp);
/// this method is thus able to determine non-trivial divisors.
uint64_t getLargestDivisorOfTripCount(AffineForOp forOp);

/// Checks if an affine read or write operation depends on `forOp`'s IV, i.e.,
/// if the memory access is invariant on `forOp`.
template <typename LoadOrStoreOp>
bool isInvariantAccess(LoadOrStoreOp memOp, AffineForOp forOp);

/// Given an induction variable `iv` of type AffineForOp and `indices` of type
/// IndexType, returns the set of `indices` that are independent of `iv`.
///
Expand Down
5 changes: 5 additions & 0 deletions mlir/include/mlir/Dialect/Affine/IR/AffineValueMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ class AffineValueMap {
// Resets this AffineValueMap with 'map', 'operands', and 'results'.
void reset(AffineMap map, ValueRange operands, ValueRange results = {});

/// Composes all incoming affine.apply ops and then simplifies and
/// canonicalizes the map and operands. This can change the number of
/// operands, but the result count remains the same.
void composeSimplifyAndCanonicalize();

/// Return the value map that is the difference of value maps 'a' and 'b',
/// represented as an affine map and its operands. The output map + operands
/// are canonicalized and simplified.
Expand Down
59 changes: 25 additions & 34 deletions mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,45 +145,36 @@ uint64_t mlir::affine::getLargestDivisorOfTripCount(AffineForOp forOp) {
return *gcd;
}

/// Given an induction variable `iv` of type AffineForOp and an access `index`
/// of type index, returns `true` if `index` is independent of `iv` and
/// false otherwise. The determination supports composition with at most one
/// AffineApplyOp. The 'at most one AffineApplyOp' comes from the fact that
/// the composition of AffineApplyOp needs to be canonicalized by construction
/// to avoid writing code that composes arbitrary numbers of AffineApplyOps
/// everywhere. To achieve this, at the very least, the compose-affine-apply
/// pass must have been run.
/// Given an affine.for `iv` and an access `index` of type index, returns `true`
/// if `index` is independent of `iv` and false otherwise.
///
/// Prerequisites:
/// 1. `iv` and `index` of the proper type;
/// 2. at most one reachable AffineApplyOp from index;
///
/// Returns false in cases with more than one AffineApplyOp, this is
/// conservative.
/// Prerequisites: `iv` and `index` of the proper type;
static bool isAccessIndexInvariant(Value iv, Value index) {
assert(isAffineForInductionVar(iv) && "iv must be a AffineForOp");
assert(isa<IndexType>(index.getType()) && "index must be of IndexType");
SmallVector<Operation *, 4> affineApplyOps;
getReachableAffineApplyOps({index}, affineApplyOps);

if (affineApplyOps.empty()) {
// Pointer equality test because of Value pointer semantics.
return index != iv;
}

if (affineApplyOps.size() > 1) {
affineApplyOps[0]->emitRemark(
"CompositionAffineMapsPass must have been run: there should be at most "
"one AffineApplyOp, returning false conservatively.");
return false;
}
assert(isAffineForInductionVar(iv) && "iv must be an affine.for iv");
assert(isa<IndexType>(index.getType()) && "index must be of 'index' type");
auto map = AffineMap::getMultiDimIdentityMap(/*numDims=*/1, iv.getContext());
SmallVector<Value> operands = {index};
AffineValueMap avm(map, operands);
avm.composeSimplifyAndCanonicalize();
return !avm.isFunctionOf(0, iv);
}

auto composeOp = cast<AffineApplyOp>(affineApplyOps[0]);
// We need yet another level of indirection because the `dim` index of the
// access may not correspond to the `dim` index of composeOp.
return !composeOp.getAffineValueMap().isFunctionOf(0, iv);
// Pre-requisite: Loop bounds should be in canonical form.
template <typename LoadOrStoreOp>
bool mlir::affine::isInvariantAccess(LoadOrStoreOp memOp, AffineForOp forOp) {
AffineValueMap avm(memOp.getAffineMap(), memOp.getMapOperands());
avm.composeSimplifyAndCanonicalize();
return !llvm::is_contained(avm.getOperands(), forOp.getInductionVar());
}

// Explicitly instantiate the template so that the compiler knows we need them.
template bool mlir::affine::isInvariantAccess(AffineReadOpInterface,
AffineForOp);
template bool mlir::affine::isInvariantAccess(AffineWriteOpInterface,
AffineForOp);
template bool mlir::affine::isInvariantAccess(AffineLoadOp, AffineForOp);
template bool mlir::affine::isInvariantAccess(AffineStoreOp, AffineForOp);

DenseSet<Value> mlir::affine::getInvariantAccesses(Value iv,
ArrayRef<Value> indices) {
DenseSet<Value> res;
Expand Down
9 changes: 9 additions & 0 deletions mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,15 @@ void AffineValueMap::reset(AffineMap map, ValueRange operands,
this->results.assign(results.begin(), results.end());
}

void AffineValueMap::composeSimplifyAndCanonicalize() {
AffineMap sMap = getAffineMap();
fullyComposeAffineMapAndOperands(&sMap, &operands);
// Full composition also canonicalizes and simplifies before returning. We
// need to canonicalize once more to drop unused operands.
canonicalizeMapAndOperands(&sMap, &operands);
this->map.reset(sMap);
}

void AffineValueMap::difference(const AffineValueMap &a,
const AffineValueMap &b, AffineValueMap *res) {
assert(a.getNumResults() == b.getNumResults() && "invalid inputs");
Expand Down
33 changes: 31 additions & 2 deletions mlir/test/Dialect/Affine/access-analysis.mlir
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
// RUN: mlir-opt %s -split-input-file -test-affine-access-analysis -verify-diagnostics | FileCheck %s

// CHECK-LABEL: func @loop_1d
func.func @loop_1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
// CHECK-LABEL: func @loop_simple
func.func @loop_simple(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {
%c0 = arith.constant 0 : index
%M = memref.dim %A, %c0 : memref<?x?xf32>
affine.for %i = 0 to %M {
affine.for %j = 0 to %M {
affine.load %A[%c0, %i] : memref<?x?xf32>
// expected-remark@above {{contiguous along loop 0}}
// expected-remark@above {{invariant along loop 1}}
affine.load %A[%c0, 8 * %i + %j] : memref<?x?xf32>
// expected-remark@above {{contiguous along loop 1}}
// Note/FIXME: access stride isn't being checked.
// expected-remark@-3 {{contiguous along loop 0}}

// These are all non-contiguous along both loops. Nothing is emitted.
affine.load %A[%i, %c0] : memref<?x?xf32>
// expected-remark@above {{invariant along loop 1}}
// Note/FIXME: access stride isn't being checked.
affine.load %A[%i, 8 * %j] : memref<?x?xf32>
// expected-remark@above {{contiguous along loop 1}}
Expand All @@ -27,6 +29,22 @@ func.func @loop_1d(%A : memref<?x?xf32>, %B : memref<?x?x?xf32>) {

// -----

// CHECK-LABEL: func @loop_unsimplified
func.func @loop_unsimplified(%A : memref<100xf32>) {
affine.for %i = 0 to 100 {
affine.load %A[2 * %i - %i - %i] : memref<100xf32>
// expected-remark@above {{invariant along loop 0}}

%m = affine.apply affine_map<(d0) -> (-2 * d0)>(%i)
%n = affine.apply affine_map<(d0) -> (2 * d0)>(%i)
affine.load %A[(%m + %n) floordiv 2] : memref<100xf32>
// expected-remark@above {{invariant along loop 0}}
}
return
}

// -----

#map = affine_map<(d0) -> (d0 * 16)>
#map1 = affine_map<(d0) -> (d0 * 16 + 16)>
#map2 = affine_map<(d0) -> (d0)>
Expand All @@ -41,11 +59,19 @@ func.func @tiled(%arg0: memref<*xf32>) {
%alloc_0 = memref.alloc() : memref<1x16x1x16xf32>
affine.for %arg4 = #map(%arg1) to #map1(%arg1) {
affine.for %arg5 = #map(%arg3) to #map1(%arg3) {
// TODO: here and below, the access isn't really invariant
// along tile-space IVs where the intra-tile IVs' bounds
// depend on them.
%0 = affine.load %cast[%arg4] : memref<64xf32>
// expected-remark@above {{contiguous along loop 3}}
// expected-remark@above {{invariant along loop 0}}
// expected-remark@above {{invariant along loop 1}}
// expected-remark@above {{invariant along loop 2}}
// expected-remark@above {{invariant along loop 4}}
affine.store %0, %alloc_0[0, %arg1 * -16 + %arg4, 0, %arg3 * -16 + %arg5] : memref<1x16x1x16xf32>
// expected-remark@above {{contiguous along loop 4}}
// expected-remark@above {{contiguous along loop 2}}
// expected-remark@above {{invariant along loop 1}}
}
}
affine.for %arg4 = #map(%arg1) to #map1(%arg1) {
Expand All @@ -56,6 +82,9 @@ func.func @tiled(%arg0: memref<*xf32>) {
// expected-remark@above {{contiguous along loop 2}}
affine.store %0, %alloc[0, %arg5, %arg6, %arg4] : memref<1x224x224x64xf32>
// expected-remark@above {{contiguous along loop 3}}
// expected-remark@above {{invariant along loop 0}}
// expected-remark@above {{invariant along loop 1}}
// expected-remark@above {{invariant along loop 2}}
}
}
}
Expand Down
19 changes: 13 additions & 6 deletions mlir/test/lib/Dialect/Affine/TestAccessAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,18 +59,25 @@ void TestAccessAnalysis::runOnOperation() {
enclosingOps.clear();
getAffineForIVs(*memOp, &enclosingOps);
for (unsigned d = 0, e = enclosingOps.size(); d < e; d++) {
AffineForOp loop = enclosingOps[d];
int memRefDim;
bool isContiguous;
bool isContiguous, isInvariant;
if (auto read = dyn_cast<AffineReadOpInterface>(memOp)) {
isContiguous = isContiguousAccess(enclosingOps[d].getInductionVar(),
read, &memRefDim);
isContiguous =
isContiguousAccess(loop.getInductionVar(), read, &memRefDim);
isInvariant = isInvariantAccess(read, loop);
} else {
isContiguous = isContiguousAccess(enclosingOps[d].getInductionVar(),
cast<AffineWriteOpInterface>(memOp),
&memRefDim);
auto write = cast<AffineWriteOpInterface>(memOp);
isContiguous =
isContiguousAccess(loop.getInductionVar(), write, &memRefDim);
isInvariant = isInvariantAccess(write, loop);
}
// Check for contiguity for the innermost memref dimension to avoid
// emitting too many diagnostics.
if (isContiguous && memRefDim == 0)
memOp->emitRemark("contiguous along loop ") << d << '\n';
if (isInvariant)
memOp->emitRemark("invariant along loop ") << d << '\n';
}
}
}
Expand Down