diff --git a/mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h b/mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h index 1f64b57cac578..7b92b930fb5f5 100644 --- a/mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h +++ b/mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h @@ -48,6 +48,11 @@ std::optional getConstantTripCount(AffineForOp forOp); /// this method is thus able to determine non-trivial divisors. uint64_t getLargestDivisorOfTripCount(AffineForOp forOp); +/// Returns true if an affine read or write operation does not depend on +/// `forOp`'s IV, i.e., if the memory access is invariant along `forOp`. +template +bool isInvariantAccess(LoadOrStoreOp memOp, AffineForOp forOp); + /// Given an induction variable `iv` of type AffineForOp and `indices` of type /// IndexType, returns the set of `indices` that are independent of `iv`. /// diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineValueMap.h b/mlir/include/mlir/Dialect/Affine/IR/AffineValueMap.h index 8439930a87467..7ad0e4a1e5ea0 100644 --- a/mlir/include/mlir/Dialect/Affine/IR/AffineValueMap.h +++ b/mlir/include/mlir/Dialect/Affine/IR/AffineValueMap.h @@ -44,6 +44,11 @@ class AffineValueMap { // Resets this AffineValueMap with 'map', 'operands', and 'results'. void reset(AffineMap map, ValueRange operands, ValueRange results = {}); + /// Composes all incoming affine.apply ops and then simplifies and + /// canonicalizes the map and operands. This can change the number of + /// operands, but the result count remains the same. + void composeSimplifyAndCanonicalize(); + /// Return the value map that is the difference of value maps 'a' and 'b', /// represented as an affine map and its operands. The output map + operands /// are canonicalized and simplified. 
diff --git a/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp b/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp index fc0515ba95f4f..1c28d6b00b3c8 100644 --- a/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp +++ b/mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp @@ -145,45 +145,36 @@ uint64_t mlir::affine::getLargestDivisorOfTripCount(AffineForOp forOp) { return *gcd; } -/// Given an induction variable `iv` of type AffineForOp and an access `index` -/// of type index, returns `true` if `index` is independent of `iv` and -/// false otherwise. The determination supports composition with at most one -/// AffineApplyOp. The 'at most one AffineApplyOp' comes from the fact that -/// the composition of AffineApplyOp needs to be canonicalized by construction -/// to avoid writing code that composes arbitrary numbers of AffineApplyOps -/// everywhere. To achieve this, at the very least, the compose-affine-apply -/// pass must have been run. +/// Given an affine.for `iv` and an access `index` of type index, returns `true` +/// if `index` is independent of `iv` and false otherwise. /// -/// Prerequisites: -/// 1. `iv` and `index` of the proper type; -/// 2. at most one reachable AffineApplyOp from index; -/// -/// Returns false in cases with more than one AffineApplyOp, this is -/// conservative. +/// Prerequisites: `iv` and `index` of the proper type; static bool isAccessIndexInvariant(Value iv, Value index) { - assert(isAffineForInductionVar(iv) && "iv must be a AffineForOp"); - assert(isa(index.getType()) && "index must be of IndexType"); - SmallVector affineApplyOps; - getReachableAffineApplyOps({index}, affineApplyOps); - - if (affineApplyOps.empty()) { - // Pointer equality test because of Value pointer semantics. 
- return index != iv; - } - - if (affineApplyOps.size() > 1) { - affineApplyOps[0]->emitRemark( - "CompositionAffineMapsPass must have been run: there should be at most " - "one AffineApplyOp, returning false conservatively."); - return false; - } + assert(isAffineForInductionVar(iv) && "iv must be an affine.for iv"); + assert(isa(index.getType()) && "index must be of 'index' type"); + auto map = AffineMap::getMultiDimIdentityMap(/*numDims=*/1, iv.getContext()); + SmallVector operands = {index}; + AffineValueMap avm(map, operands); + avm.composeSimplifyAndCanonicalize(); + return !avm.isFunctionOf(0, iv); +} - auto composeOp = cast(affineApplyOps[0]); - // We need yet another level of indirection because the `dim` index of the - // access may not correspond to the `dim` index of composeOp. - return !composeOp.getAffineValueMap().isFunctionOf(0, iv); +// Pre-requisite: Loop bounds should be in canonical form. +template +bool mlir::affine::isInvariantAccess(LoadOrStoreOp memOp, AffineForOp forOp) { + AffineValueMap avm(memOp.getAffineMap(), memOp.getMapOperands()); + avm.composeSimplifyAndCanonicalize(); + return !llvm::is_contained(avm.getOperands(), forOp.getInductionVar()); } +// Explicit instantiations, since the template is defined in this .cpp file. 
+template bool mlir::affine::isInvariantAccess(AffineReadOpInterface, + AffineForOp); +template bool mlir::affine::isInvariantAccess(AffineWriteOpInterface, + AffineForOp); +template bool mlir::affine::isInvariantAccess(AffineLoadOp, AffineForOp); +template bool mlir::affine::isInvariantAccess(AffineStoreOp, AffineForOp); + DenseSet mlir::affine::getInvariantAccesses(Value iv, ArrayRef indices) { DenseSet res; diff --git a/mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp b/mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp index 2800237fd05ac..6a52849186872 100644 --- a/mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp +++ b/mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp @@ -24,6 +24,15 @@ void AffineValueMap::reset(AffineMap map, ValueRange operands, this->results.assign(results.begin(), results.end()); } +void AffineValueMap::composeSimplifyAndCanonicalize() { + AffineMap sMap = getAffineMap(); + fullyComposeAffineMapAndOperands(&sMap, &operands); + // Full composition also canonicalizes and simplifies before returning. We + // need to canonicalize once more to drop unused operands. 
+ canonicalizeMapAndOperands(&sMap, &operands); + this->map.reset(sMap); +} + void AffineValueMap::difference(const AffineValueMap &a, const AffineValueMap &b, AffineValueMap *res) { assert(a.getNumResults() == b.getNumResults() && "invalid inputs"); diff --git a/mlir/test/Dialect/Affine/access-analysis.mlir b/mlir/test/Dialect/Affine/access-analysis.mlir index 68310b9323535..789de646a8f9e 100644 --- a/mlir/test/Dialect/Affine/access-analysis.mlir +++ b/mlir/test/Dialect/Affine/access-analysis.mlir @@ -1,13 +1,14 @@ // RUN: mlir-opt %s -split-input-file -test-affine-access-analysis -verify-diagnostics | FileCheck %s -// CHECK-LABEL: func @loop_1d -func.func @loop_1d(%A : memref, %B : memref) { +// CHECK-LABEL: func @loop_simple +func.func @loop_simple(%A : memref, %B : memref) { %c0 = arith.constant 0 : index %M = memref.dim %A, %c0 : memref affine.for %i = 0 to %M { affine.for %j = 0 to %M { affine.load %A[%c0, %i] : memref // expected-remark@above {{contiguous along loop 0}} + // expected-remark@above {{invariant along loop 1}} affine.load %A[%c0, 8 * %i + %j] : memref // expected-remark@above {{contiguous along loop 1}} // Note/FIXME: access stride isn't being checked. @@ -15,6 +16,7 @@ func.func @loop_1d(%A : memref, %B : memref) { // These are all non-contiguous along both loops. Nothing is emitted. affine.load %A[%i, %c0] : memref + // expected-remark@above {{invariant along loop 1}} // Note/FIXME: access stride isn't being checked. 
affine.load %A[%i, 8 * %j] : memref // expected-remark@above {{contiguous along loop 1}} @@ -27,6 +29,22 @@ func.func @loop_1d(%A : memref, %B : memref) { // ----- +// CHECK-LABEL: func @loop_unsimplified +func.func @loop_unsimplified(%A : memref<100xf32>) { + affine.for %i = 0 to 100 { + affine.load %A[2 * %i - %i - %i] : memref<100xf32> + // expected-remark@above {{invariant along loop 0}} + + %m = affine.apply affine_map<(d0) -> (-2 * d0)>(%i) + %n = affine.apply affine_map<(d0) -> (2 * d0)>(%i) + affine.load %A[(%m + %n) floordiv 2] : memref<100xf32> + // expected-remark@above {{invariant along loop 0}} + } + return +} + +// ----- + #map = affine_map<(d0) -> (d0 * 16)> #map1 = affine_map<(d0) -> (d0 * 16 + 16)> #map2 = affine_map<(d0) -> (d0)> @@ -41,11 +59,19 @@ func.func @tiled(%arg0: memref<*xf32>) { %alloc_0 = memref.alloc() : memref<1x16x1x16xf32> affine.for %arg4 = #map(%arg1) to #map1(%arg1) { affine.for %arg5 = #map(%arg3) to #map1(%arg3) { + // TODO: here and below, the access isn't really invariant + // along tile-space IVs where the intra-tile IVs' bounds + // depend on them. 
%0 = affine.load %cast[%arg4] : memref<64xf32> // expected-remark@above {{contiguous along loop 3}} + // expected-remark@above {{invariant along loop 0}} + // expected-remark@above {{invariant along loop 1}} + // expected-remark@above {{invariant along loop 2}} + // expected-remark@above {{invariant along loop 4}} affine.store %0, %alloc_0[0, %arg1 * -16 + %arg4, 0, %arg3 * -16 + %arg5] : memref<1x16x1x16xf32> // expected-remark@above {{contiguous along loop 4}} // expected-remark@above {{contiguous along loop 2}} + // expected-remark@above {{invariant along loop 1}} } } affine.for %arg4 = #map(%arg1) to #map1(%arg1) { @@ -56,6 +82,9 @@ func.func @tiled(%arg0: memref<*xf32>) { // expected-remark@above {{contiguous along loop 2}} affine.store %0, %alloc[0, %arg5, %arg6, %arg4] : memref<1x224x224x64xf32> // expected-remark@above {{contiguous along loop 3}} + // expected-remark@above {{invariant along loop 0}} + // expected-remark@above {{invariant along loop 1}} + // expected-remark@above {{invariant along loop 2}} } } } diff --git a/mlir/test/lib/Dialect/Affine/TestAccessAnalysis.cpp b/mlir/test/lib/Dialect/Affine/TestAccessAnalysis.cpp index b38046299d504..751302550092d 100644 --- a/mlir/test/lib/Dialect/Affine/TestAccessAnalysis.cpp +++ b/mlir/test/lib/Dialect/Affine/TestAccessAnalysis.cpp @@ -59,18 +59,25 @@ void TestAccessAnalysis::runOnOperation() { enclosingOps.clear(); getAffineForIVs(*memOp, &enclosingOps); for (unsigned d = 0, e = enclosingOps.size(); d < e; d++) { + AffineForOp loop = enclosingOps[d]; int memRefDim; - bool isContiguous; + bool isContiguous, isInvariant; if (auto read = dyn_cast(memOp)) { - isContiguous = isContiguousAccess(enclosingOps[d].getInductionVar(), - read, &memRefDim); + isContiguous = + isContiguousAccess(loop.getInductionVar(), read, &memRefDim); + isInvariant = isInvariantAccess(read, loop); } else { - isContiguous = isContiguousAccess(enclosingOps[d].getInductionVar(), - cast(memOp), - &memRefDim); + auto write = cast(memOp); 
+ isContiguous = + isContiguousAccess(loop.getInductionVar(), write, &memRefDim); + isInvariant = isInvariantAccess(write, loop); } + // Check for contiguity for the innermost memref dimension to avoid + // emitting too many diagnostics. if (isContiguous && memRefDim == 0) memOp->emitRemark("contiguous along loop ") << d << '\n'; + if (isInvariant) + memOp->emitRemark("invariant along loop ") << d << '\n'; } } }