diff --git a/mlir/include/mlir/Interfaces/LoopLikeInterface.h b/mlir/include/mlir/Interfaces/LoopLikeInterface.h index 9925fc6ce6ca9..d6c23131cf242 100644 --- a/mlir/include/mlir/Interfaces/LoopLikeInterface.h +++ b/mlir/include/mlir/Interfaces/LoopLikeInterface.h @@ -14,6 +14,7 @@ #define MLIR_INTERFACES_LOOPLIKEINTERFACE_H_ #include "mlir/IR/OpDefinition.h" +#include "mlir/Dialect/Utils/StaticValueUtils.h" namespace mlir { class RewriterBase; diff --git a/mlir/include/mlir/Interfaces/LoopLikeInterface.td b/mlir/include/mlir/Interfaces/LoopLikeInterface.td index e09b8672f2d08..644f096b69381 100644 --- a/mlir/include/mlir/Interfaces/LoopLikeInterface.td +++ b/mlir/include/mlir/Interfaces/LoopLikeInterface.td @@ -329,6 +329,35 @@ def LoopLikeOpInterface : OpInterface<"LoopLikeOpInterface"> { $_op->operand_begin() + firstOperandIndex + initsMutable.size()); } + /// Return whether the loop is known to have zero iterations. + /// Returns std::nullopt if not enough constant information is available. + ::std::optional isZeroTrip() { + auto lbs = $_op.getLoopLowerBounds(); + auto ubs = $_op.getLoopUpperBounds(); + auto steps = $_op.getLoopSteps(); + + if (!lbs || !ubs || !steps) + return ::std::nullopt; + + if (lbs->size() != ubs->size() || ubs->size() != steps->size()) + return ::std::nullopt; + + for (size_t i = 0; i < steps->size(); ++i) { + auto lb = ::mlir::getConstantIntValue((*lbs)[i]); + auto ub = ::mlir::getConstantIntValue((*ubs)[i]); + auto st = ::mlir::getConstantIntValue((*steps)[i]); + + if (!lb || !ub || !st) + return ::std::nullopt; // non-constant -> unknown + + if (*st >= 0 && *lb >= *ub) + return true; + if (*st < 0 && *lb <= *ub) + return true; + } + return false; + } + /// Return the region iter_arg that corresponds to the given init operand. /// Return an "empty" block argument if the given operand is not an init /// operand of this loop op. 
diff --git a/mlir/include/mlir/Interfaces/SideEffectInterfaces.h b/mlir/include/mlir/Interfaces/SideEffectInterfaces.h index 9de20f0c69f1a..5bbfbce5c5a18 100644 --- a/mlir/include/mlir/Interfaces/SideEffectInterfaces.h +++ b/mlir/include/mlir/Interfaces/SideEffectInterfaces.h @@ -14,6 +14,7 @@ #ifndef MLIR_INTERFACES_SIDEEFFECTINTERFACES_H #define MLIR_INTERFACES_SIDEEFFECTINTERFACES_H +#include "mlir/IR/Dominance.h" #include "mlir/IR/OpDefinition.h" namespace mlir { @@ -346,6 +347,20 @@ struct AlwaysSpeculatableImplTrait //===----------------------------------------------------------------------===// namespace MemoryEffects { +/// Defines the priority of the different memory effects. +/// +/// Sorting/ordering memory effects of an operation is done based on +/// their defined stage and priority, in that order. If stage values for two +/// effect instances are equal, they are then sorted by priority. Lower priority +/// values indicate higher precedence. +enum Priority { + DefaultPriority = 0, + AllocPriority = 1, + FreePriority = 2, + ReadPriority = 3, + WritePriority = 4 +}; + /// This class represents the base class used for memory effects. struct Effect : public SideEffects::Effect { using SideEffects::Effect::Effect; @@ -355,28 +370,64 @@ struct Effect : public SideEffects::Effect { using Base = SideEffects::Effect::Base; static bool classof(const SideEffects::Effect *effect); + + /// Return the priority associated with this memory effect. + Priority getPriority() const { return priority; } + + /// Return a human-readable name for the effect type. + StringRef getEffectName() const { return effectName; } + +protected: + /// Priority value for this effect. Lower numbers indicate higher precedence. + Priority priority = Priority::DefaultPriority; + StringRef effectName = ""; }; using EffectInstance = SideEffects::EffectInstance; +/// Returns vector of the op's memory effects sorted in increasing stage order +/// and in increasing priority order within each stage. 
+llvm::SmallVector +getMemoryEffectsSorted(Operation *op); + /// The following effect indicates that the operation allocates from some /// resource. An 'allocate' effect implies only allocation of the resource, and /// not any visible mutation or dereference. -struct Allocate : public Effect::Base {}; +struct Allocate : public Effect::Base { + Allocate() : Effect::Base() { + this->priority = Priority::AllocPriority; + this->effectName = ""; + } +}; /// The following effect indicates that the operation frees some resource that /// has been allocated. An 'allocate' effect implies only de-allocation of the /// resource, and not any visible allocation, mutation or dereference. -struct Free : public Effect::Base {}; +struct Free : public Effect::Base { + Free() : Effect::Base() { + this->priority = Priority::FreePriority; + this->effectName = ""; + } +}; /// The following effect indicates that the operation reads from some resource. /// A 'read' effect implies only dereferencing of the resource, and not any /// visible mutation. -struct Read : public Effect::Base {}; +struct Read : public Effect::Base { + Read() : Effect::Base() { + this->priority = Priority::ReadPriority; + this->effectName = ""; + } +}; /// The following effect indicates that the operation writes to some resource. A /// 'write' effect implies only mutating a resource, and not any visible /// dereference or read. 
-struct Write : public Effect::Base {}; +struct Write : public Effect::Base { + Write() : Effect::Base() { + this->priority = Priority::WritePriority; + this->effectName = ""; + } +}; } // namespace MemoryEffects //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Transforms/LoopInvariantCodeMotionUtils.h b/mlir/include/mlir/Transforms/LoopInvariantCodeMotionUtils.h index 3ceef44d799e8..55e2bff523f86 100644 --- a/mlir/include/mlir/Transforms/LoopInvariantCodeMotionUtils.h +++ b/mlir/include/mlir/Transforms/LoopInvariantCodeMotionUtils.h @@ -9,9 +9,12 @@ #ifndef MLIR_TRANSFORMS_LOOPINVARIANTCODEMOTIONUTILS_H #define MLIR_TRANSFORMS_LOOPINVARIANTCODEMOTIONUTILS_H +#include "mlir/Interfaces/SideEffectInterfaces.h" #include "mlir/Support/LLVM.h" +#include "mlir/Support/TypeID.h" +#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" +#include namespace mlir { @@ -21,6 +24,43 @@ class Region; class RewriterBase; class Value; +/// Alias for map used in LICM pass to track which memory resources have +/// conflicts due to sequence of memory effects applied to them in the region of +/// interest. +using MemoryConflictMap = DenseMap>; + +/// Gathers potential conflicts on all memory resources used within loop. +/// +/// Given a target loop and an op within it (or the loop op itself), +/// gathers op's memory effects and flags potential resource conflicts +/// in a map and then recurses into the op's regions to gather nested +/// resource conflicts. +/// +/// Typical usage: +/// \code +/// LoopLikeOpInterface myLoop = ...; +/// DenseMap> +/// myConflicts; +/// gatherResourceConflicts(myLoop, myLoop.getOperation(), resourceConflicts); +/// \endcode +/// +/// \param loop The loop to gather resource conflicts for. +/// \param op The operation to gather resource conflicts for, +/// typically the loop op itself via loop.getOperation(). +/// \param resourceConflicts Map to store potential resource conflicts. 
+/// Key is the resource ID that effects are applied to. Value is a pair of +/// a boolean, indicating if the resource has a conflict, and the last effect +/// that was applied to the resource (if no conflicts exist) or the effect +/// that caused the conflict (if conflicts exist). +/// +/// resourceConflicts is modified by the function and will be non-empty +/// as long as there are memory effects within the loop, even if there are +/// no conflicts. +void mapResourceConflicts( + LoopLikeOpInterface loop, Operation *op, + DenseMap> + &resourceConflicts); + /// Given a list of regions, perform loop-invariant code motion. An operation is /// loop-invariant if it depends only of values defined outside of the loop. /// LICM moves these operations out of the loop body so that they are not @@ -63,9 +103,10 @@ class Value; /// /// Returns the number of operations moved. size_t moveLoopInvariantCode( - ArrayRef regions, + LoopLikeOpInterface loopLike, function_ref isDefinedOutsideRegion, - function_ref shouldMoveOutOfRegion, + function_ref shouldMoveSpeculatable, + function_ref shouldMoveMemoryEffect, function_ref moveOutOfRegion); /// Move side-effect free loop invariant code out of a loop-like op using diff --git a/mlir/lib/Interfaces/SideEffectInterfaces.cpp b/mlir/lib/Interfaces/SideEffectInterfaces.cpp index b5a6888e5e1a4..6603f91936bc3 100644 --- a/mlir/lib/Interfaces/SideEffectInterfaces.cpp +++ b/mlir/lib/Interfaces/SideEffectInterfaces.cpp @@ -8,7 +8,9 @@ #include "mlir/Interfaces/SideEffectInterfaces.h" +#include "mlir/IR/Dominance.h" #include "mlir/IR/SymbolTable.h" +#include #include using namespace mlir; @@ -317,14 +319,45 @@ bool mlir::wouldOpBeTriviallyDead(Operation *op) { return wouldOpBeTriviallyDeadImpl(op); } +llvm::SmallVector +mlir::MemoryEffects::getMemoryEffectsSorted(Operation *op) { + llvm::SmallVector effectsSorted; + + auto memInterface = dyn_cast(op); + + if (!memInterface) + return effectsSorted; // return empty vec + + 
memInterface.getEffects(effectsSorted); + + auto sortEffects = + [](llvm::SmallVectorImpl &effects) { + llvm::stable_sort(effects, [](const MemoryEffects::EffectInstance &a, + const MemoryEffects::EffectInstance &b) { + if (a.getStage() < b.getStage()) + return true; + + if (a.getStage() == b.getStage()) + return a.getEffect()->getPriority() < b.getEffect()->getPriority(); + + return false; // b before a + }); + }; + sortEffects(effectsSorted); + + return effectsSorted; +} + bool mlir::isMemoryEffectFree(Operation *op) { if (auto memInterface = dyn_cast(op)) { if (!memInterface.hasNoEffect()) return false; + // If the op does not have recursive side effects, then it is memory effect // free. if (!op->hasTrait()) return true; + } else if (!op->hasTrait()) { // Otherwise, if the op does not implement the memory effect interface and // it does not have recursive side effects, then it cannot be known that the @@ -338,6 +371,7 @@ bool mlir::isMemoryEffectFree(Operation *op) { for (Operation &op : region.getOps()) if (!isMemoryEffectFree(&op)) return false; + return true; } diff --git a/mlir/lib/Transforms/Utils/LoopInvariantCodeMotionUtils.cpp b/mlir/lib/Transforms/Utils/LoopInvariantCodeMotionUtils.cpp index 111f58ef92f51..24816a963a5be 100644 --- a/mlir/lib/Transforms/Utils/LoopInvariantCodeMotionUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopInvariantCodeMotionUtils.cpp @@ -15,12 +15,15 @@ #include "mlir/IR/Operation.h" #include "mlir/IR/OperationSupport.h" #include "mlir/IR/PatternMatch.h" +#include "mlir/IR/Value.h" #include "mlir/Interfaces/LoopLikeInterface.h" #include "mlir/Interfaces/SideEffectInterfaces.h" #include "mlir/Interfaces/SubsetOpInterface.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DebugLog.h" #include +#include #define DEBUG_TYPE "licm" @@ -29,7 +32,6 @@ using namespace mlir; /// Checks whether the given op can be hoisted by checking that /// - the op and none of its contained operations depend on values 
inside of the /// loop (by means of calling definedOutside). -/// - the op has no side-effects. static bool canBeHoisted(Operation *op, function_ref condition) { // Do not move terminators. @@ -58,62 +60,407 @@ static bool canBeHoisted(Operation *op, op, [&](OpOperand &operand) { return definedOutside(operand.get()); }); } -size_t mlir::moveLoopInvariantCode( - ArrayRef regions, - function_ref isDefinedOutsideRegion, - function_ref shouldMoveOutOfRegion, - function_ref moveOutOfRegion) { - size_t numMoved = 0; +/// Merges srcEffect's Memory Effect on its resource into the +/// resourceConflicts map, flagging the resource if the srcEffect +/// results in a conflict. +/// +/// \param resourceConflicts The map to store resources' conflicts status. +/// \param srcEffect The effect to merge into the resourceConflicts map. +/// \param srcHasConflict Whether the srcEffect results in a conflict based +/// on higher level analysis. +/// +/// resourceConflicts is modified by the function and will be non-empty +static void mergeResource(MemoryConflictMap &resourceConflicts, + const MemoryEffects::EffectInstance &srcEffect, + bool srcHasConflict) { + + TypeID srcResourceID = srcEffect.getResource()->getResourceID(); + + bool srcIsAllocOrFree = isa(srcEffect.getEffect()) || + isa(srcEffect.getEffect()); + + LDBG() << " : \"" << srcEffect.getEffect()->getEffectName() + << " on resource <" << srcEffect.getResource()->getName() << ">\"" + << "\n"; + + bool conflict = srcHasConflict || srcIsAllocOrFree; + + auto [dstIt, inserted] = resourceConflicts.insert( + std::make_pair(srcResourceID, std::make_pair(conflict, srcEffect))); + if (inserted) { + LDBG() << ". . . . 
" + << "Effect inserted to map" + << "\n"; + return; + } - for (Region *region : regions) { - LDBG() << "Original loop:\n" << *region->getParentOp(); + // resource already in use + bool dstHasConflict = dstIt->second.first; + auto dstEffect = dstIt->second.second; - std::queue worklist; - // Add top-level operations in the loop body to the worklist. - for (Operation &op : region->getOps()) - worklist.push(&op); + if (dstHasConflict) { + LDBG() << ". . . . " + << "Resource has existing conflict from Effect Mem" + << dstEffect.getValue() << "\n"; + return; + } - auto definedOutside = [&](Value value) { - return isDefinedOutsideRegion(value, region); - }; + bool srcWrite = isa(srcEffect.getEffect()); + bool dstRead = isa(dstEffect.getEffect()); + bool readBeforeWrite = dstRead && srcWrite; - while (!worklist.empty()) { - Operation *op = worklist.front(); - worklist.pop(); - // Skip ops that have already been moved. Check if the op can be hoisted. - if (op->getParentRegion() != region) - continue; + conflict = conflict || readBeforeWrite; - LDBG() << "Checking op: " - << OpWithFlags(op, OpPrintingFlags().skipRegions()); - if (!shouldMoveOutOfRegion(op, region) || - !canBeHoisted(op, definedOutside)) - continue; + LDBG() << ". . . . " + << "Resource conflict status updated to = " << conflict << "\n"; + + dstIt->second = std::make_pair(conflict, srcEffect); +} + +/// Returns true if any of op's operands is defined inside the loop. +static bool hasLoopVariantInput(LoopLikeOpInterface loopLike, Operation *op) { + return llvm::any_of(op->getOperands(), [&] (Value v) { + return !loopLike.isDefinedOutsideOfLoop(v); + }); +} + +/// Returns true if: +/// (a) any of the resources used by op's Memory Effects have been +/// flagged as having a conflict within the resourceConflicts map OR +/// (b) op doesn't have a MemoryEffectOpInterface or has one but +/// without any specific effects. 
+static bool mayHaveMemoryEffectConflict(Operation *op, + MemoryConflictMap *resourceConflicts) { + LDBG() << " : " + << OpWithFlags(op, OpPrintingFlags().skipRegions()); + + auto condSpecInterface = dyn_cast<ConditionallySpeculatable>(op); + + // if op implements ConditionallySpeculatable interface, must be speculatable! + if (condSpecInterface && !isSpeculatable(op)) + return true; + + auto memInterface = dyn_cast<MemoryEffectOpInterface>(op); + + // op does not implement the memory effect op interface + // shouldn't be flagged as movable to be conservative + if (!memInterface) + return true; + + // Ops with Recursive Memory Effects are special-cased here. + // For now we'll only allow them to be moved if they're effect + // free. + // A potential solution is to recursively gather all resources on all + // contained ops and then run the for-loop further below. Requires discussions + // re: obscure corner cases. + if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>()) { + return !isMemoryEffectFree(op); + } + + // gather all effects on op + llvm::SmallVector<MemoryEffects::EffectInstance> effects; + memInterface.getEffects(effects); + + // op has interface but no effects, be conservative + if (effects.empty()) + return true; + + // op has no conflicts IFF all resources are flagged as having no conflicts + for (const MemoryEffects::EffectInstance &effect : effects) { + auto resourceID = effect.getResource()->getResourceID(); + + auto resConIter = resourceConflicts->find(resourceID); + assert(resConIter != resourceConflicts->end()); + + bool hasConflict = resConIter->second.first; + if (hasConflict) { + LDBG() << ". . . . 
" + << "Conflict deteceted on resource <" + << effect.getResource()->getName() << "> from Memory Effect Mem" + << effect.getValue() << "\n"; + return true; + } + } + + return false; +} - LDBG() << "Moving loop-invariant op: " << *op; - moveOutOfRegion(op, region); - ++numMoved; +static void +mapLoopResourceUsage(LoopLikeOpInterface loopLike, Operation *op, + MemoryConflictMap &resourceConflicts, + llvm::SmallSet &opsWithReadBeforeWrite) { + + LDBG() << " : " + << OpWithFlags(op, OpPrintingFlags().skipRegions()) << "\n"; + + if (auto memInterface = dyn_cast(op)) { + LDBG() << ". . . . " + << "op has MemoryEffectsOpInterface" + << "\n"; + + // gather all effects on op + SmallVector effects = + MemoryEffects::getMemoryEffectsSorted(op); + + LDBG() << ". . . . " + << "# of effects = " << effects.size(); + + if (!effects.empty()) { + // Any input to the op could be the input data source + // for write effects in the same op. E.g., + // scf.for ... { + // %0 = foo.bar(...) : ... + // foo.baz(%0) // foo.baz has a MemWrite effect + // } + // The input %0 could be the data source for the write effect in + // foo.baz. Since %0 is loop-variant, this may cause a conflict on + // SomeResource as the MemWrite contents may change between loop iterations. + // A more complex analysis would be needed to determine + // if this is a true conflict or not. + bool writesConflict = hasLoopVariantInput(loopLike, op); + LDBG() << ". . . . " + << "Has loop-variant input = " + << (writesConflict ? "true" : "false") << "\n"; + + bool hasRead = false; + + for (const MemoryEffects::EffectInstance &effect : effects) { + bool inConflict = false; + + if (isa(effect.getEffect())) { + if (hasRead) { + LDBG() << ". . . . " + << "read-before-write detected!" + << "\n"; + LDBG() << ". . . . " + << ". . . . " + << "Inserting op into set for later checks!" 
+ << "\n"; + opsWithReadBeforeWrite.insert(op); + } + + inConflict = writesConflict; + } + + mergeResource(resourceConflicts, effect, inConflict); + + // All writes to a resource that follow a read on any other resource + // need additional logic to check if the read will result in a conflict + // on the following write op(s)'s resource(s). + // Need to keep track of ops that have read before writes. + // If the resource for the read effect has a conflict after all loop + // resource usages have been mapped, then the conflict will be + // propagated to the resources used by the following writes. LOGIC: if + // the read resource is in conflict, that means the value stored is no + // longer loop invariant --> the read could be the data source for the + // write --> the write is not guaranteed to be loop invariant. + if (isa(effect.getEffect())) { + TypeID resourceID = effect.getResource()->getResourceID(); + auto resConIter = resourceConflicts.find(resourceID); + + if (resConIter != resourceConflicts.end()) { + hasRead = true; + } + } + } + } + } + + for (Region ®ion : op->getRegions()) + for (Operation &opInner : region.getOps()) + mapLoopResourceUsage(loopLike, &opInner, resourceConflicts, + opsWithReadBeforeWrite); +} + +static void propagateSameOpReadBeforeWriteConflicts( + LoopLikeOpInterface loopLike, Operation *op, + MemoryConflictMap &resourceConflicts, + llvm::SmallSet &opsWithReadBeforeWrite) { + + for (auto *opInner : opsWithReadBeforeWrite) { + // gather all effects on op + SmallVector effects = + MemoryEffects::getMemoryEffectsSorted(opInner); + + bool writesConflict = false; - // Since the op has been moved, we need to check its users within the - // top-level of the loop body. 
- for (Operation *user : op->getUsers()) - if (user->getParentRegion() == region) - worklist.push(user); + for (const MemoryEffects::EffectInstance &effect : effects) { + if (writesConflict && isa(effect.getEffect())) { + + TypeID resourceID = effect.getResource()->getResourceID(); + auto resConIter = resourceConflicts.find(resourceID); + + // already has conflict, move on + if (resConIter->getSecond().first) + continue; + + resConIter->getSecond().first = true; + resConIter->getSecond().second = effect; + } + + if (isa(effect.getEffect())) { + TypeID resourceID = effect.getResource()->getResourceID(); + auto resConIter = resourceConflicts.find(resourceID); + + if (resConIter != resourceConflicts.end() && + resConIter->getSecond().first) { + writesConflict = true; + } + } } } +} - return numMoved; +void mlir::mapResourceConflicts(LoopLikeOpInterface loopLike, Operation *op, + MemoryConflictMap &resourceConflicts) { + llvm::SmallSet opsWithReadBeforeWrite; + mapLoopResourceUsage(loopLike, loopLike.getOperation(), resourceConflicts, + opsWithReadBeforeWrite); + propagateSameOpReadBeforeWriteConflicts(loopLike, loopLike.getOperation(), + resourceConflicts, + opsWithReadBeforeWrite); +} + +size_t mlir::moveLoopInvariantCode( + LoopLikeOpInterface loopLike, + function_ref isDefinedOutsideRegion, + function_ref shouldMoveSpeculatable, + function_ref shouldMoveMemoryEffect, + function_ref moveOutOfRegion) { + size_t numMovedTotal = 0; + + // Check that the loop isn't dead. + // Two separate methods used to check this, depending on what the loopLike op + // implements. If neither is available, we can't guarantee loop liveness. 
+ auto isMaybeDead = loopLike.isZeroTrip(); + auto tripCount = loopLike.getStaticTripCount(); + + bool confirmedDead = (isMaybeDead.has_value() && isMaybeDead.value()) || + (tripCount.has_value() && tripCount.value() == 0); + bool ambiguousLiveness = !isMaybeDead.has_value() && !tripCount.has_value(); + bool loopIsLive = !confirmedDead && !ambiguousLiveness; + + LDBG() << "Running LICM on loop op. . . ." + << "\n"; + LDBG() << " : " + << OpWithFlags(loopLike.getOperation(), + OpPrintingFlags().skipRegions()) + << "\n"; + LDBG() << ". . . . " + << "confirmedDead = " << confirmedDead << "\n"; + LDBG() << ". . . . " + << "ambiguousLiveness = " << ambiguousLiveness << "\n"; + LDBG() << ". . . . " + << "loopIsLive = " << loopIsLive << "\n"; + + int iteration = 0; + int numMoved = 0; + + do { + // reset value for iteration + numMoved = 0; + + MemoryConflictMap resourceConflicts; + + // For loops that are guaranteed to execute at least one iterations: + // Go through loop body and map out resource usages. + // op->regions are essentially merged sequentially. + // E.g., an if's "then" and "else" regions are treated like one + // continuous region --> need to add fork checking. + // + // loop "do" and "then" regions also merged. + if (loopIsLive) + mapResourceConflicts(loopLike, loopLike.getOperation(), + resourceConflicts); + + auto regions = loopLike.getLoopRegions(); + for (Region *region : regions) { + std::queue worklist; + + // Add top-level operations in the loop body to the worklist. + for (Operation &op : region->getOps()) + worklist.push(&op); + + auto definedOutside = [&](Value value) { + return isDefinedOutsideRegion(value, region); + }; + + while (!worklist.empty()) { + Operation *op = worklist.front(); + worklist.pop(); + + // Skip ops that have already been moved. Check if the op can be hoisted. 
+ if (op->getParentRegion() != region) + continue; + + bool isHoistable = canBeHoisted(op, definedOutside); + bool movableUnderSpeculabilityPath = shouldMoveSpeculatable(op, region); + bool movableUnderMemoryEffectsPath = + loopIsLive && shouldMoveMemoryEffect(op, &resourceConflicts); + bool isNotMovable = !isHoistable || (!movableUnderSpeculabilityPath && + !movableUnderMemoryEffectsPath); + + LDBG() << ". . . . " + << " : " + << OpWithFlags(op, OpPrintingFlags().skipRegions()); + LDBG() << ". . . . " + << ". . . . " + << "isHoistable = " << isHoistable << "\n"; + LDBG() << ". . . . " + << ". . . . " + << "movableUnderSpeculabilityPath = " + << movableUnderSpeculabilityPath << "\n"; + LDBG() << ". . . . " + << ". . . . " + << "movableUnderMemoryEffectsPath = " + << movableUnderMemoryEffectsPath << "\n"; + LDBG() << ". . . . " + << ". . . . " + << "isNotMovable = " << isNotMovable << "\n"; + + if (isNotMovable) + continue; + + moveOutOfRegion(op, region); + ++numMoved; + + // Since the op has been moved, we need to check its users within the + // top-level of the loop body. + for (Operation *user : op->getUsers()) + if (user->getParentRegion() == region) + worklist.push(user); + } + } + + numMovedTotal += numMoved; + + LDBG() << ". . . . " + << "Finishing LICM iteration " << iteration++ << "\n"; + LDBG() << ". . . . " + << ". . . . " + << "Number of ops moved = " << numMoved << "\n"; + LDBG() << ". . . . " + << ". . . . 
" + << "Total number of ops moved across iterations = " << numMovedTotal + << "\n"; + + } while (numMoved > 0); + + return numMovedTotal; } size_t mlir::moveLoopInvariantCode(LoopLikeOpInterface loopLike) { return moveLoopInvariantCode( - loopLike.getLoopRegions(), + loopLike, [&](Value value, Region *) { return loopLike.isDefinedOutsideOfLoop(value); }, [&](Operation *op, Region *) { return isMemoryEffectFree(op) && isSpeculatable(op); }, + [&](Operation *op, MemoryConflictMap *resourceConflicts) { + return !mayHaveMemoryEffectConflict(op, resourceConflicts); + }, [&](Operation *op, Region *) { loopLike.moveOutOfLoop(op); }); } diff --git a/mlir/test/Transforms/loop-invariant-code-motion.mlir b/mlir/test/Transforms/loop-invariant-code-motion.mlir index c1604e226a334..9b7421223a8c0 100644 --- a/mlir/test/Transforms/loop-invariant-code-motion.mlir +++ b/mlir/test/Transforms/loop-invariant-code-motion.mlir @@ -1,5 +1,6 @@ // RUN: mlir-opt %s -split-input-file -loop-invariant-code-motion | FileCheck %s +// CHECK-LABEL func.func @nested_loops_both_having_invariant_code func.func @nested_loops_both_having_invariant_code() { %m = memref.alloc() : memref<10xf32> %cf7 = arith.constant 7.0 : f32 @@ -7,7 +8,7 @@ func.func @nested_loops_both_having_invariant_code() { affine.for %arg0 = 0 to 10 { %v0 = arith.addf %cf7, %cf8 : f32 - affine.for %arg1 = 0 to 10 { + affine.for %arg1 = 0 to 9 { %v1 = arith.addf %v0, %cf8 : f32 affine.store %v0, %m[%arg0] : memref<10xf32> } @@ -18,8 +19,8 @@ func.func @nested_loops_both_having_invariant_code() { // CHECK-NEXT: %[[CST1:.*]] = arith.constant 8.000000e+00 : f32 // CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[CST0]], %[[CST1]] : f32 // CHECK-NEXT: arith.addf %[[ADD0]], %[[CST1]] : f32 - // CHECK-NEXT: affine.for - // CHECK-NEXT: affine.for + // CHECK: affine.for %[[IV:.*]] = 0 to 9 + // CHECK: affine.for %[[IV:.*]] = 0 to 10 // CHECK-NEXT: affine.store return @@ -27,6 +28,7 @@ func.func @nested_loops_both_having_invariant_code() { // ----- 
+// CHECK-LABEL func.func @nested_loops_code_invariant_to_both func.func @nested_loops_code_invariant_to_both() { %m = memref.alloc() : memref<10xf32> %cf7 = arith.constant 7.0 : f32 @@ -48,6 +50,7 @@ func.func @nested_loops_code_invariant_to_both() { // ----- +// CHECK-LABEL func.func @single_loop_nothing_invariant func.func @single_loop_nothing_invariant() { %m1 = memref.alloc() : memref<10xf32> %m2 = memref.alloc() : memref<10xf32> @@ -71,6 +74,7 @@ func.func @single_loop_nothing_invariant() { // ----- +// CHECK-LABEL func.func @invariant_code_inside_affine_if func.func @invariant_code_inside_affine_if() { %m = memref.alloc() : memref<10xf32> %cf8 = arith.constant 8.0 : f32 @@ -99,6 +103,7 @@ func.func @invariant_code_inside_affine_if() { // ----- +// CHECK-LABEL func.func @invariant_affine_if func.func @invariant_affine_if() { %m = memref.alloc() : memref<10xf32> %cf8 = arith.constant 8.0 : f32 @@ -579,7 +584,6 @@ func.func @test_invariant_nested_loop() { return } - // ----- // Test ops in a graph region are hoisted. 
@@ -650,6 +654,8 @@ func.func @test_always_speculatable_op(%lb: index, %ub: index, %step: index) { return } +// ----- + // CHECK-LABEL: test_never_speculatable_op func.func @test_never_speculatable_op(%lb: index, %ub: index, %step: index) { // CHECK: scf.for @@ -661,6 +667,8 @@ func.func @test_never_speculatable_op(%lb: index, %ub: index, %step: index) { return } +// ----- + // CHECK-LABEL: test_conditionally_speculatable_op_success func.func @test_conditionally_speculatable_op_success(%lb: index, %ub: index, %step: index) { // CHECK: test.conditionally_speculatable_op @@ -673,6 +681,8 @@ func.func @test_conditionally_speculatable_op_success(%lb: index, %ub: index, %s return } +// ----- + // CHECK-LABEL: test_conditionally_speculatable_op_failure func.func @test_conditionally_speculatable_op_failure(%lb: index, %ub: index, %step: index, %arg: i32) { // CHECK: scf.for @@ -686,6 +696,8 @@ func.func @test_conditionally_speculatable_op_failure(%lb: index, %ub: index, %s return } +// ----- + // CHECK-LABEL: test_recursively_speculatable_op_success func.func @test_recursively_speculatable_op_success(%lb: index, %ub: index, %step: index, %arg: i32) { // CHECK: test.recursively_speculatable_op @@ -700,6 +712,8 @@ func.func @test_recursively_speculatable_op_success(%lb: index, %ub: index, %ste return } +// ----- + // CHECK-LABEL: test_recursively_speculatable_op_failure func.func @test_recursively_speculatable_op_failure(%lb: index, %ub: index, %step: index, %arg: i32) { // CHECK: scf.for @@ -728,6 +742,8 @@ func.func @speculate_tensor_dim_unknown_rank_unknown_dim( return } +// ----- + func.func @speculate_tensor_dim_known_rank_unknown_dim( // CHECK-LABEL: @speculate_tensor_dim_known_rank_unknown_dim %t: tensor, %dim_idx: index, %lb: index, %ub: index, %step: index) { @@ -740,6 +756,8 @@ func.func @speculate_tensor_dim_known_rank_unknown_dim( return } +// ----- + func.func @speculate_tensor_dim_unknown_rank_known_dim( // CHECK-LABEL: 
@speculate_tensor_dim_unknown_rank_known_dim %t: tensor<*xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) { @@ -753,6 +771,8 @@ func.func @speculate_tensor_dim_unknown_rank_known_dim( return } +// ----- + func.func @speculate_tensor_dim_known_rank_known_dim_inbounds( // CHECK-LABEL: @speculate_tensor_dim_known_rank_known_dim_inbounds %t: tensor, %dim_idx: index, %lb: index, %ub: index, %step: index) { @@ -780,6 +800,8 @@ func.func @speculate_memref_dim_unknown_rank_unknown_dim( return } +// ----- + func.func @speculate_memref_dim_known_rank_unknown_dim( // CHECK-LABEL: @speculate_memref_dim_known_rank_unknown_dim %t: memref, %dim_idx: index, %lb: index, %ub: index, %step: index) { @@ -792,6 +814,8 @@ func.func @speculate_memref_dim_known_rank_unknown_dim( return } +// ----- + func.func @speculate_memref_dim_unknown_rank_known_dim( // CHECK-LABEL: @speculate_memref_dim_unknown_rank_known_dim %t: memref<*xf32>, %dim_idx: index, %lb: index, %ub: index, %step: index) { @@ -880,6 +904,8 @@ func.func @no_speculate_divui( return } +// ----- + func.func @no_speculate_divsi( // CHECK-LABEL: @no_speculate_divsi( %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { @@ -892,6 +918,8 @@ func.func @no_speculate_divsi( return } +// ----- + func.func @no_speculate_ceildivui( // CHECK-LABEL: @no_speculate_ceildivui( %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { @@ -904,6 +932,8 @@ func.func @no_speculate_ceildivui( return } +// ----- + func.func @no_speculate_ceildivsi( // CHECK-LABEL: @no_speculate_ceildivsi( %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { @@ -916,6 +946,8 @@ func.func @no_speculate_ceildivsi( return } +// ----- + func.func @no_speculate_divui_const(%num: i32, %lb: index, %ub: index, %step: index) { // CHECK-LABEL: @no_speculate_divui_const( %c0 = arith.constant 0 : i32 @@ -928,6 +960,8 @@ func.func @no_speculate_divui_const(%num: i32, %lb: index, %ub: index, %step: in return } +// ----- + func.func 
@speculate_divui_const( // CHECK-LABEL: @speculate_divui_const( %num: i32, %lb: index, %ub: index, %step: index) { @@ -941,6 +975,8 @@ func.func @speculate_divui_const( return } +// ----- + func.func @no_speculate_ceildivui_const(%num: i32, %lb: index, %ub: index, %step: index) { // CHECK-LABEL: @no_speculate_ceildivui_const( %c0 = arith.constant 0 : i32 @@ -953,6 +989,8 @@ func.func @no_speculate_ceildivui_const(%num: i32, %lb: index, %ub: index, %step return } +// ----- + func.func @speculate_ceildivui_const( // CHECK-LABEL: @speculate_ceildivui_const( %num: i32, %lb: index, %ub: index, %step: index) { @@ -966,6 +1004,8 @@ func.func @speculate_ceildivui_const( return } +// ----- + func.func @no_speculate_divsi_const0( // CHECK-LABEL: @no_speculate_divsi_const0( %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { @@ -979,6 +1019,8 @@ func.func @no_speculate_divsi_const0( return } +// ----- + func.func @no_speculate_divsi_const_minus1( // CHECK-LABEL: @no_speculate_divsi_const_minus1( %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { @@ -992,6 +1034,8 @@ func.func @no_speculate_divsi_const_minus1( return } +// ----- + func.func @speculate_divsi_const( // CHECK-LABEL: @speculate_divsi_const( %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { @@ -1005,6 +1049,8 @@ func.func @speculate_divsi_const( return } +// ----- + func.func @no_speculate_ceildivsi_const0( // CHECK-LABEL: @no_speculate_ceildivsi_const0( %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { @@ -1018,6 +1064,8 @@ func.func @no_speculate_ceildivsi_const0( return } +// ----- + func.func @no_speculate_ceildivsi_const_minus1( // CHECK-LABEL: @no_speculate_ceildivsi_const_minus1( %num: i32, %denom: i32, %lb: index, %ub: index, %step: index) { @@ -1031,6 +1079,8 @@ func.func @no_speculate_ceildivsi_const_minus1( return } +// ----- + func.func @speculate_ceildivsi_const( // CHECK-LABEL: @speculate_ceildivsi_const( %num: i32, %denom: i32, %lb: index, %ub: 
index, %step: index) { @@ -1044,6 +1094,8 @@ func.func @speculate_ceildivsi_const( return } +// ----- + func.func @no_speculate_divui_range( // CHECK-LABEL: @no_speculate_divui_range( %num: i8, %lb: index, %ub: index, %step: index) { @@ -1057,6 +1109,8 @@ func.func @no_speculate_divui_range( return } +// ----- + func.func @no_speculate_divsi_range( // CHECK-LABEL: @no_speculate_divsi_range( %num: i8, %lb: index, %ub: index, %step: index) { @@ -1072,6 +1126,8 @@ func.func @no_speculate_divsi_range( return } +// ----- + func.func @no_speculate_ceildivui_range( // CHECK-LABEL: @no_speculate_ceildivui_range( %num: i8, %lb: index, %ub: index, %step: index) { @@ -1085,6 +1141,8 @@ func.func @no_speculate_ceildivui_range( return } +// ----- + func.func @no_speculate_ceildivsi_range( // CHECK-LABEL: @no_speculate_ceildivsi_range( %num: i8, %lb: index, %ub: index, %step: index) { @@ -1100,6 +1158,8 @@ func.func @no_speculate_ceildivsi_range( return } +// ----- + func.func @speculate_divui_range( // CHECK-LABEL: @speculate_divui_range( %num: i8, %lb: index, %ub: index, %step: index) { @@ -1113,6 +1173,8 @@ func.func @speculate_divui_range( return } +// ----- + func.func @speculate_divsi_range( // CHECK-LABEL: @speculate_divsi_range( %num: i8, %lb: index, %ub: index, %step: index) { @@ -1129,6 +1191,8 @@ func.func @speculate_divsi_range( return } +// ----- + func.func @speculate_ceildivui_range( // CHECK-LABEL: @speculate_ceildivui_range( %num: i8, %lb: index, %ub: index, %step: index) { @@ -1142,6 +1206,8 @@ func.func @speculate_ceildivui_range( return } +// ----- + func.func @speculate_ceildivsi_range( // CHECK-LABEL: @speculate_ceildivsi_range( %num: i8, %lb: index, %ub: index, %step: index) { @@ -1437,3 +1503,479 @@ func.func @do_not_hoist_vector_transfer_ops_memref( } func.return %final : vector<4x4xf32> } + +// ----- + +// CHECK-LABEL: func.func @move_single_resource_basic +func.func @move_single_resource_basic() attributes {} { + %c0_i32 = arith.constant 0 : i32 + 
%c1_i32 = arith.constant 1 : i32 + + %c8_i32 = arith.constant 8 : i32 + %c9_i32 = arith.constant 9 : i32 + %c10_i32 = arith.constant 10 : i32 + + // Single write effect on one resource in a triple-nested loop + // No loop-variant inputs to op and no read effects -> movable + + // Note: loop order is flipped as empty loops are hoisted out + // in reverse order! + + // CHECK: "test.test_effects_write_A"() : () -> () + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c8_i32 step %c1_i32 + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c9_i32 step %c1_i32 + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c10_i32 step %c1_i32 + + scf.for %arg0 = %c0_i32 to %c10_i32 step %c1_i32 : i32 { + scf.for %arg1 = %c0_i32 to %c9_i32 step %c1_i32 : i32 { + scf.for %arg2 = %c0_i32 to %c8_i32 step %c1_i32 : i32 { + "test.test_effects_write_A"() : () -> () + } + } + } + return +} + +// ----- + +// CHECK-LABEL: func.func @move_single_resource_write_dominant +func.func @move_single_resource_write_dominant() attributes {} { + %c0_i32 = arith.constant 0 : i32 + %c1_i32 = arith.constant 1 : i32 + + %c8_i32 = arith.constant 8 : i32 + %c9_i32 = arith.constant 9 : i32 + %c10_i32 = arith.constant 10 : i32 + + // Write effect on one resource followed by a Read. + // No loop-variant inputs to Write op, no conflict on + // "A" --> both ops movable + + // Note: loop order is flipped as empty loops are hoisted out + // in reverse order!
+ + // CHECK: "test.test_effects_write_A"() : () -> () + // CHECK: "test.test_effects_read_A"() : () -> () + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c8_i32 step %c1_i32 + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c9_i32 step %c1_i32 + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c10_i32 step %c1_i32 + + scf.for %arg0 = %c0_i32 to %c10_i32 step %c1_i32 : i32 { + scf.for %arg1 = %c0_i32 to %c9_i32 step %c1_i32 : i32 { + scf.for %arg2 = %c0_i32 to %c8_i32 step %c1_i32 : i32 { + "test.test_effects_write_A"() : () -> () + "test.test_effects_read_A"() : () -> () + } + } + } + return +} + +// ----- + +// CHECK-LABEL: func.func @move_single_resource_read_dominant +func.func @move_single_resource_read_dominant() attributes {} { + %c0_i32 = arith.constant 0 : i32 + %c1_i32 = arith.constant 1 : i32 + + %c8_i32 = arith.constant 8 : i32 + %c9_i32 = arith.constant 9 : i32 + %c10_i32 = arith.constant 10 : i32 + + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c10_i32 step %c1_i32 + scf.for %arg0 = %c0_i32 to %c10_i32 step %c1_i32 : i32 { + + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c9_i32 step %c1_i32 + scf.for %arg1 = %c0_i32 to %c9_i32 step %c1_i32 : i32 { + + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c8_i32 step %c1_i32 + scf.for %arg2 = %c0_i32 to %c8_i32 step %c1_i32 : i32 { + + // Read effect on "A" dominates write. + // Causes conflict on "A" --> not movable.
+ + // CHECK: "test.test_effects_read_A"() : () -> () + // CHECK: "test.test_effects_write_A"() : () -> () + + "test.test_effects_read_A"() : () -> () + "test.test_effects_write_A"() : () -> () + } + } + } + return +} + +// ----- + +// CHECK-LABEL: func.func @move_single_resource_basic_conflict +func.func @move_single_resource_basic_conflict() attributes {} { + %c0_i32 = arith.constant 0 : i32 + %c1_i32 = arith.constant 1 : i32 + + %c8_i32 = arith.constant 8 : i32 + %c9_i32 = arith.constant 9 : i32 + %c10_i32 = arith.constant 10 : i32 + + + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c10_i32 step %c1_i32 + scf.for %arg0 = %c0_i32 to %c10_i32 step %c1_i32 : i32 { + + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c9_i32 step %c1_i32 + scf.for %arg1 = %c0_i32 to %c9_i32 step %c1_i32 : i32 { + + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c8_i32 step %c1_i32 + scf.for %arg2 = %c0_i32 to %c8_i32 step %c1_i32 : i32 { + + // CHECK: "test.test_effects_write_A"() : () -> () + // CHECK: "test.test_effects_read_A"() : () -> () + + // Read of "A" dominates Write "A" --> "A" is in conflict. + // "C" is not in conflict but, since all resources used + // by op aren't conflict free, they're not movable.
+ + // CHECK: "test.test_effects_write_AC"() : () -> () + // CHECK: "test.test_effects_read_AC"() : () -> () + + "test.test_effects_write_A"() : () -> () + "test.test_effects_read_A"() : () -> () + "test.test_effects_write_AC"() : () -> () + "test.test_effects_read_AC"() : () -> () + } + } + } + return +} + +// ----- + +// CHECK-LABEL: func.func @move_single_resource_if_region +func.func @move_single_resource_if_region() attributes {} { + %c0_i32 = arith.constant 0 : i32 + %c1_i32 = arith.constant 1 : i32 + + %c5_i32 = arith.constant 5 : i32 + + %c8_i32 = arith.constant 8 : i32 + %c9_i32 = arith.constant 9 : i32 + %c10_i32 = arith.constant 10 : i32 + + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c10_i32 step %c1_i32 + scf.for %arg0 = %c0_i32 to %c10_i32 step %c1_i32 : i32 { + + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c9_i32 step %c1_i32 + scf.for %arg1 = %c0_i32 to %c9_i32 step %c1_i32 : i32 { + + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c8_i32 step %c1_i32 + scf.for %arg2 = %c0_i32 to %c8_i32 step %c1_i32 : i32 { + %1 = arith.cmpi slt, %arg0, %c5_i32 : i32 + + // CHECK: scf.if + scf.if %1 { + // Checking that we're not moving ops out of + // non-loop regions.
+ + // CHECK: "test.test_effects_write_A"() : () -> () + // CHECK: "test.test_effects_read_A"() : () -> () + + "test.test_effects_write_A"() : () -> () + "test.test_effects_read_A"() : () -> () + } + } + } + } + return +} + +// ----- + +// CHECK-LABEL: func.func @move_single_resource_for_inside_if_region +func.func @move_single_resource_for_inside_if_region() attributes {} { + %c0_i32 = arith.constant 0 : i32 + %c1_i32 = arith.constant 1 : i32 + + %c5_i32 = arith.constant 5 : i32 + + %c8_i32 = arith.constant 8 : i32 + %c9_i32 = arith.constant 9 : i32 + %c10_i32 = arith.constant 10 : i32 + + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c10_i32 step %c1_i32 + scf.for %arg0 = %c0_i32 to %c10_i32 step %c1_i32 : i32 { + %1 = arith.cmpi slt, %arg0, %c5_i32 : i32 + + // CHECK: scf.if + scf.if %1 { + // Checking that we can move ops out of loops nested + // within other regions, without moving ops out of + // the parent, non-loop region. + + // CHECK: "test.test_effects_write_A"() : () -> () + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c8_i32 step %c1_i32 + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c9_i32 step %c1_i32 + + scf.for %arg1 = %c0_i32 to %c9_i32 step %c1_i32 : i32 { + scf.for %arg2 = %c0_i32 to %c8_i32 step %c1_i32 : i32 { + "test.test_effects_write_A"() : () -> () + } + } + } + } + return +} + +// ----- + +// CHECK-LABEL: func.func @move_multi_resource_comprehensive +func.func @move_multi_resource_comprehensive() attributes {} { + // Constants are used to mark loops based on upper bound to + // make it more clear which loops were moved and to where + %c0_i32 = arith.constant 0 : i32 + %c1_i32 = arith.constant 1 : i32 + %c2_i32 = arith.constant 2 : i32 + %c3_i32 = arith.constant 3 : i32 + %c4_i32 = arith.constant 4 : i32 + %c5_i32 = arith.constant 5 : i32 + %c6_i32 = arith.constant 6 : i32 + %c7_i32 = arith.constant 7 : i32 + %c8_i32 = arith.constant 8 : i32 + %c9_i32 = arith.constant 9 : i32 + %c10_i32 = arith.constant 10 : i32 + + // CHECK: scf.for %[[IV:.*]] =
%c0_i32 to %c8_i32 step %c1_i32 + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c9_i32 step %c1_i32 + + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c10_i32 step %c1_i32 + scf.for %arg0 = %c0_i32 to %c10_i32 step %c1_i32 : i32 { + // CHECK: "test.test_effects_write_CD"() : () -> () + // CHECK: "test.test_effects_read_CD"() : () -> () + // CHECK: "test.test_effects_write_EF"() : () -> () + // CHECK: "test.test_effects_read_EF"() : () -> () + + // Both of these will be emptied and moved out of their parent + scf.for %arg1 = %c0_i32 to %c9_i32 step %c1_i32 : i32 { + scf.for %arg2 = %c0_i32 to %c8_i32 step %c1_i32 : i32 { + "test.test_effects_write_CD"() : () -> () + "test.test_effects_read_CD"() : () -> () + "test.test_effects_write_EF"() : () -> () + "test.test_effects_read_EF"() : () -> () + } + } + + %1 = arith.cmpi slt, %arg0, %c5_i32 : i32 + scf.if %1 { + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c5_i32 step %c1_i32 + // CHECK: "test.test_effects_write_B"() : () -> () + // CHECK: "test.test_effects_read_B"() : () -> () + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c4_i32 step %c1_i32 + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c3_i32 step %c1_i32 + + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c6_i32 step %c1_i32 + scf.for %arg3 = %c0_i32 to %c6_i32 step %c1_i32 : i32 { + + // CHECK: "test.test_effects_write_A"() : () -> () + // CHECK: "test.test_effects_read_A"() : () -> () + + // Loop will be emptied and the empty loop will be moved out of parent + scf.for %arg4 = %c0_i32 to %c5_i32 step %c1_i32 : i32 { + "test.test_effects_write_A"() : () -> () + "test.test_effects_read_A"() : () -> () + } + + // Loop will be emptied and the empty loop will be moved out of parent + scf.for %arg5 = %c0_i32 to %c4_i32 step %c1_i32 : i32 { + "test.test_effects_write_B"() : () -> () + "test.test_effects_read_B"() : () -> () + } + + // CHECK: "test.test_effects_write_AC"() : () -> () + // CHECK: "test.test_effects_read_AC"() : () -> () + + // Loop will be emptied and the empty loop will 
be moved out of parent + scf.for %arg6 = %c0_i32 to %c3_i32 step %c1_i32 : i32 { + "test.test_effects_write_AC"() : () -> () + "test.test_effects_read_AC"() : () -> () + } + } + } + else { + // Checking that these ops aren't moved out of non-loop region + // CHECK: "test.test_effects_write_F"() : () -> () + // CHECK: "test.test_effects_read_F"() : () -> () + + // Memory effects of ops placed in this region can + // still cause conflicts on resources used in the IF's then region + + "test.test_effects_write_F"() : () -> () + "test.test_effects_read_F"() : () -> () + } + } + return +} + +// ----- + +// CHECK-LABEL: func.func @move_write_with_invariant_input_multi_iteration +func.func @move_write_with_invariant_input_multi_iteration() attributes {} { + // Constants are used to mark loops based on upper bound to + // make it more clear which loops were moved and to where + %c0_i32 = arith.constant 0 : i32 + %c1_i32 = arith.constant 1 : i32 + %c10_i32 = arith.constant 10 : i32 + + // (1) Ops are pulled out during the first LICM iteration on the + // parent loop. + // CHECK: %{{.*}} = arith.constant 7 : index + // CHECK: "test.test_effects_write_B"() : () -> () + // CHECK: "test.test_effects_read_B"() : () -> () + + // (2) Ops pulled out during the second LICM iteration on the + // parent loop, as the input is no longer loop-variant, which + // clears the conflict on resource "A," allowing us to move these ops.
+ // CHECK: "test.test_effects_write_A_with_input"(%c7) : (index) -> () + // CHECK: "test.test_effects_read_A"() : () -> () + + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c10_i32 step %c1_i32 + scf.for %arg0 = %c0_i32 to %c10_i32 step %c1_i32 : i32 { + %input = arith.constant 7 : index + "test.test_effects_write_A_with_input"(%input) : (index) -> () + "test.test_effects_read_A"() : () -> () + + "test.test_effects_write_B"() : () -> () + "test.test_effects_read_B"() : () -> () + } + + return +} + +// ----- + +// CHECK-LABEL: func.func @move_same_op_non_conflicting_read_before_write +func.func @move_same_op_non_conflicting_read_before_write() attributes {} { + // Constants are used to mark loops based on upper bound to + // make it more clear which loops were moved and to where + %c0_i32 = arith.constant 0 : i32 + %c1_i32 = arith.constant 1 : i32 + %c9_i32 = arith.constant 9 : i32 + %c10_i32 = arith.constant 10 : i32 + + // CHECK: "test.test_effects_read_A_write_B"() : () -> () + // CHECK: "test.test_effects_read_B"() : () -> () + // CHECK: "test.test_effects_read_A"() : () -> () + + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c10_i32 step %c1_i32 + scf.for %arg0 = %c0_i32 to %c10_i32 step %c1_i32 : i32 { + "test.test_effects_read_A_write_B"() : () -> () + "test.test_effects_read_B"() : () -> () + "test.test_effects_read_A"() : () -> () + } + + // CHECK: "test.test_effects_read_A_then_write_B"() : () -> () + // CHECK: "test.test_effects_read_B"() : () -> () + // CHECK: "test.test_effects_read_A"() : () -> () + + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c9_i32 step %c1_i32 + scf.for %arg0 = %c0_i32 to %c9_i32 step %c1_i32 : i32 { + "test.test_effects_read_A_then_write_B"() : () -> () + "test.test_effects_read_B"() : () -> () + "test.test_effects_read_A"() : () -> () + } + + return +} + +// ----- + +// CHECK-LABEL: func.func @no_move_same_op_conflicting_read_before_write +func.func @no_move_same_op_conflicting_read_before_write() attributes {} { + // Constants are used
to mark loops based on upper bound to + // make it more clear which loops were moved and to where + %c0_i32 = arith.constant 0 : i32 + %c1_i32 = arith.constant 1 : i32 + %c9_i32 = arith.constant 9 : i32 + %c10_i32 = arith.constant 10 : i32 + + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c10_i32 step %c1_i32 + scf.for %arg0 = %c0_i32 to %c10_i32 step %c1_i32 : i32 { + + // CHECK: "test.test_effects_read_A_write_B"() : () -> () + // CHECK: "test.test_effects_read_B"() : () -> () + // CHECK: "test.test_effects_write_A"() : () -> () + + "test.test_effects_read_A_write_B"() : () -> () + "test.test_effects_read_B"() : () -> () + "test.test_effects_write_A"() : () -> () + } + + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c9_i32 step %c1_i32 + scf.for %arg0 = %c0_i32 to %c9_i32 step %c1_i32 : i32 { + // CHECK: "test.test_effects_read_A_then_write_B"() : () -> () + // CHECK: "test.test_effects_read_B"() : () -> () + // CHECK: "test.test_effects_write_A"() : () -> () + + "test.test_effects_read_A_then_write_B"() : () -> () + "test.test_effects_read_B"() : () -> () + "test.test_effects_write_A"() : () -> () + } + + return +} + +// ----- + +// CHECK-LABEL: func.func @move_same_op_non_conflicting_write_before_read +func.func @move_same_op_non_conflicting_write_before_read() attributes {} { + // Constants are used to mark loops based on upper bound to + // make it more clear which loops were moved and to where + %c0_i32 = arith.constant 0 : i32 + %c1_i32 = arith.constant 1 : i32 + %c9_i32 = arith.constant 9 : i32 + %c10_i32 = arith.constant 10 : i32 + + // CHECK: "test.test_effects_write_A_then_read_B"() : () -> () + // CHECK: "test.test_effects_read_B"() : () -> () + // CHECK: "test.test_effects_read_A"() : () -> () + + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c10_i32 step %c1_i32 + scf.for %arg0 = %c0_i32 to %c10_i32 step %c1_i32 : i32 { + "test.test_effects_write_A_then_read_B"() : () -> () + "test.test_effects_read_B"() : () -> () + "test.test_effects_read_A"() : () -> () + }
+ + // CHECK: "test.test_effects_write_A_then_read_B"() : () -> () + // CHECK: "test.test_effects_read_B"() : () -> () + // CHECK: "test.test_effects_write_A"() : () -> () + + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c9_i32 step %c1_i32 + scf.for %arg0 = %c0_i32 to %c9_i32 step %c1_i32 : i32 { + "test.test_effects_write_A_then_read_B"() : () -> () + "test.test_effects_read_B"() : () -> () + "test.test_effects_write_A"() : () -> () + } + + return +} + +// ----- + +// CHECK-LABEL: func.func @no_move_same_op_conflicting_write_before_read +func.func @no_move_same_op_conflicting_write_before_read() attributes {} { + // Constants are used to mark loops based on upper bound to + // make it more clear which loops were moved and to where + %c0_i32 = arith.constant 0 : i32 + %c1_i32 = arith.constant 1 : i32 + %c10_i32 = arith.constant 10 : i32 + + // CHECK: scf.for %[[IV:.*]] = %c0_i32 to %c10_i32 step %c1_i32 + scf.for %arg0 = %c0_i32 to %c10_i32 step %c1_i32 : i32 { + // CHECK: "test.test_effects_write_A_then_read_B"() : () -> () + // CHECK: "test.test_effects_read_A"() : () -> () + // CHECK: "test.test_effects_write_A"() : () -> () + + "test.test_effects_write_A_then_read_B"() : () -> () + "test.test_effects_read_A"() : () -> () + "test.test_effects_write_A"() : () -> () + } + + return +} \ No newline at end of file diff --git a/mlir/test/lib/Dialect/Test/TestOps.h b/mlir/test/lib/Dialect/Test/TestOps.h index 4201ade9795e7..2dba8d79cb52f 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.h +++ b/mlir/test/lib/Dialect/Test/TestOps.h @@ -55,6 +55,29 @@ struct TestResource : public mlir::SideEffects::Resource::Base<TestResource> { llvm::StringRef getName() final { return "<test>"; } }; +struct TestResourceA : public mlir::SideEffects::Resource::Base<TestResourceA> { + llvm::StringRef getName() final { return "<TestResourceA>"; } +}; + +struct TestResourceB : public mlir::SideEffects::Resource::Base<TestResourceB> { + llvm::StringRef getName() final { return "<TestResourceB>"; } +}; + +struct TestResourceC : public mlir::SideEffects::Resource::Base<TestResourceC> {
llvm::StringRef getName() final { return "<TestResourceC>"; } +}; + +struct TestResourceD : public mlir::SideEffects::Resource::Base<TestResourceD> { + llvm::StringRef getName() final { return "<TestResourceD>"; } +}; + +struct TestResourceE : public mlir::SideEffects::Resource::Base<TestResourceE> { + llvm::StringRef getName() final { return "<TestResourceE>"; } +}; + +struct TestResourceF : public mlir::SideEffects::Resource::Base<TestResourceF> { + llvm::StringRef getName() final { return "<TestResourceF>"; } +}; //===----------------------------------------------------------------------===// // PropertiesWithCustomPrint //===----------------------------------------------------------------------===// diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index 5564264ed8b0b..1b3e3c7e21b08 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -2976,6 +2976,72 @@ def TestEffectsResult : TEST_Op<"test_effects_result"> { let results = (outs Res); } +//===----------------------------------------------------------------------===// +// Test Ops with multiple effects for Loop Invariant Code Motion.
+//===----------------------------------------------------------------------===// + +def TestResourceA : Resource<"TestResourceA">; +def TestResourceB : Resource<"TestResourceB">; +def TestResourceC : Resource<"TestResourceC">; +def TestResourceD : Resource<"TestResourceD">; +def TestResourceE : Resource<"TestResourceE">; +def TestResourceF : Resource<"TestResourceF">; + +def TestEffectsWriteA : TEST_Op<"test_effects_write_A", + [MemoryEffects<[MemWrite<TestResourceA>]>]>; + +def TestEffectsWriteAWithInput : TEST_Op<"test_effects_write_A_with_input", + [MemoryEffects<[MemWrite<TestResourceA>]>]> { + + let arguments = (ins AnyType:$arg); +} + +def TestEffectsReadA : TEST_Op<"test_effects_read_A", + [MemoryEffects<[MemRead<TestResourceA>]>]>; + +def TestEffectsReadAWriteB : TEST_Op<"test_effects_read_A_write_B", + [MemoryEffects<[MemRead<TestResourceA>, + MemWrite<TestResourceB>]>]>; + +def TestEffectsReadAThenWriteB : TEST_Op<"test_effects_read_A_then_write_B", + [MemoryEffects<[MemRead<TestResourceA, 0>, + MemWrite<TestResourceB, 1>]>]>; + +def TestEffectsWriteAThenReadB : TEST_Op<"test_effects_write_A_then_read_B", + [MemoryEffects<[MemWrite<TestResourceA, 0>, + MemRead<TestResourceB, 1>]>]>; + +def TestEffectsWriteB : TEST_Op<"test_effects_write_B", + [MemoryEffects<[MemWrite<TestResourceB>]>]>; + +def TestEffectsReadB : TEST_Op<"test_effects_read_B", + [MemoryEffects<[MemRead<TestResourceB>]>]>; + +def TestEffectsWriteF : TEST_Op<"test_effects_write_F", + [MemoryEffects<[MemWrite<TestResourceF>]>]>; + +def TestEffectsReadF : TEST_Op<"test_effects_read_F", + [MemoryEffects<[MemRead<TestResourceF>]>]>; + +def TestEffectsWriteAC : TEST_Op<"test_effects_write_AC", + [MemoryEffects<[MemWrite<TestResourceA>, MemWrite<TestResourceC>]>]>; + +def TestEffectsReadAC : TEST_Op<"test_effects_read_AC", + [MemoryEffects<[MemRead<TestResourceA>, MemRead<TestResourceC>]>]>; + +def TestEffectsWriteCD : TEST_Op<"test_effects_write_CD", + [MemoryEffects<[MemWrite<TestResourceC>, MemWrite<TestResourceD>]>]>; + +def TestEffectsReadCD : TEST_Op<"test_effects_read_CD", + [MemoryEffects<[MemRead<TestResourceC>, MemRead<TestResourceD>]>]>; + +def TestEffectsWriteEF : TEST_Op<"test_effects_write_EF", + [MemoryEffects<[MemWrite<TestResourceE>, MemWrite<TestResourceF>]>]>; + +def TestEffectsReadEF : TEST_Op<"test_effects_read_EF", +
[MemoryEffects<[MemRead<TestResourceE>, MemRead<TestResourceF>]>]>; + + //===----------------------------------------------------------------------===// // Test Ops with verifiers //===----------------------------------------------------------------------===//