diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index b12ac513470ca..d3b1f9e2afd98 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1540,6 +1540,14 @@ class LoopVectorizationCostModel { } } + /// Disables the previously chosen tail folding policy by setting it to None. + /// Expects that a tail folding policy has already been selected. + void disableTailFolding() { + assert(ChosenTailFoldingStyle && "Tail folding must be selected."); + ChosenTailFoldingStyle = + std::make_pair(TailFoldingStyle::None, TailFoldingStyle::None); + } + /// Returns true if all loop blocks should be masked to fold tail loop. bool foldTailByMasking() const { // TODO: check if it is possible to check for None style independent of @@ -1631,6 +1639,11 @@ class LoopVectorizationCostModel { ElementCount MaxSafeVF, bool FoldTailByMasking); + /// Checks if scalable vectorization is supported and enabled. The result is + /// cached in IsScalableVectorizationAllowed on the first call and returned + /// directly by later calls. + bool isScalableVectorizationAllowed(); + /// \return the maximum legal scalable VF, based on the safe max number /// of elements. ElementCount getMaxLegalScalableVF(unsigned MaxSafeElements); @@ -1695,6 +1708,9 @@ class LoopVectorizationCostModel { std::optional<std::pair<TailFoldingStyle, TailFoldingStyle>> ChosenTailFoldingStyle; + /// Cached result of isScalableVectorizationAllowed(); empty until computed. + std::optional<bool> IsScalableVectorizationAllowed; + /// A map holding scalar costs for different vectorization factors. 
The /// presence of a cost for an instruction in the mapping indicates that the /// instruction will be scalarized when vectorizing with the associated @@ -4189,15 +4205,18 @@ bool LoopVectorizationCostModel::runtimeChecksRequired() { return false; } -ElementCount -LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) { +bool LoopVectorizationCostModel::isScalableVectorizationAllowed() { + if (IsScalableVectorizationAllowed) + return *IsScalableVectorizationAllowed; + + IsScalableVectorizationAllowed = false; if (!TTI.supportsScalableVectors() && !ForceTargetSupportsScalableVectors) - return ElementCount::getScalable(0); + return false; if (Hints->isScalableVectorizationDisabled()) { reportVectorizationInfo("Scalable vectorization is explicitly disabled", "ScalableVectorizationDisabled", ORE, TheLoop); - return ElementCount::getScalable(0); + return false; } LLVM_DEBUG(dbgs() << "LV: Scalable vectorization is available\n"); @@ -4217,7 +4236,7 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) { "Scalable vectorization not supported for the reduction " "operations found in this loop.", "ScalableVFUnfeasible", ORE, TheLoop); - return ElementCount::getScalable(0); + return false; } // Disable scalable vectorization if the loop contains any instructions @@ -4229,9 +4248,20 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) { reportVectorizationInfo("Scalable vectorization is not supported " "for all element types found in this loop.", "ScalableVFUnfeasible", ORE, TheLoop); - return ElementCount::getScalable(0); + return false; } + IsScalableVectorizationAllowed = true; + return true; +} + +ElementCount +LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) { + if (!isScalableVectorizationAllowed()) + return ElementCount::getScalable(0); + + auto MaxScalableVF = ElementCount::getScalable( + std::numeric_limits<ElementCount::ScalarTy>::max()); if (Legal->isSafeForAnyVectorWidth()) return 
MaxScalableVF; @@ -4434,6 +4464,11 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { InterleaveInfo.invalidateGroupsRequiringScalarEpilogue(); } + // If we don't know the precise trip count, or if the trip count that we + // found modulo the vectorization factor is not zero, try to fold the tail + // by masking. + // FIXME: look for a smaller MaxVF that does divide TC rather than masking. + setTailFoldingStyles(isScalableVectorizationAllowed(), UserIC); FixedScalableVFPair MaxFactors = computeFeasibleMaxVF(MaxTC, UserVF, true); // Avoid tail folding if the trip count is known to be a multiple of any VF @@ -4465,15 +4500,11 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { if (Rem->isZero()) { // Accept MaxFixedVF if we do not have a tail. LLVM_DEBUG(dbgs() << "LV: No tail will remain for any chosen VF.\n"); + disableTailFolding(); return MaxFactors; } } - // If we don't know the precise trip count, or if the trip count that we - // found modulo the vectorization factor is not zero, try to fold the tail - // by masking. - // FIXME: look for a smaller MaxVF that does divide TC rather than masking. - setTailFoldingStyles(MaxFactors.ScalableVF.isScalable(), UserIC); if (foldTailByMasking()) { if (getTailFoldingStyle() == TailFoldingStyle::DataWithEVL) { LLVM_DEBUG(