diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 2e76adb83bd50..e5bce7299a970 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1510,19 +1510,36 @@ class LoopVectorizationCostModel { } /// Returns the TailFoldingStyle that is best for the current loop. - TailFoldingStyle - getTailFoldingStyle(bool IVUpdateMayOverflow = true) const { - if (!CanFoldTailByMasking) - return TailFoldingStyle::None; + TailFoldingStyle getTailFoldingStyle(bool IVUpdateMayOverflow = true) const { + return IVUpdateMayOverflow ? ChosenTailFoldingStyle.first + : ChosenTailFoldingStyle.second; + } + + /// Selects and saves TailFoldingStyle for 2 options - if IV update may + /// overflow or not. + void setTailFoldinStyles() { + assert(ChosenTailFoldingStyle.first == TailFoldingStyle::None && + ChosenTailFoldingStyle.second == TailFoldingStyle::None && + "Tail folding must not be selected yet."); + if (!Legal->prepareToFoldTailByMasking()) + return; - if (ForceTailFoldingStyle.getNumOccurrences()) - return ForceTailFoldingStyle; + if (ForceTailFoldingStyle.getNumOccurrences()) { + ChosenTailFoldingStyle.first = ChosenTailFoldingStyle.second = + ForceTailFoldingStyle; + return; + } - return TTI.getPreferredTailFoldingStyle(IVUpdateMayOverflow); + ChosenTailFoldingStyle.first = + TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/true); + ChosenTailFoldingStyle.second = + TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/false); } /// Returns true if all loop blocks should be masked to fold tail loop. bool foldTailByMasking() const { + // TODO: check if it is possible to check for None style independent of + // IVUpdateMayOverflow flag in getTailFoldingStyle. return getTailFoldingStyle() != TailFoldingStyle::None; } @@ -1675,8 +1692,10 @@ class LoopVectorizationCostModel { /// iterations to execute in the scalar loop. 
ScalarEpilogueLowering ScalarEpilogueStatus = CM_ScalarEpilogueAllowed; - /// All blocks of loop are to be masked to fold tail of scalar iterations. - bool CanFoldTailByMasking = false; + /// Control finally chosen tail folding style. The first element is used if + /// the IV update may overflow, the second element - if it does not. + std::pair<TailFoldingStyle, TailFoldingStyle> ChosenTailFoldingStyle = + std::make_pair(TailFoldingStyle::None, TailFoldingStyle::None); /// A map holding scalar costs for different vectorization factors. The /// presence of a cost for an instruction in the mapping indicates that the @@ -4633,10 +4652,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { // found modulo the vectorization factor is not zero, try to fold the tail // by masking. // FIXME: look for a smaller MaxVF that does divide TC rather than masking. - if (Legal->prepareToFoldTailByMasking()) { - CanFoldTailByMasking = true; + setTailFoldinStyles(); + if (foldTailByMasking()) return MaxFactors; - } // If there was a tail-folding hint/switch, but we can't fold the tail by // masking, fallback to a vectorization with a scalar epilogue.