From 62d4f33cfde7b9374070a00f32565827804d4d89 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 15 Feb 2024 17:44:30 +0000 Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?= =?UTF-8?q?l=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.5 --- .../Transforms/Vectorize/LoopVectorize.cpp | 35 ++++++++++++------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 98b177cf5d2d0..f2435601c8fa3 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1509,15 +1509,25 @@ class LoopVectorizationCostModel { } /// Returns the TailFoldingStyle that is best for the current loop. - TailFoldingStyle - getTailFoldingStyle(bool IVUpdateMayOverflow = true) const { - if (!CanFoldTailByMasking) - return TailFoldingStyle::None; + TailFoldingStyle getTailFoldingStyle(bool IVUpdateMayOverflow = true) const { + return IVUpdateMayOverflow ? ChosenTailFoldingStyle.first + : ChosenTailFoldingStyle.second; + } + + void selectTailFoldinStyle() { + if (!Legal->prepareToFoldTailByMasking()) + return; - if (ForceTailFoldingStyle.getNumOccurrences()) - return ForceTailFoldingStyle; + if (ForceTailFoldingStyle.getNumOccurrences()) { + ChosenTailFoldingStyle.first = ChosenTailFoldingStyle.second = + ForceTailFoldingStyle; + return; + } - return TTI.getPreferredTailFoldingStyle(IVUpdateMayOverflow); + ChosenTailFoldingStyle.first = + TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/true); + ChosenTailFoldingStyle.second = + TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/false); } /// Returns true if all loop blocks should be masked to fold tail loop. @@ -1674,8 +1684,10 @@ class LoopVectorizationCostModel { /// iterations to execute in the scalar loop. 
ScalarEpilogueLowering ScalarEpilogueStatus = CM_ScalarEpilogueAllowed; - /// All blocks of loop are to be masked to fold tail of scalar iterations. - bool CanFoldTailByMasking = false; + /// The finally chosen tail folding style. The first element is used if the IV + /// update may overflow, the second element if it may not. + std::pair<TailFoldingStyle, TailFoldingStyle> ChosenTailFoldingStyle = + std::make_pair(TailFoldingStyle::None, TailFoldingStyle::None); /// A map holding scalar costs for different vectorization factors. The /// presence of a cost for an instruction in the mapping indicates that the @@ -4632,10 +4644,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { // found modulo the vectorization factor is not zero, try to fold the tail // by masking. // FIXME: look for a smaller MaxVF that does divide TC rather than masking. - if (Legal->prepareToFoldTailByMasking()) { - CanFoldTailByMasking = true; + selectTailFoldinStyle(); + if (foldTailByMasking()) return MaxFactors; - } // If there was a tail-folding hint/switch, but we can't fold the tail by // masking, fallback to a vectorization with a scalar epilogue.