[VPlan] Address review

artagnon · artagnon · commit eac2a27aa0f3 · 2025-10-13T14:09:44.000+01:00
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1555,9 +1555,6 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
   /// True if the intrinsic may have side-effects.
   bool MayHaveSideEffects;
 
-  /// True if the intrinsic is safe to speculatively execute.
-  bool IsSafeToSpeculativelyExecute;
-
 public:
   VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID,
                          ArrayRef<VPValue *> CallArguments, Type *Ty,
@@ -1581,7 +1578,6 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
     MayHaveSideEffects = MayWriteToMemory ||
                          !Attrs.hasAttribute(Attribute::NoUnwind) ||
                          !Attrs.hasAttribute(Attribute::WillReturn);
-    IsSafeToSpeculativelyExecute = Attrs.hasAttribute(Attribute::Speculatable);
   }
 
   ~VPWidenIntrinsicRecipe() override = default;
@@ -1621,11 +1617,6 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
   /// Returns true if the intrinsic may have side-effects.
   bool mayHaveSideEffects() const { return MayHaveSideEffects; }
 
-  /// Returns true if the intrinsic is safe to speculatively execute.
-  bool isSafeToSpeculativelyExecute() const {
-    return IsSafeToSpeculativelyExecute;
-  }
-
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   /// Print the recipe.
   void print(raw_ostream &O, const Twine &Indent,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -30,7 +30,6 @@
 #include "llvm/Analysis/InstSimplifyFolder.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
-#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/MDBuilder.h"
@@ -2103,23 +2102,12 @@ void VPlanTransforms::cse(VPlan &Plan) {
   }
 }
 
-static bool isSafeToSpeculativelyExecute(VPRecipeBase *R) {
-  if (auto *WC = dyn_cast<VPWidenCallRecipe>(R))
-    return WC->getCalledScalarFunction()->isSpeculatable();
-  if (auto *WI = dyn_cast<VPWidenIntrinsicRecipe>(R))
-    return WI->isSafeToSpeculativelyExecute();
-  if (auto *RepR = dyn_cast<VPReplicateRecipe>(R))
-    return isSafeToSpeculativelyExecute(RepR->getUnderlyingInstr());
-  return !R->mayHaveSideEffects();
-}
-
 /// Move loop-invariant recipes out of the vector loop region in \p Plan.
 static void licm(VPlan &Plan) {
   VPBasicBlock *Preheader = Plan.getVectorPreheader();
 
   // Return true if we do not know how to (mechanically) hoist a given recipe
-  // out of a loop region. Does not address legality concerns such as aliasing
-  // or speculation safety.
+  // out of a loop region.
   auto CannotHoistRecipe = [](VPRecipeBase &R) {
     // TODO: Relax checks in the future, e.g. we could also hoist reads, if
     // their memory location is not modified in the vector loop.
@@ -2133,16 +2121,16 @@ static void licm(VPlan &Plan) {
 
   // Hoist any loop invariant recipes from the vector loop region to the
   // preheader. Preform a shallow traversal of the vector loop region, to
-  // exclude recipes in replicate regions.
+  // exclude recipes in replicate regions. Since the vector loop region is
+  // guaranteed to execute, if the vector pre-header is, we don't need to check
+  // speculation safety.
   VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
-  bool GuaranteedToExecute = Preheader->getSingleSuccessor() == LoopRegion;
   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
            vp_depth_first_shallow(LoopRegion->getEntry()))) {
     for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
       if (CannotHoistRecipe(R))
         continue;
-      if ((!GuaranteedToExecute && !isSafeToSpeculativelyExecute(&R)) ||
-          any_of(R.operands(), [](VPValue *Op) {
+      if (any_of(R.operands(), [](VPValue *Op) {
             return !Op->isDefinedOutsideLoopRegions();
           }))
         continue;
diff --git a/llvm/test/Transforms/LoopVectorize/X86/funclet.ll b/llvm/test/Transforms/LoopVectorize/X86/funclet.ll
@@ -1,35 +1,11 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -S -passes=loop-vectorize < %s | FileCheck %s
 target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
 target triple = "i686-pc-windows-msvc18.0.0"
 
 define void @test1() #0 personality ptr @__CxxFrameHandler3 {
-; CHECK-LABEL: define void @test1(
-; CHECK-SAME: ) #[[ATTR0:[0-9]+]] personality ptr @__CxxFrameHandler3 {
-; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    invoke void @_CxxThrowException(ptr null, ptr null)
-; CHECK-NEXT:            to label %[[UNREACHABLE:.*]] unwind label %[[CATCH_DISPATCH:.*]]
-; CHECK:       [[CATCH_DISPATCH]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = catchswitch within none [label %catch] unwind to caller
-; CHECK:       [[CATCH:.*]]:
-; CHECK-NEXT:    [[TMP1:%.*]] = catchpad within [[TMP0]] [ptr null, i32 64, ptr null]
-; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
-; CHECK:       [[FOR_COND_CLEANUP:.*]]:
-; CHECK-NEXT:    catchret from [[TMP1]] to label %[[TRY_CONT:.*]]
-; CHECK:       [[FOR_BODY]]:
-; CHECK-NEXT:    [[I_07:%.*]] = phi i32 [ 0, %[[CATCH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
-; CHECK-NEXT:    [[CALL:%.*]] = call double @floor(double 1.000000e+00) #[[ATTR1:[0-9]+]] [ "funclet"(token [[TMP1]]) ]
-; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_07]], 1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024
-; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
-; CHECK:       [[TRY_CONT]]:
-; CHECK-NEXT:    ret void
-; CHECK:       [[UNREACHABLE]]:
-; CHECK-NEXT:    unreachable
-;
 entry:
   invoke void @_CxxThrowException(ptr null, ptr null)
-  to label %unreachable unwind label %catch.dispatch
+          to label %unreachable unwind label %catch.dispatch
 
 catch.dispatch:                                   ; preds = %entry
   %0 = catchswitch within none [label %catch] unwind to caller
@@ -43,7 +19,8 @@ for.cond.cleanup:                                 ; preds = %for.body
 
 for.body:                                         ; preds = %for.body, %catch
   %i.07 = phi i32 [ 0, %catch ], [ %inc, %for.body ]
-  %call = call double @floor(double 1.0) #1 [ "funclet"(token %1) ]
+  %tofp = uitofp i32 %i.07 to double
+  %call = call double @floor(double %tofp) #1 [ "funclet"(token %1) ]
   %inc = add nuw nsw i32 %i.07, 1
   %exitcond = icmp eq i32 %inc, 1024
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -55,6 +32,9 @@ unreachable:                                      ; preds = %entry
   unreachable
 }
 
+; CHECK-LABEL: define void @test1(
+; CHECK: %[[cpad:.*]] = catchpad within {{.*}} [ptr null, i32 64, ptr null]
+; CHECK: call <16 x double> @llvm.floor.v16f64(<16 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ]
 
 declare x86_stdcallcc void @_CxxThrowException(ptr, ptr)
 
diff --git a/llvm/test/Transforms/LoopVectorize/assume.ll b/llvm/test/Transforms/LoopVectorize/assume.ll
@@ -34,8 +34,9 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b)
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1600
 ; CHECK-NEXT:    br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    br [[FOR_END:label %.*]]
-; CHECK:       [[SCALAR_PH:.*:]]
+; CHECK-NEXT:    br label %[[FOR_END:.*]]
+; CHECK:       [[FOR_END]]:
+; CHECK-NEXT:    ret void
 ;
 entry:
   br label %for.body
@@ -57,10 +58,6 @@ for.end:                                          ; preds = %for.body
   ret void
 }
 
-declare void @llvm.assume(i1) #0
-
-attributes #0 = { nounwind willreturn }
-
 define void @test2(ptr noalias %a, ptr noalias %b) {
 ; CHECK-LABEL: define void @test2(
 ; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
@@ -92,10 +89,11 @@ define void @test2(ptr noalias %a, ptr noalias %b) {
 ; CHECK-NEXT:    store <2 x float> [[TMP6]], ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1600
-; CHECK-NEXT:    br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    br [[FOR_END:label %.*]]
-; CHECK:       [[SCALAR_PH:.*:]]
+; CHECK-NEXT:    br label %[[FOR_END:.*]]
+; CHECK:       [[FOR_END]]:
+; CHECK-NEXT:    ret void
 ;
 entry:
   %ptrint = ptrtoint ptr %a to i64
@@ -163,7 +161,7 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], [[FOR_COND_CLEANUP_LOOPEXIT:label %.*]], label %[[SCALAR_PH]]
@@ -204,3 +202,58 @@ if.end5:                                          ; preds = %for.body, %if.else
   %cmp = icmp eq i64 %indvars.iv.next, %0
   br i1 %cmp, label %for.cond.cleanup.loopexit, label %for.body
 }
+
+define void @operand_bundle(ptr noalias %a, ptr noalias %b) {
+; CHECK-LABEL: define void @operand_bundle(
+; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 2
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP4]], align 4
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <2 x float>, ptr [[TMP5]], align 4
+; CHECK-NEXT:    tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP0]]) ]
+; CHECK-NEXT:    tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP1]]) ]
+; CHECK-NEXT:    tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP2]]) ]
+; CHECK-NEXT:    tail call void @llvm.assume(i1 true) [ "align"(ptr [[A]], i64 [[TMP3]]) ]
+; CHECK-NEXT:    [[TMP6:%.*]] = fadd <2 x float> [[WIDE_LOAD]], splat (float 1.000000e+00)
+; CHECK-NEXT:    [[TMP7:%.*]] = fadd <2 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00)
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 2
+; CHECK-NEXT:    store <2 x float> [[TMP6]], ptr [[TMP8]], align 4
+; CHECK-NEXT:    store <2 x float> [[TMP7]], ptr [[TMP9]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1600
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br label %[[FOR_END:.*]]
+; CHECK:       [[FOR_END]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %iv
+  %0 = load float, ptr %arrayidx, align 4
+  %cmp1 = fcmp ogt float %0, 1.000000e+02
+  tail call void @llvm.assume(i1 true) ["align"(ptr %a, i64 %iv)]
+  %add = fadd float %0, 1.000000e+00
+  %arrayidx5 = getelementptr inbounds float, ptr %a, i64 %iv
+  store float %add, ptr %arrayidx5, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv, 1599
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}