From c93df398028434278f8d8f255d7f9917fd05b2dc Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 8 Oct 2025 13:03:52 +0100 Subject: [PATCH 1/4] [VPlan] Extend licm with speculative-exec-check Technically speaking, LICM is only unsafe if the recipe reads or writes memory in the absence of additional information. It is moreover safe to hoist if the recipe is guaranteed to execute, or if it is safe to speculatively execute it. This patch aligns the VPlan-licm more closely with the IR-LICM. --- llvm/lib/Transforms/Vectorize/VPlan.h | 9 ++++++ .../Transforms/Vectorize/VPlanTransforms.cpp | 21 ++++++++++++-- .../extractvalue-no-scalarization-required.ll | 14 +++++---- .../Transforms/LoopVectorize/X86/funclet.ll | 29 ++++++++++++++++--- 4 files changed, 60 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 23f5623c7db78..03aa9931654af 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1546,6 +1546,9 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { /// True if the intrinsic may have side-effects. bool MayHaveSideEffects; + /// True if the intrinsic is safe to speculatively execute. + bool IsSafeToSpeculativelyExecute; + public: VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef CallArguments, Type *Ty, @@ -1569,6 +1572,7 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { MayHaveSideEffects = MayWriteToMemory || !Attrs.hasAttribute(Attribute::NoUnwind) || !Attrs.hasAttribute(Attribute::WillReturn); + IsSafeToSpeculativelyExecute = Attrs.hasAttribute(Attribute::Speculatable); } ~VPWidenIntrinsicRecipe() override = default; @@ -1608,6 +1612,11 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { /// Returns true if the intrinsic may have side-effects. 
bool mayHaveSideEffects() const { return MayHaveSideEffects; } + /// Returns true if the intrinsic is safe to speculatively execute. + bool isSafeToSpeculativelyExecute() const { + return IsSafeToSpeculativelyExecute; + } + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. void print(raw_ostream &O, const Twine &Indent, diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 40b7e8df7aec9..a9556febb5f34 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -30,6 +30,7 @@ #include "llvm/Analysis/InstSimplifyFolder.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionPatternMatch.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" @@ -2102,6 +2103,16 @@ void VPlanTransforms::cse(VPlan &Plan) { } } +static bool isSafeToSpeculativelyExecute(VPRecipeBase *R) { + if (auto *WC = dyn_cast(R)) + return WC->getCalledScalarFunction()->isSpeculatable(); + if (auto *WI = dyn_cast(R)) + return WI->isSafeToSpeculativelyExecute(); + if (auto *RepR = dyn_cast(R)) + return isSafeToSpeculativelyExecute(RepR->getUnderlyingInstr()); + return !R->mayHaveSideEffects(); +} + /// Move loop-invariant recipes out of the vector loop region in \p Plan. static void licm(VPlan &Plan) { VPBasicBlock *Preheader = Plan.getVectorPreheader(); @@ -2110,6 +2121,11 @@ static void licm(VPlan &Plan) { // out of a loop region. Does not address legality concerns such as aliasing // or speculation safety. auto CannotHoistRecipe = [](VPRecipeBase &R) { + // TODO: Relax checks in the future, e.g. we could also hoist reads, if + // their memory location is not modified in the vector loop. + if (R.mayReadOrWriteMemory() || R.isPhi()) + return true; + // Allocas cannot be hoisted. 
auto *RepR = dyn_cast(&R); return RepR && RepR->getOpcode() == Instruction::Alloca; @@ -2119,14 +2135,13 @@ static void licm(VPlan &Plan) { // preheader. Preform a shallow traversal of the vector loop region, to // exclude recipes in replicate regions. VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + bool GuaranteedToExecute = Preheader->getSingleSuccessor() == LoopRegion; for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( vp_depth_first_shallow(LoopRegion->getEntry()))) { for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { if (CannotHoistRecipe(R)) continue; - // TODO: Relax checks in the future, e.g. we could also hoist reads, if - // their memory location is not modified in the vector loop. - if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi() || + if ((!GuaranteedToExecute && !isSafeToSpeculativelyExecute(&R)) || any_of(R.operands(), [](VPValue *Op) { return !Op->isDefinedOutsideLoopRegions(); })) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll index 5970608794b55..48e64771ac152 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll @@ -45,8 +45,9 @@ define void @test1(ptr %dst, {i64, i64} %sv) { ; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; FORCED: [[MIDDLE_BLOCK]]: -; FORCED-NEXT: br [[EXIT:label %.*]] -; FORCED: [[SCALAR_PH:.*:]] +; FORCED-NEXT: br label %[[EXIT:.*]] +; FORCED: [[EXIT]]: +; FORCED-NEXT: ret void ; entry: br label %loop.body @@ -91,18 +92,19 @@ define void @test_getVectorCallCost(ptr %dst, {float, float} %sv) { ; FORCED-NEXT: [[TMP4:%.*]] = extractvalue { float, float } [[SV]], 1 ; FORCED-NEXT: 
[[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i64 0 ; FORCED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer +; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT2]]) ; FORCED-NEXT: br label %[[VECTOR_BODY:.*]] ; FORCED: [[VECTOR_BODY]]: ; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[DST]], i32 [[INDEX]] -; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT2]]) ; FORCED-NEXT: store <2 x float> [[TMP2]], ptr [[TMP1]], align 4 ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 -; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; FORCED: [[MIDDLE_BLOCK]]: -; FORCED-NEXT: br [[EXIT:label %.*]] -; FORCED: [[SCALAR_PH:.*:]] +; FORCED-NEXT: br label %[[EXIT:.*]] +; FORCED: [[EXIT]]: +; FORCED-NEXT: ret void ; entry: br label %loop.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/funclet.ll b/llvm/test/Transforms/LoopVectorize/X86/funclet.ll index 84d21110e8da9..f5a30bef177df 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/funclet.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/funclet.ll @@ -1,11 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -S -passes=loop-vectorize < %s | FileCheck %s target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" target triple = "i686-pc-windows-msvc18.0.0" define void @test1() #0 personality ptr @__CxxFrameHandler3 { +; CHECK-LABEL: 
define void @test1( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] personality ptr @__CxxFrameHandler3 { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: invoke void @_CxxThrowException(ptr null, ptr null) +; CHECK-NEXT: to label %[[UNREACHABLE:.*]] unwind label %[[CATCH_DISPATCH:.*]] +; CHECK: [[CATCH_DISPATCH]]: +; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch] unwind to caller +; CHECK: [[CATCH:.*]]: +; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [ptr null, i32 64, ptr null] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_COND_CLEANUP:.*]]: +; CHECK-NEXT: catchret from [[TMP1]] to label %[[TRY_CONT:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ 0, %[[CATCH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[CALL:%.*]] = call double @floor(double 1.000000e+00) #[[ATTR1:[0-9]+]] [ "funclet"(token [[TMP1]]) ] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] +; CHECK: [[TRY_CONT]]: +; CHECK-NEXT: ret void +; CHECK: [[UNREACHABLE]]: +; CHECK-NEXT: unreachable +; entry: invoke void @_CxxThrowException(ptr null, ptr null) - to label %unreachable unwind label %catch.dispatch + to label %unreachable unwind label %catch.dispatch catch.dispatch: ; preds = %entry %0 = catchswitch within none [label %catch] unwind to caller @@ -31,9 +55,6 @@ unreachable: ; preds = %entry unreachable } -; CHECK-LABEL: define void @test1( -; CHECK: %[[cpad:.*]] = catchpad within {{.*}} [ptr null, i32 64, ptr null] -; CHECK: call <16 x double> @llvm.floor.v16f64(<16 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ] declare x86_stdcallcc void @_CxxThrowException(ptr, ptr) From a642e71c6b7646141dfb05fa1ac9089685c794e5 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Mon, 13 Oct 2025 14:07:40 +0100 Subject: [PATCH 2/4] [VPlan] Address review --- llvm/lib/Transforms/Vectorize/VPlan.h | 9 ------ 
.../Transforms/Vectorize/VPlanTransforms.cpp | 22 +++---------- .../Transforms/LoopVectorize/X86/funclet.ll | 32 ++++--------------- 3 files changed, 11 insertions(+), 52 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 03aa9931654af..23f5623c7db78 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1546,9 +1546,6 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { /// True if the intrinsic may have side-effects. bool MayHaveSideEffects; - /// True if the intrinsic is safe to speculatively execute. - bool IsSafeToSpeculativelyExecute; - public: VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef CallArguments, Type *Ty, @@ -1572,7 +1569,6 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { MayHaveSideEffects = MayWriteToMemory || !Attrs.hasAttribute(Attribute::NoUnwind) || !Attrs.hasAttribute(Attribute::WillReturn); - IsSafeToSpeculativelyExecute = Attrs.hasAttribute(Attribute::Speculatable); } ~VPWidenIntrinsicRecipe() override = default; @@ -1612,11 +1608,6 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { /// Returns true if the intrinsic may have side-effects. bool mayHaveSideEffects() const { return MayHaveSideEffects; } - /// Returns true if the intrinsic is safe to speculatively execute. - bool isSafeToSpeculativelyExecute() const { - return IsSafeToSpeculativelyExecute; - } - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. 
void print(raw_ostream &O, const Twine &Indent, diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index a9556febb5f34..b2a8d0a210755 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -30,7 +30,6 @@ #include "llvm/Analysis/InstSimplifyFolder.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionPatternMatch.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" @@ -2103,23 +2102,12 @@ void VPlanTransforms::cse(VPlan &Plan) { } } -static bool isSafeToSpeculativelyExecute(VPRecipeBase *R) { - if (auto *WC = dyn_cast(R)) - return WC->getCalledScalarFunction()->isSpeculatable(); - if (auto *WI = dyn_cast(R)) - return WI->isSafeToSpeculativelyExecute(); - if (auto *RepR = dyn_cast(R)) - return isSafeToSpeculativelyExecute(RepR->getUnderlyingInstr()); - return !R->mayHaveSideEffects(); -} - /// Move loop-invariant recipes out of the vector loop region in \p Plan. static void licm(VPlan &Plan) { VPBasicBlock *Preheader = Plan.getVectorPreheader(); // Return true if we do not know how to (mechanically) hoist a given recipe - // out of a loop region. Does not address legality concerns such as aliasing - // or speculation safety. + // out of a loop region. auto CannotHoistRecipe = [](VPRecipeBase &R) { // TODO: Relax checks in the future, e.g. we could also hoist reads, if // their memory location is not modified in the vector loop. @@ -2133,16 +2121,16 @@ static void licm(VPlan &Plan) { // Hoist any loop invariant recipes from the vector loop region to the // preheader. Preform a shallow traversal of the vector loop region, to - // exclude recipes in replicate regions. + // exclude recipes in replicate regions. 
Since the vector loop region is + // guaranteed to execute, if the vector pre-header is, we don't need to check + // speculation safety. VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); - bool GuaranteedToExecute = Preheader->getSingleSuccessor() == LoopRegion; for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( vp_depth_first_shallow(LoopRegion->getEntry()))) { for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { if (CannotHoistRecipe(R)) continue; - if ((!GuaranteedToExecute && !isSafeToSpeculativelyExecute(&R)) || - any_of(R.operands(), [](VPValue *Op) { + if (any_of(R.operands(), [](VPValue *Op) { return !Op->isDefinedOutsideLoopRegions(); })) continue; diff --git a/llvm/test/Transforms/LoopVectorize/X86/funclet.ll b/llvm/test/Transforms/LoopVectorize/X86/funclet.ll index f5a30bef177df..995fcfd9ae688 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/funclet.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/funclet.ll @@ -1,35 +1,11 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -S -passes=loop-vectorize < %s | FileCheck %s target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" target triple = "i686-pc-windows-msvc18.0.0" define void @test1() #0 personality ptr @__CxxFrameHandler3 { -; CHECK-LABEL: define void @test1( -; CHECK-SAME: ) #[[ATTR0:[0-9]+]] personality ptr @__CxxFrameHandler3 { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: invoke void @_CxxThrowException(ptr null, ptr null) -; CHECK-NEXT: to label %[[UNREACHABLE:.*]] unwind label %[[CATCH_DISPATCH:.*]] -; CHECK: [[CATCH_DISPATCH]]: -; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch] unwind to caller -; CHECK: [[CATCH:.*]]: -; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [ptr null, i32 64, ptr null] -; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[FOR_COND_CLEANUP:.*]]: -; CHECK-NEXT: catchret from [[TMP1]] to label %[[TRY_CONT:.*]] -; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[I_07:%.*]] = phi 
i32 [ 0, %[[CATCH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-NEXT: [[CALL:%.*]] = call double @floor(double 1.000000e+00) #[[ATTR1:[0-9]+]] [ "funclet"(token [[TMP1]]) ] -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]] -; CHECK: [[TRY_CONT]]: -; CHECK-NEXT: ret void -; CHECK: [[UNREACHABLE]]: -; CHECK-NEXT: unreachable -; entry: invoke void @_CxxThrowException(ptr null, ptr null) - to label %unreachable unwind label %catch.dispatch + to label %unreachable unwind label %catch.dispatch catch.dispatch: ; preds = %entry %0 = catchswitch within none [label %catch] unwind to caller @@ -43,7 +19,8 @@ for.cond.cleanup: ; preds = %for.body for.body: ; preds = %for.body, %catch %i.07 = phi i32 [ 0, %catch ], [ %inc, %for.body ] - %call = call double @floor(double 1.0) #1 [ "funclet"(token %1) ] + %tofp = uitofp i32 %i.07 to double + %call = call double @floor(double %tofp) #1 [ "funclet"(token %1) ] %inc = add nuw nsw i32 %i.07, 1 %exitcond = icmp eq i32 %inc, 1024 br i1 %exitcond, label %for.cond.cleanup, label %for.body @@ -55,6 +32,9 @@ unreachable: ; preds = %entry unreachable } +; CHECK-LABEL: define void @test1( +; CHECK: %[[cpad:.*]] = catchpad within {{.*}} [ptr null, i32 64, ptr null] +; CHECK: call <16 x double> @llvm.floor.v16f64(<16 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ] declare x86_stdcallcc void @_CxxThrowException(ptr, ptr) From 31127155cae3dea5f42d35f04448c0e4a8c08fdf Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Tue, 14 Oct 2025 17:02:31 +0100 Subject: [PATCH 3/4] [VPlan] Address review: patch is now NFC --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 11 +++++++---- .../extractvalue-no-scalarization-required.ll | 14 ++++++-------- llvm/test/Transforms/LoopVectorize/X86/funclet.ll | 3 +-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git 
a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index b2a8d0a210755..6767718d06aaf 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2111,7 +2111,7 @@ static void licm(VPlan &Plan) { auto CannotHoistRecipe = [](VPRecipeBase &R) { // TODO: Relax checks in the future, e.g. we could also hoist reads, if // their memory location is not modified in the vector loop. - if (R.mayReadOrWriteMemory() || R.isPhi()) + if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi()) return true; // Allocas cannot be hoisted. @@ -2121,10 +2121,13 @@ static void licm(VPlan &Plan) { // Hoist any loop invariant recipes from the vector loop region to the // preheader. Preform a shallow traversal of the vector loop region, to - // exclude recipes in replicate regions. Since the vector loop region is - // guaranteed to execute, if the vector pre-header is, we don't need to check - // speculation safety. + // exclude recipes in replicate regions. VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + + // Since the vector loop region is guaranteed to execute, if the vector + // pre-header is, we don't need to check speculation safety. 
+ assert(Preheader->getSingleSuccessor() == LoopRegion && + "Expected vector prehader's successor to be the vector loop region"); for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( vp_depth_first_shallow(LoopRegion->getEntry()))) { for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll index 48e64771ac152..5970608794b55 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll @@ -45,9 +45,8 @@ define void @test1(ptr %dst, {i64, i64} %sv) { ; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; FORCED: [[MIDDLE_BLOCK]]: -; FORCED-NEXT: br label %[[EXIT:.*]] -; FORCED: [[EXIT]]: -; FORCED-NEXT: ret void +; FORCED-NEXT: br [[EXIT:label %.*]] +; FORCED: [[SCALAR_PH:.*:]] ; entry: br label %loop.body @@ -92,19 +91,18 @@ define void @test_getVectorCallCost(ptr %dst, {float, float} %sv) { ; FORCED-NEXT: [[TMP4:%.*]] = extractvalue { float, float } [[SV]], 1 ; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i64 0 ; FORCED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer -; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT2]]) ; FORCED-NEXT: br label %[[VECTOR_BODY:.*]] ; FORCED: [[VECTOR_BODY]]: ; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[DST]], i32 [[INDEX]] +; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x 
float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT2]]) ; FORCED-NEXT: store <2 x float> [[TMP2]], ptr [[TMP1]], align 4 ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 -; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; FORCED: [[MIDDLE_BLOCK]]: -; FORCED-NEXT: br label %[[EXIT:.*]] -; FORCED: [[EXIT]]: -; FORCED-NEXT: ret void +; FORCED-NEXT: br [[EXIT:label %.*]] +; FORCED: [[SCALAR_PH:.*:]] ; entry: br label %loop.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/funclet.ll b/llvm/test/Transforms/LoopVectorize/X86/funclet.ll index 995fcfd9ae688..84d21110e8da9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/funclet.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/funclet.ll @@ -19,8 +19,7 @@ for.cond.cleanup: ; preds = %for.body for.body: ; preds = %for.body, %catch %i.07 = phi i32 [ 0, %catch ], [ %inc, %for.body ] - %tofp = uitofp i32 %i.07 to double - %call = call double @floor(double %tofp) #1 [ "funclet"(token %1) ] + %call = call double @floor(double 1.0) #1 [ "funclet"(token %1) ] %inc = add nuw nsw i32 %i.07, 1 %exitcond = icmp eq i32 %inc, 1024 br i1 %exitcond, label %for.cond.cleanup, label %for.body From c3ec44aad3773368620800eb216da16019709cb9 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 15 Oct 2025 12:01:44 +0100 Subject: [PATCH 4/4] [VPlan] Improve comment (NFC) --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 6767718d06aaf..918533868f26b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2121,11 +2121,10 @@ static 
void licm(VPlan &Plan) { // Hoist any loop invariant recipes from the vector loop region to the // preheader. Preform a shallow traversal of the vector loop region, to - // exclude recipes in replicate regions. + // exclude recipes in replicate regions. Since the top-level blocks in the + // vector loop region are guaranteed to execute if the vector pre-header is, + // we don't need to check speculation safety. VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); - - // Since the vector loop region is guaranteed to execute, if the vector - // pre-header is, we don't need to check speculation safety. assert(Preheader->getSingleSuccessor() == LoopRegion && "Expected vector prehader's successor to be the vector loop region"); for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(