Skip to content

Commit b01edfb

Browse files
committed
[VPlan] Extend licm with speculative-exec-check
In the absence of additional information, hoisting a recipe is unsafe only if the recipe reads or writes memory. A recipe that does not access memory can be hoisted if it is guaranteed to execute, or if it is safe to execute speculatively. This patch aligns the VPlan LICM more closely with the IR-level LICM.
1 parent ad64bff commit b01edfb

File tree

4 files changed

+60
-13
lines changed

4 files changed

+60
-13
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1554,6 +1554,9 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
15541554
/// True if the intrinsic may have side-effects.
15551555
bool MayHaveSideEffects;
15561556

1557+
/// True if the intrinsic is safe to speculatively execute.
1558+
bool IsSafeToSpeculativelyExecute;
1559+
15571560
public:
15581561
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID,
15591562
ArrayRef<VPValue *> CallArguments, Type *Ty,
@@ -1577,6 +1580,7 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
15771580
MayHaveSideEffects = MayWriteToMemory ||
15781581
!Attrs.hasAttribute(Attribute::NoUnwind) ||
15791582
!Attrs.hasAttribute(Attribute::WillReturn);
1583+
IsSafeToSpeculativelyExecute = Attrs.hasAttribute(Attribute::Speculatable);
15801584
}
15811585

15821586
~VPWidenIntrinsicRecipe() override = default;
@@ -1616,6 +1620,11 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
16161620
/// Returns true if the intrinsic may have side-effects. This reports the
/// MayHaveSideEffects flag stored on the recipe.
16171621
bool mayHaveSideEffects() const { return MayHaveSideEffects; }
16181622

1623+
/// Returns true if the intrinsic is safe to speculatively execute. This
/// reports the IsSafeToSpeculativelyExecute flag stored on the recipe.
1624+
bool isSafeToSpeculativelyExecute() const {
1625+
return IsSafeToSpeculativelyExecute;
1626+
}
1627+
16191628
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
16201629
/// Print the recipe.
16211630
void print(raw_ostream &O, const Twine &Indent,

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "llvm/Analysis/InstSimplifyFolder.h"
3131
#include "llvm/Analysis/LoopInfo.h"
3232
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
33+
#include "llvm/Analysis/ValueTracking.h"
3334
#include "llvm/Analysis/VectorUtils.h"
3435
#include "llvm/IR/Intrinsics.h"
3536
#include "llvm/IR/MDBuilder.h"
@@ -2087,6 +2088,16 @@ void VPlanTransforms::cse(VPlan &Plan) {
20872088
}
20882089
}
20892090

2091+
/// Decide whether recipe \p R may be executed speculatively.
///
/// Widened calls are answered by the speculatable query on the called scalar
/// function, widened intrinsics by the query on the recipe itself, and
/// replicate recipes by the query on their underlying instruction. Any other
/// recipe is considered safe exactly when it reports no side effects.
static bool isSafeToSpeculativelyExecute(VPRecipeBase *R) {
  if (auto *WidenCall = dyn_cast<VPWidenCallRecipe>(R)) {
    const auto *ScalarCallee = WidenCall->getCalledScalarFunction();
    return ScalarCallee->isSpeculatable();
  }

  if (auto *WidenIntrinsic = dyn_cast<VPWidenIntrinsicRecipe>(R))
    return WidenIntrinsic->isSafeToSpeculativelyExecute();

  if (auto *Replicate = dyn_cast<VPReplicateRecipe>(R)) {
    // The argument is the underlying instruction rather than a recipe, so
    // this is presumably the IR-level query and not a recursive call.
    auto *Underlying = Replicate->getUnderlyingInstr();
    return isSafeToSpeculativelyExecute(Underlying);
  }

  // NOTE(review): the absence of side effects is used as a stand-in for
  // speculation safety for all remaining recipe kinds; confirm this also
  // holds for recipes whose operation can trap (e.g. integer division).
  const bool HasSideEffects = R->mayHaveSideEffects();
  return !HasSideEffects;
}
2100+
20902101
/// Move loop-invariant recipes out of the vector loop region in \p Plan.
20912102
static void licm(VPlan &Plan) {
20922103
VPBasicBlock *Preheader = Plan.getVectorPreheader();
@@ -2095,6 +2106,11 @@ static void licm(VPlan &Plan) {
20952106
// out of a loop region. Does not address legality concerns such as aliasing
20962107
// or speculation safety.
20972108
auto CannotHoistRecipe = [](VPRecipeBase &R) {
2109+
// TODO: Relax checks in the future, e.g. we could also hoist reads, if
2110+
// their memory location is not modified in the vector loop.
2111+
if (R.mayReadOrWriteMemory() || R.isPhi())
2112+
return true;
2113+
20982114
// Allocas cannot be hoisted.
20992115
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
21002116
return RepR && RepR->getOpcode() == Instruction::Alloca;
@@ -2104,14 +2120,13 @@ static void licm(VPlan &Plan) {
21042120
// preheader. Perform a shallow traversal of the vector loop region, to
21052121
// exclude recipes in replicate regions.
21062122
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
2123+
bool GuaranteedToExecute = Preheader->getSingleSuccessor() == LoopRegion;
21072124
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
21082125
vp_depth_first_shallow(LoopRegion->getEntry()))) {
21092126
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
21102127
if (CannotHoistRecipe(R))
21112128
continue;
2112-
// TODO: Relax checks in the future, e.g. we could also hoist reads, if
2113-
// their memory location is not modified in the vector loop.
2114-
if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi() ||
2129+
if ((!GuaranteedToExecute && !isSafeToSpeculativelyExecute(&R)) ||
21152130
any_of(R.operands(), [](VPValue *Op) {
21162131
return !Op->isDefinedOutsideLoopRegions();
21172132
}))

llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,9 @@ define void @test1(ptr %dst, {i64, i64} %sv) {
4545
; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
4646
; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4747
; FORCED: [[MIDDLE_BLOCK]]:
48-
; FORCED-NEXT: br [[EXIT:label %.*]]
49-
; FORCED: [[SCALAR_PH:.*:]]
48+
; FORCED-NEXT: br label %[[EXIT:.*]]
49+
; FORCED: [[EXIT]]:
50+
; FORCED-NEXT: ret void
5051
;
5152
entry:
5253
br label %loop.body
@@ -91,18 +92,19 @@ define void @test_getVectorCallCost(ptr %dst, {float, float} %sv) {
9192
; FORCED-NEXT: [[TMP4:%.*]] = extractvalue { float, float } [[SV]], 1
9293
; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i64 0
9394
; FORCED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer
95+
; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT2]])
9496
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
9597
; FORCED: [[VECTOR_BODY]]:
9698
; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
9799
; FORCED-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[DST]], i32 [[INDEX]]
98-
; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT2]])
99100
; FORCED-NEXT: store <2 x float> [[TMP2]], ptr [[TMP1]], align 4
100101
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
101102
; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
102-
; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
103+
; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
103104
; FORCED: [[MIDDLE_BLOCK]]:
104-
; FORCED-NEXT: br [[EXIT:label %.*]]
105-
; FORCED: [[SCALAR_PH:.*:]]
105+
; FORCED-NEXT: br label %[[EXIT:.*]]
106+
; FORCED: [[EXIT]]:
107+
; FORCED-NEXT: ret void
106108
;
107109
entry:
108110
br label %loop.body

llvm/test/Transforms/LoopVectorize/X86/funclet.ll

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,35 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
12
; RUN: opt -S -passes=loop-vectorize < %s | FileCheck %s
23
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
34
target triple = "i686-pc-windows-msvc18.0.0"
45

56
define void @test1() #0 personality ptr @__CxxFrameHandler3 {
7+
; CHECK-LABEL: define void @test1(
8+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] personality ptr @__CxxFrameHandler3 {
9+
; CHECK-NEXT: [[ENTRY:.*:]]
10+
; CHECK-NEXT: invoke void @_CxxThrowException(ptr null, ptr null)
11+
; CHECK-NEXT: to label %[[UNREACHABLE:.*]] unwind label %[[CATCH_DISPATCH:.*]]
12+
; CHECK: [[CATCH_DISPATCH]]:
13+
; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch] unwind to caller
14+
; CHECK: [[CATCH:.*]]:
15+
; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [ptr null, i32 64, ptr null]
16+
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
17+
; CHECK: [[FOR_COND_CLEANUP:.*]]:
18+
; CHECK-NEXT: catchret from [[TMP1]] to label %[[TRY_CONT:.*]]
19+
; CHECK: [[FOR_BODY]]:
20+
; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ 0, %[[CATCH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
21+
; CHECK-NEXT: [[CALL:%.*]] = call double @floor(double 1.000000e+00) #[[ATTR1:[0-9]+]] [ "funclet"(token [[TMP1]]) ]
22+
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1
23+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024
24+
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
25+
; CHECK: [[TRY_CONT]]:
26+
; CHECK-NEXT: ret void
27+
; CHECK: [[UNREACHABLE]]:
28+
; CHECK-NEXT: unreachable
29+
;
630
entry:
731
invoke void @_CxxThrowException(ptr null, ptr null)
8-
to label %unreachable unwind label %catch.dispatch
32+
to label %unreachable unwind label %catch.dispatch
933

1034
catch.dispatch: ; preds = %entry
1135
%0 = catchswitch within none [label %catch] unwind to caller
@@ -31,9 +55,6 @@ unreachable: ; preds = %entry
3155
unreachable
3256
}
3357

34-
; CHECK-LABEL: define void @test1(
35-
; CHECK: %[[cpad:.*]] = catchpad within {{.*}} [ptr null, i32 64, ptr null]
36-
; CHECK: call <16 x double> @llvm.floor.v16f64(<16 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ]
3758

3859
declare x86_stdcallcc void @_CxxThrowException(ptr, ptr)
3960

0 commit comments

Comments
 (0)