Skip to content

Commit 138ed04

Browse files
committed
[VPlan] Extend licm with speculative-exec-check
In the absence of additional information, hoisting a recipe out of the loop is unsafe only if the recipe reads or writes memory. Any other recipe is safe to hoist if it is guaranteed to execute, or if it is safe to execute speculatively. This patch aligns the VPlan licm more closely with the IR-level LICM.
1 parent 739bfde commit 138ed04

File tree

4 files changed

+60
-13
lines changed

4 files changed

+60
-13
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1555,6 +1555,9 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
15551555
/// True if the intrinsic may have side-effects.
15561556
bool MayHaveSideEffects;
15571557

1558+
/// True if the intrinsic is safe to speculatively execute.
1559+
bool IsSafeToSpeculativelyExecute;
1560+
15581561
public:
15591562
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID,
15601563
ArrayRef<VPValue *> CallArguments, Type *Ty,
@@ -1578,6 +1581,7 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
15781581
MayHaveSideEffects = MayWriteToMemory ||
15791582
!Attrs.hasAttribute(Attribute::NoUnwind) ||
15801583
!Attrs.hasAttribute(Attribute::WillReturn);
1584+
IsSafeToSpeculativelyExecute = Attrs.hasAttribute(Attribute::Speculatable);
15811585
}
15821586

15831587
~VPWidenIntrinsicRecipe() override = default;
@@ -1617,6 +1621,11 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
16171621
/// Returns true if the intrinsic may have side-effects.
16181622
bool mayHaveSideEffects() const { return MayHaveSideEffects; }
16191623

1624+
/// Returns true if the intrinsic is safe to speculatively execute.
1625+
bool isSafeToSpeculativelyExecute() const {
1626+
return IsSafeToSpeculativelyExecute;
1627+
}
1628+
16201629
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
16211630
/// Print the recipe.
16221631
void print(raw_ostream &O, const Twine &Indent,

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "llvm/Analysis/InstSimplifyFolder.h"
3131
#include "llvm/Analysis/LoopInfo.h"
3232
#include "llvm/Analysis/ScalarEvolutionPatternMatch.h"
33+
#include "llvm/Analysis/ValueTracking.h"
3334
#include "llvm/Analysis/VectorUtils.h"
3435
#include "llvm/IR/Intrinsics.h"
3536
#include "llvm/IR/MDBuilder.h"
@@ -2102,6 +2103,16 @@ void VPlanTransforms::cse(VPlan &Plan) {
21022103
}
21032104
}
21042105

2106+
static bool isSafeToSpeculativelyExecute(VPRecipeBase *R) {
2107+
if (auto *WC = dyn_cast<VPWidenCallRecipe>(R))
2108+
return WC->getCalledScalarFunction()->isSpeculatable();
2109+
if (auto *WI = dyn_cast<VPWidenIntrinsicRecipe>(R))
2110+
return WI->isSafeToSpeculativelyExecute();
2111+
if (auto *RepR = dyn_cast<VPReplicateRecipe>(R))
2112+
return isSafeToSpeculativelyExecute(RepR->getUnderlyingInstr());
2113+
return !R->mayHaveSideEffects();
2114+
}
2115+
21052116
/// Move loop-invariant recipes out of the vector loop region in \p Plan.
21062117
static void licm(VPlan &Plan) {
21072118
VPBasicBlock *Preheader = Plan.getVectorPreheader();
@@ -2110,6 +2121,11 @@ static void licm(VPlan &Plan) {
21102121
// out of a loop region. Does not address legality concerns such as aliasing
21112122
// or speculation safety.
21122123
auto CannotHoistRecipe = [](VPRecipeBase &R) {
2124+
// TODO: Relax checks in the future, e.g. we could also hoist reads, if
2125+
// their memory location is not modified in the vector loop.
2126+
if (R.mayReadOrWriteMemory() || R.isPhi())
2127+
return true;
2128+
21132129
// Allocas cannot be hoisted.
21142130
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
21152131
return RepR && RepR->getOpcode() == Instruction::Alloca;
@@ -2119,14 +2135,13 @@ static void licm(VPlan &Plan) {
21192135
// preheader. Perform a shallow traversal of the vector loop region, to
21202136
// exclude recipes in replicate regions.
21212137
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
2138+
bool GuaranteedToExecute = Preheader->getSingleSuccessor() == LoopRegion;
21222139
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
21232140
vp_depth_first_shallow(LoopRegion->getEntry()))) {
21242141
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
21252142
if (CannotHoistRecipe(R))
21262143
continue;
2127-
// TODO: Relax checks in the future, e.g. we could also hoist reads, if
2128-
// their memory location is not modified in the vector loop.
2129-
if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi() ||
2144+
if ((!GuaranteedToExecute && !isSafeToSpeculativelyExecute(&R)) ||
21302145
any_of(R.operands(), [](VPValue *Op) {
21312146
return !Op->isDefinedOutsideLoopRegions();
21322147
}))

llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,9 @@ define void @test1(ptr %dst, {i64, i64} %sv) {
4545
; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
4646
; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4747
; FORCED: [[MIDDLE_BLOCK]]:
48-
; FORCED-NEXT: br [[EXIT:label %.*]]
49-
; FORCED: [[SCALAR_PH:.*:]]
48+
; FORCED-NEXT: br label %[[EXIT:.*]]
49+
; FORCED: [[EXIT]]:
50+
; FORCED-NEXT: ret void
5051
;
5152
entry:
5253
br label %loop.body
@@ -91,18 +92,19 @@ define void @test_getVectorCallCost(ptr %dst, {float, float} %sv) {
9192
; FORCED-NEXT: [[TMP4:%.*]] = extractvalue { float, float } [[SV]], 1
9293
; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i64 0
9394
; FORCED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer
95+
; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT2]])
9496
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
9597
; FORCED: [[VECTOR_BODY]]:
9698
; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
9799
; FORCED-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[DST]], i32 [[INDEX]]
98-
; FORCED-NEXT: [[TMP2:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[BROADCAST_SPLAT]], <2 x float> [[BROADCAST_SPLAT2]])
99100
; FORCED-NEXT: store <2 x float> [[TMP2]], ptr [[TMP1]], align 4
100101
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
101102
; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
102-
; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
103+
; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
103104
; FORCED: [[MIDDLE_BLOCK]]:
104-
; FORCED-NEXT: br [[EXIT:label %.*]]
105-
; FORCED: [[SCALAR_PH:.*:]]
105+
; FORCED-NEXT: br label %[[EXIT:.*]]
106+
; FORCED: [[EXIT]]:
107+
; FORCED-NEXT: ret void
106108
;
107109
entry:
108110
br label %loop.body

llvm/test/Transforms/LoopVectorize/X86/funclet.ll

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,35 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
12
; RUN: opt -S -passes=loop-vectorize < %s | FileCheck %s
23
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
34
target triple = "i686-pc-windows-msvc18.0.0"
45

56
define void @test1() #0 personality ptr @__CxxFrameHandler3 {
7+
; CHECK-LABEL: define void @test1(
8+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] personality ptr @__CxxFrameHandler3 {
9+
; CHECK-NEXT: [[ENTRY:.*:]]
10+
; CHECK-NEXT: invoke void @_CxxThrowException(ptr null, ptr null)
11+
; CHECK-NEXT: to label %[[UNREACHABLE:.*]] unwind label %[[CATCH_DISPATCH:.*]]
12+
; CHECK: [[CATCH_DISPATCH]]:
13+
; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch] unwind to caller
14+
; CHECK: [[CATCH:.*]]:
15+
; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [ptr null, i32 64, ptr null]
16+
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
17+
; CHECK: [[FOR_COND_CLEANUP:.*]]:
18+
; CHECK-NEXT: catchret from [[TMP1]] to label %[[TRY_CONT:.*]]
19+
; CHECK: [[FOR_BODY]]:
20+
; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ 0, %[[CATCH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
21+
; CHECK-NEXT: [[CALL:%.*]] = call double @floor(double 1.000000e+00) #[[ATTR1:[0-9]+]] [ "funclet"(token [[TMP1]]) ]
22+
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1
23+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1024
24+
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
25+
; CHECK: [[TRY_CONT]]:
26+
; CHECK-NEXT: ret void
27+
; CHECK: [[UNREACHABLE]]:
28+
; CHECK-NEXT: unreachable
29+
;
630
entry:
731
invoke void @_CxxThrowException(ptr null, ptr null)
8-
to label %unreachable unwind label %catch.dispatch
32+
to label %unreachable unwind label %catch.dispatch
933

1034
catch.dispatch: ; preds = %entry
1135
%0 = catchswitch within none [label %catch] unwind to caller
@@ -31,9 +55,6 @@ unreachable: ; preds = %entry
3155
unreachable
3256
}
3357

34-
; CHECK-LABEL: define void @test1(
35-
; CHECK: %[[cpad:.*]] = catchpad within {{.*}} [ptr null, i32 64, ptr null]
36-
; CHECK: call <16 x double> @llvm.floor.v16f64(<16 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ]
3758

3859
declare x86_stdcallcc void @_CxxThrowException(ptr, ptr)
3960

0 commit comments

Comments
 (0)