Skip to content

Commit aedd1bb

Browse files
committed
[VPlan] Address review, fix crash
1 parent 5e6ba18 commit aedd1bb

File tree

5 files changed

+96
-71
lines changed

5 files changed

+96
-71
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2728,8 +2728,15 @@ InstructionCost VPBranchOnMaskRecipe::computeCost(ElementCount VF,
27282728

27292729
void VPPredInstPHIRecipe::execute(VPTransformState &State) {
27302730
assert(State.Lane && "Predicated instruction PHI works per instance.");
2731-
Instruction *ScalarPredInst =
2732-
cast<Instruction>(State.get(getOperand(0), *State.Lane));
2731+
Value *ScalarPred = State.get(getOperand(0), *State.Lane);
2732+
Instruction *ScalarPredInst = dyn_cast<Instruction>(ScalarPred);
2733+
if (!ScalarPredInst) {
2734+
if (State.hasScalarValue(this, *State.Lane))
2735+
State.reset(this, ScalarPred, *State.Lane);
2736+
else
2737+
State.set(this, ScalarPred, *State.Lane);
2738+
return;
2739+
}
27332740
BasicBlock *PredicatedBB = ScalarPredInst->getParent();
27342741
BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
27352742
assert(PredicatingBB && "Predicated block has no single predecessor.");

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 51 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -938,79 +938,71 @@ static void recursivelyDeleteDeadRecipes(VPValue *V) {
938938
}
939939
}
940940

941-
class VPConstantFolder {
942-
TargetFolder Folder;
943-
VPTypeAnalysis TypeInfo;
944-
945-
public:
946-
VPConstantFolder(const DataLayout &DL, const VPTypeAnalysis &TypeInfo)
947-
: Folder(DL), TypeInfo(TypeInfo) {}
948-
949-
Value *tryToConstantFold(VPRecipeBase &R, unsigned Opcode,
950-
ArrayRef<VPValue *> Operands) {
951-
SmallVector<Value *, 4> Ops;
952-
for (VPValue *Op : Operands) {
953-
if (!Op->isLiveIn() || !Op->getLiveInIRValue())
954-
return nullptr;
955-
Ops.emplace_back(Op->getLiveInIRValue());
956-
}
957-
switch (Opcode) {
958-
case Instruction::BinaryOps::Add:
959-
case Instruction::BinaryOps::Sub:
960-
case Instruction::BinaryOps::Mul:
961-
case Instruction::BinaryOps::AShr:
962-
case Instruction::BinaryOps::LShr:
963-
case Instruction::BinaryOps::And:
964-
case Instruction::BinaryOps::Or:
965-
case Instruction::BinaryOps::Xor:
966-
return Folder.FoldBinOp(static_cast<Instruction::BinaryOps>(Opcode),
967-
Ops[0], Ops[1]);
968-
case VPInstruction::LogicalAnd:
969-
return Folder.FoldSelect(Ops[0], Ops[1],
970-
ConstantInt::getNullValue(Ops[1]->getType()));
971-
case VPInstruction::Not:
972-
return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
973-
Constant::getAllOnesValue(Ops[0]->getType()));
974-
case Instruction::Select:
975-
return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
976-
case Instruction::ICmp:
977-
case Instruction::FCmp:
978-
return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0],
941+
/// Try to fold \p R using TargetFolder to a constant. Will succeed for a
942+
/// handled \p Opcode if all \p Operands are constant.
943+
static Value *tryToConstantFold(const VPRecipeBase &R, unsigned Opcode,
944+
ArrayRef<VPValue *> Operands,
945+
const DataLayout &DL,
946+
VPTypeAnalysis &TypeInfo) {
947+
SmallVector<Value *, 4> Ops;
948+
for (VPValue *Op : Operands) {
949+
if (!Op->isLiveIn() || !Op->getLiveInIRValue())
950+
return nullptr;
951+
Ops.push_back(Op->getLiveInIRValue());
952+
}
953+
954+
TargetFolder Folder(DL);
955+
if (Instruction::isBinaryOp(Opcode))
956+
return Folder.FoldBinOp(static_cast<Instruction::BinaryOps>(Opcode), Ops[0],
979957
Ops[1]);
980-
case Instruction::GetElementPtr:
981-
case VPInstruction::PtrAdd:
982-
return Folder.FoldGEP(TypeInfo.inferScalarType(R.getVPSingleValue()),
983-
Ops[0], drop_begin(Ops),
984-
cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
985-
case Instruction::InsertElement:
986-
return Folder.FoldInsertElement(Ops[0], Ops[1], Ops[2]);
987-
case Instruction::ExtractElement:
988-
return Folder.FoldExtractElement(Ops[0], Ops[1]);
989-
case Instruction::CastOps::SExt:
990-
case Instruction::CastOps::ZExt:
991-
case Instruction::CastOps::Trunc:
992-
return Folder.FoldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0],
993-
TypeInfo.inferScalarType(R.getVPSingleValue()));
994-
}
995-
return nullptr;
958+
if (Instruction::isCast(Opcode))
959+
return Folder.FoldCast(static_cast<Instruction::CastOps>(Opcode), Ops[0],
960+
TypeInfo.inferScalarType(R.getVPSingleValue()));
961+
switch (Opcode) {
962+
case VPInstruction::LogicalAnd:
963+
return Folder.FoldSelect(Ops[0], Ops[1],
964+
ConstantInt::getNullValue(Ops[1]->getType()));
965+
case VPInstruction::Not:
966+
return Folder.FoldBinOp(Instruction::BinaryOps::Xor, Ops[0],
967+
Constant::getAllOnesValue(Ops[0]->getType()));
968+
case Instruction::Select:
969+
return Folder.FoldSelect(Ops[0], Ops[1], Ops[2]);
970+
case Instruction::ICmp:
971+
case Instruction::FCmp:
972+
return Folder.FoldCmp(cast<VPRecipeWithIRFlags>(R).getPredicate(), Ops[0],
973+
Ops[1]);
974+
case Instruction::GetElementPtr: {
975+
auto &RFlags = cast<VPRecipeWithIRFlags>(R);
976+
auto *GEP = cast<GetElementPtrInst>(RFlags.getUnderlyingInstr());
977+
return Folder.FoldGEP(GEP->getSourceElementType(), Ops[0], drop_begin(Ops),
978+
RFlags.getGEPNoWrapFlags());
979+
}
980+
case VPInstruction::PtrAdd:
981+
return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()), Ops[0],
982+
Ops[1],
983+
cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
984+
case Instruction::InsertElement:
985+
return Folder.FoldInsertElement(Ops[0], Ops[1], Ops[2]);
986+
case Instruction::ExtractElement:
987+
return Folder.FoldExtractElement(Ops[0], Ops[1]);
996988
}
997-
};
989+
return nullptr;
990+
}
998991

999992
/// Try to simplify recipe \p R.
1000993
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo,
1001994
const DataLayout &DL) {
1002995
using namespace llvm::VPlanPatternMatch;
1003996

1004997
// Constant folding.
1005-
VPConstantFolder Folder(DL, TypeInfo);
1006998
if (TypeSwitch<VPRecipeBase *, bool>(&R)
1007999
.Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
10081000
VPReplicateRecipe>([&](auto *I) {
10091001
VPlan *Plan = R.getParent()->getPlan();
1010-
Value *V =
1011-
Folder.tryToConstantFold(R, I->getOpcode(), I->operands());
1002+
Value *V = tryToConstantFold(*I, I->getOpcode(), I->operands(), DL,
1003+
TypeInfo);
10121004
if (V)
1013-
R.getVPSingleValue()->replaceAllUsesWith(Plan->getOrAddLiveIn(V));
1005+
I->replaceAllUsesWith(Plan->getOrAddLiveIn(V));
10141006
return V;
10151007
})
10161008
.Default([](auto *) { return false; }))

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-remove-loop-region.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,8 +13,7 @@ define void @load_store_interleave_group_tc_2(ptr noalias %data) {
1313
; VF2: [[VECTOR_PH]]:
1414
; VF2-NEXT: br label %[[VECTOR_BODY:.*]]
1515
; VF2: [[VECTOR_BODY]]:
16-
; VF2-NEXT: [[TMP0:%.*]] = shl nsw i64 0, 1
17-
; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 [[TMP0]]
16+
; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[DATA]], i64 0
1817
; VF2-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
1918
; VF2-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
2019
; VF2-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 1, i32 3>

llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -563,29 +563,24 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) {
563563
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
564564
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_SREM_IF:%.*]], label [[PRED_SREM_CONTINUE:%.*]]
565565
; CHECK: pred.srem.if:
566-
; CHECK-NEXT: [[TMP4:%.*]] = srem i64 3, 0
567566
; CHECK-NEXT: br label [[PRED_SREM_CONTINUE]]
568567
; CHECK: pred.srem.continue:
569-
; CHECK-NEXT: [[TMP5:%.*]] = phi i64 [ poison, %vector.body ], [ [[TMP4]], [[PRED_SREM_IF]] ]
570568
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
571569
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_SREM_IF1:%.*]], label [[PRED_SREM_CONTINUE2:%.*]]
572570
; CHECK: pred.srem.if1:
573-
; CHECK-NEXT: [[TMP7:%.*]] = srem i64 3, 0
574571
; CHECK-NEXT: br label [[PRED_SREM_CONTINUE2]]
575572
; CHECK: pred.srem.continue2:
576573
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
577574
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_SREM_IF3:%.*]], label [[PRED_SREM_CONTINUE4:%.*]]
578575
; CHECK: pred.srem.if3:
579-
; CHECK-NEXT: [[TMP10:%.*]] = srem i64 3, 0
580576
; CHECK-NEXT: br label [[PRED_SREM_CONTINUE4]]
581577
; CHECK: pred.srem.continue4:
582578
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
583579
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_SREM_IF5:%.*]], label [[PRED_SREM_CONTINUE6]]
584580
; CHECK: pred.srem.if5:
585-
; CHECK-NEXT: [[TMP13:%.*]] = srem i64 3, 0
586581
; CHECK-NEXT: br label [[PRED_SREM_CONTINUE6]]
587582
; CHECK: pred.srem.continue6:
588-
; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP5]], -3
583+
; CHECK-NEXT: [[TMP15:%.*]] = add i64 poison, -3
589584
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], [[TMP15]]
590585
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr [5 x i8], ptr @c, i64 0, i64 [[TMP16]]
591586
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP17]], i32 0

llvm/test/Transforms/LoopVectorize/constantfolder-infer-correct-gepty.ll

Lines changed: 34 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,41 @@
1-
; REQUIRES: asserts
2-
; RUN: not --crash opt -passes=loop-vectorize -force-vector-width=8 -disable-output %s
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
2+
; RUN: opt -passes=loop-vectorize -force-vector-width=8 -S %s | FileCheck %s
33

44
@postscale = external constant [64 x float]
55

66
define void @test(ptr %data) {
7+
; CHECK-LABEL: define void @test(
8+
; CHECK-SAME: ptr [[DATA:%.*]]) {
9+
; CHECK-NEXT: [[ENTRY:.*]]:
10+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
11+
; CHECK: [[VECTOR_PH]]:
12+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
13+
; CHECK: [[VECTOR_BODY]]:
14+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr getelementptr inbounds nuw (i8, ptr @postscale, i64 4), align 4, !tbaa [[TBAA0:![0-9]+]]
15+
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float> [[WIDE_LOAD]])
16+
; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i64> [[TMP2]] to <8 x i16>
17+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i16> [[TMP3]], i32 7
18+
; CHECK-NEXT: store i16 [[TMP4]], ptr [[DATA]], align 2, !tbaa [[TBAA4:![0-9]+]]
19+
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
20+
; CHECK: [[MIDDLE_BLOCK]]:
21+
; CHECK-NEXT: br i1 true, label %[[END:.*]], label %[[SCALAR_PH]]
22+
; CHECK: [[SCALAR_PH]]:
23+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 8, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
24+
; CHECK-NEXT: br label %[[LOOP:.*]]
25+
; CHECK: [[LOOP]]:
26+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
27+
; CHECK-NEXT: [[OR_IV_1:%.*]] = or disjoint i64 [[IV]], 1
28+
; CHECK-NEXT: [[GEP_POSTSCALE:%.*]] = getelementptr [64 x float], ptr @postscale, i64 0, i64 [[OR_IV_1]]
29+
; CHECK-NEXT: [[LOAD_POSTSCALE:%.*]] = load float, ptr [[GEP_POSTSCALE]], align 4, !tbaa [[TBAA0]]
30+
; CHECK-NEXT: [[LRINT:%.*]] = tail call i64 @llvm.lrint.i64.f32(float [[LOAD_POSTSCALE]])
31+
; CHECK-NEXT: [[LRINT_TRUNC:%.*]] = trunc i64 [[LRINT]] to i16
32+
; CHECK-NEXT: store i16 [[LRINT_TRUNC]], ptr [[DATA]], align 2, !tbaa [[TBAA4]]
33+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
34+
; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 8
35+
; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[END]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
36+
; CHECK: [[END]]:
37+
; CHECK-NEXT: ret void
38+
;
739
entry:
840
br label %loop
941

0 commit comments

Comments
 (0)