Skip to content

Commit 6416af1

Browse files
committed
[LV] Partially replace the mask for div/rem safe-divisor select
Following up from #150368 and #156304, try to remove the header mask for div/rem. Since D130164, div and rem use a safe-divisor select to avoid UB. But the select uses a logical-and as its condition, like: ``` EMIT vp<%3> = step-vector i32 EMIT vp<%4> = icmp ult vp<%3>, vp<%2> EMIT vp<%5> = logical-and vp<%4>, ir<%cmp1.not> EMIT vp<%6> = select vp<%5>, ir<%2>, ir<1> ``` so the header mask cannot be removed. Instead, insert an icmp (divisor != 0) as the select mask. I think this still avoids UB and lets the header mask be removed at the same time.
1 parent 028bfa2 commit 6416af1

File tree

2 files changed

+21
-27
lines changed

2 files changed

+21
-27
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7745,6 +7745,14 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
77457745
if (CM.isPredicatedInst(I)) {
77467746
SmallVector<VPValue *> Ops(Operands);
77477747
VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
7748+
if (auto *Inst = dyn_cast<VPInstruction>(Mask)) {
7749+
if (Inst->getOpcode() == VPInstruction::LogicalAnd) {
7750+
VPValue *Zero =
7751+
Plan.getOrAddLiveIn(ConstantInt::get(I->getType(), 0));
7752+
Mask = Builder.createICmp(CmpInst::ICMP_NE, Ops[1], Zero);
7753+
}
7754+
}
7755+
77487756
VPValue *One =
77497757
Plan.getOrAddLiveIn(ConstantInt::get(I->getType(), 1u, false));
77507758
auto *SafeRHS = Builder.createSelect(Mask, Ops[1], One, I->getDebugLoc());

llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll

Lines changed: 13 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -271,19 +271,15 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
271271
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
272272
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
273273
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <vscale x 2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
274+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <vscale x 2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
275+
; CHECK-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x i64> splat (i64 1)
274276
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
275277
; CHECK: vector.body:
276278
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
277279
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
278280
; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
279-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP12]], i64 0
280-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
281-
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
282-
; CHECK-NEXT: [[TMP15:%.*]] = icmp ult <vscale x 2 x i32> [[TMP7]], [[BROADCAST_SPLAT2]]
283281
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
284282
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]])
285-
; CHECK-NEXT: [[TMP16:%.*]] = select <vscale x 2 x i1> [[TMP15]], <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i1> zeroinitializer
286-
; CHECK-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[TMP16]], <vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x i64> splat (i64 1)
287283
; CHECK-NEXT: [[TMP11:%.*]] = udiv <vscale x 2 x i64> [[WIDE_LOAD]], [[TMP10]]
288284
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[TMP11]], <vscale x 2 x i64> [[WIDE_LOAD]]
289285
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]])
@@ -303,15 +299,15 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
303299
; FIXED: vector.ph:
304300
; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0
305301
; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
306-
; FIXED-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer
307-
; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1)
302+
; FIXED-NEXT: [[TMP1:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer
303+
; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1)
308304
; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
309305
; FIXED: vector.body:
310306
; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
311307
; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
312308
; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
313309
; FIXED-NEXT: [[TMP8:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], [[TMP5]]
314-
; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD1]]
310+
; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD1]]
315311
; FIXED-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP2]], align 8
316312
; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
317313
; FIXED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
@@ -352,19 +348,15 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
352348
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
353349
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
354350
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <vscale x 2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
351+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <vscale x 2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
352+
; CHECK-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x i64> splat (i64 1)
355353
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
356354
; CHECK: vector.body:
357355
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
358356
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
359357
; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 2, i1 true)
360-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP12]], i64 0
361-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
362-
; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
363-
; CHECK-NEXT: [[TMP15:%.*]] = icmp ult <vscale x 2 x i32> [[TMP7]], [[BROADCAST_SPLAT2]]
364358
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
365359
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.vp.load.nxv2i64.p0(ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]])
366-
; CHECK-NEXT: [[TMP16:%.*]] = select <vscale x 2 x i1> [[TMP15]], <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i1> zeroinitializer
367-
; CHECK-NEXT: [[TMP10:%.*]] = select <vscale x 2 x i1> [[TMP16]], <vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x i64> splat (i64 1)
368360
; CHECK-NEXT: [[TMP11:%.*]] = sdiv <vscale x 2 x i64> [[WIDE_LOAD]], [[TMP10]]
369361
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP6]], <vscale x 2 x i64> [[TMP11]], <vscale x 2 x i64> [[WIDE_LOAD]]
370362
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[PREDPHI]], ptr align 8 [[TMP8]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP12]])
@@ -384,15 +376,15 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
384376
; FIXED: vector.ph:
385377
; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0
386378
; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
387-
; FIXED-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer
388-
; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1)
379+
; FIXED-NEXT: [[TMP1:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer
380+
; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1)
389381
; FIXED-NEXT: br label [[VECTOR_BODY:%.*]]
390382
; FIXED: vector.body:
391383
; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
392384
; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
393385
; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
394386
; FIXED-NEXT: [[TMP8:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], [[TMP5]]
395-
; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD1]]
387+
; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD1]]
396388
; FIXED-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP2]], align 8
397389
; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
398390
; FIXED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
@@ -575,16 +567,10 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) {
575567
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
576568
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ]
577569
; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true)
578-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[TMP12]], i64 0
579-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
580-
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i32> @llvm.stepvector.nxv16i32()
581-
; CHECK-NEXT: [[TMP15:%.*]] = icmp ult <vscale x 16 x i32> [[TMP6]], [[BROADCAST_SPLAT]]
582570
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
583571
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP7]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP12]])
584572
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD]], splat (i8 -128)
585-
; CHECK-NEXT: [[TMP16:%.*]] = select <vscale x 16 x i1> [[TMP15]], <vscale x 16 x i1> [[TMP9]], <vscale x 16 x i1> zeroinitializer
586-
; CHECK-NEXT: [[TMP10:%.*]] = select <vscale x 16 x i1> [[TMP16]], <vscale x 16 x i8> splat (i8 -1), <vscale x 16 x i8> splat (i8 1)
587-
; CHECK-NEXT: [[TMP11:%.*]] = sdiv <vscale x 16 x i8> [[WIDE_LOAD]], [[TMP10]]
573+
; CHECK-NEXT: [[TMP11:%.*]] = sdiv <vscale x 16 x i8> [[WIDE_LOAD]], splat (i8 -1)
588574
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 16 x i1> [[TMP9]], <vscale x 16 x i8> [[TMP11]], <vscale x 16 x i8> [[WIDE_LOAD]]
589575
; CHECK-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[PREDPHI]], ptr align 1 [[TMP7]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP12]])
590576
; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64
@@ -607,8 +593,8 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) {
607593
; FIXED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
608594
; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <32 x i8>, ptr [[TMP1]], align 1
609595
; FIXED-NEXT: [[TMP5:%.*]] = icmp ne <32 x i8> [[WIDE_LOAD1]], splat (i8 -128)
610-
; FIXED-NEXT: [[TMP7:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> splat (i8 -1), <32 x i8> splat (i8 1)
611-
; FIXED-NEXT: [[TMP9:%.*]] = sdiv <32 x i8> [[WIDE_LOAD1]], [[TMP7]]
596+
; FIXED-NEXT: [[TMP2:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> splat (i8 -1), <32 x i8> splat (i8 1)
597+
; FIXED-NEXT: [[TMP9:%.*]] = sdiv <32 x i8> [[WIDE_LOAD1]], [[TMP2]]
612598
; FIXED-NEXT: [[PREDPHI2:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP9]], <32 x i8> [[WIDE_LOAD1]]
613599
; FIXED-NEXT: store <32 x i8> [[PREDPHI2]], ptr [[TMP1]], align 1
614600
; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32

0 commit comments

Comments
 (0)