Skip to content

Commit a353e25

Browse files
authored
[LAA] Don't require Stride == 1/-1 for inbounds pointer AddRecs nowrap. (#113126)
If we have a pointer AddRec, the maximum increment is 2^(pointer-index-wdith - 1) - 1. This means that if incrementing the AddRec wraps, the distance between the previously accessed location and the wrapped location is > 2^(pointer-index-wdith - 1), i.e. if the GEP for the AddRec is inbounds, this would be poison due to the object being larger than half the pointer index type space. The poison would be immediate UB when the memory access gets executed.. Similar reasoning can be applied for decrements. PR: #113126
1 parent fedb9fd commit a353e25

File tree

11 files changed

+173
-108
lines changed

11 files changed

+173
-108
lines changed

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1511,12 +1511,13 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
15111511
if (isNoWrapAddRec(Ptr, AR, PSE, Lp))
15121512
return Stride;
15131513

1514-
// An nusw getelementptr that is a AddRec with a unit stride
1515-
// cannot wrap per definition. If it did, the result would be poison
1516-
// and any memory access dependent on it would be immediate UB
1517-
// when executed.
1514+
// An nusw getelementptr that is an AddRec cannot wrap. If it would wrap,
1515+
// the distance between the previously accessed location and the wrapped
1516+
// location will be larger than half the pointer index type space. In that
1517+
// case, the GEP would be poison and any memory access dependent on it would
1518+
// be immediate UB when executed.
15181519
if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
1519-
GEP && GEP->hasNoUnsignedSignedWrap() && (Stride == 1 || Stride == -1))
1520+
GEP && GEP->hasNoUnsignedSignedWrap())
15201521
return Stride;
15211522

15221523
// If the null pointer is undefined, then a access sequence which would

llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ define void @backdep_type_size_equivalence(ptr nocapture %vec, i64 %n) {
3030
; CHECK-EMPTY:
3131
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
3232
; CHECK-NEXT: SCEV assumptions:
33-
; CHECK-NEXT: {(4 + (8 * %n) + %vec),+,8}<%loop> Added Flags: <nusw>
3433
; CHECK-EMPTY:
3534
; CHECK-NEXT: Expressions re-written:
3635
;
@@ -157,7 +156,6 @@ define void @neg_dist_dep_type_size_equivalence(ptr nocapture %vec, i64 %n) {
157156
; CHECK-EMPTY:
158157
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
159158
; CHECK-NEXT: SCEV assumptions:
160-
; CHECK-NEXT: {((8 * %n) + %vec),+,8}<%loop> Added Flags: <nusw>
161159
; CHECK-EMPTY:
162160
; CHECK-NEXT: Expressions re-written:
163161
;

llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,6 @@ define i32 @check_no_dep_via_bounds_compare_symbolic_max_btc_neg_1(ptr %P, i32 %
105105
; CHECK-EMPTY:
106106
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
107107
; CHECK-NEXT: SCEV assumptions:
108-
; CHECK-NEXT: {(8 + (8 * %y) + %P),+,8}<%loop> Added Flags: <nusw>
109108
; CHECK-EMPTY:
110109
; CHECK-NEXT: Expressions re-written:
111110
;

llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ for.end: ; preds = %for.body
243243
; LAA: Memory dependences are safe{{$}}
244244
; LAA: SCEV assumptions:
245245
; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>
246-
; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64))<nsw> + %a),+,-4}<%for.body> Added Flags: <nusw>
246+
; LAA-EMPTY:
247247

248248
; LAA: [PSE] %arrayidxA = getelementptr inbounds i16, ptr %a, i32 %mul:
249249
; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nsw> + %a)

llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll

Lines changed: 9 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -11,23 +11,15 @@ define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, p
1111
; CHECK: for.body.lver.check:
1212
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
1313
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
14-
; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
15-
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
16-
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
17-
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[MUL_RESULT]]
18-
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[MUL_RESULT]]
19-
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult ptr [[TMP3]], [[A]]
20-
; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]]
21-
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP1]], [[TMP5]]
22-
; CHECK-NEXT: br i1 [[TMP6]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
14+
; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
2315
; CHECK: for.body.ph.lver.orig:
2416
; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
2517
; CHECK: for.body.lver.orig:
2618
; CHECK-NEXT: [[IND_LVER_ORIG:%.*]] = phi i64 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[ADD_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
2719
; CHECK-NEXT: [[IND1_LVER_ORIG:%.*]] = phi i32 [ 0, [[FOR_BODY_PH_LVER_ORIG]] ], [ [[INC1_LVER_ORIG:%.*]], [[FOR_BODY_LVER_ORIG]] ]
2820
; CHECK-NEXT: [[MUL_LVER_ORIG:%.*]] = mul i32 [[IND1_LVER_ORIG]], 2
2921
; CHECK-NEXT: [[MUL_EXT_LVER_ORIG:%.*]] = zext i32 [[MUL_LVER_ORIG]] to i64
30-
; CHECK-NEXT: [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[MUL_EXT_LVER_ORIG]]
22+
; CHECK-NEXT: [[ARRAYIDXA_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
3123
; CHECK-NEXT: [[LOADA_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDXA_LVER_ORIG]], align 4
3224
; CHECK-NEXT: [[ARRAYIDXB_LVER_ORIG:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[MUL_EXT_LVER_ORIG]]
3325
; CHECK-NEXT: [[LOADB_LVER_ORIG:%.*]] = load i32, ptr [[ARRAYIDXB_LVER_ORIG]], align 4
@@ -53,14 +45,14 @@ define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, p
5345
; CHECK-NEXT: [[MUL_LDIST1:%.*]] = mul i32 [[IND1_LDIST1]], 2
5446
; CHECK-NEXT: [[MUL_EXT_LDIST1:%.*]] = zext i32 [[MUL_LDIST1]] to i64
5547
; CHECK-NEXT: [[ARRAYIDXA_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[MUL_EXT_LDIST1]]
56-
; CHECK-NEXT: [[LOADA_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXA_LDIST1]], align 4, !alias.scope !0
48+
; CHECK-NEXT: [[LOADA_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXA_LDIST1]], align 4, !alias.scope [[META0:![0-9]+]]
5749
; CHECK-NEXT: [[ARRAYIDXB_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[MUL_EXT_LDIST1]]
5850
; CHECK-NEXT: [[LOADB_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXB_LDIST1]], align 4
5951
; CHECK-NEXT: [[MULA_LDIST1:%.*]] = mul i32 [[LOADB_LDIST1]], [[LOADA_LDIST1]]
6052
; CHECK-NEXT: [[ADD_LDIST1]] = add nuw nsw i64 [[IND_LDIST1]], 1
6153
; CHECK-NEXT: [[INC1_LDIST1]] = add i32 [[IND1_LDIST1]], 1
6254
; CHECK-NEXT: [[ARRAYIDXA_PLUS_4_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[ADD_LDIST1]]
63-
; CHECK-NEXT: store i32 [[MULA_LDIST1]], ptr [[ARRAYIDXA_PLUS_4_LDIST1]], align 4, !alias.scope !3
55+
; CHECK-NEXT: store i32 [[MULA_LDIST1]], ptr [[ARRAYIDXA_PLUS_4_LDIST1]], align 4, !alias.scope [[META3:![0-9]+]]
6456
; CHECK-NEXT: [[EXITCOND_LDIST1:%.*]] = icmp eq i64 [[ADD_LDIST1]], [[N]]
6557
; CHECK-NEXT: br i1 [[EXITCOND_LDIST1]], label [[FOR_BODY_PH:%.*]], label [[FOR_BODY_LDIST1]]
6658
; CHECK: for.body.ph:
@@ -83,7 +75,7 @@ define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, p
8375
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]]
8476
; CHECK: for.end.loopexit:
8577
; CHECK-NEXT: br label [[FOR_END:%.*]]
86-
; CHECK: for.end.loopexit2:
78+
; CHECK: for.end.loopexit1:
8779
; CHECK-NEXT: br label [[FOR_END]]
8880
; CHECK: for.end:
8981
; CHECK-NEXT: ret void
@@ -144,15 +136,7 @@ define void @f_with_offset(ptr noalias %b, ptr noalias %c, ptr noalias %d, ptr n
144136
; CHECK: for.body.lver.check:
145137
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
146138
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
147-
; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
148-
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
149-
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
150-
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 0, [[MUL_RESULT]]
151-
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[MUL_RESULT]]
152-
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult ptr [[TMP3]], [[A]]
153-
; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]]
154-
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP1]], [[TMP5]]
155-
; CHECK-NEXT: br i1 [[TMP6]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
139+
; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
156140
; CHECK: for.body.ph.lver.orig:
157141
; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
158142
; CHECK: for.body.lver.orig:
@@ -186,14 +170,14 @@ define void @f_with_offset(ptr noalias %b, ptr noalias %c, ptr noalias %d, ptr n
186170
; CHECK-NEXT: [[MUL_LDIST1:%.*]] = mul i32 [[IND1_LDIST1]], 2
187171
; CHECK-NEXT: [[MUL_EXT_LDIST1:%.*]] = zext i32 [[MUL_LDIST1]] to i64
188172
; CHECK-NEXT: [[ARRAYIDXA_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[MUL_EXT_LDIST1]]
189-
; CHECK-NEXT: [[LOADA_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXA_LDIST1]], align 4, !alias.scope !5
173+
; CHECK-NEXT: [[LOADA_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXA_LDIST1]], align 4, !alias.scope [[META5:![0-9]+]]
190174
; CHECK-NEXT: [[ARRAYIDXB_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[MUL_EXT_LDIST1]]
191175
; CHECK-NEXT: [[LOADB_LDIST1:%.*]] = load i32, ptr [[ARRAYIDXB_LDIST1]], align 4
192176
; CHECK-NEXT: [[MULA_LDIST1:%.*]] = mul i32 [[LOADB_LDIST1]], [[LOADA_LDIST1]]
193177
; CHECK-NEXT: [[ADD_LDIST1]] = add nuw nsw i64 [[IND_LDIST1]], 1
194178
; CHECK-NEXT: [[INC1_LDIST1]] = add i32 [[IND1_LDIST1]], 1
195179
; CHECK-NEXT: [[ARRAYIDXA_PLUS_4_LDIST1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[ADD_LDIST1]]
196-
; CHECK-NEXT: store i32 [[MULA_LDIST1]], ptr [[ARRAYIDXA_PLUS_4_LDIST1]], align 4, !alias.scope !8
180+
; CHECK-NEXT: store i32 [[MULA_LDIST1]], ptr [[ARRAYIDXA_PLUS_4_LDIST1]], align 4, !alias.scope [[META8:![0-9]+]]
197181
; CHECK-NEXT: [[EXITCOND_LDIST1:%.*]] = icmp eq i64 [[ADD_LDIST1]], [[N]]
198182
; CHECK-NEXT: br i1 [[EXITCOND_LDIST1]], label [[FOR_BODY_PH:%.*]], label [[FOR_BODY_LDIST1]]
199183
; CHECK: for.body.ph:
@@ -216,7 +200,7 @@ define void @f_with_offset(ptr noalias %b, ptr noalias %c, ptr noalias %d, ptr n
216200
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]]
217201
; CHECK: for.end.loopexit:
218202
; CHECK-NEXT: br label [[FOR_END:%.*]]
219-
; CHECK: for.end.loopexit2:
203+
; CHECK: for.end.loopexit1:
220204
; CHECK-NEXT: br label [[FOR_END]]
221205
; CHECK: for.end:
222206
; CHECK-NEXT: ret void

llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo
9595
}
9696

9797
; CHECK: LV: Checking a loop in 'gather_nxv4i32_stride2'
98-
; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %0 = load float, ptr %arrayidx, align 4
98+
; CHECK: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %0 = load float, ptr %arrayidx, align 4
9999
define void @gather_nxv4i32_stride2(ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
100100
entry:
101101
br label %for.body

llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -279,51 +279,52 @@ for.cond.cleanup: ; preds = %for.inc, %entry
279279
ret void
280280
}
281281

282-
283-
284282
define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) #0 {
285283
; CHECK-LABEL: @gather_nxv4i32_ind64_stride2(
286284
; CHECK-NEXT: entry:
287285
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
288286
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
289-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
290-
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
287+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i64 [[N:%.*]], [[TMP1]]
288+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH:%.*]]
291289
; CHECK: vector.ph:
292290
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
293-
; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -8
291+
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP2]], 3
292+
; CHECK-NEXT: [[DOTNEG:%.*]] = add nsw i64 [[TMP7]], -1
294293
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
294+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_VEC]], 0
295+
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP7]], i64 [[N_VEC]]
296+
; CHECK-NEXT: [[N_VEC1:%.*]] = sub i64 [[N]], [[TMP6]]
295297
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
296-
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP3]], 2
297298
; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 3
298-
; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
299-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP7]], i64 0
300-
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
301299
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
302300
; CHECK: vector.body:
303301
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
304-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP5]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
305-
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
306-
; CHECK-NEXT: [[TMP8:%.*]] = shl <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
307-
; CHECK-NEXT: [[TMP9:%.*]] = shl <vscale x 4 x i64> [[STEP_ADD]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
308-
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], <vscale x 4 x i64> [[TMP8]]
309-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], <vscale x 4 x i64> [[TMP9]]
310-
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP10]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
311-
; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP11]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> poison)
302+
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
303+
; CHECK-NEXT: [[DOTIDX1:%.*]] = shl i64 [[INDEX]], 3
304+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[DOTIDX1]]
305+
; CHECK-NEXT: [[DOTIDX3:%.*]] = shl nuw nsw i64 [[TMP9]], 5
306+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[B]], i64 [[DOTIDX3]]
307+
; CHECK-NEXT: [[DOTIDX4:%.*]] = shl i64 [[INDEX]], 3
308+
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[DOTIDX4]]
309+
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x float>, ptr [[TMP10]], align 4
310+
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> [[WIDE_VEC]])
311+
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC]], 0
312+
; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <vscale x 8 x float>, ptr [[TMP15]], align 4
313+
; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> [[WIDE_VEC1]])
314+
; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[STRIDED_VEC2]], 0
312315
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
313316
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
314317
; CHECK-NEXT: [[DOTIDX:%.*]] = shl nuw nsw i64 [[TMP13]], 4
315318
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[DOTIDX]]
316319
; CHECK-NEXT: store <vscale x 4 x float> [[WIDE_MASKED_GATHER]], ptr [[TMP12]], align 4
317320
; CHECK-NEXT: store <vscale x 4 x float> [[WIDE_MASKED_GATHER2]], ptr [[TMP14]], align 4
318321
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
319-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[STEP_ADD]], [[DOTSPLAT]]
320-
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
321-
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
322+
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC1]]
323+
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
322324
; CHECK: middle.block:
323-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
324-
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
325+
; CHECK-NEXT: br label [[SCALAR_PH]]
325326
; CHECK: scalar.ph:
326-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
327+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
327328
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
328329
; CHECK: for.body:
329330
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
@@ -334,7 +335,7 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias
334335
; CHECK-NEXT: store float [[TMP16]], ptr [[ARRAYIDX2]], align 4
335336
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
336337
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
337-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
338+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
338339
; CHECK: for.cond.cleanup:
339340
; CHECK-NEXT: ret void
340341
;

llvm/test/Transforms/LoopVectorize/X86/pr54634.ll

Lines changed: 3 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -19,26 +19,8 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) lo
1919
; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(10) [[TMP5]], i64 0, i32 1
2020
; CHECK-NEXT: [[DOTUNPACK2:%.*]] = load i64, ptr addrspace(10) [[DOTELT1]], align 8, !tbaa [[TBAA8]]
2121
; CHECK-NEXT: [[TMP8:%.*]] = add nsw i64 [[TMP2]], 1
22-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP8]], 60
23-
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
24-
; CHECK: vector.scevcheck:
25-
; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[TMP2]])
26-
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
27-
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
28-
; CHECK-NEXT: [[TMP9:%.*]] = sub i64 0, [[MUL_RESULT]]
29-
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr addrspace(13) [[TMP7]], i64 [[MUL_RESULT]]
30-
; CHECK-NEXT: [[TMP11:%.*]] = icmp ult ptr addrspace(13) [[TMP10]], [[TMP7]]
31-
; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[MUL_OVERFLOW]]
32-
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr addrspace(13) [[TMP7]], i64 8
33-
; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[TMP2]])
34-
; CHECK-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
35-
; CHECK-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
36-
; CHECK-NEXT: [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT2]]
37-
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(13) [[SCEVGEP]], i64 [[MUL_RESULT2]]
38-
; CHECK-NEXT: [[TMP15:%.*]] = icmp ult ptr addrspace(13) [[TMP14]], [[SCEVGEP]]
39-
; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW3]]
40-
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP12]], [[TMP16]]
41-
; CHECK-NEXT: br i1 [[TMP17]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
22+
; CHECK-NEXT: [[TMP17:%.*]] = icmp ult i64 [[TMP8]], 16
23+
; CHECK-NEXT: br i1 [[TMP17]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4224
; CHECK: vector.ph:
4325
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP8]], 16
4426
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP8]], [[N_MOD_VF]]
@@ -77,7 +59,7 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) lo
7759
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]]
7860
; CHECK-NEXT: br i1 [[CMP_N]], label [[L44:%.*]], label [[SCALAR_PH]]
7961
; CHECK: scalar.ph:
80-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[TOP:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
62+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[TOP:%.*]] ]
8163
; CHECK-NEXT: br label [[L26:%.*]]
8264
; CHECK: L26:
8365
; CHECK-NEXT: [[VALUE_PHI5:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP27:%.*]], [[L26]] ]

0 commit comments

Comments
 (0)