Skip to content

Commit 48ef270

Browse files
committed
Address review comment
1 parent 116062a commit 48ef270

File tree

1 file changed

+54 -24 lines changed

llvm/test/Transforms/LoopVectorize/optsize.ll

Lines changed: 54 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -334,22 +334,29 @@ for.body29:
334334
br i1 %cmp26, label %for.body29, label %for.cond.cleanup28
335335
}
336336

337-
define void @pr43371_pgso() !prof !14 {
337+
define void @pr43371_pgso(i16 %val) !prof !14 {
338338
;
339339
; CHECK-LABEL: define void @pr43371_pgso(
340-
; CHECK-SAME: ) !prof [[PROF14]] {
340+
; CHECK-SAME: i16 [[VAL:%.*]]) !prof [[PROF14]] {
341341
; CHECK-NEXT: [[ENTRY:.*:]]
342342
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
343343
; CHECK: [[VECTOR_PH]]:
344+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[VAL]], i64 0
345+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer
344346
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
345347
; CHECK: [[VECTOR_BODY]]:
346348
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
347-
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
348-
; CHECK-NEXT: [[TMP0:%.*]] = add i16 3, [[OFFSET_IDX]]
349-
; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP0]] to i32
349+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
350+
; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], [[VEC_IND]]
351+
; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32>
352+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
353+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
350354
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP4]]
351-
; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP5]], align 1
355+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP3]]
356+
; CHECK-NEXT: store i16 0, ptr [[TMP5]], align 1
357+
; CHECK-NEXT: store i16 0, ptr [[TMP7]], align 1
352358
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
359+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
353360
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 756
354361
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
355362
; CHECK: [[MIDDLE_BLOCK]]:
@@ -358,19 +365,26 @@ define void @pr43371_pgso() !prof !14 {
358365
; CHECK-NEXT: unreachable
359366
;
360367
; PGSO-LABEL: define void @pr43371_pgso(
361-
; PGSO-SAME: ) !prof [[PROF14]] {
368+
; PGSO-SAME: i16 [[VAL:%.*]]) !prof [[PROF14]] {
362369
; PGSO-NEXT: [[ENTRY:.*:]]
363370
; PGSO-NEXT: br label %[[VECTOR_PH:.*]]
364371
; PGSO: [[VECTOR_PH]]:
372+
; PGSO-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[VAL]], i64 0
373+
; PGSO-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer
365374
; PGSO-NEXT: br label %[[VECTOR_BODY:.*]]
366375
; PGSO: [[VECTOR_BODY]]:
367376
; PGSO-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
368-
; PGSO-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
369-
; PGSO-NEXT: [[TMP0:%.*]] = add i16 3, [[OFFSET_IDX]]
370-
; PGSO-NEXT: [[TMP4:%.*]] = zext i16 [[TMP0]] to i32
377+
; PGSO-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
378+
; PGSO-NEXT: [[TMP0:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], [[VEC_IND]]
379+
; PGSO-NEXT: [[TMP1:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32>
380+
; PGSO-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
381+
; PGSO-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
371382
; PGSO-NEXT: [[TMP5:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP4]]
372-
; PGSO-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP5]], align 1
383+
; PGSO-NEXT: [[TMP7:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP3]]
384+
; PGSO-NEXT: store i16 0, ptr [[TMP5]], align 1
385+
; PGSO-NEXT: store i16 0, ptr [[TMP7]], align 1
373386
; PGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
387+
; PGSO-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
374388
; PGSO-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 756
375389
; PGSO-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
376390
; PGSO: [[MIDDLE_BLOCK]]:
@@ -379,15 +393,19 @@ define void @pr43371_pgso() !prof !14 {
379393
; PGSO-NEXT: unreachable
380394
;
381395
; NPGSO-LABEL: define void @pr43371_pgso(
382-
; NPGSO-SAME: ) !prof [[PROF14]] {
396+
; NPGSO-SAME: i16 [[VAL:%.*]]) !prof [[PROF14]] {
383397
; NPGSO-NEXT: [[ENTRY:.*:]]
384-
; NPGSO-NEXT: br label %[[VECTOR_PH:.*]]
398+
; NPGSO-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
399+
; NPGSO: [[VECTOR_SCEVCHECK]]:
400+
; NPGSO-NEXT: [[TMP0:%.*]] = add i16 [[VAL]], 755
401+
; NPGSO-NEXT: [[TMP4:%.*]] = icmp ult i16 [[TMP0]], [[VAL]]
402+
; NPGSO-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
385403
; NPGSO: [[VECTOR_PH]]:
386404
; NPGSO-NEXT: br label %[[VECTOR_BODY:.*]]
387405
; NPGSO: [[VECTOR_BODY]]:
388406
; NPGSO-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
389407
; NPGSO-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
390-
; NPGSO-NEXT: [[TMP1:%.*]] = add i16 3, [[OFFSET_IDX]]
408+
; NPGSO-NEXT: [[TMP1:%.*]] = add i16 [[VAL]], [[OFFSET_IDX]]
391409
; NPGSO-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
392410
; NPGSO-NEXT: [[TMP3:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP2]]
393411
; NPGSO-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP3]], align 1
@@ -396,8 +414,19 @@ define void @pr43371_pgso() !prof !14 {
396414
; NPGSO-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
397415
; NPGSO: [[MIDDLE_BLOCK]]:
398416
; NPGSO-NEXT: br label %[[FOR_COND_CLEANUP28:.*]]
417+
; NPGSO: [[SCALAR_PH]]:
418+
; NPGSO-NEXT: br label %[[FOR_BODY29:.*]]
399419
; NPGSO: [[FOR_COND_CLEANUP28]]:
400420
; NPGSO-NEXT: unreachable
421+
; NPGSO: [[FOR_BODY29]]:
422+
; NPGSO-NEXT: [[I24_0170:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ]
423+
; NPGSO-NEXT: [[ADD33:%.*]] = add i16 [[VAL]], [[I24_0170]]
424+
; NPGSO-NEXT: [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32
425+
; NPGSO-NEXT: [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]]
426+
; NPGSO-NEXT: store i16 0, ptr [[ARRAYIDX35]], align 1
427+
; NPGSO-NEXT: [[INC37]] = add i16 [[I24_0170]], 1
428+
; NPGSO-NEXT: [[CMP26:%.*]] = icmp ult i16 [[INC37]], 756
429+
; NPGSO-NEXT: br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]], !llvm.loop [[LOOP21:![0-9]+]]
401430
;
402431
; We do not want to generate SCEV predicates when optimising for size, because
403432
; that will lead to extra code generation such as the SCEV overflow runtime
@@ -411,7 +440,7 @@ for.cond.cleanup28:
411440

412441
for.body29:
413442
%i24.0170 = phi i16 [ 0, %entry], [ %inc37, %for.body29]
414-
%add33 = add i16 3, %i24.0170
443+
%add33 = add i16 %val, %i24.0170
415444
%idxprom34 = zext i16 %add33 to i32
416445
%arrayidx35 = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 %idxprom34
417446
store i16 0, ptr %arrayidx35, align 1
@@ -523,7 +552,7 @@ define i32 @pr45526_pgso() !prof !14 {
523552
; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
524553
; NPGSO-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
525554
; NPGSO-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 508
526-
; NPGSO-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
555+
; NPGSO-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
527556
; NPGSO: [[MIDDLE_BLOCK]]:
528557
; NPGSO-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
529558
; NPGSO-NEXT: br label %[[SCALAR_PH:.*]]
@@ -534,7 +563,7 @@ define i32 @pr45526_pgso() !prof !14 {
534563
; NPGSO-NEXT: [[FOR:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[PIVPLUS1]], %[[LOOP]] ]
535564
; NPGSO-NEXT: [[PIVPLUS1]] = add nuw nsw i32 [[PIV]], 1
536565
; NPGSO-NEXT: [[COND:%.*]] = icmp ult i32 [[PIV]], 510
537-
; NPGSO-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP22:![0-9]+]]
566+
; NPGSO-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP23:![0-9]+]]
538567
; NPGSO: [[EXIT]]:
539568
; NPGSO-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ]
540569
; NPGSO-NEXT: ret i32 [[FOR_LCSSA]]
@@ -666,7 +695,7 @@ define void @stride1(ptr noalias %B, i32 %BStride) optsize {
666695
; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
667696
; NPGSO-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
668697
; NPGSO-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026
669-
; NPGSO-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
698+
; NPGSO-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
670699
; NPGSO: [[MIDDLE_BLOCK]]:
671700
; NPGSO-NEXT: br label %[[FOR_END:.*]]
672701
; NPGSO: [[FOR_END]]:
@@ -771,7 +800,7 @@ define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 {
771800
; NPGSO-NEXT: store <2 x i16> splat (i16 42), ptr [[TMP1]], align 4
772801
; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP0]], 2
773802
; NPGSO-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
774-
; NPGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
803+
; NPGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
775804
; NPGSO: [[MIDDLE_BLOCK]]:
776805
; NPGSO-NEXT: br label %[[SCALAR_PH]]
777806
; NPGSO: [[SCALAR_PH]]:
@@ -784,7 +813,7 @@ define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 {
784813
; NPGSO-NEXT: store i16 42, ptr [[GEPOFB]], align 4
785814
; NPGSO-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
786815
; NPGSO-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025
787-
; NPGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
816+
; NPGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
788817
; NPGSO: [[FOR_END]]:
789818
; NPGSO-NEXT: ret void
790819
;
@@ -988,9 +1017,10 @@ exit:
9881017
; NPGSO: [[LOOP18]] = distinct !{[[LOOP18]], [[META17]], [[META16]]}
9891018
; NPGSO: [[LOOP19]] = distinct !{[[LOOP19]], [[META16]], [[META17]]}
9901019
; NPGSO: [[LOOP20]] = distinct !{[[LOOP20]], [[META16]], [[META17]]}
991-
; NPGSO: [[LOOP21]] = distinct !{[[LOOP21]], [[META16]], [[META17]]}
992-
; NPGSO: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META16]]}
993-
; NPGSO: [[LOOP23]] = distinct !{[[LOOP23]], [[META16]], [[META17]]}
1020+
; NPGSO: [[LOOP21]] = distinct !{[[LOOP21]], [[META16]]}
1021+
; NPGSO: [[LOOP22]] = distinct !{[[LOOP22]], [[META16]], [[META17]]}
1022+
; NPGSO: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META16]]}
9941023
; NPGSO: [[LOOP24]] = distinct !{[[LOOP24]], [[META16]], [[META17]]}
995-
; NPGSO: [[LOOP25]] = distinct !{[[LOOP25]], [[META16]]}
1024+
; NPGSO: [[LOOP25]] = distinct !{[[LOOP25]], [[META16]], [[META17]]}
1025+
; NPGSO: [[LOOP26]] = distinct !{[[LOOP26]], [[META16]]}
9961026
;.

0 commit comments

Comments
 (0)