@@ -334,22 +334,29 @@ for.body29:
334334 br i1 %cmp26 , label %for.body29 , label %for.cond.cleanup28
335335}
336336
337- define void @pr43371_pgso () !prof !14 {
337+ define void @pr43371_pgso (i16 %val ) !prof !14 {
338338;
339339; CHECK-LABEL: define void @pr43371_pgso(
340- ; CHECK-SAME: ) !prof [[PROF14]] {
340+ ; CHECK-SAME: i16 [[VAL:%.*]] ) !prof [[PROF14]] {
341341; CHECK-NEXT: [[ENTRY:.*:]]
342342; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
343343; CHECK: [[VECTOR_PH]]:
344+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[VAL]], i64 0
345+ ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer
344346; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
345347; CHECK: [[VECTOR_BODY]]:
346348; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
347- ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
348- ; CHECK-NEXT: [[TMP0:%.*]] = add i16 3, [[OFFSET_IDX]]
349- ; CHECK-NEXT: [[TMP4:%.*]] = zext i16 [[TMP0]] to i32
349+ ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
350+ ; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], [[VEC_IND]]
351+ ; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32>
352+ ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
353+ ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
350354; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP4]]
351- ; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP5]], align 1
355+ ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP3]]
356+ ; CHECK-NEXT: store i16 0, ptr [[TMP5]], align 1
357+ ; CHECK-NEXT: store i16 0, ptr [[TMP7]], align 1
352358; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
359+ ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
353360; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 756
354361; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
355362; CHECK: [[MIDDLE_BLOCK]]:
@@ -358,19 +365,26 @@ define void @pr43371_pgso() !prof !14 {
358365; CHECK-NEXT: unreachable
359366;
360367; PGSO-LABEL: define void @pr43371_pgso(
361- ; PGSO-SAME: ) !prof [[PROF14]] {
368+ ; PGSO-SAME: i16 [[VAL:%.*]] ) !prof [[PROF14]] {
362369; PGSO-NEXT: [[ENTRY:.*:]]
363370; PGSO-NEXT: br label %[[VECTOR_PH:.*]]
364371; PGSO: [[VECTOR_PH]]:
372+ ; PGSO-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[VAL]], i64 0
373+ ; PGSO-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer
365374; PGSO-NEXT: br label %[[VECTOR_BODY:.*]]
366375; PGSO: [[VECTOR_BODY]]:
367376; PGSO-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
368- ; PGSO-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
369- ; PGSO-NEXT: [[TMP0:%.*]] = add i16 3, [[OFFSET_IDX]]
370- ; PGSO-NEXT: [[TMP4:%.*]] = zext i16 [[TMP0]] to i32
377+ ; PGSO-NEXT: [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
378+ ; PGSO-NEXT: [[TMP0:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], [[VEC_IND]]
379+ ; PGSO-NEXT: [[TMP1:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32>
380+ ; PGSO-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
381+ ; PGSO-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
371382; PGSO-NEXT: [[TMP5:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP4]]
372- ; PGSO-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP5]], align 1
383+ ; PGSO-NEXT: [[TMP7:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP3]]
384+ ; PGSO-NEXT: store i16 0, ptr [[TMP5]], align 1
385+ ; PGSO-NEXT: store i16 0, ptr [[TMP7]], align 1
373386; PGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
387+ ; PGSO-NEXT: [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
374388; PGSO-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 756
375389; PGSO-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
376390; PGSO: [[MIDDLE_BLOCK]]:
@@ -379,15 +393,19 @@ define void @pr43371_pgso() !prof !14 {
379393; PGSO-NEXT: unreachable
380394;
381395; NPGSO-LABEL: define void @pr43371_pgso(
382- ; NPGSO-SAME: ) !prof [[PROF14]] {
396+ ; NPGSO-SAME: i16 [[VAL:%.*]] ) !prof [[PROF14]] {
383397; NPGSO-NEXT: [[ENTRY:.*:]]
384- ; NPGSO-NEXT: br label %[[VECTOR_PH:.*]]
398+ ; NPGSO-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
399+ ; NPGSO: [[VECTOR_SCEVCHECK]]:
400+ ; NPGSO-NEXT: [[TMP0:%.*]] = add i16 [[VAL]], 755
401+ ; NPGSO-NEXT: [[TMP4:%.*]] = icmp ult i16 [[TMP0]], [[VAL]]
402+ ; NPGSO-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
385403; NPGSO: [[VECTOR_PH]]:
386404; NPGSO-NEXT: br label %[[VECTOR_BODY:.*]]
387405; NPGSO: [[VECTOR_BODY]]:
388406; NPGSO-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
389407; NPGSO-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
390- ; NPGSO-NEXT: [[TMP1:%.*]] = add i16 3 , [[OFFSET_IDX]]
408+ ; NPGSO-NEXT: [[TMP1:%.*]] = add i16 [[VAL]] , [[OFFSET_IDX]]
391409; NPGSO-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
392410; NPGSO-NEXT: [[TMP3:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP2]]
393411; NPGSO-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP3]], align 1
@@ -396,8 +414,19 @@ define void @pr43371_pgso() !prof !14 {
396414; NPGSO-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
397415; NPGSO: [[MIDDLE_BLOCK]]:
398416; NPGSO-NEXT: br label %[[FOR_COND_CLEANUP28:.*]]
417+ ; NPGSO: [[SCALAR_PH]]:
418+ ; NPGSO-NEXT: br label %[[FOR_BODY29:.*]]
399419; NPGSO: [[FOR_COND_CLEANUP28]]:
400420; NPGSO-NEXT: unreachable
421+ ; NPGSO: [[FOR_BODY29]]:
422+ ; NPGSO-NEXT: [[I24_0170:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ]
423+ ; NPGSO-NEXT: [[ADD33:%.*]] = add i16 [[VAL]], [[I24_0170]]
424+ ; NPGSO-NEXT: [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32
425+ ; NPGSO-NEXT: [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]]
426+ ; NPGSO-NEXT: store i16 0, ptr [[ARRAYIDX35]], align 1
427+ ; NPGSO-NEXT: [[INC37]] = add i16 [[I24_0170]], 1
428+ ; NPGSO-NEXT: [[CMP26:%.*]] = icmp ult i16 [[INC37]], 756
429+ ; NPGSO-NEXT: br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]], !llvm.loop [[LOOP21:![0-9]+]]
401430;
402431; We do not want to generate SCEV predicates when optimising for size, because
403432; that will lead to extra code generation such as the SCEV overflow runtime
@@ -411,7 +440,7 @@ for.cond.cleanup28:
411440
412441for.body29:
413442 %i24.0170 = phi i16 [ 0 , %entry ], [ %inc37 , %for.body29 ]
414- %add33 = add i16 3 , %i24.0170
443+ %add33 = add i16 %val , %i24.0170
415444 %idxprom34 = zext i16 %add33 to i32
416445 %arrayidx35 = getelementptr [2592 x i16 ], ptr @cm_array , i32 0 , i32 %idxprom34
417446 store i16 0 , ptr %arrayidx35 , align 1
@@ -523,7 +552,7 @@ define i32 @pr45526_pgso() !prof !14 {
523552; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
524553; NPGSO-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
525554; NPGSO-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 508
526- ; NPGSO-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21 :![0-9]+]]
555+ ; NPGSO-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22 :![0-9]+]]
527556; NPGSO: [[MIDDLE_BLOCK]]:
528557; NPGSO-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
529558; NPGSO-NEXT: br label %[[SCALAR_PH:.*]]
@@ -534,7 +563,7 @@ define i32 @pr45526_pgso() !prof !14 {
534563; NPGSO-NEXT: [[FOR:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[PIVPLUS1]], %[[LOOP]] ]
535564; NPGSO-NEXT: [[PIVPLUS1]] = add nuw nsw i32 [[PIV]], 1
536565; NPGSO-NEXT: [[COND:%.*]] = icmp ult i32 [[PIV]], 510
537- ; NPGSO-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP22 :![0-9]+]]
566+ ; NPGSO-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP23 :![0-9]+]]
538567; NPGSO: [[EXIT]]:
539568; NPGSO-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ]
540569; NPGSO-NEXT: ret i32 [[FOR_LCSSA]]
@@ -666,7 +695,7 @@ define void @stride1(ptr noalias %B, i32 %BStride) optsize {
666695; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
667696; NPGSO-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
668697; NPGSO-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026
669- ; NPGSO-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23 :![0-9]+]]
698+ ; NPGSO-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24 :![0-9]+]]
670699; NPGSO: [[MIDDLE_BLOCK]]:
671700; NPGSO-NEXT: br label %[[FOR_END:.*]]
672701; NPGSO: [[FOR_END]]:
@@ -771,7 +800,7 @@ define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 {
771800; NPGSO-NEXT: store <2 x i16> splat (i16 42), ptr [[TMP1]], align 4
772801; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP0]], 2
773802; NPGSO-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
774- ; NPGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24 :![0-9]+]]
803+ ; NPGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25 :![0-9]+]]
775804; NPGSO: [[MIDDLE_BLOCK]]:
776805; NPGSO-NEXT: br label %[[SCALAR_PH]]
777806; NPGSO: [[SCALAR_PH]]:
@@ -784,7 +813,7 @@ define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 {
784813; NPGSO-NEXT: store i16 42, ptr [[GEPOFB]], align 4
785814; NPGSO-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
786815; NPGSO-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025
787- ; NPGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP25 :![0-9]+]]
816+ ; NPGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP26 :![0-9]+]]
788817; NPGSO: [[FOR_END]]:
789818; NPGSO-NEXT: ret void
790819;
@@ -988,9 +1017,10 @@ exit:
9881017; NPGSO: [[LOOP18]] = distinct !{[[LOOP18]], [[META17]], [[META16]]}
9891018; NPGSO: [[LOOP19]] = distinct !{[[LOOP19]], [[META16]], [[META17]]}
9901019; NPGSO: [[LOOP20]] = distinct !{[[LOOP20]], [[META16]], [[META17]]}
991- ; NPGSO: [[LOOP21]] = distinct !{[[LOOP21]], [[META16]], [[META17]] }
992- ; NPGSO: [[LOOP22]] = distinct !{[[LOOP22]], [[META17 ]], [[META16 ]]}
993- ; NPGSO: [[LOOP23]] = distinct !{[[LOOP23]], [[META16 ]], [[META17 ]]}
1020+ ; NPGSO: [[LOOP21]] = distinct !{[[LOOP21]], [[META16]]}
1021+ ; NPGSO: [[LOOP22]] = distinct !{[[LOOP22]], [[META16 ]], [[META17 ]]}
1022+ ; NPGSO: [[LOOP23]] = distinct !{[[LOOP23]], [[META17 ]], [[META16 ]]}
9941023; NPGSO: [[LOOP24]] = distinct !{[[LOOP24]], [[META16]], [[META17]]}
995- ; NPGSO: [[LOOP25]] = distinct !{[[LOOP25]], [[META16]]}
1024+ ; NPGSO: [[LOOP25]] = distinct !{[[LOOP25]], [[META16]], [[META17]]}
1025+ ; NPGSO: [[LOOP26]] = distinct !{[[LOOP26]], [[META16]]}
9961026;.
0 commit comments