@@ -226,11 +226,10 @@ define i64 @test_pr62954_scalar_epilogue_required(ptr %A, ptr noalias %B, ptr %C
226226; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 36
227227; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
228228; CHECK: middle.block:
229- ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
230229; CHECK-NEXT: br label [[SCALAR_PH]]
231230; CHECK: scalar.ph:
232231; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 73, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
233- ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT ]], [[MIDDLE_BLOCK]] ], [ [[REC_START]], [[ENTRY]] ]
232+ ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[TMP2 ]], [[MIDDLE_BLOCK]] ], [ [[REC_START]], [[ENTRY]] ]
234233; CHECK-NEXT: br label [[LOOP:%.*]]
235234; CHECK: loop:
236235; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
@@ -364,28 +363,28 @@ define void @test_for_tried_to_force_scalar(ptr noalias %A, ptr noalias %B, ptr
364363; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1
365364; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 2
366365; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 3
367- ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 4
368- ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 5
369- ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 6
370- ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 7
371- ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr nusw [3 x float], ptr [[A:%.*]], i64 [[TMP5]]
372- ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP6]]
373- ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP7]]
374- ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP8]]
375- ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP13]], i32 0
376- ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x ptr> [[TMP17]], ptr [[TMP14]], i32 1
377- ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x ptr> [[TMP18]], ptr [[TMP15]], i32 2
378- ; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x ptr> [[TMP19]], ptr [[TMP16]], i32 3
379- ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP9]]
380- ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP10]]
381- ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP11]]
382- ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP12]]
366+ ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 4
367+ ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 5
368+ ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 6
369+ ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 7
370+ ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr nusw [3 x float], ptr [[A:%.*]], i64 [[TMP5]]
371+ ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP6]]
372+ ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP7]]
373+ ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP8]]
383374; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP21]], i32 0
384375; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x ptr> [[TMP25]], ptr [[TMP22]], i32 1
385376; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x ptr> [[TMP26]], ptr [[TMP23]], i32 2
386377; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x ptr> [[TMP27]], ptr [[TMP24]], i32 3
387- ; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <4 x ptr> [[TMP20]], <4 x ptr> [[TMP28]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
388- ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x float>, ptr [[TMP21]], align 4
378+ ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP11]]
379+ ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP12]]
380+ ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP9]]
381+ ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr nusw [3 x float], ptr [[A]], i64 [[TMP10]]
382+ ; CHECK-NEXT: [[TMP33:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP19]], i32 0
383+ ; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x ptr> [[TMP33]], ptr [[TMP20]], i32 1
384+ ; CHECK-NEXT: [[TMP35:%.*]] = insertelement <4 x ptr> [[TMP34]], ptr [[TMP31]], i32 2
385+ ; CHECK-NEXT: [[TMP38:%.*]] = insertelement <4 x ptr> [[TMP35]], ptr [[TMP32]], i32 3
386+ ; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <4 x ptr> [[TMP28]], <4 x ptr> [[TMP38]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
387+ ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <12 x float>, ptr [[TMP19]], align 4
389388; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <12 x float> [[WIDE_VEC]], <12 x float> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
390389; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x float> [[STRIDED_VEC]], i32 3
391390; CHECK-NEXT: store float [[TMP30]], ptr [[C:%.*]], align 4
@@ -399,7 +398,7 @@ define void @test_for_tried_to_force_scalar(ptr noalias %A, ptr noalias %B, ptr
399398; CHECK-NEXT: br label [[SCALAR_PH]]
400399; CHECK: scalar.ph:
401400; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
402- ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi ptr [ [[TMP24 ]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ]
401+ ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi ptr [ [[TMP32 ]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ]
403402; CHECK-NEXT: br label [[LOOP:%.*]]
404403; CHECK: loop:
405404; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
0 commit comments