@@ -15,7 +15,7 @@ define void @arm_mult_q15(ptr %pSrcA, ptr %pSrcB, ptr noalias %pDst, i32 %blockS
1515; CHECK-NEXT: br i1 [[CMP_NOT2]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
1616; CHECK: while.body.preheader:
1717; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[BLOCKSIZE]], 8
18- ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[WHILE_BODY_PREHEADER16 :%.*]], label [[VECTOR_PH:%.*]]
18+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[WHILE_BODY_PREHEADER18 :%.*]], label [[VECTOR_PH:%.*]]
1919; CHECK: vector.ph:
2020; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[BLOCKSIZE]], -8
2121; CHECK-NEXT: [[IND_END:%.*]] = and i32 [[BLOCKSIZE]], 7
@@ -28,44 +28,44 @@ define void @arm_mult_q15(ptr %pSrcA, ptr %pSrcB, ptr noalias %pDst, i32 %blockS
2828; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2929; CHECK: vector.body:
3030; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
31- ; CHECK-NEXT: [[TMP3 :%.*]] = shl i32 [[INDEX]], 1
32- ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRCA]], i32 [[TMP3 ]]
33- ; CHECK-NEXT: [[TMP4 :%.*]] = shl i32 [[INDEX]], 1
34- ; CHECK-NEXT: [[NEXT_GEP13 :%.*]] = getelementptr i8, ptr [[PDST]], i32 [[TMP4 ]]
35- ; CHECK-NEXT: [[TMP5 :%.*]] = shl i32 [[INDEX]], 1
36- ; CHECK-NEXT: [[NEXT_GEP14 :%.*]] = getelementptr i8, ptr [[PSRCB]], i32 [[TMP5 ]]
31+ ; CHECK-NEXT: [[OFFSET_IDX :%.*]] = shl i32 [[INDEX]], 1
32+ ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRCA]], i32 [[OFFSET_IDX ]]
33+ ; CHECK-NEXT: [[OFFSET_IDX13 :%.*]] = shl i32 [[INDEX]], 1
34+ ; CHECK-NEXT: [[NEXT_GEP14 :%.*]] = getelementptr i8, ptr [[PDST]], i32 [[OFFSET_IDX13 ]]
35+ ; CHECK-NEXT: [[OFFSET_IDX15 :%.*]] = shl i32 [[INDEX]], 1
36+ ; CHECK-NEXT: [[NEXT_GEP16 :%.*]] = getelementptr i8, ptr [[PSRCB]], i32 [[OFFSET_IDX15 ]]
3737; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2
38- ; CHECK-NEXT: [[TMP6 :%.*]] = sext <8 x i16> [[WIDE_LOAD]] to <8 x i32>
39- ; CHECK-NEXT: [[WIDE_LOAD15 :%.*]] = load <8 x i16>, ptr [[NEXT_GEP14 ]], align 2
40- ; CHECK-NEXT: [[TMP7 :%.*]] = sext <8 x i16> [[WIDE_LOAD15 ]] to <8 x i32>
41- ; CHECK-NEXT: [[TMP8 :%.*]] = mul nsw <8 x i32> [[TMP7 ]], [[TMP6 ]]
42- ; CHECK-NEXT: [[TMP9 :%.*]] = ashr <8 x i32> [[TMP8 ]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
43- ; CHECK-NEXT: [[TMP10 :%.*]] = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[TMP9 ]], <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>)
44- ; CHECK-NEXT: [[TMP11 :%.*]] = trunc <8 x i32> [[TMP10 ]] to <8 x i16>
45- ; CHECK-NEXT: store <8 x i16> [[TMP11 ]], ptr [[NEXT_GEP13 ]], align 2
38+ ; CHECK-NEXT: [[TMP3 :%.*]] = sext <8 x i16> [[WIDE_LOAD]] to <8 x i32>
39+ ; CHECK-NEXT: [[WIDE_LOAD17 :%.*]] = load <8 x i16>, ptr [[NEXT_GEP16 ]], align 2
40+ ; CHECK-NEXT: [[TMP4 :%.*]] = sext <8 x i16> [[WIDE_LOAD17 ]] to <8 x i32>
41+ ; CHECK-NEXT: [[TMP5 :%.*]] = mul nsw <8 x i32> [[TMP4 ]], [[TMP3 ]]
42+ ; CHECK-NEXT: [[TMP6 :%.*]] = ashr <8 x i32> [[TMP5 ]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
43+ ; CHECK-NEXT: [[TMP7 :%.*]] = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[TMP6 ]], <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>)
44+ ; CHECK-NEXT: [[TMP8 :%.*]] = trunc <8 x i32> [[TMP7 ]] to <8 x i16>
45+ ; CHECK-NEXT: store <8 x i16> [[TMP8 ]], ptr [[NEXT_GEP14 ]], align 2
4646; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
47- ; CHECK-NEXT: [[TMP12 :%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
48- ; CHECK-NEXT: br i1 [[TMP12 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
47+ ; CHECK-NEXT: [[TMP9 :%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
48+ ; CHECK-NEXT: br i1 [[TMP9 ]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4949; CHECK: middle.block:
5050; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[BLOCKSIZE]]
51- ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[WHILE_BODY_PREHEADER16 ]]
51+ ; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[WHILE_BODY_PREHEADER18 ]]
5252; CHECK: while.body.preheader18:
5353; CHECK-NEXT: [[BLKCNT_06_PH:%.*]] = phi i32 [ [[BLOCKSIZE]], [[WHILE_BODY_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
5454; CHECK-NEXT: [[PSRCA_ADDR_05_PH:%.*]] = phi ptr [ [[PSRCA]], [[WHILE_BODY_PREHEADER]] ], [ [[IND_END7]], [[MIDDLE_BLOCK]] ]
5555; CHECK-NEXT: [[PDST_ADDR_04_PH:%.*]] = phi ptr [ [[PDST]], [[WHILE_BODY_PREHEADER]] ], [ [[IND_END9]], [[MIDDLE_BLOCK]] ]
5656; CHECK-NEXT: [[PSRCB_ADDR_03_PH:%.*]] = phi ptr [ [[PSRCB]], [[WHILE_BODY_PREHEADER]] ], [ [[IND_END11]], [[MIDDLE_BLOCK]] ]
5757; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
5858; CHECK: while.body:
59- ; CHECK-NEXT: [[BLKCNT_06:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BLKCNT_06_PH]], [[WHILE_BODY_PREHEADER16 ]] ]
60- ; CHECK-NEXT: [[PSRCA_ADDR_05:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[PSRCA_ADDR_05_PH]], [[WHILE_BODY_PREHEADER16 ]] ]
61- ; CHECK-NEXT: [[PDST_ADDR_04:%.*]] = phi ptr [ [[INCDEC_PTR4:%.*]], [[WHILE_BODY]] ], [ [[PDST_ADDR_04_PH]], [[WHILE_BODY_PREHEADER16 ]] ]
62- ; CHECK-NEXT: [[PSRCB_ADDR_03:%.*]] = phi ptr [ [[INCDEC_PTR1:%.*]], [[WHILE_BODY]] ], [ [[PSRCB_ADDR_03_PH]], [[WHILE_BODY_PREHEADER16 ]] ]
59+ ; CHECK-NEXT: [[BLKCNT_06:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[BLKCNT_06_PH]], [[WHILE_BODY_PREHEADER18 ]] ]
60+ ; CHECK-NEXT: [[PSRCA_ADDR_05:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[PSRCA_ADDR_05_PH]], [[WHILE_BODY_PREHEADER18 ]] ]
61+ ; CHECK-NEXT: [[PDST_ADDR_04:%.*]] = phi ptr [ [[INCDEC_PTR4:%.*]], [[WHILE_BODY]] ], [ [[PDST_ADDR_04_PH]], [[WHILE_BODY_PREHEADER18 ]] ]
62+ ; CHECK-NEXT: [[PSRCB_ADDR_03:%.*]] = phi ptr [ [[INCDEC_PTR1:%.*]], [[WHILE_BODY]] ], [ [[PSRCB_ADDR_03_PH]], [[WHILE_BODY_PREHEADER18 ]] ]
6363; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[PSRCA_ADDR_05]], i32 2
64- ; CHECK-NEXT: [[TMP13 :%.*]] = load i16, ptr [[PSRCA_ADDR_05]], align 2
65- ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP13 ]] to i32
64+ ; CHECK-NEXT: [[TMP10 :%.*]] = load i16, ptr [[PSRCA_ADDR_05]], align 2
65+ ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP10 ]] to i32
6666; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i8, ptr [[PSRCB_ADDR_03]], i32 2
67- ; CHECK-NEXT: [[TMP14 :%.*]] = load i16, ptr [[PSRCB_ADDR_03]], align 2
68- ; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[TMP14 ]] to i32
67+ ; CHECK-NEXT: [[TMP11 :%.*]] = load i16, ptr [[PSRCB_ADDR_03]], align 2
68+ ; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[TMP11 ]] to i32
6969; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV2]], [[CONV]]
7070; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[MUL]], 15
7171; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = tail call i32 @llvm.smin.i32(i32 [[SHR]], i32 32767)
0 commit comments