Skip to content

Commit a38ee16

Browse files
committed
update aarch64 testcase
1 parent 6416af1 commit a38ee16

File tree

2 files changed

+38
-38
lines changed

2 files changed

+38
-38
lines changed

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -660,16 +660,16 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
660660
; COMMON-NEXT: store i8 6, ptr [[TMP6]], align 1
661661
; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE12]]
662662
; COMMON: [[PRED_STORE_CONTINUE12]]:
663-
; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[EXIT:.*]]
663+
; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
664664
; COMMON: [[PRED_STORE_IF13]]:
665665
; COMMON-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 7
666666
; COMMON-NEXT: store i8 7, ptr [[TMP7]], align 1
667-
; COMMON-NEXT: br label %[[EXIT]]
667+
; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE14]]
668+
; COMMON: [[PRED_STORE_CONTINUE14]]:
669+
; COMMON-NEXT: br label %[[MIDDLE_BLOCK:.*]]
670+
; COMMON: [[MIDDLE_BLOCK]]:
671+
; COMMON-NEXT: br label %[[EXIT:.*]]
668672
; COMMON: [[EXIT]]:
669-
; COMMON-NEXT: br label %[[SCALAR_PH:.*]]
670-
; COMMON: [[SCALAR_PH]]:
671-
; COMMON-NEXT: br label %[[EXIT1:.*]]
672-
; COMMON: [[EXIT1]]:
673673
; COMMON-NEXT: ret void
674674
;
675675
entry:
@@ -1303,7 +1303,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 {
13031303
; PRED-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
13041304
; PRED: [[VECTOR_MEMCHECK]]:
13051305
; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
1306-
; PRED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 16
1306+
; PRED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
13071307
; PRED-NEXT: [[TMP3:%.*]] = sub i64 [[C1]], [[A2]]
13081308
; PRED-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
13091309
; PRED-NEXT: [[TMP4:%.*]] = sub i64 [[C1]], [[B3]]
@@ -1312,42 +1312,42 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 {
13121312
; PRED-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
13131313
; PRED: [[VECTOR_PH]]:
13141314
; PRED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
1315-
; PRED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16
1315+
; PRED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 4
1316+
; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i8> poison, i8 [[Y]], i64 0
1317+
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
13161318
; PRED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
1317-
; PRED-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 4
1319+
; PRED-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 2
13181320
; PRED-NEXT: [[TMP9:%.*]] = sub i64 [[TMP0]], [[TMP8]]
13191321
; PRED-NEXT: [[TMP10:%.*]] = icmp ugt i64 [[TMP0]], [[TMP8]]
13201322
; PRED-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 0
1321-
; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[TMP0]])
1322-
; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[Y]], i64 0
1323-
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
1323+
; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[TMP0]])
1324+
; PRED-NEXT: [[TMP16:%.*]] = icmp ne <vscale x 4 x i8> [[BROADCAST_SPLAT]], zeroinitializer
1325+
; PRED-NEXT: [[TMP13:%.*]] = select <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i8> [[BROADCAST_SPLAT]], <vscale x 4 x i8> splat (i8 1)
13241326
; PRED-NEXT: br label %[[VECTOR_BODY:.*]]
13251327
; PRED: [[VECTOR_BODY]]:
13261328
; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1327-
; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
1329+
; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
13281330
; PRED-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
1329-
; PRED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP12]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison)
1330-
; PRED-NEXT: [[TMP13:%.*]] = uitofp <vscale x 16 x i8> [[WIDE_MASKED_LOAD]] to <vscale x 16 x float>
1331+
; PRED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr align 1 [[TMP12]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
1332+
; PRED-NEXT: [[TMP15:%.*]] = uitofp <vscale x 4 x i8> [[WIDE_MASKED_LOAD]] to <vscale x 4 x float>
13311333
; PRED-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
1332-
; PRED-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison)
1333-
; PRED-NEXT: [[TMP15:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_MASKED_LOAD5]], zeroinitializer
1334-
; PRED-NEXT: [[TMP16:%.*]] = select <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i1> [[TMP15]], <vscale x 16 x i1> zeroinitializer
1335-
; PRED-NEXT: [[TMP17:%.*]] = xor <vscale x 16 x i8> [[WIDE_MASKED_LOAD]], splat (i8 1)
1336-
; PRED-NEXT: [[TMP18:%.*]] = select <vscale x 16 x i1> [[TMP16]], <vscale x 16 x i8> [[BROADCAST_SPLAT]], <vscale x 16 x i8> splat (i8 1)
1337-
; PRED-NEXT: [[TMP19:%.*]] = udiv <vscale x 16 x i8> [[TMP17]], [[TMP18]]
1338-
; PRED-NEXT: [[TMP20:%.*]] = icmp ugt <vscale x 16 x i8> [[TMP19]], splat (i8 1)
1339-
; PRED-NEXT: [[TMP21:%.*]] = select <vscale x 16 x i1> [[TMP20]], <vscale x 16 x i32> zeroinitializer, <vscale x 16 x i32> splat (i32 255)
1340-
; PRED-NEXT: [[PREDPHI:%.*]] = select <vscale x 16 x i1> [[TMP15]], <vscale x 16 x i32> [[TMP21]], <vscale x 16 x i32> zeroinitializer
1341-
; PRED-NEXT: [[TMP22:%.*]] = zext <vscale x 16 x i8> [[WIDE_MASKED_LOAD]] to <vscale x 16 x i32>
1342-
; PRED-NEXT: [[TMP23:%.*]] = sub <vscale x 16 x i32> [[PREDPHI]], [[TMP22]]
1343-
; PRED-NEXT: [[TMP24:%.*]] = sitofp <vscale x 16 x i32> [[TMP23]] to <vscale x 16 x float>
1344-
; PRED-NEXT: [[TMP25:%.*]] = call <vscale x 16 x float> @llvm.fmuladd.nxv16f32(<vscale x 16 x float> [[TMP24]], <vscale x 16 x float> splat (float 3.000000e+00), <vscale x 16 x float> [[TMP13]])
1345-
; PRED-NEXT: [[TMP26:%.*]] = fptoui <vscale x 16 x float> [[TMP25]] to <vscale x 16 x i8>
1334+
; PRED-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr align 1 [[TMP14]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
1335+
; PRED-NEXT: [[TMP17:%.*]] = icmp eq <vscale x 4 x i8> [[WIDE_MASKED_LOAD5]], zeroinitializer
1336+
; PRED-NEXT: [[TMP18:%.*]] = xor <vscale x 4 x i8> [[WIDE_MASKED_LOAD]], splat (i8 1)
1337+
; PRED-NEXT: [[TMP19:%.*]] = udiv <vscale x 4 x i8> [[TMP18]], [[TMP13]]
1338+
; PRED-NEXT: [[TMP20:%.*]] = icmp ugt <vscale x 4 x i8> [[TMP19]], splat (i8 1)
1339+
; PRED-NEXT: [[TMP21:%.*]] = select <vscale x 4 x i1> [[TMP20]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> splat (i32 255)
1340+
; PRED-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP17]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[TMP21]]
1341+
; PRED-NEXT: [[TMP22:%.*]] = zext <vscale x 4 x i8> [[WIDE_MASKED_LOAD]] to <vscale x 4 x i32>
1342+
; PRED-NEXT: [[TMP23:%.*]] = sub <vscale x 4 x i32> [[PREDPHI]], [[TMP22]]
1343+
; PRED-NEXT: [[TMP24:%.*]] = sitofp <vscale x 4 x i32> [[TMP23]] to <vscale x 4 x float>
1344+
; PRED-NEXT: [[TMP25:%.*]] = call <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> [[TMP24]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x float> [[TMP15]])
1345+
; PRED-NEXT: [[TMP26:%.*]] = fptoui <vscale x 4 x float> [[TMP25]] to <vscale x 4 x i8>
13461346
; PRED-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[C]], i64 [[INDEX]]
1347-
; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP26]], ptr align 1 [[TMP27]], <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
1347+
; PRED-NEXT: call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP26]], ptr align 1 [[TMP27]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
13481348
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
1349-
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP11]])
1350-
; PRED-NEXT: [[TMP28:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
1349+
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP11]])
1350+
; PRED-NEXT: [[TMP28:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
13511351
; PRED-NEXT: [[TMP29:%.*]] = xor i1 [[TMP28]], true
13521352
; PRED-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
13531353
; PRED: [[MIDDLE_BLOCK]]:

llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,12 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
114114
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[N]], [[TMP11]]
115115
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
116116
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
117+
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[CONV6]], 0
118+
; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP16]], i64 [[CONV6]], i64 1
119+
; CHECK-NEXT: [[TMP19:%.*]] = sdiv i64 [[M]], [[TMP18]]
120+
; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
121+
; CHECK-NEXT: [[TMP28:%.*]] = mul i64 [[TMP19]], [[CONV61]]
122+
; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[X]], [[TMP20]]
117123
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
118124
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
119125
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
@@ -128,14 +134,8 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
128134
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
129135
; CHECK-NEXT: [[TMP22:%.*]] = icmp ule <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
130136
; CHECK-NEXT: [[TMP23:%.*]] = select <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i1> [[TMP22]], <vscale x 2 x i1> zeroinitializer
131-
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <vscale x 2 x i1> [[TMP23]], i32 0
132-
; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i64 [[CONV6]], i64 1
133-
; CHECK-NEXT: [[TMP26:%.*]] = sdiv i64 [[M]], [[TMP25]]
134-
; CHECK-NEXT: [[TMP27:%.*]] = trunc i64 [[TMP26]] to i32
135-
; CHECK-NEXT: [[TMP28:%.*]] = mul i64 [[TMP26]], [[CONV61]]
136137
; CHECK-NEXT: [[TMP29:%.*]] = sub i64 [[INDEX]], [[TMP28]]
137138
; CHECK-NEXT: [[TMP30:%.*]] = trunc i64 [[TMP29]] to i32
138-
; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[X]], [[TMP27]]
139139
; CHECK-NEXT: [[TMP32:%.*]] = add i32 [[TMP31]], [[TMP30]]
140140
; CHECK-NEXT: [[TMP33:%.*]] = sext i32 [[TMP32]] to i64
141141
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP33]]

0 commit comments

Comments
 (0)