|
1 | | -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 |
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 6 |
2 | 2 | ; RUN: opt -passes="loop-vectorize" -pass-remarks-output=%t.yaml -S %s | FileCheck %s |
3 | 3 | ; RUN: FileCheck --input-file=%t.yaml --check-prefix=REMARKS %s |
4 | 4 |
|
5 | | -; REMARKS: the cost-model indicates that vectorization is not beneficial |
| 5 | +target triple = "arm64-apple-macosx" |
6 | 6 |
|
7 | | -; Test for https://github.com/llvm/llvm-project/issues/116375. |
8 | | -define void @test_i24_load_for(ptr noalias %src, ptr %dst) { |
9 | | -; CHECK-LABEL: define void @test_i24_load_for( |
10 | | -; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]]) { |
11 | | -; CHECK-NEXT: [[ENTRY:.*]]: |
12 | | -; CHECK-NEXT: br label %[[LOOP:.*]] |
13 | | -; CHECK: [[LOOP]]: |
14 | | -; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] |
15 | | -; CHECK-NEXT: [[FOR:%.*]] = phi i24 [ 0, %[[ENTRY]] ], [ [[FOR_NEXT:%.*]], %[[LOOP]] ] |
16 | | -; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 |
17 | | -; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i24, ptr [[SRC]], i16 [[IV]] |
18 | | -; CHECK-NEXT: [[FOR_NEXT]] = load i24, ptr [[GEP_SRC]], align 1 |
19 | | -; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i24, ptr [[DST]], i16 [[IV]] |
20 | | -; CHECK-NEXT: store i24 [[FOR]], ptr [[GEP_DST]], align 4 |
21 | | -; CHECK-NEXT: [[EC:%.*]] = icmp eq i16 [[IV_NEXT]], 1000 |
22 | | -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] |
23 | | -; CHECK: [[EXIT]]: |
24 | | -; CHECK-NEXT: ret void |
| 7 | +; REMARKS: Recipe with invalid costs prevented vectorization at VF=(vscale x 1): load |
| 8 | +; Test case for https://github.com/llvm/llvm-project/issues/160792. |
| 9 | +define void @replicate_sdiv_conditional(ptr noalias %a, ptr noalias %b, ptr noalias %c) #0 { |
| 10 | +; CHECK-LABEL: define void @replicate_sdiv_conditional( |
| 11 | +; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) #[[ATTR0:[0-9]+]] { |
| 12 | +; CHECK-NEXT: [[ENTRY:.*:]] |
| 13 | +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() |
| 14 | +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 |
| 15 | +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 64, [[TMP1]] |
| 16 | +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] |
| 17 | +; CHECK: [[VECTOR_PH]]: |
| 18 | +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() |
| 19 | +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 |
| 20 | +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]] |
| 21 | +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]] |
| 22 | +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| 23 | +; CHECK: [[VECTOR_BODY]]: |
| 24 | +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| 25 | +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] |
| 26 | +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP4]], align 4 |
| 27 | +; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], zeroinitializer |
| 28 | +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] |
| 29 | +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP6]], i32 4, <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i32> poison) |
| 30 | +; CHECK-NEXT: [[TMP7:%.*]] = sext <vscale x 4 x i32> [[WIDE_MASKED_LOAD]] to <vscale x 4 x i64> |
| 31 | +; CHECK-NEXT: [[TMP8:%.*]] = ashr <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 1) |
| 32 | +; CHECK-NEXT: [[TMP9:%.*]] = add <vscale x 4 x i32> [[TMP8]], [[WIDE_LOAD]] |
| 33 | +; CHECK-NEXT: [[TMP10:%.*]] = sext <vscale x 4 x i32> [[TMP9]] to <vscale x 4 x i64> |
| 34 | +; CHECK-NEXT: [[TMP11:%.*]] = select <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i64> [[TMP7]], <vscale x 4 x i64> splat (i64 1) |
| 35 | +; CHECK-NEXT: [[TMP12:%.*]] = sdiv <vscale x 4 x i64> [[TMP10]], [[TMP11]] |
| 36 | +; CHECK-NEXT: [[TMP13:%.*]] = trunc <vscale x 4 x i64> [[TMP12]] to <vscale x 4 x i32> |
| 37 | +; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP5]], <vscale x 4 x i32> [[TMP13]], <vscale x 4 x i32> [[WIDE_LOAD]] |
| 38 | +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] |
| 39 | +; CHECK-NEXT: store <vscale x 4 x i32> [[PREDPHI]], ptr [[TMP14]], align 4 |
| 40 | +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] |
| 41 | +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] |
| 42 | +; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] |
| 43 | +; CHECK: [[MIDDLE_BLOCK]]: |
| 44 | +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]] |
| 45 | +; CHECK-NEXT: br i1 [[CMP_N]], [[FOR_END:label %.*]], label %[[SCALAR_PH]] |
| 46 | +; CHECK: [[SCALAR_PH]]: |
25 | 47 | ; |
26 | 48 | entry: |
27 | | - br label %loop |
| 49 | + br label %loop.header |
28 | 50 |
|
29 | | -loop: |
30 | | - %iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ] |
31 | | - %for = phi i24 [ 0, %entry ], [ %for.next, %loop ] |
32 | | - %iv.next = add i16 %iv, 1 |
33 | | - %gep.src = getelementptr inbounds i24, ptr %src, i16 %iv |
34 | | - %for.next = load i24, ptr %gep.src, align 1 |
35 | | - %gep.dst = getelementptr inbounds i24, ptr %dst, i16 %iv |
36 | | - store i24 %for, ptr %gep.dst |
37 | | - %ec = icmp eq i16 %iv.next, 1000 |
38 | | - br i1 %ec, label %exit, label %loop |
| 51 | +loop.header: |
| 52 | + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] |
| 53 | + %gep.c = getelementptr inbounds i32, ptr %c, i64 %iv |
| 54 | + %val.c = load i32, ptr %gep.c, align 4 |
| 55 | + %cmp = icmp slt i32 %val.c, 0 |
| 56 | + br i1 %cmp, label %if.then, label %loop.latch |
39 | 57 |
|
40 | | -exit: |
| 58 | +if.then: |
| 59 | + %gep.b = getelementptr inbounds i32, ptr %b, i64 %iv |
| 60 | + %val.b = load i32, ptr %gep.b, align 4 |
| 61 | + %sext = sext i32 %val.b to i64 |
| 62 | + %shr = ashr i32 %val.b, 1 |
| 63 | + %add = add i32 %shr, %val.c |
| 64 | + %conv = sext i32 %add to i64 |
| 65 | + %div = sdiv i64 %conv, %sext |
| 66 | + %trunc = trunc i64 %div to i32 |
| 67 | + br label %loop.latch |
| 68 | + |
| 69 | +loop.latch: |
| 70 | + %result = phi i32 [ %trunc, %if.then ], [ %val.c, %loop.header ] |
| 71 | + %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv |
| 72 | + store i32 %result, ptr %gep.a, align 4 |
| 73 | + %iv.next = add nuw nsw i64 %iv, 1 |
| 74 | + %exit = icmp eq i64 %iv.next, 64 |
| 75 | + br i1 %exit, label %for.end, label %loop.header |
| 76 | + |
| 77 | +for.end: |
41 | 78 | ret void |
42 | 79 | } |
| 80 | + |
| 81 | +attributes #0 = { "target-cpu"="neoverse-512tvb" } |
0 commit comments