1- // RUN: mlir-opt -gpu-map-parallel-loops -split-input-file %s | FileCheck %s
1+ // RUN: mlir-opt -gpu-map-parallel-loops -split-input-file %s | FileCheck %s --check-prefix=OUTER
2+ // RUN: mlir-opt -gpu-map-parallel-loops="mapping-policy=innermost-first" -split-input-file %s | FileCheck %s --check-prefix=INNER
23
34func.func @parallel_loop (%arg0 : index , %arg1 : index , %arg2 : index ,
45 %arg3 : index ) {
@@ -14,14 +15,23 @@ func.func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
1415 return
1516}
1617
17- // CHECK-LABEL: func @parallel_loop(
18- // CHECK: scf.parallel
19- // CHECK: scf.parallel
20- // CHECK: {mapping = [#gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0) -> (d0)>,
21- // CHECK-SAME: #gpu.loop_dim_map<processor = thread_y, map = (d0) -> (d0), bound = (d0) -> (d0)>]}
22- // CHECK: {mapping = [#gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) -> (d0)>,
23- // CHECK-SAME: #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) -> (d0)>]}
24- // CHECK-NOT: mapping
18+ // OUTER-LABEL: func @parallel_loop(
19+ // OUTER: scf.parallel
20+ // OUTER: scf.parallel
21+ // OUTER: {mapping = [#gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0) -> (d0)>,
22+ // OUTER-SAME: #gpu.loop_dim_map<processor = thread_y, map = (d0) -> (d0), bound = (d0) -> (d0)>]}
23+ // OUTER: {mapping = [#gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) -> (d0)>,
24+ // OUTER-SAME: #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) -> (d0)>]}
25+ // OUTER-NOT: mapping
26+
27+ // INNER-LABEL: func @parallel_loop(
28+ // INNER: scf.parallel
29+ // INNER: scf.parallel
30+ // INNER: {mapping = [#gpu.loop_dim_map<processor = thread_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
31+ // INNER-SAME: #gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0) -> (d0)>]}
32+ // INNER: {mapping = [#gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
33+ // INNER-SAME: #gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) -> (d0)>]}
34+ // INNER-NOT: mapping
2535
2636// -----
2737
@@ -42,20 +52,38 @@ func.func @parallel_loop_4d(%arg0 : index, %arg1 : index, %arg2 : index,
4252 return
4353}
4454
45- // CHECK-LABEL: func @parallel_loop_4d(
46- // CHECK: scf.parallel
47- // CHECK: scf.parallel
48- // CHECK: scf.parallel
49- // CHECK: {mapping = [#gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>,
50- // CHECK-SAME: #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>,
51- // CHECK-SAME: #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>,
52- // CHECK-SAME: #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>]}
53- // CHECK: {mapping = [#gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0) -> (d0)>,
54- // CHECK-SAME: #gpu.loop_dim_map<processor = thread_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
55- // CHECK-SAME: #gpu.loop_dim_map<processor = thread_z, map = (d0) -> (d0), bound = (d0) -> (d0)>,
56- // CHECK-SAME: #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>]}
57- // CHECK: {mapping = [#gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) -> (d0)>,
58- // CHECK-SAME: #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
59- // CHECK-SAME: #gpu.loop_dim_map<processor = block_z, map = (d0) -> (d0), bound = (d0) -> (d0)>,
60- // CHECK-SAME: #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>]}
61- // CHECK-NOT: mapping
55+ // OUTER-LABEL: func @parallel_loop_4d(
56+ // OUTER: scf.parallel
57+ // OUTER: scf.parallel
58+ // OUTER: scf.parallel
59+ // OUTER: {mapping = [#gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>,
60+ // OUTER-SAME: #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>,
61+ // OUTER-SAME: #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>,
62+ // OUTER-SAME: #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>]}
63+ // OUTER: {mapping = [#gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0) -> (d0)>,
64+ // OUTER-SAME: #gpu.loop_dim_map<processor = thread_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
65+ // OUTER-SAME: #gpu.loop_dim_map<processor = thread_z, map = (d0) -> (d0), bound = (d0) -> (d0)>,
66+ // OUTER-SAME: #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>]}
67+ // OUTER: {mapping = [#gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) -> (d0)>,
68+ // OUTER-SAME: #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
69+ // OUTER-SAME: #gpu.loop_dim_map<processor = block_z, map = (d0) -> (d0), bound = (d0) -> (d0)>,
70+ // OUTER-SAME: #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>]}
71+ // OUTER-NOT: mapping
72+
73+ // INNER-LABEL: func @parallel_loop_4d(
74+ // INNER: scf.parallel
75+ // INNER: scf.parallel
76+ // INNER: scf.parallel
77+ // INNER: {mapping = [#gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>,
78+ // INNER-SAME: #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>,
79+ // INNER-SAME: #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>,
80+ // INNER-SAME: #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>]}
81+ // INNER: {mapping = [#gpu.loop_dim_map<processor = thread_z, map = (d0) -> (d0), bound = (d0) -> (d0)>,
82+ // INNER-SAME: #gpu.loop_dim_map<processor = thread_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
83+ // INNER-SAME: #gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0) -> (d0)>,
84+ // INNER-SAME: #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>]}
85+ // INNER: {mapping = [#gpu.loop_dim_map<processor = block_z, map = (d0) -> (d0), bound = (d0) -> (d0)>,
86+ // INNER-SAME: #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
87+ // INNER-SAME: #gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) -> (d0)>,
88+ // INNER-SAME: #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>]}
89+ // INNER-NOT: mapping
0 commit comments