|
10 | 10 |
|
11 | 11 | /// End-to-end test for tensor.pack where one of the inner tile sizes is |
12 | 12 | /// dynamic. |
13 | | -/// |
14 | | -/// Note, ATM this is a relatively simple example, with no vectorization and |
15 | | -/// the dynamic tile size being a compile-time constant. The intention is to |
16 | | -/// incrementally expand the config to something much more complex. |
17 | 13 |
|
18 | 14 | func.func @main() { |
19 | 15 | // Allocate and initialise the inputs |
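
For context, the pack op under test looks roughly as follows in the payload IR. This is a sketch reconstructed from the shapes that appear in the comments further down (the 7x16 input, the padding value of 123, and an inner tile size of 8 passed as a dynamic value); the value names are illustrative:

  %pad_val = arith.constant 123 : i32
  %tile_size = arith.constant 8 : index  // passed as a dynamic value, constant at compile time
  %A_pack = tensor.pack %A padding_value(%pad_val : i32)
      inner_dims_pos = [0, 1] inner_tiles = [%tile_size, 1]
      into %A_pack_empty : tensor<7x16xi32> -> tensor<?x16x?x1xi32>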
@@ -89,26 +85,49 @@ module @transforms attributes { transform.with_named_sequence } { |
89 | 85 | %tiled_pack_op_p, %loops:2 = transform.structured.tile_using_for %pack tile_sizes [1, 1] |
90 | 86 | : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) |
91 | 87 |
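
Tiling the two outer dimensions with tile sizes [1, 1] wraps the pack in an scf.for nest that packs one tile per iteration, along these lines (trimmed for brevity, in the same style as the comments below):

  scf.for %i = (...) iter_args(...) {
    scf.for %j = (...) iter_args(...) {
      %slice_of_A = tensor.extract_slice %A (...) : tensor<7x16xi32> to tensor<?x?xi32>
      %pack_of_slice = tensor.pack %slice_of_A (...) into %slice_of_A_pack (...) :
        tensor<?x?xi32> -> tensor<1x1x?x1xi32>
      (...)
    }
  }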
|
92 | | - // 2. Decompose the tiled Op into (trimmed for brevity): |
| 88 | + // 2. Decompose the tiled pack Op into (trimmed for brevity): |
93 | 89 | // |
94 | 90 | // %padded = tensor.pad %slice_of_A (..) : |
95 | 91 | // tensor<?x?xi32> to tensor<8x1xi32> |
96 | 92 | // %inserted_slice = tensor.insert_slice %padded into %slice_of_A_pack (...) : |
97 | 93 | // tensor<8x1xi32> into tensor<1x1x?x1xi32> |
98 | 94 | // |
99 | | - // NOTE: no tile is transposed, hence no linalg.transpose |
100 | | - %func_1 = transform.get_parent_op %tiled_pack_op_p {isolated_from_above} : (!transform.any_op) -> !transform.any_op |
101 | | - transform.apply_patterns to %func_1 { |
| 95 | + // (NOTE: no tile is transposed, hence no linalg.transpose) |
| 96 | + // |
| 97 | + // This is followed by the decomposition of the pad Op:
| 98 | + // |
| 99 | + // %c123_i32 = arith.constant 123 : i32 |
| 100 | + // %slice_of_A = tensor.extract_slice %A[%3, %arg3] [%4, %5] [1, 1] : |
| 101 | + // tensor<7x16xi32> to tensor<?x?xi32> |
| 102 | + // %empty = tensor.empty() : tensor<8x1xi32> |
| 103 | + // %fill = linalg.fill ins(%c123_i32 : i32) outs(%empty : |
| 104 | + // tensor<8x1xi32>) -> tensor<8x1xi32> |
| 105 | + // %inserted_slice = tensor.insert_slice %slice_of_A into %fill[0, 0] [%4, %5] [1, 1] : |
| 106 | + // tensor<?x?xi32> into tensor<8x1xi32> |
| 107 | + // |
| 108 | + %func_op = transform.get_parent_op %tiled_pack_op_p {isolated_from_above} : (!transform.any_op) -> !transform.op<"func.func"> |
| 109 | + transform.apply_patterns to %func_op { |
102 | 110 | transform.apply_patterns.linalg.decompose_pack_unpack |
103 | | - } : !transform.any_op |
| 111 | + transform.apply_patterns.linalg.decompose_pad |
| 112 | + } : !transform.op<"func.func"> |
| 113 | + |
| 114 | + // 3. Vectorize linalg.fill. |
| 115 | + // Vector sizes match the inner tiles in the payload IR. |
| 116 | + %fill = transform.structured.match ops{["linalg.fill"]} in %func_op : (!transform.op<"func.func">) -> !transform.any_op |
| 117 | + transform.structured.vectorize %fill vector_sizes [8, 1] : !transform.any_op |
| 118 | + |
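
After this step, the linalg.fill from the decomposition above is rewritten as a vector transfer. Roughly (assuming the vectorizer's masks fold away against the static 8x1 destination):

  %bcast = vector.broadcast %c123_i32 : i32 to vector<8x1xi32>
  %fill_vec = vector.transfer_write %bcast, %empty[%c0, %c0] {in_bounds = [true, true]}
      : vector<8x1xi32>, tensor<8x1xi32>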
| 119 | + transform.apply_patterns to %func_op { |
| 120 | + transform.apply_patterns.tensor.fold_tensor_subset_ops |
| 121 | + transform.apply_patterns.canonicalization |
| 122 | + } : !transform.op<"func.func"> |
104 | 123 |
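
The fold_tensor_subset_ops patterns fold tensor.extract_slice into vector.transfer_read and tensor.insert_slice into vector.transfer_write, so the vector ops read from and write to the enclosing tensors directly. A generic illustration (not necessarily the exact fold that fires in this test):

  // Before:
  %w = vector.transfer_write %v, %t[%c0, %c0] {in_bounds = [true, true]}
      : vector<8x1xi32>, tensor<8x1xi32>
  %r = tensor.insert_slice %w into %dest[2, 0] [8, 1] [1, 1]
      : tensor<8x1xi32> into tensor<16x1xi32>
  // After:
  %r = vector.transfer_write %v, %dest[%c2, %c0] {in_bounds = [true, true]}
      : vector<8x1xi32>, tensor<16x1xi32>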
|
105 | | - // 3. Bufferize before lowering to LLVM
| 124 | + // 4. Bufferize before lowering to LLVM
106 | 125 | %bufferize = transform.bufferization.one_shot_bufferize %module |
107 | 126 | {bufferize_function_boundaries=true} : (!transform.any_op) -> !transform.any_op |
108 | 127 |
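
With bufferize_function_boundaries=true, one-shot bufferization rewrites tensor values to memrefs across the whole module, including function signatures, which by default get a fully dynamic layout. Schematically (with a hypothetical @foo):

  // Before bufferization:
  func.func @foo(%t: tensor<7x16xi32>) -> tensor<7x16xi32>
  // After bufferization:
  func.func @foo(%m: memref<7x16xi32, strided<[?, ?], offset: ?>>)
      -> memref<7x16xi32, strided<[?, ?], offset: ?>>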
|
109 | | - // 4. Canonicalize
| 128 | + // 5. Canonicalize
110 | | - %func_2 = transform.structured.match ops{["func.func"]} in %bufferize : (!transform.any_op) -> !transform.op<"func.func"> |
111 | | - transform.apply_patterns to %func_2 { |
| 129 | + %func_op_bufferized = transform.structured.match ops{["func.func"]} in %bufferize : (!transform.any_op) -> !transform.op<"func.func"> |
| 130 | + transform.apply_patterns to %func_op_bufferized { |
112 | 131 | transform.apply_patterns.canonicalization |
113 | 132 | } : !transform.op<"func.func"> |
114 | 133 |
|
|