diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir index 0428ada86041d..32b7247e60d62 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir @@ -1,10 +1,7 @@ // DEFINE: %{compile} = mlir-opt %s \ -// DEFINE: -transform-interpreter -test-transform-dialect-erase-schedule |\ -// DEFINE: mlir-opt --test-linalg-transform-patterns="test-decompose-tensor-pack"\ -// DEFINE: --test-transform-dialect-erase-schedule \ -// DEFINE: -one-shot-bufferize="bufferize-function-boundaries" \ -// DEFINE: -buffer-deallocation-pipeline="private-function-dynamic-ownership" \ -// DEFINE: -cse -canonicalize -test-lower-to-llvm -o %t +// DEFINE: -transform-interpreter -test-transform-dialect-erase-schedule |\ +// DEFINE: mlir-opt \ +// DEFINE: -test-lower-to-llvm -o %t // DEFINE: %{entry_point} = main // DEFINE: %{run} = mlir-cpu-runner %t -e %{entry_point} -entry-point-result=void \ // DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils @@ -84,12 +81,37 @@ func.func private @pack(%A: tensor<7x16xi32>) { } module @transforms attributes { transform.with_named_sequence } { - transform.named_sequence @__transform_main(%module: !transform.any_op {transform.readonly}) { + transform.named_sequence @__transform_main(%module: !transform.any_op {transform.consumed}) { %pack = transform.structured.match ops{["tensor.pack"]} in %module : (!transform.any_op) -> !transform.any_op - %tiled_linalg_op_p, %loops:2 = transform.structured.tile_using_for %pack tile_sizes [1, 1] + // 1. Tile so that we can decompose tensor.pack into tensor.pad and other + // Ops (see step 2) + %tiled_pack_op_p, %loops:2 = transform.structured.tile_using_for %pack tile_sizes [1, 1] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) + // 2. 
Decompose the tiled Op into (trimmed for brevity): + // + // %padded = tensor.pad %slice_of_A (..) : + // tensor<?x1xi32> to tensor<8x1xi32> + // %inserted_slice = tensor.insert_slice %padded into %slice_of_A_pack (...) : + // tensor<8x1xi32> into tensor<1x1x?x1xi32> + // + // NOTE: no tile is transposed, hence no linalg.transpose + %func_1 = transform.get_parent_op %tiled_pack_op_p {isolated_from_above} : (!transform.any_op) -> !transform.any_op + transform.apply_patterns to %func_1 { + transform.apply_patterns.linalg.decompose_pack_unpack + } : !transform.any_op + + // 3. Bufferize before lowering to LLVM + %bufferize = transform.bufferization.one_shot_bufferize %module + {bufferize_function_boundaries=true} : (!transform.any_op) -> !transform.any_op + + // 4. Canonicalize + %func_2 = transform.structured.match ops{["func.func"]} in %bufferize : (!transform.any_op) -> !transform.op<"func.func"> + transform.apply_patterns to %func_2 { + transform.apply_patterns.canonicalization + } : !transform.op<"func.func"> + transform.yield } }