From 750a3d8095cead3a3cc41e5fb4f5f285ce3188fc Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Thu, 2 Jan 2025 09:58:11 +0000 Subject: [PATCH 1/2] [mlir][tensor] Add e2e test for tensor.unpack with dynamic tile sizes Adds an end-to-end test for `tensor.unpack` with dynamic inner tile sizes. While relatively simple (e.g., no vectorization), this example required a few fixes in handling `tensor.unpack` (and similar fixes for `tensor.pack` before that): * #119379, #121393, #121400. The end goal for this test is to incrementally increase its complexity and to work towards scalable tile sizes. Note, this PR complements #115698 in which similar test for `tensor.pack` was added. --- .../Linalg/CPU/pack-dynamic-inner-tile.mlir | 5 +- .../Linalg/CPU/unpack-dynamic-inner-tile.mlir | 110 ++++++++++++++++++ 2 files changed, 113 insertions(+), 2 deletions(-) create mode 100644 mlir/test/Integration/Dialect/Linalg/CPU/unpack-dynamic-inner-tile.mlir diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir index 0d2fd977c8d55..bf6fa985bbd3b 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir @@ -9,7 +9,8 @@ // RUN: rm -f %t && %{compile} && %{run} | FileCheck %s /// End-to-end test for tensor.pack where one of the inner tile sizes is -/// dynamic. +/// dynamic. See unpack-dynamic-inner-tile.mlir for a similar test for +/// tensor.unpack. 
func.func @main() { // Allocate and initialise the inputs @@ -46,7 +47,7 @@ func.func private @pack(%A: tensor<7x16xi32>) { %A_cast = tensor.cast %A_pack : tensor<?x16x?x1xi32> to tensor<*xi32> // Print the results - // CHECK: Unranked Memref base@ = 0{{.*}} rank = 4 offset = 0 sizes = [1, 16, 8, 1] strides = [128, 8, 1, 1] data = + // CHECK: Unranked Memref base@ = 0x{{.*}} rank = 4 offset = 0 sizes = [1, 16, 8, 1] strides = [128, 8, 1, 1] data = // Tile 1: (8 x 1) // CHECK-NEXT: 1 // CHECK-NEXT: 2 diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/unpack-dynamic-inner-tile.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/unpack-dynamic-inner-tile.mlir new file mode 100644 index 0000000000000..1dd73e6a42c7d --- /dev/null +++ b/mlir/test/Integration/Dialect/Linalg/CPU/unpack-dynamic-inner-tile.mlir @@ -0,0 +1,110 @@ +// DEFINE: %{compile} = mlir-opt %s \ +// DEFINE: -transform-interpreter -test-transform-dialect-erase-schedule |\ +// DEFINE: mlir-opt \ +// DEFINE: -test-lower-to-llvm -o %t +// DEFINE: %{entry_point} = main +// DEFINE: %{run} = mlir-cpu-runner %t -e %{entry_point} -entry-point-result=void \ +// DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils + +// RUN: rm -f %t && %{compile} && %{run} | FileCheck %s + +/// End-to-end test for tensor.unpack where one of the inner tile sizes is +/// dynamic. See pack-dynamic-inner-tile.mlir for a similar test for tensor.pack. 
+ +func.func @main() { + // Allocate and initialise the inputs + %A_alloc = tensor.empty() : tensor<7x3xi32> + + %A = arith.constant dense<[ + [[[1], + [2], + [3], + [4], + [5], + [6], + [7], + [123]], + [[8], + [9], + [10], + [11], + [12], + [13], + [14], + [123]], + [[15], + [16], + [17], + [18], + [19], + [20], + [21], + [123]]] + ]> : tensor<1x3x8x1xi32> + + %A_cast = tensor.cast %A : tensor<1x3x8x1xi32> to tensor<?x3x?x1xi32> + func.call @unpack(%A_cast) : (tensor<?x3x?x1xi32>) -> () + + return +} + +func.func private @unpack(%A: tensor<?x3x?x1xi32>) { + %c1 = arith.constant 1 : index + %pad_val = arith.constant 123 : i32 + + // Dynamic tile size + %tile_size = arith.constant 8 : index + %A_unpack_empty = tensor.empty() : tensor<7x3xi32> + + %A_unpack = tensor.unpack %A + inner_dims_pos = [0, 1] + inner_tiles = [%tile_size, 1] + into %A_unpack_empty : tensor<?x3x?x1xi32> -> tensor<7x3xi32> + %A_cast = tensor.cast %A_unpack : tensor<7x3xi32> to tensor<*xi32> + + // Print the results + // CHECK: Unranked Memref base@ = 0x{{.*}} rank = 2 offset = 0 sizes = [7, 3] strides = [3, 1] data = + // CHECK-NEXT: [1, 8, 15], + // CHECK-NEXT: [2, 9, 16], + // CHECK-NEXT: [3, 10, 17], + // CHECK-NEXT: [4, 11, 18], + // CHECK-NEXT: [5, 12, 19], + // CHECK-NEXT: [6, 13, 20], + // CHECK-NEXT: [7, 14, 21] + call @printMemrefI32(%A_cast) : (tensor<*xi32>) -> () + + return +} + +module @transforms attributes { transform.with_named_sequence } { + transform.named_sequence @__transform_main(%module: !transform.any_op {transform.consume}) { + %pack = transform.structured.match ops{["tensor.unpack"]} in %module : (!transform.any_op) -> !transform.any_op + + // 1. Tile so that we can decompose tensor.unpack + // Ops (see step 2) + %c8 = transform.param.constant 8 : i64 -> !transform.param<i64> + %tiled_pack_op_p, %loops:2 = transform.structured.tile_using_for %pack tile_sizes [%c8, 1] + : (!transform.any_op, !transform.param<i64>) -> (!transform.any_op, !transform.any_op, !transform.any_op) + + // 2. 
Decompose the tiled unpack Op into tensor.extract_slice + tensor.insert_slice: + %func_op = transform.get_parent_op %tiled_pack_op_p {isolated_from_above} : (!transform.any_op) -> !transform.op<"func.func"> + transform.apply_patterns to %func_op { + transform.apply_patterns.linalg.decompose_pack_unpack + transform.apply_patterns.linalg.decompose_pad + } : !transform.op<"func.func"> + + // 3. Bufferize before lowering to LLVM + %bufferize = transform.bufferization.one_shot_bufferize %module + {bufferize_function_boundaries=true} : (!transform.any_op) -> !transform.any_op + + // 4. Canonicalize + %func_op_bufferized = transform.structured.match ops{["func.func"]} in %bufferize : (!transform.any_op) -> !transform.op<"func.func"> + transform.apply_patterns to %func_op_bufferized { + transform.apply_patterns.canonicalization + } : !transform.op<"func.func"> + + transform.yield + } +} + +func.func private @printMemrefI32(%ptr : tensor<*xi32>) From ca6c1f5dca286099d5cd0857f3057de79faedba6 Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Wed, 8 Jan 2025 17:25:22 +0000 Subject: [PATCH 2/2] fixup! [mlir][tensor] Add e2e test for tensor.unpack with dynamic tile sizes Refine comments --- .../Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir | 3 +-- .../Dialect/Linalg/CPU/unpack-dynamic-inner-tile.mlir | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir index bf6fa985bbd3b..3a9f214ff43c3 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/pack-dynamic-inner-tile.mlir @@ -9,8 +9,7 @@ // RUN: rm -f %t && %{compile} && %{run} | FileCheck %s /// End-to-end test for tensor.pack where one of the inner tile sizes is -/// dynamic. See unpack-dynamic-inner-tile.mlir for a similar test for -/// tensor.unpack. +/// dynamic. 
func.func @main() { // Allocate and initialise the inputs diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/unpack-dynamic-inner-tile.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/unpack-dynamic-inner-tile.mlir index 1dd73e6a42c7d..cae572ff3696b 100644 --- a/mlir/test/Integration/Dialect/Linalg/CPU/unpack-dynamic-inner-tile.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/unpack-dynamic-inner-tile.mlir @@ -9,7 +9,7 @@ // RUN: rm -f %t && %{compile} && %{run} | FileCheck %s /// End-to-end test for tensor.unpack where one of the inner tile sizes is -/// dynamic. See pack-dynamic-inner-tile.mlir for a similar test for tensor.pack. +/// dynamic. func.func @main() { // Allocate and initialise the inputs