After applying #72920 locally, I am hitting the following error with the example below:
builtin.module attributes { transform.with_named_sequence } {
func.func @matmul_static_dispatch_0_matmul_128x512x256_i32(%3 : tensor<128x256xi32>, %4 : tensor<256x512xi32>) -> tensor<128x512xi32> {
%c0 = arith.constant 0 : index
%c0_i32 = arith.constant 0 : i32
%5 = tensor.empty() : tensor<128x512xi32>
%6 = linalg.fill ins(%c0_i32 : i32) outs(%5 : tensor<128x512xi32>) -> tensor<128x512xi32>
%7 = linalg.matmul ins(%3, %4 : tensor<128x256xi32>, tensor<256x512xi32>) outs(%6 : tensor<128x512xi32>) -> tensor<128x512xi32>
return %7 : tensor<128x512xi32>
}
transform.named_sequence @cleanup(%variant_op: !transform.any_op {transform.readonly}) {
%func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
transform.apply_patterns to %func {
transform.apply_patterns.linalg.tiling_canonicalization
transform.apply_patterns.scf.for_loop_canonicalization
transform.apply_patterns.canonicalization
} : !transform.any_op
transform.apply_cse to %func : !transform.any_op
transform.yield
}
transform.named_sequence @__transform_main(%variant_op: !transform.any_op {transform.readonly}) {
%ops = transform.structured.match ops{["linalg.fill", "linalg.matmul"]} in %variant_op : (!transform.any_op) -> !transform.any_op
%fill, %matmul = transform.split_handle %ops : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
// First level tile to forall with tile_sizes [8, 8].
%tiled_matmul, %forall =
transform.structured.tile_using_forall %matmul tile_sizes [8, 8]
( mapping = [#gpu.block<y>, #gpu.block<x>] ) : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
// Fuse fill operation into the loop
%fused_fill, %fused_for_all = transform.structured.fuse_into_containing_op %fill into %forall : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
// Pad operation.
%padded, %pad, %__ = transform.structured.pad %tiled_matmul {
padding_values=[0 : i32, 0 : i32, 0 : i32],
padding_dimensions=[0, 1, 2],
pack_paddings=[1, 1, 1],
copy_back_op="none"
} : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
%pad_dps = transform.structured.rewrite_in_destination_passing_style %pad : (!transform.any_op) -> !transform.any_op
// Promote the operands to shared memory.
%padded_lhs = transform.get_producer_of_operand %padded[0] : (!transform.any_op) -> (!transform.any_op)
%padded_lhs_buffer, %padded_lhs_new = transform.structured.bufferize_to_allocation %padded_lhs
{memory_space = 1, bufferize_destination_only} : !transform.any_op
%padded_rhs = transform.get_producer_of_operand %padded[1] : (!transform.any_op) -> (!transform.any_op)
%padded_rhs_buffer, %padded_rhs_new = transform.structured.bufferize_to_allocation %padded_rhs
{memory_space = 1, bufferize_destination_only} : !transform.any_op
// Run canonicalizations.
transform.include @cleanup failures(propagate) (%variant_op) : (!transform.any_op) -> ()
// Find the matmul and fill again
%tiled_ops = transform.structured.match ops{["linalg.fill", "linalg.matmul"]} in %fused_for_all : (!transform.any_op) -> !transform.any_op
%tiled_fill_op, %tiled_padded_matmul = transform.split_handle %tiled_ops : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
// Second level tile to forall with tile_sizes [4, 4].
%tiled_matmul_1, %forall_1 =
transform.structured.tile_using_forall %tiled_padded_matmul tile_sizes [4, 4]
( mapping = [#gpu.thread<y>, #gpu.thread<x>] ) : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
%fused_fill_2, %fused_for_all_2 = transform.structured.fuse_into_containing_op %tiled_fill_op into %forall_1 : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op)
// Pad operation.
%padded_1, %pad_1, %_ = transform.structured.pad %tiled_matmul_1 {
padding_values=[0 : i32, 0 : i32, 0 : i32],
padding_dimensions=[0, 1, 2],
pack_paddings=[0, 0, 1],
copy_back_op="linalg.copy"
} : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
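// Note (my reading of transform.structured.pad, may be off): the three results
// are the padded matmul, the generated tensor.pad ops, and the copy-back ops,
// i.e. %_ should point at linalg.copy ops given copy_back_op="linalg.copy",
// while %pad_1 should point at tensor.pad ops.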
%pad_1_dps = transform.structured.rewrite_in_destination_passing_style %pad_1 : (!transform.any_op) -> !transform.any_op
transform.yield
}
}
The error itself does not seem (at least AFAICS) to be related to #72920. Running the following command throws the error below:
mlir-opt --transform-interpreter repro.mlir
repro.mlir:72:18: error: expensive checks failure: operation mismatch, expected linalg.copy
%pad_1_dps = transform.structured.rewrite_in_destination_passing_style %pad_1 : (!transform.any_op) -> !transform.any_op
^
repro.mlir:7:10: note: payload op: tensor.pad
%7 = linalg.matmul ins(%3, %4 : tensor<128x256xi32>, tensor<256x512xi32>) outs(%6 : tensor<128x512xi32>) -> tensor<128x512xi32>
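For context, my (possibly wrong) understanding of what rewrite_in_destination_passing_style should produce for one of these tensor.pad ops: materialize the destination with tensor.empty, fill it with the padding value, and insert the source into it. Roughly, with placeholder names and illustrative shapes (%src, %c0_i32, and the sizes below are not taken from the repro):

```mlir
// Sketch only: expected DPS expansion of a tensor.pad with a constant padding value.
%empty  = tensor.empty() : tensor<12x12xi32>
%filled = linalg.fill ins(%c0_i32 : i32) outs(%empty : tensor<12x12xi32>) -> tensor<12x12xi32>
%dps    = tensor.insert_slice %src into %filled[0, 0] [8, 8] [1, 1] : tensor<8x8xi32> into tensor<12x12xi32>
```

If that is right, the "expected linalg.copy" part of the diagnostic is surprising, since %pad_1 should point at tensor.pad ops going into this transform.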
I tried to debug this, and the error happens on the last instruction:
%pad_1_dps = transform.structured.rewrite_in_destination_passing_style %pad_1 : (!transform.any_op) -> !transform.any_op
Stepping through the transformation, I do see the underlying rewrite performing the conversion to destination-passing style and returning a linalg.copy, but when stepping through the rest of the transform dialect interpreter code, that replacement seems to get lost somewhere, leading to this error. It might be that I am doing something wrong, so please do let me know if that is the case.
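If it helps triage, here is a reduced test I would try next (hypothetical, I have not verified whether it fails the same way): run the DPS rewrite on a lone tensor.pad, without the tiling/padding pipeline around it, to see whether the expensive-checks tracking trips on the rewrite itself or on its interaction with the earlier handles:

```mlir
// Hypothetical reduced reproducer: a single tensor.pad rewritten to DPS.
builtin.module attributes { transform.with_named_sequence } {
  func.func @pad_only(%arg0: tensor<4x4xi32>) -> tensor<8x8xi32> {
    %c0_i32 = arith.constant 0 : i32
    // Pad a 4x4 tensor up to 8x8 with zeros.
    %0 = tensor.pad %arg0 low[0, 0] high[4, 4] {
    ^bb0(%i: index, %j: index):
      tensor.yield %c0_i32 : i32
    } : tensor<4x4xi32> to tensor<8x8xi32>
    return %0 : tensor<8x8xi32>
  }
  transform.named_sequence @__transform_main(%root: !transform.any_op {transform.readonly}) {
    %pad = transform.structured.match ops{["tensor.pad"]} in %root : (!transform.any_op) -> !transform.any_op
    %dps = transform.structured.rewrite_in_destination_passing_style %pad : (!transform.any_op) -> !transform.any_op
    transform.yield
  }
}
```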