diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 2dcd897330d1e..8c8b1b85ef5a3 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -2178,11 +2178,25 @@ vectorizePadOpPrecondition(tensor::PadOp padOp,
                                      inputVectorSizes)))
     return failure();
 
-  if (llvm::any_of(padOp.getLow(), [](Value v) {
-        std::optional<int64_t> res = getConstantIntValue(v);
-        return !res.has_value() || res.value() != 0;
+  // Padding with non-zero low pad values is not supported, unless the
+  // corresponding result dim is 1. Supporting non-unit result dims would
+  // require shifting the results right by the amount of low padding for the
+  // low-padded dims. Low padding on a unit result dim is supported because
+  // the input size of that dim is then dynamically zero (the low pad and
+  // the input dim size must sum to one), so the generated mask is all-zero
+  // for that dim: the lowering only sets mask bits up to the input dim
+  // size, which is zero here. The masked read therefore produces the pad
+  // value, which is exactly what we want in this case. If the low pad turns
+  // out to be dynamically zero, the lowering is also correct, since no
+  // shift is necessary.
+  if (llvm::any_of(llvm::enumerate(padOp.getLow()), [&](const auto &en) {
+        Value padValue = en.value();
+        unsigned pos = en.index();
+        std::optional<int64_t> pad = getConstantIntValue(padValue);
+        return (!pad.has_value() || pad.value() != 0) &&
+               resultTensorShape[pos] != 1;
       })) {
-    LDBG("low pad must all be zero: " << padOp << "\n");
+    LDBG("low pad must be zero for all non-unit dims: " << padOp << "\n");
     return failure();
   }
 
diff --git a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
index 2d1f0191eb798..f653a4852b074 100644
--- a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
@@ -305,6 +305,33 @@ module attributes {transform.with_named_sequence} {
 
 // -----
 
+// Padding with non-zero low pad values is not supported, unless the
+// corresponding result dim is 1. Here `%l0` is a non-zero low pad applied
+// to a non-unit result dimension, which makes this case unsupported.
+func.func @tensor_pad_non_zero_low_pad(
+    %0 : tensor<?x?xf32>, %h0 : index, %h1 : index, %l0 : index)
+    -> tensor<2x4xf32> {
+  // expected-error @+3 {{Attempted to vectorize, but failed}}
+  %cst = arith.constant 42.43 : f32
+  %c0 = arith.constant 0 : index
+  %1 = tensor.pad %0 low[%l0, %c0] high[%h0, %h1] {
+    ^bb0(%hh1: index, %hh2: index):
+      tensor.yield %cst : f32
+  } : tensor<?x?xf32> to tensor<2x4xf32>
+  return %1: tensor<2x4xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+      : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [2, 4] : !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
 // With dynamically shaped source, the vectorizer infers the vector size for
 // xfer Ops from the destination tensor and, conservatively, assumes
 // out-of-bounds accesses. Out-of-bounds accesses require a pad value, but
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
index c6d9ec6215715..299be1296aa66 100644
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -664,6 +664,48 @@ module attributes {transform.with_named_sequence} {
   }
 }
 
+// -----
+// This case is supported: the non-zero low pad `%l0` is applied to a unit
+// result dimension. Non-zero low padding of non-unit result dimensions
+// remains unsupported.
+// CHECK-LABEL: func @test_masked_vectorize_non_zero_low_pad_unit_res_dim
+func.func @test_masked_vectorize_non_zero_low_pad_unit_res_dim(
+  %0 : tensor<?x?xf32>, %h0 : index, %h1 : index, %l0 : index)
+    -> tensor<1x4xf32>
+{
+  // CHECK-DAG: %[[C42:.*]] = arith.constant 4.243000e+01 : f32
+  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+  // CHECK: %[[C0_1:.*]] = arith.constant 0 : index
+  // CHECK-DAG: %[[D0:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+  // CHECK-DAG: %[[D1:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+  // CHECK: %[[MASK:.*]] = vector.create_mask %[[D0]], %[[D1]] : vector<1x4xi1>
+  // CHECK: %[[MASKED_READ:.*]] = vector.mask %[[MASK]] {
+  // CHECK-SAME:   vector.transfer_read %{{.*}}[%[[C0_1]], %[[C0_1]]], %[[C42]]
+  // CHECK-SAME:   {in_bounds = [true, true]} : tensor<?x?xf32>, vector<1x4xf32>
+  // CHECK-SAME: } : vector<1x4xi1> -> vector<1x4xf32>
+  // CHECK-DAG: %[[EMPTY:.*]] = tensor.empty() : tensor<1x4xf32>
+  // CHECK-DAG: %[[C0_2:.*]] = arith.constant 0 : index
+  // CHECK: %[[MASKED_WRITE:.*]] = vector.transfer_write %[[MASKED_READ]], %[[EMPTY]][%[[C0_2]], %[[C0_2]]]
+  // CHECK-SAME:   {in_bounds = [true, true]} : vector<1x4xf32>, tensor<1x4xf32>
+  // CHECK: return %[[MASKED_WRITE]] : tensor<1x4xf32>
+  %cst = arith.constant 42.43 : f32
+  %c0 = arith.constant 0 : index
+  %1 = tensor.pad %0 low[%l0, %c0] high[%h0, %h1] {
+    ^bb0(%hh1: index, %hh2: index):
+      tensor.yield %cst : f32
+  } : tensor<?x?xf32> to tensor<1x4xf32>
+  return %1: tensor<1x4xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+      : (!transform.any_op) -> !transform.any_op
+    transform.structured.vectorize %0 vector_sizes [1, 4] : !transform.any_op
+    transform.yield
+  }
+}
+
 // -----
 
 // Input identical as the test in vectorization-with-patterns.mlir. Output is
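
Illustrative sketch (not part of the patch): the relaxed precondition relies on how the pad vectorization builds its mask from the source tensor's dynamic sizes, as exercised by the positive test above. Below is a minimal hand-written version of that lowering for a unit result dim with a non-zero low pad, using hypothetical value names and assuming a ?x? source padded to 1x4 as in the test:

  // Since low[0] + dim(%src, 0) must equal the unit result size 1, a
  // non-zero %l0 forces dim(%src, 0) to be 0 at runtime, so the mask is
  // all-false along dim 0 and the masked read yields the pad value %pad,
  // matching the semantics of the low-padded row.
  %d0 = tensor.dim %src, %c0 : tensor<?x?xf32>
  %d1 = tensor.dim %src, %c1 : tensor<?x?xf32>
  %mask = vector.create_mask %d0, %d1 : vector<1x4xi1>
  %read = vector.mask %mask {
    vector.transfer_read %src[%c0, %c0], %pad {in_bounds = [true, true]}
      : tensor<?x?xf32>, vector<1x4xf32>
  } : vector<1x4xi1> -> vector<1x4xf32>

If the low pad is instead dynamically zero, dim(%src, 0) is 1, the mask covers the whole unit dim, and the read returns the source element, so no shifting is ever required in either case.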