From 701afb3b14bf5f55337956cf94a8f973d92a4c3d Mon Sep 17 00:00:00 2001
From: Adam Siemieniuk
Date: Tue, 10 Dec 2024 14:32:15 +0100
Subject: [PATCH] [mlir][xegpu] Support boundary checks only for block instructions

Constrains the Vector lowering to apply boundary checks only to data
transfers operating on block shapes (vectors of rank 2 or higher).
This further aligns the lowering with the current Xe instructions'
restrictions.
---
 .../VectorToXeGPU/VectorToXeGPU.cpp           | 19 ++++++++++++++-----
 .../VectorToXeGPU/load-to-xegpu.mlir          |  2 +-
 .../VectorToXeGPU/store-to-xegpu.mlir         |  2 +-
 .../VectorToXeGPU/transfer-read-to-xegpu.mlir | 13 +++++++++++++
 .../transfer-write-to-xegpu.mlir              | 13 +++++++++++++
 5 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
index 215e1b1b87452..1232d8795d4dc 100644
--- a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
+++ b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
@@ -82,6 +82,10 @@ static LogicalResult transferPreconditions(PatternRewriter &rewriter,
         xferOp, "Buffer must be contiguous in the innermost dimension");
 
   unsigned vecRank = vecTy.getRank();
+  if (xferOp.hasOutOfBoundsDim() && vecRank < 2)
+    return rewriter.notifyMatchFailure(
+        xferOp, "Boundary check is available only for block instructions.");
+
   AffineMap map = xferOp.getPermutationMap();
   if (!map.isProjectedPermutation(/*allowZeroInResults=*/false))
     return rewriter.notifyMatchFailure(xferOp, "Unsupported permutation map");
@@ -255,9 +259,12 @@ struct LoadLowering : public OpRewritePattern<vector::LoadOp> {
     if (failed(storeLoadPreconditions(rewriter, loadOp, vecTy)))
       return failure();
 
+    // Boundary check is available only for block instructions.
+    bool boundaryCheck = vecTy.getRank() > 1;
+
     auto descType = xegpu::TensorDescType::get(
         vecTy.getShape(), vecTy.getElementType(), /*array_length=*/1,
-        /*boundary_check=*/true, xegpu::MemorySpace::Global);
+        boundaryCheck, xegpu::MemorySpace::Global);
     xegpu::CreateNdDescOp ndDesc = createNdDescriptor(
         rewriter, loc, descType, loadOp.getBase(), loadOp.getIndices());
 
@@ -285,10 +292,12 @@ struct StoreLowering : public OpRewritePattern<vector::StoreOp> {
     if (failed(storeLoadPreconditions(rewriter, storeOp, vecTy)))
      return failure();
 
-    auto descType =
-        xegpu::TensorDescType::get(vecTy.getShape(), vecTy.getElementType(),
-                                   /*array_length=*/1, /*boundary_check=*/true,
-                                   xegpu::MemorySpace::Global);
+    // Boundary check is available only for block instructions.
+    bool boundaryCheck = vecTy.getRank() > 1;
+
+    auto descType = xegpu::TensorDescType::get(
+        vecTy.getShape(), vecTy.getElementType(),
+        /*array_length=*/1, boundaryCheck, xegpu::MemorySpace::Global);
     xegpu::CreateNdDescOp ndDesc = createNdDescriptor(
         rewriter, loc, descType, storeOp.getBase(), storeOp.getIndices());
 
diff --git a/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
index e2a506f8ad5ab..7cef17df79dd2 100644
--- a/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
@@ -12,7 +12,7 @@ func.func @load_1D_vector(%source: memref<8x16x32xf32>, %offset: index) -> vector<8xf32> {
 // CHECK: %[[DESC:.+]] = xegpu.create_nd_tdesc
 // CHECK-SAME: %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
 // CHECK-SAME: memref<8x16x32xf32> -> !xegpu.tensor_desc<8xf32,
-// CHECK-SAME: boundary_check = true
+// CHECK-SAME: boundary_check = false
 // CHECK: %[[VEC:.+]] = xegpu.load_nd %[[DESC]]{{.*}}-> vector<8xf32>
 // CHECK: return %[[VEC]]
 
diff --git a/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
index 3d45407c2486d..4f069ebc39db3 100644
--- a/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
@@ -14,7 +14,7 @@ func.func @store_1D_vector(%vec: vector<8xf32>,
 // CHECK: %[[DESC:.+]] = xegpu.create_nd_tdesc
 // CHECK-SAME: %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
 // CHECK-SAME: memref<8x16x32xf32> -> !xegpu.tensor_desc<8xf32,
-// CHECK-SAME: boundary_check = true
+// CHECK-SAME: boundary_check = false
 // CHECK: xegpu.store_nd %[[VEC]], %[[DESC]] : vector<8xf32>
 
 // -----
diff --git a/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
index 4841ecbb62e80..497eb86cea835 100644
--- a/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
@@ -119,6 +119,19 @@ func.func @no_load_out_of_bounds_non_zero_pad(%source: memref<32x64xf32>,
 
 // -----
 
+func.func @no_load_out_of_bounds_1D_vector(%source: memref<8x16x32xf32>,
+    %offset: index) -> vector<8xf32> {
+  %c0 = arith.constant 0.0 : f32
+  %0 = vector.transfer_read %source[%offset, %offset, %offset], %c0
+    {in_bounds = [false]} : memref<8x16x32xf32>, vector<8xf32>
+  return %0 : vector<8xf32>
+}
+
+// CHECK-LABEL: @no_load_out_of_bounds_1D_vector(
+// CHECK: vector.transfer_read
+
+// -----
+
 func.func @no_load_masked(%source : memref<4xf32>,
     %offset : index) -> vector<4xf32> {
   %c0 = arith.constant 0.0 : f32
diff --git a/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
index 076760fe21dc8..91e3fb3841f6e 100644
--- a/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
@@ -164,3 +164,16 @@ func.func @no_store_unsupported_map(%vec: vector<8x16xf32>,
 
 // CHECK-LABEL: @no_store_unsupported_map(
 // CHECK: vector.transfer_write
+
+// -----
+
+func.func @no_store_out_of_bounds_1D_vector(%vec: vector<8xf32>,
+    %source: memref<8x16x32xf32>, %offset: index) {
+  vector.transfer_write %vec, %source[%offset, %offset, %offset]
+    {in_bounds = [false]}
+    : vector<8xf32>, memref<8x16x32xf32>
+  return
+}
+
+// CHECK-LABEL: @no_store_out_of_bounds_1D_vector(
+// CHECK: vector.transfer_write
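
Reviewer note (illustrative, not part of the patch): the new precondition
rejects only out-of-bounds transfers on vectors of rank < 2. A block-shaped
(2D) transfer with out-of-bounds dimensions and a zero padding value should
still convert, with boundary_check left enabled in the resulting descriptor.
Below is a minimal FileCheck-style sketch of that positive case; the function
name and CHECK lines are hypothetical and not taken from the test suite:

func.func @load_out_of_bounds_2D_vector(%source: memref<32x64xf32>,
    %offset: index) -> vector<8x16xf32> {
  %c0 = arith.constant 0.0 : f32
  %0 = vector.transfer_read %source[%offset, %offset], %c0
    {in_bounds = [false, false]} : memref<32x64xf32>, vector<8x16xf32>
  return %0 : vector<8x16xf32>
}

// CHECK-LABEL: @load_out_of_bounds_2D_vector(
// CHECK: xegpu.create_nd_tdesc
// CHECK-SAME: boundary_check = true
// CHECK: xegpu.load_nd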