diff --git a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
index 215e1b1b87452..1232d8795d4dc 100644
--- a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
+++ b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
@@ -82,6 +82,10 @@ static LogicalResult transferPreconditions(PatternRewriter &rewriter,
         xferOp, "Buffer must be contiguous in the innermost dimension");
 
   unsigned vecRank = vecTy.getRank();
+  if (xferOp.hasOutOfBoundsDim() && vecRank < 2)
+    return rewriter.notifyMatchFailure(
+        xferOp, "Boundary check is available only for block instructions.");
+
   AffineMap map = xferOp.getPermutationMap();
   if (!map.isProjectedPermutation(/*allowZeroInResults=*/false))
     return rewriter.notifyMatchFailure(xferOp, "Unsupported permutation map");
@@ -255,9 +259,12 @@ struct LoadLowering : public OpRewritePattern<vector::LoadOp> {
     if (failed(storeLoadPreconditions(rewriter, loadOp, vecTy)))
       return failure();
 
+    // Boundary check is available only for block instructions.
+    bool boundaryCheck = vecTy.getRank() > 1;
+
     auto descType = xegpu::TensorDescType::get(
         vecTy.getShape(), vecTy.getElementType(), /*array_length=*/1,
-        /*boundary_check=*/true, xegpu::MemorySpace::Global);
+        boundaryCheck, xegpu::MemorySpace::Global);
 
     xegpu::CreateNdDescOp ndDesc = createNdDescriptor(
         rewriter, loc, descType, loadOp.getBase(), loadOp.getIndices());
@@ -285,10 +292,12 @@ struct StoreLowering : public OpRewritePattern<vector::StoreOp> {
     if (failed(storeLoadPreconditions(rewriter, storeOp, vecTy)))
       return failure();
 
-    auto descType =
-        xegpu::TensorDescType::get(vecTy.getShape(), vecTy.getElementType(),
-                                   /*array_length=*/1, /*boundary_check=*/true,
-                                   xegpu::MemorySpace::Global);
+    // Boundary check is available only for block instructions.
+    bool boundaryCheck = vecTy.getRank() > 1;
+
+    auto descType = xegpu::TensorDescType::get(
+        vecTy.getShape(), vecTy.getElementType(),
+        /*array_length=*/1, boundaryCheck, xegpu::MemorySpace::Global);
 
     xegpu::CreateNdDescOp ndDesc = createNdDescriptor(
         rewriter, loc, descType, storeOp.getBase(), storeOp.getIndices());
diff --git a/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
index e2a506f8ad5ab..7cef17df79dd2 100644
--- a/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
@@ -12,7 +12,7 @@ func.func @load_1D_vector(%source: memref<8x16x32xf32>, %offset: index) -> vecto
 // CHECK: %[[DESC:.+]] = xegpu.create_nd_tdesc
 // CHECK-SAME: %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
 // CHECK-SAME: memref<8x16x32xf32> -> !xegpu.tensor_desc<8xf32,
-// CHECK-SAME: boundary_check = true
+// CHECK-SAME: boundary_check = false
 // CHECK: %[[VEC:.+]] = xegpu.load_nd %[[DESC]]{{.*}}-> vector<8xf32>
 // CHECK: return %[[VEC]]
 
diff --git a/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
index 3d45407c2486d..4f069ebc39db3 100644
--- a/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
@@ -14,7 +14,7 @@ func.func @store_1D_vector(%vec: vector<8xf32>,
 // CHECK: %[[DESC:.+]] = xegpu.create_nd_tdesc
 // CHECK-SAME: %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
 // CHECK-SAME: memref<8x16x32xf32> -> !xegpu.tensor_desc<8xf32,
-// CHECK-SAME: boundary_check = true
+// CHECK-SAME: boundary_check = false
 // CHECK: xegpu.store_nd %[[VEC]], %[[DESC]] : vector<8xf32>
 
 // -----
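Note (reviewer aside, not part of the patch): the new `bool boundaryCheck = vecTy.getRank() > 1` only drops the boundary check for rank-1 descriptors; rank-2 and higher loads and stores keep it. A minimal illustrative case in the style of the tests above (the function name is hypothetical and the shapes are for illustration only):

func.func @load_2D_vector(%source: memref<8x16x32xf32>,
    %offset: index) -> vector<8x16xf32> {
  // Rank-2 vector: LoadLowering computes boundaryCheck = true here.
  %0 = vector.load %source[%offset, %offset, %offset]
    : memref<8x16x32xf32>, vector<8x16xf32>
  return %0 : vector<8x16xf32>
}

After conversion this should still produce a descriptor with `boundary_check = true`, i.e. the pre-patch behavior is preserved for block (2-D) instructions; only the 1-D cases flip to `boundary_check = false`.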
diff --git a/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
index 4841ecbb62e80..497eb86cea835 100644
--- a/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
@@ -119,6 +119,19 @@ func.func @no_load_out_of_bounds_non_zero_pad(%source: memref<32x64xf32>,
 
 // -----
 
+func.func @no_load_out_of_bounds_1D_vector(%source: memref<8x16x32xf32>,
+    %offset: index) -> vector<8xf32> {
+  %c0 = arith.constant 0.0 : f32
+  %0 = vector.transfer_read %source[%offset, %offset, %offset], %c0
+    {in_bounds = [false]} : memref<8x16x32xf32>, vector<8xf32>
+  return %0 : vector<8xf32>
+}
+
+// CHECK-LABEL: @no_load_out_of_bounds_1D_vector(
+// CHECK: vector.transfer_read
+
+// -----
+
 func.func @no_load_masked(%source : memref<4xf32>,
     %offset : index) -> vector<4xf32> {
   %c0 = arith.constant 0.0 : f32
diff --git a/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
index 076760fe21dc8..91e3fb3841f6e 100644
--- a/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
@@ -164,3 +164,16 @@ func.func @no_store_unsupported_map(%vec: vector<8x16xf32>,
 
 // CHECK-LABEL: @no_store_unsupported_map(
 // CHECK: vector.transfer_write
+
+// -----
+
+func.func @no_store_out_of_bounds_1D_vector(%vec: vector<8xf32>,
+    %source: memref<8x16x32xf32>, %offset: index) {
+  vector.transfer_write %vec, %source[%offset, %offset, %offset]
+    {in_bounds = [false]}
+    : vector<8xf32>, memref<8x16x32xf32>
+  return
+}
+
+// CHECK-LABEL: @no_store_out_of_bounds_1D_vector(
+// CHECK: vector.transfer_write
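Likewise for the transfer paths (illustrative sketch, not part of the patch): the `xferOp.hasOutOfBoundsDim() && vecRank < 2` precondition rejects only rank-1 out-of-bounds transfers, so a rank-2 out-of-bounds read with a zero padding value should still be accepted and lowered. A hypothetical test case:

func.func @load_out_of_bounds_2D_vector(%source: memref<32x64xf32>,
    %offset: index) -> vector<8x16xf32> {
  %c0 = arith.constant 0.0 : f32
  // Rank 2, so the new `vecRank < 2` guard does not fire.
  %0 = vector.transfer_read %source[%offset, %offset], %c0
    {in_bounds = [false, false]} : memref<32x64xf32>, vector<8x16xf32>
  return %0 : vector<8x16xf32>
}

Here the expectation would be a rewrite into xegpu.create_nd_tdesc plus xegpu.load_nd rather than a leftover vector.transfer_read, since boundary checking remains available for block (rank >= 2) accesses.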