
Conversation

@adam-smnk
Contributor

Constrains Vector lowering to apply boundary checks only to data transfers operating on block shapes.

This further aligns lowering with the current Xe instructions' restrictions.
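
For illustration only (not part of the patch; the function names below are made up), a minimal sketch of what the constraint means in practice, following the tests touched by this diff: a rank-1 transfer with a possibly out-of-bounds dimension is no longer lowered to XeGPU, while plain 1D loads and stores still lower but receive tensor descriptors with boundary_check = false.

  // Rank-1 transfer marked as possibly out-of-bounds: the precondition added in
  // this patch rejects it, so the vector.transfer_read is left untouched.
  func.func @read_1D_maybe_oob(%source: memref<8x16x32xf32>, %offset: index) -> vector<8xf32> {
    %pad = arith.constant 0.0 : f32
    %v = vector.transfer_read %source[%offset, %offset, %offset], %pad
      {in_bounds = [false]} : memref<8x16x32xf32>, vector<8xf32>
    return %v : vector<8xf32>
  }

  // Plain 1D load: still lowered, but the descriptor is now created with
  // boundary_check = false, since only block (rank >= 2) shapes keep the check.
  func.func @load_1D(%source: memref<8x16x32xf32>, %offset: index) -> vector<8xf32> {
    %v = vector.load %source[%offset, %offset, %offset]
      : memref<8x16x32xf32>, vector<8xf32>
    return %v : vector<8xf32>
  }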

@llvmbot
Member

llvmbot commented Dec 10, 2024

@llvm/pr-subscribers-mlir-gpu

Author: Adam Siemieniuk (adam-smnk)

Changes

Constrains Vector lowering to apply boundary checks only to data transfers operating on block shapes.

This further aligns lowering with the current Xe instructions' restrictions.


Full diff: https://github.com/llvm/llvm-project/pull/119380.diff

5 Files Affected:

  • (modified) mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp (+14-5)
  • (modified) mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir (+1-1)
  • (modified) mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir (+1-1)
  • (modified) mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir (+13)
  • (modified) mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir (+13)
diff --git a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
index 215e1b1b874520..1232d8795d4dcf 100644
--- a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
+++ b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp
@@ -82,6 +82,10 @@ static LogicalResult transferPreconditions(PatternRewriter &rewriter,
         xferOp, "Buffer must be contiguous in the innermost dimension");
 
   unsigned vecRank = vecTy.getRank();
+  if (xferOp.hasOutOfBoundsDim() && vecRank < 2)
+    return rewriter.notifyMatchFailure(
+        xferOp, "Boundary check is available only for block instructions.");
+
   AffineMap map = xferOp.getPermutationMap();
   if (!map.isProjectedPermutation(/*allowZeroInResults=*/false))
     return rewriter.notifyMatchFailure(xferOp, "Unsupported permutation map");
@@ -255,9 +259,12 @@ struct LoadLowering : public OpRewritePattern<vector::LoadOp> {
     if (failed(storeLoadPreconditions(rewriter, loadOp, vecTy)))
       return failure();
 
+    // Boundary check is available only for block instructions.
+    bool boundaryCheck = vecTy.getRank() > 1;
+
     auto descType = xegpu::TensorDescType::get(
         vecTy.getShape(), vecTy.getElementType(), /*array_length=*/1,
-        /*boundary_check=*/true, xegpu::MemorySpace::Global);
+        boundaryCheck, xegpu::MemorySpace::Global);
     xegpu::CreateNdDescOp ndDesc = createNdDescriptor(
         rewriter, loc, descType, loadOp.getBase(), loadOp.getIndices());
 
@@ -285,10 +292,12 @@ struct StoreLowering : public OpRewritePattern<vector::StoreOp> {
     if (failed(storeLoadPreconditions(rewriter, storeOp, vecTy)))
       return failure();
 
-    auto descType =
-        xegpu::TensorDescType::get(vecTy.getShape(), vecTy.getElementType(),
-                                   /*array_length=*/1, /*boundary_check=*/true,
-                                   xegpu::MemorySpace::Global);
+    // Boundary check is available only for block instructions.
+    bool boundaryCheck = vecTy.getRank() > 1;
+
+    auto descType = xegpu::TensorDescType::get(
+        vecTy.getShape(), vecTy.getElementType(),
+        /*array_length=*/1, boundaryCheck, xegpu::MemorySpace::Global);
     xegpu::CreateNdDescOp ndDesc = createNdDescriptor(
         rewriter, loc, descType, storeOp.getBase(), storeOp.getIndices());
 
diff --git a/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
index e2a506f8ad5abd..7cef17df79dd2a 100644
--- a/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/load-to-xegpu.mlir
@@ -12,7 +12,7 @@ func.func @load_1D_vector(%source: memref<8x16x32xf32>, %offset: index) -> vecto
 // CHECK:       %[[DESC:.+]] = xegpu.create_nd_tdesc
 // CHECK-SAME:    %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
 // CHECK-SAME:    memref<8x16x32xf32> -> !xegpu.tensor_desc<8xf32,
-// CHECK-SAME:    boundary_check = true
+// CHECK-SAME:    boundary_check = false
 // CHECK:       %[[VEC:.+]] = xegpu.load_nd %[[DESC]]{{.*}}-> vector<8xf32>
 // CHECK:       return %[[VEC]]
 
diff --git a/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
index 3d45407c2486d6..4f069ebc39db37 100644
--- a/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/store-to-xegpu.mlir
@@ -14,7 +14,7 @@ func.func @store_1D_vector(%vec: vector<8xf32>,
 // CHECK:       %[[DESC:.+]] = xegpu.create_nd_tdesc
 // CHECK-SAME:    %[[SRC]][%[[OFFSET]], %[[OFFSET]], %[[OFFSET]]]
 // CHECK-SAME:    memref<8x16x32xf32> -> !xegpu.tensor_desc<8xf32,
-// CHECK-SAME:    boundary_check = true
+// CHECK-SAME:    boundary_check = false
 // CHECK:       xegpu.store_nd %[[VEC]], %[[DESC]] : vector<8xf32>
 
 // -----
diff --git a/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
index 4841ecbb62e807..497eb86cea8353 100644
--- a/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/transfer-read-to-xegpu.mlir
@@ -119,6 +119,19 @@ func.func @no_load_out_of_bounds_non_zero_pad(%source: memref<32x64xf32>,
 
 // -----
 
+func.func @no_load_out_of_bounds_1D_vector(%source: memref<8x16x32xf32>,
+    %offset: index) -> vector<8xf32> {
+  %c0 = arith.constant 0.0 : f32
+  %0 = vector.transfer_read %source[%offset, %offset, %offset], %c0
+    {in_bounds = [false]} : memref<8x16x32xf32>, vector<8xf32>
+  return %0 : vector<8xf32>
+}
+
+// CHECK-LABEL: @no_load_out_of_bounds_1D_vector(
+// CHECK:       vector.transfer_read
+
+// -----
+
 func.func @no_load_masked(%source : memref<4xf32>,
     %offset : index) -> vector<4xf32> {
   %c0 = arith.constant 0.0 : f32
diff --git a/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir b/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
index 076760fe21dc86..91e3fb3841f6eb 100644
--- a/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
+++ b/mlir/test/Conversion/VectorToXeGPU/transfer-write-to-xegpu.mlir
@@ -164,3 +164,16 @@ func.func @no_store_unsupported_map(%vec: vector<8x16xf32>,
 
 // CHECK-LABEL: @no_store_unsupported_map(
 // CHECK:       vector.transfer_write
+
+// -----
+
+func.func @no_store_out_of_bounds_1D_vector(%vec: vector<8xf32>,
+    %source: memref<8x16x32xf32>, %offset: index) {
+  vector.transfer_write %vec, %source[%offset, %offset, %offset]
+    {in_bounds = [false]}
+    : vector<8xf32>, memref<8x16x32xf32>
+  return
+}
+
+// CHECK-LABEL: @no_store_out_of_bounds_1D_vector(
+// CHECK:       vector.transfer_write

@llvmbot
Member

llvmbot commented Dec 10, 2024

@llvm/pr-subscribers-mlir
@adam-smnk
Contributor Author

Follow up on #110811

Contributor

@chencha3 left a comment


LGTM. Thanks

@adam-smnk adam-smnk merged commit 4c597d4 into llvm:main Dec 13, 2024
11 checks passed
