From 3c4f715415b73fa06bb2878ee9b8d3efe6b0e398 Mon Sep 17 00:00:00 2001 From: Asher Mancinelli Date: Wed, 15 Oct 2025 10:48:42 -0700 Subject: [PATCH 1/5] [mlir][python] Add bindings for OpenACC dialect --- mlir/python/CMakeLists.txt | 9 +++++++++ mlir/python/mlir/dialects/OpenACCOps.td | 14 ++++++++++++++ mlir/python/mlir/dialects/openacc.py | 6 ++++++ mlir/test/python/dialects/openacc.py | 19 +++++++++++++++++++ 4 files changed, 48 insertions(+) create mode 100644 mlir/python/mlir/dialects/OpenACCOps.td create mode 100644 mlir/python/mlir/dialects/openacc.py create mode 100644 mlir/test/python/dialects/openacc.py diff --git a/mlir/python/CMakeLists.txt b/mlir/python/CMakeLists.txt index 9f5246de6bda0..c643d32e22174 100644 --- a/mlir/python/CMakeLists.txt +++ b/mlir/python/CMakeLists.txt @@ -134,6 +134,15 @@ declare_mlir_dialect_python_bindings( dialects/func.py DIALECT_NAME func) +declare_mlir_dialect_python_bindings( + ADD_TO_PARENT MLIRPythonSources.Dialects + ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir" + TD_FILE dialects/OpenACCOps.td + SOURCES + dialects/openacc.py + DIALECT_NAME acc + DEPENDS acc_common_td) + declare_mlir_dialect_python_bindings( ADD_TO_PARENT MLIRPythonSources.Dialects ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mlir" diff --git a/mlir/python/mlir/dialects/OpenACCOps.td b/mlir/python/mlir/dialects/OpenACCOps.td new file mode 100644 index 0000000000000..69a3002e73b81 --- /dev/null +++ b/mlir/python/mlir/dialects/OpenACCOps.td @@ -0,0 +1,14 @@ +//===-- OpenACCOps.td - Entry point for OpenACCOps bind ------------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef PYTHON_BINDINGS_OPENACC_OPS +#define PYTHON_BINDINGS_OPENACC_OPS + +include "mlir/Dialect/OpenACC/OpenACCOps.td" + +#endif diff --git a/mlir/python/mlir/dialects/openacc.py b/mlir/python/mlir/dialects/openacc.py new file mode 100644 index 0000000000000..e06830a9c48f3 --- /dev/null +++ b/mlir/python/mlir/dialects/openacc.py @@ -0,0 +1,6 @@ +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +from ._acc_ops_gen import * + diff --git a/mlir/test/python/dialects/openacc.py b/mlir/test/python/dialects/openacc.py new file mode 100644 index 0000000000000..3e67c3deab75d --- /dev/null +++ b/mlir/test/python/dialects/openacc.py @@ -0,0 +1,19 @@ +from mlir.ir import * +from mlir.dialects import openacc + +def run(f): + print("\nTEST:", f.__name__) + with Context(), Location.unknown(): + f() + return f + +@run +def testOpenACCKernel(): + module = Module.create() + with InsertionPoint(module.body): + openacc.KernelOp( + openacc.KernelType.parallel, + openacc.KernelModifier.seq, + openacc.KernelModifier.seq, + ) + print(module) From 9b99be31db4c3a347caff06838900160df6ed862 Mon Sep 17 00:00:00 2001 From: Asher Mancinelli Date: Wed, 15 Oct 2025 12:38:36 -0700 Subject: [PATCH 2/5] Add tests --- mlir/test/python/dialects/openacc.py | 135 +++++++++++++++++++++++++-- 1 file changed, 127 insertions(+), 8 deletions(-) diff --git a/mlir/test/python/dialects/openacc.py b/mlir/test/python/dialects/openacc.py index 3e67c3deab75d..bd607febe21cc 100644 --- a/mlir/test/python/dialects/openacc.py +++ b/mlir/test/python/dialects/openacc.py @@ -1,19 +1,138 @@ -from mlir.ir import * -from mlir.dialects import openacc +# RUN: python %s | FileCheck %s +from mlir.ir import ( + Context, + 
FunctionType, + Location, + Module, + InsertionPoint, + IntegerType, + IndexType, + MemRefType, + F32Type, + Block, + ArrayAttr, + Attribute, + UnitAttr, + StringAttr, + DenseI32ArrayAttr, + ShapedType, +) +from mlir.dialects import openacc, func, arith, memref + def run(f): - print("\nTEST:", f.__name__) + print("\n// TEST:", f.__name__) with Context(), Location.unknown(): f() return f + @run -def testOpenACCKernel(): +def testManualReconstructedKernel(): module = Module.create() + + # Add required module attributes + module.operation.attributes["dlti.dl_spec"] = Attribute.parse("#dlti.dl_spec<>") + module.operation.attributes["gpu.container_module"] = UnitAttr.get() + + i32 = IntegerType.get_signless(32) + i64 = IntegerType.get_signless(64) + f32 = F32Type.get() + dynamic = ShapedType.get_dynamic_size() + memref_f32_1d_any = MemRefType.get([dynamic], f32) + with InsertionPoint(module.body): - openacc.KernelOp( - openacc.KernelType.parallel, - openacc.KernelModifier.seq, - openacc.KernelModifier.seq, + function_type = FunctionType.get( + [memref_f32_1d_any, memref_f32_1d_any, i64], [] ) + f = func.FuncOp( + type=function_type, + name="memcpy_idiom", + ) + f.attributes["sym_visibility"] = StringAttr.get("public") + + with InsertionPoint(f.add_entry_block()): + c1024 = arith.ConstantOp(i32, 1024) + c128 = arith.ConstantOp(i32, 128) + + parallel_op = openacc.ParallelOp( + asyncOperands=[], + waitOperands=[], + numGangs=[c1024], + numWorkers=[], + vectorLength=[c128], + reductionOperands=[], + privateOperands=[], + firstprivateOperands=[], + dataClauseOperands=[], + ) + + # Set required device_type and segment attributes to satisfy verifier + acc_device_none = ArrayAttr.get([Attribute.parse("#acc.device_type<none>")]) + parallel_op.numGangsDeviceType = acc_device_none + parallel_op.numGangsSegments = DenseI32ArrayAttr.get([1]) + parallel_op.vectorLengthDeviceType = acc_device_none + + parallel_block = Block.create_at_start(parent=parallel_op.region, arg_types=[]) + + with
InsertionPoint(parallel_block): + c0 = arith.ConstantOp(i64, 0) + c1 = arith.ConstantOp(i64, 1) + + loop_op = openacc.LoopOp( + results_=[], + lowerbound=[c0], + upperbound=[f.arguments[2]], + step=[c1], + gangOperands=[], + workerNumOperands=[], + vectorOperands=[], + tileOperands=[], + cacheOperands=[], + privateOperands=[], + reductionOperands=[], + firstprivateOperands=[], + ) + + # Set loop attributes: gang and independent on device_type + acc_device_none = ArrayAttr.get([Attribute.parse("#acc.device_type<none>")]) + loop_op.gang = acc_device_none + loop_op.independent = acc_device_none + + loop_block = Block.create_at_start(parent=loop_op.region, arg_types=[i64]) + + with InsertionPoint(loop_block): + idx0 = arith.index_cast( + out=IndexType.get(), in_=loop_block.arguments[0] + ) + val = memref.load(memref=f.arguments[1], indices=[idx0]) + idx1 = arith.index_cast( + out=IndexType.get(), in_=loop_block.arguments[0] + ) + memref.store(value=val, memref=f.arguments[0], indices=[idx1]) + openacc.YieldOp([]) + + openacc.YieldOp([]) + + func.ReturnOp([]) + print(module) + + # CHECK-LABEL: func.func public @memcpy_idiom + # CHECK-SAME: (%[[ARG0:.*]]: memref<?xf32>, %[[ARG1:.*]]: memref<?xf32>, %[[ARG2:.*]]: i64) { + # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1024 : i32 + # CHECK: %[[CONSTANT_1:.*]] = arith.constant 128 : i32 + # CHECK: acc.parallel num_gangs({%[[CONSTANT_0]] : i32}) vector_length(%[[CONSTANT_1]] : i32) { + # CHECK: %[[CONSTANT_2:.*]] = arith.constant 0 : i64 + # CHECK: %[[CONSTANT_3:.*]] = arith.constant 1 : i64 + # CHECK: acc.loop gang control(%[[VAL_0:.*]] : i64) = (%[[CONSTANT_2]] : i64) to (%[[ARG2]] : i64) step (%[[CONSTANT_3]] : i64) { + # CHECK: %[[INDEX_CAST_0:.*]] = arith.index_cast %[[VAL_0]] : i64 to index + # CHECK: %[[LOAD_0:.*]] = memref.load %[[ARG1]][%[[INDEX_CAST_0]]] : memref<?xf32> + # CHECK: %[[INDEX_CAST_1:.*]] = arith.index_cast %[[VAL_0]] : i64 to index + # CHECK: memref.store %[[LOAD_0]], %[[ARG0]][%[[INDEX_CAST_1]]] : memref<?xf32> + # CHECK: acc.yield + #
CHECK: } + # CHECK: acc.yield + # CHECK: } + # CHECK: return + # CHECK: } From 3a58184392f044f98102c4cfd1c8dcb388204372 Mon Sep 17 00:00:00 2001 From: Asher Mancinelli Date: Wed, 15 Oct 2025 12:46:36 -0700 Subject: [PATCH 3/5] Format --- mlir/python/mlir/dialects/openacc.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mlir/python/mlir/dialects/openacc.py b/mlir/python/mlir/dialects/openacc.py index e06830a9c48f3..057f71aed20a6 100644 --- a/mlir/python/mlir/dialects/openacc.py +++ b/mlir/python/mlir/dialects/openacc.py @@ -3,4 +3,3 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception from ._acc_ops_gen import * - From 3d3912c723b8473a9d14932e694b381c7fc12cbb Mon Sep 17 00:00:00 2001 From: Asher Mancinelli Date: Wed, 15 Oct 2025 14:15:13 -0700 Subject: [PATCH 4/5] Clean up test --- mlir/test/python/dialects/openacc.py | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/mlir/test/python/dialects/openacc.py b/mlir/test/python/dialects/openacc.py index bd607febe21cc..0b41a46ce9b47 100644 --- a/mlir/test/python/dialects/openacc.py +++ b/mlir/test/python/dialects/openacc.py @@ -31,10 +31,6 @@ def run(f): def testManualReconstructedKernel(): module = Module.create() - # Add required module attributes - module.operation.attributes["dlti.dl_spec"] = Attribute.parse("#dlti.dl_spec<>") - module.operation.attributes["gpu.container_module"] = UnitAttr.get() - i32 = IntegerType.get_signless(32) i64 = IntegerType.get_signless(64) f32 = F32Type.get() @@ -102,14 +98,9 @@ def testManualReconstructedKernel(): loop_block = Block.create_at_start(parent=loop_op.region, arg_types=[i64]) with InsertionPoint(loop_block): - idx0 = arith.index_cast( - out=IndexType.get(), in_=loop_block.arguments[0] - ) - val = memref.load(memref=f.arguments[1], indices=[idx0]) - idx1 = arith.index_cast( - out=IndexType.get(), in_=loop_block.arguments[0] - ) - memref.store(value=val, memref=f.arguments[0], indices=[idx1]) + idx = 
arith.index_cast(out=IndexType.get(), in_=loop_block.arguments[0]) + val = memref.load(memref=f.arguments[1], indices=[idx]) + memref.store(value=val, memref=f.arguments[0], indices=[idx]) openacc.YieldOp([]) openacc.YieldOp([]) @@ -118,8 +109,8 @@ def testManualReconstructedKernel(): print(module) - # CHECK-LABEL: func.func public @memcpy_idiom - # CHECK-SAME: (%[[ARG0:.*]]: memref<?xf32>, %[[ARG1:.*]]: memref<?xf32>, %[[ARG2:.*]]: i64) { + # CHECK-LABEL: func.func public @memcpy_idiom( + # CHECK-SAME: %[[ARG0:.*]]: memref<?xf32>, %[[ARG1:.*]]: memref<?xf32>, %[[ARG2:.*]]: i64) { # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1024 : i32 # CHECK: %[[CONSTANT_1:.*]] = arith.constant 128 : i32 # CHECK: acc.parallel num_gangs({%[[CONSTANT_0]] : i32}) vector_length(%[[CONSTANT_1]] : i32) { @@ -127,11 +118,10 @@ def testManualReconstructedKernel(): # CHECK: %[[CONSTANT_3:.*]] = arith.constant 1 : i64 # CHECK: acc.loop gang control(%[[VAL_0:.*]] : i64) = (%[[CONSTANT_2]] : i64) to (%[[ARG2]] : i64) step (%[[CONSTANT_3]] : i64) { # CHECK: %[[INDEX_CAST_0:.*]] = arith.index_cast %[[VAL_0]] : i64 to index - # CHECK: %[[LOAD_0:.*]] = memref.load %[[ARG1]][%[[INDEX_CAST_0]]] : memref<?xf32> - # CHECK: %[[INDEX_CAST_1:.*]] = arith.index_cast %[[VAL_0]] : i64 to index - # CHECK: memref.store %[[LOAD_0]], %[[ARG0]][%[[INDEX_CAST_1]]] : memref<?xf32> + # CHECK: %[[LOAD_0:.*]] = memref.load %[[ARG1]]{{\[}}%[[INDEX_CAST_0]]] : memref<?xf32> + # CHECK: memref.store %[[LOAD_0]], %[[ARG0]]{{\[}}%[[INDEX_CAST_0]]] : memref<?xf32> # CHECK: acc.yield - # CHECK: } + # CHECK: } attributes {independent = [#acc.device_type<none>]} # CHECK: acc.yield # CHECK: } # CHECK: return From 31b4b2f7d4fa18deb538e71b5941689324974a05 Mon Sep 17 00:00:00 2001 From: Asher Mancinelli Date: Wed, 15 Oct 2025 17:16:02 -0700 Subject: [PATCH 5/5] Add uses of create/copyin/delete/copyout --- mlir/python/CMakeLists.txt | 3 +- mlir/test/python/dialects/openacc.py | 73 ++++++++++++++++++++++------ 2 files changed, 60 insertions(+), 16 deletions(-) diff --git
a/mlir/python/CMakeLists.txt b/mlir/python/CMakeLists.txt index c643d32e22174..ffa96ad16a02b 100644 --- a/mlir/python/CMakeLists.txt +++ b/mlir/python/CMakeLists.txt @@ -141,7 +141,8 @@ declare_mlir_dialect_python_bindings( SOURCES dialects/openacc.py DIALECT_NAME acc - DEPENDS acc_common_td) + DEPENDS acc_common_td + ) declare_mlir_dialect_python_bindings( ADD_TO_PARENT MLIRPythonSources.Dialects diff --git a/mlir/test/python/dialects/openacc.py b/mlir/test/python/dialects/openacc.py index 0b41a46ce9b47..6afdf450de239 100644 --- a/mlir/test/python/dialects/openacc.py +++ b/mlir/test/python/dialects/openacc.py @@ -1,4 +1,5 @@ # RUN: python %s | FileCheck %s +from unittest import result from mlir.ir import ( Context, FunctionType, @@ -18,6 +19,7 @@ ShapedType, ) from mlir.dialects import openacc, func, arith, memref +from mlir.extras import types def run(f): @@ -28,18 +30,15 @@ def run(f): @run -def testManualReconstructedKernel(): +def testParallelMemcpy(): module = Module.create() - i32 = IntegerType.get_signless(32) - i64 = IntegerType.get_signless(64) - f32 = F32Type.get() dynamic = ShapedType.get_dynamic_size() - memref_f32_1d_any = MemRefType.get([dynamic], f32) + memref_f32_1d_any = MemRefType.get([dynamic], types.f32()) with InsertionPoint(module.body): function_type = FunctionType.get( - [memref_f32_1d_any, memref_f32_1d_any, i64], [] + [memref_f32_1d_any, memref_f32_1d_any, types.i64()], [] ) f = func.FuncOp( type=function_type, @@ -48,8 +47,29 @@ def testManualReconstructedKernel(): f.attributes["sym_visibility"] = StringAttr.get("public") with InsertionPoint(f.add_entry_block()): - c1024 = arith.ConstantOp(i32, 1024) - c128 = arith.ConstantOp(i32, 128) + c1024 = arith.ConstantOp(types.i32(), 1024) + c128 = arith.ConstantOp(types.i32(), 128) + + arg0, arg1, arg2 = f.arguments + + copied = openacc.copyin( + acc_var=arg0.type, + var=arg0, + var_type=types.f32(), + bounds=[], + async_operands=[], + implicit=False, + structured=True, + ) + created = 
openacc.create_( + acc_var=arg1.type, + var=arg1, + var_type=types.f32(), + bounds=[], + async_operands=[], + implicit=False, + structured=True, + ) parallel_op = openacc.ParallelOp( asyncOperands=[], @@ -72,8 +92,8 @@ def testManualReconstructedKernel(): parallel_block = Block.create_at_start(parent=parallel_op.region, arg_types=[]) with InsertionPoint(parallel_block): - c0 = arith.ConstantOp(i64, 0) - c1 = arith.ConstantOp(i64, 1) + c0 = arith.ConstantOp(types.i64(), 0) + c1 = arith.ConstantOp(types.i64(), 1) loop_op = openacc.LoopOp( results_=[], @@ -95,34 +115,57 @@ def testManualReconstructedKernel(): loop_op.gang = acc_device_none loop_op.independent = acc_device_none - loop_block = Block.create_at_start(parent=loop_op.region, arg_types=[i64]) + loop_block = Block.create_at_start( + parent=loop_op.region, arg_types=[types.i64()] + ) with InsertionPoint(loop_block): idx = arith.index_cast(out=IndexType.get(), in_=loop_block.arguments[0]) - val = memref.load(memref=f.arguments[1], indices=[idx]) - memref.store(value=val, memref=f.arguments[0], indices=[idx]) + val = memref.load(memref=copied, indices=[idx]) + memref.store(value=val, memref=created, indices=[idx]) openacc.YieldOp([]) openacc.YieldOp([]) + deleted = openacc.delete( + acc_var=copied, + bounds=[], + async_operands=[], + implicit=False, + structured=True, + ) + copied = openacc.copyout( + acc_var=created, + var=arg1, + var_type=types.f32(), + bounds=[], + async_operands=[], + implicit=False, + structured=True, + ) func.ReturnOp([]) print(module) + # CHECK: TEST: testParallelMemcpy # CHECK-LABEL: func.func public @memcpy_idiom( # CHECK-SAME: %[[ARG0:.*]]: memref<?xf32>, %[[ARG1:.*]]: memref<?xf32>, %[[ARG2:.*]]: i64) { # CHECK: %[[CONSTANT_0:.*]] = arith.constant 1024 : i32 # CHECK: %[[CONSTANT_1:.*]] = arith.constant 128 : i32 + # CHECK: %[[COPYIN_0:.*]] = acc.copyin varPtr(%[[ARG0]] : memref<?xf32>) -> memref<?xf32> + # CHECK: %[[CREATE_0:.*]] = acc.create varPtr(%[[ARG1]] : memref<?xf32>) -> memref<?xf32> # CHECK: acc.parallel
num_gangs({%[[CONSTANT_0]] : i32}) vector_length(%[[CONSTANT_1]] : i32) { # CHECK: %[[CONSTANT_2:.*]] = arith.constant 0 : i64 # CHECK: %[[CONSTANT_3:.*]] = arith.constant 1 : i64 # CHECK: acc.loop gang control(%[[VAL_0:.*]] : i64) = (%[[CONSTANT_2]] : i64) to (%[[ARG2]] : i64) step (%[[CONSTANT_3]] : i64) { # CHECK: %[[INDEX_CAST_0:.*]] = arith.index_cast %[[VAL_0]] : i64 to index - # CHECK: %[[LOAD_0:.*]] = memref.load %[[ARG1]]{{\[}}%[[INDEX_CAST_0]]] : memref<?xf32> - # CHECK: memref.store %[[LOAD_0]], %[[ARG0]]{{\[}}%[[INDEX_CAST_0]]] : memref<?xf32> + # CHECK: %[[LOAD_0:.*]] = memref.load %[[COPYIN_0]]{{\[}}%[[INDEX_CAST_0]]] : memref<?xf32> + # CHECK: memref.store %[[LOAD_0]], %[[CREATE_0]]{{\[}}%[[INDEX_CAST_0]]] : memref<?xf32> # CHECK: acc.yield # CHECK: } attributes {independent = [#acc.device_type<none>]} # CHECK: acc.yield # CHECK: } + # CHECK: acc.delete accPtr(%[[COPYIN_0]] : memref<?xf32>) + # CHECK: acc.copyout accPtr(%[[CREATE_0]] : memref<?xf32>) to varPtr(%[[ARG1]] : memref<?xf32>) # CHECK: return # CHECK: }