diff --git a/mlir/python/mlir/dialects/gpu/__init__.py b/mlir/python/mlir/dialects/gpu/__init__.py
index 4cd80aa8b7ca8..b14ea68938160 100644
--- a/mlir/python/mlir/dialects/gpu/__init__.py
+++ b/mlir/python/mlir/dialects/gpu/__init__.py
@@ -3,5 +3,151 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from .._gpu_ops_gen import *
+from .._gpu_ops_gen import _Dialect
 from .._gpu_enum_gen import *
 from ..._mlir_libs._mlirDialectsGPU import *
+from typing import Callable, Sequence, Union, Optional, List
+
+try:
+    from ...ir import (
+        FunctionType,
+        TypeAttr,
+        StringAttr,
+        UnitAttr,
+        Block,
+        InsertionPoint,
+        ArrayAttr,
+        Type,
+        DictAttr,
+        Attribute,
+        DenseI32ArrayAttr,
+    )
+    from .._ods_common import (
+        get_default_loc_context as _get_default_loc_context,
+        _cext as _ods_cext,
+    )
+except ImportError as e:
+    raise RuntimeError("Error loading imports from extension module") from e
+
+
+@_ods_cext.register_operation(_Dialect, replace=True)
+class GPUFuncOp(GPUFuncOp):
+    __doc__ = GPUFuncOp.__doc__
+
+    KERNEL_ATTR_NAME = "gpu.kernel"
+    KNOWN_BLOCK_SIZE_ATTR_NAME = "known_block_size"
+    KNOWN_GRID_SIZE_ATTR_NAME = "known_grid_size"
+
+    FUNCTION_TYPE_ATTR_NAME = "function_type"
+    SYM_NAME_ATTR_NAME = "sym_name"
+    ARGUMENT_ATTR_NAME = "arg_attrs"
+    RESULT_ATTR_NAME = "res_attrs"
+
+    def __init__(
+        self,
+        function_type: Union[FunctionType, TypeAttr],
+        sym_name: Optional[Union[str, StringAttr]] = None,
+        kernel: Optional[bool] = None,
+        workgroup_attrib_attrs: Optional[Sequence[dict]] = None,
+        private_attrib_attrs: Optional[Sequence[dict]] = None,
+        known_block_size: Optional[Union[Sequence[int], DenseI32ArrayAttr]] = None,
+        known_grid_size: Optional[Union[Sequence[int], DenseI32ArrayAttr]] = None,
+        loc=None,
+        ip=None,
+        body_builder: Optional[Callable[[GPUFuncOp], None]] = None,
+    ):
+        """
+        Create a GPUFuncOp with the provided `function_type`, `sym_name`,
+        `kernel`, `workgroup_attrib_attrs`, `private_attrib_attrs`,
+        `known_block_size`, `known_grid_size`, and `body_builder`.
+        - `function_type` is a FunctionType or a TypeAttr.
+        - `sym_name` is a string or a StringAttr naming the function; it must
+          be provided even though the parameter defaults to None.
+        - `kernel` is an optional boolean; when true, the `gpu.kernel` unit
+          attribute is attached to mark the function as a kernel.
+        - `workgroup_attrib_attrs` is an optional list of dictionaries, one
+          per workgroup memory attribution.
+        - `private_attrib_attrs` is an optional list of dictionaries, one per
+          private memory attribution.
+        - `known_block_size` is an optional list of integers or a
+          DenseI32ArrayAttr giving the known block size.
+        - `known_grid_size` is an optional list of integers or a
+          DenseI32ArrayAttr giving the known grid size.
+        - `body_builder` is an optional callback. When provided, a new entry
+          block is created and the callback is invoked with the new op as its
+          argument, with an InsertionPoint already set to that block. The
+          callback is expected to insert a terminator into the block; see the
+          example below.
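+
+        Example (a minimal sketch; assumes `mlir.dialects.gpu` is imported as
+        `gpu` and that the insertion point is inside the body of a
+        gpu.module):
+
+            # Hypothetical names; any function type and symbol name work.
+            func_type = FunctionType.get(inputs=[], results=[])
+            gpu.GPUFuncOp(
+                func_type,
+                sym_name="kernel",
+                kernel=True,
+                body_builder=lambda func: gpu.ReturnOp([]),
+            )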
+ """ + function_type = ( + TypeAttr.get(function_type) + if not isinstance(function_type, TypeAttr) + else function_type + ) + super().__init__( + function_type, + workgroup_attrib_attrs=workgroup_attrib_attrs, + private_attrib_attrs=private_attrib_attrs, + loc=loc, + ip=ip, + ) + + if isinstance(sym_name, str): + self.attributes[self.SYM_NAME_ATTR_NAME] = StringAttr.get(sym_name) + elif isinstance(sym_name, StringAttr): + self.attributes[self.SYM_NAME_ATTR_NAME] = sym_name + else: + raise ValueError("sym_name must be a string or a StringAttr") + + if kernel: + self.attributes[self.KERNEL_ATTR_NAME] = UnitAttr.get() + + if known_block_size is not None: + if isinstance(known_block_size, Sequence): + block_size = DenseI32ArrayAttr.get(known_block_size) + self.attributes[self.KNOWN_BLOCK_SIZE_ATTR_NAME] = block_size + elif isinstance(known_block_size, DenseI32ArrayAttr): + self.attributes[self.KNOWN_BLOCK_SIZE_ATTR_NAME] = known_block_size + else: + raise ValueError( + "known_block_size must be a list of integers or a DenseI32ArrayAttr" + ) + + if known_grid_size is not None: + if isinstance(known_grid_size, Sequence): + grid_size = DenseI32ArrayAttr.get(known_grid_size) + self.attributes[self.KNOWN_GRID_SIZE_ATTR_NAME] = grid_size + elif isinstance(known_grid_size, DenseI32ArrayAttr): + self.attributes[self.KNOWN_GRID_SIZE_ATTR_NAME] = known_grid_size + else: + raise ValueError( + "known_grid_size must be a list of integers or a DenseI32ArrayAttr" + ) + + if body_builder is not None: + with InsertionPoint(self.add_entry_block()): + body_builder(self) + + @property + def name(self) -> StringAttr: + return StringAttr(self.attributes[self.SYM_NAME_ATTR_NAME]) + + @property + def is_kernel(self) -> bool: + return self.KERNEL_ATTR_NAME in self.attributes + + def add_entry_block(self) -> Block: + if len(self.body.blocks) > 0: + raise RuntimeError(f"Entry block already exists for {self.name.value}") + + function_type = self.function_type.value + return self.body.blocks.append( + *function_type.inputs, + arg_locs=[self.location for _ in function_type.inputs], + ) + + @property + def entry_block(self) -> Block: + if len(self.body.blocks) == 0: + raise RuntimeError( + f"Entry block does not exist for {self.name.value}." + + " Do you need to call the add_entry_block() method on this GPUFuncOp?" 
+            )
+        return self.body.blocks[0]
+
+    @property
+    def arguments(self) -> Sequence[Type]:
+        return self.function_type.value.inputs
diff --git a/mlir/test/python/dialects/gpu/dialect.py b/mlir/test/python/dialects/gpu/dialect.py
index 26ee9f34cb332..66c401886804c 100644
--- a/mlir/test/python/dialects/gpu/dialect.py
+++ b/mlir/test/python/dialects/gpu/dialect.py
@@ -1,6 +1,7 @@
 # RUN: %PYTHON %s | FileCheck %s
 
 from mlir.ir import *
+import mlir.ir as ir
 import mlir.dialects.gpu as gpu
 import mlir.dialects.gpu.passes
 from mlir.passmanager import *
@@ -64,3 +65,95 @@ def testObjectAttr():
     # CHECK: #gpu.object<#nvvm.target, kernels = <[#gpu.kernel_metadata<"kernel", () -> ()>]>, "BC\C0\DE5\14\00\00\05\00\00\00b\0C0$MY\BEf">
     print(o)
     assert o.kernels == kernelTable
+
+
+# CHECK-LABEL: testGPUFuncOp
+@run
+def testGPUFuncOp():
+    assert gpu.GPUFuncOp.__doc__ is not None
+    module = Module.create()
+    with InsertionPoint(module.body):
+        gpu_module_name = StringAttr.get("gpu_module")
+        gpumodule = gpu.GPUModuleOp(gpu_module_name)
+        block = gpumodule.bodyRegion.blocks.append()
+
+        def builder(func: gpu.GPUFuncOp) -> None:
+            gpu.GlobalIdOp(gpu.Dimension.x)
+            gpu.ReturnOp([])
+
+        with InsertionPoint(block):
+            name = StringAttr.get("kernel0")
+            func_type = ir.FunctionType.get(inputs=[], results=[])
+            type_attr = TypeAttr.get(func_type)
+            func = gpu.GPUFuncOp(type_attr, name)
+            func.attributes["sym_name"] = name
+            func.attributes["gpu.kernel"] = UnitAttr.get()
+
+            try:
+                func.entry_block
+                assert False, "Expected RuntimeError"
+            except RuntimeError as e:
+                assert (
+                    str(e)
+                    == "Entry block does not exist for kernel0. Do you need to call the add_entry_block() method on this GPUFuncOp?"
+                )
+
+            block = func.add_entry_block()
+            with InsertionPoint(block):
+                builder(func)
+
+            try:
+                func.add_entry_block()
+                assert False, "Expected RuntimeError"
+            except RuntimeError as e:
+                assert str(e) == "Entry block already exists for kernel0"
+
+            func = gpu.GPUFuncOp(
+                func_type,
+                sym_name="kernel1",
+                kernel=True,
+                body_builder=builder,
+                known_block_size=[1, 2, 3],
+                known_grid_size=DenseI32ArrayAttr.get([4, 5, 6]),
+            )
+
+            assert func.name.value == "kernel1"
+            assert func.function_type.value == func_type
+            assert func.arg_attrs is None
+            assert func.res_attrs is None
+            assert func.arguments == []
+            assert func.entry_block == func.body.blocks[0]
+            assert func.is_kernel
+            assert func.known_block_size == DenseI32ArrayAttr.get(
+                [1, 2, 3]
+            ), func.known_block_size
+            assert func.known_grid_size == DenseI32ArrayAttr.get(
+                [4, 5, 6]
+            ), func.known_grid_size
+
+            func = gpu.GPUFuncOp(
+                func_type,
+                sym_name="non_kernel_func",
+                body_builder=builder,
+            )
+            assert not func.is_kernel
+            assert func.known_block_size is None
+            assert func.known_grid_size is None
+
+    print(module)
+
+    # CHECK: gpu.module @gpu_module
+    # CHECK: gpu.func @kernel0() kernel {
+    # CHECK: %[[VAL_0:.*]] = gpu.global_id x
+    # CHECK: gpu.return
+    # CHECK: }
+    # CHECK: gpu.func @kernel1() kernel attributes
+    # CHECK-SAME: known_block_size = array
+    # CHECK-SAME: known_grid_size = array
+    # CHECK: %[[VAL_0:.*]] = gpu.global_id x
+    # CHECK: gpu.return
+    # CHECK: }
+    # CHECK: gpu.func @non_kernel_func() {
+    # CHECK: %[[VAL_0:.*]] = gpu.global_id x
+    # CHECK: gpu.return
+    # CHECK: }
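
A standalone sketch for trying the new bindings outside of lit (not part of
the patch; the module and kernel names here are made up, and it assumes an
MLIR build with the Python bindings on PYTHONPATH). It mirrors what the test
above exercises:

    from mlir.ir import (
        Context,
        FunctionType,
        InsertionPoint,
        Location,
        Module,
        StringAttr,
    )
    import mlir.dialects.gpu as gpu

    with Context(), Location.unknown():
        module = Module.create()
        with InsertionPoint(module.body):
            # Hypothetical module name; any symbol name works.
            gpu_module = gpu.GPUModuleOp(StringAttr.get("gpu_module"))
            body = gpu_module.bodyRegion.blocks.append()
            with InsertionPoint(body):
                # body_builder runs with the insertion point already set to
                # the new entry block and must terminate it, here via
                # gpu.return.
                gpu.GPUFuncOp(
                    FunctionType.get(inputs=[], results=[]),
                    sym_name="kernel",
                    kernel=True,
                    known_block_size=[32, 1, 1],
                    body_builder=lambda func: gpu.ReturnOp([]),
                )
        print(module)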