diff --git a/mlir/python/mlir/dialects/gpu/__init__.py b/mlir/python/mlir/dialects/gpu/__init__.py
index 4cd80aa8b7ca8..b14ea68938160 100644
--- a/mlir/python/mlir/dialects/gpu/__init__.py
+++ b/mlir/python/mlir/dialects/gpu/__init__.py
@@ -3,5 +3,151 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 from .._gpu_ops_gen import *
+from .._gpu_ops_gen import _Dialect
 from .._gpu_enum_gen import *
 from ..._mlir_libs._mlirDialectsGPU import *
+from typing import Callable, Sequence, Union, Optional, List
+
+try:
+    from ...ir import (
+        FunctionType,
+        TypeAttr,
+        StringAttr,
+        UnitAttr,
+        Block,
+        InsertionPoint,
+        ArrayAttr,
+        Type,
+        DictAttr,
+        Attribute,
+        DenseI32ArrayAttr,
+    )
+    from .._ods_common import (
+        get_default_loc_context as _get_default_loc_context,
+        _cext as _ods_cext,
+    )
+except ImportError as e:
+    raise RuntimeError("Error loading imports from extension module") from e
+
+
+@_ods_cext.register_operation(_Dialect, replace=True)
+class GPUFuncOp(GPUFuncOp):
+    __doc__ = GPUFuncOp.__doc__
+
+    KERNEL_ATTR_NAME = "gpu.kernel"
+    KNOWN_BLOCK_SIZE_ATTR_NAME = "known_block_size"
+    KNOWN_GRID_SIZE_ATTR_NAME = "known_grid_size"
+
+    FUNCTION_TYPE_ATTR_NAME = "function_type"
+    SYM_NAME_ATTR_NAME = "sym_name"
+    ARGUMENT_ATTR_NAME = "arg_attrs"
+    RESULT_ATTR_NAME = "res_attrs"
+
+    def __init__(
+        self,
+        function_type: Union[FunctionType, TypeAttr],
+        sym_name: Optional[Union[str, StringAttr]] = None,
+        kernel: Optional[bool] = None,
+        workgroup_attrib_attrs: Optional[Sequence[dict]] = None,
+        private_attrib_attrs: Optional[Sequence[dict]] = None,
+        known_block_size: Optional[Union[Sequence[int], DenseI32ArrayAttr]] = None,
+        known_grid_size: Optional[Union[Sequence[int], DenseI32ArrayAttr]] = None,
+        loc=None,
+        ip=None,
+        body_builder: Optional[Callable[[GPUFuncOp], None]] = None,
+    ):
+        """
+        Create a GPUFuncOp with the provided `function_type`, `sym_name`,
+        `kernel`, `workgroup_attrib_attrs`, `private_attrib_attrs`,
+        `known_block_size`, `known_grid_size`, and `body_builder`.
+        - `function_type` is a FunctionType or a TypeAttr.
+        - `sym_name` is a string or a StringAttr naming the function; it must
+          be provided even though the parameter defaults to None.
+        - `kernel` is an optional boolean; when true, the `gpu.kernel` unit
+          attribute is attached to mark the function as a kernel.
+        - `workgroup_attrib_attrs` is an optional list of dictionaries, one
+          per workgroup memory attribution.
+        - `private_attrib_attrs` is an optional list of dictionaries, one per
+          private memory attribution.
+        - `known_block_size` is an optional list of integers or a
+          DenseI32ArrayAttr giving the known block size.
+        - `known_grid_size` is an optional list of integers or a
+          DenseI32ArrayAttr giving the known grid size.
+        - `body_builder` is an optional callback. When provided, a new entry
+          block is created and the callback is invoked with the new op as its
+          argument, with an InsertionPoint already set to that block. The
+          callback is expected to insert a terminator into the block; see the
+          example below.
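+
+        Example (a minimal sketch; assumes `mlir.dialects.gpu` is imported as
+        `gpu` and that the insertion point is inside the body of a
+        gpu.module):
+
+            # Hypothetical names; any function type and symbol name work.
+            func_type = FunctionType.get(inputs=[], results=[])
+            gpu.GPUFuncOp(
+                func_type,
+                sym_name="kernel",
+                kernel=True,
+                body_builder=lambda func: gpu.ReturnOp([]),
+            )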
+ """ + function_type = ( + TypeAttr.get(function_type) + if not isinstance(function_type, TypeAttr) + else function_type + ) + super().__init__( + function_type, + workgroup_attrib_attrs=workgroup_attrib_attrs, + private_attrib_attrs=private_attrib_attrs, + loc=loc, + ip=ip, + ) + + if isinstance(sym_name, str): + self.attributes[self.SYM_NAME_ATTR_NAME] = StringAttr.get(sym_name) + elif isinstance(sym_name, StringAttr): + self.attributes[self.SYM_NAME_ATTR_NAME] = sym_name + else: + raise ValueError("sym_name must be a string or a StringAttr") + + if kernel: + self.attributes[self.KERNEL_ATTR_NAME] = UnitAttr.get() + + if known_block_size is not None: + if isinstance(known_block_size, Sequence): + block_size = DenseI32ArrayAttr.get(known_block_size) + self.attributes[self.KNOWN_BLOCK_SIZE_ATTR_NAME] = block_size + elif isinstance(known_block_size, DenseI32ArrayAttr): + self.attributes[self.KNOWN_BLOCK_SIZE_ATTR_NAME] = known_block_size + else: + raise ValueError( + "known_block_size must be a list of integers or a DenseI32ArrayAttr" + ) + + if known_grid_size is not None: + if isinstance(known_grid_size, Sequence): + grid_size = DenseI32ArrayAttr.get(known_grid_size) + self.attributes[self.KNOWN_GRID_SIZE_ATTR_NAME] = grid_size + elif isinstance(known_grid_size, DenseI32ArrayAttr): + self.attributes[self.KNOWN_GRID_SIZE_ATTR_NAME] = known_grid_size + else: + raise ValueError( + "known_grid_size must be a list of integers or a DenseI32ArrayAttr" + ) + + if body_builder is not None: + with InsertionPoint(self.add_entry_block()): + body_builder(self) + + @property + def name(self) -> StringAttr: + return StringAttr(self.attributes[self.SYM_NAME_ATTR_NAME]) + + @property + def is_kernel(self) -> bool: + return self.KERNEL_ATTR_NAME in self.attributes + + def add_entry_block(self) -> Block: + if len(self.body.blocks) > 0: + raise RuntimeError(f"Entry block already exists for {self.name.value}") + + function_type = self.function_type.value + return self.body.blocks.append( + *function_type.inputs, + arg_locs=[self.location for _ in function_type.inputs], + ) + + @property + def entry_block(self) -> Block: + if len(self.body.blocks) == 0: + raise RuntimeError( + f"Entry block does not exist for {self.name.value}." + + " Do you need to call the add_entry_block() method on this GPUFuncOp?" 
+            )
+        return self.body.blocks[0]
+
+    @property
+    def arguments(self) -> Sequence[Type]:
+        return self.function_type.value.inputs
diff --git a/mlir/test/python/dialects/gpu/dialect.py b/mlir/test/python/dialects/gpu/dialect.py
index 26ee9f34cb332..66c401886804c 100644
--- a/mlir/test/python/dialects/gpu/dialect.py
+++ b/mlir/test/python/dialects/gpu/dialect.py
@@ -1,6 +1,7 @@
 # RUN: %PYTHON %s | FileCheck %s
 
 from mlir.ir import *
+import mlir.ir as ir
 import mlir.dialects.gpu as gpu
 import mlir.dialects.gpu.passes
 from mlir.passmanager import *
@@ -64,3 +65,95 @@ def testObjectAttr():
     # CHECK: #gpu.object<#nvvm.target, kernels = <[#gpu.kernel_metadata<"kernel", () -> ()>]>, "BC\C0\DE5\14\00\00\05\00\00\00b\0C0$MY\BEf">
     print(o)
     assert o.kernels == kernelTable
+
+
+# CHECK-LABEL: testGPUFuncOp
+@run
+def testGPUFuncOp():
+    assert gpu.GPUFuncOp.__doc__ is not None
+    module = Module.create()
+    with InsertionPoint(module.body):
+        gpu_module_name = StringAttr.get("gpu_module")
+        gpumodule = gpu.GPUModuleOp(gpu_module_name)
+        block = gpumodule.bodyRegion.blocks.append()
+
+        def builder(func: gpu.GPUFuncOp) -> None:
+            gpu.GlobalIdOp(gpu.Dimension.x)
+            gpu.ReturnOp([])
+
+        with InsertionPoint(block):
+            name = StringAttr.get("kernel0")
+            func_type = ir.FunctionType.get(inputs=[], results=[])
+            type_attr = TypeAttr.get(func_type)
+            func = gpu.GPUFuncOp(type_attr, name)
+            func.attributes["sym_name"] = name
+            func.attributes["gpu.kernel"] = UnitAttr.get()
+
+            try:
+                func.entry_block
+                assert False, "Expected RuntimeError"
+            except RuntimeError as e:
+                assert (
+                    str(e)
+                    == "Entry block does not exist for kernel0. Do you need to call the add_entry_block() method on this GPUFuncOp?"
+                )
+
+            block = func.add_entry_block()
+            with InsertionPoint(block):
+                builder(func)
+
+            try:
+                func.add_entry_block()
+                assert False, "Expected RuntimeError"
+            except RuntimeError as e:
+                assert str(e) == "Entry block already exists for kernel0"
+
+            func = gpu.GPUFuncOp(
+                func_type,
+                sym_name="kernel1",
+                kernel=True,
+                body_builder=builder,
+                known_block_size=[1, 2, 3],
+                known_grid_size=DenseI32ArrayAttr.get([4, 5, 6]),
+            )
+
+            assert func.name.value == "kernel1"
+            assert func.function_type.value == func_type
+            assert func.arg_attrs is None
+            assert func.res_attrs is None
+            assert func.arguments == []
+            assert func.entry_block == func.body.blocks[0]
+            assert func.is_kernel
+            assert func.known_block_size == DenseI32ArrayAttr.get(
+                [1, 2, 3]
+            ), func.known_block_size
+            assert func.known_grid_size == DenseI32ArrayAttr.get(
+                [4, 5, 6]
+            ), func.known_grid_size
+
+            func = gpu.GPUFuncOp(
+                func_type,
+                sym_name="non_kernel_func",
+                body_builder=builder,
+            )
+            assert not func.is_kernel
+            assert func.known_block_size is None
+            assert func.known_grid_size is None
+
+    print(module)
+
+    # CHECK: gpu.module @gpu_module
+    # CHECK: gpu.func @kernel0() kernel {
+    # CHECK: %[[VAL_0:.*]] = gpu.global_id x
+    # CHECK: gpu.return
+    # CHECK: }
+    # CHECK: gpu.func @kernel1() kernel attributes
+    # CHECK-SAME: known_block_size = array
+    # CHECK-SAME: known_grid_size = array
+    # CHECK: %[[VAL_0:.*]] = gpu.global_id x
+    # CHECK: gpu.return
+    # CHECK: }
+    # CHECK: gpu.func @non_kernel_func() {
+    # CHECK: %[[VAL_0:.*]] = gpu.global_id x
+    # CHECK: gpu.return
+    # CHECK: }
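
A standalone sketch for trying the new bindings outside of lit (not part of
the patch; the module and kernel names here are made up, and it assumes an
MLIR build with the Python bindings on PYTHONPATH). It mirrors what the test
above exercises:

    from mlir.ir import (
        Context,
        FunctionType,
        InsertionPoint,
        Location,
        Module,
        StringAttr,
    )
    import mlir.dialects.gpu as gpu

    with Context(), Location.unknown():
        module = Module.create()
        with InsertionPoint(module.body):
            # Hypothetical module name; any symbol name works.
            gpu_module = gpu.GPUModuleOp(StringAttr.get("gpu_module"))
            body = gpu_module.bodyRegion.blocks.append()
            with InsertionPoint(body):
                # body_builder runs with the insertion point already set to
                # the new entry block and must terminate it, here via
                # gpu.return.
                gpu.GPUFuncOp(
                    FunctionType.get(inputs=[], results=[]),
                    sym_name="kernel",
                    kernel=True,
                    known_block_size=[32, 1, 1],
                    body_builder=lambda func: gpu.ReturnOp([]),
                )
        print(module)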