[bench] enable mlir benchmark #106

Merged: 115 commits, Aug 15, 2024
Changes from all commits (115 commits)
0781b4f
rebase
May 27, 2024
93306ef
Merge branch 'main' into longsheng/add_onednn_ops
May 27, 2024
60807a8
fix tidy
May 27, 2024
9344308
fix
May 27, 2024
eb4678f
fix
May 27, 2024
d82cc81
fix
May 27, 2024
bda491d
fix
May 28, 2024
f1a8e10
fix
May 28, 2024
e50b9fb
init
xurui1995 May 29, 2024
7515714
init
xurui1995 May 29, 2024
1ecf8a4
fix
May 29, 2024
45e64c9
Merge branch 'main' into longsheng/add_onednn_ops
May 29, 2024
50c7737
rebase
May 29, 2024
fe0c118
Merge branch 'main' into longsheng/llma2_onednn_lower
May 29, 2024
8e144c9
fix
May 29, 2024
44619b4
fix
May 29, 2024
dd98c65
update
May 29, 2024
04db05b
add flatten
May 30, 2024
4877c06
fix format
May 30, 2024
f6072f0
Merge branch 'main' into longsheng/llma2_onednn_lower
May 30, 2024
4dc1aa4
update
May 30, 2024
0bdecfe
Merge branch 'main' into xurui/add_benchmark
xurui1995 May 31, 2024
47b6551
add test
May 31, 2024
e86afaa
debug
May 31, 2024
6682794
test
May 31, 2024
0c3ff50
fix
May 31, 2024
7b4c2f9
opt
xurui1995 Jun 2, 2024
72983b7
fix
Jun 3, 2024
b14398c
Merge branch 'main' into longsheng/llma2_onednn_lower
Jun 3, 2024
d208ec4
fix
Jun 3, 2024
fc85574
test
Jun 3, 2024
dee3d54
fix
Jun 3, 2024
eb04513
fix
Jun 3, 2024
212710b
update bench
xurui1995 Jun 3, 2024
9d2e336
add license
xurui1995 Jun 3, 2024
d9e8009
update bench
xurui1995 Jun 3, 2024
7e1a4be
update test
Jun 3, 2024
eeaceb7
add bf16
xurui1995 Jun 3, 2024
efb6133
update interface
Jun 3, 2024
20d7dd0
Merge branch 'main' into longsheng/llma2_onednn_lower
Jun 3, 2024
37782b6
fix
Jun 3, 2024
fb61105
Merge branch 'main' into longsheng/llma2_onednn_lower
Jun 3, 2024
f0916fc
fix
xurui1995 Jun 3, 2024
af1bdbb
remove print
xurui1995 Jun 3, 2024
52502d2
Merge branch 'main' into xurui/add_benchmark
xurui1995 Jun 3, 2024
f7a6e1f
Merge branch 'main' into xurui/add_benchmark
xurui1995 Jun 3, 2024
ecf95e8
Merge branch 'main' into longsheng/llma2_onednn_lower
Jun 3, 2024
c3203b3
Merge remote-tracking branch 'origin/longsheng/llma2_onednn_lower' in…
xurui1995 Jun 4, 2024
5b953e5
remove useless arg
xurui1995 Jun 4, 2024
a58afd2
rename
xurui1995 Jun 4, 2024
dfb212c
update test
xurui1995 Jun 4, 2024
c3b9008
update compliler
xurui1995 Jun 5, 2024
4746b97
merge main
xurui1995 Jun 11, 2024
26be584
fix conflict
xurui1995 Jun 11, 2024
401975f
fix conflict
xurui1995 Jun 11, 2024
abd8ff4
comment out ResultsToOutParamsPass
xurui1995 Jun 11, 2024
056a0a9
fix style
xurui1995 Jun 11, 2024
8079132
fix
xurui1995 Jun 11, 2024
d504ce6
update
xurui1995 Jun 11, 2024
f6adb8e
improve conversion
xurui1995 Jun 11, 2024
47ea2fc
add disable_results_to_params option
xurui1995 Jun 12, 2024
23f2e29
update wrapper bench
xurui1995 Jun 12, 2024
e2c5893
fix mlp driver
xurui1995 Jun 12, 2024
315f48d
add tuner
xurui1995 Jun 13, 2024
4024ece
update config class
xurui1995 Jun 18, 2024
c33f532
fix tuner
xurui1995 Jun 18, 2024
7f7b042
support skip tuner of op
xurui1995 Jun 18, 2024
7ff5592
add timeout option
xurui1995 Jun 19, 2024
8c3a5fd
update example
xurui1995 Jun 27, 2024
7dff6a1
Merge branch 'main' into xurui/add_benchmark
xurui1995 Jul 2, 2024
2399d08
Merge branch 'main' into xurui/add_benchmark
xurui1995 Jul 2, 2024
4923f17
fix style and license
xurui1995 Jul 2, 2024
8c168e7
change tuner with linalg matmul
xurui1995 Jul 2, 2024
c47c33d
add linalg mlp
xurui1995 Jul 2, 2024
9da1002
fix
xurui1995 Jul 2, 2024
9bdf0b5
add more tuner option
xurui1995 Jul 5, 2024
2691727
add space percent
xurui1995 Jul 8, 2024
1b8c1c6
add batch bench
xurui1995 Jul 9, 2024
8e15377
add linalgx binding
xurui1995 Jul 12, 2024
201f942
fix ci
xurui1995 Jul 12, 2024
a58bd6f
fix style
xurui1995 Jul 12, 2024
cb66f38
add linalgx test case
xurui1995 Jul 12, 2024
b06cdda
opt the code
xurui1995 Jul 15, 2024
30945d8
restore pass for out to param
xurui1995 Jul 15, 2024
a7562d2
update readme
xurui1995 Jul 16, 2024
2b11201
Merge branch 'main' into xurui/add_benchmark
xurui1995 Jul 16, 2024
5f12a72
fix
xurui1995 Jul 18, 2024
e06b194
remove tuner code
xurui1995 Jul 18, 2024
21ea72f
update tests
xurui1995 Jul 18, 2024
87e85b0
update test case
xurui1995 Jul 18, 2024
00da905
Merge branch 'main' into xurui/add_benchmark
xurui1995 Jul 18, 2024
d47201d
Merge branch 'main' into xurui/add_benchmark
xurui1995 Jul 22, 2024
667cd13
improve set shared lib paths
xurui1995 Jul 25, 2024
f113eff
Merge branch 'main' into xurui/add_benchmark
xurui1995 Jul 25, 2024
3623d16
add missing file
xurui1995 Jul 25, 2024
1f3e6e6
Merge branch 'xurui/add_benchmark' of https://github.com/intel/graph-…
xurui1995 Jul 25, 2024
e80ac7c
Update tools/example/simple_test.py
xurui1995 Jul 26, 2024
2d8e40d
Update tools/example/simple_test.py
xurui1995 Jul 26, 2024
1f45ec1
remove new cmake vars
xurui1995 Jul 26, 2024
defda06
remove linalgx binding
xurui1995 Jul 26, 2024
4ff5baa
Merge branch 'main' into xurui/add_benchmark
xurui1995 Aug 5, 2024
61fc0d6
remove dependency
xurui1995 Aug 5, 2024
5f8ac95
merge main
xurui1995 Aug 6, 2024
524bd7d
Merge branch 'main' into xurui/add_benchmark
xurui1995 Aug 8, 2024
95b3cc8
Merge branch 'main' into xurui/add_benchmark
xurui1995 Aug 9, 2024
fd1ad8b
remove unused import
xurui1995 Aug 9, 2024
aa69883
fix path in config.py.in
xurui1995 Aug 12, 2024
27ff72c
Merge branch 'main' into xurui/add_benchmark
xurui1995 Aug 12, 2024
0c82ebd
Merge branch 'main' into xurui/add_benchmark
xurui1995 Aug 14, 2024
b466415
fix config.py.in
xurui1995 Aug 14, 2024
21bec88
fix path
xurui1995 Aug 14, 2024
0407f22
fix path
xurui1995 Aug 14, 2024
271e0c3
fix
xurui1995 Aug 15, 2024
3517083
Merge branch 'main' into xurui/add_benchmark
xurui1995 Aug 15, 2024
d6fff77
Merge branch 'main' into xurui/add_benchmark
xurui1995 Aug 15, 2024
8 changes: 7 additions & 1 deletion python/CMakeLists.txt
```
@@ -47,6 +47,7 @@ declare_mlir_python_sources(GcPythonSources.Common
   ADD_TO_PARENT GcPythonSources
   SOURCES
     __init__.py
+    graph_compiler.py
     dialects/__init__.py
     # init hooks
     _mlir_libs/_site_initialize_0.py
@@ -98,6 +99,8 @@ add_mlir_python_common_capi_library(GcPythonCAPI
   GcPythonSources
   MLIRPythonExtension.RegisterEverything
   MLIRPythonSources.Core
+  MLIRPythonSources.Dialects.linalg
+  MLIRPythonSources.ExecutionEngine
 )
 target_link_libraries(GcPythonCAPI PUBLIC GcInterface)

@@ -112,6 +115,9 @@ add_mlir_python_modules(GcPythonModules
   GcPythonSources
   MLIRPythonExtension.RegisterEverything
   MLIRPythonSources
-)
+  MLIRPythonSources.ExecutionEngine
+  COMMON_CAPI_LINK_LIBS
+  GcPythonCAPI
+)

 configure_file(config.py.in ${MLIR_BINARY_DIR}/python_packages/gc_mlir_core/gc_mlir/config.py @ONLY)
```
22 changes: 22 additions & 0 deletions python/config.py.in
```
@@ -0,0 +1,22 @@
import os
import sys

llvm_obj_root = "@LLVM_BINARY_DIR@"
llvm_lib_dir = "@LLVM_LIBRARY_DIR@"
shlib_ext = "@LTDL_SHLIB_EXT@"

if sys.platform.startswith("win32"):
    mlir_runner_utils_dir = os.path.normpath(os.path.join(llvm_obj_root, "bin"))
    shlib_prefix = ""
else:
    mlir_runner_utils_dir = llvm_lib_dir
    shlib_prefix = "lib"

MLIR_C_RUNNER_UTILS = os.path.normpath(
    os.path.join(
        mlir_runner_utils_dir, shlib_prefix + "mlir_c_runner_utils" + shlib_ext
    )
)
MLIR_RUNNER_UTILS = os.path.normpath(
    os.path.join(mlir_runner_utils_dir, shlib_prefix + "mlir_runner_utils" + shlib_ext)
)
```

Review thread on this file:

Contributor: It would be better to register just a folder path here, e.g. MLIR_SHARED_LIB_PATH, and then search for the library name in an OS-independent way in graph_compiler.py.

Contributor (author): Fixed this in a way similar to lit.site.cfg.py, but we cannot get the SHLIBEXT value defined in configure_lit_site_cfg; according to the source code, we can use LTDL_SHLIB_EXT instead:
https://github.com/llvm/llvm-project/blob/109b50808f72c228518766c3b384dd14e0dcf4ee/llvm/cmake/modules/AddLLVM.cmake#L1841-L1854
https://github.com/llvm/llvm-project/blob/109b50808f72c228518766c3b384dd14e0dcf4ee/llvm/cmake/modules/HandleLLVMOptions.cmake#L233

Contributor: We set mlir_runner_utils_dir to @MLIR_RUNNER_UTILS_DIR@ in our lit.site.cfg.py.in. Can we use that CMake variable here too?

Contributor (author): I tried, but we cannot get @MLIR_RUNNER_UTILS_DIR@ from here; it is not in the same scope.
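The platform-dependent naming logic above can be factored into a small helper for illustration. This is our own sketch (the function `build_shlib_path` is not part of the PR): Windows keeps shared libraries under `bin/` with no `lib` prefix, while Unix-likes use the library directory with a `lib` prefix.

```python
import os


def build_shlib_path(platform: str, llvm_obj_root: str, llvm_lib_dir: str,
                     shlib_ext: str, name: str) -> str:
    """Mirror the config.py.in logic: pick the shared-library directory,
    prefix, and extension based on the target platform."""
    if platform.startswith("win32"):
        lib_dir, prefix = os.path.join(llvm_obj_root, "bin"), ""
    else:
        lib_dir, prefix = llvm_lib_dir, "lib"
    return os.path.normpath(os.path.join(lib_dir, prefix + name + shlib_ext))


print(build_shlib_path("linux", "/build", "/build/lib", ".so", "mlir_runner_utils"))
# → /build/lib/libmlir_runner_utils.so
```

A helper like this is also what the reviewer's suggestion (register only a folder path, resolve the name OS-independently) would amount to.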
47 changes: 47 additions & 0 deletions python/gc_mlir/graph_compiler.py
@@ -0,0 +1,47 @@
# ===-- graph_compiler.py - DESC ------------------------------*- Python -*-===#
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===-----------------------------------------------------------------------===#

from gc_mlir import execution_engine
from gc_mlir import ir
from gc_mlir import passmanager
from gc_mlir.config import MLIR_C_RUNNER_UTILS, MLIR_RUNNER_UTILS

__all__ = [
    "GraphCompiler",
]


class GraphCompiler:
    def __init__(
        self,
        pipeline: str = "any(gc-cpu-pipeline)",
        opt_level: int = 3,
    ):
        self.shared_libs = [MLIR_C_RUNNER_UTILS, MLIR_RUNNER_UTILS]
        self.pipeline = pipeline
        self.opt_level = opt_level

    def __call__(self, module: ir.Module, ir_printing: bool = False):
        self.compile(module, ir_printing)

    def compile(self, module: ir.Module, ir_printing: bool = False):
        pm = passmanager.PassManager.parse(self.pipeline)
        if ir_printing:
            pm.enable_ir_printing()
        pm.run(module.operation)

    def jit(self, module: ir.Module) -> execution_engine.ExecutionEngine:
        return execution_engine.ExecutionEngine(
            module, opt_level=self.opt_level, shared_libs=self.shared_libs
        )

    def compile_and_jit(
        self, module: ir.Module, ir_printing: bool = False
    ) -> execution_engine.ExecutionEngine:
        self.compile(module, ir_printing)
        return self.jit(module)
26 changes: 13 additions & 13 deletions test/mlir/test/gc/python/smoketest.py
```
@@ -3,9 +3,9 @@
 # ===============================================================================
 # RUN: %python %s | FileCheck %s

-from gc_mlir.dialects import onednn_graph, func
-from gc_mlir.passmanager import PassManager
+from gc_mlir.dialects import func, onednn_graph
+from gc_mlir.graph_compiler import GraphCompiler
 from gc_mlir.ir import *


 def run(f):
@@ -36,22 +36,22 @@ def testCreateOp():
     print(module)


-# CHECK-LABEL: TEST: testPassManager
+# CHECK-LABEL: TEST: testCompiler
 @run
-def testPassManager():
+def testCompiler():
     with Context():
         module = Module.parse(
             """
-            // CHECK: [[C0:%.+]] = arith.constant 0
-            // CHECK: [[INIT:%.+]] = tensor.empty()
-            // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : bf16) outs([[INIT]] : tensor<128x256xbf16>) -> tensor<128x256xbf16>
-            // CHECK: linalg.matmul ins(%arg0, %arg1 : tensor<128x512xbf16>, tensor<512x256xbf16>) outs([[FILLED]] : tensor<128x256xbf16>) -> tensor<128x256xbf16>
-            func.func @matmul(%arg0: tensor<128x512xbf16>, %arg1: tensor<512x256xbf16>) -> tensor<128x256xbf16> {
-                %0 = onednn_graph.matmul %arg0, %arg1 : (tensor<128x512xbf16>, tensor<512x256xbf16>) -> tensor<128x256xbf16>
-                return %0 : tensor<128x256xbf16>
+            func.func @matmul(%arg0: tensor<128x512xf32>, %arg1: tensor<512x256xf32>) -> tensor<128x256xf32> {
+                %0 = onednn_graph.matmul %arg0, %arg1 : (tensor<128x512xf32>, tensor<512x256xf32>) -> tensor<128x256xf32>
+                return %0 : tensor<128x256xf32>
             }
             """
         )
-        pm = PassManager.parse("builtin.module(convert-onednn-graph-to-linalg)")
-        pm.run(module.operation)
+        compiler = GraphCompiler(
+            pipeline="builtin.module(convert-onednn-graph-to-linalg)"
+        )
+        compiler.compile(module)
+        # CHECK-NOT: onednn_graph.matmul
         print(module)
```
92 changes: 92 additions & 0 deletions tools/README.md
@@ -0,0 +1,92 @@
# Python Tools
## Pre-requisites
### Enable python binding
* Enable MLIR python binding, [README](https://github.com/intel/graph-compiler/blob/main/python/README.md)
### Set env
* **PYTHONPATH**=*${BUILD_DIR}*/python_packages/gc_mlir_core
* **LD_PRELOAD**=path/to/libiomp5.so


## Bench
The tool supports two different ways to measure the time cost; more experiments are needed to determine which one is more stable and accurate. Currently, users can choose between them via options:
* Use the MLIR Python API to invoke the kernel and measure the time cost in Python
* Modify the MLIR by wrapping the kernel in a new function that calls `nanoTime()` before and after the kernel invocation, then compute the difference as the time cost
```
func.func private @nanoTime() -> i64 attributes {llvm.emit_c_interface}
func.func public @wrapped_main(%arg0: memref<1xi64>, %arg1: tensor<128x512xbf16>, %arg2: tensor<512x256xbf16>) -> tensor<128x256xbf16> attributes {llvm.emit_c_interface} {
  %0 = call @nanoTime() : () -> i64
  %1 = call @main_entry(%arg1, %arg2) : (tensor<128x512xbf16>, tensor<512x256xbf16>) -> tensor<128x256xbf16>
  %2 = call @nanoTime() : () -> i64
  %3 = arith.subi %2, %0 : i64
  %c0 = arith.constant 0 : index
  memref.store %3, %arg0[%c0] : memref<1xi64>
  return %1 : tensor<128x256xbf16>
}
```
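For the `py` kind, the measurement reduces to warming up the kernel and then averaging wall-clock timings around each invocation. A minimal sketch of that loop in plain Python, timing a stand-in callable (the helper name `bench_py` and its defaults are ours, not the tool's actual code):

```python
import time


def bench_py(kernel, warm_up: int = 20, repeat: int = 100) -> float:
    """Return the mean execution cost in milliseconds, measured from Python."""
    for _ in range(warm_up):  # warm-up runs are executed but not measured
        kernel()
    total_ns = 0
    for _ in range(repeat):
        start = time.perf_counter_ns()
        kernel()
        total_ns += time.perf_counter_ns() - start
    return total_ns / repeat / 1e6  # ns -> ms


# Example: timing a trivial stand-in kernel.
cost_ms = bench_py(lambda: sum(range(1000)), warm_up=5, repeat=50)
print(f"execute_cost(ms): {cost_ms:.4f}")
```

The `wrapper` kind moves the two timestamps into the generated IR itself (the `nanoTime()` calls above), which avoids Python call overhead in the measured region.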

### Examples:
```
# simple version
python3 ./tools/main.py --driver=load_mlir --path=./tools/workloads/test.mlir

# complex version
python3 ./tools/main.py --type=bench --bench_kind=py --driver=load_mlir --path=./tools/workloads/test.mlir --warm_up=200 --repeat=200 --print_ir --entry=main_entry
```

```
# result example
===========bench result===========
{
    "args": {
        "type": "bench",
        "driver": "load_mlir",
        "path": "./tools/workloads/test.mlir",
        "entry": "main_entry",
        "bench_kind": "py",
        "print_ir": false,
        "warm_up": 20,
        "repeat": 100
    },
    "compile_cost(ms)": 25.58841183781624,
    "execute_cost(ms)": 1.7501823976635933
}
```

### Common Options
* `--driver`: the pattern to bench; currently supports `mlp` and `load_mlir`
* `--bench_kind`: `py` or `wrapper`; selects the benchmark measurement implementation
* `--warm_up`: number of warm-up runs before measurement
* `--repeat`: number of measured runs
* `--print_ir`: print the IR before execution
* `--disable_results_to_params`: do not use this with the default pipeline (`gc-cpu-pipeline`)
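As a rough illustration, the options above could be wired up with `argparse` along these lines; the flag names follow the list above, but the defaults and parser structure are our assumptions, not the tool's actual code:

```python
import argparse


def build_parser() -> argparse.ArgumentParser:
    """Sketch of a CLI mirroring the common bench options (assumed defaults)."""
    parser = argparse.ArgumentParser(description="benchmark driver (sketch)")
    parser.add_argument("--type", default="bench")
    parser.add_argument("--driver", choices=["mlp", "load_mlir"], required=True)
    parser.add_argument("--bench_kind", choices=["py", "wrapper"], default="py")
    parser.add_argument("--warm_up", type=int, default=100)
    parser.add_argument("--repeat", type=int, default=100)
    parser.add_argument("--print_ir", action="store_true")
    parser.add_argument("--disable_results_to_params", action="store_true")
    return parser


args = build_parser().parse_args(
    ["--driver", "load_mlir", "--warm_up", "200", "--print_ir"]
)
print(args.driver, args.warm_up, args.print_ir)
# → load_mlir 200 True
```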

### Driver Specific Options
* load_mlir
* `--path`: the mlir file path
* `--entry`: the name of entry func
```
python3 ./tools/main.py --driver=load_mlir --path=./tools/workloads/test.mlir
```


* mlp
* `--batch_size`: the batch size of the input
* `--hidden_size_list`: hidden sizes of the mlp, example: 32x16x64
* `--has_bias`: whether each matmul op has a bias, example: 1x0
* `--act_type`: choices=["noop", "relu", "sigmoid"]
* `--dtype`: choices=["bf16", "f32"]
```
python3 ./tools/main.py --driver=mlp --batch_size=32 --hidden_size_list=32x16x64 --has_bias=0x0 --act_type=noop --dtype=f32

===========bench func name: main_entry ===========
module {
  func.func @main_entry(%arg0: tensor<32x32xf32>, %arg1: tensor<32x16xf32>, %arg2: tensor<16x64xf32>) -> tensor<32x64xf32> attributes {llvm.emit_c_interface} {
    %0 = tensor.empty() : tensor<32x16xf32>
    %1 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%arg0, %arg1 : tensor<32x32xf32>, tensor<32x16xf32>) outs(%0 : tensor<32x16xf32>) -> tensor<32x16xf32>
    %2 = tensor.empty() : tensor<32x64xf32>
    %3 = linalg.matmul {cast = #linalg.type_fn<cast_signed>} ins(%1, %arg2 : tensor<32x16xf32>, tensor<16x64xf32>) outs(%2 : tensor<32x64xf32>) -> tensor<32x64xf32>
    return %3 : tensor<32x64xf32>
  }
}
```
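The `--hidden_size_list` and `--has_bias` encodings can be unpacked mechanically: consecutive sizes in `32x16x64` form the two matmul shapes 32x16 and 16x64, with one bias flag per matmul. A small sketch of that parsing (our own helper, not the tool's code):

```python
def parse_mlp_spec(batch_size: int, hidden_size_list: str, has_bias: str):
    """Turn '32x16x64' and '0x0' into per-layer (in, out, bias) descriptors."""
    sizes = [int(s) for s in hidden_size_list.split("x")]
    biases = [b == "1" for b in has_bias.split("x")]
    assert len(biases) == len(sizes) - 1, "one bias flag per matmul layer"
    return [(sizes[i], sizes[i + 1], biases[i]) for i in range(len(sizes) - 1)]


layers = parse_mlp_spec(32, "32x16x64", "0x0")
print(layers)  # → [(32, 16, False), (16, 64, False)]
```

Each descriptor corresponds to one `linalg.matmul` in the generated `main_entry` above, with `batch_size` as the leading dimension of the input tensor.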