Skip to content

Commit a6b975a

Browse files
Implement IP and Softmax using OneDNN Graph API (#227)
* add submodule oneDNNGraph * add InnerProduct and Softmax fp32 * reformat * fix oneDNN Graph version
1 parent 9c6a8fa commit a6b975a

File tree

9 files changed

+446
-2
lines changed

9 files changed

+446
-2
lines changed

.gitmodules

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,7 @@
7676
[submodule "nlp_toolkit/backends/neural_engine/executor/third_party/boost/libs/mp11"]
7777
path = nlp_toolkit/backends/neural_engine/executor/third_party/boost/libs/mp11
7878
url = https://github.com/boostorg/mp11.git
79+
[submodule "nlp_toolkit/backends/neural_engine/executor/third_party/oneDNNGraph"]
80+
path = nlp_toolkit/backends/neural_engine/executor/third_party/oneDNNGraph
81+
url = https://github.com/oneapi-src/oneDNN.git
82+
branch = dev-graph

nlp_toolkit/backends/neural_engine/compile/ops/empty_ops.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ def __init__(self):
101101
# Fused_op MatMul + BiasAdd
102102
# The inputs are two-dimensional matrices and 1-D const bias
103103
@operator_registry(operator_type='InnerProduct')
104+
@operator_registry(operator_type='InnerProductGraph')
104105
class InnerProduct(Operator):
105106
def __init__(self):
106107
super().__init__()
@@ -400,4 +401,4 @@ def __init__(self):
400401
@operator_registry(operator_type='Convolution')
401402
class Convolution(Operator):
402403
def __init__(self):
403-
super().__init__()
404+
super().__init__()

nlp_toolkit/backends/neural_engine/compile/ops/softmax.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
# Computes softmax activations.
2424
# tf.nn.softmax(logits, axis=None, name=None)
2525
@operator_registry(operator_type='Softmax')
26+
@operator_registry(operator_type='SoftmaxGraph')
2627
class Softmax(Operator):
2728
def __init__(self):
2829
super().__init__()

nlp_toolkit/backends/neural_engine/executor/CMakeLists.txt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ endif()
5050
set(GFLAGS_USE_TARGET_NAMESPACE TRUE)
5151
add_subdirectory(${THIRD_PARTY_DIR}/gflags)
5252
set(WITH_GFLAGS OFF CACHE BOOL "disable gflags for glog")
53-
add_subdirectory(${THIRD_PARTY_DIR}/oneDNN)
53+
add_subdirectory(${THIRD_PARTY_DIR}/oneDNNGraph)
5454
add_subdirectory(${THIRD_PARTY_DIR}/pybind11)
5555
add_subdirectory(${THIRD_PARTY_DIR}/yaml-cpp)
5656
add_subdirectory(../SparseLib/ ./hostlibs)
@@ -63,12 +63,14 @@ add_library(neural_engine SHARED
6363
src/operators/output.cpp
6464
src/operators/binary_add.cpp
6565
src/operators/inner_product.cpp
66+
src/onednn_graph_operators/inner_product_graph.cpp
6667
src/operators/layer_norm.cpp
6768
src/operators/matmul.cpp
6869
src/operators/one_hot.cpp
6970
src/operators/padding_sequence.cpp
7071
src/operators/reorder.cpp
7172
src/operators/softmax.cpp
73+
src/onednn_graph_operators/softmax_graph.cpp
7274
src/operators/reshape.cpp
7375
src/operators/gather.cpp
7476
src/operators/strided_slice.cpp
@@ -93,6 +95,7 @@ target_include_directories(neural_engine
9395
PUBLIC
9496
../
9597
./include/sparse_operators
98+
./include/onednn_graph_operators
9699
./include/operators
97100
./include
98101
./third_party/boost/libs/assert/include
@@ -111,13 +114,15 @@ target_include_directories(neural_engine
111114
./third_party/boost/libs/tuple/include
112115
./third_party/boost/libs/predef/include
113116
./third_party/boost/libs/mp11/include
117+
./third_party/oneDNNGraph/include
114118
)
115119

116120
# link against the third party libraries
117121
target_link_libraries(neural_engine
118122
PUBLIC
119123
${CMAKE_THREAD_LIBS_INIT}
120124
dnnl
125+
dnnl_graph
121126
yaml-cpp
122127
gflags
123128
glog
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
// Copyright (c) 2021 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ENGINE_EXECUTOR_INCLUDE_ONEDNN_GRAPH_OPERATORS_INNER_PRODUCT_GRAPH_HPP_
#define ENGINE_EXECUTOR_INCLUDE_ONEDNN_GRAPH_OPERATORS_INNER_PRODUCT_GRAPH_HPP_
#include <string>
#include <unordered_map>
#include <vector>

#include "../operator.hpp"
#include "oneapi/dnnl/dnnl_graph.hpp"

namespace executor {

// Short aliases for the oneDNN Graph logical-tensor vocabulary used below.
using logical_tensor = dnnl::graph::logical_tensor;
using data_type = dnnl::graph::logical_tensor::data_type;
using layout_type = dnnl::graph::logical_tensor::layout_type;
using property_type = dnnl::graph::logical_tensor::property_type;

/**
 * @brief An InnerProduct operator implemented on top of the oneDNN Graph
 *        API: the op is described as a dnnl::graph::graph, partitioned and
 *        compiled once, then the compiled partition is executed per call.
 */

class InnerProductGraphOperator : public Operator {
 public:
  explicit InnerProductGraphOperator(const OperatorConfig& conf);
  virtual ~InnerProductGraphOperator() {}

  void Reshape(const vector<Tensor*>& input, const vector<Tensor*>& output) override;
  void Forward(const vector<Tensor*>& input, const vector<Tensor*>& output) override;
  void Prepare(const vector<Tensor*>& input, const vector<Tensor*>& output) override;

 private:
  // oneDNN Graph state: op graph, CPU engine/stream, the logical tensors
  // describing the partition boundary, and the (compiled) partition.
  dnnl::graph::graph g_;
  dnnl::graph::engine eng_ {dnnl::graph::engine::kind::cpu, 0};
  dnnl::graph::stream strm_ {eng_};
  vector<logical_tensor> logical_inputs_;
  vector<logical_tensor> logical_outputs_;
  dnnl::graph::partition partition_;
  dnnl::graph::compiled_partition cp_;

  string output_dtype_ = "fp32";
  // NOTE(review): presumably quantization output-range tensors — confirm
  // against the implementation in inner_product_graph.cpp.
  Tensor* dst_min_ = nullptr;
  Tensor* dst_max_ = nullptr;
  Tensor* src_ = nullptr;
  Tensor* dst_ = nullptr;
  vector<int64_t> src0_perm_;   // optional permutation of the first input
  vector<int64_t> src1_perm_;   // optional permutation of the second input
  Tensor* src0_ = nullptr;
  Tensor* src1_ = nullptr;
  Tensor* bias_ = nullptr;
  bool has_bias_ = false;
  bool transpose_a_ = false;
  bool transpose_b_ = true;     // weights are transposed by default
  // Post-op flags; which one applies is presumably selected from append_op_
  // in the constructor — verify in the .cpp.
  bool append_sum_ = false;
  bool binary_add_ = false;
  bool tanh_ = false;
  bool gelu_tanh_ = false;
  bool gelu_erf_ = false;
  bool gelu_split_ = false;
  bool sigmoid_ = false;
  bool relu_ = false;
  bool append_eltwise_ = false;
  string append_op_;

  // Binds the framework Tensor pointers above to the operator's I/O slots.
  void MapTensors(const vector<Tensor*>& input, const vector<Tensor*>& output);
};
}  // namespace executor
#endif  // ENGINE_EXECUTOR_INCLUDE_ONEDNN_GRAPH_OPERATORS_INNER_PRODUCT_GRAPH_HPP_
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
// Copyright (c) 2021 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ENGINE_EXECUTOR_INCLUDE_ONEDNN_GRAPH_OPERATORS_SOFTMAX_GRAPH_HPP_
#define ENGINE_EXECUTOR_INCLUDE_ONEDNN_GRAPH_OPERATORS_SOFTMAX_GRAPH_HPP_
#include <string>
#include <unordered_map>
#include <vector>

#include "../operator.hpp"
#include "oneapi/dnnl/dnnl_graph.hpp"

namespace executor {

// Short aliases for the oneDNN Graph logical-tensor vocabulary used below.
using logical_tensor = dnnl::graph::logical_tensor;
using data_type = dnnl::graph::logical_tensor::data_type;
using layout_type = dnnl::graph::logical_tensor::layout_type;
using property_type = dnnl::graph::logical_tensor::property_type;

/**
 * @brief A Softmax operator implemented on top of the oneDNN Graph API:
 *        the op is described as a dnnl::graph::graph, partitioned and
 *        compiled once, then the compiled partition is executed per call.
 */

class SoftmaxGraphOperator : public Operator {
 public:
  explicit SoftmaxGraphOperator(const OperatorConfig& conf);
  virtual ~SoftmaxGraphOperator() {}

  void Reshape(const vector<Tensor*>& input, const vector<Tensor*>& output) override;
  void Forward(const vector<Tensor*>& input, const vector<Tensor*>& output) override;
  void Prepare(const vector<Tensor*>& input, const vector<Tensor*>& output) override;

 private:
  // oneDNN Graph state: op graph, CPU engine/stream, the logical tensors
  // describing the partition boundary, and the (compiled) partition.
  dnnl::graph::graph g_;
  dnnl::graph::engine eng_ {dnnl::graph::engine::kind::cpu, 0};
  dnnl::graph::stream strm_ {eng_};
  vector<logical_tensor> logical_inputs_;
  vector<logical_tensor> logical_outputs_;
  dnnl::graph::partition partition_;
  dnnl::graph::compiled_partition cp_;

  int axis_;                    // softmax reduction axis (from the op config)
  string output_dtype_ = "fp32";
  // NOTE(review): presumably quantization output-range tensors — confirm
  // against the implementation in softmax_graph.cpp.
  Tensor* dst_min_ = nullptr;
  Tensor* dst_max_ = nullptr;
  Tensor* src_ = nullptr;
  Tensor* dst_ = nullptr;

  // Binds the framework Tensor pointers above to the operator's I/O slots.
  void MapTensors(const vector<Tensor*>& input, const vector<Tensor*>& output);
};
}  // namespace executor
#endif  // ENGINE_EXECUTOR_INCLUDE_ONEDNN_GRAPH_OPERATORS_SOFTMAX_GRAPH_HPP_

0 commit comments

Comments
 (0)