From 138b4e3afb0fc5eb805f067452cf24b31ad91687 Mon Sep 17 00:00:00 2001
From: "Lv, Liang1"
Date: Fri, 7 Apr 2023 14:15:04 +0800
Subject: [PATCH 1/4] Add more BF16 ops support on stock tensorflow

Signed-off-by: Lv, Liang1
---
 neural_compressor/adaptor/tensorflow.py   | 6 ++++--
 neural_compressor/adaptor/tensorflow.yaml | 3 ++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/neural_compressor/adaptor/tensorflow.py b/neural_compressor/adaptor/tensorflow.py
index 23b5fa611a3..92e486449ef 100644
--- a/neural_compressor/adaptor/tensorflow.py
+++ b/neural_compressor/adaptor/tensorflow.py
@@ -722,6 +722,7 @@ def _query_quantizable_ops(self, matched_nodes):
         fp32_common_config = {'weight': {'dtype': 'fp32'}, 'activation': {'dtype': 'fp32'}}
         uint8_type = self.query_handler.get_op_types_by_precision(precision='uint8')
         int8_type = self.query_handler.get_op_types_by_precision(precision='int8')
+        bf16_type = self.query_handler.get_op_types_by_precision(precision='bf16')
         tf_quantizable_op_type = list(set(uint8_type).union(set(int8_type)))
         valid_precision = self.query_handler.get_mixed_precision_combination()
@@ -792,7 +793,8 @@ def _query_quantizable_ops(self, matched_nodes):
                 self.quantizable_op_details[(
                     node_name, self.unify_op_type_mapping[node_op]
                 )] = [copy.deepcopy(other_config), fp32_common_config]
-            if ('bf16' in valid_precision and CpuInfo().bf16) or os.getenv('FORCE_BF16') == '1':
+            if node_op in bf16_type and (('bf16' in valid_precision and CpuInfo().bf16) \
+                    or os.getenv('FORCE_BF16') == '1'):
                 self.quantizable_op_details[(
                     node_name, self.unify_op_type_mapping[node_op]
                 )].insert(1, bf16_common_config)
@@ -2228,7 +2230,7 @@ def get_op_types_by_precision(self, precision):
             return self.cur_config[precision]
         if version1_gte_version2(tf.version.VERSION, '2.1.0') or \
                 version1_eq_version2(tf.version.VERSION, '1.15.0-up3'):
-            return ['Conv2D']
+            return self.cur_config[precision]
         return []

     def get_mixed_precision_combination(self):
diff --git a/neural_compressor/adaptor/tensorflow.yaml b/neural_compressor/adaptor/tensorflow.yaml
index 719109534c0..712f3fc848f 100644
--- a/neural_compressor/adaptor/tensorflow.yaml
+++ b/neural_compressor/adaptor/tensorflow.yaml
@@ -153,7 +153,8 @@
   version:
     name: ['2.1.0', '2.2.0', '2.3.0', '2.4.0', '2.5.0', '2.6.0', '2.6.1', '2.6.2', '2.7.0', '2.8.0',
           '2.9.0', '2.9.1', '2.10.0', '2.11.0', '1.15.0-up1', '1.15.0-up2', '1.15.0-up3']
-  bf16: ['Conv2D']
+  bf16: ['Conv2D', 'MatMul', 'ConcatV2', 'MaxPool', 'AvgPool', 'DepthwiseConv2dNative', 'Add', 'AddV2', 'AddN',
+         'BiasAdd', 'Mul', 'Pad', 'Relu', 'Relu6', 'Reshape', 'Shape', 'Softmax', 'Squeeze', 'Sub']
   fp32: ['*'] # '*' means all op types

   int8: {
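Note on the gating this patch introduces: an op only receives a bf16 config when its type is in the yaml's bf16 list AND bf16 is actually usable on the host (or forced via the environment). A minimal standalone sketch of that condition; the helper name `should_insert_bf16_config` is hypothetical, in the patch the check lives inline in `_query_quantizable_ops`:

```python
import os

def should_insert_bf16_config(node_op, bf16_type, valid_precision, cpu_has_bf16):
    # A node gets a bf16 config only if its op type appears in the framework
    # yaml's bf16 list AND either (bf16 is a valid mixed-precision target and
    # the CPU supports it natively) or FORCE_BF16=1 is set.
    return node_op in bf16_type and (
        ('bf16' in valid_precision and cpu_has_bf16)
        or os.getenv('FORCE_BF16') == '1')

# With a subset of the yaml list from this patch:
bf16_type = ['Conv2D', 'MatMul', 'MaxPool']
print(should_insert_bf16_config('Conv2D', bf16_type, ['bf16', 'fp32'], True))   # True
print(should_insert_bf16_config('Sigmoid', bf16_type, ['bf16', 'fp32'], True))  # False
```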
From 7780b74387b05fa20d3120fe4c8ca2eea843f331 Mon Sep 17 00:00:00 2001
From: "Lv, Liang1"
Date: Fri, 7 Apr 2023 23:03:27 +0800
Subject: [PATCH 2/4] add bf16 in yaml

Signed-off-by: Lv, Liang1
---
 .../tensorflow_adaptor/test_tensorflow_query_yaml.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_query_yaml.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_query_yaml.py
index c4ed79a6f2b..c841f0fe6e8 100644
--- a/test/adaptor/tensorflow_adaptor/test_tensorflow_query_yaml.py
+++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_query_yaml.py
@@ -52,7 +52,9 @@ def build_fake_framework_yaml():
 ---
 -
   version:
-    name: ['2.1.0', '2.2.0', '2.3.0', '2.4.0', '2.5.0', '2.6.0', '2.7.0']
+    name: ['2.1.0', '2.2.0', '2.3.0', '2.4.0', '2.5.0', '2.6.0', '2.7.0']
+
+  bf16: ['Conv2D', 'MatMul', 'ConcatV2', 'MaxPool', 'AvgPool', 'DepthwiseConv2dNative']

   int8: {
     'static': {
@@ -93,6 +95,8 @@ def build_fake_framework_yaml():
   version:
     name: ['default']

+  bf16: ['Conv2D', 'MatMul', 'ConcatV2', 'MaxPool', 'AvgPool', 'DepthwiseConv2dNative']
+
   int8: {
     'static': {
       'Conv2D': {

From f5ffb9a92644b9fcc08fe703bb536b4e9336853a Mon Sep 17 00:00:00 2001
From: "Lv, Liang1"
Date: Sun, 23 Apr 2023 15:55:25 +0800
Subject: [PATCH 3/4] fix extension test issues

Signed-off-by: Lv, Liang1
---
 neural_compressor/adaptor/tensorflow.yaml                        | 3 +--
 .../adaptor/tf_utils/graph_rewriter/bf16/bf16_convert.py         | 5 ++++-
 .../tf_utils/graph_rewriter/generic/dequantize_cast_optimizer.py | 9 ++++++++-
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/neural_compressor/adaptor/tensorflow.yaml b/neural_compressor/adaptor/tensorflow.yaml
index 712f3fc848f..cbe0614198e 100644
--- a/neural_compressor/adaptor/tensorflow.yaml
+++ b/neural_compressor/adaptor/tensorflow.yaml
@@ -153,8 +153,7 @@
   version:
     name: ['2.1.0', '2.2.0', '2.3.0', '2.4.0', '2.5.0', '2.6.0', '2.6.1', '2.6.2', '2.7.0', '2.8.0',
           '2.9.0', '2.9.1', '2.10.0', '2.11.0', '1.15.0-up1', '1.15.0-up2', '1.15.0-up3']
-  bf16: ['Conv2D', 'MatMul', 'ConcatV2', 'MaxPool', 'AvgPool', 'DepthwiseConv2dNative', 'Add', 'AddV2', 'AddN',
-         'BiasAdd', 'Mul', 'Pad', 'Relu', 'Relu6', 'Reshape', 'Shape', 'Softmax', 'Squeeze', 'Sub']
+  bf16: ['Conv2D', 'Conv3D', 'MatMul', 'BatchMatMul', 'MaxPool', 'MaxPool3D', 'AvgPool', 'AvgPool3D', 'DepthwiseConv2dNative']
   fp32: ['*'] # '*' means all op types

   int8: {
diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/bf16_convert.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/bf16_convert.py
index d97fa5c62d3..d2f03c3e288 100644
--- a/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/bf16_convert.py
+++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/bf16/bf16_convert.py
@@ -34,6 +34,8 @@
 from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper
 from ..generic.graph_cse_optimizer import GraphCseOptimizer
 from ..generic.dequantize_cast_optimizer import DequantizeCastOptimizer
+import tensorflow as tf
+from neural_compressor.adaptor.tf_utils.util import TF_SPR_BASE_VERSIONS

 DT_FLOAT32 = attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)
 DT_BFLOAT16 = attr_value_pb2.AttrValue(type=dtypes.bfloat16.as_datatype_enum)
@@ -179,7 +181,8 @@ def _bf16_convert(self, bf16_node_name):
                         tensor=tensor_util.make_tensor_proto(
                             fp32_value, dtypes.bfloat16, fp32_value.shape)))
         elif 'Dequantize' == input_node.op and len(input_node_outputs) == 1 \
-                and input_node.attr['mode'].s != b'MIN_FIRST':
+                and input_node.attr['mode'].s != b'MIN_FIRST' \
+                and tf.version.VERSION in TF_SPR_BASE_VERSIONS:
             # Dequantize with mode MIN_FIRST does not support bf16 in both eigen and mkl
             _, outputs_dt_input_node = self._dtype(input_node)
             allowed_input_node_dt_val = self._allowed_dtype_val(input_node)
diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dequantize_cast_optimizer.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dequantize_cast_optimizer.py
index 7685d911e7f..a341d81b054 100644
--- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dequantize_cast_optimizer.py
+++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/dequantize_cast_optimizer.py
@@ -21,8 +21,10 @@
 from ..graph_base import GraphRewriterBase
 from neural_compressor.adaptor.tf_utils.graph_util import GraphAnalyzer
-from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper
 from neural_compressor.utils.utility import dump_elapsed_time
+import tensorflow as tf
+from neural_compressor.adaptor.tf_utils.util import TF_SPR_BASE_VERSIONS
+

 class DequantizeCastOptimizer(GraphRewriterBase):
     """Remove the Cast OP and set Dequantize output to BF16 if the Cast OP output is BF16."""
@@ -36,6 +38,11 @@ def do_transformation(self):
         Returns:
             [graphdef]: optimized graph
         """
+        # Stock TF _MklDequantize doesn't support BF16 currently.
+        # TODO: remove this once the spr-base changes are upstreamed to stock TF.
+        if tf.version.VERSION not in TF_SPR_BASE_VERSIONS:
+            return self.model
+
         DT_BFLOAT16 = attr_value_pb2.AttrValue(type=dtypes.bfloat16.as_datatype_enum)
         cur_graph = GraphAnalyzer()
         cur_graph.graph = self.model
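The early return added to `do_transformation` above is the heart of [PATCH 3/4]: on stock TensorFlow the graph comes back untouched, and the Dequantize -> Cast pair is only folded on Intel SPR-base builds whose _MklDequantize kernel can emit BF16. A standalone sketch of the same version gate; the version strings below are placeholders, not the real contents of `TF_SPR_BASE_VERSIONS`:

```python
import tensorflow as tf

# Placeholder values; the real tuple lives in
# neural_compressor.adaptor.tf_utils.util and enumerates the SPR-base TF builds.
TF_SPR_BASE_VERSIONS = ('2.11.0202242', '2.11.0202250')

def can_fold_dequantize_cast():
    # Folding Cast(bf16) into Dequantize requires a Dequantize kernel that can
    # emit BF16 directly, which only the SPR-base builds ship today.
    return tf.version.VERSION in TF_SPR_BASE_VERSIONS

print(can_fold_dequantize_cast())  # False on stock TF, e.g. '2.11.0'
```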
From 786a465bd85db2fa0ad5aeec8ea91c856c7907db Mon Sep 17 00:00:00 2001
From: "Lv, Liang1"
Date: Mon, 24 Apr 2023 10:27:27 +0800
Subject: [PATCH 4/4] fix ut issue

Signed-off-by: Lv, Liang1
---
 ...sorflow_graph_dequantize_cast_optimizer.py |  6 ++++-
 ..._graph_dequantize_cast_optimizer_newapi.py | 96 +++++++++++++++++++
 2 files changed, 101 insertions(+), 1 deletion(-)
 create mode 100644 test/tfnewapi/test_tensorflow_graph_dequantize_cast_optimizer_newapi.py

diff --git a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_dequantize_cast_optimizer.py b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_dequantize_cast_optimizer.py
index fc26f9a8ce6..281bf3a638f 100644
--- a/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_dequantize_cast_optimizer.py
+++ b/test/adaptor/tensorflow_adaptor/test_tensorflow_graph_dequantize_cast_optimizer.py
@@ -67,7 +67,11 @@ def test_dequantize_cast_normal(self):
         graph_def = build_fake_graphdef()
         converted_graph_def = DequantizeCastOptimizer(graph_def).do_transformation()
+        hasCast = False
         for i in converted_graph_def.node:
-            self.assertNotEqual(i.op, 'Cast')
+            if i.op == 'Cast':
+                hasCast = True
+                break
+        self.assertEqual(hasCast, True)

     @disable_random()
     def test_dequantize_cast_min_first(self):
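This hunk flips the old-API test's expectation: after [PATCH 3/4], stock TF leaves the Cast in place, so the test now asserts that the Cast survives (the tfnewapi copy below keeps the original removed-Cast assertion for SPR-base builds). The scan pattern reduces to a one-liner; `graph_has_op` is a hypothetical helper for illustration, not part of the patch:

```python
def graph_has_op(graph_def, op_type):
    # True if any node in the GraphDef carries the given op type;
    # equivalent to the hasCast loop in the updated test.
    return any(node.op == op_type for node in graph_def.node)

# Usage inside the test body:
#   self.assertTrue(graph_has_op(converted_graph_def, 'Cast'))
```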
diff --git a/test/tfnewapi/test_tensorflow_graph_dequantize_cast_optimizer_newapi.py b/test/tfnewapi/test_tensorflow_graph_dequantize_cast_optimizer_newapi.py
new file mode 100644
index 00000000000..fc26f9a8ce6
--- /dev/null
+++ b/test/tfnewapi/test_tensorflow_graph_dequantize_cast_optimizer_newapi.py
@@ -0,0 +1,96 @@
+import unittest
+import os
+import yaml
+import numpy as np
+import tensorflow as tf
+from tensorflow.python.framework import dtypes
+from neural_compressor.adaptor.tf_utils.util import disable_random
+from neural_compressor.adaptor.tf_utils.graph_util import GraphRewriterHelper as Helper
+from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.dequantize_cast_optimizer import DequantizeCastOptimizer
+
+def build_fake_graphdef(set_min_first=False, dq_multi_outputs=False):
+    tf.compat.v1.disable_eager_execution()
+
+    input = tf.compat.v1.placeholder(tf.float32, shape=(32, 224, 224, 3), name='input')
+    graph_def = tf.compat.v1.get_default_graph().as_graph_def(add_shapes=True)
+
+    min_input = Helper.create_constant_node(
+        'test_min',
+        value=0.,
+        dtype=dtypes.float32)
+
+    max_input = Helper.create_constant_node(
+        'test_max',
+        value=[1],
+        dtype=dtypes.float32)
+
+    quant_v2_node = Helper.create_node("QuantizeV2", 'test_quantize',
+                                       [input.name, min_input.name, max_input.name])
+
+    dequantize_node = Helper.create_node(
+        "Dequantize", 'test_dequantize',
+        [quant_v2_node.name, quant_v2_node.name + ':1', quant_v2_node.name + ':2'])
+    if set_min_first:
+        Helper.set_attr_string(dequantize_node, "mode", b'MIN_FIRST')
+
+    cast_node = Helper.create_node(
+        "Cast", 'test_cast', [dequantize_node.name])
+    Helper.set_attr_dtype(cast_node, "DstT", dtypes.bfloat16)
+    Helper.set_attr_dtype(cast_node, "SrcT", dtypes.float32)
+    Helper.set_attr_bool(cast_node, "Truncate", False)
+
+    dentity_node = Helper.create_node(
+        "Identity", 'output', [cast_node.name])
+    Helper.set_attr_dtype(dentity_node, "T", dtypes.bfloat16)
+
+    graph_def.node.extend([
+        min_input,
+        max_input,
+        quant_v2_node,
+        dequantize_node,
+        cast_node,
+        dentity_node,
+    ])
+
+    if dq_multi_outputs:
+        dentity_node_2 = Helper.create_node(
+            "Identity", 'id_1', [dequantize_node.name])
+        Helper.set_attr_dtype(dentity_node_2, "T", dtypes.float32)
+        graph_def.node.extend([dentity_node_2])
+
+    return graph_def
+
+class TestDequantizeCastOptimizer(unittest.TestCase):
+
+    @disable_random()
+    def test_dequantize_cast_normal(self):
+        graph_def = build_fake_graphdef()
+        converted_graph_def = DequantizeCastOptimizer(graph_def).do_transformation()
+        for i in converted_graph_def.node:
+            self.assertNotEqual(i.op, 'Cast')
+
+    @disable_random()
+    def test_dequantize_cast_min_first(self):
+        graph_def = build_fake_graphdef(set_min_first=True)
+        converted_graph_def = DequantizeCastOptimizer(graph_def).do_transformation()
+        hasCast = False
+        for i in converted_graph_def.node:
+            if i.op == 'Cast':
+                hasCast = True
+                break
+        self.assertEqual(hasCast, True)
+
+    @disable_random()
+    def test_dequantize_cast_multiple_outputs(self):
+        graph_def = build_fake_graphdef(dq_multi_outputs=True)
+        converted_graph_def = DequantizeCastOptimizer(graph_def).do_transformation()
+        hasCast = False
+        for i in converted_graph_def.node:
+            if i.op == 'Cast':
+                hasCast = True
+                break
+        self.assertEqual(hasCast, True)
+
+
+if __name__ == "__main__":
+    unittest.main()
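The new tfnewapi test exercises the folding path end to end. A short usage sketch, assuming it runs in the context of the test module above on an SPR-base TF build (on stock TF the optimizer now returns the graph unchanged):

```python
# What the optimizer does to build_fake_graphdef()'s graph on SPR-base TF:
#   QuantizeV2 -> Dequantize(DstT=float32) -> Cast(bf16) -> Identity
# folds to
#   QuantizeV2 -> Dequantize(DstT=bfloat16) -> Identity
from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.dequantize_cast_optimizer \
    import DequantizeCastOptimizer

graph_def = build_fake_graphdef()  # helper defined in the new test above
converted = DequantizeCastOptimizer(graph_def).do_transformation()
print([n.op for n in converted.node])  # no 'Cast' on SPR-base TF; unchanged on stock TF
```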