diff --git a/neural_compressor/adaptor/tf_utils/graph_converter.py b/neural_compressor/adaptor/tf_utils/graph_converter.py index 99dcf33b3aa..ca6573baf9f 100644 --- a/neural_compressor/adaptor/tf_utils/graph_converter.py +++ b/neural_compressor/adaptor/tf_utils/graph_converter.py @@ -163,8 +163,7 @@ def _inference(self, model): # ITEX optimization has broken INC calibration process. # INC needs turn off ITEX optimization pass in calibration stage. # TODO ITEX will provide API to replace setting environment variable. - if self.itex_mode: - os.environ["ITEX_REMAPPER"] = "0" + os.environ["ITEX_REMAPPER"] = "0" sess = model.sess iter_op = model.iter_op input_tensor = model.input_tensor @@ -225,26 +224,25 @@ def check_shape(tensor, data): return True disorder_tensors = [] - disorder_inputs = [] + disorder_inputs = [] for idx, sort_tensor in enumerate(input_tensor): sort_input = inputs[idx] if check_shape(sort_tensor, sort_input): - feed_dict.update({sort_tensor: sort_input}) + feed_dict.update({sort_tensor: sort_input}) else: disorder_tensors.append(sort_tensor) disorder_inputs.append(sort_input) for i, dis_tensor in enumerate(disorder_tensors): - for j, dis_input in enumerate(disorder_inputs): - if check_shape(dis_tensor, dis_input): - feed_dict.update({dis_tensor: dis_input}) - break + for j, dis_input in enumerate(disorder_inputs): + if check_shape(dis_tensor, dis_input): + feed_dict.update({dis_tensor: dis_input}) + break _ = sess.run(output_tensor, feed_dict) if iter_op==[] \ else iterator_sess_run(sess, iter_op, \ feed_dict, output_tensor, self.calib_iteration) if idx + 1 == self.calib_iteration: break - if self.itex_mode: - os.environ["ITEX_REMAPPER"] = "1" + os.environ["ITEX_REMAPPER"] = "1" def _check_tf_version(self): is_supported_version = False diff --git a/neural_compressor/experimental/benchmark.py b/neural_compressor/experimental/benchmark.py index 4ee2f8e6fd6..00329dabd43 100644 --- a/neural_compressor/experimental/benchmark.py +++ 
b/neural_compressor/experimental/benchmark.py @@ -181,8 +181,8 @@ def __call__(self, mode='performance'): assert cfg.evaluation is not None, 'benchmark evaluation filed should not be None...' assert sys.platform in ['linux', 'win32'], 'only support platform windows and linux...' set_all_env_var(deep_get(cfg, 'evaluation.{}.configs'.format(mode))) - # disable multi-instance for accuracy mode + # disable multi-instance for accuracy mode or running benchmark on GPU device - if mode == "accuracy": + if mode == "accuracy" or cfg.device == 'gpu': set_env_var('NC_ENV_CONF', True, overwrite_existing=True) logger.info("Start to run Benchmark.") diff --git a/test/itex/test_tensorflow_itex_basic.py b/test/itex/test_tensorflow_itex_basic.py index 6fc3e9a518a..9d3cb1e58ef 100644 --- a/test/itex/test_tensorflow_itex_basic.py +++ b/test/itex/test_tensorflow_itex_basic.py @@ -5,13 +5,14 @@ import os import shutil import yaml +import platform import numpy as np from neural_compressor.adaptor.tf_utils.quantize_graph.quantize_graph_for_intel_cpu import QuantizeGraphForIntel from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.strip_unused_nodes import StripUnusedNodesOptimizer from neural_compressor.adaptor.tf_utils.graph_rewriter.generic.fold_batch_norm import FoldBatchNormNodesOptimizer from neural_compressor.adaptor.tensorflow import TensorflowQuery from neural_compressor.adaptor.tf_utils.util import disable_random -from neural_compressor.experimental import Quantization, common +from neural_compressor.experimental import Quantization, Benchmark, common from neural_compressor.utils.utility import CpuInfo from neural_compressor.adaptor.tf_utils.util import version1_lt_version2, version1_gte_version2 @@ -217,5 +218,53 @@ def test_depthwiseconv2d_case(self): reshape_counter += 1 self.assertEqual(reshape_counter, 2) + @disable_random() + @unittest.skipIf(version1_lt_version2(tf.version.VERSION, '2.8.0') or \ + platform.system().lower() == "windows", "Only supports tf greater 
2.7.0 and Linux") + def test_itex_benchmark_gpu(self): + x = tf.compat.v1.placeholder(tf.float32, [1, 56, 56, 16], name="input") + top_relu = tf.nn.relu(x) + paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]]) + x_pad = tf.pad(top_relu, paddings, "CONSTANT") + conv_weights = tf.compat.v1.get_variable("weight", [3, 3, 16, 16], + initializer=tf.compat.v1.random_normal_initializer()) + conv = tf.nn.conv2d(x_pad, conv_weights, strides=[1, 2, 2, 1], padding="VALID") + normed = tf.compat.v1.layers.batch_normalization(conv) + conv_weights2 = tf.compat.v1.get_variable("weight2", [3, 3, 16, 16], + initializer=tf.compat.v1.random_normal_initializer()) + conv2 = tf.nn.conv2d(top_relu, conv_weights2, strides=[1, 2, 2, 1], padding="SAME") + normed2 = tf.compat.v1.layers.batch_normalization(conv2) + add = tf.raw_ops.Add(x=normed, y=normed2, name='addv2') + relu = tf.nn.relu(add) + relu6 = tf.nn.relu6(relu, name='op_to_store') + out_name = relu6.name.split(':')[0] + with tf.compat.v1.Session() as sess: + sess.run(tf.compat.v1.global_variables_initializer()) + output_graph_def = graph_util.convert_variables_to_constants( + sess=sess, + input_graph_def=sess.graph_def, + output_node_names=[out_name]) + + quantizer = Quantization('fake_yaml_2.yaml') + dataset = quantizer.dataset('dummy', shape=(100, 56, 56, 16), label=True) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.model = output_graph_def + output_graph = quantizer.fit() + + evaluator = Benchmark('fake_yaml_2.yaml') + evaluator.b_dataloader = common.DataLoader(dataset) + evaluator.model = output_graph + evaluator('performance') + + found_multi_instance_log = False + for file_name in os.listdir(os.getcwd()): + if file_name.endswith(".log"): + found_multi_instance_log = True + break + + self.assertEqual(found_multi_instance_log, False) + + if __name__ == '__main__': unittest.main()