diff --git a/.azure-pipelines/scripts/ut/env_setup.sh b/.azure-pipelines/scripts/ut/env_setup.sh
index 65a46b1f6dc..01c1a0b4a91 100644
--- a/.azure-pipelines/scripts/ut/env_setup.sh
+++ b/.azure-pipelines/scripts/ut/env_setup.sh
@@ -28,7 +28,10 @@ elif [[ "${tensorflow_version}" != "" ]]; then
     pip install intel-tensorflow==${tensorflow_version}
 fi
 
-if [[ "${itex_version}" != "" ]]; then
+if [[ "${itex_version}" == "nightly" ]]; then
+    pip install /tf_dataset/itex_binary/221209/intel_extension_for_tensorflow-1.1.0-cp38-cp38-linux_x86_64.whl
+    pip install /tf_dataset/itex_binary/221209/intel_extension_for_tensorflow_lib-1.1.0.0-cp38-cp38-linux_x86_64.whl
+elif [[ "${itex_version}" != "" ]]; then
     pip install --upgrade intel-extension-for-tensorflow[cpu]==${itex_version}
 fi
 
diff --git a/.azure-pipelines/scripts/ut/run_basic_itex.sh b/.azure-pipelines/scripts/ut/run_basic_itex.sh
index 45278216f8d..c937992b7be 100644
--- a/.azure-pipelines/scripts/ut/run_basic_itex.sh
+++ b/.azure-pipelines/scripts/ut/run_basic_itex.sh
@@ -3,7 +3,7 @@ python -c "import neural_compressor as nc;print(nc.version.__version__)"
 echo "run basic itex"
 
 echo "specify fwk version..."
-export itex_version='1.0.0'
+export itex_version='nightly'
 export tensorflow_version='2.10.0-official'
 
 echo "set up UT env..."
diff --git a/examples/keras/mnist/README.md b/examples/keras/mnist/README.md
new file mode 100644
index 00000000000..163b7c96a70
--- /dev/null
+++ b/examples/keras/mnist/README.md
@@ -0,0 +1,41 @@
+Step-by-Step
+============
+
+This document lists the steps to reproduce the Keras MNIST model tuning results via Neural Compressor.
+This example can run on Intel CPUs.
+
+# Prerequisite
+
+### 1. Installation
+Python 3.6 or a higher version is recommended.
+
+```shell
+# Install Intel® Neural Compressor
+pip install neural-compressor
+```
+
+### 2. Install TensorFlow
+```shell
+pip install tensorflow
+```
+> Note: The supported TensorFlow version must be greater than 2.10.0.
+
+### 3. Install Dependency Packages
+```shell
+cd examples/keras/mnist/
+pip install -r requirements.txt
+```
+
+#### Quantizing the model on Intel CPU (Experimental)
+Intel Extension for TensorFlow for Intel CPUs is currently experimental. It is not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+
+# Run
+
+  ```shell
+  cd examples/keras/mnist/
+  python mnist.py
+  ```
diff --git a/examples/keras/mnist/mnist.py b/examples/keras/mnist/mnist.py
new file mode 100644
index 00000000000..8e0fbf411e5
--- /dev/null
+++ b/examples/keras/mnist/mnist.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2022 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
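+
+"""Keras MNIST example: train a small FP32 CNN, save it as `fp32_model`, then
+apply post-training static quantization through `neural_compressor`'s `fit`
+API with the ITEX backend (see `main` below)."""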
+ +import os +import tensorflow as tf +import numpy as np +from tensorflow import keras +from tensorflow.keras import layers +import time + +num_classes = 10 + +def build_dataset(): + # Load the data and split it between train and test sets + (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() + + # Scale images to the [0, 1] range + x_train = x_train.astype("float32") / 255 + x_test = x_test.astype("float32") / 255 + # Make sure images have shape (28, 28, 1) + x_train = np.expand_dims(x_train, -1) + x_test = np.expand_dims(x_test, -1) + + # convert class vectors to binary class matrices + y_train = keras.utils.to_categorical(y_train, num_classes) + y_test = keras.utils.to_categorical(y_test, num_classes) + return x_train, y_train, x_test, y_test + +class Dataset(): + def __init__(self, ): + _, _ , self.inputs, self.labels = build_dataset() + + def __getitem__(self, idx): + return self.inputs[idx], self.labels[idx] + + def __len__(self): + assert len(self.inputs) == len(self.labels), 'inputs should have equal len with labels' + return len(self.inputs) + +def build_model(x_train, y_train, x_test, y_test): + if os.path.exists('fp32_model'): + model = keras.models.load_model('fp32_model') + return model + # Model / data parameters + input_shape = (28, 28, 1) + model = keras.Sequential( + [ + keras.Input(shape=input_shape), + layers.Conv2D(32, kernel_size=(3, 3), activation="relu"), + layers.MaxPooling2D(pool_size=(2, 2)), + layers.Conv2D(64, kernel_size=(3, 3), activation="relu"), + layers.MaxPooling2D(pool_size=(2, 2)), + layers.Flatten(), + layers.Dropout(0.5), + layers.Dense(num_classes, activation="softmax"), + ] + ) + + batch_size = 128 + epochs = 1 + + model.compile(loss="categorical_crossentropy", optimizer="adam", + metrics=["accuracy"], run_eagerly=True) + model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1) + model.summary() + if not os.path.exists('fp32_model'): + model.save('fp32_model') + return model + +def eval_func(model): + x_train, y_train, x_test, y_test = build_dataset() + model.compile(metrics=["accuracy"], run_eagerly=False) + score = model.evaluate(x_test, y_test) + return score[1] + +def main(): + x_train, y_train, x_test, y_test = build_dataset() + model = build_model(x_train, y_train, x_test, y_test) + + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.utils.utility import set_random_seed + from neural_compressor.experimental import common + set_random_seed(9527) + config = PostTrainingQuantConfig(backend='itex') + quantized_model = fit(model, + conf=config, + calib_dataloader=common.DataLoader(Dataset(), batch_size=10), + eval_func=eval_func) + +if __name__ == '__main__': + main() + diff --git a/examples/keras/mnist/requirements.txt b/examples/keras/mnist/requirements.txt new file mode 100644 index 00000000000..cee1363064f --- /dev/null +++ b/examples/keras/mnist/requirements.txt @@ -0,0 +1,3 @@ +tensorflow +neural-compressor +intel-extension-for-tensorflow[cpu] \ No newline at end of file diff --git a/neural_compressor/adaptor/keras.py b/neural_compressor/adaptor/keras.py new file mode 100644 index 00000000000..ad8081d6b0d --- /dev/null +++ b/neural_compressor/adaptor/keras.py @@ -0,0 +1,539 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import copy +import json +import yaml +import math +import numpy as np +from collections import OrderedDict, UserDict +from .query import QueryBackendCapability +from .adaptor import adaptor_registry, Adaptor +from ..utils.utility import LazyImport, CpuInfo, singleton, Dequantize, dump_elapsed_time +from ..utils.utility import Statistics, GLOBAL_STATE, MODE, version1_lt_version2 +from ..utils import logger +from ..conf.dotdict import deep_get +from ..experimental.data.dataloaders.base_dataloader import BaseDataLoader +tf = LazyImport('tensorflow') + +def _add_supported_quantized_objects(custom_objects): + """Map all the quantized objects.""" + from neural_compressor.adaptor.keras_utils.quantizer import Quantize, DeQuantize + from neural_compressor.adaptor.keras_utils.quantizer import FakeQuant, QConv2D, QDense + custom_objects["Quantize"] = Quantize + custom_objects["DeQuantize"] = DeQuantize + custom_objects["FakeQuant"] = FakeQuant + custom_objects["QConv2D"] = QConv2D + custom_objects["QDense"] = QDense + return custom_objects + +@adaptor_registry +class KerasAdaptor(Adaptor): + '''The keras class of framework adaptor layer. + + ''' + def __init__(self, framework_specific_info): + super(KerasAdaptor, self).__init__(framework_specific_info) + self.framework_specific_info = framework_specific_info + self.approach = deep_get(self.framework_specific_info, 'approach', False) + self.quantize_config = {'op_wise_config': {}} + self.device = self.framework_specific_info['device'] + #self.work_dir = os.path.abspath(self.framework_specific_info['workspace_path']) + self.recipes = deep_get(self.framework_specific_info, 'recipes', {}) + #os.makedirs(self.work_dir, exist_ok=True) + + self.pre_optimized_model = None + self.pre_optimizer_handle = None + self.fp32_ops = [] + self.query_handler = KerasQuery(local_config_file=os.path.join( + os.path.dirname(__file__), 'keras.yaml')) + + self.fp32_results = [] + self.fp32_preds_as_label = False + self.benchmark = (GLOBAL_STATE.STATE == MODE.BENCHMARK) + self.callbacks = [] + self.optype_statistics = None + + def tuning_cfg_to_fw(self, tuning_cfg): + self.quantize_config['calib_iteration'] = tuning_cfg['calib_iteration'] + self.quantize_config['device'] = self.device + self.quantize_config['advance'] = deep_get(tuning_cfg, 'advance') + fp32_ops = [] + dispatched_op_names = [j[0] for j in tuning_cfg['op']] + invalid_op_names = [i for i in self.quantize_config['op_wise_config'] + if i not in dispatched_op_names] + + for op_name in invalid_op_names: + self.quantize_config['op_wise_config'].pop(op_name) + + for each_op_info in tuning_cfg['op']: + op_name = each_op_info[0] + if tuning_cfg['op'][each_op_info]['activation']['dtype'] == 'fp32': + if op_name in self.quantize_config['op_wise_config']: + self.quantize_config['op_wise_config'].pop(op_name) + fp32_ops.append(op_name) + continue + + is_perchannel = False + bit = None + if 'weight' in tuning_cfg['op'][each_op_info]: + is_perchannel = tuning_cfg['op'][each_op_info]['weight'][ + 'granularity'] == 'per_channel' + #bit = tuning_cfg['op'][each_op_info]['weight']['bit'] + weight_bit = bit 
if bit else 7.0 + algorithm = tuning_cfg['op'][each_op_info]['activation']['algorithm'] + is_asymmetric = False + if 'activation' in tuning_cfg['op'][each_op_info]: + is_asymmetric = tuning_cfg['op'][each_op_info]['activation']['scheme'] == 'asym' + self.quantize_config['op_wise_config'][op_name] = (is_perchannel, + algorithm, + is_asymmetric, + weight_bit) + self.fp32_ops = fp32_ops + + @dump_elapsed_time("Pass quantize model") + def quantize(self, tune_cfg, model, dataloader, q_func=None): + '''Execute the quantize process on the specified model. + + Args: + tune_cfg(dict): The chosen tuning configuration. + model (object): The model to do quantization. + dataloader(object): The dataloader used to load quantization dataset. + q_func (optional): training function for quantization aware training mode. + ''' + self.tuning_cfg_to_fw(tune_cfg) + logger.debug("Dump quantization configurations:") + logger.debug(self.quantize_config) + calib_sampling_size = tune_cfg.get('calib_sampling_size', 1) + if isinstance(dataloader, BaseDataLoader): + batch_size = dataloader.batch_size + for i in range(batch_size): + if calib_sampling_size % (batch_size - i) == 0: + calib_batch_size = batch_size - i + if i != 0: # pragma: no cover + logger.warning("Reset `calibration.dataloader.batch_size` field " + "to {}".format(calib_batch_size) + + " to make sure the sampling_size is " + "divisible exactly by batch size") + break + tmp_iterations = int(math.ceil(calib_sampling_size / calib_batch_size)) + dataloader.batch(calib_batch_size) + self.quantize_config['calib_iteration'] = tmp_iterations + + else: # pragma: no cover + if hasattr(dataloader, 'batch_size') and \ + calib_sampling_size % dataloader.batch_size != 0: + iter = self.quantize_config['calib_iteration'] + logger.warning( + "Please note that calibration sampling size {} " \ + "isn't divisible exactly by batch size {}. " \ + "So the real sampling size is {}.". 
+                        format(calib_sampling_size, dataloader.batch_size,
+                               dataloader.batch_size * iter))
+        q_layers = []
+        for idx, layer in enumerate(self.fp32_layers):
+            layer_config = layer["config"]
+            if layer["class_name"] in ["Conv2D", "Dense"] and \
+                layer['config']['name'] in self.quantize_config['op_wise_config']:
+                op_config = self.quantize_config['op_wise_config'][layer['config']['name']]
+                mode = 'per_channel' if op_config[0] else 'per_tensor'
+                #(TODO) support asym/sym
+                fake_quant_name = 'fake_quant_' + str(idx)
+                q_layers.append({'class_name': 'FakeQuant',
+                                 'config': {'mode': 'per_tensor', 'name': fake_quant_name}})
+                q_layers.append(layer)
+            else:
+                q_layers.append(layer)
+
+        keras_object = model._model_object
+        json_model = copy.deepcopy(json.loads(keras_object.to_json()))
+        json_model['config']['layers'] = q_layers
+        quantized_model = self._restore_model_from_json(json_model)
+
+        converted_model = self._calibrate(quantized_model, dataloader,
+                                          self.quantize_config['calib_iteration'])
+
+        from neural_compressor.model.keras_model import KerasModel
+        converted_model = KerasModel(converted_model)
+        return converted_model
+
+    def _calibrate(self, model, dataloader, calib_iteration):
+        # run eagerly to fetch the numpy min/max
+        model.compile(run_eagerly=True)
+        results = {}
+        for idx, (inputs, labels) in enumerate(dataloader):
+            outputs = model.predict_on_batch(inputs)
+            json_model = copy.deepcopy(json.loads(model.to_json()))
+            config = json_model["config"]
+            layers = config["layers"]
+            for layer in layers:
+                if layer['class_name'] == 'FakeQuant':
+                    min_value = layer['config']['min_value']
+                    max_value = layer['config']['max_value']
+                    if layer['config']['name'] not in results:
+                        results[layer['config']['name']] = {
+                            'min': [min_value], 'max': [max_value]}
+                    else:
+                        results[layer['config']['name']]['min'].append(min_value)
+                        results[layer['config']['name']]['max'].append(max_value)
+            if idx + 1 == calib_iteration:
+                break
+
+        # insert the calibrated min/max to Q/DQ
+        json_model = copy.deepcopy(json.loads(model.to_json()))
+        config = json_model["config"]
+        layers = config["layers"]
+        q_layers = []
+        for layer in layers:
+            layer_config = copy.deepcopy(layer['config'])
+            if layer['class_name'] == 'FakeQuant':
+                min_value = min(results[layer['config']['name']]['min'])
+                max_value = max(results[layer['config']['name']]['max'])
+                q_layers.append({'class_name': 'Quantize',
+                                 'config': {'min_range': min_value,
+                                            'max_range': max_value,
+                                            }})
+                q_layers.append({'class_name': 'DeQuantize',
+                                 'config': {'min_range': min_value,
+                                            'max_range': max_value,
+                                            }})
+            elif layer['class_name'] == 'Conv2D' or layer['class_name'] == 'Dense':
+                # index 0 is weight, index 1 is bias
+                q_layer_name = 'Q' + layer['class_name']
+                kernel = self.layer_weights[layer['config']['name']][0]
+                layer_config['min_value'] = str(kernel.min())
+                layer_config['max_value'] = str(kernel.max())
+                q_layers.append({'class_name': q_layer_name, 'config': layer_config})
+            else:
+                q_layers.append(layer)
+
+        json_model['config']['layers'] = q_layers
+        quantized_model = self._restore_model_from_json(json_model)
+        return quantized_model
+
+    def _restore_model_from_json(self, json_model):
+        from tensorflow.keras.models import model_from_json
+        custom_objects = {}
+        # We need to keep a dictionary of custom objects as our quantized library
+        # is not recognized by keras.
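+        # After this call, custom_objects maps the class names emitted into the
+        # JSON ('Quantize', 'DeQuantize', 'FakeQuant', 'QConv2D', 'QDense') to
+        # their Python classes so model_from_json can deserialize them.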
+        custom_objects = _add_supported_quantized_objects(custom_objects)
+        qmodel = model_from_json(json.dumps(json_model), custom_objects=custom_objects)
+        qmodel = self._set_weights(qmodel, self.layer_weights)
+        return qmodel
+
+    # set fp32 weights to qmodel
+    def _set_weights(self, qmodel, layer_weights):
+        for qlayer in qmodel.layers:
+            if qlayer.get_weights():
+                if qlayer.name in layer_weights:
+                    qlayer.set_weights(layer_weights[qlayer.name])
+                else:
+                    hit_layer = False
+                    for sub_layer in qlayer.submodules:
+                        if sub_layer.name in layer_weights:
+                            qlayer.set_weights(layer_weights[sub_layer.name])
+                            hit_layer = True
+                            break
+                    if not hit_layer:
+                        raise ValueError('Cannot match the module weights.')
+        return qmodel
+
+    @dump_elapsed_time(customized_msg="Model inference")
+    def evaluate(self, model, dataloader, postprocess=None,
+                 metrics=None, measurer=None, iteration=-1,
+                 tensorboard=False, fp32_baseline=False):
+        '''The function is used to run evaluation on the validation dataset.
+
+        Args:
+            model (object): The model to evaluate.
+            dataloader (generator): generate the data and labels.
+            postprocess (object, optional): process the result from the model
+            metrics (list, optional): Depends on model category. Defaults to None.
+            measurer (object, optional): for precise benchmark measurement.
+            iteration(int, optional): control steps of mini-batch
+            tensorboard (boolean, optional): for tensorboard inspect tensor.
+            fp32_baseline (boolean, optional): only for compare_label=False pipeline
+        '''
+        # use keras object
+        keras_model = model.model
+        logger.info("Start to evaluate the Keras model.")
+        results = []
+        for idx, (inputs, labels) in enumerate(dataloader):
+            # use predict on batch
+            if measurer is not None:
+                measurer.start()
+                predictions = keras_model.predict_on_batch(inputs)
+                measurer.end()
+            else:
+                predictions = keras_model.predict_on_batch(inputs)
+
+            if self.fp32_preds_as_label:
+                self.fp32_results.append(predictions) if fp32_baseline else \
+                    results.append(predictions)
+
+            if postprocess is not None:
+                predictions, labels = postprocess((predictions, labels))
+            if metrics:
+                for metric in metrics:
+                    if not hasattr(metric, "compare_label") or \
+                        (hasattr(metric, "compare_label") and metric.compare_label):
+                        metric.update(predictions, labels)
+            if idx + 1 == iteration:
+                break
+        return results
+
+    def query_fw_capability(self, model):
+        '''The function is used to return framework tuning capability.
+
+        Args:
+            model (object): The model to query quantization tuning capability.
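+
+        Returns:
+            [dict]: The framework tuning capability, with 'opwise' and
+                    'optypewise' entries (see the dict built below).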
+        '''
+        self.pre_optimized_model = model
+        fp32_config = {'weight': {'dtype': 'fp32'}, 'activation': {'dtype': 'fp32'}}
+        int8_type = self.query_handler.get_op_types_by_precision(precision='int8')
+        op_capability = self.query_handler.get_quantization_capability()
+        conv_config = copy.deepcopy(op_capability['int8']['Conv2D'])
+        dense_config = copy.deepcopy(op_capability['int8']['Dense'])
+        other_config = copy.deepcopy(op_capability['int8']['default'])
+
+        # get the layers info
+        keras_object = model._model_object
+        json_model = copy.deepcopy(json.loads(keras_object.to_json()))
+        config = json_model["config"]
+        self.fp32_layers = config["layers"]
+
+        # get fp32 layer weights
+        self.layer_weights = {}
+        for layer in keras_object.layers:
+            if layer.get_weights():
+                self.layer_weights[layer.name] = copy.deepcopy(layer.get_weights())
+
+        quantizable_op_details = OrderedDict()
+        for details in self.fp32_layers:
+            node_op = details['class_name']
+            node_name = details['config']['name']
+            if node_op == 'Conv2D':
+                quantizable_op_details[(node_name, node_op)] = [conv_config, fp32_config]
+            elif node_op == 'Dense':
+                quantizable_op_details[(node_name, node_op)] = [dense_config, fp32_config]
+            else:
+                quantizable_op_details[(node_name, node_op)] = [fp32_config]
+
+        capability = {
+            'opwise': copy.deepcopy(quantizable_op_details),
+            'optypewise': self.get_optype_wise_ability(quantizable_op_details),
+        }
+        logger.debug("Dump framework quantization capability:")
+        logger.debug(capability)
+
+        return capability
+
+    def get_optype_wise_ability(self, quantizable_op_details):
+        """Get the op type wise capability by generating the union value of each op type.
+
+        Returns:
+            [string dict]: the key is the op type while the value is the detailed
+                           configuration of activation and weight for this op type.
+        """
+        res = OrderedDict()
+        for op in quantizable_op_details:
+            if op[1] not in res:
+                res[op[1]] = {'activation': quantizable_op_details[op][0]['activation']}
+                if 'weight' in quantizable_op_details[op][0]:
+                    res[op[1]]['weight'] = quantizable_op_details[op][0]['weight']
+        return res
+
+    def inspect_tensor(self, model, dataloader, op_list=[], iteration_list=[],
+                       inspect_type='activation', save_to_disk=False):
+        '''The function is used by the tune strategy class for dumping tensor info.
+
+        Args:
+            model (object): The model to inspect.
+            dataloader (object): The dataloader used to feed into.
+            op_list (list): The op names in the fp32 model for dumping.
+            iteration_list (list): The iteration list containing iterations to dump.
+            inspect_type (str): The valid values are 'weight', 'activation', 'all'.
+            save_to_disk (bool): Save to disk or memory.
+
+        Return:
+            Numpy Array Dict
+            {
+              'weight': {
+                'node0_name': {'weight0_name': numpy.array, 'bias0_name': numpy.array, ...},
+                'node1_name': {'weight1_name': numpy.array, 'bias1_name': numpy.array, ...},
+                ...
+              },
+              'activation': [
+                # iter 0
+                {
+                  'node0_name': {'output0_name': numpy.array, 'output1_name': numpy.array, ...}
+                  'node1_name': {'output1_name': numpy.array, 'output1_name': numpy.array, ...}
+                  ...
+                },
+                # iter 1
+                ...
+              ]
+            }
+        '''
+        pass
+
+    def set_tensor(self, model, tensor_dict):
+        '''The function is used by the tune strategy class for setting a tensor back to the model.
+
+        Args:
+            model (object): The model to set tensor. Usually it is a quantized model.
+            tensor_dict (dict): The tensor dict to set. Note the numpy array contains float
+                                values; the adaptor layer has the responsibility to quantize to
+                                int8 or int32 to set into the quantized model if needed.
+                                The dict format is something like:
+                                {
+                                  'weight0_name': numpy.array,
+                                  'bias0_name': numpy.array,
+                                  ...
+                                }
+        '''
+        pass
+
+    def quantize_input(self, model):
+        ''' quantize the model to be able to take quantized input
+
+        Args:
+            model (object): The model to quantize input
+
+        Return:
+            model (object): The quantized input model
+            scale (float): The scale for the dataloader to generate quantized input
+        '''
+        return model, 1.
+
+    def _pre_eval_hook(self, model, *args, **kwargs):
+        '''The function is used to do some preprocessing before the evaluation phase.
+
+        Return:
+            model
+        '''
+        return model
+
+    def _post_eval_hook(self, model, *args, **kwargs):
+        '''The function is used to do some post processing after the complete evaluation.
+        '''
+        pass
+
+    def save(self, model, path):
+        '''The function is used by the tune strategy class for saving the model.
+
+        Args:
+            model (object): The model to be saved.
+            path (string): The path where to save.
+        '''
+        model.save(path)
+
+    def convert(self, model, source, destination):
+        '''The function is used to convert a source model format to another.
+
+        Args:
+            model (neural_compressor.model): base model to be converted.
+            source (string): The source model format.
+            destination (string): The destination model format.
+        '''
+        pass
+
+class KerasQuery(QueryBackendCapability):
+    def __init__(self, local_config_file=None):
+        super().__init__()
+        self.version = tf.version.VERSION
+        self.cfg = local_config_file
+        self.cur_config = None
+        self._one_shot_query()
+
+    def _one_shot_query(self):
+        with open(self.cfg) as f:
+            content = yaml.safe_load(f)
+            try:
+                self.cur_config = self._get_specified_version_cfg(content)
+            except Exception as e:
+                logger.info("Failed to parse {} due to {}.".format(self.cfg, str(e)))
+                self.cur_config = None
+                raise ValueError("Please check if the format of {} follows the Neural Compressor yaml schema.".
+                                 format(self.cfg))
+
+    def _get_specified_version_cfg(self, data):
+        """Get the configuration for the current runtime.
+        If there's no matched configuration in the input yaml, we'll
+        use the `default` field of the yaml.
+
+        Args:
+            data (Yaml content): input yaml file.
+
+        Returns:
+            [dictionary]: the content for the specific version.
+        """
+        default_config = None
+        for sub_data in data:
+            if sub_data['version']['name'] == self.version:
+                return sub_data
+
+            if sub_data['version']['name'] == 'default':
+                default_config = sub_data
+
+        return default_config
+
+    def get_version(self):
+        """Get the current backend version information.
+
+        Returns:
+            [string]: version string.
+        """
+        return self.cur_config['version']['name']
+
+    def get_precisions(self):
+        """Get supported precisions for the current backend.
+
+        Returns:
+            [string list]: the precisions' names.
+        """
+        return self.cur_config['precisions']['names']
+
+    def get_op_types(self):
+        """Get the supported op types by all precisions.
+
+        Returns:
+            [dictionary list]: A list composed of dictionaries whose key is the precision
+                               and whose value is the op types.
+        """
+        return self.cur_config['ops']
+
+    def get_quantization_capability(self):
+        """Get the supported op types' quantization capability.
+
+        Returns:
+            [dictionary list]: A list composed of dictionaries whose key is the precision
+                               and whose value is a dict that describes all op types' quantization capability.
+        """
+        return self.cur_config['capabilities']
+
+    def get_op_types_by_precision(self, precision):
+        """Get op types per precision.
+
+        Args:
+            precision (string): precision name
+
+        Returns:
+            [string list]: A list composed of op types.
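+
+        Example:
+            # With the default keras.yaml, this returns ['Conv2D', 'Dense'].
+            int8_ops = self.query_handler.get_op_types_by_precision(precision='int8')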
+ """ + assert precision in list(self.cur_config['ops'].keys()) + return self.cur_config['ops'][precision] diff --git a/neural_compressor/adaptor/keras.yaml b/neural_compressor/adaptor/keras.yaml new file mode 100644 index 00000000000..291eb43dc0d --- /dev/null +++ b/neural_compressor/adaptor/keras.yaml @@ -0,0 +1,69 @@ +## Copyright (c) 2021 Intel Corporation +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## +# +- + version: + name: 'default' + + precisions: &common_precisions + names: int8, fp32 + valid_mixed_precisions: [] + + ops: &common_ops + int8: ['Conv2D', 'Dense'] + fp32: ['*'] # '*' means all op types + + capabilities: &common_capabilities + int8: { + 'Conv2D': { + 'weight': { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_tensor'], + 'algorithm': ['minmax'] + }, + 'activation': { + 'dtype': ['int8'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'granularity': ['per_tensor'], + 'algorithm': ['minmax'] + } + }, + 'Dense': { + 'weight': { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'algorithm': ['minmax'], + 'granularity': ['per_tensor'], + }, + 'activation': { + 'dtype': ['int8'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'algorithm': ['minmax'], + 'granularity': ['per_tensor'], + } + }, + 'default': { + 'activation': { + 'dtype': ['int8'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'algorithm': ['minmax'], + 'granularity': ['per_tensor'] + } + }, + } diff --git a/neural_compressor/adaptor/keras_utils/__init__.py b/neural_compressor/adaptor/keras_utils/__init__.py new file mode 100644 index 00000000000..ed04d17bdbe --- /dev/null +++ b/neural_compressor/adaptor/keras_utils/__init__.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/neural_compressor/adaptor/keras_utils/quantizer.py b/neural_compressor/adaptor/keras_utils/quantizer.py new file mode 100644 index 00000000000..76742001ded --- /dev/null +++ b/neural_compressor/adaptor/keras_utils/quantizer.py @@ -0,0 +1,266 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import numpy as np +import tensorflow as tf + +from tensorflow.python.eager import context +from tensorflow.python.framework import tensor_shape +from tensorflow.python.keras import activations +from tensorflow.python.keras import backend +from tensorflow.python.keras import constraints +from tensorflow.python.keras import initializers +from tensorflow.python.keras import regularizers +from tensorflow.python.keras.engine.input_spec import InputSpec +from tensorflow.python.keras.utils import conv_utils +from tensorflow.python.keras.utils import tf_utils +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import embedding_ops +from tensorflow.python.ops import gen_math_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn +from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import sparse_ops +from tensorflow.python.ops import standard_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.util.tf_export import keras_export + +from tensorflow.keras.layers import Layer +from tensorflow.python.keras.layers.convolutional import Conv +from tensorflow.python.keras.layers.core import Dense + +class FakeQuant(Layer): + def __init__(self, mode='per_tensor', **kwargs): + super(FakeQuant, self).__init__(**kwargs) + self.mode = mode + self.axis = 1 if mode == 'per_channel' else 0 + self.min_value = tf.constant(np.finfo(np.float32).max, dtype=tf.float32) + self.max_value = tf.constant(np.finfo(np.float32).min, dtype=tf.float32) + + def call(self, inputs): + if self.mode == 'per_tensor': + self.min_value = tf.math.reduce_min(inputs) + self.max_value = tf.math.reduce_max(inputs) + else: + self.min_value = tf.math.reduce_min(inputs, axis=self.axis) + self.max_value = tf.math.reduce_max(inputs, axis=self.axis) + return inputs + + @classmethod + def from_config(cls, config): + return cls(**config) + + def get_config(self): + return {'mode': self.mode, + 'min_value': self.min_value.numpy(), + 'max_value': self.max_value.numpy(), + 'name': self.name} + +class Quantize(Layer): + def __init__(self, min_range, max_range, T=tf.qint8, mode='SCALED', + round_mode='HALF_AWAY_FROM_ZERO', narrow_range=False, axis=None): + super(Quantize, self).__init__() + self.min_range = float(min_range) + self.max_range = float(max_range) + self.T = T + self.mode = mode + self.round_mode = round_mode + self.narrow_range = narrow_range + self.axis = axis + + def call(self, inputs): + outputs, _, _ = tf.quantization.quantize(inputs, self.min_range, + self.max_range, self.T, + mode=self.mode, round_mode=self.round_mode, + narrow_range=self.narrow_range, axis=self.axis) + return outputs + + def get_config(self): + return {'min_range': self.min_range, 'max_range': self.max_range, + 'T': self.T, 'mode': self.mode, 'round_mode': self.round_mode, + 'narrow': self.narrow_range, 'axis': self.axis} + + @classmethod + def from_config(cls, config): + return cls(**config) + +class QConv2D(Conv): + def __init__(self, filters, kernel_size, strides=(1, 1), padding='valid', + 
data_format=None, dilation_rate=(1, 1), groups=1, activation=None, + use_bias=True, kernel_initializer='glorot_uniform', + bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, + activity_regularizer=None, kernel_constraint=None, bias_constraint=None, + min_value=-10000, max_value=10000, **kwargs): + super(QConv2D, self).__init__(rank=2, filters=filters, kernel_size=kernel_size, + strides=strides, padding=padding, data_format=data_format, + dilation_rate=dilation_rate, groups=groups, + activation=activations.get(activation), + use_bias=use_bias, kernel_initializer=initializers.get(kernel_initializer), + bias_initializer=initializers.get(bias_initializer), + kernel_regularizer=regularizers.get(kernel_regularizer), + bias_regularizer=regularizers.get(bias_regularizer), + activity_regularizer=regularizers.get(activity_regularizer), + kernel_constraint=constraints.get(kernel_constraint), + bias_constraint=constraints.get(bias_constraint), **kwargs) + self.weight_quantizer = Quantize(float(min_value), float(max_value)) + self.weight_dequantizer = DeQuantize(float(min_value), float(max_value)) + + def call(self, inputs): + input_shape = inputs.shape + + if self._is_causal: # Apply causal padding to inputs for Conv1D. + inputs = array_ops.pad(inputs, self._compute_causal_padding(inputs)) + + # add the Q/DQ here + kernel = self.weight_quantizer(self.kernel) + kernel = self.weight_dequantizer(kernel) + outputs = self._convolution_op(inputs, kernel) + + if self.use_bias: + output_rank = outputs.shape.rank + if self.rank == 1 and self._channels_first: + # nn.bias_add does not accept a 1D input tensor. + bias = array_ops.reshape(self.bias, (1, self.filters, 1)) + outputs += bias + else: + # Handle multiple batch dimensions. + if output_rank is not None and output_rank > 2 + self.rank: + + def _apply_fn(o): + return nn.bias_add(o, self.bias, data_format=self._tf_data_format) + + outputs = conv_utils.squeeze_batch_dims( + outputs, _apply_fn, inner_rank=self.rank + 1) + else: + outputs = nn.bias_add( + outputs, self.bias, data_format=self._tf_data_format) + + if not context.executing_eagerly(): + # Infer the static output shape: + out_shape = self.compute_output_shape(input_shape) + outputs.set_shape(out_shape) + + if self.activation is not None: + return self.activation(outputs) + return outputs + +class QDense(Dense): + def __init__(self, + units, + activation=None, + use_bias=True, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + min_value=-10000, + max_value=10000, + **kwargs): + super(QDense, self).__init__( + units=units, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + **kwargs) + self.weight_quantizer = Quantize(float(min_value), float(max_value)) + self.weight_dequantizer = DeQuantize(float(min_value), float(max_value)) + + def call(self, inputs): + if inputs.dtype.base_dtype != self._compute_dtype_object.base_dtype: + inputs = math_ops.cast(inputs, dtype=self._compute_dtype_object) + + # add the Q/DQ here + # (TODO) we have not try sparse dense and may have issues + kernel = self.weight_quantizer(self.kernel) + kernel = self.weight_dequantizer(kernel) + rank 
= inputs.shape.rank
+        if rank == 2 or rank is None:
+            # We use embedding_lookup_sparse as a more efficient matmul operation for
+            # large sparse input tensors. The op will result in a sparse gradient, as
+            # opposed to sparse_ops.sparse_tensor_dense_matmul which results in dense
+            # gradients. This can lead to significant speedups, see b/171762937.
+            if isinstance(inputs, sparse_tensor.SparseTensor):
+                # We need to fill empty rows, as the op assumes at least one id per row.
+                inputs, _ = sparse_ops.sparse_fill_empty_rows(inputs, 0)
+                # We need to do some munging of our input to use the embedding lookup as
+                # a matrix multiply. We split our input matrix into separate ids and
+                # weights tensors. The values of the ids tensor should be the column
+                # indices of our input matrix and the values of the weights tensor
+                # can continue to the actual matrix weights.
+                # The column arrangement of ids and weights
+                # will be summed over and does not matter. See the documentation for
+                # sparse_ops.sparse_tensor_dense_matmul for a more detailed explanation
+                # of the inputs to both ops.
+                ids = sparse_tensor.SparseTensor(
+                    indices=inputs.indices,
+                    values=inputs.indices[:, 1],
+                    dense_shape=inputs.dense_shape)
+                weights = inputs
+                outputs = embedding_ops.embedding_lookup_sparse_v2(
+                    kernel, ids, weights, combiner='sum')
+            else:
+                outputs = gen_math_ops.MatMul(a=inputs, b=kernel)
+        # Broadcast kernel to inputs.
+        else:
+            outputs = standard_ops.tensordot(inputs, kernel, [[rank - 1], [0]])
+            # Reshape the output back to the original ndim of the input.
+            if not context.executing_eagerly():
+                shape = inputs.shape.as_list()
+                output_shape = shape[:-1] + [kernel.shape[-1]]
+                outputs.set_shape(output_shape)
+
+        if self.use_bias:
+            outputs = nn_ops.bias_add(outputs, self.bias)
+
+        if self.activation is not None:
+            outputs = self.activation(outputs)
+        return outputs
+
+
+class DeQuantize(Layer):
+    def __init__(self, min_range, max_range, mode='SCALED',
+                 narrow_range=False, axis=None):
+        super(DeQuantize, self).__init__()
+        self.min_range = min_range
+        self.max_range = max_range
+        self.mode = mode
+        self.narrow_range = narrow_range
+        self.axis = axis
+
+    def call(self, inputs):
+        return tf.quantization.dequantize(inputs, float(self.min_range),
+                                          float(self.max_range), mode=self.mode,
+                                          narrow_range=self.narrow_range, axis=self.axis)
+
+    def get_config(self):
+        return {'min_range': self.min_range, 'max_range': self.max_range,
+                'mode': self.mode, 'narrow': self.narrow_range, 'axis': self.axis,
+                'dtype': self.dtype}
+
+    @classmethod
+    def from_config(cls, config):
+        return cls(**config)
diff --git a/neural_compressor/conf/config.py b/neural_compressor/conf/config.py
index 2cb547e6cbc..617bafd09eb 100644
--- a/neural_compressor/conf/config.py
+++ b/neural_compressor/conf/config.py
@@ -1391,6 +1391,7 @@ def map_pyconfig_to_cfg(self, pythonic_config):
             if pythonic_config.benchmark.outputs != []:
                 mapping.update({'model.outputs': pythonic_config.benchmark.outputs})
             mapping.update({
+                'model.backend': pythonic_config.benchmark.backend,
                 'evaluation.performance.warmup': pythonic_config.benchmark.warmup,
                 'evaluation.performance.iteration': pythonic_config.benchmark.iteration,
                 'evaluation.performance.configs.cores_per_instance':
diff --git a/neural_compressor/conf/pythonic_config.py b/neural_compressor/conf/pythonic_config.py
index 19c373a8205..2124579b406 100644
--- a/neural_compressor/conf/pythonic_config.py
+++ b/neural_compressor/conf/pythonic_config.py
@@ -185,6 +185,10 @@ class TensorFlow(MXNet):
     def __init__(self,
precisions=None): super().__init__(precisions) +class Keras(MXNet): + def __init__(self, precisions=None): + super().__init__(precisions) + class PyTorch(MXNet): def __init__(self, precisions=None): super().__init__(precisions) @@ -243,6 +247,7 @@ def search(self, search): nas = NASConfig() onnxruntime_config = ONNX() tensorflow_config = TensorFlow() +keras_config = Keras() pytorch_config = PyTorch() mxnet_config = MXNet() @@ -258,7 +263,8 @@ def __init__(self, onnxruntime=onnxruntime_config, tensorflow=tensorflow_config, pytorch=pytorch_config, - mxnet=mxnet_config): + mxnet=mxnet_config, + keras=keras_config): self._quantization = quantization self._benchmark = benchmark self._options = options @@ -269,6 +275,7 @@ def __init__(self, self._tensorflow = tensorflow self._pytorch = pytorch self._mxnet = mxnet + self._keras = keras @property def distillation(self): @@ -282,6 +289,10 @@ def nas(self): def tensorflow(self): return self._tensorflow + @property + def keras(self): + return self._keras + @property def pytorch(self): return self._pytorch diff --git a/neural_compressor/experimental/benchmark.py b/neural_compressor/experimental/benchmark.py index 22b7cdeb93f..7aff71cc14c 100644 --- a/neural_compressor/experimental/benchmark.py +++ b/neural_compressor/experimental/benchmark.py @@ -306,6 +306,8 @@ def run_instance(self, mode): "outputs": cfg.model.outputs, \ "recipes": cfg.model.recipes, \ 'workspace_path': cfg.tuning.workspace.path}) + if framework == 'keras': + framework_specific_info.update({'workspace_path': cfg.tuning.workspace.path}) if framework == 'mxnet': framework_specific_info.update({"b_dataloader": self._b_dataloader}) if 'onnx' in framework.lower(): @@ -479,6 +481,10 @@ def model(self, user_model): assert not isinstance(user_model, BaseModel), \ "Please pass an original framework model but not neural compressor model!" self.framework = get_model_fwk_name(user_model) + if self.framework == "tensorflow": + from ..model.model import get_model_type + if get_model_type(user_model) == 'keras' and cfg.model.backend == 'itex': + self.framework = 'keras' if self.framework == "pytorch": if cfg.model.backend == "default": self.framework = "pytorch_fx" diff --git a/neural_compressor/experimental/common/model.py b/neural_compressor/experimental/common/model.py index f34a5c35b80..6fec668f9e8 100644 --- a/neural_compressor/experimental/common/model.py +++ b/neural_compressor/experimental/common/model.py @@ -45,6 +45,8 @@ def __new__(cls, root, **kwargs): else: model_type = get_model_type(root) model = MODELS['tensorflow'](model_type, root, **kwargs) + elif framework == 'keras': + model = MODELS['keras'](root, **kwargs) elif framework == 'pytorch': model = MODELS[framework](root, **kwargs) else: diff --git a/neural_compressor/experimental/component.py b/neural_compressor/experimental/component.py index 8afc1703c23..4afbf2589e8 100644 --- a/neural_compressor/experimental/component.py +++ b/neural_compressor/experimental/component.py @@ -472,6 +472,10 @@ def model(self, user_model): assert not isinstance(user_model, BaseModel), \ "Please pass an original framework model but not neural compressor model!" 
self.framework = get_model_fwk_name(user_model) + if self.framework == "tensorflow": + from ..model.model import get_model_type + if get_model_type(user_model) == 'keras' and self.cfg.model.backend == 'itex': + self.framework = 'keras' if self.framework == "pytorch": if self.cfg.model.backend == "default": self.framework = "pytorch_fx" diff --git a/neural_compressor/experimental/data/dataloaders/dataloader.py b/neural_compressor/experimental/data/dataloaders/dataloader.py index c3463b875eb..c879b0b45d1 100644 --- a/neural_compressor/experimental/data/dataloaders/dataloader.py +++ b/neural_compressor/experimental/data/dataloaders/dataloader.py @@ -25,6 +25,7 @@ DATALOADERS = {"tensorflow": TensorflowDataLoader, "tensorflow_itex": TensorflowDataLoader, + "keras": TensorflowDataLoader, "mxnet": MXNetDataLoader, "pytorch": PyTorchDataLoader, "pytorch_ipex": PyTorchDataLoader, diff --git a/neural_compressor/experimental/metric/metric.py b/neural_compressor/experimental/metric/metric.py index b02b52cc861..31f0550b071 100644 --- a/neural_compressor/experimental/metric/metric.py +++ b/neural_compressor/experimental/metric/metric.py @@ -113,6 +113,7 @@ def __init__(self) -> None: framework_metrics = {"tensorflow": TensorflowMetrics, "tensorflow_itex": TensorflowMetrics, + "keras": TensorflowMetrics, "mxnet": MXNetMetrics, "pytorch": PyTorchMetrics, "pytorch_ipex": PyTorchMetrics, @@ -132,6 +133,7 @@ def __init__(self) -> None: registry_metrics = {"tensorflow": TENSORFLOW_METRICS, "tensorflow_itex": TENSORFLOW_ITEX_METRICS, + "keras": TENSORFLOW_METRICS, "mxnet": MXNET_METRICS, "pytorch": PYTORCH_METRICS, "pytorch_ipex": PYTORCH_METRICS, @@ -156,7 +158,7 @@ def __init__(self, framework: str): Args: framework: The framwork name. """ - assert framework in ("tensorflow", "tensorflow_itex", + assert framework in ("tensorflow", "tensorflow_itex","keras", "pytorch", "pytorch_ipex", "pytorch_fx", "onnxrt_qdq", "onnxrt_qlinearops", "onnxrt_integerops", "mxnet", "onnxrt_qoperator"), \ diff --git a/neural_compressor/model/keras_model.py b/neural_compressor/model/keras_model.py new file mode 100644 index 00000000000..f0995ceed59 --- /dev/null +++ b/neural_compressor/model/keras_model.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
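+
+"""KerasModel: a wrapper so the keras adaptor can treat a saved-model path and
+an in-memory tf.keras.Model object uniformly."""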
+
+import os
+from abc import abstractmethod
+from neural_compressor.model.base_model import BaseModel
+from neural_compressor.utils.utility import LazyImport
+tf = LazyImport('tensorflow')
+
+class KerasModel(BaseModel):
+    """Build a KerasModel object.
+
+    Args:
+        model (string or keras model object): model path or model object
+        kwargs (dict): other required parameters
+    """
+
+    def __init__(self, model, **kwargs):
+        self.component = None
+        self._model = model
+        if not isinstance(model, tf.keras.Model):
+            self._model_object = tf.keras.models.load_model(self._model)
+        else:
+            self._model_object = self._model
+        self._q_config = None
+
+    @property
+    def q_config(self):
+        return self._q_config
+
+    @q_config.setter
+    def q_config(self, q_config):
+        self._q_config = q_config
+
+    @property
+    def model(self):
+        return self._model_object
+
+    @property
+    def graph_info(self):
+        ''' return {Node: Node_type} like {'conv0': 'conv2d'} '''
+        #(TODO) get the graph info
+        return None
+
+    @abstractmethod
+    def save(self, root, *args, **kwargs):
+        self._model_object.save(root)
+
+    @abstractmethod
+    def export(
+        self,
+        save_path: str,
+        conf,
+    ):
+        pass
+
+    @abstractmethod
+    def framework(self):
+        return 'keras'
diff --git a/neural_compressor/model/model.py b/neural_compressor/model/model.py
index 9d1830db47f..cdeb526fa37 100644
--- a/neural_compressor/model/model.py
+++ b/neural_compressor/model/model.py
@@ -28,6 +28,7 @@
 from neural_compressor.conf import config as cfg
 from neural_compressor.model.base_model import BaseModel
 from neural_compressor.model.onnx_model import ONNXModel
+from neural_compressor.model.keras_model import KerasModel
 
 TORCH = False
 if importlib.util.find_spec('torch'):
@@ -56,39 +57,31 @@ def get_model_type(model):
     """
     from neural_compressor.adaptor.tf_utils.util import is_saved_model_format, is_ckpt_format
 
+    if isinstance(model, str):
+        model = os.path.abspath(os.path.expanduser(model))
+        if (model.endswith('.h5') and os.path.isfile(model)) or \
+            is_saved_model_format(os.path.dirname(model)) or \
+            (os.path.isdir(model) and is_saved_model_format(model)):
+            if version1_lt_version2(tf.version.VERSION, '2.10.0'):
+                logger.warn("Keras models running on TensorFlow versions lower"
+                            " than 2.10.0 do not support Intel ITEX.")
+            try:
+                model = tf.keras.models.load_model(model)
+            except:
+                pass
+    if isinstance(model, tf.keras.Model) and hasattr(model, 'to_json'):
+        return 'keras'
     if isinstance(model, tf.Graph):
         return 'graph'
     elif isinstance(model, tf.compat.v1.GraphDef):
         return 'graph_def'
-    elif isinstance(model, tf.keras.Model):
-        return 'keras'
     elif isinstance(model, tf.compat.v1.estimator.Estimator):
         return 'estimator'
     elif isinstance(model, str):
         model = os.path.abspath(os.path.expanduser(model))
-        if (model.endswith('.h5') and os.path.isfile(model)):
-            if version1_lt_version2(tf.version.VERSION, '2.3.0'):
-                logger.warn("keras model running on tensorflow 2.2.0 and"
-                            " lower may have problem.")
-            model = tf.keras.models.load_model(model)
-            if isinstance(model, tf.keras.Model):
-                return 'keras'
         if (model.endswith('.pb') and os.path.isfile(model)):
             if is_saved_model_format(os.path.dirname(model)):
-                # Warning: TF compatibility issue to load saved model. TF 2.3 keras.load
-                # can load saved model from TF backend, but TF 2.4 cannot.
- try: - if version1_lt_version2(tf.version.VERSION, '2.3.0'): - logger.warn("keras model running on tensorflow 2.2.0 and" - " lower may have problem.") - model = tf.keras.models.load_model(model) - if isinstance(model, tf.keras.Model): - return 'keras' - else: - return 'saved_model' - except: - # can't use keras load - return 'saved_model' + return 'saved_model' else: return 'frozen_pb' elif model.endswith('.ckpt') and os.path.isfile(model): @@ -97,20 +90,7 @@ def get_model_type(model): if is_ckpt_format(model): return 'checkpoint' elif is_saved_model_format(model): - # it's very ugly tf version issue, in tf2.3 keras.load can - #batch_size_(batch_size), load saved model from tf backend, but tf2.4 it will crash - try: - if version1_lt_version2(tf.version.VERSION, '2.3.0'): - logger.warn("keras model running on tensorflow 2.2.0 and" - " lower may have problem.") - model = tf.keras.models.load_model(model) - if isinstance(model, tf.keras.Model): - return 'keras' - else: - return 'saved_model' - except: - # can't use keras load - return 'saved_model' + return 'saved_model' elif os.path.isfile(model + '.pb'): return 'frozen_pb' @@ -404,28 +384,6 @@ def load_saved_model(model, saved_model_tags, input_tensor_names, output_tensor_ grappler_meta_graph_def, graph_id=b"tf_graph") return opt, input_tensor_names, output_tensor_names -def check_keras_format(model, saved_model_dir): - from tensorflow.python import saved_model - from tensorflow.python.saved_model.load import load - from tensorflow.python.saved_model import save_options - from tensorflow.python.saved_model.loader_impl import parse_saved_model_with_debug_info - version = 'saved_model_v2' - try: - saved_model.save( - model, - saved_model_dir, - options=save_options.SaveOptions(save_debug_info=True)) - except: - return 'trackable_object' - saved_model_proto, _ = parse_saved_model_with_debug_info(saved_model_dir) - saved_model_version = saved_model_proto.saved_model_schema_version - if saved_model_version == 0: - return 'saved_model_v1' - if saved_model_version not in [1, 2]: - raise ValueError("SavedModel file format({0}) is not supported".format( - saved_model_version)) - return version - def get_graph_from_saved_model_v2(saved_model_dir, input_tensor_names, output_tensor_names): from tensorflow.python.saved_model import tag_constants @@ -481,6 +439,28 @@ def get_graph_from_original_keras_v2(model, output_dir): output_names = [tensor.name.split(':')[0] for tensor in output_tensors] return graph_def, input_names, output_names +def check_keras_format(model, saved_model_dir): + from tensorflow.python import saved_model + from tensorflow.python.saved_model.load import load + from tensorflow.python.saved_model import save_options + from tensorflow.python.saved_model.loader_impl import parse_saved_model_with_debug_info + version = 'saved_model_v2' + try: + saved_model.save( + model, + saved_model_dir, + options=save_options.SaveOptions(save_debug_info=True)) + except: + return 'trackable_object' + saved_model_proto, _ = parse_saved_model_with_debug_info(saved_model_dir) + saved_model_version = saved_model_proto.saved_model_schema_version + if saved_model_version == 0: + return 'saved_model_v1' + if saved_model_version not in [1, 2]: + raise ValueError("SavedModel file format({0}) is not supported".format( + saved_model_version)) + return version + def get_graph_from_saved_model_v1(model): from tensorflow.python.framework import ops from tensorflow.python.saved_model import constants @@ -526,12 +506,10 @@ def get_graph_from_saved_model_v1(model): 
def keras_session(model, input_tensor_names, output_tensor_names, **kwargs): """Build session with keras model - Args: model (string or tf.keras.Model): model path or tf.keras.Model object input_tensor_names (list of string): input_tensor_names of model output_tensor_names (list of string): output_tensor_names of model - Returns: sess (tf.compat.v1.Session): tf.compat.v1.Session object input_tensor_names (list of string): validated input_tensor_names @@ -570,6 +548,7 @@ def keras_session(model, input_tensor_names, output_tensor_names, **kwargs): shutil.rmtree(temp_dir, True) return graph_def_session(graph_def, input_names, output_names, **kwargs) + def slim_session(model, input_tensor_names, output_tensor_names, **kwargs): """Build session with slim model @@ -1100,7 +1079,8 @@ def graph_def(self, graph_def): 'estimator': TensorflowBaseModel, 'slim': TensorflowBaseModel, 'saved_model': TensorflowSavedModelModel, - 'keras': TensorflowSavedModelModel,} + 'keras': TensorflowSavedModelModel + } class TensorflowModel(object): def __new__(cls, model_type, root, **kwargs): @@ -1155,6 +1135,7 @@ def save(self, root=None): MODELS = {'tensorflow': TensorflowModel, 'tensorflow_itex': TensorflowModel, + 'keras': KerasModel, 'mxnet': MXNetModel, 'pytorch': PyTorchModel if TORCH else None, 'pytorch_ipex': PyTorchIpexModel if TORCH else None, diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index db4cae2b1d4..aeefeed4132 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -537,6 +537,9 @@ def set_framework_info(self, q_dataloader, q_func=None): if self.cfg.model.backend == 'itex': self.cfg.model.framework = 'tensorflow_itex' framework = 'tensorflow_itex' + if 'keras' in framework: + framework_specific_info.update({ + 'workspace_path': self.cfg.tuning.workspace.path, }) if framework == 'mxnet': framework_specific_info.update({"q_dataloader": q_dataloader}) if 'onnx' in framework.lower(): diff --git a/test/itex/test_keras_in_keras_out.py b/test/itex/test_keras_in_keras_out.py new file mode 100644 index 00000000000..aa776d1d6fd --- /dev/null +++ b/test/itex/test_keras_in_keras_out.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import os +import time +import shutil +import numpy as np +import tensorflow as tf +from tensorflow import keras +from neural_compressor.utils import logger + +test_mode = 'accuracy' + +def build_model(): + # Load MNIST dataset + mnist = keras.datasets.mnist + (train_images, train_labels), (test_images, test_labels) = mnist.load_data() + + # Normalize the input image so that each pixel value is between 0 to 1. + train_images = train_images / 255.0 + test_images = test_images / 255.0 + + # Define the model architecture. 
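+    # A minimal Conv2D -> MaxPooling2D -> Dense stack is enough here: the test
+    # only needs quantizable Conv2D/Dense ops, not a highly accurate classifier.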
+ model = keras.Sequential([ + keras.layers.InputLayer(input_shape=(28, 28)), + keras.layers.Reshape(target_shape=(28, 28, 1)), + keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'), + keras.layers.MaxPooling2D(pool_size=(2, 2)), + keras.layers.Flatten(), + keras.layers.Dense(10) + ]) + # Train the digit classification model + model.compile(optimizer='adam', + loss=tf.keras.losses.SparseCategoricalCrossentropy( + from_logits=True), + metrics=['accuracy']) + + model.fit( + train_images, + train_labels, + epochs=1, + validation_split=0.1, + ) + + _, baseline_model_accuracy = model.evaluate( + test_images, test_labels, verbose=0) + + print('Baseline test accuracy:', baseline_model_accuracy) + model.save("baseline_model") + +def build_dataset(): + # Load the data and split it between train and test sets + (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() + + # Scale images to the [0, 1] range + x_train = x_train.astype("float32") / 255 + x_test = x_test.astype("float32") / 255 + # Make sure images have shape (28, 28, 1) + x_train = np.expand_dims(x_train, -1) + x_test = np.expand_dims(x_test, -1) + + # convert class vectors to binary class matrices + y_train = keras.utils.to_categorical(y_train, 10) + y_test = keras.utils.to_categorical(y_test, 10) + return x_train, y_train, x_test, y_test + +def eval_func(model): + x_train, y_train, x_test, y_test = build_dataset() + start = time.time() + model.compile(metrics=["accuracy"], run_eagerly=False) + score = model.evaluate(x_test, y_test) + end = time.time() + + if test_mode == 'performance': + latency = end - start + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} data/sec".format(1. / latency)) + return score[1] + +class Dataset(object): + def __init__(self, batch_size=100): + mnist = keras.datasets.mnist + (train_images, train_labels), (test_images, test_labels) = mnist.load_data() + + # Normalize the input image so that each pixel value is between 0 to 1. 
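+        # (Only the test split is consumed via __getitem__/__len__ below; the
+        # train split is kept as attributes but is not used by this Dataset.)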
+ self.train_images = train_images / 255.0 + self.test_images = test_images / 255.0 + self.train_labels = train_labels + self.test_labels = test_labels + + def __len__(self): + return len(self.test_images) + + def __getitem__(self, idx): + return self.test_images[idx], self.test_labels[idx] + + +class TestKerasInKerasOut(unittest.TestCase): + @classmethod + def setUpClass(self): + os.environ["ITEX_ONEDNN_GRAPH"] = '1' + + @classmethod + def tearDownClass(self): + shutil.rmtree('baseline_model',ignore_errors=True) + shutil.rmtree('itex_qdq_keras_model',ignore_errors=True) + + def test_keras_in_keras_out(self): + logger.info("Run test_keras_in_keras_out case...") + global test_mode + test_mode = 'accuracy' + build_model() + + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.utils.utility import set_random_seed + from neural_compressor.experimental import common + set_random_seed(9527) + config = PostTrainingQuantConfig(backend='itex') + logger.info("=================Run Quantization...") + q_model = fit(keras.models.load_model('./baseline_model'), + conf=config, + calib_dataloader=common.DataLoader(Dataset()), + eval_func=eval_func) + q_model.save("itex_qdq_keras_model") + model = keras.models.load_model('./itex_qdq_keras_model') + model.summary() + found_quantize = False + found_dequantize = False + for layer in model.layers: + if 'quantize' in layer.name: + found_quantize = True + if 'de_quantize' in layer.name: + found_dequantize = True + self.assertEqual(found_quantize, True) + self.assertEqual(found_dequantize, True) + + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(backend='itex', iteration=100, cores_per_instance=1, num_of_instance=1) + logger.info("=================Run BenchMark...") + test_mode = 'performance' + fit(model, conf, b_func=eval_func) + + def test_keras_model_interface(self): + logger.info("Run test_keras_model_interface case...") + global test_mode + test_mode = 'accuracy' + build_model() + + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.utils.utility import set_random_seed + from neural_compressor.experimental import common + set_random_seed(9527) + config = PostTrainingQuantConfig(backend='itex') + q_model = fit(keras.models.load_model('./baseline_model'), + conf=config, + calib_dataloader=common.DataLoader(Dataset()), + eval_func=eval_func) + q_model.save("itex_qdq_keras_model") + self.assertEqual(q_model.framework(), 'keras') + + framework_config = { + 'framework': 'keras', + 'approach': 'post_training_static_quant' + } + q_model.q_config = framework_config + self.assertEqual(q_model.q_config['framework'], 'keras') + self.assertEqual(q_model.graph_info, None) + self.assertEqual(q_model.framework(), 'keras') + self.assertEqual(isinstance(q_model.model, tf.keras.Model), True) + +if __name__ == '__main__': + unittest.main()