diff --git a/.azure-pipelines/scripts/ut/env_setup.sh b/.azure-pipelines/scripts/ut/env_setup.sh
index 65a46b1f6dc..01c1a0b4a91 100644
--- a/.azure-pipelines/scripts/ut/env_setup.sh
+++ b/.azure-pipelines/scripts/ut/env_setup.sh
@@ -28,7 +28,10 @@ elif [[ "${tensorflow_version}" != "" ]]; then
     pip install intel-tensorflow==${tensorflow_version}
 fi
 
-if [[ "${itex_version}" != "" ]]; then
+if [[ "${itex_version}" == "nightly" ]]; then
+    pip install /tf_dataset/itex_binary/221209/intel_extension_for_tensorflow-1.1.0-cp38-cp38-linux_x86_64.whl
+    pip install /tf_dataset/itex_binary/221209/intel_extension_for_tensorflow_lib-1.1.0.0-cp38-cp38-linux_x86_64.whl
+elif [[ "${itex_version}" != "" ]]; then
     pip install --upgrade intel-extension-for-tensorflow[cpu]==${itex_version}
 fi
 
diff --git a/.azure-pipelines/scripts/ut/run_basic_itex.sh b/.azure-pipelines/scripts/ut/run_basic_itex.sh
index 45278216f8d..c937992b7be 100644
--- a/.azure-pipelines/scripts/ut/run_basic_itex.sh
+++ b/.azure-pipelines/scripts/ut/run_basic_itex.sh
@@ -3,7 +3,7 @@ python -c "import neural_compressor as nc;print(nc.version.__version__)"
 echo "run basic itex"
 
 echo "specify fwk version..."
-export itex_version='1.0.0'
+export itex_version='nightly'
 export tensorflow_version='2.10.0-official'
 
 echo "set up UT env..."
diff --git a/examples/keras/mnist/README.md b/examples/keras/mnist/README.md
new file mode 100644
index 00000000000..163b7c96a70
--- /dev/null
+++ b/examples/keras/mnist/README.md
@@ -0,0 +1,41 @@
+Step-by-Step
+============
+
+This document lists the steps to reproduce the Keras MNIST model tuning results via Neural Compressor.
+This example can run on Intel CPUs.
+
+# Prerequisite
+
+### 1. Installation
+Python 3.6 or a higher version is recommended.
+
+```shell
+# Install Intel® Neural Compressor
+pip install neural-compressor
+```
+
+### 2. Install TensorFlow
+```shell
+pip install tensorflow
+```
+> Note: The supported TensorFlow version must be greater than 2.10.0.
+
+### 3. Install Dependency Packages
+```shell
+cd examples/keras/mnist/
+pip install -r requirements.txt
+```
+
+#### Quantizing the model on Intel CPU (Experimental)
+Intel Extension for TensorFlow for Intel CPUs is currently experimental. It is not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+
+# Run
+
+  ```shell
+  cd examples/keras/mnist/
+  python mnist.py
+  ```
diff --git a/examples/keras/mnist/mnist.py b/examples/keras/mnist/mnist.py
new file mode 100644
index 00000000000..8e0fbf411e5
--- /dev/null
+++ b/examples/keras/mnist/mnist.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2022 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
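+
+"""Keras MNIST example: train a small FP32 CNN, save it as `fp32_model`, then
+apply post-training static quantization through `neural_compressor`'s `fit`
+API with the ITEX backend (see `main` below)."""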
+ +import os +import tensorflow as tf +import numpy as np +from tensorflow import keras +from tensorflow.keras import layers +import time + +num_classes = 10 + +def build_dataset(): + # Load the data and split it between train and test sets + (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() + + # Scale images to the [0, 1] range + x_train = x_train.astype("float32") / 255 + x_test = x_test.astype("float32") / 255 + # Make sure images have shape (28, 28, 1) + x_train = np.expand_dims(x_train, -1) + x_test = np.expand_dims(x_test, -1) + + # convert class vectors to binary class matrices + y_train = keras.utils.to_categorical(y_train, num_classes) + y_test = keras.utils.to_categorical(y_test, num_classes) + return x_train, y_train, x_test, y_test + +class Dataset(): + def __init__(self, ): + _, _ , self.inputs, self.labels = build_dataset() + + def __getitem__(self, idx): + return self.inputs[idx], self.labels[idx] + + def __len__(self): + assert len(self.inputs) == len(self.labels), 'inputs should have equal len with labels' + return len(self.inputs) + +def build_model(x_train, y_train, x_test, y_test): + if os.path.exists('fp32_model'): + model = keras.models.load_model('fp32_model') + return model + # Model / data parameters + input_shape = (28, 28, 1) + model = keras.Sequential( + [ + keras.Input(shape=input_shape), + layers.Conv2D(32, kernel_size=(3, 3), activation="relu"), + layers.MaxPooling2D(pool_size=(2, 2)), + layers.Conv2D(64, kernel_size=(3, 3), activation="relu"), + layers.MaxPooling2D(pool_size=(2, 2)), + layers.Flatten(), + layers.Dropout(0.5), + layers.Dense(num_classes, activation="softmax"), + ] + ) + + batch_size = 128 + epochs = 1 + + model.compile(loss="categorical_crossentropy", optimizer="adam", + metrics=["accuracy"], run_eagerly=True) + model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1) + model.summary() + if not os.path.exists('fp32_model'): + model.save('fp32_model') + return model + +def eval_func(model): + x_train, y_train, x_test, y_test = build_dataset() + model.compile(metrics=["accuracy"], run_eagerly=False) + score = model.evaluate(x_test, y_test) + return score[1] + +def main(): + x_train, y_train, x_test, y_test = build_dataset() + model = build_model(x_train, y_train, x_test, y_test) + + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.utils.utility import set_random_seed + from neural_compressor.experimental import common + set_random_seed(9527) + config = PostTrainingQuantConfig(backend='itex') + quantized_model = fit(model, + conf=config, + calib_dataloader=common.DataLoader(Dataset(), batch_size=10), + eval_func=eval_func) + +if __name__ == '__main__': + main() + diff --git a/examples/keras/mnist/requirements.txt b/examples/keras/mnist/requirements.txt new file mode 100644 index 00000000000..cee1363064f --- /dev/null +++ b/examples/keras/mnist/requirements.txt @@ -0,0 +1,3 @@ +tensorflow +neural-compressor +intel-extension-for-tensorflow[cpu] \ No newline at end of file diff --git a/neural_compressor/adaptor/keras.py b/neural_compressor/adaptor/keras.py new file mode 100644 index 00000000000..ad8081d6b0d --- /dev/null +++ b/neural_compressor/adaptor/keras.py @@ -0,0 +1,539 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import copy +import json +import yaml +import math +import numpy as np +from collections import OrderedDict, UserDict +from .query import QueryBackendCapability +from .adaptor import adaptor_registry, Adaptor +from ..utils.utility import LazyImport, CpuInfo, singleton, Dequantize, dump_elapsed_time +from ..utils.utility import Statistics, GLOBAL_STATE, MODE, version1_lt_version2 +from ..utils import logger +from ..conf.dotdict import deep_get +from ..experimental.data.dataloaders.base_dataloader import BaseDataLoader +tf = LazyImport('tensorflow') + +def _add_supported_quantized_objects(custom_objects): + """Map all the quantized objects.""" + from neural_compressor.adaptor.keras_utils.quantizer import Quantize, DeQuantize + from neural_compressor.adaptor.keras_utils.quantizer import FakeQuant, QConv2D, QDense + custom_objects["Quantize"] = Quantize + custom_objects["DeQuantize"] = DeQuantize + custom_objects["FakeQuant"] = FakeQuant + custom_objects["QConv2D"] = QConv2D + custom_objects["QDense"] = QDense + return custom_objects + +@adaptor_registry +class KerasAdaptor(Adaptor): + '''The keras class of framework adaptor layer. + + ''' + def __init__(self, framework_specific_info): + super(KerasAdaptor, self).__init__(framework_specific_info) + self.framework_specific_info = framework_specific_info + self.approach = deep_get(self.framework_specific_info, 'approach', False) + self.quantize_config = {'op_wise_config': {}} + self.device = self.framework_specific_info['device'] + #self.work_dir = os.path.abspath(self.framework_specific_info['workspace_path']) + self.recipes = deep_get(self.framework_specific_info, 'recipes', {}) + #os.makedirs(self.work_dir, exist_ok=True) + + self.pre_optimized_model = None + self.pre_optimizer_handle = None + self.fp32_ops = [] + self.query_handler = KerasQuery(local_config_file=os.path.join( + os.path.dirname(__file__), 'keras.yaml')) + + self.fp32_results = [] + self.fp32_preds_as_label = False + self.benchmark = (GLOBAL_STATE.STATE == MODE.BENCHMARK) + self.callbacks = [] + self.optype_statistics = None + + def tuning_cfg_to_fw(self, tuning_cfg): + self.quantize_config['calib_iteration'] = tuning_cfg['calib_iteration'] + self.quantize_config['device'] = self.device + self.quantize_config['advance'] = deep_get(tuning_cfg, 'advance') + fp32_ops = [] + dispatched_op_names = [j[0] for j in tuning_cfg['op']] + invalid_op_names = [i for i in self.quantize_config['op_wise_config'] + if i not in dispatched_op_names] + + for op_name in invalid_op_names: + self.quantize_config['op_wise_config'].pop(op_name) + + for each_op_info in tuning_cfg['op']: + op_name = each_op_info[0] + if tuning_cfg['op'][each_op_info]['activation']['dtype'] == 'fp32': + if op_name in self.quantize_config['op_wise_config']: + self.quantize_config['op_wise_config'].pop(op_name) + fp32_ops.append(op_name) + continue + + is_perchannel = False + bit = None + if 'weight' in tuning_cfg['op'][each_op_info]: + is_perchannel = tuning_cfg['op'][each_op_info]['weight'][ + 'granularity'] == 'per_channel' + #bit = tuning_cfg['op'][each_op_info]['weight']['bit'] + weight_bit = bit 
if bit else 7.0 + algorithm = tuning_cfg['op'][each_op_info]['activation']['algorithm'] + is_asymmetric = False + if 'activation' in tuning_cfg['op'][each_op_info]: + is_asymmetric = tuning_cfg['op'][each_op_info]['activation']['scheme'] == 'asym' + self.quantize_config['op_wise_config'][op_name] = (is_perchannel, + algorithm, + is_asymmetric, + weight_bit) + self.fp32_ops = fp32_ops + + @dump_elapsed_time("Pass quantize model") + def quantize(self, tune_cfg, model, dataloader, q_func=None): + '''Execute the quantize process on the specified model. + + Args: + tune_cfg(dict): The chosen tuning configuration. + model (object): The model to do quantization. + dataloader(object): The dataloader used to load quantization dataset. + q_func (optional): training function for quantization aware training mode. + ''' + self.tuning_cfg_to_fw(tune_cfg) + logger.debug("Dump quantization configurations:") + logger.debug(self.quantize_config) + calib_sampling_size = tune_cfg.get('calib_sampling_size', 1) + if isinstance(dataloader, BaseDataLoader): + batch_size = dataloader.batch_size + for i in range(batch_size): + if calib_sampling_size % (batch_size - i) == 0: + calib_batch_size = batch_size - i + if i != 0: # pragma: no cover + logger.warning("Reset `calibration.dataloader.batch_size` field " + "to {}".format(calib_batch_size) + + " to make sure the sampling_size is " + "divisible exactly by batch size") + break + tmp_iterations = int(math.ceil(calib_sampling_size / calib_batch_size)) + dataloader.batch(calib_batch_size) + self.quantize_config['calib_iteration'] = tmp_iterations + + else: # pragma: no cover + if hasattr(dataloader, 'batch_size') and \ + calib_sampling_size % dataloader.batch_size != 0: + iter = self.quantize_config['calib_iteration'] + logger.warning( + "Please note that calibration sampling size {} " \ + "isn't divisible exactly by batch size {}. " \ + "So the real sampling size is {}.". 
+                        format(calib_sampling_size, dataloader.batch_size,
+                               dataloader.batch_size * iter))
+        q_layers = []
+        for idx, layer in enumerate(self.fp32_layers):
+            layer_config = layer["config"]
+            if layer["class_name"] in ["Conv2D", "Dense"] and \
+                layer['config']['name'] in self.quantize_config['op_wise_config']:
+                op_config = self.quantize_config['op_wise_config'][layer['config']['name']]
+                mode = 'per_channel' if op_config[0] else 'per_tensor'
+                #(TODO) support asym/sym
+                fake_quant_name = 'fake_quant_' + str(idx)
+                q_layers.append({'class_name': 'FakeQuant',
+                                 'config': {'mode': 'per_tensor', 'name': fake_quant_name}})
+                q_layers.append(layer)
+            else:
+                q_layers.append(layer)
+
+        keras_object = model._model_object
+        json_model = copy.deepcopy(json.loads(keras_object.to_json()))
+        json_model['config']['layers'] = q_layers
+        quantized_model = self._restore_model_from_json(json_model)
+
+        converted_model = self._calibrate(quantized_model, dataloader,
+                                          self.quantize_config['calib_iteration'])
+
+        from neural_compressor.model.keras_model import KerasModel
+        converted_model = KerasModel(converted_model)
+        return converted_model
+
+    def _calibrate(self, model, dataloader, calib_iteration):
+        # run eagerly to fetch the numpy min/max
+        model.compile(run_eagerly=True)
+        results = {}
+        for idx, (inputs, labels) in enumerate(dataloader):
+            outputs = model.predict_on_batch(inputs)
+            json_model = copy.deepcopy(json.loads(model.to_json()))
+            config = json_model["config"]
+            layers = config["layers"]
+            for layer in layers:
+                if layer['class_name'] == 'FakeQuant':
+                    min_value = layer['config']['min_value']
+                    max_value = layer['config']['max_value']
+                    if layer['config']['name'] not in results:
+                        results[layer['config']['name']] = {
+                            'min': [min_value], 'max': [max_value]}
+                    else:
+                        results[layer['config']['name']]['min'].append(min_value)
+                        results[layer['config']['name']]['max'].append(max_value)
+            if idx + 1 == calib_iteration:
+                break
+
+        # insert the calibrated min/max to Q/DQ
+        json_model = copy.deepcopy(json.loads(model.to_json()))
+        config = json_model["config"]
+        layers = config["layers"]
+        q_layers = []
+        for layer in layers:
+            layer_config = copy.deepcopy(layer['config'])
+            if layer['class_name'] == 'FakeQuant':
+                min_value = min(results[layer['config']['name']]['min'])
+                max_value = max(results[layer['config']['name']]['max'])
+                q_layers.append({'class_name': 'Quantize',
+                                 'config': {'min_range': min_value,
+                                            'max_range': max_value,
+                                            }})
+                q_layers.append({'class_name': 'DeQuantize',
+                                 'config': {'min_range': min_value,
+                                            'max_range': max_value,
+                                            }})
+            elif layer['class_name'] == 'Conv2D' or layer['class_name'] == 'Dense':
+                # index 0 is weight, index 1 is bias
+                q_layer_name = 'Q' + layer['class_name']
+                kernel = self.layer_weights[layer['config']['name']][0]
+                layer_config['min_value'] = str(kernel.min())
+                layer_config['max_value'] = str(kernel.max())
+                q_layers.append({'class_name': q_layer_name, 'config': layer_config})
+            else:
+                q_layers.append(layer)
+
+        json_model['config']['layers'] = q_layers
+        quantized_model = self._restore_model_from_json(json_model)
+        return quantized_model
+
+    def _restore_model_from_json(self, json_model):
+        from tensorflow.keras.models import model_from_json
+        custom_objects = {}
+        # We need to keep a dictionary of custom objects as our quantized library
+        # is not recognized by keras.
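+        # After this call, custom_objects maps the class names emitted into the
+        # JSON ('Quantize', 'DeQuantize', 'FakeQuant', 'QConv2D', 'QDense') to
+        # their Python classes so model_from_json can deserialize them.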
+        custom_objects = _add_supported_quantized_objects(custom_objects)
+        qmodel = model_from_json(json.dumps(json_model), custom_objects=custom_objects)
+        qmodel = self._set_weights(qmodel, self.layer_weights)
+        return qmodel
+
+    # set fp32 weights to qmodel
+    def _set_weights(self, qmodel, layer_weights):
+        for qlayer in qmodel.layers:
+            if qlayer.get_weights():
+                if qlayer.name in layer_weights:
+                    qlayer.set_weights(layer_weights[qlayer.name])
+                else:
+                    hit_layer = False
+                    for sub_layer in qlayer.submodules:
+                        if sub_layer.name in layer_weights:
+                            qlayer.set_weights(layer_weights[sub_layer.name])
+                            hit_layer = True
+                            break
+                    if not hit_layer:
+                        raise ValueError('Cannot match the module weights.')
+        return qmodel
+
+    @dump_elapsed_time(customized_msg="Model inference")
+    def evaluate(self, model, dataloader, postprocess=None,
+                 metrics=None, measurer=None, iteration=-1,
+                 tensorboard=False, fp32_baseline=False):
+        '''The function is used to run evaluation on the validation dataset.
+
+        Args:
+            model (object): The model to evaluate.
+            dataloader (generator): generate the data and labels.
+            postprocess (object, optional): process the result from the model
+            metrics (list, optional): Depends on model category. Defaults to None.
+            measurer (object, optional): for precise benchmark measurement.
+            iteration(int, optional): control steps of mini-batch
+            tensorboard (boolean, optional): for tensorboard inspect tensor.
+            fp32_baseline (boolean, optional): only for compare_label=False pipeline
+        '''
+        # use keras object
+        keras_model = model.model
+        logger.info("Start to evaluate the Keras model.")
+        results = []
+        for idx, (inputs, labels) in enumerate(dataloader):
+            # use predict on batch
+            if measurer is not None:
+                measurer.start()
+                predictions = keras_model.predict_on_batch(inputs)
+                measurer.end()
+            else:
+                predictions = keras_model.predict_on_batch(inputs)
+
+            if self.fp32_preds_as_label:
+                self.fp32_results.append(predictions) if fp32_baseline else \
+                    results.append(predictions)
+
+            if postprocess is not None:
+                predictions, labels = postprocess((predictions, labels))
+            if metrics:
+                for metric in metrics:
+                    if not hasattr(metric, "compare_label") or \
+                        (hasattr(metric, "compare_label") and metric.compare_label):
+                        metric.update(predictions, labels)
+            if idx + 1 == iteration:
+                break
+        return results
+
+    def query_fw_capability(self, model):
+        '''The function is used to return framework tuning capability.
+
+        Args:
+            model (object): The model to query quantization tuning capability.
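+
+        Returns:
+            [dict]: The framework tuning capability, with 'opwise' and
+                    'optypewise' entries (see the dict built below).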
+        '''
+        self.pre_optimized_model = model
+        fp32_config = {'weight': {'dtype': 'fp32'}, 'activation': {'dtype': 'fp32'}}
+        int8_type = self.query_handler.get_op_types_by_precision(precision='int8')
+        op_capability = self.query_handler.get_quantization_capability()
+        conv_config = copy.deepcopy(op_capability['int8']['Conv2D'])
+        dense_config = copy.deepcopy(op_capability['int8']['Dense'])
+        other_config = copy.deepcopy(op_capability['int8']['default'])
+
+        # get the layers info
+        keras_object = model._model_object
+        json_model = copy.deepcopy(json.loads(keras_object.to_json()))
+        config = json_model["config"]
+        self.fp32_layers = config["layers"]
+
+        # get fp32 layer weights
+        self.layer_weights = {}
+        for layer in keras_object.layers:
+            if layer.get_weights():
+                self.layer_weights[layer.name] = copy.deepcopy(layer.get_weights())
+
+        quantizable_op_details = OrderedDict()
+        for details in self.fp32_layers:
+            node_op = details['class_name']
+            node_name = details['config']['name']
+            if node_op == 'Conv2D':
+                quantizable_op_details[(node_name, node_op)] = [conv_config, fp32_config]
+            elif node_op == 'Dense':
+                quantizable_op_details[(node_name, node_op)] = [dense_config, fp32_config]
+            else:
+                quantizable_op_details[(node_name, node_op)] = [fp32_config]
+
+        capability = {
+            'opwise': copy.deepcopy(quantizable_op_details),
+            'optypewise': self.get_optype_wise_ability(quantizable_op_details),
+        }
+        logger.debug("Dump framework quantization capability:")
+        logger.debug(capability)
+
+        return capability
+
+    def get_optype_wise_ability(self, quantizable_op_details):
+        """Get the op type wise capability by generating the union value of each op type.
+
+        Returns:
+            [string dict]: the key is the op type while the value is the detailed
+                           configuration of activation and weight for this op type.
+        """
+        res = OrderedDict()
+        for op in quantizable_op_details:
+            if op[1] not in res:
+                res[op[1]] = {'activation': quantizable_op_details[op][0]['activation']}
+                if 'weight' in quantizable_op_details[op][0]:
+                    res[op[1]]['weight'] = quantizable_op_details[op][0]['weight']
+        return res
+
+    def inspect_tensor(self, model, dataloader, op_list=[], iteration_list=[],
+                       inspect_type='activation', save_to_disk=False):
+        '''The function is used by the tune strategy class for dumping tensor info.
+
+        Args:
+            model (object): The model to inspect.
+            dataloader (object): The dataloader used to feed into.
+            op_list (list): The op names in the fp32 model for dumping.
+            iteration_list (list): The iteration list containing iterations to dump.
+            inspect_type (str): The valid values are 'weight', 'activation', 'all'.
+            save_to_disk (bool): Save to disk or memory.
+
+        Return:
+            Numpy Array Dict
+            {
+              'weight': {
+                'node0_name': {'weight0_name': numpy.array, 'bias0_name': numpy.array, ...},
+                'node1_name': {'weight1_name': numpy.array, 'bias1_name': numpy.array, ...},
+                ...
+              },
+              'activation': [
+                # iter 0
+                {
+                  'node0_name': {'output0_name': numpy.array, 'output1_name': numpy.array, ...}
+                  'node1_name': {'output1_name': numpy.array, 'output1_name': numpy.array, ...}
+                  ...
+                },
+                # iter 1
+                ...
+              ]
+            }
+        '''
+        pass
+
+    def set_tensor(self, model, tensor_dict):
+        '''The function is used by the tune strategy class for setting a tensor back to the model.
+
+        Args:
+            model (object): The model to set tensor. Usually it is a quantized model.
+            tensor_dict (dict): The tensor dict to set. Note the numpy array contains float
+                                values; the adaptor layer has the responsibility to quantize to
+                                int8 or int32 to set into the quantized model if needed.
+                                The dict format is something like:
+                                {
+                                  'weight0_name': numpy.array,
+                                  'bias0_name': numpy.array,
+                                  ...
+                                }
+        '''
+        pass
+
+    def quantize_input(self, model):
+        ''' quantize the model to be able to take quantized input
+
+        Args:
+            model (object): The model to quantize input
+
+        Return:
+            model (object): The quantized input model
+            scale (float): The scale for the dataloader to generate quantized input
+        '''
+        return model, 1.
+
+    def _pre_eval_hook(self, model, *args, **kwargs):
+        '''The function is used to do some preprocessing before the evaluation phase.
+
+        Return:
+            model
+        '''
+        return model
+
+    def _post_eval_hook(self, model, *args, **kwargs):
+        '''The function is used to do some post processing after the complete evaluation.
+        '''
+        pass
+
+    def save(self, model, path):
+        '''The function is used by the tune strategy class for saving the model.
+
+        Args:
+            model (object): The model to be saved.
+            path (string): The path where to save.
+        '''
+        model.save(path)
+
+    def convert(self, model, source, destination):
+        '''The function is used to convert a source model format to another.
+
+        Args:
+            model (neural_compressor.model): base model to be converted.
+            source (string): The source model format.
+            destination (string): The destination model format.
+        '''
+        pass
+
+class KerasQuery(QueryBackendCapability):
+    def __init__(self, local_config_file=None):
+        super().__init__()
+        self.version = tf.version.VERSION
+        self.cfg = local_config_file
+        self.cur_config = None
+        self._one_shot_query()
+
+    def _one_shot_query(self):
+        with open(self.cfg) as f:
+            content = yaml.safe_load(f)
+            try:
+                self.cur_config = self._get_specified_version_cfg(content)
+            except Exception as e:
+                logger.info("Failed to parse {} due to {}.".format(self.cfg, str(e)))
+                self.cur_config = None
+                raise ValueError("Please check if the format of {} follows the Neural Compressor yaml schema.".
+                                 format(self.cfg))
+
+    def _get_specified_version_cfg(self, data):
+        """Get the configuration for the current runtime.
+        If there's no matched configuration in the input yaml, we'll
+        use the `default` field of the yaml.
+
+        Args:
+            data (Yaml content): input yaml file.
+
+        Returns:
+            [dictionary]: the content for the specific version.
+        """
+        default_config = None
+        for sub_data in data:
+            if sub_data['version']['name'] == self.version:
+                return sub_data
+
+            if sub_data['version']['name'] == 'default':
+                default_config = sub_data
+
+        return default_config
+
+    def get_version(self):
+        """Get the current backend version information.
+
+        Returns:
+            [string]: version string.
+        """
+        return self.cur_config['version']['name']
+
+    def get_precisions(self):
+        """Get supported precisions for the current backend.
+
+        Returns:
+            [string list]: the precisions' names.
+        """
+        return self.cur_config['precisions']['names']
+
+    def get_op_types(self):
+        """Get the supported op types by all precisions.
+
+        Returns:
+            [dictionary list]: A list composed of dictionaries whose key is the precision
+                               and whose value is the op types.
+        """
+        return self.cur_config['ops']
+
+    def get_quantization_capability(self):
+        """Get the supported op types' quantization capability.
+
+        Returns:
+            [dictionary list]: A list composed of dictionaries whose key is the precision
+                               and whose value is a dict that describes all op types' quantization capability.
+        """
+        return self.cur_config['capabilities']
+
+    def get_op_types_by_precision(self, precision):
+        """Get op types per precision.
+
+        Args:
+            precision (string): precision name
+
+        Returns:
+            [string list]: A list composed of op types.
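+
+        Example:
+            # With the default keras.yaml, this returns ['Conv2D', 'Dense'].
+            int8_ops = self.query_handler.get_op_types_by_precision(precision='int8')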
+ """ + assert precision in list(self.cur_config['ops'].keys()) + return self.cur_config['ops'][precision] diff --git a/neural_compressor/adaptor/keras.yaml b/neural_compressor/adaptor/keras.yaml new file mode 100644 index 00000000000..291eb43dc0d --- /dev/null +++ b/neural_compressor/adaptor/keras.yaml @@ -0,0 +1,69 @@ +## Copyright (c) 2021 Intel Corporation +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## +# +- + version: + name: 'default' + + precisions: &common_precisions + names: int8, fp32 + valid_mixed_precisions: [] + + ops: &common_ops + int8: ['Conv2D', 'Dense'] + fp32: ['*'] # '*' means all op types + + capabilities: &common_capabilities + int8: { + 'Conv2D': { + 'weight': { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_tensor'], + 'algorithm': ['minmax'] + }, + 'activation': { + 'dtype': ['int8'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'granularity': ['per_tensor'], + 'algorithm': ['minmax'] + } + }, + 'Dense': { + 'weight': { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'algorithm': ['minmax'], + 'granularity': ['per_tensor'], + }, + 'activation': { + 'dtype': ['int8'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'algorithm': ['minmax'], + 'granularity': ['per_tensor'], + } + }, + 'default': { + 'activation': { + 'dtype': ['int8'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'algorithm': ['minmax'], + 'granularity': ['per_tensor'] + } + }, + } diff --git a/neural_compressor/adaptor/keras_utils/__init__.py b/neural_compressor/adaptor/keras_utils/__init__.py new file mode 100644 index 00000000000..ed04d17bdbe --- /dev/null +++ b/neural_compressor/adaptor/keras_utils/__init__.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/neural_compressor/adaptor/keras_utils/quantizer.py b/neural_compressor/adaptor/keras_utils/quantizer.py new file mode 100644 index 00000000000..76742001ded --- /dev/null +++ b/neural_compressor/adaptor/keras_utils/quantizer.py @@ -0,0 +1,266 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import numpy as np +import tensorflow as tf + +from tensorflow.python.eager import context +from tensorflow.python.framework import tensor_shape +from tensorflow.python.keras import activations +from tensorflow.python.keras import backend +from tensorflow.python.keras import constraints +from tensorflow.python.keras import initializers +from tensorflow.python.keras import regularizers +from tensorflow.python.keras.engine.input_spec import InputSpec +from tensorflow.python.keras.utils import conv_utils +from tensorflow.python.keras.utils import tf_utils +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import embedding_ops +from tensorflow.python.ops import gen_math_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn +from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import sparse_ops +from tensorflow.python.ops import standard_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.util.tf_export import keras_export + +from tensorflow.keras.layers import Layer +from tensorflow.python.keras.layers.convolutional import Conv +from tensorflow.python.keras.layers.core import Dense + +class FakeQuant(Layer): + def __init__(self, mode='per_tensor', **kwargs): + super(FakeQuant, self).__init__(**kwargs) + self.mode = mode + self.axis = 1 if mode == 'per_channel' else 0 + self.min_value = tf.constant(np.finfo(np.float32).max, dtype=tf.float32) + self.max_value = tf.constant(np.finfo(np.float32).min, dtype=tf.float32) + + def call(self, inputs): + if self.mode == 'per_tensor': + self.min_value = tf.math.reduce_min(inputs) + self.max_value = tf.math.reduce_max(inputs) + else: + self.min_value = tf.math.reduce_min(inputs, axis=self.axis) + self.max_value = tf.math.reduce_max(inputs, axis=self.axis) + return inputs + + @classmethod + def from_config(cls, config): + return cls(**config) + + def get_config(self): + return {'mode': self.mode, + 'min_value': self.min_value.numpy(), + 'max_value': self.max_value.numpy(), + 'name': self.name} + +class Quantize(Layer): + def __init__(self, min_range, max_range, T=tf.qint8, mode='SCALED', + round_mode='HALF_AWAY_FROM_ZERO', narrow_range=False, axis=None): + super(Quantize, self).__init__() + self.min_range = float(min_range) + self.max_range = float(max_range) + self.T = T + self.mode = mode + self.round_mode = round_mode + self.narrow_range = narrow_range + self.axis = axis + + def call(self, inputs): + outputs, _, _ = tf.quantization.quantize(inputs, self.min_range, + self.max_range, self.T, + mode=self.mode, round_mode=self.round_mode, + narrow_range=self.narrow_range, axis=self.axis) + return outputs + + def get_config(self): + return {'min_range': self.min_range, 'max_range': self.max_range, + 'T': self.T, 'mode': self.mode, 'round_mode': self.round_mode, + 'narrow': self.narrow_range, 'axis': self.axis} + + @classmethod + def from_config(cls, config): + return cls(**config) + +class QConv2D(Conv): + def __init__(self, filters, kernel_size, strides=(1, 1), padding='valid', + 
data_format=None, dilation_rate=(1, 1), groups=1, activation=None, + use_bias=True, kernel_initializer='glorot_uniform', + bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, + activity_regularizer=None, kernel_constraint=None, bias_constraint=None, + min_value=-10000, max_value=10000, **kwargs): + super(QConv2D, self).__init__(rank=2, filters=filters, kernel_size=kernel_size, + strides=strides, padding=padding, data_format=data_format, + dilation_rate=dilation_rate, groups=groups, + activation=activations.get(activation), + use_bias=use_bias, kernel_initializer=initializers.get(kernel_initializer), + bias_initializer=initializers.get(bias_initializer), + kernel_regularizer=regularizers.get(kernel_regularizer), + bias_regularizer=regularizers.get(bias_regularizer), + activity_regularizer=regularizers.get(activity_regularizer), + kernel_constraint=constraints.get(kernel_constraint), + bias_constraint=constraints.get(bias_constraint), **kwargs) + self.weight_quantizer = Quantize(float(min_value), float(max_value)) + self.weight_dequantizer = DeQuantize(float(min_value), float(max_value)) + + def call(self, inputs): + input_shape = inputs.shape + + if self._is_causal: # Apply causal padding to inputs for Conv1D. + inputs = array_ops.pad(inputs, self._compute_causal_padding(inputs)) + + # add the Q/DQ here + kernel = self.weight_quantizer(self.kernel) + kernel = self.weight_dequantizer(kernel) + outputs = self._convolution_op(inputs, kernel) + + if self.use_bias: + output_rank = outputs.shape.rank + if self.rank == 1 and self._channels_first: + # nn.bias_add does not accept a 1D input tensor. + bias = array_ops.reshape(self.bias, (1, self.filters, 1)) + outputs += bias + else: + # Handle multiple batch dimensions. + if output_rank is not None and output_rank > 2 + self.rank: + + def _apply_fn(o): + return nn.bias_add(o, self.bias, data_format=self._tf_data_format) + + outputs = conv_utils.squeeze_batch_dims( + outputs, _apply_fn, inner_rank=self.rank + 1) + else: + outputs = nn.bias_add( + outputs, self.bias, data_format=self._tf_data_format) + + if not context.executing_eagerly(): + # Infer the static output shape: + out_shape = self.compute_output_shape(input_shape) + outputs.set_shape(out_shape) + + if self.activation is not None: + return self.activation(outputs) + return outputs + +class QDense(Dense): + def __init__(self, + units, + activation=None, + use_bias=True, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + min_value=-10000, + max_value=10000, + **kwargs): + super(QDense, self).__init__( + units=units, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + **kwargs) + self.weight_quantizer = Quantize(float(min_value), float(max_value)) + self.weight_dequantizer = DeQuantize(float(min_value), float(max_value)) + + def call(self, inputs): + if inputs.dtype.base_dtype != self._compute_dtype_object.base_dtype: + inputs = math_ops.cast(inputs, dtype=self._compute_dtype_object) + + # add the Q/DQ here + # (TODO) we have not try sparse dense and may have issues + kernel = self.weight_quantizer(self.kernel) + kernel = self.weight_dequantizer(kernel) + rank 
= inputs.shape.rank
+        if rank == 2 or rank is None:
+            # We use embedding_lookup_sparse as a more efficient matmul operation for
+            # large sparse input tensors. The op will result in a sparse gradient, as
+            # opposed to sparse_ops.sparse_tensor_dense_matmul which results in dense
+            # gradients. This can lead to significant speedups, see b/171762937.
+            if isinstance(inputs, sparse_tensor.SparseTensor):
+                # We need to fill empty rows, as the op assumes at least one id per row.
+                inputs, _ = sparse_ops.sparse_fill_empty_rows(inputs, 0)
+                # We need to do some munging of our input to use the embedding lookup as
+                # a matrix multiply. We split our input matrix into separate ids and
+                # weights tensors. The values of the ids tensor should be the column
+                # indices of our input matrix and the values of the weights tensor
+                # can continue to the actual matrix weights.
+                # The column arrangement of ids and weights
+                # will be summed over and does not matter. See the documentation for
+                # sparse_ops.sparse_tensor_dense_matmul for a more detailed explanation
+                # of the inputs to both ops.
+                ids = sparse_tensor.SparseTensor(
+                    indices=inputs.indices,
+                    values=inputs.indices[:, 1],
+                    dense_shape=inputs.dense_shape)
+                weights = inputs
+                outputs = embedding_ops.embedding_lookup_sparse_v2(
+                    kernel, ids, weights, combiner='sum')
+            else:
+                outputs = gen_math_ops.MatMul(a=inputs, b=kernel)
+        # Broadcast kernel to inputs.
+        else:
+            outputs = standard_ops.tensordot(inputs, kernel, [[rank - 1], [0]])
+            # Reshape the output back to the original ndim of the input.
+            if not context.executing_eagerly():
+                shape = inputs.shape.as_list()
+                output_shape = shape[:-1] + [kernel.shape[-1]]
+                outputs.set_shape(output_shape)
+
+        if self.use_bias:
+            outputs = nn_ops.bias_add(outputs, self.bias)
+
+        if self.activation is not None:
+            outputs = self.activation(outputs)
+        return outputs
+
+
+class DeQuantize(Layer):
+    def __init__(self, min_range, max_range, mode='SCALED',
+                 narrow_range=False, axis=None):
+        super(DeQuantize, self).__init__()
+        self.min_range = min_range
+        self.max_range = max_range
+        self.mode = mode
+        self.narrow_range = narrow_range
+        self.axis = axis
+
+    def call(self, inputs):
+        return tf.quantization.dequantize(inputs, float(self.min_range),
+                                          float(self.max_range), mode=self.mode,
+                                          narrow_range=self.narrow_range, axis=self.axis)
+
+    def get_config(self):
+        return {'min_range': self.min_range, 'max_range': self.max_range,
+                'mode': self.mode, 'narrow': self.narrow_range, 'axis': self.axis,
+                'dtype': self.dtype}
+
+    @classmethod
+    def from_config(cls, config):
+        return cls(**config)
diff --git a/neural_compressor/conf/config.py b/neural_compressor/conf/config.py
index 2cb547e6cbc..617bafd09eb 100644
--- a/neural_compressor/conf/config.py
+++ b/neural_compressor/conf/config.py
@@ -1391,6 +1391,7 @@ def map_pyconfig_to_cfg(self, pythonic_config):
             if pythonic_config.benchmark.outputs != []:
                 mapping.update({'model.outputs': pythonic_config.benchmark.outputs})
             mapping.update({
+                'model.backend': pythonic_config.benchmark.backend,
                 'evaluation.performance.warmup': pythonic_config.benchmark.warmup,
                 'evaluation.performance.iteration': pythonic_config.benchmark.iteration,
                 'evaluation.performance.configs.cores_per_instance':
diff --git a/neural_compressor/conf/pythonic_config.py b/neural_compressor/conf/pythonic_config.py
index 19c373a8205..2124579b406 100644
--- a/neural_compressor/conf/pythonic_config.py
+++ b/neural_compressor/conf/pythonic_config.py
@@ -185,6 +185,10 @@ class TensorFlow(MXNet):
     def __init__(self,
precisions=None): super().__init__(precisions) +class Keras(MXNet): + def __init__(self, precisions=None): + super().__init__(precisions) + class PyTorch(MXNet): def __init__(self, precisions=None): super().__init__(precisions) @@ -243,6 +247,7 @@ def search(self, search): nas = NASConfig() onnxruntime_config = ONNX() tensorflow_config = TensorFlow() +keras_config = Keras() pytorch_config = PyTorch() mxnet_config = MXNet() @@ -258,7 +263,8 @@ def __init__(self, onnxruntime=onnxruntime_config, tensorflow=tensorflow_config, pytorch=pytorch_config, - mxnet=mxnet_config): + mxnet=mxnet_config, + keras=keras_config): self._quantization = quantization self._benchmark = benchmark self._options = options @@ -269,6 +275,7 @@ def __init__(self, self._tensorflow = tensorflow self._pytorch = pytorch self._mxnet = mxnet + self._keras = keras @property def distillation(self): @@ -282,6 +289,10 @@ def nas(self): def tensorflow(self): return self._tensorflow + @property + def keras(self): + return self._keras + @property def pytorch(self): return self._pytorch diff --git a/neural_compressor/experimental/benchmark.py b/neural_compressor/experimental/benchmark.py index 22b7cdeb93f..7aff71cc14c 100644 --- a/neural_compressor/experimental/benchmark.py +++ b/neural_compressor/experimental/benchmark.py @@ -306,6 +306,8 @@ def run_instance(self, mode): "outputs": cfg.model.outputs, \ "recipes": cfg.model.recipes, \ 'workspace_path': cfg.tuning.workspace.path}) + if framework == 'keras': + framework_specific_info.update({'workspace_path': cfg.tuning.workspace.path}) if framework == 'mxnet': framework_specific_info.update({"b_dataloader": self._b_dataloader}) if 'onnx' in framework.lower(): @@ -479,6 +481,10 @@ def model(self, user_model): assert not isinstance(user_model, BaseModel), \ "Please pass an original framework model but not neural compressor model!" self.framework = get_model_fwk_name(user_model) + if self.framework == "tensorflow": + from ..model.model import get_model_type + if get_model_type(user_model) == 'keras' and cfg.model.backend == 'itex': + self.framework = 'keras' if self.framework == "pytorch": if cfg.model.backend == "default": self.framework = "pytorch_fx" diff --git a/neural_compressor/experimental/common/model.py b/neural_compressor/experimental/common/model.py index f34a5c35b80..6fec668f9e8 100644 --- a/neural_compressor/experimental/common/model.py +++ b/neural_compressor/experimental/common/model.py @@ -45,6 +45,8 @@ def __new__(cls, root, **kwargs): else: model_type = get_model_type(root) model = MODELS['tensorflow'](model_type, root, **kwargs) + elif framework == 'keras': + model = MODELS['keras'](root, **kwargs) elif framework == 'pytorch': model = MODELS[framework](root, **kwargs) else: diff --git a/neural_compressor/experimental/component.py b/neural_compressor/experimental/component.py index 8afc1703c23..4afbf2589e8 100644 --- a/neural_compressor/experimental/component.py +++ b/neural_compressor/experimental/component.py @@ -472,6 +472,10 @@ def model(self, user_model): assert not isinstance(user_model, BaseModel), \ "Please pass an original framework model but not neural compressor model!" 
self.framework = get_model_fwk_name(user_model) + if self.framework == "tensorflow": + from ..model.model import get_model_type + if get_model_type(user_model) == 'keras' and self.cfg.model.backend == 'itex': + self.framework = 'keras' if self.framework == "pytorch": if self.cfg.model.backend == "default": self.framework = "pytorch_fx" diff --git a/neural_compressor/experimental/data/dataloaders/dataloader.py b/neural_compressor/experimental/data/dataloaders/dataloader.py index c3463b875eb..c879b0b45d1 100644 --- a/neural_compressor/experimental/data/dataloaders/dataloader.py +++ b/neural_compressor/experimental/data/dataloaders/dataloader.py @@ -25,6 +25,7 @@ DATALOADERS = {"tensorflow": TensorflowDataLoader, "tensorflow_itex": TensorflowDataLoader, + "keras": TensorflowDataLoader, "mxnet": MXNetDataLoader, "pytorch": PyTorchDataLoader, "pytorch_ipex": PyTorchDataLoader, diff --git a/neural_compressor/experimental/metric/metric.py b/neural_compressor/experimental/metric/metric.py index b02b52cc861..31f0550b071 100644 --- a/neural_compressor/experimental/metric/metric.py +++ b/neural_compressor/experimental/metric/metric.py @@ -113,6 +113,7 @@ def __init__(self) -> None: framework_metrics = {"tensorflow": TensorflowMetrics, "tensorflow_itex": TensorflowMetrics, + "keras": TensorflowMetrics, "mxnet": MXNetMetrics, "pytorch": PyTorchMetrics, "pytorch_ipex": PyTorchMetrics, @@ -132,6 +133,7 @@ def __init__(self) -> None: registry_metrics = {"tensorflow": TENSORFLOW_METRICS, "tensorflow_itex": TENSORFLOW_ITEX_METRICS, + "keras": TENSORFLOW_METRICS, "mxnet": MXNET_METRICS, "pytorch": PYTORCH_METRICS, "pytorch_ipex": PYTORCH_METRICS, @@ -156,7 +158,7 @@ def __init__(self, framework: str): Args: framework: The framwork name. """ - assert framework in ("tensorflow", "tensorflow_itex", + assert framework in ("tensorflow", "tensorflow_itex","keras", "pytorch", "pytorch_ipex", "pytorch_fx", "onnxrt_qdq", "onnxrt_qlinearops", "onnxrt_integerops", "mxnet", "onnxrt_qoperator"), \ diff --git a/neural_compressor/model/keras_model.py b/neural_compressor/model/keras_model.py new file mode 100644 index 00000000000..f0995ceed59 --- /dev/null +++ b/neural_compressor/model/keras_model.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
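+
+"""KerasModel: a wrapper so the keras adaptor can treat a saved-model path and
+an in-memory tf.keras.Model object uniformly."""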
+
+import os
+from abc import abstractmethod
+from neural_compressor.model.base_model import BaseModel
+from neural_compressor.utils.utility import LazyImport
+tf = LazyImport('tensorflow')
+
+class KerasModel(BaseModel):
+    """Build a KerasModel object.
+
+    Args:
+        model (string or keras model object): model path or model object
+        kwargs (dict): other required parameters
+    """
+
+    def __init__(self, model, **kwargs):
+        self.component = None
+        self._model = model
+        if not isinstance(model, tf.keras.Model):
+            self._model_object = tf.keras.models.load_model(self._model)
+        else:
+            self._model_object = self._model
+        self._q_config = None
+
+    @property
+    def q_config(self):
+        return self._q_config
+
+    @q_config.setter
+    def q_config(self, q_config):
+        self._q_config = q_config
+
+    @property
+    def model(self):
+        return self._model_object
+
+    @property
+    def graph_info(self):
+        ''' return {Node: Node_type} like {'conv0': 'conv2d'} '''
+        #(TODO) get the graph info
+        return None
+
+    @abstractmethod
+    def save(self, root, *args, **kwargs):
+        self._model_object.save(root)
+
+    @abstractmethod
+    def export(
+        self,
+        save_path: str,
+        conf,
+    ):
+        pass
+
+    @abstractmethod
+    def framework(self):
+        return 'keras'
diff --git a/neural_compressor/model/model.py b/neural_compressor/model/model.py
index 9d1830db47f..cdeb526fa37 100644
--- a/neural_compressor/model/model.py
+++ b/neural_compressor/model/model.py
@@ -28,6 +28,7 @@
 from neural_compressor.conf import config as cfg
 from neural_compressor.model.base_model import BaseModel
 from neural_compressor.model.onnx_model import ONNXModel
+from neural_compressor.model.keras_model import KerasModel
 
 TORCH = False
 if importlib.util.find_spec('torch'):
@@ -56,39 +57,31 @@ def get_model_type(model):
     """
     from neural_compressor.adaptor.tf_utils.util import is_saved_model_format, is_ckpt_format
 
+    if isinstance(model, str):
+        model = os.path.abspath(os.path.expanduser(model))
+        if (model.endswith('.h5') and os.path.isfile(model)) or \
+            is_saved_model_format(os.path.dirname(model)) or \
+            (os.path.isdir(model) and is_saved_model_format(model)):
+            if version1_lt_version2(tf.version.VERSION, '2.10.0'):
+                logger.warn("Keras models running on TensorFlow versions lower"
+                            " than 2.10.0 do not support Intel ITEX.")
+            try:
+                model = tf.keras.models.load_model(model)
+            except:
+                pass
+    if isinstance(model, tf.keras.Model) and hasattr(model, 'to_json'):
+        return 'keras'
     if isinstance(model, tf.Graph):
         return 'graph'
     elif isinstance(model, tf.compat.v1.GraphDef):
         return 'graph_def'
-    elif isinstance(model, tf.keras.Model):
-        return 'keras'
     elif isinstance(model, tf.compat.v1.estimator.Estimator):
         return 'estimator'
     elif isinstance(model, str):
         model = os.path.abspath(os.path.expanduser(model))
-        if (model.endswith('.h5') and os.path.isfile(model)):
-            if version1_lt_version2(tf.version.VERSION, '2.3.0'):
-                logger.warn("keras model running on tensorflow 2.2.0 and"
-                            " lower may have problem.")
-            model = tf.keras.models.load_model(model)
-            if isinstance(model, tf.keras.Model):
-                return 'keras'
         if (model.endswith('.pb') and os.path.isfile(model)):
             if is_saved_model_format(os.path.dirname(model)):
-                # Warning: TF compatibility issue to load saved model. TF 2.3 keras.load
-                # can load saved model from TF backend, but TF 2.4 cannot.
- try: - if version1_lt_version2(tf.version.VERSION, '2.3.0'): - logger.warn("keras model running on tensorflow 2.2.0 and" - " lower may have problem.") - model = tf.keras.models.load_model(model) - if isinstance(model, tf.keras.Model): - return 'keras' - else: - return 'saved_model' - except: - # can't use keras load - return 'saved_model' + return 'saved_model' else: return 'frozen_pb' elif model.endswith('.ckpt') and os.path.isfile(model): @@ -97,20 +90,7 @@ def get_model_type(model): if is_ckpt_format(model): return 'checkpoint' elif is_saved_model_format(model): - # it's very ugly tf version issue, in tf2.3 keras.load can - #batch_size_(batch_size), load saved model from tf backend, but tf2.4 it will crash - try: - if version1_lt_version2(tf.version.VERSION, '2.3.0'): - logger.warn("keras model running on tensorflow 2.2.0 and" - " lower may have problem.") - model = tf.keras.models.load_model(model) - if isinstance(model, tf.keras.Model): - return 'keras' - else: - return 'saved_model' - except: - # can't use keras load - return 'saved_model' + return 'saved_model' elif os.path.isfile(model + '.pb'): return 'frozen_pb' @@ -404,28 +384,6 @@ def load_saved_model(model, saved_model_tags, input_tensor_names, output_tensor_ grappler_meta_graph_def, graph_id=b"tf_graph") return opt, input_tensor_names, output_tensor_names -def check_keras_format(model, saved_model_dir): - from tensorflow.python import saved_model - from tensorflow.python.saved_model.load import load - from tensorflow.python.saved_model import save_options - from tensorflow.python.saved_model.loader_impl import parse_saved_model_with_debug_info - version = 'saved_model_v2' - try: - saved_model.save( - model, - saved_model_dir, - options=save_options.SaveOptions(save_debug_info=True)) - except: - return 'trackable_object' - saved_model_proto, _ = parse_saved_model_with_debug_info(saved_model_dir) - saved_model_version = saved_model_proto.saved_model_schema_version - if saved_model_version == 0: - return 'saved_model_v1' - if saved_model_version not in [1, 2]: - raise ValueError("SavedModel file format({0}) is not supported".format( - saved_model_version)) - return version - def get_graph_from_saved_model_v2(saved_model_dir, input_tensor_names, output_tensor_names): from tensorflow.python.saved_model import tag_constants @@ -481,6 +439,28 @@ def get_graph_from_original_keras_v2(model, output_dir): output_names = [tensor.name.split(':')[0] for tensor in output_tensors] return graph_def, input_names, output_names +def check_keras_format(model, saved_model_dir): + from tensorflow.python import saved_model + from tensorflow.python.saved_model.load import load + from tensorflow.python.saved_model import save_options + from tensorflow.python.saved_model.loader_impl import parse_saved_model_with_debug_info + version = 'saved_model_v2' + try: + saved_model.save( + model, + saved_model_dir, + options=save_options.SaveOptions(save_debug_info=True)) + except: + return 'trackable_object' + saved_model_proto, _ = parse_saved_model_with_debug_info(saved_model_dir) + saved_model_version = saved_model_proto.saved_model_schema_version + if saved_model_version == 0: + return 'saved_model_v1' + if saved_model_version not in [1, 2]: + raise ValueError("SavedModel file format({0}) is not supported".format( + saved_model_version)) + return version + def get_graph_from_saved_model_v1(model): from tensorflow.python.framework import ops from tensorflow.python.saved_model import constants @@ -526,12 +506,10 @@ def get_graph_from_saved_model_v1(model): 
def keras_session(model, input_tensor_names, output_tensor_names, **kwargs): """Build session with keras model - Args: model (string or tf.keras.Model): model path or tf.keras.Model object input_tensor_names (list of string): input_tensor_names of model output_tensor_names (list of string): output_tensor_names of model - Returns: sess (tf.compat.v1.Session): tf.compat.v1.Session object input_tensor_names (list of string): validated input_tensor_names @@ -570,6 +548,7 @@ def keras_session(model, input_tensor_names, output_tensor_names, **kwargs): shutil.rmtree(temp_dir, True) return graph_def_session(graph_def, input_names, output_names, **kwargs) + def slim_session(model, input_tensor_names, output_tensor_names, **kwargs): """Build session with slim model @@ -1100,7 +1079,8 @@ def graph_def(self, graph_def): 'estimator': TensorflowBaseModel, 'slim': TensorflowBaseModel, 'saved_model': TensorflowSavedModelModel, - 'keras': TensorflowSavedModelModel,} + 'keras': TensorflowSavedModelModel + } class TensorflowModel(object): def __new__(cls, model_type, root, **kwargs): @@ -1155,6 +1135,7 @@ def save(self, root=None): MODELS = {'tensorflow': TensorflowModel, 'tensorflow_itex': TensorflowModel, + 'keras': KerasModel, 'mxnet': MXNetModel, 'pytorch': PyTorchModel if TORCH else None, 'pytorch_ipex': PyTorchIpexModel if TORCH else None, diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index db4cae2b1d4..aeefeed4132 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -537,6 +537,9 @@ def set_framework_info(self, q_dataloader, q_func=None): if self.cfg.model.backend == 'itex': self.cfg.model.framework = 'tensorflow_itex' framework = 'tensorflow_itex' + if 'keras' in framework: + framework_specific_info.update({ + 'workspace_path': self.cfg.tuning.workspace.path, }) if framework == 'mxnet': framework_specific_info.update({"q_dataloader": q_dataloader}) if 'onnx' in framework.lower(): diff --git a/test/itex/test_keras_in_keras_out.py b/test/itex/test_keras_in_keras_out.py new file mode 100644 index 00000000000..aa776d1d6fd --- /dev/null +++ b/test/itex/test_keras_in_keras_out.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import os +import time +import shutil +import numpy as np +import tensorflow as tf +from tensorflow import keras +from neural_compressor.utils import logger + +test_mode = 'accuracy' + +def build_model(): + # Load MNIST dataset + mnist = keras.datasets.mnist + (train_images, train_labels), (test_images, test_labels) = mnist.load_data() + + # Normalize the input image so that each pixel value is between 0 to 1. + train_images = train_images / 255.0 + test_images = test_images / 255.0 + + # Define the model architecture. 
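+    # A minimal Conv2D -> MaxPooling2D -> Dense stack is enough here: the test
+    # only needs quantizable Conv2D/Dense ops, not a highly accurate classifier.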
+ model = keras.Sequential([ + keras.layers.InputLayer(input_shape=(28, 28)), + keras.layers.Reshape(target_shape=(28, 28, 1)), + keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu'), + keras.layers.MaxPooling2D(pool_size=(2, 2)), + keras.layers.Flatten(), + keras.layers.Dense(10) + ]) + # Train the digit classification model + model.compile(optimizer='adam', + loss=tf.keras.losses.SparseCategoricalCrossentropy( + from_logits=True), + metrics=['accuracy']) + + model.fit( + train_images, + train_labels, + epochs=1, + validation_split=0.1, + ) + + _, baseline_model_accuracy = model.evaluate( + test_images, test_labels, verbose=0) + + print('Baseline test accuracy:', baseline_model_accuracy) + model.save("baseline_model") + +def build_dataset(): + # Load the data and split it between train and test sets + (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() + + # Scale images to the [0, 1] range + x_train = x_train.astype("float32") / 255 + x_test = x_test.astype("float32") / 255 + # Make sure images have shape (28, 28, 1) + x_train = np.expand_dims(x_train, -1) + x_test = np.expand_dims(x_test, -1) + + # convert class vectors to binary class matrices + y_train = keras.utils.to_categorical(y_train, 10) + y_test = keras.utils.to_categorical(y_test, 10) + return x_train, y_train, x_test, y_test + +def eval_func(model): + x_train, y_train, x_test, y_test = build_dataset() + start = time.time() + model.compile(metrics=["accuracy"], run_eagerly=False) + score = model.evaluate(x_test, y_test) + end = time.time() + + if test_mode == 'performance': + latency = end - start + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} data/sec".format(1. / latency)) + return score[1] + +class Dataset(object): + def __init__(self, batch_size=100): + mnist = keras.datasets.mnist + (train_images, train_labels), (test_images, test_labels) = mnist.load_data() + + # Normalize the input image so that each pixel value is between 0 to 1. 
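+        # (Only the test split is consumed via __getitem__/__len__ below; the
+        # train split is kept as attributes but is not used by this Dataset.)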
+ self.train_images = train_images / 255.0 + self.test_images = test_images / 255.0 + self.train_labels = train_labels + self.test_labels = test_labels + + def __len__(self): + return len(self.test_images) + + def __getitem__(self, idx): + return self.test_images[idx], self.test_labels[idx] + + +class TestKerasInKerasOut(unittest.TestCase): + @classmethod + def setUpClass(self): + os.environ["ITEX_ONEDNN_GRAPH"] = '1' + + @classmethod + def tearDownClass(self): + shutil.rmtree('baseline_model',ignore_errors=True) + shutil.rmtree('itex_qdq_keras_model',ignore_errors=True) + + def test_keras_in_keras_out(self): + logger.info("Run test_keras_in_keras_out case...") + global test_mode + test_mode = 'accuracy' + build_model() + + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.utils.utility import set_random_seed + from neural_compressor.experimental import common + set_random_seed(9527) + config = PostTrainingQuantConfig(backend='itex') + logger.info("=================Run Quantization...") + q_model = fit(keras.models.load_model('./baseline_model'), + conf=config, + calib_dataloader=common.DataLoader(Dataset()), + eval_func=eval_func) + q_model.save("itex_qdq_keras_model") + model = keras.models.load_model('./itex_qdq_keras_model') + model.summary() + found_quantize = False + found_dequantize = False + for layer in model.layers: + if 'quantize' in layer.name: + found_quantize = True + if 'de_quantize' in layer.name: + found_dequantize = True + self.assertEqual(found_quantize, True) + self.assertEqual(found_dequantize, True) + + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(backend='itex', iteration=100, cores_per_instance=1, num_of_instance=1) + logger.info("=================Run BenchMark...") + test_mode = 'performance' + fit(model, conf, b_func=eval_func) + + def test_keras_model_interface(self): + logger.info("Run test_keras_model_interface case...") + global test_mode + test_mode = 'accuracy' + build_model() + + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.utils.utility import set_random_seed + from neural_compressor.experimental import common + set_random_seed(9527) + config = PostTrainingQuantConfig(backend='itex') + q_model = fit(keras.models.load_model('./baseline_model'), + conf=config, + calib_dataloader=common.DataLoader(Dataset()), + eval_func=eval_func) + q_model.save("itex_qdq_keras_model") + self.assertEqual(q_model.framework(), 'keras') + + framework_config = { + 'framework': 'keras', + 'approach': 'post_training_static_quant' + } + q_model.q_config = framework_config + self.assertEqual(q_model.q_config['framework'], 'keras') + self.assertEqual(q_model.graph_info, None) + self.assertEqual(q_model.framework(), 'keras') + self.assertEqual(isinstance(q_model.model, tf.keras.Model), True) + +if __name__ == '__main__': + unittest.main()