From 087c412bd50a5bae2eac253d973e43db9b53bb3c Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Thu, 27 Apr 2023 15:57:42 +0800 Subject: [PATCH 01/14] Remove some redundant classes for quantization, benchmark and mixed precision Signed-off-by: Cheng, Penghui --- neural_compressor/benchmark.py | 535 +++++++----------- neural_compressor/config.py | 8 +- .../data/dataloaders/dataloader.py | 9 + neural_compressor/metric/metric.py | 55 ++ neural_compressor/mix_precision.py | 387 +++---------- neural_compressor/model/model.py | 67 +++ neural_compressor/quantization.py | 476 ++++------------ neural_compressor/utils/utility.py | 29 +- test/benchmark/test_benchmark_2.x.py | 42 +- test/mixed_precision/test_mixed_precision.py | 2 +- 10 files changed, 565 insertions(+), 1045 deletions(-) diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py index 0ce19ab6dd1..d9537856a8a 100644 --- a/neural_compressor/benchmark.py +++ b/neural_compressor/benchmark.py @@ -24,6 +24,8 @@ import signal import psutil from threading import Thread + +from neural_compressor.data.dataloaders.dataloader import check_dataloader from .adaptor import FRAMEWORKS from .objective import MultiObjective from .config import BenchmarkConfig, options @@ -31,8 +33,7 @@ from .utils import OPTIONS from .utils.utility import GLOBAL_STATE, MODE from .model import BaseModel -from .model import Model as NCModel -from .model.model import get_model_fwk_name +from .model.model import wrap_model_from from .utils import logger from .utils.utility import Statistics @@ -135,209 +136,31 @@ def get_bounded_threads(core_ids, threads, sockets): return res -class _Benchmark(object): - """Benchmark class can be used to evaluate the model performance. - - With the objective setting, user can get the data of what they configured in yaml. - - Args: - conf (obj): The BenchmarkConfig class containing accuracy goal, tuning objective etc. - """ - - def __init__(self, conf): - """Init a Benchmark object.""" - self.framework = None - self._model = None - self._b_dataloader = None - self._b_func = None - self._results = {} - assert isinstance(conf, BenchmarkConfig), \ - "The config object should be config.BenchmarkConfig, not {}".format(type(conf)) - self.conf = conf - if self.conf.framework is not None: - self.framework = self.conf.framework.lower() - - def __call__(self, raw_cmd=None): - """Directly call a Benchmark object. - - Args: - raw_cmd: raw command used for benchmark - """ - cfg = self.conf - assert sys.platform in ['linux', 'win32'], 'only support platform windows and linux...' 
- # disable multi-instance for running bechmark on GPU device - set_all_env_var(cfg) - if cfg.device == 'gpu': - set_env_var('NC_ENV_CONF', True, overwrite_existing=True) - - logger.info("Start to run Benchmark.") - if os.environ.get('NC_ENV_CONF') == 'True': - return self.run_instance() - if raw_cmd is None: - raw_cmd = sys.executable + ' ' + ' '.join(sys.argv) - self.config_instance(raw_cmd) - self.summary_benchmark() - return None - - fit = __call__ - - def summary_benchmark(self): - """Get the summary of the benchmark.""" - if sys.platform in ['linux']: - num_of_instance = int(os.environ.get('NUM_OF_INSTANCE')) - cores_per_instance = int(os.environ.get('CORES_PER_INSTANCE')) - latency_l = [] - throughput_l = [] - for i in range(0, num_of_instance): - log = '{}_{}_{}.log'.format(num_of_instance, cores_per_instance, i) - with open(log, "r") as f: - for line in f: - latency = re.search(r"[L,l]atency:\s+(\d+(\.\d+)?)", line) - latency_l.append(float(latency.group(1))) if latency and latency.group(1) else None - throughput = re.search(r"[T,t]hroughput:\s+(\d+(\.\d+)?)", line) - throughput_l.append(float(throughput.group(1))) if throughput and throughput.group(1) else None - if throughput_l and latency_l: - assert len(latency_l)==len(throughput_l)==num_of_instance, \ - "Multiple instance benchmark failed with some instance!" - - output_data = [ - ["Latency average [second/sample]", "{:.3f}".format(sum(latency_l)/len(latency_l))], - ["Throughput sum [samples/second]", "{:.3f}".format(sum(throughput_l))] - ] - logger.info("********************************************") - Statistics( - output_data, - header='Multiple Instance Benchmark Summary', - field_names=["Items", "Result"]).print_stat() - else: - # (TODO) should add summary after win32 benchmark has log - pass - - def call_one(self, cmd, log_file): - """Execute one command for one instance in one thread and dump the log (for Windows).""" - proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - shell=True) # nosec - with open(log_file, "w", 1, encoding="utf-8") as log_file: - log_file.write(f"[ COMMAND ] {cmd} \n") - for line in proc.stdout: - decoded_line = line.decode("utf-8", errors="ignore").strip() - logger.info(decoded_line) # redirect to terminal - log_file.write(decoded_line + "\n") - - def config_instance(self, raw_cmd): - """Configure the multi-instance commands and trigger benchmark with sub process. 
- - Args: - raw_cmd: raw command used for benchmark - """ - multi_instance_cmd = '' - num_of_instance = int(os.environ.get('NUM_OF_INSTANCE')) - cores_per_instance = int(os.environ.get('CORES_PER_INSTANCE')) - - logger.info("num of instance: {}".format(num_of_instance)) - logger.info("cores per instance: {}".format(cores_per_instance)) - - if(sys.platform in ['linux'] and get_architecture() == 'aarch64' and int(get_threads_per_core()) > 1): - raise OSError('Currently no support on ARM with hyperthreads') - elif sys.platform in ['linux']: - bounded_threads = get_bounded_threads(get_core_ids(), get_threads(), get_physical_ids()) - - for i in range(0, num_of_instance): - if sys.platform in ['linux'] and get_architecture() == 'x86_64': - core_list_idx = np.arange(0, cores_per_instance) + i * cores_per_instance - core_list = np.array(bounded_threads)[core_list_idx] - else: - core_list = np.arange(0, cores_per_instance) + i * cores_per_instance - # bind cores only allowed in linux/mac os with numactl enabled - prefix = self.generate_prefix(core_list) - instance_cmd = '{} {}'.format(prefix, raw_cmd) - if sys.platform in ['linux']: - instance_log = '{}_{}_{}.log'.format(num_of_instance, cores_per_instance, i) - multi_instance_cmd += '{} 2>&1|tee {} & \\\n'.format( - instance_cmd, instance_log) - else: # pragma: no cover - multi_instance_cmd += '{} \n'.format(instance_cmd) - - multi_instance_cmd += 'wait' if sys.platform in ['linux'] else '' - logger.info("Running command is\n{}".format(multi_instance_cmd)) - # each instance will execute single instance - set_env_var('NC_ENV_CONF', True, overwrite_existing=True) - if sys.platform in ['linux']: - p = subprocess.Popen(multi_instance_cmd, preexec_fn=os.setsid, shell=True) # nosec - elif sys.platform in ['win32']: # pragma: no cover - cmd_list = multi_instance_cmd.split("\n")[:-1] - threads = [] - for idx, cmd in enumerate(cmd_list): - # wrap each execution of windows bat file in one thread - # write the log to the log file of the corresponding instance - logger.info('Will dump to {}_{}_{}.log'.format(num_of_instance, cores_per_instance, idx)) - threads.append(Thread(target=self.call_one, args=(cmd, - '{}_{}_{}.log'.format(num_of_instance, cores_per_instance, idx)))) - for command_thread in threads: - command_thread.start() - logger.info("Worker threads start") - # Wait for all of them to finish - for command_thread in threads: - command_thread.join() - logger.info("Worker threads join") - return - try: - p.communicate() - except KeyboardInterrupt: - os.killpg(os.getpgid(p.pid), signal.SIGKILL) - - def generate_prefix(self, core_list): - """Generate the command prefix with numactl. - - Args: - core_list: a list of core indexes bound with specific instances - """ - if sys.platform in ['linux'] and os.system('numactl --show >/dev/null 2>&1') == 0: - return 'OMP_NUM_THREADS={} numactl --localalloc --physcpubind={}'.format(\ - len(core_list), ','.join(core_list.astype(str))) - elif sys.platform in ['win32']: # pragma: no cover - # (TODO) should we move the hw_info from ux? 
- from neural_compressor.ux.utils.hw_info import get_number_of_sockets - num_of_socket = int(get_number_of_sockets()) - cores_per_instance = int(os.environ.get('CORES_PER_INSTANCE')) - cores_per_socket = int(psutil.cpu_count(logical=False)) / num_of_socket - socket_id = int(core_list[0] // cores_per_socket) - # cores per socket should integral multiple of cores per instance, else not bind core - if cores_per_socket % cores_per_instance == 0: - from functools import reduce - hex_core = hex(reduce(lambda x, y : x | y, [1 << p for p in core_list])) - return 'start /b /WAIT /node {} /affinity {} CMD /c'.format(socket_id, hex_core) - else: - return '' - - def run_instance(self): +def run_instance(model, conf, b_dataloader=None, b_func=None): """Run the instance with the configuration. Args: runs benchmarking with numactl on specific cores and instances set by user config and returns model performance """ - if self._b_func is None: - cfg = self.conf + results = {} + if b_func is None: GLOBAL_STATE.STATE = MODE.BENCHMARK - framework_specific_info = {'device': cfg.device, \ - 'approach': None, \ + framework_specific_info = {'device': conf.device, + 'approach': None, 'random_seed': options.random_seed, - 'backend': cfg.backend \ - if cfg.backend is not None else 'default', + 'backend': conf.backend if conf.backend is not None else 'default', 'format': 'default'} - framework = cfg.framework.lower() + framework = conf.framework.lower() if 'tensorflow' in framework: - framework_specific_info.update({"inputs": cfg.inputs, \ - "outputs": cfg.outputs, \ + framework_specific_info.update({"inputs": conf.inputs, \ + "outputs": conf.outputs, \ "recipes": {}, \ 'workspace_path': options.workspace}) if framework == 'keras': framework_specific_info.update({'workspace_path': options.workspace}) if framework == 'mxnet': - framework_specific_info.update({"b_dataloader": self._b_dataloader}) + framework_specific_info.update({"b_dataloader": b_dataloader}) if 'onnx' in framework: framework_specific_info.update( {'workspace_path': options.workspace, \ @@ -346,37 +169,37 @@ def run_instance(self): framework_specific_info.update({"workspace_path": options.workspace, "q_dataloader": None}) - assert isinstance(self._model, BaseModel), 'need set neural_compressor Model for quantization....' + assert isinstance(model, BaseModel), 'need set neural_compressor Model for quantization....' 
adaptor = FRAMEWORKS[framework](framework_specific_info) - assert self._b_dataloader is not None, "dataloader should not be None" + assert b_dataloader is not None, "dataloader should not be None" from neural_compressor.utils.create_obj_from_config import create_eval_func - self._b_func = create_eval_func(self.framework, \ - self._b_dataloader, \ - adaptor, \ - None) + b_func = create_eval_func(conf.framework, + b_dataloader, + adaptor, + None) - self.objectives = MultiObjective(["performance"], - {'relative': 0.1}, - is_measure=True) + objectives = MultiObjective(["performance"], + {'relative': 0.1}, + is_measure=True) - val = self.objectives.evaluate(self._b_func, self._model) + val = objectives.evaluate(b_func, model) # measurer contain info not only performance(eg, memory, model_size) # also measurer have result list among steps acc, _ = val - batch_size = self._b_dataloader.batch_size - warmup = cfg.warmup - if len(self.objectives.objectives[0].result_list()) < warmup: - if len(self.objectives.objectives[0].result_list()) > 1 and warmup != 0: + batch_size = b_dataloader.batch_size + warmup = conf.warmup + if len(objectives.objectives[0].result_list()) < warmup: + if len(objectives.objectives[0].result_list()) > 1 and warmup != 0: warmup = 1 else: warmup = 0 - result_list = self.objectives.objectives[0].result_list()[warmup:] + result_list = objectives.objectives[0].result_list()[warmup:] latency = np.array(result_list).mean() / batch_size - self._results["performance"] = acc, batch_size, result_list + results["performance"] = acc, batch_size, result_list logger.info("\nbenchmark result:") for i, res in enumerate(result_list): @@ -384,136 +207,175 @@ def run_instance(self): logger.info("Batch size = {}".format(batch_size)) logger.info("Latency: {:.3f} ms".format(latency * 1000)) logger.info("Throughput: {:.3f} images/sec".format(1. / latency)) + return results else: - self._b_func(self._model.model) + b_func(model.model) - @property - def results(self): - """Get the results of benchmarking.""" - return self._results - @property - def b_dataloader(self): - """Get the dataloader for the benchmarking.""" - return self._b_dataloader +def generate_prefix(core_list): + """Generate the command prefix with numactl. - @b_dataloader.setter - def b_dataloader(self, dataloader): - """Set dataloader for benchmarking. + Args: + core_list: a list of core indexes bound with specific instances + """ + if sys.platform in ['linux'] and os.system('numactl --show >/dev/null 2>&1') == 0: + return 'OMP_NUM_THREADS={} numactl --localalloc --physcpubind={}'.format(\ + len(core_list), ','.join(core_list.astype(str))) + elif sys.platform in ['win32']: # pragma: no cover + # (TODO) should we move the hw_info from ux? + from neural_compressor.ux.utils.hw_info import get_number_of_sockets + num_of_socket = int(get_number_of_sockets()) + cores_per_instance = int(os.environ.get('CORES_PER_INSTANCE')) + cores_per_socket = int(psutil.cpu_count(logical=False)) / num_of_socket + socket_id = int(core_list[0] // cores_per_socket) + # cores per socket should integral multiple of cores per instance, else not bind core + if cores_per_socket % cores_per_instance == 0: + from functools import reduce + hex_core = hex(reduce(lambda x, y : x | y, [1 << p for p in core_list])) + return 'start /b /WAIT /node {} /affinity {} CMD /c'.format(socket_id, hex_core) + else: + return '' - It is iterable and the batched data should consist of a tuple like (input, label) or yield (input, _). 
- When b_dataloader is set, users can configure postprocess(optional) and metric - in yaml file or set postprocess and metric cls for evaluation, - or just get performance without a label in dataloader and configure postprocess/metric. - Args: - dataloader(generator): users are supported to set a user-defined dataloader - which meet the requirements that can yield a tuple of - (input, label)/(input, _) batched data. - Another good practice is to use - neural_compressor.data.DataLoader - to initialize a neural_compressor dataloader object. - Notice neural_compressor.data.DataLoader - is just a wrapper of the information needed to - build a dataloader, it can't yield - batched data and only in this setter method - a 'real' eval_dataloader will be created, - the reason is we have to know the framework info - and only after the Quantization object is created then - framework information can be known. - Future we will support creating iterable dataloader - from neural_compressor.data.DataLoader - """ - assert hasattr(dataloader, '__iter__') and \ - hasattr(dataloader, 'batch_size'), \ - 'dataloader must implement __iter__ method and batch_size attribute' - self._b_dataloader = dataloader +def call_one(cmd, log_file): + """Execute one command for one instance in one thread and dump the log (for Windows).""" + proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + shell=True) # nosec + with open(log_file, "w", 1, encoding="utf-8") as log_file: + log_file.write(f"[ COMMAND ] {cmd} \n") + for line in proc.stdout: + decoded_line = line.decode("utf-8", errors="ignore").strip() + logger.info(decoded_line) # redirect to terminal + log_file.write(decoded_line + "\n") - @property - def b_func(self): - """Not support getting b_func.""" - assert False, 'Should not try to get the value of `b_func` attribute.' - return None - @b_func.setter - def b_func(self, user_b_func): - """Eval function for benchmark. +def config_instance(raw_cmd): + """Configure the multi-instance commands and trigger benchmark with sub process. - Args: - user_b_func: This function takes "model" as input parameter - and executes the entire training process with self - contained training hyper-parameters. If train_func is set, - an evaluation process must be triggered and the user should - set eval_dataloader with metric configured or directly eval_func - to make an evaluation of the model executed. 
- """ - self._b_func = user_b_func + Args: + raw_cmd: raw command used for benchmark + """ + multi_instance_cmd = '' + num_of_instance = int(os.environ.get('NUM_OF_INSTANCE')) + cores_per_instance = int(os.environ.get('CORES_PER_INSTANCE')) + + logger.info("num of instance: {}".format(num_of_instance)) + logger.info("cores per instance: {}".format(cores_per_instance)) + + if (sys.platform in ['linux'] and get_architecture() == 'aarch64' and int(get_threads_per_core()) > 1): + raise OSError('Currently no support on ARM with hyperthreads') + elif sys.platform in ['linux']: + bounded_threads = get_bounded_threads(get_core_ids(), get_threads(), get_physical_ids()) + + for i in range(0, num_of_instance): + if sys.platform in ['linux'] and get_architecture() == 'x86_64': + core_list_idx = np.arange(0, cores_per_instance) + i * cores_per_instance + core_list = np.array(bounded_threads)[core_list_idx] + else: + core_list = np.arange(0, cores_per_instance) + i * cores_per_instance + # bind cores only allowed in linux/mac os with numactl enabled + prefix = generate_prefix(core_list) + instance_cmd = '{} {}'.format(prefix, raw_cmd) + if sys.platform in ['linux']: + instance_log = '{}_{}_{}.log'.format(num_of_instance, cores_per_instance, i) + multi_instance_cmd += '{} 2>&1|tee {} & \\\n'.format( + instance_cmd, instance_log) + else: # pragma: no cover + multi_instance_cmd += '{} \n'.format(instance_cmd) + + multi_instance_cmd += 'wait' if sys.platform in ['linux'] else '' + logger.info("Running command is\n{}".format(multi_instance_cmd)) + # each instance will execute single instance + set_env_var('NC_ENV_CONF', True, overwrite_existing=True) + if sys.platform in ['linux']: + p = subprocess.Popen(multi_instance_cmd, preexec_fn=os.setsid, shell=True) # nosec + elif sys.platform in ['win32']: # pragma: no cover + cmd_list = multi_instance_cmd.split("\n")[:-1] + threads = [] + for idx, cmd in enumerate(cmd_list): + # wrap each execution of windows bat file in one thread + # write the log to the log file of the corresponding instance + logger.info('Will dump to {}_{}_{}.log'.format(num_of_instance, cores_per_instance, idx)) + threads.append(Thread(target=call_one, args=(cmd, + '{}_{}_{}.log'.format(num_of_instance, cores_per_instance, idx)))) + for command_thread in threads: + command_thread.start() + logger.info("Worker threads start") + # Wait for all of them to finish + for command_thread in threads: + command_thread.join() + logger.info("Worker threads join") + return + try: + p.communicate() + except KeyboardInterrupt: + os.killpg(os.getpgid(p.pid), signal.SIGKILL) + + +def summary_benchmark(): + """Get the summary of the benchmark.""" + if sys.platform in ['linux']: + num_of_instance = int(os.environ.get('NUM_OF_INSTANCE')) + cores_per_instance = int(os.environ.get('CORES_PER_INSTANCE')) + latency_l = [] + throughput_l = [] + for i in range(0, num_of_instance): + log = '{}_{}_{}.log'.format(num_of_instance, cores_per_instance, i) + with open(log, "r") as f: + for line in f: + latency = re.search(r"[L,l]atency:\s+(\d+(\.\d+)?)", line) + latency_l.append(float(latency.group(1))) if latency and latency.group(1) else None + throughput = re.search(r"[T,t]hroughput:\s+(\d+(\.\d+)?)", line) + throughput_l.append(float(throughput.group(1))) if throughput and throughput.group(1) else None + if throughput_l and latency_l: + assert len(latency_l)==len(throughput_l)==num_of_instance, \ + "Multiple instance benchmark failed with some instance!" 
+ + output_data = [ + ["Latency average [second/sample]", "{:.3f}".format(sum(latency_l)/len(latency_l))], + ["Throughput sum [samples/second]", "{:.3f}".format(sum(throughput_l))] + ] + logger.info("********************************************") + Statistics( + output_data, + header='Multiple Instance Benchmark Summary', + field_names=["Items", "Result"]).print_stat() + else: + # (TODO) should add summary after win32 benchmark has log + pass - @property - def model(self): - """Get the model.""" - return self._model - @model.setter - def model(self, user_model): - """Set the user model and dispatch to the framework-specific internal model object. +def benchmark_with_raw_cmd(raw_cmd, config=None): + """Benchmark the model performance with the raw commend. - Args: - user_model: users are supported to set model from the original framework model format - (eg, tensorflow frozen_pb or path to a saved model), - but not recommended. A best practice is to set from an initialized - neural_compressor.model.Model. - If tensorflow model is used, the model's inputs/outputs will be - auto inferenced, but sometimes auto inferenced - inputs/outputs will not meet your requests, so it is better to - set them manually in config yaml file. - Another corner case is the slim model of tensorflow, - be careful of the name of the model configured in the yaml file, - make sure the name is in the supported slim model list. - """ - cfg = self.conf - if cfg.framework is None: - assert not isinstance(user_model, BaseModel), \ - "Please pass an original framework model but not neural compressor model!" - self.framework = get_model_fwk_name(user_model) - if self.framework == "tensorflow": - from .model.tensorflow_model import get_model_type - if get_model_type(user_model) == 'keras' and cfg.backend == 'itex': - self.framework = 'keras' - if self.framework == "pytorch": - if cfg.backend == "default": - self.framework = "pytorch_fx" - elif cfg.backend == "ipex": - self.framework = "pytorch_ipex" - import intel_extension_for_pytorch - cfg.framework = self.framework - - if not isinstance(user_model, BaseModel): - logger.warning("Force convert framework model to neural_compressor model.") - if "tensorflow" in self.framework or self.framework == "keras": - self._model = NCModel(user_model, backend=self.framework, device=cfg.device) - else: - self._model = NCModel(user_model, backend=self.framework) - else: - # It is config of neural_compressor version < 2.0, no need in 2.0 - if cfg.framework == "pytorch_ipex": - from neural_compressor.model.torch_model import IPEXModel - if not isinstance(user_model, IPEXModel): - self._model = NCModel(user_model.model, framework=cfg.framework) - return - self._model = user_model - - if 'tensorflow' in self.framework: - self._model.name = cfg.model_name - self._model.output_tensor_names = cfg.outputs - self._model.input_tensor_names = cfg.inputs - self._model.workspace_path = options.workspace - - def __repr__(self): - """Get the object representation in string format.""" - return 'Benchmark' - -def fit(model, config=None, b_dataloader=None, b_func=None): + Args: + raw_cmd (string): The commend to be benchmarked. + config (BenchmarkConfig): The configuration for benchmark containing accuracy goal, + tuning objective and preferred calibration & quantization + tuning space etc. 
+ + Example:: + + # Run benchmark according to config + from neural_compressor.benchmark import fit_with_raw_cmd + + conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=7) + fit_with_raw_cmd("test.py", conf) + """ + if config is not None: + if config.backend == "ipex": + import intel_extension_for_pytorch + assert sys.platform in ['linux', 'win32'], 'only support platform windows and linux...' + # disable multi-instance for running bechmark on GPU device + set_all_env_var(config) + + config_instance(raw_cmd) + summary_benchmark() + + +def fit(model, config, b_dataloader=None, b_func=None): """Benchmark the model performance with the configure. Args: @@ -533,11 +395,22 @@ def fit(model, config=None, b_dataloader=None, b_func=None): conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=7) fit(model='./int8.pb', config=conf, b_dataloader=eval_dataloader) """ - benchmarker = _Benchmark(config) - benchmarker.model = model - if b_func is not None: - benchmarker.b_func = b_func + if config.backend == "ipex": + import intel_extension_for_pytorch + + wrapped_model = wrap_model_from(model, config) + if b_dataloader is not None: - benchmarker.b_dataloader = b_dataloader - benchmarker() - return benchmarker.results + check_dataloader(b_dataloader) + + assert sys.platform in ['linux', 'win32'], 'only support platform windows and linux...' + # disable multi-instance for running bechmark on GPU device + set_all_env_var(config) + if config.device == 'gpu': + set_env_var('NC_ENV_CONF', True, overwrite_existing=True) + + logger.info("Start to run Benchmark.") + if os.environ.get('NC_ENV_CONF') == 'True': + return run_instance(model=wrapped_model, conf=config, b_dataloader=b_dataloader, b_func=b_func) + raw_cmd = sys.executable + ' ' + ' '.join(sys.argv) + benchmark_with_raw_cmd(raw_cmd) diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 0bdb967d70f..670f3573a67 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -1295,12 +1295,12 @@ def __init__(self, def approach(self): """Get approach.""" return self._approach - + @property def framework(self): """Get framework.""" return self._framework - + @framework.setter def framework(self, framework): """Set framework.""" @@ -1309,7 +1309,7 @@ def framework(self, framework): class WeightPruningConfig: """Config Class for Pruning. Define a single or a sequence of pruning configs. - + Args: pruning_configs (list of dicts, optional): Local pruning configs only valid to linked layers. Parameters defined out of pruning_configs are valid for all layers. @@ -2238,7 +2238,7 @@ def onnxruntime(self): def accuracy(self): """Get the accuracy object.""" return self._accuracy - + @property def tuning(self): """Get the tuning object.""" diff --git a/neural_compressor/data/dataloaders/dataloader.py b/neural_compressor/data/dataloaders/dataloader.py index 348caf40a3d..89e8cdd2ea8 100644 --- a/neural_compressor/data/dataloaders/dataloader.py +++ b/neural_compressor/data/dataloaders/dataloader.py @@ -80,6 +80,7 @@ def __new__(cls, framework, dataset, batch_size=1, collate_fn=None, shuffle=shuffle, distributed=distributed) + def _generate_common_dataloader(dataloader, framework, distributed=False): """Generate common dataloader. 
@@ -112,3 +113,11 @@ def _generate_common_dataloader(dataloader, framework, distributed=False):
         pin_memory=dataloader.pin_memory,
         shuffle=dataloader.shuffle,
         distributed=bool(dataloader.distributed or distributed))
+
+
+def check_dataloader(dataloader):
+    """Check whether the dataloader meets the requirements of neural_compressor."""
+    assert hasattr(dataloader, '__iter__') and \
+           hasattr(dataloader, 'batch_size'), \
+           'dataloader must implement __iter__ method and batch_size attribute'
+    return True
diff --git a/neural_compressor/metric/metric.py b/neural_compressor/metric/metric.py
index 91a2328390a..172dd9780f0 100644
--- a/neural_compressor/metric/metric.py
+++ b/neural_compressor/metric/metric.py
@@ -1615,3 +1615,58 @@ def result(self):
         roc_auc = sklearn.metrics.roc_auc_score(targets, scores)
         acc = sklearn.metrics.accuracy_score(targets, np.round(scores))
         return acc
+
+
+def register_customer_metric(user_metric, framework):
+    """Register a custom metric class or a dict of built-in metric configurations.
+
+    1. neural_compressor has many built-in metrics;
+    users can pass a metric configuration dict to tell neural_compressor which metric to use.
+    You can also set multiple metrics to evaluate the performance of a specific model.
+    Single metric:
+        {topk: 1}
+    Multi-metrics:
+        {topk: 1,
+        MSE: {compare_label: False},
+        weight: [0.5, 0.5],
+        higher_is_better: [True, False]
+        }
+    For the built-in metrics, please refer to the link below:
+    https://github.com/intel/neural-compressor/blob/master/docs/source/metric.md#supported-built-in-metric-matrix.
+
+    2. Users can also get the built-in metrics through neural_compressor.Metric:
+        Metric(name="topk", k=1)
+    3. Users can also set a custom metric through this API. The metric class should take the outputs of the model or
+    of the postprocess (if any) as inputs; neural_compressor built-in metrics always take (predictions, labels)
+    as inputs for update, and user_metric.metric_cls should be a subclass of neural_compressor.metric.BaseMetric.
+
+    Args:
+        user_metric(neural_compressor.metric.Metric or a dict of built-in metric configurations):
+            The object of Metric or a dict of built-in metric configurations.
+
+        framework: the framework name, such as tensorflow, pytorch, etc.
+ + """ + if isinstance(user_metric, dict): + metric_cfg = user_metric + else: + if isinstance(user_metric, Metric): + if user_metric.metric_cls is None: + name = user_metric.name + metric_cls = METRICS(framework).metrics[name] + metric_cfg = {name: {**user_metric.kwargs}} + return metric_cfg + else: + name = user_metric.name + metric_cls = user_metric.metric_cls + metric_cfg = {name: {**user_metric.kwargs}} + else: + for i in ['reset', 'update', 'result']: + assert hasattr(user_metric, i), 'Please realise {} function' \ + 'in user defined metric'.format(i) + metric_cls = type(user_metric).__name__ + name = 'user_' + metric_cls + metric_cfg = {name: id(user_metric)} + metrics = METRICS(framework) + metrics.register(name, metric_cls) + return metric_cfg diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py index 1f5bc0445ba..714199f96f6 100644 --- a/neural_compressor/mix_precision.py +++ b/neural_compressor/mix_precision.py @@ -20,296 +20,15 @@ import pickle import numpy as np import random -from .utils.utility import time_limit, CpuInfo + +from neural_compressor.data.dataloaders.dataloader import check_dataloader +from neural_compressor.metric.metric import register_customer_metric +from .utils.utility import time_limit from .strategy import STRATEGIES from .config import _Config, options from .utils import logger -from .model.model import BaseModel, get_model_fwk_name, Model, MODELS - -class _MixedPrecision: - """Class used for generating low precision model. - - _MixedPrecision class automatically generates low precision model across various DL - frameworks including tensorflow, pytorch and onnxruntime. - - Example:: - - from neural_compressor.config import MixedPrecisionConfig - def eval_func(model): - ... - return accuracy - - conf = MixedPrecisionConfig() - output_model = mix_precision.fit( - model, - conf, - eval_func=eval_func, - ) - """ - def __init__(self, conf=None): - """Initialize `MixedPrecision` class. - - Args: - conf (obj): The MixedPrecisionConfig class containing accuracy goal, tuning objective etc. - """ - self.conf = _Config(mixed_precision=conf, quantization=None, benchmark=None - , pruning=None, distillation=None, nas=None) - seed = options.random_seed - random.seed(seed) - np.random.seed(seed) - - self._eval_func = None - self._eval_dataloader = None - self._model = None - self._metric = None - - def pre_process(self): - """Create strategy object for tuning.""" - strategy = 'automixedprecision' - _resume = None - # check if interrupted tuning procedure exists. if yes, it will resume the - # whole auto tune process. 
- self.resume_file = os.path.abspath(os.path.expanduser(options.resume_from)) \ - if options.workspace and options.resume_from else None - if self.resume_file: - assert os.path.exists(self.resume_file), \ - "The specified resume file {} doesn't exist!".format(self.resume_file) - with open(self.resume_file, 'rb') as f: - _resume = pickle.load(f).__dict__ - - self.strategy = STRATEGIES[strategy]( - model=self.model, - conf=self.conf, - eval_func=self._eval_func, - eval_dataloader=self._eval_dataloader, - eval_metric=self.metric, - resume=_resume, - q_hooks=None) - - def execute(self): - """Execute routinue based on strategy design.""" - try: - with time_limit(self.conf.mixed_precision.tuning_criterion.timeout): - self.strategy.traverse() - except KeyboardInterrupt: - pass - except Exception as e: - logger.error("Unexpected exception {} happened during tuning.".format(repr(e))) - import traceback - traceback.print_exc() - finally: - if self.strategy.best_qmodel: - logger.info( - "Specified timeout or max trials is reached! " - "Found a quantized model which meet accuracy goal. Exit.") - self.strategy.deploy_config() - else: - logger.error( - "Specified timeout or max trials is reached! " - "Not found any quantized model which meet accuracy goal. Exit.") - - return self.strategy.best_qmodel - - def __call__(self): - """Execute this class. - - For derived classes, an override function is required. - """ - self.pre_process() - results = self.execute() - return results - - fit = __call__ - - @property - def precisions(self): - """Get private member variable `precisions` of `_MixedPrecision` class.""" - return self._precisions - - @precisions.setter - def precisions(self, customized_precisions): - """Set private member variable `precisions` of `_MixedPrecision` class.""" - if isinstance(customized_precisions, list): - self._precisions = sorted([i.strip() for i in customized_precisions]) - elif isinstance(customized_precisions, str): - self._precisions = sorted([i.strip() for i in customized_precisions.split(',')]) - self.conf.mixed_precision.precision = self._precisions - - @property - def eval_dataloader(self): - """Get eval_dataloader.""" - return self._eval_dataloader - - @eval_dataloader.setter - def eval_dataloader(self, dataloader): - """Set Dataloader for evaluation. - - It is iterable and the batched data should consists of a tuple like (input, label), - when eval_dataloader is set, user should configure postprocess(optional) and metric - in yaml file or set postprocess and metric cls. Notice evaluation dataloader will be - used to generate data for model inference, make sure the input data can be feed to model. - - Args: - dataloader(generator): user are supported to set a user defined dataloader - which meet the requirements that can yield tuple of - (input, label)/(input, _) batched data. - Another good practice is to use neural_compressor.common.DataLoader - to initialize a neural_compressor dataloader object. - Notice neural_compressor.common.DataLoader is just a wrapper of the - information needed to build a dataloader, it can't yield - batched data and only in this setter method - a 'real' eval_dataloader will be created, - the reason is we have to know the framework info - and only after the mixed_precision object created then - framework infomation can be known. 
Future we will support - creating iterable dataloader from neural_compressor.common.DataLoader - """ - # pragma: no cover - assert hasattr(dataloader, '__iter__') and \ - hasattr(dataloader, 'batch_size'), \ - 'dataloader must implement __iter__ method and batch_size attribute' +from .model.model import wrap_model_from - self._eval_dataloader = dataloader - - @property - def model(self): - """Get model.""" - return self._model - - @model.setter - def model(self, user_model): - """Set the user model and dispatch to framework specific internal model object. - - Args: - user_model: user are supported to set model from original framework model format - (eg, tensorflow frozen_pb or path to a saved model), but not recommended. - Best practice is to set from a initialized neural_compressor.common.Model. - If tensorflow model is used, model's inputs/outputs will be auto inferred, - but sometimes auto inferred inputs/outputs will not meet your requests, - set them manually in config yaml file. Another corner case is slim model - of tensorflow, be careful of the name of model configured in yaml file, - make sure the name is in supported slim model list. - """ - cfg = self.conf - if cfg.mixed_precision.framework is None: - if isinstance(user_model, BaseModel): - cfg.mixed_precision.framework = list(MODELS.keys())[list(MODELS.values()).index(type(user_model))] - if cfg.mixed_precision.backend == "ipex": - assert cfg.mixed_precision.framework == "pytorch_ipex",\ - "Please wrap the model with correct Model class!" - if cfg.mixed_precision.backend == "itex": # pragma: no cover - from .model.tensorflow_model import get_model_type - if get_model_type(user_model.model) == 'keras': - assert cfg.mixed_precision.framework == "keras",\ - "Please wrap the model with KerasModel class!" - else: - assert cfg.mixed_precision.framework == "pytorch_itex", \ - "Please wrap the model with TensorflowModel class!" - else: - framework = get_model_fwk_name(user_model) - if framework == "tensorflow": - from .model.tensorflow_model import get_model_type - if get_model_type(user_model) == 'keras' and cfg.mixed_precision.backend == 'itex': - framework = 'keras' - if framework == "pytorch": - if cfg.mixed_precision.backend == "default": - framework = "pytorch_fx" - elif cfg.mixed_precision.backend == "ipex": - framework = "pytorch_ipex" - cfg.mixed_precision.framework = framework - - if not isinstance(user_model, BaseModel): - logger.warning("Force convert framework model to neural_compressor model.") - if "tensorflow" in cfg.mixed_precision.framework or cfg.mixed_precision.framework == "keras": - self._model = Model(user_model, backend=cfg.mixed_precision.framework - , device=cfg.mixed_precision.device) - else: - self._model = Model(user_model, backend=cfg.mixed_precision.framework) - else: # pragma: no cover - if cfg.mixed_precision.framework == "pytorch_ipex": - from neural_compressor.model.torch_model import IPEXModel - assert type(user_model) == IPEXModel, \ - "The backend is ipex, please wrap the model with IPEXModel class!" - elif cfg.mixed_precision.framework == "pytorch_fx": - from neural_compressor.model.torch_model import PyTorchFXModel - assert type(user_model) == PyTorchFXModel, \ - "The backend is default, please wrap the model with PyTorchFXModel class!" 
- - self._model = user_model - - if 'tensorflow' in cfg.mixed_precision.framework: - self._model.name = cfg.mixed_precision.model_name - self._model.output_tensor_names = cfg.mixed_precision.outputs - self._model.input_tensor_names = cfg.mixed_precision.inputs - self._model.workspace_path = options.workspace - - @property - def metric(self): - """Get `metric` attribute.""" - return self._metric - - @metric.setter - def metric(self, user_metric): - """Set metric class or a dict of built-in metric configures. - - 1. neural_compressor have many built-in metrics, user can pass a metric configure dict to tell neural - compressor what metric will be use. - You can set multi-metrics to evaluate the performance of a specific model. - Single metric: - {topk: 1} - - Multi-metrics: - {topk: 1, - MSE: {compare_label: False}, - weight: [0.5, 0.5], - higher_is_better: [True, False] - } - Refer to this [file](../docs/source/metric.md#supported-built-in-metric-matrix) for built-in metric list - 2. User also can set specific metric through this api. The metric class should take the outputs of the model or - postprocess(if have) as inputs, neural_compressor built-in metric always take(predictions, labels) as inputs - for update, and user_metric.metric_cls should be sub_class of neural_compressor.metric.BaseMetric. - - Args: - user_metric(neural_compressor.metric.Metric or a dict of built-in metric configures): - The object of Metric or a dict of built-in metric configurations. - """ - from .metric import Metric as NCMetric, METRICS - if isinstance(user_metric, dict): - metric_cfg = user_metric - else: - if isinstance(user_metric, NCMetric): - name = user_metric.name - metric_cls = user_metric.metric_cls - metric_cfg = {name: {**user_metric.kwargs}} - else: - for i in ['reset', 'update', 'result']: - assert hasattr(user_metric, i), 'Please realise {} function' \ - 'in user defined metric'.format(i) - metric_cls = type(user_metric).__name__ - name = 'user_' + metric_cls - metric_cfg = {name: id(user_metric)} - metrics = METRICS(self.conf.mixed_precision.framework) - metrics.register(name, metric_cls) - self._metric = metric_cfg - - - @property - def eval_func(self): - """Get evaluation function.""" - assert False, 'Should not try to get the value of `eval_func` attribute.' - - @eval_func.setter - def eval_func(self, user_eval_func): - """Set evaluation function provided by user. - - Args: - user_eval_func: This function takes "model" as input parameter - and executes entire evaluation process with self - contained metrics. If eval_func set, - an evaluation process must be triggered - to make evaluation of the model executed. - """ - self._eval_func = user_eval_func def fit(model, config=None, @@ -353,7 +72,7 @@ def fit(model, mixed_precision. Returns: - A _MixedPrecision object that generates low precision model across various DL frameworks. + A Mixed precision model across various DL frameworks. Raises: AssertionError. 
@@ -366,39 +85,69 @@ def fit(model, conf = MixedPrecisionConfig() converted_model = mix_precision.fit(model, config=conf) """ - converter = _MixedPrecision(config) + if eval_dataloader is not None: + check_dataloader(eval_dataloader) + if config.precision in config.excluded_precisions: - logger.warning("Target precision is in excluded_precisions, "\ - "please modify precision or excluded_precisions to make it understandable.") + logger.warning("Target precision is in excluded_precisions, " + "please modify precision or excluded_precisions to make it understandable.") sys.exit(0) - precisions = list(set(config.precision) - set(config.excluded_precisions)) - converter.conf.mixed_precision.precisions = precisions - converter.model = model - if ('bf16' in precisions or 'fp16' in precisions) and \ - converter.conf.mixed_precision.framework == "onnxruntime": # pragma: no cover - if config.device == "cpu": - logger.warning("Mix precision exits due to device isn't gpu for onnx models.") - sys.exit(0) - elif config.backend != "onnxrt_cuda_ep": - logger.warning("Mix precision exits due to backend isn't onnxrt_cuda_ep for onnx models.") - sys.exit(0) - elif 'bf16' in precisions and not CpuInfo().bf16 and \ - converter.conf.mixed_precision.framework != "onnxruntime": # pragma: no cover - if os.getenv('FORCE_BF16') == '1': - logger.warning("Mix precision will generate bf16 graph although " \ - "the hardware doesn't support bf16 instruction.") - else: - logger.warning("Mix precision exits due to the hardware " \ - "doesn't support bf16 instruction.") - sys.exit(0) - elif 'fp16' in precisions and converter.conf.mixed_precision.framework != "onnxruntime": - logger.warning("Currently mix precision only supports fp16 for onnx models.") - sys.exit(0) - if eval_func is not None: - converter.eval_func = eval_func - if eval_dataloader is not None: - converter.eval_dataloader = eval_dataloader + wrapped_model = wrap_model_from(model, config) + if eval_metric is not None: - converter.metric = eval_metric - return converter() + metric = register_customer_metric(eval_metric, config.framework) + else: + metric = None + + conf = _Config(mixed_precision=config, + quantization=None, + benchmark=None, + pruning=None, + distillation=None, + nas=None) + seed = options.random_seed + random.seed(seed) + np.random.seed(seed) + + _resume = None + # check if interrupted tuning procedure exists. if yes, it will resume the + # whole auto tune process. + resume_file = os.path.abspath(os.path.expanduser( + options.resume_from)) if options.workspace and options.resume_from else None + if resume_file: + assert os.path.exists(resume_file), \ + "The specified resume file {} doesn't exist!".format(resume_file) + with open(resume_file, 'rb') as f: + _resume = pickle.load(f).__dict__ + + strategy = STRATEGIES['automixedprecision']( + model=wrapped_model, + conf=conf, + eval_func=eval_func, + eval_dataloader=eval_dataloader, + eval_metric=metric, + resume=_resume, + q_hooks=None) + + try: + with time_limit(config.tuning_criterion.timeout): + strategy.traverse() + except KeyboardInterrupt: + pass + except Exception as e: + logger.error("Unexpected exception {} happened during tuning.".format(repr(e))) + import traceback + traceback.print_exc() + finally: + if strategy.best_qmodel: + logger.info( + "Specified timeout or max trials is reached! " + "Found a quantized model which meet accuracy goal. Exit.") + strategy.deploy_config() + else: + logger.error( + "Specified timeout or max trials is reached! 
" + "Not found any quantized model which meet accuracy goal. Exit.") + + return strategy.best_qmodel diff --git a/neural_compressor/model/model.py b/neural_compressor/model/model.py index 7b5e9252e74..fb408c6f8f5 100644 --- a/neural_compressor/model/model.py +++ b/neural_compressor/model/model.py @@ -21,6 +21,7 @@ import os import importlib import sys +from neural_compressor.config import options from neural_compressor.utils.utility import LazyImport from neural_compressor.utils import logger from neural_compressor.model.base_model import BaseModel @@ -186,3 +187,69 @@ def __new__(cls, root, **kwargs): else: model = MODELS[backend](root, **kwargs) return model + + +def wrap_model_from(user_model, conf): + """Wrap the user model and dispatch to framework specific internal model object. + + Args: + user_model: user are supported to set model from original framework model format + (eg, tensorflow frozen_pb or path to a saved model), but not recommended. + Best practice is to set from a initialized neural_compressor.common.Model. + If tensorflow model is used, model's inputs/outputs will be auto inferred, + but sometimes auto inferred inputs/outputs will not meet your requests, + set them manually in config yaml file. Another corner case is slim model + of tensorflow, be careful of the name of model configured in yaml file, + make sure the name is in supported slim model list. + conf: the instance of PostTrainingQuantConfig or QuantizationAwareTrainingConfig or MixedPrecisionConfig. + """ + if conf.framework is None: + if isinstance(user_model, BaseModel): # pragma: no cover + conf.framework = list(MODELS.keys())[list(MODELS.values()).index(type(user_model))] + if conf.backend == "ipex": + assert conf.framework == "pytorch_ipex",\ + "Please wrap the model with correct Model class!" + if conf.backend == "itex": + if get_model_type(user_model.model) == 'keras': + assert conf.framework == "keras",\ + "Please wrap the model with KerasModel class!" + else: + assert conf.framework == "pytorch_itex", \ + "Please wrap the model with TensorflowModel class!" + else: + framework = get_model_fwk_name(user_model) + if framework == "tensorflow": + if get_model_type(user_model) == 'keras' and conf.backend == 'itex': + framework = 'keras' + if framework == "pytorch": + if conf.backend == "default": + framework = "pytorch_fx" + elif conf.backend == "ipex": + framework = "pytorch_ipex" + conf.framework = framework + + if not isinstance(user_model, BaseModel): + logger.warning("Force convert framework model to neural_compressor model.") + if "tensorflow" in conf.framework or conf.framework == "keras": + model = Model(user_model, backend=conf.framework, device=conf.device) + else: + model = Model(user_model, backend=conf.framework) + else: # pragma: no cover + if conf.framework == "pytorch_ipex": + from neural_compressor.model.torch_model import IPEXModel + assert type(user_model) == IPEXModel, \ + "The backend is ipex, please wrap the model with IPEXModel class!" + elif conf.framework == "pytorch_fx": + from neural_compressor.model.torch_model import PyTorchFXModel + assert type(user_model) == PyTorchFXModel, \ + "The backend is default, please wrap the model with PyTorchFXModel class!" 
+ + model = user_model + + if 'tensorflow' in conf.framework: + model.name = conf.model_name + model.output_tensor_names = conf.outputs + model.input_tensor_names = conf.inputs + model.workspace_path = options.workspace + + return model diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index 747cfa4d38d..6e29c2d6de2 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -21,372 +21,14 @@ import random import numpy as np from .config import _Config, options -from .model.model import BaseModel, get_model_fwk_name, get_model_type, Model, MODELS +from .data.dataloaders.dataloader import check_dataloader +from .metric.metric import register_customer_metric +from .model.model import wrap_model_from from .strategy import STRATEGIES from .utils import logger from .utils.utility import time_limit, dump_class_attrs -class _PostTrainingQuant: - """Post Training Quantization class. - - It automatically searches for optimal quantization recipes for low precision model inference, - achieving best tuning objectives like inference performance within accuracy loss constraints. - Tuner abstracts out the differences of quantization APIs across various DL frameworks - and brings a unified API for automatic quantization that works on frameworks including - tensorflow, pytorch and mxnet. - Since DL use cases vary in the accuracy metrics (Top-1, MAP, ROC etc.), loss criteria - (<1% or <0.1% etc.) and tuning objectives (performance, memory footprint etc.). - - Example:: - - conf = PostTrainingQuantConfig() - quantizer = _PostTrainingQuant(conf) - quantizer.model = model - quantizer.eval_func = eval_func - quantizer.calib_dataloader = calib_dataloader - q_model = quantizer.fit() - """ - def __init__(self, conf, **kwargs): - """Initialize the parameters. - - Args: - conf (PostTrainingQuantConfig): A instance of PostTrainingQuantConfig to - specify the quantization behavior. - """ - self.conf = _Config(quantization=conf, benchmark=None, pruning=None, distillation=None, nas=None) - seed = options.random_seed - random.seed(seed) - np.random.seed(seed) - self._train_func = None - self._calib_dataloader = None - self._eval_func = None - self._eval_dataloader = None - self._model = None - self._metric = None - self.callbacks = None - if "model" in kwargs: - self.model = kwargs["model"] - - def pre_proccess(self): - """Create strategy to optimize model.""" - cfg = self.conf - - strategy = cfg.quantization.tuning_criterion.strategy - - if cfg.quantization.quant_level == "auto": - strategy = "auto" - - elif cfg.quantization.quant_level == 0: - strategy = "conservative" - - if strategy == "mse_v2": - if not (cfg.quantization.framework.startswith("tensorflow")\ - or cfg.quantization.framework == 'pytorch_fx'): # pragma: no cover - strategy = "basic" - logger.warning(f"MSE_v2 does not support {cfg.quantization.framework} now, use basic instead.") - logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.") - assert strategy in STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy) - - logger.info(f"Start {strategy} tuning.") - _resume = None - # check if interrupted tuning procedure exists. if yes, it will resume the - # whole auto tune process. 
- self.resume_file = os.path.abspath(os.path.expanduser(options.resume_from)) \ - if options.workspace and options.resume_from else None - if self.resume_file: - assert os.path.exists(self.resume_file), \ - "The specified resume file {} doesn't exist!".format(self.resume_file) - with open(self.resume_file, 'rb') as f: - _resume = pickle.load(f).__dict__ - - if self._eval_func is None and self._eval_dataloader is None: # pragma: no cover - logger.info("Quantize model without tuning!") - - self.strategy = STRATEGIES[strategy]( - model = self.model, - conf = self.conf, - q_dataloader=self._calib_dataloader, - q_func=self._train_func, - eval_func=self._eval_func, - eval_dataloader=self._eval_dataloader, - eval_metric=self.metric, - resume=_resume, - q_hooks=self.callbacks.hooks if self.callbacks is not None else None) - - def execute(self): - """Quantization execute routinue based on strategy design.""" - try: - with time_limit(self.conf.quantization.tuning_criterion.timeout): - logger.debug("Dump user configuration:") - conf_dict = {} - dump_class_attrs(self.conf, conf_dict) - logger.info(conf_dict) - self.strategy.traverse() - except KeyboardInterrupt: - pass - except Exception as e: # pragma: no cover - logger.error("Unexpected exception {} happened during tuning.".format(repr(e))) - import traceback - traceback.print_exc() - finally: - if self.strategy.best_qmodel: - logger.info( - "Specified timeout or max trials is reached! " - "Found a quantized model which meet accuracy goal. Exit.") - self.strategy.deploy_config() - else: - logger.error( - "Specified timeout or max trials is reached! " - "Not found any quantized model which meet accuracy goal. Exit.") - - return self.strategy.best_qmodel - - def __call__(self): - """Execute this class. - - For derived classes(Pruning, Quantization, etc.), an override function is required. - """ - self.pre_proccess() - results = self.execute() - return results - - fit = __call__ - - @property - def model(self): - """Getter of model in neural_compressor.model.""" - return self._model - - @model.setter - def model(self, user_model): - """Set the user model and dispatch to framework specific internal model object. - - Args: - user_model: user are supported to set model from original framework model format - (eg, tensorflow frozen_pb or path to a saved model), - but not recommended. Best practice is to set from a initialized - neural_compressor.model.Model. - If tensorflow model is used, model's inputs/outputs will be - auto inferenced, but sometimes auto inferenced - inputs/outputs will not meet your requests, - set them manually in config yaml file. - Another corner case is slim model of tensorflow, - be careful of the name of model configured in yaml file, - make sure the name is in supported slim model list. - - """ - cfg = self.conf - if cfg.quantization.framework is None: - if isinstance(user_model, BaseModel): # pragma: no cover - cfg.quantization.framework = list(MODELS.keys())[list(MODELS.values()).index(type(user_model))] - if cfg.quantization.backend == "ipex": - assert cfg.quantization.framework == "pytorch_ipex",\ - "Please wrap the model with correct Model class!" - if cfg.quantization.backend == "itex": - if get_model_type(user_model.model) == 'keras': - assert cfg.quantization.framework == "keras",\ - "Please wrap the model with KerasModel class!" - else: - assert cfg.quantization.framework == "pytorch_itex", \ - "Please wrap the model with TensorflowModel class!" 
- else: - framework = get_model_fwk_name(user_model) - if framework == "tensorflow": - if get_model_type(user_model) == 'keras' and cfg.quantization.backend == 'itex': - framework = 'keras' - if framework == "pytorch": - if cfg.quantization.backend == "default": - framework = "pytorch_fx" - elif cfg.quantization.backend == "ipex": - framework = "pytorch_ipex" - cfg.quantization.framework = framework - - if not isinstance(user_model, BaseModel): - logger.warning("Force convert framework model to neural_compressor model.") - if "tensorflow" in cfg.quantization.framework or cfg.quantization.framework == "keras": - self._model = Model(user_model, backend=cfg.quantization.framework, device=cfg.quantization.device) - else: - self._model = Model(user_model, backend=cfg.quantization.framework) - else: # pragma: no cover - if cfg.quantization.framework == "pytorch_ipex": - from neural_compressor.model.torch_model import IPEXModel - assert type(user_model) == IPEXModel, \ - "The backend is ipex, please wrap the model with IPEXModel class!" - elif cfg.quantization.framework == "pytorch_fx": - from neural_compressor.model.torch_model import PyTorchFXModel - assert type(user_model) == PyTorchFXModel, \ - "The backend is default, please wrap the model with PyTorchFXModel class!" - - self._model = user_model - - if 'tensorflow' in cfg.quantization.framework: - self._model.name = cfg.quantization.model_name - self._model.output_tensor_names = cfg.quantization.outputs - self._model.input_tensor_names = cfg.quantization.inputs - self._model.workspace_path = options.workspace - - @property - def eval_func(self): - """Not support get eval_func.""" - assert False, 'Should not try to get the value of `eval_func` attribute.' - - @eval_func.setter - def eval_func(self, user_eval_func): - """Eval function for component. - - Args: - user_eval_func: This function takes "model" as input parameter - and executes entire evaluation process with self - contained metrics. If eval_func set, - an evaluation process must be triggered - to make evaluation of the model executed. - """ - self._eval_func = user_eval_func - - @property - def eval_dataloader(self): - """Getter to eval dataloader.""" - return self._eval_dataloader - - @eval_dataloader.setter - def eval_dataloader(self, dataloader): - """Set Data loader for evaluation of component. - - It is iterable and the batched data should consists of yield (input, _). - the input in the batched data will be used for model inference, so it - should satisfy the input format of specific model. - - Args: - dataloader(generator): user are supported to set a user defined dataloader - which meet the requirements that can yield tuple of - (input, label)/(input, _) batched data. - """ - assert hasattr(dataloader, '__iter__') and \ - hasattr(dataloader, 'batch_size'), \ - 'dataloader must implement __iter__ method and batch_size attribute' - - self._eval_dataloader = dataloader - - @property - def metric(self): - """Get `metric` attribute.""" - return self._metric - - @metric.setter - def metric(self, user_metric): - """Set metric class or a dict of built-in metric configures. - - 1. neural_compressor have many built-in metrics, - user can pass a metric configure dict to tell neural compressor what metric will be use. - You also can set multi-metrics to evaluate the performance of a specific model. 
- Single metric: - {topk: 1} - Multi-metrics: - {topk: 1, - MSE: {compare_label: False}, - weight: [0.5, 0.5], - higher_is_better: [True, False] - } - For the built-in metrics, please refer to below link: - https://github.com/intel/neural-compressor/blob/master/docs/source/metric.md#supported-built-in-metric-matrix. - - 2. User also can get the built-in metrics by neural_compressor.Metric: - Metric(name="topk", k=1) - 3. User also can set specific metric through this api. The metric class should take the outputs of the model or - postprocess(if have) as inputs, neural_compressor built-in metric always take(predictions, labels) - as inputs for update, and user_metric.metric_cls should be sub_class of neural_compressor.metric.BaseMetric. - - Args: - user_metric(neural_compressor.metric.Metric or a dict of built-in metric configurations): - The object of Metric or a dict of built-in metric configurations. - - """ - from .metric import Metric as NCMetric - from .metric import METRICS - if isinstance(user_metric, dict): - metric_cfg = user_metric - else: - if isinstance(user_metric, NCMetric): - if user_metric.metric_cls is None: - name = user_metric.name - metric_cls = METRICS(self.conf.quantization.framework).metrics[name] - metric_cfg = {name: {**user_metric.kwargs}} - self._metric = metric_cfg - return - else: - name = user_metric.name - metric_cls = user_metric.metric_cls - metric_cfg = {name: {**user_metric.kwargs}} - else: - for i in ['reset', 'update', 'result']: - assert hasattr(user_metric, i), 'Please realise {} function' \ - 'in user defined metric'.format(i) - metric_cls = type(user_metric).__name__ - name = 'user_' + metric_cls - metric_cfg = {name: id(user_metric)} - metrics = METRICS(self.conf.quantization.framework) - metrics.register(name, metric_cls) - self._metric = metric_cfg - - @property - def calib_func(self): - """Not support get train_func.""" - assert False, 'Should not try to get the value of `train_func` attribute.' - - @calib_func.setter - def calib_func(self, calib_func): - """Calibrate scale and zero for quantization. - - Args: - calib_func: This function takes "model" as input parameter - and executes entire evaluation process. If calib_func set, - an evaluation process must be triggered and user should - set eval_dataloader with metric configured or directly eval_func - to make evaluation of the model executed. - """ - self._train_func = calib_func - - @property - def calib_dataloader(self): - """Get `calib_dataloader` attribute.""" - return self._calib_dataloader - - @calib_dataloader.setter - def calib_dataloader(self, dataloader): - """Set Data loader for calibration, mandatory for post-training quantization. - - If calib_func is not be set then user must set calibration dataloader, - and calibration is iterable and the batched data should consists of a tuple like - (input, label) if the calibration dataset containing label, or yield (input, _) - for label-free calibration dataset, the input in the batched data will be used for - model inference, so it should satisfy the input format of specific model. - In calibration process, label of data loader will not be used and - neither the postprocess and metric. User only need to set - calib_dataloader when calib_dataloader can not be configured from yaml file. - - Args: - dataloader(generator): user are supported to set a user defined dataloader - which meet the requirements that can yield tuple of - (input, label)/(input, _) batched data. 
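To make the __iter__/batch_size contract checked by this setter concrete, a minimal sketch of a user-defined, label-free calibration dataloader; the class name and the in-memory data source are illustrative:

    import numpy as np

    class CalibDataLoader:
        """Illustrative dataloader: only __iter__ and a batch_size attribute are required."""
        def __init__(self, samples, batch_size=1):
            self.samples = samples
            self.batch_size = batch_size

        def __iter__(self):
            for i in range(0, len(self.samples), self.batch_size):
                batch = np.stack(self.samples[i:i + self.batch_size])
                yield batch, None  # (input, _) pairs for a label-free calibration set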
Another good - practice is to use neural_compressor.data.DataLoader - to initialize a neural_compressor dataloader object. Notice - neural_compressor.data.DataLoader is just a wrapper of the - information needed to build a dataloader, it can't yield - batched data and only in this setter method - a 'real' calib_dataloader will be created, - the reason is we have to know the framework info - and only after the Quantization object created then - framework infomation can be known. - Future we will support creating iterable dataloader - from neural_compressor.data.DataLoader - """ - assert hasattr(dataloader, '__iter__') and \ - hasattr(dataloader, 'batch_size'), \ - 'dataloader must implement __iter__ method and batch_size attribute' - self._calib_dataloader = dataloader - - def fit(model, conf, calib_dataloader=None, @@ -450,9 +92,29 @@ def eval_func(model): Tuner will combine model, eval_dataloader and pre-defined metrics to run evaluation process. - eval_metric (dict or obj): Set metric class or a dict of built-in metric configures, + eval_metric (dict or obj): Set metric class or a dict of built-in metric configures, and neural_compressor will initialize this class when evaluation. + 1. neural_compressor have many built-in metrics, + user can pass a metric configure dict to tell neural compressor what metric will be use. + You also can set multi-metrics to evaluate the performance of a specific model. + Single metric: + {topk: 1} + Multi-metrics: + {topk: 1, + MSE: {compare_label: False}, + weight: [0.5, 0.5], + higher_is_better: [True, False] + } + For the built-in metrics, please refer to below link: + https://github.com/intel/neural-compressor/blob/master/docs/source/metric.md#supported-built-in-metric-matrix. + + 2. User also can get the built-in metrics by neural_compressor.Metric: + Metric(name="topk", k=1) + 3. User also can set specific metric through this api. The metric class should take the outputs of the model or + postprocess(if have) as inputs, neural_compressor built-in metric always take(predictions, labels) + as inputs for update, and user_metric.metric_cls should be sub_class of neural_compressor.metric.BaseMetric. 
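To make the third option concrete, a minimal sketch of a user-defined metric that satisfies the reset/update/result contract described above; the class name and the accuracy logic are illustrative:

    import numpy as np

    class MyAccuracy:
        """Illustrative user metric: update() receives (predictions, labels)."""
        def __init__(self):
            self.correct, self.total = 0, 0

        def reset(self):
            self.correct, self.total = 0, 0

        def update(self, predictions, labels):
            preds = np.argmax(np.asarray(predictions), axis=-1)
            labels = np.asarray(labels).reshape(preds.shape)
            self.correct += int((preds == labels).sum())
            self.total += labels.size

        def result(self):
            return self.correct / self.total if self.total else 0.0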
+ Example:: # Quantization code for PTQ @@ -474,16 +136,86 @@ def eval_func(model): # Saved quantized model in ./saved folder q_model.save("./saved") """ - quantizer = _PostTrainingQuant(conf) - quantizer.model = model - if eval_func is not None: - quantizer.eval_func = eval_func if calib_dataloader is not None: - quantizer.calib_dataloader = calib_dataloader - if calib_func is not None: - quantizer.calib_func = calib_func + check_dataloader(calib_dataloader) if eval_dataloader is not None: - quantizer.eval_dataloader = eval_dataloader + check_dataloader(eval_dataloader) + + seed = options.random_seed + random.seed(seed) + np.random.seed(seed) + wrapped_model = wrap_model_from(model, conf) + if eval_metric is not None: - quantizer.metric = eval_metric - return quantizer() + metric = register_customer_metric(eval_metric, conf.framework) + else: + metric = None + + config = _Config(quantization=conf, benchmark=None, pruning=None, distillation=None, nas=None) + strategy_name = conf.tuning_criterion.strategy + + if conf.quant_level == "auto": + strategy_name = "auto" + elif conf.quant_level == 0: + strategy_name = "conservative" + + if strategy_name == "mse_v2": + if not (conf.framework.startswith("tensorflow")\ + or conf.framework == 'pytorch_fx'): # pragma: no cover + strategy_name = "basic" + logger.warning(f"MSE_v2 does not support {conf.framework} now, use basic instead.") + logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.") + assert strategy_name in STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy_name) + + logger.info(f"Start {strategy_name} tuning.") + _resume = None + # check if interrupted tuning procedure exists. if yes, it will resume the + # whole auto tune process. + resume_file = os.path.abspath(os.path.expanduser(options.resume_from)) \ + if options.workspace and options.resume_from else None + if resume_file: + assert os.path.exists(resume_file), \ + "The specified resume file {} doesn't exist!".format(resume_file) + with open(resume_file, 'rb') as f: + _resume = pickle.load(f).__dict__ + + if eval_func is None and eval_dataloader is None: # pragma: no cover + logger.info("Quantize model without tuning!") + + strategy = STRATEGIES[strategy_name]( + model=wrapped_model, + conf=config, + q_dataloader=calib_dataloader, + q_func=calib_func, + eval_func=eval_func, + eval_dataloader=eval_dataloader, + eval_metric=metric, + resume=_resume, + q_hooks=None + ) + + try: + with time_limit(conf.tuning_criterion.timeout): + logger.debug("Dump user configuration:") + conf_dict = {} + dump_class_attrs(conf, conf_dict) + logger.info(conf_dict) + strategy.traverse() + except KeyboardInterrupt: + pass + except Exception as e: # pragma: no cover + logger.error("Unexpected exception {} happened during tuning.".format(repr(e))) + import traceback + traceback.print_exc() + finally: + if strategy.best_qmodel: + logger.info( + "Specified timeout or max trials is reached! " + "Found a quantized model which meet accuracy goal. Exit.") + strategy.deploy_config() + else: + logger.error( + "Specified timeout or max trials is reached! " + "Not found any quantized model which meet accuracy goal. 
Exit.") + + return strategy.best_qmodel diff --git a/neural_compressor/utils/utility.py b/neural_compressor/utils/utility.py index 43d95c26e0d..a146eaff77b 100644 --- a/neural_compressor/utils/utility.py +++ b/neural_compressor/utils/utility.py @@ -54,22 +54,27 @@ 'mxnet': ['mxnet'], } + def version1_lt_version2(version1, version2): """Check whether version1 is less than version2.""" return parse_version(version1) < parse_version(version2) - + + def version1_gt_version2(version1, version2): """Check whether version1 is greater than version2.""" return parse_version(version1) > parse_version(version2) + def version1_eq_version2(version1, version2): """Check whether version1 is equal to version2.""" return parse_version(version1) == parse_version(version2) + def version1_gte_version2(version1, version2): """Check whether version1 is greater than version2 or is equal to it.""" return parse_version(version1) > parse_version(version2) or parse_version(version1) == parse_version(version2) + def version1_lte_version2(version1, version2): """Check whether version1 is less than version2 or is equal to it.""" return parse_version(version1) < parse_version(version2) or parse_version(version1) == parse_version(version2) @@ -109,7 +114,7 @@ def __call__(self, *args, **kwargs): def singleton(cls): """Not displayed in API Docs. - + Singleton decorater. """ instances = {} @@ -173,7 +178,7 @@ def get_size(obj, seen=None): def compute_sparsity(tensor): """Compute the sparsity. - + Args: tensor: Tensorflow or Pytorch tensor @@ -411,7 +416,6 @@ def DequantizeWeight(weight_tensor, min_filter_tensor, max_filter_tensor): weight_tensor[:,:,:,i] = weight_tensor[:,:,:,i] * ((max_filter_tensor[i] - min_filter_tensor[i])/ 127.0) - def Dequantize(data, scale_info): """Dequantize the data with the scale_info.""" import numpy as np @@ -425,7 +429,7 @@ def Dequantize(data, scale_info): class CaptureOutputToFile(object): """Not displayed in API Docs. - + Capture the output to file. """ def __init__(self, tmp_file_path, stream=sys.stderr): @@ -450,7 +454,7 @@ class Statistics(): """The statistics printer.""" def __init__(self, data, header, field_names, output_handle=logger.info): """Init a Statistics object. - + Args: data: The statistics data header: The table header @@ -498,6 +502,7 @@ class GLOBAL_STATE(): """Access the global model.""" STATE = MODE.QUANTIZATION + def load_data_from_pkl(path, filename): """Load data from local pkl file. @@ -513,6 +518,7 @@ def load_data_from_pkl(path, filename): except FileExistsError: logging.getLogger("neural_compressor").info('Can not open %s.' % path) + def dump_data_to_local(data, path, filename): """Dump data to local as pkl file. @@ -533,7 +539,6 @@ def dump_data_to_local(data, path, filename): logging.getLogger("neural_compressor").info("Dumped data to %s" % file_path) - def set_random_seed(seed: int): """Set the random seed in config.""" from neural_compressor.config import options @@ -557,6 +562,7 @@ def set_tensorboard(tensorboard: bool): from neural_compressor.config import options options.tensorboard = tensorboard + def show_memory_info(hint): """Show process full memory.""" pid = os.getpid() @@ -567,7 +573,7 @@ def show_memory_info(hint): print('{} memory used: {} MB'.format(hint, memory)) -def dump_class_attrs(obj, result = {}): +def dump_class_attrs(obj, result={}): """Dump the attributes and values of a config class. 
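A hedged usage sketch of this helper, mirroring the conf_dict = {}; dump_class_attrs(conf, conf_dict) call made by the tuning entry point earlier in this patch; the config object chosen here is only an example:

    from neural_compressor import PostTrainingQuantConfig
    from neural_compressor.utils.utility import dump_class_attrs

    conf = PostTrainingQuantConfig()
    dumped = {}
    dump_class_attrs(conf, dumped)  # fills `dumped` with the config's attribute tree
    print(dumped)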
Args: @@ -586,9 +592,7 @@ def dump_class_attrs(obj, result = {}): else: attr = attr[1:] if attr.startswith('_') else attr result[obj_name][attr] = value - - - + class DotDict(dict): """access yaml using attributes instead of using the dictionary notation. @@ -649,7 +653,6 @@ def __setstate__(self, d): __setattr__, __getattr__ = __setitem__, __getitem__ - def compare_objects(obj1, obj2, ignore_attrs): """Compare two objects and ignore the specified attributes. @@ -673,4 +676,4 @@ def compare_objects(obj1, obj2, ignore_attrs): # Compare the attributes, ignoring the specified ones for attr in attrs1 - set(ignore_attrs): if getattr(obj1, attr) != getattr(obj2, attr): - return False \ No newline at end of file + return False diff --git a/test/benchmark/test_benchmark_2.x.py b/test/benchmark/test_benchmark_2.x.py index 88a868e1a85..c812059de0f 100644 --- a/test/benchmark/test_benchmark_2.x.py +++ b/test/benchmark/test_benchmark_2.x.py @@ -8,6 +8,8 @@ import tempfile import re from neural_compressor.adaptor.tf_utils.util import write_graph +from neural_compressor.benchmark import benchmark_with_raw_cmd +from neural_compressor.config import BenchmarkConfig def build_benchmark(): @@ -57,22 +59,42 @@ def build_benchmark(): with open('fake_data_25.py', "w", encoding="utf-8") as f: f.writelines(fake_data_25) + def build_benchmark2(): seq = [ "from argparse import ArgumentParser\n", "arg_parser = ArgumentParser(description='Parse args')\n", "arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input model')\n", "args = arg_parser.parse_args()\n", - "from neural_compressor.benchmark import fit\n" + "import time\n", + "import numpy as np\n", + "from neural_compressor.benchmark import benchmark_with_raw_cmd\n", "from neural_compressor.data import Datasets\n", + "from neural_compressor.model import Model\n", "dataset = Datasets('tensorflow')['dummy']((5, 32, 32, 1), label=True)\n", "from neural_compressor.data.dataloaders.dataloader import DataLoader\n", "b_dataloader = DataLoader(framework='tensorflow', dataset=dataset)\n", - "fit(args.input_model, b_dataloader=b_dataloader)\n" + "model = Model(args.input_model)\n", + "input_tensor = model.input_tensor\n", + "output_tensor = model.output_tensor if len(model.output_tensor)>1 else model.output_tensor[0]\n", + "iteration = 10\n", + "latency_list = []\n", + "for idx, (inputs, labels) in enumerate(b_dataloader):\n", + " inputs = np.array([inputs])\n", + " feed_dict = dict(zip(input_tensor, inputs))\n", + " start = time.time()\n", + " predictions = model.sess.run(output_tensor, feed_dict)\n", + " end = time.time()\n", + " latency_list.append(end-start)\n", + " if idx + 1 == iteration:\n", + " break\n", + "latency = np.array(latency_list).mean()\n", + "print('Latency: {:.3f} ms'.format(latency * 1000))\n", + "print('Throughput: {:.3f} images/sec'.format(1. 
/ latency))\n" ] - with open('fake2.py', "w", encoding="utf-8") as f: + with open('fake_raw_cmd.py', "w", encoding="utf-8") as f: f.writelines(seq) @@ -126,14 +148,14 @@ def setUpClass(self): def tearDownClass(self): if os.path.exists('fake.py'): os.remove('fake.py') - if os.path.exists('fake2.py'): - os.remove('fake2.py') if os.path.exists('fake_data_5.py'): os.remove('fake_data_5.py') if os.path.exists('fake_data_15.py'): os.remove('fake_data_15.py') if os.path.exists('fake_data_25.py'): os.remove('fake_data_25.py') + if os.path.exists('fake_raw_cmd.py'): + os.remove('fake_raw_cmd.py') shutil.rmtree('nc_workspace', ignore_errors=True) def test_benchmark(self): @@ -172,6 +194,16 @@ def test_benchmark_data_25(self): self.assertIsNotNone(throughput) os.system("rm *.log") + def test_benchmark_raw_cmd(self): + conf = BenchmarkConfig(warmup=5, iteration=10, cores_per_instance=4, num_of_instance=2) + raw_cmd = "python fake_raw_cmd.py --input_model={}".format(self.graph_path) + benchmark_with_raw_cmd(raw_cmd, config=conf) + for i in range(2): + with open(f'2_4_{i}.log', "r") as f: + for line in f: + throughput = re.search(r"Throughput:\s+(\d+(\.\d+)?) images/sec", line) + self.assertIsNotNone(throughput) + if __name__ == "__main__": unittest.main() diff --git a/test/mixed_precision/test_mixed_precision.py b/test/mixed_precision/test_mixed_precision.py index 938530a7bcd..099639b0159 100644 --- a/test/mixed_precision/test_mixed_precision.py +++ b/test/mixed_precision/test_mixed_precision.py @@ -315,7 +315,7 @@ def test_mixed_precision_with_evaluation(self): eval_dataloader=self.matmul_dataloader, eval_metric=ONNXRT_QL_METRICS["MSE"]()) self.assertTrue(any([i.op_type == 'Cast' for i in output_model.nodes()])) - + def test_mixed_precision_with_evaluation_old_api(self): from neural_compressor.conf.config import MixedPrecision_Conf from neural_compressor.experimental import MixedPrecision From 130349ff5134a7e3f5b0481ff0f6cf31e77c1751 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Thu, 27 Apr 2023 16:44:40 +0800 Subject: [PATCH 02/14] Update API Signed-off-by: Cheng, Penghui --- neural_compressor/adaptor/onnxrt.py | 4 ++-- neural_compressor/adaptor/pytorch.py | 2 +- neural_compressor/config.py | 22 ++++++++++---------- neural_compressor/mix_precision.py | 2 +- test/config/test_config_2.x.py | 2 +- test/mixed_precision/test_mixed_precision.py | 4 ++-- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/neural_compressor/adaptor/onnxrt.py b/neural_compressor/adaptor/onnxrt.py index 32737b8c328..3912568274a 100644 --- a/neural_compressor/adaptor/onnxrt.py +++ b/neural_compressor/adaptor/onnxrt.py @@ -117,7 +117,7 @@ def __init__(self, framework_specific_info): static=self.static, format=self.format, local_config_file=os.path.join(os.path.dirname(__file__), config_file)) - + self.work_space = framework_specific_info["workspace_path"] self.reduce_range = framework_specific_info["reduce_range"] if \ "reduce_range" in framework_specific_info else not CpuInfo().vnni @@ -133,7 +133,7 @@ def __init__(self, framework_specific_info): continue self.quantizable_op_types += \ self.query_handler.get_op_types_by_precision(precision=precision) - + if self.backend == 'TensorrtExecutionProvider': self.recipes['add_qdq_pair_to_weight'] = True self.recipes['dedicated_qdq_pair'] = True diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index 86b1e463d61..2a40c113966 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -4253,7 
+4253,7 @@ def get_quantization_capability(self, datatype='int8'): def get_quant_datatypes(self): """Got low-precision data types for quantization. - + Collects all data types for quantization, such as int8, int4. """ # TODO to handle other data types such FP8, FP8E4M3 diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 670f3573a67..88cf8a3c6ad 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -1647,7 +1647,7 @@ class MixedPrecisionConfig(object): backend (str, optional): Backend for model execution. Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', default is 'default'. - precision (str, optional): Target precision for mix precision conversion. + precisions ([str, list], optional): Target precision for mix precision conversion. Support 'bf16' and 'fp16', default is 'bf16'. inputs (list, optional): Inputs of model, default is []. outputs (list, optional): Outputs of model, default is []. @@ -1667,7 +1667,7 @@ class MixedPrecisionConfig(object): def __init__(self, device="cpu", backend="default", - precision="bf16", + precisions="bf16", model=None, model_name="", inputs=[], @@ -1683,27 +1683,27 @@ def __init__(self, self.excluded_precisions = excluded_precisions self.accuracy_criterion = accuracy_criterion self.tuning_criterion = tuning_criterion - self.precision = precision - self.use_bf16 = "bf16" in self.precision + self.precisions = precisions + self.use_bf16 = "bf16" in self.precisions self.model = model self.model_name = model_name self._framework = None @property - def precision(self): + def precisions(self): """Get precision.""" - return self._precision + return self._precisions - @precision.setter - def precision(self, precision): + @precisions.setter + def precisions(self, precision): """Set precision.""" if isinstance(precision, str): assert precision in ["fp16", "bf16"], "Only support 'fp16' and 'bf16' for mix precision." - self._precision = [precision] + self._precisions = [precision] elif isinstance(precision, list): assert all([i in ["fp16", "bf16"] for i in precision]), "Only " \ "support 'fp16' and 'bf16' for mix precision." 
- self._precision = precision + self._precisions = precision @property def model(self): @@ -2088,7 +2088,7 @@ def precisions(self, precisions): if not isinstance(precisions, list): precisions = [precisions] for pr in precisions: - _check_value('precision', pr, str, ['int8', 'uint8', 'fp32', 'bf16', 'fp16']) + _check_value('precisions', pr, str, ['int8', 'uint8', 'fp32', 'bf16', 'fp16']) self._precisions = precisions diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py index 714199f96f6..def66642c4c 100644 --- a/neural_compressor/mix_precision.py +++ b/neural_compressor/mix_precision.py @@ -88,7 +88,7 @@ def fit(model, if eval_dataloader is not None: check_dataloader(eval_dataloader) - if config.precision in config.excluded_precisions: + if config.precisions in config.excluded_precisions: logger.warning("Target precision is in excluded_precisions, " "please modify precision or excluded_precisions to make it understandable.") sys.exit(0) diff --git a/test/config/test_config_2.x.py b/test/config/test_config_2.x.py index e24b5ebc276..71700f9013f 100644 --- a/test/config/test_config_2.x.py +++ b/test/config/test_config_2.x.py @@ -55,7 +55,7 @@ def test_config(self): cfg = MixedPrecisionConfig() a = conf(mixed_precision=cfg) - self.assertEqual(a.mixed_precision.precision, ["bf16"]) + self.assertEqual(a.mixed_precision.precisions, ["bf16"]) cfg = MXNet() cfg.precisions = "bf16" diff --git a/test/mixed_precision/test_mixed_precision.py b/test/mixed_precision/test_mixed_precision.py index 099639b0159..15ba52e9163 100644 --- a/test/mixed_precision/test_mixed_precision.py +++ b/test/mixed_precision/test_mixed_precision.py @@ -274,7 +274,7 @@ def test_on_non_enabled_dtype(self): output_model = mix_precision.fit(self.onnx_model, conf) self.assertEqual(cm.exception.code, 0) - conf = MixedPrecisionConfig(precision="fp16") + conf = MixedPrecisionConfig(precisions="fp16") with self.assertRaises(SystemExit) as cm: output_model = mix_precision.fit(self.tf_model, conf) self.assertEqual(cm.exception.code, 0) @@ -309,7 +309,7 @@ def test_mixed_precision_with_evaluation(self): #self.assertTrue(any([i.op_type == 'Cast' for i in output_model.nodes()])) tuning_criterion = TuningCriterion(max_trials=3, timeout=1000000) - conf = MixedPrecisionConfig(device='gpu', tuning_criterion=tuning_criterion, backend='onnxrt_cuda_ep', precision="fp16") + conf = MixedPrecisionConfig(device='gpu', tuning_criterion=tuning_criterion, backend='onnxrt_cuda_ep', precisions="fp16") output_model = mix_precision.fit(self.onnx_model, conf, eval_dataloader=self.matmul_dataloader, From 8c6798f9979db2a64821099a22b798289b9594ef Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Thu, 27 Apr 2023 17:17:58 +0800 Subject: [PATCH 03/14] Fixed pylink error Signed-off-by: Cheng, Penghui --- neural_compressor/benchmark.py | 151 +++++++++++++++--------------- neural_compressor/model/model.py | 2 +- neural_compressor/quantization.py | 7 +- 3 files changed, 83 insertions(+), 77 deletions(-) diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py index d9537856a8a..bd159b09f23 100644 --- a/neural_compressor/benchmark.py +++ b/neural_compressor/benchmark.py @@ -137,79 +137,84 @@ def get_bounded_threads(core_ids, threads, sockets): def run_instance(model, conf, b_dataloader=None, b_func=None): - """Run the instance with the configuration. 
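Given the precision -> precisions rename applied throughout this patch, a hedged usage sketch; precisions accepts a single string or a list, as the setter above enforces, and the model path is illustrative:

    from neural_compressor import MixedPrecisionConfig, mix_precision

    conf = MixedPrecisionConfig(precisions="bf16")
    # conf = MixedPrecisionConfig(precisions=["bf16", "fp16"])
    converted = mix_precision.fit("./frozen_model.pb", conf)  # illustrative model path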
- - Args: - runs benchmarking with numactl on specific cores and instances set - by user config and returns model performance - """ - results = {} - if b_func is None: - GLOBAL_STATE.STATE = MODE.BENCHMARK - framework_specific_info = {'device': conf.device, - 'approach': None, - 'random_seed': options.random_seed, - 'backend': conf.backend if conf.backend is not None else 'default', - 'format': 'default'} - framework = conf.framework.lower() - if 'tensorflow' in framework: - framework_specific_info.update({"inputs": conf.inputs, \ - "outputs": conf.outputs, \ - "recipes": {}, \ - 'workspace_path': options.workspace}) - if framework == 'keras': - framework_specific_info.update({'workspace_path': options.workspace}) - if framework == 'mxnet': - framework_specific_info.update({"b_dataloader": b_dataloader}) - if 'onnx' in framework: - framework_specific_info.update( - {'workspace_path': options.workspace, \ - 'graph_optimization': OPTIONS[framework].graph_optimization}) - if framework == 'pytorch_ipex' or framework == 'pytorch' or framework == 'pytorch_fx': - framework_specific_info.update({"workspace_path": options.workspace, - "q_dataloader": None}) - - assert isinstance(model, BaseModel), 'need set neural_compressor Model for quantization....' - - adaptor = FRAMEWORKS[framework](framework_specific_info) - - assert b_dataloader is not None, "dataloader should not be None" - - from neural_compressor.utils.create_obj_from_config import create_eval_func - b_func = create_eval_func(conf.framework, - b_dataloader, - adaptor, - None) - - objectives = MultiObjective(["performance"], - {'relative': 0.1}, - is_measure=True) - - val = objectives.evaluate(b_func, model) - # measurer contain info not only performance(eg, memory, model_size) - # also measurer have result list among steps - acc, _ = val - batch_size = b_dataloader.batch_size - warmup = conf.warmup - if len(objectives.objectives[0].result_list()) < warmup: - if len(objectives.objectives[0].result_list()) > 1 and warmup != 0: - warmup = 1 - else: - warmup = 0 - - result_list = objectives.objectives[0].result_list()[warmup:] - latency = np.array(result_list).mean() / batch_size - results["performance"] = acc, batch_size, result_list - - logger.info("\nbenchmark result:") - for i, res in enumerate(result_list): - logger.debug("Iteration {} result {}:".format(i, res)) - logger.info("Batch size = {}".format(batch_size)) - logger.info("Latency: {:.3f} ms".format(latency * 1000)) - logger.info("Throughput: {:.3f} images/sec".format(1. / latency)) - return results - else: - b_func(model.model) + """Run the instance with the configuration. + + Args: + model (object): The model to be benchmarked. + conf (BenchmarkConfig): The configuration for benchmark containing accuracy goal, + tuning objective and preferred calibration & quantization + tuning space etc. + b_dataloader: The dataloader for frameworks. + b_func: Customized benchmark function. If user passes the dataloader, + then b_func is not needed. 
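For the functional benchmark entry points touched in this file, a hedged usage sketch assembled from the calls exercised by the updated tests; the frozen-graph path is illustrative:

    from neural_compressor.benchmark import fit
    from neural_compressor.config import BenchmarkConfig
    from neural_compressor.data import Datasets
    from neural_compressor.data.dataloaders.dataloader import DataLoader

    conf = BenchmarkConfig(warmup=5, iteration=10, cores_per_instance=4, num_of_instance=2)
    dataset = Datasets("tensorflow")["dummy"]((5, 32, 32, 1), label=True)
    b_dataloader = DataLoader(framework="tensorflow", dataset=dataset)
    fit("./frozen_model.pb", conf, b_dataloader=b_dataloader)  # illustrative model path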
+ """ + results = {} + if b_func is None: + GLOBAL_STATE.STATE = MODE.BENCHMARK + framework_specific_info = {'device': conf.device, + 'approach': None, + 'random_seed': options.random_seed, + 'backend': conf.backend if conf.backend is not None else 'default', + 'format': 'default'} + framework = conf.framework.lower() + if 'tensorflow' in framework: + framework_specific_info.update({"inputs": conf.inputs, \ + "outputs": conf.outputs, \ + "recipes": {}, \ + 'workspace_path': options.workspace}) + if framework == 'keras': + framework_specific_info.update({'workspace_path': options.workspace}) + if framework == 'mxnet': + framework_specific_info.update({"b_dataloader": b_dataloader}) + if 'onnx' in framework: + framework_specific_info.update( + {'workspace_path': options.workspace, \ + 'graph_optimization': OPTIONS[framework].graph_optimization}) + if framework == 'pytorch_ipex' or framework == 'pytorch' or framework == 'pytorch_fx': + framework_specific_info.update({"workspace_path": options.workspace, + "q_dataloader": None}) + + assert isinstance(model, BaseModel), 'need set neural_compressor Model for quantization....' + + adaptor = FRAMEWORKS[framework](framework_specific_info) + + assert b_dataloader is not None, "dataloader should not be None" + + from neural_compressor.utils.create_obj_from_config import create_eval_func + b_func = create_eval_func(conf.framework, + b_dataloader, + adaptor, + None) + + objectives = MultiObjective(["performance"], + {'relative': 0.1}, + is_measure=True) + + val = objectives.evaluate(b_func, model) + # measurer contain info not only performance(eg, memory, model_size) + # also measurer have result list among steps + acc, _ = val + batch_size = b_dataloader.batch_size + warmup = conf.warmup + if len(objectives.objectives[0].result_list()) < warmup: + if len(objectives.objectives[0].result_list()) > 1 and warmup != 0: + warmup = 1 + else: + warmup = 0 + + result_list = objectives.objectives[0].result_list()[warmup:] + latency = np.array(result_list).mean() / batch_size + results["performance"] = acc, batch_size, result_list + + logger.info("\nbenchmark result:") + for i, res in enumerate(result_list): + logger.debug("Iteration {} result {}:".format(i, res)) + logger.info("Batch size = {}".format(batch_size)) + logger.info("Latency: {:.3f} ms".format(latency * 1000)) + logger.info("Throughput: {:.3f} images/sec".format(1. / latency)) + return results + else: + b_func(model.model) def generate_prefix(core_list): diff --git a/neural_compressor/model/model.py b/neural_compressor/model/model.py index fb408c6f8f5..ed110f7bd0f 100644 --- a/neural_compressor/model/model.py +++ b/neural_compressor/model/model.py @@ -193,7 +193,7 @@ def wrap_model_from(user_model, conf): """Wrap the user model and dispatch to framework specific internal model object. Args: - user_model: user are supported to set model from original framework model format + user_model: user are supported to set model from original framework model format (eg, tensorflow frozen_pb or path to a saved model), but not recommended. Best practice is to set from a initialized neural_compressor.common.Model. If tensorflow model is used, model's inputs/outputs will be auto inferred, diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index 6e29c2d6de2..295e16dc9e3 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -111,9 +111,10 @@ def eval_func(model): 2. 
User also can get the built-in metrics by neural_compressor.Metric: Metric(name="topk", k=1) - 3. User also can set specific metric through this api. The metric class should take the outputs of the model or - postprocess(if have) as inputs, neural_compressor built-in metric always take(predictions, labels) - as inputs for update, and user_metric.metric_cls should be sub_class of neural_compressor.metric.BaseMetric. + 3. User also can set specific metric through this api. The metric class should take the outputs of + the model or postprocess(if have) as inputs, neural_compressor built-in metric always + take (predictions, labels) as inputs for update, and user_metric.metric_cls should be + sub_class of neural_compressor.metric.BaseMetric. Example:: From 2bc6b54d129363266784fc35276f67e484492bb5 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Thu, 4 May 2023 17:43:30 +0800 Subject: [PATCH 04/14] Removed redundant class in training.py --- docs/source/pruning.md | 2 +- .../pruning/eager/run_glue_no_trainer.py | 2 +- .../eager/run_glue_no_trainer_mixed.py | 2 +- neural_compressor/__init__.py | 5 +- neural_compressor/benchmark.py | 8 +- neural_compressor/compression/__init__.py | 2 - neural_compressor/compression/callbacks.py | 371 +-------------- .../compression/pruner/README.md | 2 +- .../compression/pruner/pruners.py | 67 ++- neural_compressor/compression/pruner/utils.py | 4 +- neural_compressor/mix_precision.py | 4 +- neural_compressor/model/model.py | 135 ++---- neural_compressor/quantization.py | 4 +- neural_compressor/strategy/strategy.py | 3 +- neural_compressor/training.py | 450 ++++++++++-------- .../test_adaptor_pytorch_2.x.py | 9 +- test/pruning_2_plus.x/test_pruning.py | 3 +- test/pruning_2_plus.x/test_pruning_block.py | 3 +- 18 files changed, 382 insertions(+), 694 deletions(-) diff --git a/docs/source/pruning.md b/docs/source/pruning.md index 1ba7d20c93a..1b7f064ca77 100644 --- a/docs/source/pruning.md +++ b/docs/source/pruning.md @@ -301,7 +301,7 @@ The following section exemplifies how to use hooks in user pass-in training func [**Experimental option** ]Modify model and optimizer. 
```python - from neural_compressor.training import prepare_pruning, WeightPruningConfig + from neural_compressor import prepare_pruning, WeightPruningConfig config = WeightPruningConfig(configs) prepare_pruning(config, model, optimizer) # modify model and optimizer for epoch in range(num_train_epochs): diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer.py b/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer.py index 0a8fe2891b1..0fd9c1ad61d 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer.py +++ b/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer.py @@ -523,7 +523,7 @@ def preprocess_function(examples): # pruner = Pruning(config) # pruner.model = model # pruner.on_train_begin() - from neural_compressor.training import prepare_pruning + from neural_compressor import prepare_pruning prepare_pruning(configs, model, optimizer) diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer_mixed.py b/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer_mixed.py index b2c874fee49..9860d7b4e66 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer_mixed.py +++ b/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer_mixed.py @@ -46,7 +46,7 @@ ) from transformers.file_utils import get_full_repo_name from transformers.utils.versions import require_version -from neural_compressor.training import prepare_pruning,WeightPruningConfig +from neural_compressor import prepare_pruning, WeightPruningConfig logger = logging.getLogger(__name__) diff --git a/neural_compressor/__init__.py b/neural_compressor/__init__.py index 977406e75a1..ba722b9db2c 100644 --- a/neural_compressor/__init__.py +++ b/neural_compressor/__init__.py @@ -18,11 +18,12 @@ """IntelĀ® Neural Compressor: An open-source Python library supporting popular model compression techniques.""" from .version import __version__ # we need to set a global 'NA' backend, or Model can't be used -from .utils.utility import set_random_seed, set_tensorboard, set_workspace, set_resume_from -from .utils import options +from .experimental.compression import prepare_pruning from .config import DistillationConfig, PostTrainingQuantConfig, \ WeightPruningConfig, QuantizationAwareTrainingConfig, \ MixedPrecisionConfig from .contrib import * from .model import * from .metric import * +from .utils import options +from .utils.utility import set_random_seed, set_tensorboard, set_workspace, set_resume_from diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py index bd159b09f23..1d5613840e9 100644 --- a/neural_compressor/benchmark.py +++ b/neural_compressor/benchmark.py @@ -32,8 +32,7 @@ from .utils import logger from .utils import OPTIONS from .utils.utility import GLOBAL_STATE, MODE -from .model import BaseModel -from .model.model import wrap_model_from +from .model import BaseModel, Model from .utils import logger from .utils.utility import Statistics @@ -184,7 +183,8 @@ def run_instance(model, conf, b_dataloader=None, b_func=None): b_func = create_eval_func(conf.framework, b_dataloader, adaptor, - None) + None, + iteration=conf.iteration) objectives = MultiObjective(["performance"], {'relative': 0.1}, @@ -403,7 +403,7 @@ def fit(model, config, b_dataloader=None, 
b_func=None): if config.backend == "ipex": import intel_extension_for_pytorch - wrapped_model = wrap_model_from(model, config) + wrapped_model = Model(model, conf=config) if b_dataloader is not None: check_dataloader(b_dataloader) diff --git a/neural_compressor/compression/__init__.py b/neural_compressor/compression/__init__.py index 14f79b22d7c..7a1b4c9fb70 100644 --- a/neural_compressor/compression/__init__.py +++ b/neural_compressor/compression/__init__.py @@ -16,6 +16,4 @@ # limitations under the License. from .callbacks import QuantizationAwareTrainingCallbacks, DistillationCallbacks, PruningCallbacks -from ..experimental.compression import prepare_pruning -from .. import WeightPruningConfig from .pruner.model_slim.auto_slim import model_slim, parse_auto_slim_config diff --git a/neural_compressor/compression/callbacks.py b/neural_compressor/compression/callbacks.py index 82544b9a500..ffc2dd50510 100644 --- a/neural_compressor/compression/callbacks.py +++ b/neural_compressor/compression/callbacks.py @@ -21,18 +21,11 @@ 'PruningCallbacks' and 'DistillationCallbacks'. """ -import numpy as np -import os -import pickle -import random from .distillation.criterions import Criterions -from ..adaptor import FRAMEWORKS -from ..config import _Config, options from ..utils import logger -from ..utils.utility import time_limit, LazyImport +from ..utils.utility import LazyImport from ..model import BaseModel, Model -from ..model.model import get_model_fwk_name -from ..strategy import STRATEGIES +from ..model.model import MODELS from .pruner.utils import process_config, parse_to_prune, get_sparsity_ratio from .pruner.pruners import get_pruner, PRUNERS # model auto slim related @@ -56,21 +49,13 @@ def __init__(self, conf=None, model=None): conf: A Config object which definds the compressor behavior. Just like: QuantizationAwareTrainingConfig, WeightPruningConfig \ and DistillationConfig. - model: Model to be compressed in this object. + model: Model to be compressed in this object. It should be neural compressor model. """ - self.conf = None - self.cfg = None + assert model is None or isinstance(model, BaseModel), "The model should be a instanceof BaseModel" + self.conf = conf self.framework = None - self._model = None self.model = model - self._train_func = None - self._train_dataloader = None - self._eval_func = None - self._eval_dataloader = None - self._train_distributed = False - self._evaluation_distributed = False self.adaptor = None - self._metric = None self.hooks = { 'on_train_begin': self.on_train_begin, 'on_train_end': self.on_train_end, @@ -185,281 +170,6 @@ def __repr__(self): """Represent this class.""" pass - @property - def model(self): - """Getter of model in neural_compressor.model.""" - return self._model - - @model.setter - def model(self, user_model): - """Set the user model and dispatch to framework specific internal model object. - - Args: - user_model: user are supported to set model from original framework model format - (eg, tensorflow frozen_pb or path to a saved model), - but not recommended. Best practice is to set from a initialized - neural_compressor.Model. - If tensorflow model is used, model's inputs/outputs will be - auto inferenced, but sometimes auto inferenced - inputs/outputs will not meet your requests, - set them manually in config yaml file. - Another corner case is slim model of tensorflow, - be careful of the name of model configured in yaml file, - make sure the name is in supported slim model list. 
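Because the callbacks now assert that the incoming model is already a BaseModel instance (see the __init__ change above), a minimal sketch of the wrapping step, using the bare Model(user_model) form that the teacher_model setter in this file also uses; the torch module is illustrative:

    import torch
    from neural_compressor.model import Model

    net = torch.nn.Linear(8, 2)  # illustrative torch module
    nc_model = Model(net)        # wrap before handing it to a callbacks object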
- - """ - if user_model is None: - return - - if self.framework == None: - self.framework = get_model_fwk_name( - user_model.model if isinstance(user_model, BaseModel) else user_model) - if self.framework == "tensorflow": - from ..model.tensorflow_model import get_model_type - if not isinstance(user_model, BaseModel) and get_model_type(user_model) == 'keras'\ - and self.conf.backend == 'itex': - self.framework = 'keras' - if self.framework == "pytorch": - try: - if self.conf.quantization.backend == "default": - self.framework = "pytorch_fx" - elif self.conf.quantization.backend == "ipex": - self.framework = "pytorch_ipex" - self.conf.quantization.framework = self.framework - except Exception as e: - pass - - if not isinstance(user_model, BaseModel): - logger.warning("Force convert framework model to neural_compressor model.") - if "tensorflow" in self.framework or self.framework == "keras": - if self.conf.quantization and self.conf.quantization.approach == "quant_aware_training": - self._model = Model(user_model, backend='tensorflow_qat', device=self.conf.device) - else: - self._model = Model(user_model, backend=self.framework, device=self.conf.device) - else: - self._model = Model(user_model, backend=self.framework) - else: - self._model = user_model - - if 'tensorflow' in self.framework: - try: - self._model.name = self.conf.quantization.model_name - self._model.output_tensor_names = self.conf.quantization.outputs - self._model.input_tensor_names = self.conf.quantization.inputs - self._model.workspace_path = options.workspace - except Exception as e: - self._model.name = None - self._model.output_tensor_names = None - self._model.input_tensor_names = None - self._model.workspace_path = None - - def pre_process(self): - """Create strategy to optimize model.""" - # Remove qat hooks if user want to tune accuracy with train function. - if self.adaptor is not None and hasattr(self.adaptor, "_pre_hook_for_qat"): - self.remove_hook("on_train_begin", self.adaptor._pre_hook_for_qat) - self.remove_hook("on_train_end", self.adaptor._post_hook_for_qat) - - strategy = self.conf.quantization.tuning_criterion.strategy.lower() - if self.conf.quantization.quant_level == 0: - strategy = "conservative" - logger.info(f"On the premise that the accuracy meets the conditions, improve the performance.") - - if strategy == "mse_v2": - if not (self.conf.quantization.framework.startswith("tensorflow") \ - or self.conf.quantization.framework == 'pytorch_fx'): # pragma: no cover - strategy = "basic" - logger.warning(f"MSE_v2 does not support \ - {self.conf.quantization.framework} now, use basic instead.") - logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.") - assert strategy in STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy) - - _resume = None - # check if interrupted tuning procedure exists. if yes, it will resume the - # whole auto tune process. 
- self.resume_file = os.path.abspath(os.path.expanduser(options.resume_from)) \ - if options.workspace and options.resume_from else None - if self.resume_file: - assert os.path.exists(self.resume_file), \ - "The specified resume file {} doesn't exist!".format(self.resume_file) - with open(self.resume_file, 'rb') as f: - _resume = pickle.load(f).__dict__ - - self.strategy = STRATEGIES[strategy]( - model = self.model, - conf = self.conf, - q_dataloader=None, - q_func=self._train_func, - eval_func=self._eval_func, - eval_dataloader=self._eval_dataloader, - eval_metric=self.metric, - resume=_resume, - q_hooks=None) - - def execute(self): - """Quantization Aware Training execute routinue based on strategy design.""" - try: - with time_limit(self.conf.quantization.tuning_criterion.timeout): - logger.debug("Dump user yaml configuration:") - logger.debug(self.conf) - self.strategy.traverse() - except KeyboardInterrupt: - pass - except Exception as e: - logger.error("Unexpected exception {} happened during tuning.".format(repr(e))) - import traceback - traceback.print_exc() - finally: - if self.strategy.best_qmodel: - logger.info( - "Specified timeout or max trials is reached! " - "Found a quantized model which meet accuracy goal. Exit.") - self.strategy.deploy_config() - else: - logger.error( - "Specified timeout or max trials is reached! " - "Not found any quantized model which meet accuracy goal. Exit.") - - return self.strategy.best_qmodel - - def __call__(self): - """Execute this class. - - For derived classes, an override function is required. - """ - self.pre_process() - results = self.execute() - return results - - fit = __call__ - - @property - def train_func(self): - """Not support get train_func.""" - assert False, 'Should not try to get the value of `train_func` attribute.' - return None - - @train_func.setter - def train_func(self, user_train_func): - """Training function. - - Args: - user_train_func: This function takes "model" as input parameter - and executes entire training process with self - contained training hyper-parameters. If training_func set, - an evaluation process must be triggered and user should - set eval_dataloader with metric configured or directly eval_func - to make evaluation of the model executed. training_func will return - a trained model. - """ - self._train_func = user_train_func - - @property - def eval_func(self): - """Not support get eval_func.""" - assert False, 'Should not try to get the value of `eval_func` attribute.' - return None - - @eval_func.setter - def eval_func(self, user_eval_func): - """Eval function for component. - - Args: - user_eval_func: This function takes "model" as input parameter - and executes entire evaluation process with self - contained metrics. If eval_func set, - an evaluation process must be triggered - to make evaluation of the model executed. - """ - self._eval_func = user_eval_func - - @property - def eval_dataloader(self): - """Getter to eval dataloader.""" - return self._eval_dataloader - - @eval_dataloader.setter - def eval_dataloader(self, dataloader): - """Set Data loader for evaluation of component. - - It is iterable and the batched data should consists of yield (input, _). - the input in the batched data will be used for model inference, so it - should satisfy the input format of specific model. - User only need to set eval_dataloader when eval_dataloader can not be - configured from yaml file. 
- - Args: - dataloader(generator): user are supported to set a user defined dataloader - which meet the requirements that can yield tuple of - (input, label)/(input, _) batched data. Another good - practice is to use neural_compressor.experimental.common.DataLoader - to initialize a neural_compressor dataloader object. Notice - neural_compressor.experimental.common.DataLoader is just a wrapper of the - information needed to build a dataloader, it can't yield - batched data and only in this setter method - a 'real' train_dataloader will be created, - the reason is we have to know the framework info - and only after the Component object created then - framework information can be known. - Future we will support creating iterable dataloader - from neural_compressor.experimental.common.DataLoader. - """ - assert hasattr(dataloader, '__iter__') and \ - hasattr(dataloader, 'batch_size'), \ - 'dataloader must implement __iter__ method and batch_size attribute' - self._eval_dataloader = dataloader - - @property - def metric(self): - """Get `metric` attribute.""" - return self._metric - - @metric.setter - def metric(self, user_metric): - """Set metric class or a dict of built-in metric configures, - and neural_compressor will initialize this class when evaluation. - - 1. neural_compressor have many built-in metrics, - user can pass a metric configure dict to tell neural compressor what metric will be use. - You can set multi-metrics to evaluate the performance of a specific model. - Single metric: - {topk: 1} - Multi-metrics: - {topk: 1, - MSE: {compare_label: False}, - weight: [0.5, 0.5], - higher_is_better: [True, False] - } - For the built-in metrics, please refer to below link: - https://github.com/intel/neural-compressor/blob/master/docs/source/metric.md#supported-built-in-metric-matrix. - - 2. User also can set specific metric through this api. The metric class should take the outputs of the model or - postprocess(if have) as inputs, neural_compressor built-in metric always take(predictions, labels) - as inputs for update, and user_metric.metric_cls should be sub_class of neural_compressor.metric.BaseMetric. - - Args: - user_metric(neural_compressor.metric.Metric or a dict of built-in metric configures): - - """ - from ..metric import Metric as NCMetric, METRICS - if isinstance(user_metric, dict): - metric_cfg = user_metric - else: - if isinstance(user_metric, NCMetric): - name = user_metric.name - metric_cls = user_metric.metric_cls - metric_cfg = {name: {**user_metric.kwargs}} - else: - for i in ['reset', 'update', 'result']: - assert hasattr(user_metric, i), 'Please realise {} function' \ - 'in user defined metric'.format(i) - metric_cls = type(user_metric).__name__ - name = 'user_' + metric_cls - metric_cfg = {name: id(user_metric)} - metrics = METRICS(self.conf.quantization.framework) - metrics.register(name, metric_cls) - self._metric = metric_cfg - def remove_hook(self, scope, hook): """Remove hooks if user want to tune accuracy with train_func.""" for registed_hook in self.hooks_dict[scope]: @@ -474,40 +184,16 @@ class QuantizationAwareTrainingCallbacks(BaseCallbacks): In this class will apply all hooks for Quantization-Aware Training. """ - def __init__(self, conf=None, model=None): + def __init__(self, conf=None, model=None, adaptor=None): """Construct all the necessary attributes for the callbacks object. Args: conf: A QuantizationAwareTrainingConfig object which definds the compressor behavior. - model: Model to be quantized in this object. 
+ model: Model to be quantized in this object. It should be neural compressor model. """ - super(QuantizationAwareTrainingCallbacks, self).__init__(conf=None) - self.conf = _Config(quantization=conf, benchmark=None,pruning=None, distillation=None, nas=None) - self.model = model - - seed = options.random_seed - random.seed(seed) - np.random.seed(seed) - - framework_specific_info = {'device': self.conf.quantization.device, - 'random_seed': options.random_seed, - 'workspace_path': options.workspace, - 'q_dataloader': None, - 'backend': self.conf.quantization.backend if \ - self.conf.quantization.backend is not None else 'default', - 'format': self.conf.quantization.quant_format if \ - self.conf.quantization.quant_format is not None else 'default'} - if self.conf.quantization.approach is not None: - framework_specific_info['approach'] = self.conf.quantization.approach - - if 'tensorflow' in self.framework: - framework_specific_info.update( - {"inputs": self.conf.quantization.inputs, \ - "outputs": self.conf.quantization.outputs}) - self.adaptor = FRAMEWORKS[self.framework](framework_specific_info) - self.adaptor.model = self.model - self.register_hook('on_train_begin', self.adaptor._pre_hook_for_qat) - self.register_hook('on_train_end', self.adaptor._post_hook_for_qat) + super(QuantizationAwareTrainingCallbacks, self).__init__(conf=conf, model=model) + self.register_hook('on_train_begin', adaptor._pre_hook_for_qat) + self.register_hook('on_train_end', adaptor._post_hook_for_qat) def __repr__(self): """Represent this class.""" @@ -525,14 +211,10 @@ def __init__(self, conf=None, model=None): Args: conf: A WeightPruningConfig object which definds the compressor behavior. - model: Model to be Pruning in this object. + model: Model to be Pruning in this object. It should be neural compressor model. """ - super(PruningCallbacks, self).__init__(conf=None) - self.conf = _Config(pruning=conf, quantization=None, benchmark=None - , distillation=None, nas=None) - self.cfg = self.conf.pruning - self.model = model - self.pruners_info = process_config(self.cfg) + super(PruningCallbacks, self).__init__(conf=conf, model=model) + self.pruners_info = process_config(self.conf) self.pruners = [] self._generate_pruners() self.generate_hooks() @@ -541,8 +223,8 @@ def on_train_end(self): """Be called after the end of training.""" for on_train_end_hook in self.hooks_dict['on_train_end']: on_train_end_hook() - if isinstance(self._model.model, torch.nn.Module): - get_sparsity_ratio(self.pruners, self._model) + if isinstance(self.model.model, torch.nn.Module): + get_sparsity_ratio(self.pruners, self.model) def __repr__(self): """Return the class's string representation.""" @@ -557,9 +239,9 @@ def generate_hooks(self): def _generate_pruners(self): """Obtain Pruner objects.""" - if isinstance(self._model.model, torch.nn.Module): + if isinstance(self.model.model, torch.nn.Module): for info in self.pruners_info: - modules = parse_to_prune(info, self._model.model) + modules = parse_to_prune(info, self.model.model) if modules == {}: logger.warning("one pruner hooks no layers, please have a check") @@ -579,7 +261,7 @@ class DistillationCallbacks(BaseCallbacks): Args: conf: Distillation_Conf containing teacher model, distillation criterion etc. - model: Student model. + model: Student model. It should be neural compressor model. Attributes: _epoch_ran: A integer indicating how much epochs ran. 
@@ -591,11 +273,9 @@ class DistillationCallbacks(BaseCallbacks): def __init__(self, conf=None, model=None): """Initialize the attributes.""" - super(DistillationCallbacks, self).__init__() - self.conf = _Config(quantization=None, benchmark=None, pruning=None, distillation=conf, nas=None) - self.cfg = self.conf.distillation - self.model = model + super(DistillationCallbacks, self).__init__(conf=conf, model=model) + self.framework = list(MODELS.keys())[list(MODELS.values()).index(type(model))] self._teacher_model = None self._criterion = None self._epoch_ran = 0 @@ -604,9 +284,8 @@ def __init__(self, conf=None, model=None): self.best_score = 0 self.best_model = None self.hooks_registered = False - assert hasattr(self.cfg, "teacher_model"),\ - "Please assign teacher model in DistillationConfig." - self.teacher_model = self.cfg.teacher_model + assert hasattr(self.conf, "teacher_model"), "Please assign teacher model in DistillationConfig." + self.teacher_model = self.conf.teacher_model self.generate_hooks() self.create_criterion() @@ -642,7 +321,7 @@ def init_train_cfg(self): """Initialize the training configuration.""" if self._train_cfg is None: # train section of distillation section in yaml file should be configured. - self._train_cfg = self.cfg.criterion + self._train_cfg = self.conf.criterion assert self._train_cfg, "train field of distillation section in yaml file must " \ "be configured for distillation if train_func is NOT set." @@ -734,7 +413,7 @@ def teacher_model(self, user_model): """ if not isinstance(user_model, BaseModel): logger.warning("Force convert framework model to neural_compressor model.") - self._teacher_model = Model(user_model, backend=self.framework) + self._teacher_model = Model(user_model) else: self._teacher_model = user_model @@ -745,7 +424,7 @@ def student_model(self): Returns: The student model used in the distillation process. """ - return self._model + return self.model @property def train_cfg(self): diff --git a/neural_compressor/compression/pruner/README.md b/neural_compressor/compression/pruner/README.md index d0029438f4a..f8b0cfaa925 100644 --- a/neural_compressor/compression/pruner/README.md +++ b/neural_compressor/compression/pruner/README.md @@ -301,7 +301,7 @@ The following section exemplifies how to use hooks in user pass-in training func [**Experimental option** ]Modify model and optimizer. ```python - from neural_compressor.training import prepare_pruning, WeightPruningConfig + from neural_compressor import prepare_pruning, WeightPruningConfig config = WeightPruningConfig(configs) prepare_pruning(config, model, optimizer) # modify model and optimizer for epoch in range(num_train_epochs): diff --git a/neural_compressor/compression/pruner/pruners.py b/neural_compressor/compression/pruner/pruners.py index b06b0b58e87..df5904a3968 100644 --- a/neural_compressor/compression/pruner/pruners.py +++ b/neural_compressor/compression/pruner/pruners.py @@ -27,6 +27,7 @@ PRUNERS = {} + def register_pruner(name): """Class decorator to register a Pruner subclass to the registry. 
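To illustrate the decorator documented here, a hedged sketch of registering a custom pruner, following the same pattern the built-in pruners below use; the registry name and class body are illustrative, and BasePruner is assumed to be the base class defined in this module:

    @register_pruner("my_custom")
    class MyCustomPruner(BasePruner):
        """Illustrative pruner that only overrides the mask update step."""
        def update_masks(self, local_step):
            pass  # no-op, for illustration only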
@@ -47,6 +48,7 @@ def register(pruner): return register + def parse_valid_pruner_types(): """Get all valid pruner names.""" valid_pruner_types = [] @@ -130,11 +132,11 @@ def __init__(self, config, modules): self.start_step = self.config['start_step'] self.end_step = self.config['end_step'] self.pruning_frequency = self.config['pruning_frequency'] - ##this is different with original code + # this is different with original code self.total_prune_cnt = (self.end_step - self.start_step + self.pruning_frequency) \ // self.pruning_frequency self.completed_pruned_cnt = 0 - self.total_prune_cnt -= 1 ## not pruning at step 0 + self.total_prune_cnt -= 1 # not pruning at step 0 if self.total_prune_cnt == 0: self.total_prune_cnt = 1 self.completed_pruned_cnt = 1 @@ -249,13 +251,13 @@ def forward(self, input): mask = self.block_mask.repeat_interleave(block_size[0], dim=0).repeat_interleave(\ block_size[1], dim=-1).to(self.weight.device) return F.linear(input, self.weight*mask, self.bias) - + for key in self.modules.keys(): if not hasattr(self.modules[key], 'block_mask'): continue # No corresponding block mask, skip. module = self.modules[key] module.forward = partial(forward, module) - + def recover_forward(self): """Restore the forward format at the end of pruning""" with torch.no_grad(): @@ -264,7 +266,7 @@ def recover_forward(self): continue # No corresponding block mask, skip. module = self.modules[key] module.forward = partial(torch.nn.Linear.forward, module) - + @register_pruner("basic") class BasicPruner(BasePruner): @@ -353,7 +355,7 @@ def on_before_optimizer_step(self): def on_after_optimizer_step(self): """Prune the model after optimization.""" - ##the order of the following three lines can't not be exchanged + # the order of the following three lines can't not be exchanged if self.global_step >= self.start_step and self.global_step <= self.end_step: self.reg.on_after_optimizer_step() self.mask_weights() @@ -420,7 +422,7 @@ class BlockMaskPruner(BasePruner): def __init__(self, config, modules): """Initialize.""" super(BlockMaskPruner, self).__init__(config, modules) - + def _init(self): """Initialize.""" self.pattern = get_pattern(self.config, self.modules) @@ -429,17 +431,17 @@ def _init(self): self.scheduler = get_scheduler(self.config) self.criterion = get_criterion(self.config, self.modules) self.reg = get_reg(self.config, self.modules, self.pattern) - + if "channel" not in self.pattern.pattern: logger.info("Enabling channel-wise pattern would be a better choice.") - + # def on_step_begin(self, local_step): # """Implement at the start of each step. - + # Update the masks at a given local_step. 
# """ # self.update_masks(local_step) - + def update_masks(self, local_step): """Update the masks at a given local step.""" if self.global_step == self.start_step: @@ -469,13 +471,13 @@ def update_masks(self, local_step): self.current_sparsity_ratio = self.pattern.get_sparsity_ratio(self.masks) logger.info(f"current sparsity ratio is {self.current_sparsity_ratio}") - + def on_before_optimizer_step(self): """Implement before optimizer.step().""" if self.global_step >= self.start_step and self.global_step <= self.end_step: self.reg.on_before_optimizer_step() self.criterion.on_before_optimizer_step() - + def on_after_optimizer_step(self): """Prune the model after optimization.""" ##the order of the following four lines can't not be exchanged @@ -488,7 +490,7 @@ def on_after_optimizer_step(self): self.recover_forward() self.pattern.remove_block_masks() self.global_step += 1 - + def mask_weights(self): """Apply block masks to corresponding modules' weights. @@ -496,14 +498,14 @@ def mask_weights(self): """ with torch.no_grad(): self.pattern.mask_block_weights(self.masks) - + def update_block_masks(self, masks): """Update the block mask parameters.""" with torch.no_grad(): for key in self.masks.keys(): module = self.modules[key] module.block_mask.data = masks[key].data - + def zero_mask_grad(self): with torch.no_grad(): for key in self.modules.keys(): @@ -516,8 +518,8 @@ def zero_mask_grad(self): else: mask.grad.requires_grad_(False) mask.grad.zero_() - - + + @register_pruner('retrain_free') class RetrainFreePruner(BasePruner): """Pruning Pruner. @@ -526,7 +528,7 @@ class RetrainFreePruner(BasePruner): RetrainFreePruner supports one-shot pruning (same effect as fast retraining free) and iterative pruning. Please refer to A Fast Post-Training Pruning Framework for Transformers (https://arxiv.org/abs/2204.09656) - + 1. Defines pruning functions called at step begin/end, before/after optimize and epoch begin/end. 2. Defines the pruning criterion and fixed weight parameters. 3. Obtain block masks and its grads. @@ -545,7 +547,7 @@ class RetrainFreePruner(BasePruner): def __init__(self, config, modules): """Initialize.""" super(RetrainFreePruner, self).__init__(config, modules) - + def _init(self): """Initialize.""" self.pattern = get_pattern(self.config, self.modules) @@ -554,18 +556,18 @@ def _init(self): self.scheduler = get_scheduler(self.config) self.criterion = get_criterion(self.config, self.modules) self.reg = get_reg(self.config, self.modules, self.pattern) - + logger.warning("Retrain-free pruner fixed the weights, please DO NOT turn on gradient update.") assert "channel" in self.pattern.pattern, \ "retrain-free pruner only supports large patterns like channel-wise pruning." - + # def on_step_begin(self, local_step): # """Implement at the start of each step. - + # Update the masks at a given local_step. 
# """ # self.update_masks(local_step) - + def update_masks(self, local_step): """Update the masks at a given local step.""" if self.global_step == self.start_step: @@ -589,20 +591,20 @@ def update_masks(self, local_step): self.completed_pruned_cnt += 1 if self.criterion.scores == {}: return - ##the order of the following three lines can't not be exchanged + # the order of the following three lines can't not be exchanged self.masks = self.pattern.get_masks(self.criterion.scores, current_target_sparsity_ratio, self.masks) self.rearrange_masks(self.masks) self.update_block_masks(self.masks) self.current_sparsity_ratio = self.pattern.get_sparsity_ratio(self.masks) logger.info(f"current sparsity ratio is {self.current_sparsity_ratio}") - + def on_before_optimizer_step(self): """Implement before optimizer.step().""" if self.global_step >= self.start_step and self.global_step <= self.end_step: self.reg.on_before_optimizer_step() self.criterion.on_before_optimizer_step() - + def on_after_optimizer_step(self): """Prune the model after optimization.""" ##the order of the following four lines can't not be exchanged @@ -617,7 +619,7 @@ def on_after_optimizer_step(self): self.recover_forward() self.pattern.remove_block_masks() self.global_step += 1 - + def mask_weights(self): """Apply block masks to corresponding modules' weights. @@ -625,14 +627,14 @@ def mask_weights(self): """ with torch.no_grad(): self.pattern.mask_block_weights(self.masks) - + def update_block_masks(self, masks): """Update the block mask parameters.""" with torch.no_grad(): for key in self.masks.keys(): module = self.modules[key] module.block_mask.data = masks[key].data - + def rearrange_masks(self, masks): """Rearrange the masks of each layer with constant sparsity.""" with torch.no_grad(): @@ -662,7 +664,7 @@ def rearrange_masks(self, masks): new_masks[key][masked_indicies] = 0 new_masks[key] = new_masks[key] * torch.ones_like(block_mask).to(block_mask.device) self.masks = new_masks - + def zero_mask_grad(self): with torch.no_grad(): for key in self.modules.keys(): @@ -887,6 +889,3 @@ def print_progressive_sparsity(self): """Output the progressive sparsity.""" cur_sp = self.pattern.get_sparsity_ratio_progressive(self.progressive_masks) logger.info("Step: {} -> Current progressive sparsity: {}".format(self.global_step, cur_sp)) - - - diff --git a/neural_compressor/compression/pruner/utils.py b/neural_compressor/compression/pruner/utils.py index e80213a5976..2a8e504c9b7 100644 --- a/neural_compressor/compression/pruner/utils.py +++ b/neural_compressor/compression/pruner/utils.py @@ -30,7 +30,6 @@ LazyImport('torch.nn') torch = LazyImport('torch') F = LazyImport('torch.nn.functional') - except: import torch import torch.nn.functional as F @@ -38,7 +37,6 @@ import logging logger = logging.getLogger(__name__) from .schema_check import PrunerV2 - class WeightPruningConfig: """Similiar to torch optimizer's interface.""" @@ -414,7 +412,7 @@ def parse_last_linear(model): """ from .model_slim.pattern_analyzer import ClassifierHeadSearcher searcher = ClassifierHeadSearcher(model) - layer = searcher.search(return_name = True) + layer = searcher.search(return_name=True) return layer def parse_to_prune(config, model): diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py index def66642c4c..85dc4b1fbfa 100644 --- a/neural_compressor/mix_precision.py +++ b/neural_compressor/mix_precision.py @@ -27,7 +27,7 @@ from .strategy import STRATEGIES from .config import _Config, options from .utils import logger -from 
.model.model import wrap_model_from +from .model import Model def fit(model, @@ -93,7 +93,7 @@ def fit(model, "please modify precision or excluded_precisions to make it understandable.") sys.exit(0) - wrapped_model = wrap_model_from(model, config) + wrapped_model = Model(model, conf=config) if eval_metric is not None: metric = register_customer_metric(eval_metric, config.framework) diff --git a/neural_compressor/model/model.py b/neural_compressor/model/model.py index ed110f7bd0f..e9149718aa5 100644 --- a/neural_compressor/model/model.py +++ b/neural_compressor/model/model.py @@ -163,93 +163,54 @@ def __new__(cls, root, **kwargs): Returns: BaseModel: neural_compressor built-in model """ - backend = kwargs.get("backend", "NA") - if backend == "NA" or backend == "default": - backend_tmp = get_model_fwk_name(root) - if backend_tmp == "pytorch": - backend = "pytorch_fx" - else: - backend = backend_tmp - elif backend == "ipex": - backend = "pytorch_ipex" - - if 'tensorflow' in backend or backend == 'keras': - if kwargs.get("approach", None) == "quant_aware_training" or backend == 'tensorflow_qat': - return MODELS['tensorflow_qat'](root, **kwargs) - - if 'modelType' in kwargs: - model_type = kwargs['modelType'] - else: - model_type = get_model_type(root) - if backend == 'keras' and model_type == 'keras': - return MODELS['keras'](root, **kwargs) - model = MODELS['tensorflow'](model_type, root, **kwargs) + conf = kwargs.pop("conf", "NA") + if isinstance(root, BaseModel): + if conf != "NA" and conf.framework is None: + conf.framework = list(MODELS.keys())[list(MODELS.values()).index(type(root))] + if conf.backend == "ipex": + assert conf.framework == "pytorch_ipex",\ + "Please wrap the model with correct Model class!" + if conf.backend == "itex": + if get_model_type(root.model) == 'keras': + assert conf.framework == "keras",\ + "Please wrap the model with KerasModel class!" + else: + assert conf.framework == "tensorflow_itex", \ + "Please wrap the model with TensorflowModel class!" + if getattr(conf, "approach", None) == "quant_aware_training": + assert conf.framework == "tensorflow_qat", \ + "Please wrap the model with TensorflowQATModel class!" + return root else: - model = MODELS[backend](root, **kwargs) - return model - - -def wrap_model_from(user_model, conf): - """Wrap the user model and dispatch to framework specific internal model object. - - Args: - user_model: user are supported to set model from original framework model format - (eg, tensorflow frozen_pb or path to a saved model), but not recommended. - Best practice is to set from a initialized neural_compressor.common.Model. - If tensorflow model is used, model's inputs/outputs will be auto inferred, - but sometimes auto inferred inputs/outputs will not meet your requests, - set them manually in config yaml file. Another corner case is slim model - of tensorflow, be careful of the name of model configured in yaml file, - make sure the name is in supported slim model list. - conf: the instance of PostTrainingQuantConfig or QuantizationAwareTrainingConfig or MixedPrecisionConfig. - """ - if conf.framework is None: - if isinstance(user_model, BaseModel): # pragma: no cover - conf.framework = list(MODELS.keys())[list(MODELS.values()).index(type(user_model))] - if conf.backend == "ipex": - assert conf.framework == "pytorch_ipex",\ - "Please wrap the model with correct Model class!" 
- if conf.backend == "itex": - if get_model_type(user_model.model) == 'keras': - assert conf.framework == "keras",\ - "Please wrap the model with KerasModel class!" - else: - assert conf.framework == "pytorch_itex", \ - "Please wrap the model with TensorflowModel class!" - else: - framework = get_model_fwk_name(user_model) - if framework == "tensorflow": - if get_model_type(user_model) == 'keras' and conf.backend == 'itex': - framework = 'keras' - if framework == "pytorch": - if conf.backend == "default": + framework = get_model_fwk_name(root) + if conf == "NA": + if framework == "pytorch": framework = "pytorch_fx" + return MODELS[framework](root, **kwargs) + else: + conf.framework = framework + if conf.backend == "default": + if framework == "pytorch": + conf.framework = "pytorch_fx" elif conf.backend == "ipex": - framework = "pytorch_ipex" - conf.framework = framework - - if not isinstance(user_model, BaseModel): - logger.warning("Force convert framework model to neural_compressor model.") - if "tensorflow" in conf.framework or conf.framework == "keras": - model = Model(user_model, backend=conf.framework, device=conf.device) - else: - model = Model(user_model, backend=conf.framework) - else: # pragma: no cover - if conf.framework == "pytorch_ipex": - from neural_compressor.model.torch_model import IPEXModel - assert type(user_model) == IPEXModel, \ - "The backend is ipex, please wrap the model with IPEXModel class!" - elif conf.framework == "pytorch_fx": - from neural_compressor.model.torch_model import PyTorchFXModel - assert type(user_model) == PyTorchFXModel, \ - "The backend is default, please wrap the model with PyTorchFXModel class!" - - model = user_model - - if 'tensorflow' in conf.framework: - model.name = conf.model_name - model.output_tensor_names = conf.outputs - model.input_tensor_names = conf.inputs - model.workspace_path = options.workspace - - return model + conf.framework = "pytorch_ipex" + + if 'tensorflow' in conf.framework: + if getattr(conf, "approach", None) == "quant_aware_training": + return MODELS['tensorflow_qat'](root, **kwargs) + + if 'modelType' in kwargs: + model_type = kwargs['modelType'] + else: + model_type = get_model_type(root) + if conf.backend == "itex" and model_type == 'keras': + return MODELS['keras'](root, **kwargs) + model = MODELS['tensorflow'](model_type, root, **kwargs) + else: + model = MODELS[conf.framework](root, **kwargs) + if 'tensorflow' in conf.framework: + model.name = conf.model_name + model.output_tensor_names = conf.outputs + model.input_tensor_names = conf.inputs + model.workspace_path = options.workspace + return model diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index 295e16dc9e3..e30708a3eca 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -23,7 +23,7 @@ from .config import _Config, options from .data.dataloaders.dataloader import check_dataloader from .metric.metric import register_customer_metric -from .model.model import wrap_model_from +from .model import Model from .strategy import STRATEGIES from .utils import logger from .utils.utility import time_limit, dump_class_attrs @@ -145,7 +145,7 @@ def eval_func(model): seed = options.random_seed random.seed(seed) np.random.seed(seed) - wrapped_model = wrap_model_from(model, conf) + wrapped_model = Model(model, conf=conf) if eval_metric is not None: metric = register_customer_metric(eval_metric, conf.framework) diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py 
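# A minimal usage sketch, assuming a toy PyTorch model and dummy calibration data
# (the network, dataset shape and dataloader helper below are illustrative
# assumptions, not part of this patch): after the change above, quantization.fit()
# wraps the raw framework model with Model(model, conf=conf) itself, so callers keep
# passing the original model plus a config object instead of pre-wrapping it.
import torch
from neural_compressor import quantization
from neural_compressor.config import PostTrainingQuantConfig
from neural_compressor.data import Datasets, DATALOADERS

# toy model and calibration data, mirroring the dummy dataset used in the tests
model = torch.nn.Sequential(
    torch.nn.Conv2d(3, 8, 3),
    torch.nn.AdaptiveAvgPool2d(1),
    torch.nn.Flatten(),
    torch.nn.Linear(8, 10),
)
dataset = Datasets("pytorch")["dummy"]((16, 3, 224, 224))
calib_dataloader = DATALOADERS["pytorch"](dataset)

conf = PostTrainingQuantConfig()  # framework/backend are resolved inside Model(model, conf=conf)
q_model = quantization.fit(model, conf, calib_dataloader=calib_dataloader)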
index 0dfa474f211..9f9f4bbdce9 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -41,7 +41,7 @@ from ..version import __version__ from ..algorithm import AlgorithmScheduler, ALGORITHMS -from .utils.tuning_space import TuningSpace +from .utils.tuning_space import TuningSpace from .utils.tuning_structs import OpTuningConfig from .utils.constant import FALLBACK_RECIPES_SET @@ -210,6 +210,7 @@ def _initialize_config(self, conf): Tuning config """ config = conf.quantization + config.diagnosis = getattr(config, 'diagnosis', None) return config @abstractmethod diff --git a/neural_compressor/training.py b/neural_compressor/training.py index c9902267f24..526c8a23a0b 100644 --- a/neural_compressor/training.py +++ b/neural_compressor/training.py @@ -15,178 +15,150 @@ # See the License for the specific language governing permissions and # limitations under the License. """The configuration of the training loop.""" -import copy +import os +import pickle +import numpy as np +import random + +from .adaptor import FRAMEWORKS from .compression.callbacks import QuantizationAwareTrainingCallbacks, DistillationCallbacks, PruningCallbacks +from .config import _Config, options +from .metric.metric import register_customer_metric from .model.model import Model from .utils import logger +from .utils.utility import time_limit +from neural_compressor.strategy.strategy import STRATEGIES from neural_compressor import (DistillationConfig, QuantizationAwareTrainingConfig, WeightPruningConfig) from typing import Callable, List, Union -from .compression import prepare_pruning class CompressionManager: """CompressionManager is uesd in train loop for what user want to deal with additional. Arguments: - model: A model to be compressed. It should be neural compressor model. - callbacks: A list of Callbacks instances. - Such as: DistillationCallbbacks, QuantizationAwareTrainingCallbacks, PruningCallbacks. + model: A model to be compressed. + confs: The instance of QuantizationAwareTrainingConfig, PruningConfig and distillationConfig, or a list of + config for orchestration optimization. Examples:: import neural_compressor.training.prepare_compression - compression_manager = prepare_compression(nc_model, confs) + compression_manager = prepare_compression(model, confs) compression_manager.callbacks.on_train_begin() model = compression_manager.model - train_loop: - for epoch in range(epochs): - compression_manager.callbacks.on_epoch_begin(epoch) - for i, batch in enumerate(dataloader): - compression_manager.callbacks.on_step_begin(i) - ...... - output = model(batch) - loss = ...... - loss = compression_manager.callbacks.on_after_compute_loss(batch, output, loss) - loss.backward() - compression_manager.callbacks.on_before_optimizer_step() - optimizer.step() - compression_manager.callbacks.on_step_end() - compression_manager.callbacks.on_epoch_end() + # train_loop: + for epoch in range(epochs): + compression_manager.callbacks.on_epoch_begin(epoch) + for i, (batch, label) in enumerate(dataloader): + compression_manager.callbacks.on_step_begin(i) + ...... + output = model(batch) + loss = ...... 
+ loss = compression_manager.callbacks.on_after_compute_loss(batch, output, loss) + loss.backward() + compression_manager.callbacks.on_before_optimizer_step() + optimizer.step() + compression_manager.callbacks.on_step_end() + compression_manager.callbacks.on_epoch_end() compression_manager.callbacks.on_train_end() compression_manager.save("path_to_save") """ - def __init__(self, model, callbacks_list): + def __init__(self, model: Callable, confs: Union[Callable, List], **kwargs): """Initialize the CompressionManager's parameters. - model: A model to be compressed. It should be neural compressor model. - callbacks: A list of Callbacks instances. - Such as: DistillationCallbbacks, QuantizationAwareTrainingCallbacks, PruningCallbacks. + model: A model to be compressed. + confs: The instance of QuantizationAwareTrainingConfig, PruningConfig and distillationConfig, or a list of + config for orchestration optimization. """ - self.callbacks = CallBacks(callbacks_list) - self.model = model - self._train_func = None - self._eval_func = None - self.quantizer = None + callbacks_list = [] + self.model = None + q_conf = None + p_conf = None + d_conf = None + self.adaptor = None + + if isinstance(confs, List) and len(confs) > 1: + for conf in confs: + if isinstance(conf, QuantizationAwareTrainingConfig): + self.model = Model(model, conf=conf) + if self.model is None: + self.model = Model(model) + + for conf in confs: + if isinstance(conf, QuantizationAwareTrainingConfig): + q_conf = conf + + framework_specific_info = { + 'device': conf.device, + 'random_seed': options.random_seed, + 'workspace_path': options.workspace, + 'q_dataloader': None, + 'backend': getattr(confs, "backend", 'default'), + 'format': getattr(confs, "quant_format", 'default'), + 'approach': conf.approach, + } + if 'tensorflow' in conf.framework: + framework_specific_info.update( + {"inputs": conf.inputs, + "outputs": conf.outputs}) + self.adaptor = FRAMEWORKS[conf.framework](framework_specific_info) + self.adaptor.model = self.model + callbacks_list.append(QuantizationAwareTrainingCallbacks(conf, adaptor=self.adaptor)) + elif isinstance(conf, WeightPruningConfig): + p_conf = conf + callbacks_list.append(PruningCallbacks(conf, model=self.model)) + elif isinstance(conf, DistillationConfig): + d_conf = conf + callbacks_list.append(DistillationCallbacks(conf, model=self.model)) + else: + assert False, "Unsupported configure: {}".format(type(conf)) + self.conf = _Config(quantization=q_conf, benchmark=None, pruning=p_conf, distillation=d_conf, nas=None) + else: + if isinstance(confs, List): + confs = confs[0] + if isinstance(confs, QuantizationAwareTrainingConfig): + self.model = Model(model, conf=confs) + + framework_specific_info = { + 'device': confs.device, + 'random_seed': options.random_seed, + 'workspace_path': options.workspace, + 'q_dataloader': None, + 'backend': getattr(confs, "backend", 'default'), + 'format': getattr(confs, "quant_format", 'default'), + 'approach': confs.approach, + } + if 'tensorflow' in confs.framework: + framework_specific_info.update( + {"inputs": confs.inputs, + "outputs": confs.outputs}) + self.adaptor = FRAMEWORKS[confs.framework](framework_specific_info) + self.adaptor.model = self.model + callbacks_list.append(QuantizationAwareTrainingCallbacks(confs, adaptor=self.adaptor)) + self.conf = _Config(quantization=confs, benchmark=None, pruning=None, distillation=None, nas=None) + elif isinstance(confs, WeightPruningConfig): + self.model = Model(model) + callbacks_list.append(PruningCallbacks(confs, 
model=self.model)) + self.conf = _Config(quantization=None, benchmark=None, pruning=confs, distillation=None, nas=None) + elif isinstance(confs, DistillationConfig): + self.model = Model(model) + callbacks_list.append(DistillationCallbacks(confs, model=self.model)) + self.conf = _Config(quantization=None, benchmark=None, pruning=None, distillation=confs, nas=None) + else: + assert False, logger.error( + "confs should be one of QuantizationAwareTrainingConfig, " + "PruningConfig, DistillationConfig. not {}".format(type(confs)) + ) try: # TODO: export to ONNX model need original fp32 model now, will remove it # when int8 model can be exported to ONNX model. - self.fp32_model = model + self.fp32_model = self.model except Exception as e: # pragma: no cover logger.warning("Fail to deep copy the model due to {}.".format(repr(e))) self.fp32_model = None - for component in callbacks_list: - if isinstance(component, QuantizationAwareTrainingCallbacks): - self.quantizer = component - - @property - def train_func(self): - """Not support get train_func.""" - assert False, 'Should not try to get the value of `train_func` attribute.' - - @train_func.setter - def train_func(self, user_train_func): - """Set training function. - - Args: - user_train_func: This function takes "model" as input parameter - and executes entire training process with self - contained training hyper-parameters. If training_func set, - an evaluation process must be triggered and user should - set eval_dataloader with metric configured or directly eval_func - to make evaluation of the model executed. training_func will return - a trained model. - """ - self.quantizer.train_func = user_train_func - - @property - def eval_func(self): - """Not support get eval_func.""" - assert False, 'Should not try to get the value of `eval_func` attribute.' - return None - - @eval_func.setter - def eval_func(self, user_eval_func): - """Eval function for component. - - Args: - user_eval_func: This function takes "model" as input parameter - and executes entire evaluation process with self - contained metrics. If eval_func set, - an evaluation process must be triggered - to make evaluation of the model executed. - """ - assert self.quantizer is not None, "There is no quantizer to tune, " \ - "please pass a QuantizationAwareTrainingConfig." - self.quantizer.eval_func = user_eval_func - - @property - def eval_dataloader(self): - """Getter to eval dataloader.""" - return self.quantizer.eval_dataloader - - @eval_dataloader.setter - def eval_dataloader(self, dataloader): - """Set Data loader for evaluation of component. - - It is iterable and the batched data should consists of yield (input, _). - the input in the batched data will be used for model inference, so it - should satisfy the input format of specific model. - - Args: - dataloader(generator): user are supported to set a user defined dataloader - which meet the requirements that can yield tuple of - (input, label)/(input, _) batched data. - """ - assert self.quantizer is not None, "There is no quantizer to tune, " \ - "please pass a QuantizationAwareTrainingConfig." - assert hasattr(dataloader, '__iter__') and \ - hasattr(dataloader, 'batch_size'), \ - 'dataloader must implement __iter__ method and batch_size attribute' - - self.quantizer.eval_dataloader = dataloader - - @property - def metric(self): - """Get `metric` attribute.""" - assert False, 'Should not try to get the value of `metric` attribute.' 
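# A minimal orchestration sketch, assuming a toy PyTorch student/teacher pair
# (the sparsity target, pruning schedule and loss settings below are illustrative
# assumptions only): passing a list of configs to prepare_compression() exercises
# the multi-config branch of the rewritten CompressionManager above, which builds
# one callback per config (here pruning plus distillation) around a single model.
import torch
from neural_compressor import DistillationConfig, WeightPruningConfig
from neural_compressor.config import KnowledgeDistillationLossConfig
from neural_compressor.training import prepare_compression

student = torch.nn.Sequential(torch.nn.Linear(64, 32), torch.nn.ReLU(), torch.nn.Linear(32, 10))
teacher = torch.nn.Sequential(torch.nn.Linear(64, 32), torch.nn.ReLU(), torch.nn.Linear(32, 10))

confs = [
    WeightPruningConfig([{"start_step": 0, "end_step": 10}], target_sparsity=0.5),
    DistillationConfig(teacher_model=teacher, criterion=KnowledgeDistillationLossConfig()),
]
compression_manager = prepare_compression(student, confs)
compression_manager.callbacks.on_train_begin()
model = compression_manager.model  # wrapped neural_compressor model used in the train loop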
- - @metric.setter - def metric(self, user_metric): - """Set metric class or a dict of built-in metric configures. - - 1. neural_compressor have many built-in metrics, - user can pass a metric configure dict to tell neural compressor what metric will be use. - You can set multi-metrics to evaluate the performance of a specific model. - Single metric: - {topk: 1} - - Multi-metrics: - {topk: 1, - MSE: {compare_label: False}, - weight: [0.5, 0.5], - higher_is_better: [True, False] - } - For the built-in metrics, please refer to below link: - https://github.com/intel/neural-compressor/blob/master/docs/source/metric.md#supported-built-in-metric-matrix. - - 2. User also can set specific metric through this api. The metric class should take the outputs of the model or - postprocess(if have) as inputs, neural_compressor built-in metric always take(predictions, labels) - as inputs for update, and user_metric.metric_cls should be sub_class of neural_compressor.metric.BaseMetric. - - Args: - user_metric(neural_compressor.metric.Metric or a dict of built-in metric configurations): - The object of Metric or a dict of built-in metric configurations. - """ - assert self.quantizer is not None, "There is no quantizer to tune, " \ - "please pass a QuantizationAwareTrainingConfig." - self.quantizer.metric = user_metric - - def fit(self): - """Compress model with tuning for quantization.""" - self.model = self.quantizer.fit() - return self.model + self.callbacks = CallBacks(callbacks_list) def save(self, root=None): """Save compressed model. @@ -216,7 +188,7 @@ def fit(compression_manager, eval_dataloader=None, eval_metric=None, **kwargs): - """Compress the model with tuning for quantization. + """Compress the model with accuracy tuning for quantization. Args: compression_manager (CompressionManager): The Compression manager contains the model and @@ -251,16 +223,125 @@ def eval_func(model): process. eval_metric (dict or obj): Set metric class or a dict of built-in metric configures, and neural_compressor will initialize this class when evaluation. + + Returns: + A optimized model. + + Examples:: + + from neural_compressor.training import fit, prepare_compression + + compression_manager = prepare_compression(conf, model) + + def train_func(model): + compression_manager.callbacks.on_train_begin() + for epoch in range(epochs): + compression_manager.callbacks.on_epoch_begin(epoch) + for i, (batch, label) in enumerate(dataloader): + compression_manager.callbacks.on_step_begin(i) + ...... + output = model(batch) + loss = ...... + loss = compression_manager.callbacks.on_after_compute_loss(batch, output, loss) + loss.backward() + compression_manager.callbacks.on_before_optimizer_step() + optimizer.step() + compression_manager.callbacks.on_step_end() + compression_manager.callbacks.on_epoch_end() + compression_manager.callbacks.on_train_end() + return model + + def eval_func(model): + for i, (batch, label) in enumerate(dataloader): + output = model(batch) + # compute metric + metric = top1(output, label) + return metric.results() + + model = fit(compression_manager, train_func=train_func, eval_func=eval_func) """ - assert compression_manager.quantizer is not None, "Only quantization supports tuning with accuracy driven." 
- compression_manager.train_func = train_func - if eval_func is not None: - compression_manager.eval_func = eval_func - if eval_dataloader is not None: - compression_manager.eval_dataloader = eval_dataloader + assert compression_manager.conf.quantization is not None, "Only quantization supports tuning with accuracy driven." + seed = options.random_seed + random.seed(seed) + np.random.seed(seed) + + # Remove qat hooks if user want to tune accuracy with train function. + for callback in compression_manager.callbacks.callbacks_list: + if isinstance(callback, QuantizationAwareTrainingCallbacks): + callback.remove_hook("on_train_begin", compression_manager.adaptor._pre_hook_for_qat) + callback.remove_hook("on_train_end", compression_manager.adaptor._post_hook_for_qat) + if eval_metric is not None: - compression_manager.eval_metric = eval_metric - return compression_manager.fit() + metric = register_customer_metric(eval_metric, compression_manager.conf.quantization.framework) + else: + metric = None + + strategy_name = compression_manager.conf.quantization.tuning_criterion.strategy + + if compression_manager.conf.quantization.quant_level == "auto": + strategy_name = "auto" + elif compression_manager.conf.quantization.quant_level == 0: + strategy_name = "conservative" + + if strategy_name == "mse_v2": + if not (compression_manager.conf.quantization.framework.startswith("tensorflow") + or compression_manager.conf.quantization.framework == 'pytorch_fx'): # pragma: no cover + strategy_name = "basic" + logger.warning(f"MSE_v2 does not support {compression_manager.conf.quantization.framework} now, use basic instead.") + logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.") + assert strategy_name in STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy_name) + + logger.info(f"Start {strategy_name} tuning.") + _resume = None + # check if interrupted tuning procedure exists. if yes, it will resume the + # whole auto tune process. + resume_file = os.path.abspath(os.path.expanduser(options.resume_from)) \ + if options.workspace and options.resume_from else None + if resume_file: + assert os.path.exists(resume_file), \ + "The specified resume file {} doesn't exist!".format(resume_file) + with open(resume_file, 'rb') as f: + _resume = pickle.load(f).__dict__ + + if eval_func is None and eval_dataloader is None: # pragma: no cover + logger.info("Quantize model without tuning!") + + strategy = STRATEGIES[strategy_name]( + model=compression_manager.model, + conf=compression_manager.conf, + q_dataloader=None, + q_func=train_func, + eval_func=eval_func, + eval_dataloader=eval_dataloader, + eval_metric=metric, + resume=_resume, + q_hooks=None + ) + try: + with time_limit(compression_manager.conf.quantization.tuning_criterion.timeout): + logger.debug("Dump user yaml configuration:") + logger.debug(compression_manager.conf) + strategy.traverse() + except KeyboardInterrupt: + pass + except Exception as e: + logger.error("Unexpected exception {} happened during tuning.".format(repr(e))) + import traceback + traceback.print_exc() + finally: + if strategy.best_qmodel: + logger.info( + "Specified timeout or max trials is reached! " + "Found a quantized model which meet accuracy goal. Exit.") + strategy.deploy_config() + else: + logger.error( + "Specified timeout or max trials is reached! " + "Not found any quantized model which meet accuracy goal. 
Exit.") + + compression_manager.model = strategy.best_qmodel + + return compression_manager.model def prepare_compression(model: Callable, confs: Union[Callable, List], **kwargs): @@ -277,59 +358,28 @@ def prepare_compression(model: Callable, confs: Union[Callable, List], **kwargs) Examples:: - import neural_compressor.training.prepare_compression + from neural_compressor.training import prepare_compression compression_manager = prepare_compression(conf, model) - train_loop: - compression_manager.on_train_begin() - for epoch in range(epochs): - compression_manager.on_epoch_begin(epoch) - for i, batch in enumerate(dataloader): - compression_manager.on_step_begin(i) - ...... - output = model(batch) - loss = ...... - loss = compression_manager.on_after_compute_loss(batch, output, loss) - loss.backward() - compression_manager.on_before_optimizer_step() - optimizer.step() - compression_manager.on_step_end() - compression_manager.on_epoch_end() - compression_manager.on_train_end() + model = compression_manager.model + # train_loop: + compression_manager.callbacks.on_train_begin() + for epoch in range(epochs): + compression_manager.callbacks.on_epoch_begin(epoch) + for i, (batch, label) in enumerate(dataloader): + compression_manager.callbacks.on_step_begin(i) + ...... + output = model(batch) + loss = ...... + loss = compression_manager.callbacks.on_after_compute_loss(batch, output, loss) + loss.backward() + compression_manager.callbacks.on_before_optimizer_step() + optimizer.step() + compression_manager.callbacks.on_step_end() + compression_manager.callbacks.on_epoch_end() + compression_manager.callbacks.on_train_end() """ - callbacks_list = [] - nc_model = None - if isinstance(confs, List) and len(confs) > 1: - for conf in confs: - if isinstance(conf, QuantizationAwareTrainingConfig): - nc_model = Model(model, backend=conf.backend, approach="quant_aware_training") - callbacks_list.append(QuantizationAwareTrainingCallbacks(conf, model=nc_model)) - elif isinstance(conf, WeightPruningConfig): - callbacks_list.append(PruningCallbacks(conf, model=model)) - elif isinstance(conf, DistillationConfig): - callbacks_list.append(DistillationCallbacks(conf, model=model)) - else: - assert False, "Unsupported configure: {}".format(type(conf)) - else: - if isinstance(confs, List): - confs = confs[0] - if isinstance(confs, QuantizationAwareTrainingConfig): - nc_model = Model(model, backend=confs.backend, approach="quant_aware_training") - callbacks_list.append(QuantizationAwareTrainingCallbacks(confs, model=nc_model)) - elif isinstance(confs, WeightPruningConfig): - callbacks_list.append(PruningCallbacks(confs, model=model)) - elif isinstance(confs, DistillationConfig): - callbacks_list.append(DistillationCallbacks(confs, model=model)) - else: - assert False, logger.error( - "confs should be one of QuantizationAwareTrainingConfig, " - "PruningConfig, DistillationConfig. 
not {}".format(type(confs)) - ) - - if nc_model is None: - nc_model = Model(model, backend="default") - - compression_manager = CompressionManager(nc_model, callbacks_list=callbacks_list) + compression_manager = CompressionManager(model, confs, **kwargs) return compression_manager diff --git a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2.x.py b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2.x.py index d08eb9cac7d..7357fea20ba 100644 --- a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2.x.py +++ b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2.x.py @@ -499,7 +499,7 @@ def test_mix_precision(self): # run fx_quant in neural_compressor and save the quantized GraphModule dataset = Datasets("pytorch")["dummy"]((100, 3, 224, 224)) dataloader = DataLoader("pytorch", dataset) - set_workspace=("./saved") + set_workspace("./saved") conf = PostTrainingQuantConfig(op_name_dict=ptq_fx_op_name_list) q_model = quantization.fit(model_origin, conf, @@ -530,8 +530,11 @@ def test_hawq_metric(self): dataset = Datasets("pytorch")["dummy"](((16, 3, 224, 224))) dataloader = DATALOADERS["pytorch"](dataset) q_model = fit(ori_model, conf=PostTrainingQuantConfig(), calib_dataloader=dataloader) - op_to_traces = hawq_top(fp32_model=pt_model, q_model=q_model, dataloader=dataloader, \ - criterion=None, enable_act=True) + op_to_traces = hawq_top(fp32_model=pt_model, + q_model=q_model, + dataloader=dataloader, + criterion=None, + enable_act=True) self.assertIsNotNone(op_to_traces) diff --git a/test/pruning_2_plus.x/test_pruning.py b/test/pruning_2_plus.x/test_pruning.py index 4208cb9540a..396d2f63b78 100644 --- a/test/pruning_2_plus.x/test_pruning.py +++ b/test/pruning_2_plus.x/test_pruning.py @@ -8,8 +8,7 @@ sys.path.insert(0, './') from neural_compressor.data import Datasets from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor import WeightPruningConfig -from neural_compressor.training import prepare_pruning +from neural_compressor import prepare_pruning, WeightPruningConfig class TestPruning(unittest.TestCase): diff --git a/test/pruning_2_plus.x/test_pruning_block.py b/test/pruning_2_plus.x/test_pruning_block.py index f28b9f78a0c..e4b10945755 100644 --- a/test/pruning_2_plus.x/test_pruning_block.py +++ b/test/pruning_2_plus.x/test_pruning_block.py @@ -7,8 +7,7 @@ sys.path.insert(0, './') from neural_compressor.data import Datasets from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor import WeightPruningConfig -from neural_compressor.training import prepare_pruning +from neural_compressor import prepare_pruning, WeightPruningConfig class TestPruning(unittest.TestCase): From 183f600c586166b068cd30ab089be93bd5ba1fc7 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Fri, 5 May 2023 10:58:55 +0800 Subject: [PATCH 05/14] fixed UT error Signed-off-by: Cheng, Penghui --- .../quantization/ptq/main.py | 2 +- .../inception_v3/quantization/ptq/main.py | 3 +- .../mobilenet_v2/quantization/ptq/main.py | 2 +- .../resnet101/quantization/ptq/main.py | 2 +- .../resnet50/quantization/ptq/main.py | 2 +- .../resnet50_fashion/quantization/ptq/main.py | 2 +- .../resnetv2_101/quantization/ptq/main.py | 2 +- .../resnetv2_50/quantization/ptq/main.py | 2 +- .../vgg16/quantization/ptq/main.py | 2 +- .../vgg19/quantization/ptq/main.py | 2 +- .../xception/quantization/ptq/main.py | 2 +- neural_compressor/model/model.py | 46 +++++++++++++++---- neural_compressor/training.py | 7 +-- 13 files changed, 53 insertions(+), 
23 deletions(-) diff --git a/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py b/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py index c3ac54e2a5e..d707e88fffd 100644 --- a/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py +++ b/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py @@ -139,7 +139,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model, backend='keras').model) + accuracy = evaluate(Model(FLAGS.input_model).model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py b/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py index b8bf786ced6..67391cab8b8 100644 --- a/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py +++ b/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py @@ -139,9 +139,10 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model, backend='keras').model) + accuracy = evaluate(Model(FLAGS.input_model).model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) + if __name__ == "__main__": tf.compat.v1.app.run() diff --git a/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py b/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py index ce4d5450a28..e6d77c3a130 100644 --- a/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py +++ b/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py @@ -138,7 +138,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model, backend='keras').model) + accuracy = evaluate(Model(FLAGS.input_model).model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/resnet101/quantization/ptq/main.py b/examples/keras/image_recognition/resnet101/quantization/ptq/main.py index 77bd046a2af..b391fa67e09 100644 --- a/examples/keras/image_recognition/resnet101/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnet101/quantization/ptq/main.py @@ -145,7 +145,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model, backend='keras').model) + accuracy = evaluate(Model(FLAGS.input_model).model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/resnet50/quantization/ptq/main.py b/examples/keras/image_recognition/resnet50/quantization/ptq/main.py index e8c50b098b3..4f9f09b4085 100644 --- a/examples/keras/image_recognition/resnet50/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnet50/quantization/ptq/main.py @@ -130,7 +130,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model import Model - model = Model(FLAGS.input_model, backend='keras').model + model = Model(FLAGS.input_model).model accuracy = evaluate(model) print('Batch size = %d' % FLAGS.batch_size) print("Accuracy: %.5f" % accuracy) diff --git 
a/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py b/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py index a432587d5f6..2f6d238173b 100644 --- a/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py @@ -128,7 +128,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model import Model - model = Model(FLAGS.input_model, backend='keras').model + model = Model(FLAGS.input_model).model accuracy = evaluate(model) print('Batch size = %d' % FLAGS.batch_size) print("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py b/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py index ce4d5450a28..e6d77c3a130 100644 --- a/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py @@ -138,7 +138,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model, backend='keras').model) + accuracy = evaluate(Model(FLAGS.input_model).model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py b/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py index ce4d5450a28..e6d77c3a130 100644 --- a/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py @@ -138,7 +138,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model, backend='keras').model) + accuracy = evaluate(Model(FLAGS.input_model).model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/vgg16/quantization/ptq/main.py b/examples/keras/image_recognition/vgg16/quantization/ptq/main.py index 380ee9a9223..224be9a5d54 100644 --- a/examples/keras/image_recognition/vgg16/quantization/ptq/main.py +++ b/examples/keras/image_recognition/vgg16/quantization/ptq/main.py @@ -130,7 +130,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model import Model - model = Model(FLAGS.input_model, backend='keras').model + model = Model(FLAGS.input_model).model accuracy = evaluate(model) print('Batch size = %d' % FLAGS.batch_size) print("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/vgg19/quantization/ptq/main.py b/examples/keras/image_recognition/vgg19/quantization/ptq/main.py index e8c50b098b3..4f9f09b4085 100644 --- a/examples/keras/image_recognition/vgg19/quantization/ptq/main.py +++ b/examples/keras/image_recognition/vgg19/quantization/ptq/main.py @@ -130,7 +130,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model import Model - model = Model(FLAGS.input_model, backend='keras').model + model = Model(FLAGS.input_model).model accuracy = evaluate(model) print('Batch size = %d' % FLAGS.batch_size) print("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/xception/quantization/ptq/main.py b/examples/keras/image_recognition/xception/quantization/ptq/main.py index 
a5b678023c1..6d0b3e2ac42 100644 --- a/examples/keras/image_recognition/xception/quantization/ptq/main.py +++ b/examples/keras/image_recognition/xception/quantization/ptq/main.py @@ -138,7 +138,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model, backend='keras').model) + accuracy = evaluate(Model(FLAGS.input_model).model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/neural_compressor/model/model.py b/neural_compressor/model/model.py index e9149718aa5..2d898872119 100644 --- a/neural_compressor/model/model.py +++ b/neural_compressor/model/model.py @@ -175,17 +175,39 @@ def __new__(cls, root, **kwargs): assert conf.framework == "keras",\ "Please wrap the model with KerasModel class!" else: - assert conf.framework == "tensorflow_itex", \ + assert conf.framework == "tensorflow", \ "Please wrap the model with TensorflowModel class!" + conf.framework = "tensorflow_itex" if getattr(conf, "approach", None) == "quant_aware_training": assert conf.framework == "tensorflow_qat", \ "Please wrap the model with TensorflowQATModel class!" + else: + if 'tensorflow' in conf.framework: + if getattr(root, "name", None) is None: + root.name = conf.model_name + if getattr(root, "output_tensor_names", None) is None: + root.output_tensor_names = conf.outputs + if getattr(root, "input_tensor_names", None) is None: + root.input_tensor_names = conf.inputs + if getattr(root, "workspace_path", None) is None: + root.workspace_path = options.workspace return root else: framework = get_model_fwk_name(root) if conf == "NA": if framework == "pytorch": framework = "pytorch_fx" + if 'tensorflow' in framework: + if kwargs.get("approach", None) == "quant_aware_training": + return MODELS['tensorflow_qat'](root, **kwargs) + if 'modelType' in kwargs: + model_type = kwargs['modelType'] + else: + model_type = get_model_type(root) + if model_type == "keras" and kwargs.get("framework", None) != "tensorflow": + return MODELS['keras'](root, **kwargs) + else: + return MODELS[framework](model_type, root, **kwargs) return MODELS[framework](root, **kwargs) else: conf.framework = framework @@ -197,15 +219,21 @@ def __new__(cls, root, **kwargs): if 'tensorflow' in conf.framework: if getattr(conf, "approach", None) == "quant_aware_training": - return MODELS['tensorflow_qat'](root, **kwargs) - - if 'modelType' in kwargs: - model_type = kwargs['modelType'] + model = MODELS['tensorflow_qat'](root, **kwargs) else: - model_type = get_model_type(root) - if conf.backend == "itex" and model_type == 'keras': - return MODELS['keras'](root, **kwargs) - model = MODELS['tensorflow'](model_type, root, **kwargs) + if 'modelType' in kwargs: + model_type = kwargs['modelType'] + else: + model_type = get_model_type(root) + if conf.backend == "itex": + if model_type == 'keras': + conf.framework = "keras" + model = MODELS[conf.framework](root, **kwargs) + else: + conf.framework = "tensorflow_itex" + model = MODELS[conf.framework](model_type, root, **kwargs) + else: + model = MODELS['tensorflow'](model_type, root, **kwargs) else: model = MODELS[conf.framework](root, **kwargs) if 'tensorflow' in conf.framework: diff --git a/neural_compressor/training.py b/neural_compressor/training.py index 526c8a23a0b..4204955b012 100644 --- a/neural_compressor/training.py +++ b/neural_compressor/training.py @@ -166,7 +166,7 @@ def save(self, root=None): Args: root (str): path to save the model """ - 
self.model.save(root) + self.model.save(root) # pylint: disable=no-member def export( self, @@ -179,7 +179,7 @@ def export( save_path (str): The path to save the model conf (Union[Callable, List]) : The configure for onnx exportation. """ - self.model.export(save_path, conf) + self.model.export(save_path, conf) # pylint: disable=no-member def fit(compression_manager, @@ -287,7 +287,8 @@ def eval_func(model): if not (compression_manager.conf.quantization.framework.startswith("tensorflow") or compression_manager.conf.quantization.framework == 'pytorch_fx'): # pragma: no cover strategy_name = "basic" - logger.warning(f"MSE_v2 does not support {compression_manager.conf.quantization.framework} now, use basic instead.") + logger.warning(f"MSE_v2 does not support {compression_manager.conf.quantization.framework} now," + "use basic instead.") logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.") assert strategy_name in STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy_name) From 498180987d6210d96e35292577ea43ab60b1cef4 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Fri, 5 May 2023 17:54:04 +0800 Subject: [PATCH 06/14] Fixed UT error and update docs Signed-off-by: Cheng, Penghui --- docs/source/benchmark.md | 2 +- docs/source/mixed_precision.md | 8 ++-- examples/helloworld/tf_example5/README.md | 2 +- .../quantization/ptq/main.py | 2 +- .../inception_v3/quantization/ptq/main.py | 2 +- .../mobilenet_v2/quantization/ptq/main.py | 2 +- .../resnet101/quantization/ptq/main.py | 2 +- .../resnet50/quantization/ptq/main.py | 2 +- .../resnet50_fashion/quantization/ptq/main.py | 2 +- .../resnetv2_101/quantization/ptq/main.py | 2 +- .../resnetv2_50/quantization/ptq/main.py | 2 +- .../vgg16/quantization/ptq/main.py | 2 +- .../vgg19/quantization/ptq/main.py | 2 +- .../xception/quantization/ptq/main.py | 2 +- .../quantization/ptq_static/main.py | 2 +- .../gpt2/quantization/ptq_dynamic/gpt2.py | 2 +- .../quantization/run_diffusion.py | 2 +- .../quantization/ptq/fx/python/main.py | 4 +- .../quantization/ptq/ipex/infer.py | 2 +- .../quantization/qat/fx/ssd/main.py | 2 +- .../quantization/ptq/main.py | 2 +- .../mobilenet_v1/quantization/ptq/main.py | 2 +- .../mobilenet_v2/quantization/ptq/main.py | 2 +- .../resnet_v2/quantization/qat/main.py | 2 +- .../adaptor/tf_utils/graph_converter.py | 23 ++++++----- .../graph_rewriter/generic/pre_optimize.py | 2 +- neural_compressor/benchmark.py | 24 +++++------ neural_compressor/config.py | 4 +- neural_compressor/mix_precision.py | 40 ++++++++++++++----- neural_compressor/model/model.py | 2 +- neural_compressor/training.py | 1 + test/benchmark/test_benchmark_2.x.py | 2 +- .../test_mixed_precision_keras_model.py | 2 +- 33 files changed, 89 insertions(+), 67 deletions(-) diff --git a/docs/source/benchmark.md b/docs/source/benchmark.md index 0ab89bc3548..4b660f3ac95 100644 --- a/docs/source/benchmark.md +++ b/docs/source/benchmark.md @@ -49,7 +49,7 @@ And please make sure `cores_per_instance * num_of_instance` must be less than CP from neural_compressor.config import BenchmarkConfig from neural_compressor.benchmark import fit conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=7) -fit(model='./int8.pb', config=conf, b_dataloader=eval_dataloader) +fit(model='./int8.pb', conf=conf, b_dataloader=eval_dataloader) ``` ## Examples diff --git a/docs/source/mixed_precision.md b/docs/source/mixed_precision.md index 86fb3986810..e288890d745 100644 --- a/docs/source/mixed_precision.md +++ 
b/docs/source/mixed_precision.md @@ -42,8 +42,8 @@ Supported precisions for mix precision include bf16 and fp16. If users want to g from neural_compressor import mix_precision from neural_compressor.config import MixedPrecisionConfig -conf = MixedPrecisionConfig(precision='bf16') -converted_model = mix_precision.fit(model, config=conf) +conf = MixedPrecisionConfig(precisions='bf16') +converted_model = mix_precision.fit(model, conf=conf) converted_model.save('./path/to/save/') ``` @@ -56,8 +56,8 @@ from neural_compressor.config import MixedPrecisionConfig conf = MixedPrecisionConfig( backend='onnxrt_cuda_ep', device='gpu', - precision='fp16') -converted_model = mix_precision.fit(model, config=conf) + precisions='fp16') +converted_model = mix_precision.fit(model, conf=conf) converted_model.save('./path/to/save/') ``` diff --git a/examples/helloworld/tf_example5/README.md b/examples/helloworld/tf_example5/README.md index 399ebd8bfb8..e90509222f9 100644 --- a/examples/helloworld/tf_example5/README.md +++ b/examples/helloworld/tf_example5/README.md @@ -52,7 +52,7 @@ python test.py --benchmark --dataset_location=/path/to/imagenet/ ```python from neural_compressor.benchmark import fit conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=1) - fit(model='./int8.pb', config=conf, b_dataloader=eval_dataloader) + fit(model='./int8.pb', conf=conf, b_dataloader=eval_dataloader) ``` diff --git a/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py b/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py index d707e88fffd..1e742e36cb6 100644 --- a/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py +++ b/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py @@ -139,7 +139,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model).model) + accuracy = evaluate(Model(FLAGS.input_model, backend='itex').model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py b/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py index 67391cab8b8..a27de78af52 100644 --- a/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py +++ b/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py @@ -139,7 +139,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model).model) + accuracy = evaluate(Model(FLAGS.input_model, backend='itex').model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py b/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py index e6d77c3a130..e5bbc475f24 100644 --- a/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py +++ b/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py @@ -138,7 +138,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model).model) + accuracy = evaluate(Model(FLAGS.input_model, backend='itex').model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git 
a/examples/keras/image_recognition/resnet101/quantization/ptq/main.py b/examples/keras/image_recognition/resnet101/quantization/ptq/main.py index b391fa67e09..dbad4085197 100644 --- a/examples/keras/image_recognition/resnet101/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnet101/quantization/ptq/main.py @@ -145,7 +145,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model).model) + accuracy = evaluate(Model(FLAGS.input_model, backend='itex').model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/resnet50/quantization/ptq/main.py b/examples/keras/image_recognition/resnet50/quantization/ptq/main.py index 4f9f09b4085..fba7c004931 100644 --- a/examples/keras/image_recognition/resnet50/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnet50/quantization/ptq/main.py @@ -130,7 +130,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model import Model - model = Model(FLAGS.input_model).model + model = Model(FLAGS.input_model, backend='itex').model accuracy = evaluate(model) print('Batch size = %d' % FLAGS.batch_size) print("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py b/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py index 2f6d238173b..19a5c125f03 100644 --- a/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py @@ -128,7 +128,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model import Model - model = Model(FLAGS.input_model).model + model = Model(FLAGS.input_model, backend='itex').model accuracy = evaluate(model) print('Batch size = %d' % FLAGS.batch_size) print("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py b/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py index e6d77c3a130..e5bbc475f24 100644 --- a/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py @@ -138,7 +138,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model).model) + accuracy = evaluate(Model(FLAGS.input_model, backend='itex').model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py b/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py index e6d77c3a130..e5bbc475f24 100644 --- a/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py @@ -138,7 +138,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model).model) + accuracy = evaluate(Model(FLAGS.input_model, backend='itex').model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/vgg16/quantization/ptq/main.py 
b/examples/keras/image_recognition/vgg16/quantization/ptq/main.py index 224be9a5d54..1396533ff13 100644 --- a/examples/keras/image_recognition/vgg16/quantization/ptq/main.py +++ b/examples/keras/image_recognition/vgg16/quantization/ptq/main.py @@ -130,7 +130,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model import Model - model = Model(FLAGS.input_model).model + model = Model(FLAGS.input_model, backend='itex').model accuracy = evaluate(model) print('Batch size = %d' % FLAGS.batch_size) print("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/vgg19/quantization/ptq/main.py b/examples/keras/image_recognition/vgg19/quantization/ptq/main.py index 4f9f09b4085..fba7c004931 100644 --- a/examples/keras/image_recognition/vgg19/quantization/ptq/main.py +++ b/examples/keras/image_recognition/vgg19/quantization/ptq/main.py @@ -130,7 +130,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model import Model - model = Model(FLAGS.input_model).model + model = Model(FLAGS.input_model, backend='itex').model accuracy = evaluate(model) print('Batch size = %d' % FLAGS.batch_size) print("Accuracy: %.5f" % accuracy) diff --git a/examples/keras/image_recognition/xception/quantization/ptq/main.py b/examples/keras/image_recognition/xception/quantization/ptq/main.py index 6d0b3e2ac42..359d7e083c9 100644 --- a/examples/keras/image_recognition/xception/quantization/ptq/main.py +++ b/examples/keras/image_recognition/xception/quantization/ptq/main.py @@ -138,7 +138,7 @@ def main(_): fit(FLAGS.input_model, conf, b_func=evaluate) else: from neural_compressor.model.model import Model - accuracy = evaluate(Model(FLAGS.input_model).model) + accuracy = evaluate(Model(FLAGS.input_model, backend='itex').model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/main.py b/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/main.py index 9b5674a50d0..e0632a8631f 100644 --- a/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/main.py +++ b/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/main.py @@ -414,7 +414,7 @@ def eval_func(model): config = PostTrainingQuantConfig(approach="static", quant_format=args.quant_format, recipes={"optypes_to_exclude_output_quant": ["MatMul"]}) - q_model = quantization.fit(model, + q_model = quantization.fit(model, config, eval_func=eval_func, calib_dataloader=dataloader) diff --git a/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/gpt2.py b/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/gpt2.py index f861724dea8..1d05d6955c4 100644 --- a/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/gpt2.py +++ b/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/gpt2.py @@ -272,7 +272,7 @@ def eval_func(model): accuracy_criterion.relative = 0.11 config = PostTrainingQuantConfig(approach='dynamic', accuracy_criterion=accuracy_criterion) - q_model = quantization.fit(model, + q_model = quantization.fit(model, config, eval_func=eval_func) q_model.save(args.output_model) diff --git a/examples/pytorch/nlp/huggingface_models/text-to-image/quantization/run_diffusion.py b/examples/pytorch/nlp/huggingface_models/text-to-image/quantization/run_diffusion.py index 7392ac05b47..1dd38d7a115 100644 --- a/examples/pytorch/nlp/huggingface_models/text-to-image/quantization/run_diffusion.py +++ 
b/examples/pytorch/nlp/huggingface_models/text-to-image/quantization/run_diffusion.py @@ -322,7 +322,7 @@ def b_func(model): from neural_compressor.config import BenchmarkConfig b_conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=1) - fit(model, config=b_conf, b_func=b_func) + fit(model, conf=b_conf, b_func=b_func) if args.accuracy_only: setattr(pipe, "unet", model) accuracy(pipe, generator, _rows, args) diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/main.py b/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/main.py index cdd072c3c27..53c084edd62 100644 --- a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/main.py +++ b/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/main.py @@ -623,7 +623,7 @@ def benchmark_func(model): from neural_compressor.config import BenchmarkConfig from neural_compressor import benchmark b_conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=1) - benchmark.fit(int8_model, config=b_conf, b_func=benchmark_func) + benchmark.fit(int8_model, conf=b_conf, b_func=benchmark_func) else: if args.accuracy: eval_func(raw_model) @@ -631,7 +631,7 @@ def benchmark_func(model): from neural_compressor.config import BenchmarkConfig from neural_compressor import benchmark b_conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=1) - benchmark.fit(raw_model, config=b_conf, b_func=benchmark_func) + benchmark.fit(raw_model, conf=b_conf, b_func=benchmark_func) runner.finish() lg.DestroyQSL(qsl) diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/infer.py b/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/infer.py index 312b7299d5d..3d302738143 100644 --- a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/infer.py +++ b/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/infer.py @@ -658,7 +658,7 @@ def coco_eval(model): from neural_compressor.config import BenchmarkConfig from neural_compressor import benchmark b_conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=1) - benchmark.fit(ssd_r34, config=b_conf, b_func=coco_eval) + benchmark.fit(ssd_r34, conf=b_conf, b_func=coco_eval) return diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/main.py b/examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/main.py index 6b04cca6f12..ffa33d433ae 100644 --- a/examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/main.py +++ b/examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/main.py @@ -424,7 +424,7 @@ def training_func_for_nc(model, dataloader=None): from neural_compressor.config import BenchmarkConfig from neural_compressor import benchmark b_conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=1) - benchmark.fit(new_model, config=b_conf, b_func=eval_func) + benchmark.fit(new_model, conf=b_conf, b_func=eval_func) return return False diff --git a/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py b/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py index 1dcf3a00757..1da7dc46c4c 100644 --- a/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py @@ -127,7 +127,7 @@ def run(self): from neural_compressor.config import BenchmarkConfig if args.mode == 
'performance': conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=1) - fit(model=args.input_graph, config=conf, b_func=evaluate) + fit(model=args.input_graph, conf=conf, b_func=evaluate) else: from neural_compressor.model import Model model = Model(args.input_graph).model diff --git a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py b/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py index b95d854f0bd..65617b896b5 100644 --- a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py @@ -125,7 +125,7 @@ def run(self): 'filter': None } eval_dataloader = create_dataloader('tensorflow', dataloader_args) - fit(model=args.input_graph, config=conf, b_dataloader=eval_dataloader) + fit(model=args.input_graph, conf=conf, b_dataloader=eval_dataloader) else: from neural_compressor.model import Model model = Model(args.input_graph).model diff --git a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py b/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py index b95d854f0bd..65617b896b5 100644 --- a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py @@ -125,7 +125,7 @@ def run(self): 'filter': None } eval_dataloader = create_dataloader('tensorflow', dataloader_args) - fit(model=args.input_graph, config=conf, b_dataloader=eval_dataloader) + fit(model=args.input_graph, conf=conf, b_dataloader=eval_dataloader) else: from neural_compressor.model import Model model = Model(args.input_graph).model diff --git a/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/main.py b/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/main.py index 3c7e19a6da4..b2dc72c70ea 100644 --- a/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/main.py +++ b/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/main.py @@ -335,7 +335,7 @@ def evaluate(model): Args: model (tensorflow.Graph_def): The input model graph - + Returns: accuracy (float): evaluation result, the larger is better. 
""" diff --git a/neural_compressor/adaptor/tf_utils/graph_converter.py b/neural_compressor/adaptor/tf_utils/graph_converter.py index 18b516ed6cd..309b4828a19 100644 --- a/neural_compressor/adaptor/tf_utils/graph_converter.py +++ b/neural_compressor/adaptor/tf_utils/graph_converter.py @@ -126,7 +126,7 @@ def __init__(self, self._check_tf_version() self._check_args() - self._fp32_model = Model(self.model._model, **self.model.kwargs) + self._fp32_model = Model(self.model._model, **self.model.kwargs, backend="itex" if itex_mode else "default") self._fp32_model.graph_def = self.model.graph_def self._fp32_model.output_tensor_names = self.output_tensor_names self._fp32_model.input_tensor_names = self.input_tensor_names @@ -145,7 +145,7 @@ def __init__(self, self.scale_info.update({'bf16_ops': self.bf16_ops}) self.scale_info.update({'fp32_ops': self.fp32_ops}) - self._sampling_model = Model(self.model._model, **self.model.kwargs) + self._sampling_model = Model(self.model._model, **self.model.kwargs, backend="itex" if itex_mode else "default") self._sampling_model.output_tensor_names = self.output_tensor_names self._sampling_model.input_tensor_names = self.input_tensor_names @@ -154,7 +154,7 @@ def __init__(self, self._tmp_graph_def = self.model.graph_def else: self._tmp_graph_def = copy.deepcopy(self.model.graph_def) - self.new_api = new_api #bool(version1_gte_version2(tf.version.VERSION, '2.8.0')) + self.new_api = new_api # bool(version1_gte_version2(tf.version.VERSION, '2.8.0')) self.use_bf16 = use_bf16 self.exclude_node_names = [] @@ -231,7 +231,7 @@ def check_shape(tensor, data): disorder_tensors = [] disorder_inputs = [] for idx, sort_tensor in enumerate(input_tensor): - sort_input = inputs[idx] + sort_input = inputs[idx] if check_shape(sort_tensor, sort_input): feed_dict.update({sort_tensor: sort_input}) else: @@ -326,7 +326,8 @@ def _gen_tmp_filenames(self): self._tmp_model = self._fp32_model else: # to keep temp model - self._tmp_model = Model(self.model._model, **self.model.kwargs) + self._tmp_model = Model(self.model._model, **self.model.kwargs, + backend="itex" if self.itex_mode else "default") self._tmp_model.graph_def = self.model.graph_def self._tmp_model.output_tensor_names = self.output_tensor_names self._tmp_model.input_tensor_names = self.input_tensor_names @@ -495,7 +496,7 @@ def quantize(self): output_tensor_names = copy.deepcopy(self.model.output_tensor_names) sampling_graph_def = copy.deepcopy(self._fp32_model.graph_def) - # TODO: this is a workaround to make Min/Max node be completly eliminated in int8 graph + # TODO: this is a workaround to make Min/Max node be completly eliminated in int8 graph # after enabling pad+conv2d in new API. 
non_pad_ops = list(list(set(self.fp32_ops).union(set(self.bf16_ops)))) sampling_graph_def = FusePadWithFP32Conv2DOptimizer( @@ -602,7 +603,7 @@ def _generate_calibration_data(self, tmp_path, output_data, enable_kl_algo=False logger.debug("Generate calibration data and save to {}.".format(tmp_dump_file)) - model = Model(tmp_path, **self._tmp_model.kwargs) + model = Model(tmp_path, **self._tmp_model.kwargs, backend="itex" if self.itex_mode else "default") model.output_tensor_names = self.output_tensor_names model.input_tensor_names = self.input_tensor_names @@ -668,7 +669,7 @@ def _fuse_requantize_with_fused_quantized_node(self): if self.qdq_enabled: self._tmp_graph_def = FuseMatMulRequantizeNewAPITransformer( self._tmp_graph_def).do_transformation() - + self._tmp_graph_def = FuseMatMulRequantizeDequantizeNewAPITransformer( self._tmp_graph_def).do_transformation() else: @@ -677,7 +678,7 @@ def _fuse_requantize_with_fused_quantized_node(self): self._tmp_graph_def = FuseMatMulRequantizeDequantizeTransformer( self._tmp_graph_def).do_transformation() - + self._tmp_graph_def = StripUnusedNodesOptimizer( self._tmp_graph_def, self._tmp_model.input_node_names, @@ -751,7 +752,7 @@ def quantize_with_qdq_pattern(self): def _insert_qdq_pairs(self): """Insert QDQ pairs before Conv/MatMul/Pooling Ops.""" - # Fuse Pad into Conv2D, Conv3D, DepthwiseConv2dNative + # Fuse Pad into Conv2D, Conv3D, DepthwiseConv2dNative non_pad_ops = list(list(set(self.fp32_ops).union(set(self.bf16_ops)))) self._tmp_graph_def = FusePadWithConv2DOptimizer( self._tmp_graph_def, @@ -828,7 +829,7 @@ def _insert_qdq_pairs(self): # Insert QDQ pattern self._tmp_graph_def = GenerateGraphWithQDQPattern( self._tmp_graph_def, self._calibration_data, self.op_wise_config, - self.fake_quant, self.fp32_ops, self.bf16_ops, self.quantized_node_info, + self.fake_quant, self.fp32_ops, self.bf16_ops, self.quantized_node_info, self.device, self.performance_only, self.itex_mode).do_transformation() def _convert_qdq(self): diff --git a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py index 4ef09bba613..5ed17ad42e1 100644 --- a/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py +++ b/neural_compressor/adaptor/tf_utils/graph_rewriter/generic/pre_optimize.py @@ -108,7 +108,7 @@ def get_optimized_model(self, itex_mode=False): """ from neural_compressor.model import Model - origin_model = Model(self.model._model, **self.model.kwargs) + origin_model = Model(self.model._model, **self.model.kwargs, backend="itex" if itex_mode else "default") origin_model.name = self.model.name origin_model.model_type = self.model.model_type origin_model.output_tensor_names = self.model.output_tensor_names diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py index 7d228ecd706..e21e5db90b5 100644 --- a/neural_compressor/benchmark.py +++ b/neural_compressor/benchmark.py @@ -352,7 +352,7 @@ def summary_benchmark(): pass -def benchmark_with_raw_cmd(raw_cmd, config=None): +def benchmark_with_raw_cmd(raw_cmd, conf=None): """Benchmark the model performance with the raw commend. 
Args: @@ -369,23 +369,23 @@ def benchmark_with_raw_cmd(raw_cmd, config=None): conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=7) fit_with_raw_cmd("test.py", conf) """ - if config is not None: - if config.backend == "ipex": + if conf is not None: + if conf.backend == "ipex": import intel_extension_for_pytorch assert sys.platform in ['linux', 'win32'], 'only support platform windows and linux...' # disable multi-instance for running bechmark on GPU device - set_all_env_var(config) + set_all_env_var(conf) config_instance(raw_cmd) summary_benchmark() -def fit(model, config, b_dataloader=None, b_func=None): +def fit(model, conf, b_dataloader=None, b_func=None): """Benchmark the model performance with the configure. Args: model (object): The model to be benchmarked. - config (BenchmarkConfig): The configuration for benchmark containing accuracy goal, + conf (BenchmarkConfig): The configuration for benchmark containing accuracy goal, tuning objective and preferred calibration & quantization tuning space etc. b_dataloader: The dataloader for frameworks. @@ -398,24 +398,24 @@ def fit(model, config, b_dataloader=None, b_func=None): from neural_compressor.benchmark import fit conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model='./int8.pb', config=conf, b_dataloader=eval_dataloader) + fit(model='./int8.pb', conf=conf, b_dataloader=eval_dataloader) """ - if config.backend == "ipex": + if conf.backend == "ipex": import intel_extension_for_pytorch - wrapped_model = Model(model, conf=config) + wrapped_model = Model(model, conf=conf) if b_dataloader is not None: check_dataloader(b_dataloader) assert sys.platform in ['linux', 'win32'], 'only support platform windows and linux...' # disable multi-instance for running bechmark on GPU device - set_all_env_var(config) - if config.device == 'gpu': + set_all_env_var(conf) + if conf.device == 'gpu': set_env_var('NC_ENV_CONF', True, overwrite_existing=True) logger.info("Start to run Benchmark.") if os.environ.get('NC_ENV_CONF') == 'True': - return run_instance(model=wrapped_model, conf=config, b_dataloader=b_dataloader, b_func=b_func) + return run_instance(model=wrapped_model, conf=conf, b_dataloader=b_dataloader, b_func=b_func) raw_cmd = sys.executable + ' ' + ' '.join(sys.argv) benchmark_with_raw_cmd(raw_cmd) diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 88cf8a3c6ad..47696c70b2a 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -247,7 +247,7 @@ class BenchmarkConfig: from neural_compressor.benchmark import fit conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=7) - fit(model='./int8.pb', config=conf, b_dataloader=eval_dataloader) + fit(model='./int8.pb', conf=conf, b_dataloader=eval_dataloader) """ def __init__(self, inputs=[], @@ -1662,7 +1662,7 @@ class MixedPrecisionConfig(object): from neural_compressor.config import MixedPrecisionConfig conf = MixedPrecisionConfig() - converted_model = mix_precision.fit(model, config=conf) + converted_model = mix_precision.fit(model, conf=conf) """ def __init__(self, device="cpu", diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py index 85dc4b1fbfa..386347f5943 100644 --- a/neural_compressor/mix_precision.py +++ b/neural_compressor/mix_precision.py @@ -23,7 +23,7 @@ from neural_compressor.data.dataloaders.dataloader import check_dataloader from neural_compressor.metric.metric import register_customer_metric -from .utils.utility import 
time_limit +from .utils.utility import time_limit, CpuInfo from .strategy import STRATEGIES from .config import _Config, options from .utils import logger @@ -31,7 +31,7 @@ def fit(model, - config=None, + conf=None, eval_func=None, eval_dataloader=None, eval_metric=None, @@ -47,7 +47,7 @@ def fit(model, to .onnx file or onnx.onnx_ml_pb2.ModelProto. For MXNet model, it's mxnet.symbol.Symbol or gluon.HybirdBlock instance. - config (MixedPrecisionConfig): The MixedPrecisionConfig class containing accuracy goal, + conf (MixedPrecisionConfig): The MixedPrecisionConfig class containing accuracy goal, tuning objective and mixed_precision tuning space etc. eval_func (function, optional): The evaluation function provided by user. This function takes model as parameter, @@ -83,24 +83,44 @@ def fit(model, from neural_compressor.config import MixedPrecisionConfig conf = MixedPrecisionConfig() - converted_model = mix_precision.fit(model, config=conf) + converted_model = mix_precision.fit(model, conf=conf) """ if eval_dataloader is not None: check_dataloader(eval_dataloader) - if config.precisions in config.excluded_precisions: + if conf.precisions in conf.excluded_precisions: logger.warning("Target precision is in excluded_precisions, " "please modify precision or excluded_precisions to make it understandable.") sys.exit(0) - wrapped_model = Model(model, conf=config) + wrapped_model = Model(model, conf=conf) + + precisions = list(set(conf.precisions) - set(conf.excluded_precisions)) + if ('bf16' in precisions or 'fp16' in precisions) and conf.framework == "onnxruntime": # pragma: no cover + if conf.device == "cpu": + logger.warning("Mix precision exits due to device isn't gpu for onnx models.") + sys.exit(0) + elif conf.backend != "onnxrt_cuda_ep": + logger.warning("Mix precision exits due to backend isn't onnxrt_cuda_ep for onnx models.") + sys.exit(0) + elif 'bf16' in precisions and not CpuInfo().bf16 and conf.framework != "onnxruntime": # pragma: no cover + if os.getenv('FORCE_BF16') == '1': + logger.warning("Mix precision will generate bf16 graph although " + "the hardware doesn't support bf16 instruction.") + else: + logger.warning("Mix precision exits due to the hardware " + "doesn't support bf16 instruction.") + sys.exit(0) + elif 'fp16' in precisions and conf.framework != "onnxruntime": + logger.warning("Currently mix precision only supports fp16 for onnx models.") + sys.exit(0) if eval_metric is not None: - metric = register_customer_metric(eval_metric, config.framework) + metric = register_customer_metric(eval_metric, conf.framework) else: metric = None - conf = _Config(mixed_precision=config, + config = _Config(mixed_precision=conf, quantization=None, benchmark=None, pruning=None, @@ -123,7 +143,7 @@ def fit(model, strategy = STRATEGIES['automixedprecision']( model=wrapped_model, - conf=conf, + conf=config, eval_func=eval_func, eval_dataloader=eval_dataloader, eval_metric=metric, @@ -131,7 +151,7 @@ def fit(model, q_hooks=None) try: - with time_limit(config.tuning_criterion.timeout): + with time_limit(conf.tuning_criterion.timeout): strategy.traverse() except KeyboardInterrupt: pass diff --git a/neural_compressor/model/model.py b/neural_compressor/model/model.py index 2d898872119..5df07d38d1e 100644 --- a/neural_compressor/model/model.py +++ b/neural_compressor/model/model.py @@ -204,7 +204,7 @@ def __new__(cls, root, **kwargs): model_type = kwargs['modelType'] else: model_type = get_model_type(root) - if model_type == "keras" and kwargs.get("framework", None) != "tensorflow": + if 
model_type == "keras" and kwargs.get("backend", None) == "itex": return MODELS['keras'](root, **kwargs) else: return MODELS[framework](model_type, root, **kwargs) diff --git a/neural_compressor/training.py b/neural_compressor/training.py index 4204955b012..eb0b17971cd 100644 --- a/neural_compressor/training.py +++ b/neural_compressor/training.py @@ -32,6 +32,7 @@ WeightPruningConfig) from typing import Callable, List, Union + class CompressionManager: """CompressionManager is uesd in train loop for what user want to deal with additional. diff --git a/test/benchmark/test_benchmark_2.x.py b/test/benchmark/test_benchmark_2.x.py index c812059de0f..ae3651b01fa 100644 --- a/test/benchmark/test_benchmark_2.x.py +++ b/test/benchmark/test_benchmark_2.x.py @@ -197,7 +197,7 @@ def test_benchmark_data_25(self): def test_benchmark_raw_cmd(self): conf = BenchmarkConfig(warmup=5, iteration=10, cores_per_instance=4, num_of_instance=2) raw_cmd = "python fake_raw_cmd.py --input_model={}".format(self.graph_path) - benchmark_with_raw_cmd(raw_cmd, config=conf) + benchmark_with_raw_cmd(raw_cmd, conf=conf) for i in range(2): with open(f'2_4_{i}.log', "r") as f: for line in f: diff --git a/test/mixed_precision/test_mixed_precision_keras_model.py b/test/mixed_precision/test_mixed_precision_keras_model.py index 16e7cab3608..4a3950ee49e 100644 --- a/test/mixed_precision/test_mixed_precision_keras_model.py +++ b/test/mixed_precision/test_mixed_precision_keras_model.py @@ -99,7 +99,7 @@ def test_mixed_precision_with_keras_model(self): config = MixedPrecisionConfig() q_model = mix_precision.fit( model='./models/saved_model', - config=config, + conf=config, eval_dataloader=dataloader, eval_metric=MyMetric()) From 82220f22a36de5f81e4871b29074fc50382a7ab3 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Fri, 5 May 2023 18:02:24 +0800 Subject: [PATCH 07/14] Fixed docstring check error Signed-off-by: Cheng, Penghui --- neural_compressor/benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py index e21e5db90b5..bf20668f7b2 100644 --- a/neural_compressor/benchmark.py +++ b/neural_compressor/benchmark.py @@ -357,7 +357,7 @@ def benchmark_with_raw_cmd(raw_cmd, conf=None): Args: raw_cmd (string): The commend to be benchmarked. - config (BenchmarkConfig): The configuration for benchmark containing accuracy goal, + conf (BenchmarkConfig): The configuration for benchmark containing accuracy goal, tuning objective and preferred calibration & quantization tuning space etc. 
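For reference, a minimal usage sketch of the renamed keyword arguments that these patches introduce (`conf=` replacing `config=` in the `fit()` entry points, and `precisions` replacing `precision` in `MixedPrecisionConfig`). This sketch is not part of any patch in the series; the model paths and the benchmark body are hypothetical placeholders.

```python
# Sketch assuming the updated API from this patch series; paths and the
# benchmark body are hypothetical placeholders.
from neural_compressor import mix_precision
from neural_compressor.benchmark import fit as benchmark_fit
from neural_compressor.config import BenchmarkConfig, MixedPrecisionConfig

# Mixed precision conversion: default precision is bf16; the config is now
# passed through the 'conf' keyword.
mp_conf = MixedPrecisionConfig()
converted_model = mix_precision.fit('./fp32_model.pb', conf=mp_conf)
converted_model.save('./path/to/save/')

# Benchmark: pass the config through 'conf' and supply either b_dataloader
# or a user-defined b_func that takes the model as its only argument.
def b_func(model):
    # hypothetical evaluation loop that logs "Latency: ..." / "Throughput: ..."
    pass

b_conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=1)
benchmark_fit(model='./int8.pb', conf=b_conf, b_func=b_func)
```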
From acfede3c53bf8389b1582aefd99cccfe3e8a78e3 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Fri, 5 May 2023 19:55:18 +0800 Subject: [PATCH 08/14] Fixed UT error Signed-off-by: Cheng, Penghui --- .../adaptor/tf_utils/graph_converter.py | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/neural_compressor/adaptor/tf_utils/graph_converter.py b/neural_compressor/adaptor/tf_utils/graph_converter.py index 309b4828a19..7c322ee3bce 100644 --- a/neural_compressor/adaptor/tf_utils/graph_converter.py +++ b/neural_compressor/adaptor/tf_utils/graph_converter.py @@ -126,7 +126,10 @@ def __init__(self, self._check_tf_version() self._check_args() - self._fp32_model = Model(self.model._model, **self.model.kwargs, backend="itex" if itex_mode else "default") + if "backend" in self.model.kwargs: + self._fp32_model = Model(self.model._model, **self.model.kwargs) + else: + self._fp32_model = Model(self.model._model, **self.model.kwargs, backend="itex" if itex_mode else "default") self._fp32_model.graph_def = self.model.graph_def self._fp32_model.output_tensor_names = self.output_tensor_names self._fp32_model.input_tensor_names = self.input_tensor_names @@ -145,7 +148,10 @@ def __init__(self, self.scale_info.update({'bf16_ops': self.bf16_ops}) self.scale_info.update({'fp32_ops': self.fp32_ops}) - self._sampling_model = Model(self.model._model, **self.model.kwargs, backend="itex" if itex_mode else "default") + if "backend" in self.model.kwargs: + self._sampling_model = Model(self.model._model, **self.model.kwargs) + else: + self._sampling_model = Model(self.model._model, **self.model.kwargs, backend="itex" if itex_mode else "default") self._sampling_model.output_tensor_names = self.output_tensor_names self._sampling_model.input_tensor_names = self.input_tensor_names @@ -326,8 +332,11 @@ def _gen_tmp_filenames(self): self._tmp_model = self._fp32_model else: # to keep temp model - self._tmp_model = Model(self.model._model, **self.model.kwargs, - backend="itex" if self.itex_mode else "default") + if "backend" in self.model.kwargs: + self._tmp_model = Model(self.model._model, **self.model.kwargs) + else: + self._tmp_model = Model(self.model._model, **self.model.kwargs, + backend="itex" if self.itex_mode else "default") self._tmp_model.graph_def = self.model.graph_def self._tmp_model.output_tensor_names = self.output_tensor_names self._tmp_model.input_tensor_names = self.input_tensor_names @@ -484,7 +493,7 @@ def quantize(self): self._quantize_graph() self.quantized_node_info = [tuple(i) for i in self.quantized_node_info] - if self.fake_quant: # pragma: no cover + if self.fake_quant: # pragma: no cover self._fuse_requantize_with_fused_quantized_node() else: if self._enable_kl_op_names: @@ -603,7 +612,10 @@ def _generate_calibration_data(self, tmp_path, output_data, enable_kl_algo=False logger.debug("Generate calibration data and save to {}.".format(tmp_dump_file)) - model = Model(tmp_path, **self._tmp_model.kwargs, backend="itex" if self.itex_mode else "default") + if "backend" in self._tmp_model.kwargs: + model = Model(tmp_path, **self._tmp_model.kwargs) + else: + model = Model(tmp_path, **self._tmp_model.kwargs, backend="itex" if self.itex_mode else "default") model.output_tensor_names = self.output_tensor_names model.input_tensor_names = self.input_tensor_names From f6a0cffc6e296d018cb33dd2bc1576ce0f5299f3 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Fri, 5 May 2023 21:07:42 +0800 Subject: [PATCH 09/14] Fixed pylink error Signed-off-by: Cheng, Penghui --- 
neural_compressor/adaptor/tf_utils/graph_converter.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/neural_compressor/adaptor/tf_utils/graph_converter.py b/neural_compressor/adaptor/tf_utils/graph_converter.py index 7c322ee3bce..62c4657905b 100644 --- a/neural_compressor/adaptor/tf_utils/graph_converter.py +++ b/neural_compressor/adaptor/tf_utils/graph_converter.py @@ -129,7 +129,9 @@ def __init__(self, if "backend" in self.model.kwargs: self._fp32_model = Model(self.model._model, **self.model.kwargs) else: - self._fp32_model = Model(self.model._model, **self.model.kwargs, backend="itex" if itex_mode else "default") + self._fp32_model = Model(self.model._model, + **self.model.kwargs, + backend="itex" if itex_mode else "default") self._fp32_model.graph_def = self.model.graph_def self._fp32_model.output_tensor_names = self.output_tensor_names self._fp32_model.input_tensor_names = self.input_tensor_names @@ -151,7 +153,9 @@ def __init__(self, if "backend" in self.model.kwargs: self._sampling_model = Model(self.model._model, **self.model.kwargs) else: - self._sampling_model = Model(self.model._model, **self.model.kwargs, backend="itex" if itex_mode else "default") + self._sampling_model = Model(self.model._model, + **self.model.kwargs, + backend="itex" if itex_mode else "default") self._sampling_model.output_tensor_names = self.output_tensor_names self._sampling_model.input_tensor_names = self.input_tensor_names @@ -335,7 +339,8 @@ def _gen_tmp_filenames(self): if "backend" in self.model.kwargs: self._tmp_model = Model(self.model._model, **self.model.kwargs) else: - self._tmp_model = Model(self.model._model, **self.model.kwargs, + self._tmp_model = Model(self.model._model, + **self.model.kwargs, backend="itex" if self.itex_mode else "default") self._tmp_model.graph_def = self.model.graph_def self._tmp_model.output_tensor_names = self.output_tensor_names From a3ac1e4c76d6feae9bb5f113d2022769f7fdc42c Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Mon, 8 May 2023 10:29:56 +0800 Subject: [PATCH 10/14] Update code style Signed-off-by: Cheng, Penghui --- neural_compressor/benchmark.py | 2 +- neural_compressor/data/__init__.py | 2 ++ neural_compressor/metric/__init__.py | 5 +++-- neural_compressor/mix_precision.py | 6 +++--- neural_compressor/quantization.py | 4 ++-- neural_compressor/training.py | 2 +- 6 files changed, 12 insertions(+), 9 deletions(-) diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py index bf20668f7b2..aa64f23559d 100644 --- a/neural_compressor/benchmark.py +++ b/neural_compressor/benchmark.py @@ -25,7 +25,7 @@ import psutil from threading import Thread -from neural_compressor.data.dataloaders.dataloader import check_dataloader +from neural_compressor.data import check_dataloader from .adaptor import FRAMEWORKS from .objective import MultiObjective from .config import BenchmarkConfig, options diff --git a/neural_compressor/data/__init__.py b/neural_compressor/data/__init__.py index 11a743ec345..4aaaa57c0e5 100644 --- a/neural_compressor/data/__init__.py +++ b/neural_compressor/data/__init__.py @@ -22,6 +22,7 @@ import neural_compressor.data.transforms from .datasets import Datasets, Dataset, IterableDataset, dataset_registry, TensorflowImageRecord, COCORecordDataset from .dataloaders import DATALOADERS, DataLoader +from .dataloaders.dataloader import check_dataloader from .dataloaders.default_dataloader import DefaultDataLoader from .transforms import TRANSFORMS, BaseTransform, ComposeTransform, transform_registry, 
Postprocess from .transforms import LabelShift, BilinearImagenetTransform, TensorflowResizeCropImagenetTransform @@ -32,6 +33,7 @@ from .filters import FILTERS, Filter, filter_registry, LabelBalanceCOCORecordFilter __all__ = [ + "check_dataloader", "DataLoader", "DATALOADERS", "DefaultDataLoader", diff --git a/neural_compressor/metric/__init__.py b/neural_compressor/metric/__init__.py index 04d859d81fe..ab53b731065 100644 --- a/neural_compressor/metric/__init__.py +++ b/neural_compressor/metric/__init__.py @@ -18,7 +18,8 @@ """Intel Neural Compressor Metric.""" -from .metric import METRICS, Metric, BaseMetric, TensorflowTopK, metric_registry, COCOmAPv2, SquadF1, GeneralTopK +from .metric import (METRICS, Metric, BaseMetric, TensorflowTopK, metric_registry, COCOmAPv2, SquadF1, GeneralTopK, + register_customer_metric) from os.path import dirname, basename, isfile, join import glob @@ -30,4 +31,4 @@ __all__ = ["METRICS", "Metric", "BaseMetric", "TensorflowTopK", "metric_registry", - "COCOmAPv2", "SquadF1", "GeneralTopK"] + "COCOmAPv2", "SquadF1", "GeneralTopK", "register_customer_metric"] diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py index 386347f5943..1e16b615252 100644 --- a/neural_compressor/mix_precision.py +++ b/neural_compressor/mix_precision.py @@ -21,8 +21,8 @@ import numpy as np import random -from neural_compressor.data.dataloaders.dataloader import check_dataloader -from neural_compressor.metric.metric import register_customer_metric +from neural_compressor.data import check_dataloader +from neural_compressor.metric import register_customer_metric from .utils.utility import time_limit, CpuInfo from .strategy import STRATEGIES from .config import _Config, options @@ -31,7 +31,7 @@ def fit(model, - conf=None, + conf, eval_func=None, eval_dataloader=None, eval_metric=None, diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index e30708a3eca..d9ee2f6c953 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -21,8 +21,8 @@ import random import numpy as np from .config import _Config, options -from .data.dataloaders.dataloader import check_dataloader -from .metric.metric import register_customer_metric +from .data import check_dataloader +from .metric import register_customer_metric from .model import Model from .strategy import STRATEGIES from .utils import logger diff --git a/neural_compressor/training.py b/neural_compressor/training.py index eb0b17971cd..1ed7549eb27 100644 --- a/neural_compressor/training.py +++ b/neural_compressor/training.py @@ -23,7 +23,7 @@ from .adaptor import FRAMEWORKS from .compression.callbacks import QuantizationAwareTrainingCallbacks, DistillationCallbacks, PruningCallbacks from .config import _Config, options -from .metric.metric import register_customer_metric +from .metric import register_customer_metric from .model.model import Model from .utils import logger from .utils.utility import time_limit From 5a11a289b40d600c3ab26fdda324755037d5e187 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Mon, 8 May 2023 10:36:55 +0800 Subject: [PATCH 11/14] Update docs Signed-off-by: Cheng, Penghui --- docs/source/mixed_precision.md | 2 +- neural_compressor/quantization.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/source/mixed_precision.md b/docs/source/mixed_precision.md index e288890d745..fa134e6c0b9 100644 --- a/docs/source/mixed_precision.md +++ b/docs/source/mixed_precision.md @@ -42,7 +42,7 @@ Supported precisions for 
mix precision include bf16 and fp16. If users want to g from neural_compressor import mix_precision from neural_compressor.config import MixedPrecisionConfig -conf = MixedPrecisionConfig(precisions='bf16') +conf = MixedPrecisionConfig() # default precision is bf16 converted_model = mix_precision.fit(model, conf=conf) converted_model.save('./path/to/save/') ``` diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index d9ee2f6c953..e03c8cb43e9 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -61,9 +61,7 @@ def fit(model, calib_func (function, optional): Calibration function for post-training static quantization. It is optional. This function takes "model" as input parameter - and executes entire inference process. If this - parameter specified, calib_dataloader is also needed - for FX trace if PyTorch >= 1.13. + and executes entire inference process. eval_func (function, optional): The evaluation function provided by user. This function takes model as parameter, and evaluation dataset and metrics should be From d54c6510ea35c084c79a093fd0df2cc36bf468a8 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Mon, 8 May 2023 12:15:00 +0800 Subject: [PATCH 12/14] Fixed import error Signed-off-by: Cheng, Penghui --- docs/source/pruning.md | 3 ++- .../text-classification/pruning/eager/run_glue_no_trainer.py | 2 +- .../pruning/eager/run_glue_no_trainer_mixed.py | 3 ++- neural_compressor/__init__.py | 1 - neural_compressor/compression/__init__.py | 2 +- neural_compressor/compression/pruner/README.md | 3 ++- test/pruning_2_plus.x/test_pruning.py | 3 ++- test/pruning_2_plus.x/test_pruning_block.py | 3 ++- 8 files changed, 12 insertions(+), 8 deletions(-) diff --git a/docs/source/pruning.md b/docs/source/pruning.md index 1b7f064ca77..21e5b34663a 100644 --- a/docs/source/pruning.md +++ b/docs/source/pruning.md @@ -301,7 +301,8 @@ The following section exemplifies how to use hooks in user pass-in training func [**Experimental option** ]Modify model and optimizer. 
```python - from neural_compressor import prepare_pruning, WeightPruningConfig + from neural_compressor import WeightPruningConfig + from neural_compressor.experimental.compression import prepare_pruning config = WeightPruningConfig(configs) prepare_pruning(config, model, optimizer) # modify model and optimizer for epoch in range(num_train_epochs): diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer.py b/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer.py index 0fd9c1ad61d..ea566c5e3e3 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer.py +++ b/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer.py @@ -523,7 +523,7 @@ def preprocess_function(examples): # pruner = Pruning(config) # pruner.model = model # pruner.on_train_begin() - from neural_compressor import prepare_pruning + from neural_compressor.experimental.compression import prepare_pruning prepare_pruning(configs, model, optimizer) diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer_mixed.py b/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer_mixed.py index 9860d7b4e66..784a46ed698 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer_mixed.py +++ b/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer_mixed.py @@ -46,7 +46,8 @@ ) from transformers.file_utils import get_full_repo_name from transformers.utils.versions import require_version -from neural_compressor import prepare_pruning, WeightPruningConfig +from neural_compressor import WeightPruningConfig +from neural_compressor.experimental.compression import prepare_pruning logger = logging.getLogger(__name__) diff --git a/neural_compressor/__init__.py b/neural_compressor/__init__.py index ba722b9db2c..2cf56182e99 100644 --- a/neural_compressor/__init__.py +++ b/neural_compressor/__init__.py @@ -18,7 +18,6 @@ """IntelĀ® Neural Compressor: An open-source Python library supporting popular model compression techniques.""" from .version import __version__ # we need to set a global 'NA' backend, or Model can't be used -from .experimental.compression import prepare_pruning from .config import DistillationConfig, PostTrainingQuantConfig, \ WeightPruningConfig, QuantizationAwareTrainingConfig, \ MixedPrecisionConfig diff --git a/neural_compressor/compression/__init__.py b/neural_compressor/compression/__init__.py index b9b85c89c7e..71e6a06cd29 100644 --- a/neural_compressor/compression/__init__.py +++ b/neural_compressor/compression/__init__.py @@ -15,4 +15,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .callbacks import QuantizationAwareTrainingCallbacks, DistillationCallbacks, PruningCallbacks +from .callbacks import QuantizationAwareTrainingCallbacks, DistillationCallbacks, PruningCallbacks \ No newline at end of file diff --git a/neural_compressor/compression/pruner/README.md b/neural_compressor/compression/pruner/README.md index f8b0cfaa925..6fb3738ec51 100644 --- a/neural_compressor/compression/pruner/README.md +++ b/neural_compressor/compression/pruner/README.md @@ -301,7 +301,8 @@ The following section exemplifies how to use hooks in user pass-in training func [**Experimental option** ]Modify model and optimizer. 
```python - from neural_compressor import prepare_pruning, WeightPruningConfig + from neural_compressor import WeightPruningConfig + from neural_compressor.experimental.compression import prepare_pruning config = WeightPruningConfig(configs) prepare_pruning(config, model, optimizer) # modify model and optimizer for epoch in range(num_train_epochs): diff --git a/test/pruning_2_plus.x/test_pruning.py b/test/pruning_2_plus.x/test_pruning.py index 396d2f63b78..9a3df39a62c 100644 --- a/test/pruning_2_plus.x/test_pruning.py +++ b/test/pruning_2_plus.x/test_pruning.py @@ -8,7 +8,8 @@ sys.path.insert(0, './') from neural_compressor.data import Datasets from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor import prepare_pruning, WeightPruningConfig +from neural_compressor import WeightPruningConfig +from neural_compressor.experimental.compression import prepare_pruning class TestPruning(unittest.TestCase): diff --git a/test/pruning_2_plus.x/test_pruning_block.py b/test/pruning_2_plus.x/test_pruning_block.py index e4b10945755..14a59dd39e4 100644 --- a/test/pruning_2_plus.x/test_pruning_block.py +++ b/test/pruning_2_plus.x/test_pruning_block.py @@ -7,7 +7,8 @@ sys.path.insert(0, './') from neural_compressor.data import Datasets from neural_compressor.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor import prepare_pruning, WeightPruningConfig +from neural_compressor import WeightPruningConfig +from neural_compressor.experimental.compression import prepare_pruning class TestPruning(unittest.TestCase): From 7daaabe5c80c31060edebe26e033372439aebef9 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Mon, 8 May 2023 21:18:34 +0800 Subject: [PATCH 13/14] alias parameters with old name for benchmark and mixed_precision Signed-off-by: Cheng, Penghui --- .../quantization/ptq/main.py | 2 +- .../inception_v3/quantization/ptq/main.py | 2 +- .../mobilenet_v2/quantization/ptq/main.py | 2 +- .../resnet101/quantization/ptq/main.py | 2 +- .../resnet50/quantization/ptq/main.py | 2 +- .../resnet50_fashion/quantization/ptq/main.py | 2 +- .../resnetv2_101/quantization/ptq/main.py | 2 +- .../resnetv2_50/quantization/ptq/main.py | 2 +- .../vgg16/quantization/ptq/main.py | 2 +- .../vgg19/quantization/ptq/main.py | 2 +- .../xception/quantization/ptq/main.py | 2 +- .../quantization/ptq/main.py | 2 +- .../mobilenet_v1/quantization/ptq/main.py | 2 +- .../mobilenet_v2/quantization/ptq/main.py | 2 +- .../densenet121/quantization/ptq/main.py | 2 +- .../densenet161/quantization/ptq/main.py | 2 +- .../densenet169/quantization/ptq/main.py | 2 +- .../efficientnet-b0/quantization/ptq/main.py | 2 +- .../quantization/ptq/main.py | 2 +- .../inception_v1/quantization/ptq/main.py | 2 +- .../inception_v2/quantization/ptq/main.py | 2 +- .../inception_v3/quantization/ptq/main.py | 2 +- .../inception_v4/quantization/ptq/main.py | 2 +- .../mobilenet_v1/quantization/ptq/main.py | 2 +- .../mobilenet_v2/quantization/ptq/main.py | 2 +- .../mobilenet_v3/quantization/ptq/main.py | 2 +- .../resnet101/quantization/ptq/main.py | 2 +- .../resnet50_v1/quantization/ptq/main.py | 2 +- .../resnet50_v1_5/quantization/ptq/main.py | 2 +- .../resnet_v2_101/quantization/ptq/main.py | 2 +- .../resnet_v2_152/quantization/ptq/main.py | 2 +- .../resnet_v2_50/quantization/ptq/main.py | 2 +- .../vgg16/quantization/ptq/main.py | 2 +- .../vgg19/quantization/ptq/main.py | 2 +- .../quantization/ptq/tf_benchmark.py | 2 +- .../quantization/ptq/inference.py | 2 +- 
.../quantization/ptq/run_accuracy.py | 2 +- .../quantization/ptq/style_tune.py | 2 +- neural_compressor/benchmark.py | 4 +- neural_compressor/conf/pythonic_config.py | 4 +- neural_compressor/config.py | 4 +- neural_compressor/mix_precision.py | 7 +-- neural_compressor/utils/__init__.py | 4 +- neural_compressor/utils/utility.py | 46 ++++++++++++++----- test/itex/test_keras_in_keras_out.py | 4 +- test/itex/test_tensorflow_itex_2.x.py | 2 +- 46 files changed, 89 insertions(+), 62 deletions(-) diff --git a/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py b/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py index 1e742e36cb6..0889c1e312d 100644 --- a/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py +++ b/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py @@ -119,7 +119,7 @@ def main(_): if FLAGS.tune: from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.utils.utility import set_random_seed + from neural_compressor import set_random_seed set_random_seed(9527) config = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[20, 150]) diff --git a/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py b/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py index a27de78af52..82de2a2aa37 100644 --- a/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py +++ b/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py @@ -119,7 +119,7 @@ def main(_): if FLAGS.tune: from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.utils.utility import set_random_seed + from neural_compressor import set_random_seed set_random_seed(9527) config = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100]) diff --git a/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py b/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py index e5bbc475f24..ab5e7f9a137 100644 --- a/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py +++ b/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py @@ -118,7 +118,7 @@ def main(_): if FLAGS.tune: from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.utils.utility import set_random_seed + from neural_compressor import set_random_seed set_random_seed(9527) config = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100]) diff --git a/examples/keras/image_recognition/resnet101/quantization/ptq/main.py b/examples/keras/image_recognition/resnet101/quantization/ptq/main.py index dbad4085197..884620679bd 100644 --- a/examples/keras/image_recognition/resnet101/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnet101/quantization/ptq/main.py @@ -125,7 +125,7 @@ def main(_): if FLAGS.tune: from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.utils.utility import set_random_seed + from neural_compressor import set_random_seed set_random_seed(9524) config = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[10, 15]) diff --git a/examples/keras/image_recognition/resnet50/quantization/ptq/main.py b/examples/keras/image_recognition/resnet50/quantization/ptq/main.py 
index fba7c004931..9cf3d9c2cb3 100644 --- a/examples/keras/image_recognition/resnet50/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnet50/quantization/ptq/main.py @@ -110,7 +110,7 @@ def eval_func(dataloader, metric): return acc def main(_): - from neural_compressor.utils import set_random_seed + from neural_compressor import set_random_seed set_random_seed(9527) if FLAGS.tune: from neural_compressor import quantization diff --git a/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py b/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py index 19a5c125f03..6d163c39eba 100644 --- a/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py @@ -100,7 +100,7 @@ def eval_func(data_loader, metric): return acc def main(_): - from neural_compressor.utils import set_random_seed + from neural_compressor import set_random_seed set_random_seed(9527) if FLAGS.tune: from neural_compressor import quantization diff --git a/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py b/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py index e5bbc475f24..ab5e7f9a137 100644 --- a/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py @@ -118,7 +118,7 @@ def main(_): if FLAGS.tune: from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.utils.utility import set_random_seed + from neural_compressor import set_random_seed set_random_seed(9527) config = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100]) diff --git a/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py b/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py index e5bbc475f24..ab5e7f9a137 100644 --- a/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py @@ -118,7 +118,7 @@ def main(_): if FLAGS.tune: from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.utils.utility import set_random_seed + from neural_compressor import set_random_seed set_random_seed(9527) config = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100]) diff --git a/examples/keras/image_recognition/vgg16/quantization/ptq/main.py b/examples/keras/image_recognition/vgg16/quantization/ptq/main.py index 1396533ff13..0c6694e6fac 100644 --- a/examples/keras/image_recognition/vgg16/quantization/ptq/main.py +++ b/examples/keras/image_recognition/vgg16/quantization/ptq/main.py @@ -110,7 +110,7 @@ def eval_func(dataloader, metric): return acc def main(_): - from neural_compressor.utils import set_random_seed + from neural_compressor import set_random_seed set_random_seed(9527) if FLAGS.tune: from neural_compressor import quantization diff --git a/examples/keras/image_recognition/vgg19/quantization/ptq/main.py b/examples/keras/image_recognition/vgg19/quantization/ptq/main.py index fba7c004931..9cf3d9c2cb3 100644 --- a/examples/keras/image_recognition/vgg19/quantization/ptq/main.py +++ b/examples/keras/image_recognition/vgg19/quantization/ptq/main.py @@ -110,7 +110,7 @@ def eval_func(dataloader, metric): return acc def main(_): - from neural_compressor.utils import set_random_seed + 
from neural_compressor import set_random_seed
         set_random_seed(9527)
         if FLAGS.tune:
             from neural_compressor import quantization
diff --git a/examples/keras/image_recognition/xception/quantization/ptq/main.py b/examples/keras/image_recognition/xception/quantization/ptq/main.py
index 359d7e083c9..7e39fc77639 100644
--- a/examples/keras/image_recognition/xception/quantization/ptq/main.py
+++ b/examples/keras/image_recognition/xception/quantization/ptq/main.py
@@ -118,7 +118,7 @@ def main(_):
     if FLAGS.tune:
         from neural_compressor.quantization import fit
         from neural_compressor.config import PostTrainingQuantConfig
-        from neural_compressor.utils.utility import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         config = PostTrainingQuantConfig(backend='itex',
                                          calibration_sampling_size=[50, 100])
diff --git a/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py b/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py
index 1da7dc46c4c..8ad1b39bc7a 100644
--- a/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py
@@ -100,7 +100,7 @@ def eval_func(dataloader, metric):
 
 class eval_object_detection_optimized_graph(object):
     def run(self):
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
             from neural_compressor import quantization
diff --git a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py b/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py
index 65617b896b5..529e0cfe763 100644
--- a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py
@@ -93,7 +93,7 @@ def eval_func(dataloader, metric):
 
 class eval_object_detection_optimized_graph(object):
     def run(self):
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
             from neural_compressor import quantization
diff --git a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py b/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py
index 65617b896b5..529e0cfe763 100644
--- a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py
@@ -93,7 +93,7 @@ def eval_func(dataloader, metric):
 
 class eval_object_detection_optimized_graph(object):
     def run(self):
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
             from neural_compressor import quantization
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/main.py
index a8e25c9aa57..89b3733c41c 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/main.py
@@ -91,7 +91,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/main.py
index eefe7238b4f..6b8dfa20d96 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/main.py
@@ -91,7 +91,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/main.py
index eefe7238b4f..6b8dfa20d96 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/main.py
@@ -91,7 +91,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/main.py
index 9d05e80e5ea..3172d34b3dc 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/main.py
@@ -91,7 +91,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/main.py
index 2155ec2a821..c007b8dc18f 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/main.py
index 51672b8872a..bc9681f70a0 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/main.py
index 481d928498f..4595b3105a0 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/main.py
index 95194af2bc0..26fc20b6465 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/main.py
index aa52c8f5779..8187b01cd84 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/main.py
index e5bb40b9382..389ed7b7c63 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/main.py
index 4fc568d5e00..53e198bc717 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
        set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py
index 4789bfb80e8..fbcc385936c 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/main.py
index 48b87a7b8e4..8d534299d2b 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/main.py
@@ -89,7 +89,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/main.py
index c2db69f0702..2236af805d0 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py
index fed33935aee..0bef19c92ff 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py
@@ -93,7 +93,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/main.py
index 481d928498f..4595b3105a0 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/main.py
index 4fa8a56a1f7..44b48d1a0de 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py
index 481d928498f..4595b3105a0 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py
@@ -88,7 +88,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/main.py
index 1e3c3bcaf90..c9c03bdad8a 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/main.py
@@ -89,7 +89,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/main.py
index 1e3c3bcaf90..c9c03bdad8a 100644
--- a/examples/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/main.py
@@ -89,7 +89,7 @@ class eval_classifier_optimized_graph:
     def run(self):
         """This is neural_compressor function
         include tuning, export and benchmark option."""
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         set_random_seed(9527)
         if args.tune:
diff --git a/examples/tensorflow/oob_models/quantization/ptq/tf_benchmark.py b/examples/tensorflow/oob_models/quantization/ptq/tf_benchmark.py
index 93519f4dc49..5aefcbe03be 100644
--- a/examples/tensorflow/oob_models/quantization/ptq/tf_benchmark.py
+++ b/examples/tensorflow/oob_models/quantization/ptq/tf_benchmark.py
@@ -335,7 +335,7 @@ def __iter__(self):
     from neural_compressor.data import DataLoader
     from neural_compressor.quantization import fit
     from neural_compressor.config import PostTrainingQuantConfig
-    from neural_compressor.utils import set_random_seed
+    from neural_compressor import set_random_seed
     set_random_seed(9527)
 
     config = PostTrainingQuantConfig(
diff --git a/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/inference.py b/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/inference.py
index 2db968beba6..a21a12b47c0 100644
--- a/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/inference.py
+++ b/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/inference.py
@@ -183,7 +183,7 @@ def auto_tune(self):
         """
         from neural_compressor.quantization import fit
         from neural_compressor.config import PostTrainingQuantConfig
-        from neural_compressor.utils import set_random_seed
+        from neural_compressor import set_random_seed
         infer_graph = load_graph(self.args.input_graph)
         set_random_seed(9527)
diff --git a/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_accuracy.py b/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_accuracy.py
index 5f039c4f925..b20ecfd9d6d 100644
--- a/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_accuracy.py
+++ b/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_accuracy.py
@@ -206,7 +206,7 @@ def __len__(self):
     from neural_compressor.data import DataLoader
     from neural_compressor.quantization import fit
     from neural_compressor.config import PostTrainingQuantConfig
-    from neural_compressor.utils import set_random_seed
+    from neural_compressor import set_random_seed
     set_random_seed(9527)
 
     config = PostTrainingQuantConfig(calibration_sampling_size=[40])
diff --git a/examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_tune.py b/examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_tune.py
index 018f6216cef..4c81cb05006 100644
--- a/examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_tune.py
+++ b/examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_tune.py
@@ -122,7 +122,7 @@ def main(args=None):
             tf.import_graph_def(frozen_graph, name='')
     from neural_compressor import quantization
     from neural_compressor.config import PostTrainingQuantConfig
-    from neural_compressor.utils import set_random_seed
+    from neural_compressor import set_random_seed
     set_random_seed(9527)
     from neural_compressor.utils.create_obj_from_config import create_dataloader
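All of the example hunks above make the same mechanical substitution: `set_random_seed` is now re-exported from the top-level `neural_compressor` package instead of being imported from `neural_compressor.utils` or `neural_compressor.utils.utility`. As a rough, non-authoritative sketch of the pattern the updated examples converge on (the FP32 model and calibration dataloader that would be fed to `quantization.fit` are omitted here, since they differ per example):

    from neural_compressor import quantization, set_random_seed
    from neural_compressor.config import PostTrainingQuantConfig

    set_random_seed(9527)  # seed helper now lives at the package root
    config = PostTrainingQuantConfig(calibration_sampling_size=[50, 100])
    # a FP32 model plus a calibration dataloader would then be passed to quantization.fit(...)
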
diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py
index aa64f23559d..d0397f4ff63 100644
--- a/neural_compressor/benchmark.py
+++ b/neural_compressor/benchmark.py
@@ -29,8 +29,7 @@
 from .adaptor import FRAMEWORKS
 from .objective import MultiObjective
 from .config import BenchmarkConfig, options
-from .utils import logger
-from .utils import OPTIONS
+from .utils import alias_param, logger, OPTIONS
 from .utils.utility import GLOBAL_STATE, MODE
 from .model import BaseModel, Model
 from .utils import logger
@@ -380,6 +379,7 @@ def benchmark_with_raw_cmd(raw_cmd, conf=None):
     summary_benchmark()
 
 
+@alias_param("conf", param_alias='config')
 def fit(model, conf, b_dataloader=None, b_func=None):
     """Benchmark the model performance with the configure.
diff --git a/neural_compressor/conf/pythonic_config.py b/neural_compressor/conf/pythonic_config.py
index a2cddd25b46..ca16da078cd 100644
--- a/neural_compressor/conf/pythonic_config.py
+++ b/neural_compressor/conf/pythonic_config.py
@@ -111,12 +111,12 @@ class Options:
     Example::
 
-        from neural_compressor.utils.utility import set_random_seed, set_workspace, set_resume_from, set_tensorboard
+        from neural_compressor import set_random_seed, set_workspace, set_resume_from, set_tensorboard
         set_random_seed(2022)
         set_workspace("workspace_path")
         set_resume_from("workspace_path")
         set_tensorboard(True)
-    
+
     """
     def __init__(self, random_seed=1978, workspace=default_workspace, resume_from=None, tensorboard=False):
diff --git a/neural_compressor/config.py b/neural_compressor/config.py
index 47696c70b2a..5b7d6df397b 100644
--- a/neural_compressor/config.py
+++ b/neural_compressor/config.py
@@ -18,6 +18,7 @@
 import datetime
 import logging
 from schema import Schema, And, Optional
+from .utils import alias_param
 
 logger = logging.getLogger("neural_compressor")
 default_workspace = './nc_workspace/{}/'.format(
@@ -162,7 +163,7 @@ class Options:
     Example::
 
-        from neural_compressor.utils.utility import set_random_seed, set_workspace, set_resume_from, set_tensorboard
+        from neural_compressor import set_random_seed, set_workspace, set_resume_from, set_tensorboard
         set_random_seed(2022)
         set_workspace("workspace_path")
         set_resume_from("workspace_path")
@@ -1664,6 +1665,7 @@ class MixedPrecisionConfig(object):
         conf = MixedPrecisionConfig()
         converted_model = mix_precision.fit(model, conf=conf)
     """
+    @alias_param("precisions", param_alias="precision")
     def __init__(self,
                  device="cpu",
                  backend="default",
diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py
index 1e16b615252..057344591ab 100644
--- a/neural_compressor/mix_precision.py
+++ b/neural_compressor/mix_precision.py
@@ -23,13 +23,14 @@
 from neural_compressor.data import check_dataloader
 from neural_compressor.metric import register_customer_metric
-from .utils.utility import time_limit, CpuInfo
-from .strategy import STRATEGIES
 from .config import _Config, options
-from .utils import logger
 from .model import Model
+from .strategy import STRATEGIES
+from .utils import alias_param, logger
+from .utils.utility import time_limit, CpuInfo
 
 
+@alias_param("conf", param_alias='config')
 def fit(model,
         conf,
         eval_func=None,
diff --git a/neural_compressor/utils/__init__.py b/neural_compressor/utils/__init__.py
index d943c962138..032235e886f 100644
--- a/neural_compressor/utils/__init__.py
+++ b/neural_compressor/utils/__init__.py
@@ -20,7 +20,7 @@
 from .collect_layer_histogram import LayerHistogramCollector
 from .logger import log, info, debug, warn, warning, error, fatal
 from .options import OPTIONS
-from .utility import set_random_seed
+from .utility import alias_param
 
 __all__ = ["LayerHistogramCollector", "log", "info", "debug", "warn", "warning", "error", "fatal",
-           "OPTIONS", "set_random_seed"]
+           "OPTIONS", "alias_param"]
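Taken together, the `alias_param` applications above mean the public `fit` entry points accept `config=` as a keyword alias for their `conf` parameter, and `MixedPrecisionConfig` accepts `precision=` as an alias for `precisions`. A hedged illustration follows; the string value passed for the precision and any keyword surface beyond the aliases visible in the hunks are assumptions, not taken from the patch:

    from neural_compressor import benchmark, mix_precision
    from neural_compressor.config import BenchmarkConfig, MixedPrecisionConfig

    bench_conf = BenchmarkConfig()
    mp_conf = MixedPrecisionConfig(precision="bf16")  # remapped by the decorator to precisions="bf16"

    # Both keyword spellings now reach the same parameter on the decorated entry points,
    # e.g. (model and dataloader objects supplied by the caller):
    #   benchmark.fit(model, conf=bench_conf, b_dataloader=dataloader)
    #   benchmark.fit(model, config=bench_conf, b_dataloader=dataloader)
    #   converted_model = mix_precision.fit(model, config=mp_conf)
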
diff --git a/neural_compressor/utils/utility.py b/neural_compressor/utils/utility.py
index a146eaff77b..84068bdf4a0 100644
--- a/neural_compressor/utils/utility.py
+++ b/neural_compressor/utils/utility.py
@@ -22,24 +22,26 @@ file (in yaml) and use cfg_from_file(yaml_file) to load it and
 override the default options.
 """
-import re
 import ast
-import os
-import time
-import sys
-import pickle
+import cpuinfo
 import logging
 import importlib
-from contextlib import contextmanager
-from tempfile import NamedTemporaryFile
-import os.path as osp
-import threading, _thread
-import cpuinfo
 import re
 import numpy as np
-from neural_compressor.utils import logger
+import os
+import os.path as osp
+import pickle
 import prettytable as pt
 import psutil
 import subprocess
+import sys
+import threading
+import time
+import _thread
+from contextlib import contextmanager
+from functools import wraps
+from tempfile import NamedTemporaryFile
+from neural_compressor.utils import logger
 from enum import Enum
 from pkg_resources import parse_version
@@ -677,3 +679,25 @@ def compare_objects(obj1, obj2, ignore_attrs):
     for attr in attrs1 - set(ignore_attrs):
         if getattr(obj1, attr) != getattr(obj2, attr):
             return False
+
+
+def alias_param(param_name: str, param_alias: str):
+    """
+    Decorator for aliasing a param in a function
+
+    Args:
+        param_name: name of param in function to alias
+        param_alias: alias that can be used for this param
+    Returns:
+    """
+    def decorator(func):
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            alias_param_value = kwargs.get(param_alias)
+            if alias_param_value:
+                kwargs[param_name] = alias_param_value
+                del kwargs[param_alias]
+            result = func(*args, **kwargs)
+            return result
+        return wrapper
+    return decorator
diff --git a/test/itex/test_keras_in_keras_out.py b/test/itex/test_keras_in_keras_out.py
index d455e3d1703..3222956e953 100644
--- a/test/itex/test_keras_in_keras_out.py
+++ b/test/itex/test_keras_in_keras_out.py
@@ -128,7 +128,7 @@ def test_keras_in_keras_out(self):
 
         from neural_compressor.quantization import fit
         from neural_compressor.config import PostTrainingQuantConfig
-        from neural_compressor.utils.utility import set_random_seed
+        from neural_compressor import set_random_seed
         from neural_compressor.data.dataloaders.dataloader import DataLoader
         set_random_seed(9527)
         config = PostTrainingQuantConfig(backend='itex')
@@ -165,7 +165,7 @@ def test_keras_model_interface(self):
 
         from neural_compressor.quantization import fit
         from neural_compressor.config import PostTrainingQuantConfig
-        from neural_compressor.utils.utility import set_random_seed
+        from neural_compressor import set_random_seed
         from neural_compressor.data.dataloaders.dataloader import DataLoader
         set_random_seed(9527)
         config = PostTrainingQuantConfig(backend='itex')
diff --git a/test/itex/test_tensorflow_itex_2.x.py b/test/itex/test_tensorflow_itex_2.x.py
index 97cbe5e50da..32c648cf496 100644
--- a/test/itex/test_tensorflow_itex_2.x.py
+++ b/test/itex/test_tensorflow_itex_2.x.py
@@ -7,7 +7,7 @@
 from neural_compressor.data.dataloaders.dataloader import DataLoader
 from neural_compressor.quantization import fit
 from neural_compressor.config import PostTrainingQuantConfig
-from neural_compressor.utils.utility import set_random_seed
+from neural_compressor import set_random_seed
 from neural_compressor.adaptor.tf_utils.util import version1_lt_version2
 import tensorflow as tf
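One behavioral detail visible in the hunk above: the wrapper looks the alias up with `kwargs.get(param_alias)` and only remaps it when the value is passed as a keyword and is truthy. A minimal usage sketch follows; the `fit` function in it is a stand-in defined purely for illustration (not the library's), and it assumes the patched package is installed:

    from neural_compressor.utils.utility import alias_param

    @alias_param("conf", param_alias="config")
    def fit(model, conf, b_dataloader=None):
        return conf

    assert fit("model", config="my-config") == "my-config"  # alias is remapped onto conf
    assert fit("model", conf="my-config") == "my-config"    # original keyword still works
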
From 5a159ddb6238dc45c4fb9c1296f24a99f2ac68b0 Mon Sep 17 00:00:00 2001
From: "Cheng, Penghui"
Date: Tue, 9 May 2023 10:59:46 +0800
Subject: [PATCH 14/14] Fixed docstring error and UT coverage error

Signed-off-by: Cheng, Penghui
---
 neural_compressor/utils/utility.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/neural_compressor/utils/utility.py b/neural_compressor/utils/utility.py
index 84068bdf4a0..d1b1f33240a 100644
--- a/neural_compressor/utils/utility.py
+++ b/neural_compressor/utils/utility.py
@@ -682,19 +682,17 @@ def compare_objects(obj1, obj2, ignore_attrs):
 
 
 def alias_param(param_name: str, param_alias: str):
-    """
-    Decorator for aliasing a param in a function
+    """Decorator for aliasing a param in a function.
 
     Args:
-        param_name: name of param in function to alias
-        param_alias: alias that can be used for this param
-    Returns:
+        param_name: Name of param in function to alias.
+        param_alias: Alias that can be used for this param.
     """
     def decorator(func):
         @wraps(func)
         def wrapper(*args, **kwargs):
             alias_param_value = kwargs.get(param_alias)
-            if alias_param_value:
+            if alias_param_value:  # pragma: no cover
                 kwargs[param_name] = alias_param_value
                 del kwargs[param_alias]
             result = func(*args, **kwargs)
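Since this final commit is concerned with unit-test coverage of `alias_param`, a sketch of the kind of test that would exercise the aliased-keyword path is shown below. It is not part of the patch, the module name is hypothetical, and it assumes the patched package is installed:

    # test_alias_param.py (hypothetical)
    import unittest

    from neural_compressor.utils.utility import alias_param


    class TestAliasParam(unittest.TestCase):
        def test_alias_keyword_is_remapped(self):
            @alias_param("conf", param_alias="config")
            def dummy(model, conf=None):
                return conf

            self.assertEqual(dummy("model", config=42), 42)  # alias path
            self.assertEqual(dummy("model", conf=7), 7)      # original keyword


    if __name__ == "__main__":
        unittest.main()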