2 changes: 1 addition & 1 deletion neural_compressor/__init__.py
@@ -22,7 +22,7 @@
# we need to set a global 'NA' backend, or Model can't be used
from .utils.utility import set_random_seed, set_tensorboard, set_workspace
from .utils import options
-from .config import conf
+# from .config import conf
from .config import DistillationConfig, PostTrainingQuantConfig, \
WeightPruningConfig, QuantizationAwareTrainingConfig, \
MixedPrecisionConfig
2 changes: 1 addition & 1 deletion neural_compressor/adaptor/mxnet.py
@@ -25,7 +25,7 @@
dump_elapsed_time, singleton)
from neural_compressor.adaptor.mxnet_utils.util import *
from collections import OrderedDict
-from ..experimental.data.dataloaders.base_dataloader import BaseDataLoader
+from neural_compressor.data.dataloaders.base_dataloader import BaseDataLoader
from copy import deepcopy
import math

2 changes: 1 addition & 1 deletion neural_compressor/adaptor/onnxrt.py
@@ -31,7 +31,7 @@
from neural_compressor.utils.utility import LazyImport, dump_elapsed_time, \
GLOBAL_STATE, MODE
from neural_compressor.utils.utility import Statistics
-from neural_compressor.experimental.data.dataloaders.base_dataloader import BaseDataLoader
+from neural_compressor.data.dataloaders.base_dataloader import BaseDataLoader
from neural_compressor.conf.dotdict import deep_get
from neural_compressor.utils.utility import CpuInfo
import math
2 changes: 1 addition & 1 deletion neural_compressor/contrib/strategy/tpe.py
@@ -505,7 +505,7 @@ def stop(self, timeout, trials_count):

if timeout == 0 and self.best_tune_result:
need_stop = True
-elif trials_count >= self.cfg.tuning.exit_policy.max_trials:
+elif trials_count >= self.conf.quantization.tuning_criterion.max_trials:
need_stop = True
else:
need_stop = False
6 changes: 3 additions & 3 deletions neural_compressor/experimental/graph_optimization.py
@@ -25,7 +25,7 @@
import yaml
from ..conf.config import Graph_Optimization_Conf
from ..conf.dotdict import deep_get, deep_set, DotDict
-from ..strategy import STRATEGIES
+from .strategy import EXP_STRATEGIES
from ..utils import logger
from ..utils.create_obj_from_config import create_dataloader
from ..utils.utility import CpuInfo, time_limit
@@ -139,7 +139,7 @@ def __call__(self):

strategy = cfg.tuning.strategy.name.lower()

-assert strategy in STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy)
+assert strategy in EXP_STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy)

_resume = None
# check if interrupted tuning procedure exists. if yes, it will resume the
@@ -152,7 +152,7 @@ def __call__(self):
with open(self.resume_file, 'rb') as f:
_resume = pickle.load(f).__dict__

-self.strategy = STRATEGIES[strategy](
+self.strategy = EXP_STRATEGIES[strategy](
self._model,
self.conf,
None,
6 changes: 3 additions & 3 deletions neural_compressor/experimental/mixed_precision.py
@@ -24,7 +24,7 @@
from ..conf.config import MixedPrecision_Conf
from ..conf.pythonic_config import Config
from ..conf.dotdict import deep_get
-from ..strategy import STRATEGIES
+from .strategy import EXP_STRATEGIES
from ..utils import logger
from ..utils.create_obj_from_config import create_dataloader
from ..utils.utility import CpuInfo, time_limit
@@ -149,7 +149,7 @@ def __call__(self):

strategy = cfg.tuning.strategy.name.lower()

-assert strategy in STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy)
+assert strategy in EXP_STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy)

_resume = None
# check if interrupted tuning procedure exists. if yes, it will resume the
@@ -162,7 +162,7 @@ def __call__(self):
with open(self.resume_file, 'rb') as f:
_resume = pickle.load(f).__dict__

-self.strategy = STRATEGIES[strategy](
+self.strategy = EXP_STRATEGIES[strategy](
self._model,
self.conf,
None,
6 changes: 3 additions & 3 deletions neural_compressor/experimental/quantization.py
@@ -23,7 +23,7 @@
import numpy as np
from .component import Component
from ..conf.dotdict import deep_get, deep_set, DotDict
-from ..strategy import STRATEGIES
+from .strategy import EXP_STRATEGIES
from ..utils import logger
from ..utils.utility import time_limit
from ..utils.create_obj_from_config import create_dataloader
@@ -144,7 +144,7 @@ def pre_process(self):
strategy = "basic"
logger.warning(f"MSE_v2 does not support {self.framework} now, use basic instead.")
logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.")
-assert strategy in STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy)
+assert strategy in EXP_STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy)

_resume = None
# check if interrupted tuning procedure exists. if yes, it will resume the
@@ -157,7 +157,7 @@ def pre_process(self):
with open(self.resume_file, 'rb') as f:
_resume = pickle.load(f).__dict__

-self.strategy = STRATEGIES[strategy](
+self.strategy = EXP_STRATEGIES[strategy](
self._model,
self.conf,
self._calib_dataloader,
30 changes: 30 additions & 0 deletions neural_compressor/experimental/strategy/__init__.py
@@ -0,0 +1,30 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2021 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Intel Neural Compressor Strategy."""

from .strategy import EXP_STRATEGIES
from os.path import dirname, basename, isfile, join
import glob

modules = glob.glob(join(dirname(__file__), "*.py"))

for f in modules:
if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'):
__import__(basename(f)[:-3], globals(), locals(), level=1)

__all__ = ["EXP_STRATEGIES"]
156 changes: 156 additions & 0 deletions neural_compressor/experimental/strategy/auto_mixed_precision.py
@@ -0,0 +1,156 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2021 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""The auto-mixed precision strategy."""

import copy
import numpy as np
from collections import OrderedDict
from .strategy import strategy_registry, TuneStrategy
from ...utils import logger

from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler
from .utils.tuning_structs import OpTuningConfig


@strategy_registry
class AutoMixedPrecisionTuneStrategy(TuneStrategy):
"""Tuning strategy for auto mixed precision."""

def next_tune_cfg(self):
"""Generate the next tuning config.

Tuning configurations are generated according to the following rules:
1. First, it tries to convert as many ops as possible to the target data type.
2. If the accuracy does not meet the requirement, it starts the fallback stage,
which converts ops back to higher precision.

Yields:
tune_config (dict): A dict containing the tuning configuration.
"""
from copy import deepcopy

# filter quantization dtype
# TODO: align with the old mixed-precision
target_dtypes = self.cfg.graph_optimization.precisions if self.cfg.graph_optimization \
else self.cfg.mixed_precision.precisions
target_dtypes = list(set(target_dtypes) - set(['fp32']))
tuning_space = self.tuning_space
initial_op_tuning_cfg = {}
for item in tuning_space.root_item.options:
if item.item_type == 'op':
op_name, op_type = item.name
initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space)

if not target_dtypes:
target_dtypes = ['bf16']
# step 1. target_dtype AMAP (as many as possible): collect the ops that support target_dtype
bf16_items_name = []
op_tuning_cfg = {}
for idx, target_dtype in enumerate(target_dtypes):
bf16_items = tuning_space.query_items_by_quant_mode(target_dtype)
if len(bf16_items) == 0 and \
not (idx == len(target_dtypes) - 1 and len(bf16_items_name) == 0):
continue
bf16_items_name = [item.name for item in bf16_items]
op_tuning_cfg = deepcopy(initial_op_tuning_cfg)
for op_name_type in bf16_items_name:
op_tuning_cfg[op_name_type] = \
OpTuningConfig(op_name_type[0], op_name_type[1], target_dtype, tuning_space)
calib_sampling_size = 1
op_tuning_cfg['calib_sampling_size'] = calib_sampling_size
yield op_tuning_cfg

# step2. fallback
target_dtype = 'fp32'
fallback_items_name_lst = bf16_items_name[::-1]
if fallback_items_name_lst:
logger.info(f"Start to fallback op to {target_dtype} one by one.")
self._fallback_started()
op_dtypes = OrderedDict(zip(fallback_items_name_lst, [target_dtype] * len(fallback_items_name_lst)))
initial_op_tuning_cfg = deepcopy(op_tuning_cfg)
fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[],
initial_op_tuning_cfg=initial_op_tuning_cfg,
op_dtypes=op_dtypes, accumulate=False)
op_fallback_acc_impact = OrderedDict()
for op_index, op_tuning_cfg in enumerate(fallback_sampler):
op_tuning_cfg['calib_sampling_size'] = calib_sampling_size
yield op_tuning_cfg
acc, _ = self.last_tune_result
op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc

# do accumulated fallback according to the order in the previous stage
if len(op_fallback_acc_impact) > 0:
ordered_ops = sorted(op_fallback_acc_impact.keys(), key=lambda key: op_fallback_acc_impact[key],
reverse=self.higher_is_better)
op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst)))
logger.info("Start to accumulate fallback to {target_dtype}.")
initial_op_tuning_cfg = deepcopy(op_tuning_cfg)
fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[],
initial_op_tuning_cfg=initial_op_tuning_cfg,
op_dtypes=op_dtypes, accumulate=True)
for op_tuning_cfg in fallback_sampler:
op_tuning_cfg['calib_sampling_size'] = calib_sampling_size
yield op_tuning_cfg

def traverse(self):
"""Traverse the tuning space according to auto-mixed precision strategy."""
# get fp32 model baseline
self._eval_baseline()

trials_count = 0
for op_tuning_cfg in self.next_tune_cfg():
# add tune_cfg here as quantize uses tune_cfg
tune_cfg = self._tune_cfg_converter(op_tuning_cfg)
trials_count += 1
tuning_history = self._find_tuning_history(tune_cfg)
if tuning_history and trials_count < self.cfg.tuning.exit_policy.max_trials:
self.last_tune_result = tuning_history['last_tune_result']
self.best_tune_result = tuning_history['best_tune_result']
logger.warn("Find evaluated tuning config, skip.")
continue

logger.debug("Dump current mixed precision configuration:")
logger.debug(tune_cfg)
self.last_qmodel = self.adaptor.quantize(
tune_cfg, self.model, self.calib_dataloader, self.q_func)
assert self.last_qmodel
# Return the last quantized model as a result if performance_only is set.
if self.cfg.tuning.exit_policy.performance_only:
self.best_qmodel = self.last_qmodel
self._add_tuning_history(copy.deepcopy(tune_cfg), (-1, [0]), q_config=self.last_qmodel.q_config)
return
self.last_tune_cfg = copy.deepcopy(tune_cfg)
if self.eval_dataloader or self.eval_func:
q_config = copy.deepcopy(self.last_qmodel.q_config)
self.last_tune_result = self._evaluate(self.last_qmodel)
self.cur_best_acc, self.cur_best_tuning_cfg = self.update_best_op_tuning_cfg(op_tuning_cfg)
need_stop = self.stop(self.cfg.tuning.exit_policy.timeout, trials_count)
# record the tuning history
saved_tune_cfg = copy.deepcopy(tune_cfg)
saved_last_tune_result = copy.deepcopy(self.last_tune_result)
self._add_tuning_history(saved_tune_cfg, saved_last_tune_result, q_config=q_config)
else:
# If the eval_dataloader was not specified in the config yaml file,
# we only convert the model with the customized precisions.
self.best_qmodel = self.last_qmodel
need_stop = True

if need_stop:
break


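The next_tune_cfg docstring above describes a two-stage fallback: ops are first dropped back to fp32 one by one while the accuracy of each single fallback is recorded, and fallback is then accumulated in the order of that measured impact. The standalone sketch below illustrates only that ordering logic under simplified assumptions; fallback_plan, evaluate_cfg, and the plain-dict config are hypothetical stand-ins for the FallbackTuningSampler/OpTuningConfig machinery used by the real strategy.

# Standalone sketch of the two-stage fallback ordering described above.
# `fallback_plan` and `evaluate_cfg` are hypothetical; the real strategy uses
# FallbackTuningSampler and OpTuningConfig instead of plain dicts.
from collections import OrderedDict
from copy import deepcopy

def fallback_plan(bf16_ops, base_cfg, evaluate_cfg, higher_is_better=True):
    """Yield op->dtype configs: one-by-one fallback first, then accumulated fallback."""
    # Stage 1: fall back each op to fp32 individually and record its accuracy.
    acc_impact = OrderedDict()
    for op in reversed(bf16_ops):
        cfg = deepcopy(base_cfg)
        cfg[op] = 'fp32'
        acc_impact[op] = evaluate_cfg(cfg)
        yield cfg
    # Stage 2: accumulate fallback, starting from the op whose individual
    # fallback recovered the most accuracy.
    ordered_ops = sorted(acc_impact, key=acc_impact.get, reverse=higher_is_better)
    cfg = deepcopy(base_cfg)
    for op in ordered_ops:
        cfg[op] = 'fp32'
        yield deepcopy(cfg)

if __name__ == '__main__':
    base = {'conv1': 'bf16', 'matmul1': 'bf16'}
    dummy_eval = lambda cfg: sum(v == 'fp32' for v in cfg.values())  # toy metric
    for candidate in fallback_plan(['conv1', 'matmul1'], base, dummy_eval):
        print(candidate)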