From 2eabc87bbd7e96d040cd36cd5c64cf6bc576a1e5 Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Mon, 27 Mar 2023 16:02:11 +0800 Subject: [PATCH 001/103] rewrite INC config class Signed-off-by: Cheng, Zixuan --- neural_compressor/config.py | 503 ++++++++++++++++++++++++++---------- 1 file changed, 363 insertions(+), 140 deletions(-) diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 537480f4f36..4885dab3bb0 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -146,9 +146,6 @@ def tensorboard(self, tensorboard): self._tensorboard = tensorboard -options = Options() - - class BenchmarkConfig: """Config Class for Benchmark. @@ -371,32 +368,116 @@ def __str__(self): accuracy_criterion = AccuracyCriterion() +class TuningCriterion: + """Class for Tuning Criterion. + + Args: + strategy: strategy name + strategy_kwargs: parameters for strategy + objective: objective with accuracy constraint guaranteed, supports 'performance', 'modelsize', 'footprint' + timeout: tuning timeout (seconds). Default value is 0 which means early stop + max_trials: max tune times. Default value is 100. Combine with timeout field to decide when to exit + + Example:: + from neural_compressor.config import TuningCriterion + + tuning_criterion=TuningCriterion( + timeout=0, + max_trials=100, + strategy="basic", + strategy_kwargs=None, + ) + """ + def __init__(self, strategy="basic", strategy_kwargs=None, timeout=0, max_trials=100, objective="performance"): + """Init a TuningCriterion object.""" + self.strategy = strategy + self.timeout = timeout + self.max_trials = max_trials + self.objective = objective + self.strategy_kwargs = strategy_kwargs + + @property + def max_trials(self): + """Get max_trials.""" + return self._max_trials + + @max_trials.setter + def max_trials(self, max_trials): + """Set max_trials.""" + if check_value('max_trials', max_trials, int): + self._max_trials = max_trials + + @property + def timeout(self): + """Get timeout.""" + return self._timeout + + @timeout.setter + def timeout(self, timeout): + """Set timeout.""" + if check_value('timeout', timeout, int): + self._timeout = timeout + + @property + def objective(self): + """Get objective.""" + return self._objective + + @objective.setter + def objective(self, objective): + """Set objective.""" + if check_value('objective', objective, str, + ['performance', 'accuracy', 'modelsize', 'footprint']): + self._objective = objective + + @property + def strategy(self): + """Get strategy.""" + return self._strategy + + @strategy.setter + def strategy(self, strategy): + """Set strategy.""" + if check_value('strategy', strategy, str, + ['basic', 'mse', 'bayesian', 'random', 'exhaustive', 'sigopt', 'tpe', 'mse_v2', 'hawq_v2']): + self._strategy = strategy + + @property + def strategy_kwargs(self): + """Get strategy_kwargs.""" + return self._strategy_kwargs + + @strategy_kwargs.setter + def strategy_kwargs(self, strategy_kwargs): + """Set strategy_kwargs.""" + self._strategy_kwargs = strategy_kwargs + + +tuning_criterion = TuningCriterion() + + class _BaseQuantizationConfig: def __init__(self, inputs=[], outputs=[], backend="default", domain="auto", + model_name="", + metric={}, recipes={}, quant_format="default", device="cpu", calibration_sampling_size=[100], op_type_dict=None, op_name_dict=None, - strategy="basic", - strategy_kwargs=None, - objective="performance", - timeout=0, - max_trials=100, performance_only=False, reduce_range=None, - example_inputs=None, excluded_precisions=[], quant_level="auto", 
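+                 # NOTE: accuracy_criterion/tuning_criterion below default to the
+                 # module-level singleton instances defined above, so configs created
+                 # without explicit arguments share (and can mutate) the same objects.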
accuracy_criterion=accuracy_criterion, + tuning_criterion=tuning_criterion, use_distributed_tuning=False): """Initialize _BaseQuantizationConfig class. - Args: inputs: inputs of model outputs: outputs of model @@ -405,6 +486,8 @@ def __init__(self, Adaptor will use specific quantization settings for different domains automatically, and explicitly specified quantization settings will override the automatic setting. If users set domain as auto, automatic detection for domain will be executed. + model_name: name of model + metric: dict of metric that will be used recipes: recipes for quantiztaion, support list is as below. 'smooth_quant': whether do smooth quant 'smooth_quant_args': parameters for smooth_quant @@ -424,14 +507,8 @@ def __init__(self, calibration_sampling_size: number of calibration sample op_type_dict: tuning constraints on optype-wise op_name_dict: tuning constraints on op-wise - strategy: strategy name - strategy_kwargs: parameters for strategy - objective: objective with accuracy constraint guaranteed, support 'performance', 'modelsize', 'footprint' - timeout: tuning timeout (seconds). default value is 0 which means early stop - max_trials: max tune times. default value is 100. Combine with timeout field to decide when to exit performance_only: whether do evaluation reduce_range: whether use 7 bit - example_inputs: used to trace PyTorch model with torch.jit/torch.fx excluded_precisions: precisions to be excluded, support 'bf16' quant_level: support auto, 0 and 1, 0 is conservative strategy, 1 is basic or user-specified strategy, auto (default) is the combination of 0 and 1. @@ -442,25 +519,23 @@ def __init__(self, self.outputs = outputs self.backend = backend self.domain = domain + self.model_name = model_name + self.metric=metric self.recipes = recipes self.quant_format = quant_format self.device = device self.op_type_dict = op_type_dict self.op_name_dict = op_name_dict - self.strategy = strategy - self.strategy_kwargs = strategy_kwargs - self.objective = objective - self.timeout = timeout - self.max_trials = max_trials self.performance_only = performance_only self.reduce_range = reduce_range self.excluded_precisions = excluded_precisions self.use_bf16 = "bf16" not in self.excluded_precisions self.accuracy_criterion = accuracy_criterion + self.tuning_criterion = tuning_criterion self.calibration_sampling_size = calibration_sampling_size self.quant_level = quant_level self.use_distributed_tuning=use_distributed_tuning - self._example_inputs = example_inputs + self._framework=None @property def domain(self): @@ -474,6 +549,29 @@ def domain(self, domain): ["auto", "cv", "object_detection", "nlp", "recommendation_system"]): self._domain = domain + @property + def model_name(self): + """Get model name.""" + return self._model_name + + @model_name.setter + def model_name(self, model_name): + """Set model name.""" + if check_value("model_name", model_name, str): + self._model_name = model_name + + @property + def metric(self): + """Get metric.""" + return self._metric + + @metric.setter + def metric(self, metric): + """Set metric.""" + if metric is not None and not isinstance(metric, dict): + raise ValueError("metric should be a dict.") + self._metric = metric + @property def recipes(self): """Get recipes.""" @@ -591,6 +689,17 @@ def accuracy_criterion(self, accuracy_criterion): if check_value("accuracy_criterion", accuracy_criterion, AccuracyCriterion): self._accuracy_criterion = accuracy_criterion + @property + def tuning_criterion(self): + """Get tuning_criterion.""" + return 
self.tuning_criterion + + @tuning_criterion.setter + def tuning_criterion(self, tuning_criterion): + """Set tuning_criterion.""" + if check_value("tuning_criterion", tuning_criterion, TuningCriterion): + self.tuning_criterion = tuning_criterion + @property def excluded_precisions(self): return self._excluded_precisions @@ -772,98 +881,6 @@ def inputs(self, inputs): if check_value('inputs', inputs, str): self._inputs = inputs - @property - def example_inputs(self): - """Get strategy_kwargs.""" - return self._example_inputs - - @example_inputs.setter - def example_inputs(self, example_inputs): - """Set example_inputs.""" - self._example_inputs = example_inputs - - -class TuningCriterion: - """Class for Tuning Criterion. - - Example:: - - from neural_compressor.config import TuningCriterion - - tuning_criterion=TuningCriterion( - timeout=0, # optional. tuning timeout (seconds). When set to 0, early stopping is enabled. - max_trials=100, # optional. max tuning times. - # combined with the `timeout` field to decide when to exit tuning. - strategy="basic", # optional. name of the tuning strategy. - strategy_kwargs=None, # optional. see concrete tuning strategy for available settings. - ) - """ - def __init__(self, strategy="basic", strategy_kwargs=None, timeout=0, max_trials=100, objective="performance"): - """Init a TuningCriterion object.""" - self.strategy = strategy - self.timeout = timeout - self.max_trials = max_trials - self.objective = objective - self.strategy_kwargs = strategy_kwargs - - @property - def max_trials(self): - """Get max_trials.""" - return self._max_trials - - @max_trials.setter - def max_trials(self, max_trials): - """Set max_trials.""" - if check_value('max_trials', max_trials, int): - self._max_trials = max_trials - - @property - def timeout(self): - """Get timeout.""" - return self._timeout - - @timeout.setter - def timeout(self, timeout): - """Set timeout.""" - if check_value('timeout', timeout, int): - self._timeout = timeout - - @property - def objective(self): - """Get objective.""" - return self._objective - - @objective.setter - def objective(self, objective): - """Set objective.""" - if check_value('objective', objective, str, - ['performance', 'accuracy', 'modelsize', 'footprint']): - self._objective = objective - - @property - def strategy(self): - """Get strategy.""" - return self._strategy - - @strategy.setter - def strategy(self, strategy): - """Set strategy.""" - if check_value('strategy', strategy, str, - ['basic', 'mse', 'bayesian', 'random', 'exhaustive', 'sigopt', 'tpe', 'mse_v2', 'hawq_v2']): - self._strategy = strategy - - @property - def strategy_kwargs(self): - """Get strategy_kwargs.""" - return self._strategy_kwargs - - @strategy_kwargs.setter - def strategy_kwargs(self, strategy_kwargs): - """Set strategy_kwargs.""" - self._strategy_kwargs = strategy_kwargs - -tuning_criterion = TuningCriterion() - class PostTrainingQuantConfig(_BaseQuantizationConfig): """Config Class for Post Training Quantization. @@ -873,11 +890,10 @@ class PostTrainingQuantConfig(_BaseQuantizationConfig): from neural_compressor.config PostTrainingQuantConfig, TuningCriterion conf = PostTrainingQuantConfig( - quant_level="auto", # the quantization level. + quant_level="auto", tuning_criterion=TuningCriterion( - timeout=0, # optional. tuning timeout (seconds). When set to 0, early stopping is enabled. - max_trials=100, # optional. max tuning times. - # combined with the `timeout` field to decide when to exit tuning. 
+ timeout=0, + max_trials=100, ), ) """ @@ -896,12 +912,11 @@ def __init__(self, reduce_range=None, excluded_precisions=[], quant_level="auto", - tuning_criterion=tuning_criterion, accuracy_criterion=accuracy_criterion, - use_distributed_tuning=False, + tuning_criterion=tuning_criterion, + use_distributed_tuning=False ): """Init a PostTrainingQuantConfig object.""" - self.tuning_criterion = tuning_criterion super().__init__(inputs=inputs, outputs=outputs, device=device, @@ -912,15 +927,11 @@ def __init__(self, calibration_sampling_size=calibration_sampling_size, op_type_dict=op_type_dict, op_name_dict=op_name_dict, - strategy=tuning_criterion.strategy, - strategy_kwargs=tuning_criterion.strategy_kwargs, - objective=tuning_criterion.objective, - timeout=tuning_criterion.timeout, - max_trials=tuning_criterion.max_trials, reduce_range=reduce_range, excluded_precisions=excluded_precisions, quant_level=quant_level, accuracy_criterion=accuracy_criterion, + tuning_criterion=tuning_criterion, use_distributed_tuning=use_distributed_tuning) self.approach = approach @@ -935,17 +946,6 @@ def approach(self, approach): if check_value("approach", approach, str, ["static", "dynamic", "auto"]): self._approach = QUANTMAPPING[approach] - @property - def tuning_criterion(self): - """Get tuning_criterion.""" - return self._tuning_criterion - - @tuning_criterion.setter - def tuning_criterion(self, tuning_criterion): - """Set tuning_criterion.""" - if check_value("tuning_criterion", tuning_criterion, TuningCriterion): - self._tuning_criterion = tuning_criterion - class QuantizationAwareTrainingConfig(_BaseQuantizationConfig): """Config Class for Quantization Aware Training. @@ -1198,7 +1198,7 @@ def teacher_model(self, teacher_model): self._teacher_model = teacher_model -class MixedPrecisionConfig(PostTrainingQuantConfig): +class MixedPrecisionConfig(_BaseQuantizationConfig): """Config Class for MixedPrecision. Example:: @@ -1228,7 +1228,7 @@ def __init__(self, excluded_precisions=excluded_precisions, ) self.precision = precision - + @property def precision(self): """Get precision.""" @@ -1243,7 +1243,8 @@ def precision(self, precision): elif isinstance(precision, list): assert all([i in ["fp16", "bf16"] for i in precision]), "Only " \ "support 'fp16' and 'bf16' for mix precision." 
- self._precision = precision + self._precision = precision + class ExportConfig: """Config Class for Export.""" @@ -1336,12 +1337,14 @@ def dynamic_axes(self, dynamic_axes): """Set dynamic_axes.""" self._dynamic_axes = dynamic_axes + class ONNXQlinear2QDQConfig: """Config Class for ONNXQlinear2QDQ.""" def __init__(self): """Init an ONNXQlinear2QDQConfig object.""" pass + class Torch2ONNXConfig(ExportConfig): """Config Class for Torch2ONNX.""" def __init__( @@ -1394,3 +1397,223 @@ def __init__( dynamic_axes=dynamic_axes, ) self.kwargs = kwargs + + +class QuantizationConfig(_BaseQuantizationConfig): + def __init__(self, + inputs=[], + outputs=[], + backend='default', + device='cpu', + approach='post_training_static_quant', + calibration_sampling_size=[100], + op_type_dict=None, + op_name_dict=None, + performance_only=False, + reduce_range=None, + use_bf16=True, + quant_level="auto", + accuracy_criterion=accuracy_criterion, + tuning_criterion=tuning_criterion, + use_distributed_tuning=False): + excluded_precisions = ["bf16"] if not use_bf16 else [] + super().__init__( + inputs=inputs, + outputs=outputs, + backend=backend, + device=device, + calibration_sampling_size=calibration_sampling_size, + op_type_dict=op_type_dict, + op_name_dict=op_name_dict, + performance_only=performance_only, + reduce_range=reduce_range, + excluded_precisions=excluded_precisions, + accuracy_criterion=accuracy_criterion, + tuning_criterion=tuning_criterion, + quant_level=quant_level, + use_distributed_tuning=use_distributed_tuning + ) + self.approach = approach + + @property + def approach(self): + return self._approach + + @approach.setter + def approach(self, approach): + if check_value( + 'approach', approach, str, + ['post_training_static_quant', 'post_training_dynamic_quant', 'quant_aware_training'] + ): + self._approach = approach + + +class NASConfig: + def __init__(self, approach=None, search_space=None, search_algorithm=None, + metrics=[], higher_is_better=[], max_trials=3, seed=42, dynas=None): + self._approach = approach + self._search = DotDict({ + 'search_space': search_space, + 'search_algorithm': search_algorithm, + 'metrics': metrics, + 'higher_is_better': higher_is_better, + 'max_trials': max_trials, + 'seed': seed + }) + self.dynas = None + if approach == 'dynas' and dynas: + self.dynas = dynas.config + + @property + def approach(self): + return self._approach + + @approach.setter + def approach(self, approach): + self._approach = approach + + @property + def search(self): + return self._search + + @search.setter + def search(self, search): + self._search = search + + +class MXNet: + def __init__(self, precisions=None): + self._precisions = precisions + + @property + def precisions(self): + return self._precisions + + @precisions.setter + def precisions(self, precisions): + if not isinstance(precisions, list): + precisions = [precisions] + if check_value('precisions', precisions, str, ['int8', 'uint8', 'fp32', 'bf16', 'fp16']): + self._precisions = precisions + + +class ONNX(MXNet): + def __init__(self, graph_optimization_level=None, precisions=None): + super().__init__(precisions) + self._graph_optimization_level = graph_optimization_level + + @property + def graph_optimization_level(self): + return self._graph_optimization_level + + @graph_optimization_level.setter + def graph_optimization_level(self, graph_optimization_level): + if check_value('graph_optimization_level', graph_optimization_level, str, + ['DISABLE_ALL', 'ENABLE_BASIC', 'ENABLE_EXTENDED', 'ENABLE_ALL']): + 
self._graph_optimization_level = graph_optimization_level + + +class TensorFlow(MXNet): + def __init__(self, precisions=None): + super().__init__(precisions) + + +class Keras(MXNet): + def __init__(self, precisions=None): + super().__init__(precisions) + + +class PyTorch(MXNet): + def __init__(self, precisions=None): + super().__init__(precisions) + + +quantization = QuantizationConfig() +benchmark = BenchmarkConfig() +options = Options() +mixed_precision = MixedPrecisionConfig() +pruning = WeightPruningConfig() +distillation = DistillationConfig(teacher_model=None) +nas = NASConfig() +onnxruntime_config = ONNX() +tensorflow_config = TensorFlow() +keras_config = Keras() +pytorch_config = PyTorch() +mxnet_config = MXNet() + + +class Config: + def __init__(self, + quantization=quantization, + benchmark=benchmark, + options=options, + mixed_precision=mixed_precision, + pruning=pruning, + distillation=distillation, + nas=nas, + onnxruntime=onnxruntime_config, + tensorflow=tensorflow_config, + pytorch=pytorch_config, + mxnet=mxnet_config, + keras=keras_config, + ): + self._quantization = quantization + self._benchmark = benchmark + self._options = options + self._mixed_precision=mixed_precision + self._onnxruntime = onnxruntime + self._pruning = pruning + self._distillation = distillation + self._nas = nas + self._tensorflow = tensorflow + self._pytorch = pytorch + self._mxnet = mxnet + self._keras = keras + + @property + def distillation(self): + return self._distillation + + @property + def nas(self): + return self._nas + + @property + def tensorflow(self): + return self._tensorflow + + @property + def keras(self): + return self._keras + + @property + def pytorch(self): + return self._pytorch + + @property + def mxnet(self): + return self._mxnet + + @property + def pruning(self): + return self._pruning + + @property + def quantization(self): + return self._quantization + + @property + def benchmark(self): + return self._benchmark + + @property + def options(self): + return self._options + + @property + def mixed_precision(self): + return self._mixed_precision + + @property + def onnxruntime(self): + return self._onnxruntime From d7e77f3f616eec470b0a042419fa3fe696e888d1 Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Mon, 27 Mar 2023 16:27:45 +0800 Subject: [PATCH 002/103] edit configs for quantization Signed-off-by: Cheng, Zixuan --- neural_compressor/config.py | 4 +- neural_compressor/quantization.py | 98 +++++++++++++++---------------- 2 files changed, 50 insertions(+), 52 deletions(-) diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 4885dab3bb0..813d8710876 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -692,13 +692,13 @@ def accuracy_criterion(self, accuracy_criterion): @property def tuning_criterion(self): """Get tuning_criterion.""" - return self.tuning_criterion + return self._tuning_criterion @tuning_criterion.setter def tuning_criterion(self, tuning_criterion): """Set tuning_criterion.""" if check_value("tuning_criterion", tuning_criterion, TuningCriterion): - self.tuning_criterion = tuning_criterion + self._tuning_criterion = tuning_criterion @property def excluded_precisions(self): diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index e818a592e2c..545b7de90cb 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -20,9 +20,8 @@ import pickle import random import numpy as np -from .conf.config import QuantConf from .conf.dotdict import deep_get, 
deep_set, DotDict
-from .conf.pythonic_config import Config
+from .config import Config
 from .model.model import BaseModel, get_model_fwk_name, get_model_type, Model, MODELS
 from .strategy import STRATEGIES
 from .utils import logger
@@ -42,7 +41,7 @@ class PostTrainingQuant:

     Example::

-        conf = PostTrainingQuantConfig()
+        conf = Config()
         quantizer = PostTrainingQuant(conf)
         quantizer.model = model
         quantizer.eval_func = eval_func
@@ -53,13 +52,11 @@ def __init__(self, conf, **kwargs):
         """Initialize the parameters.

         Args:
-            conf (PostTrainingQuantConfig): A instance of PostTrainingQuantConfig to
+            conf (QuantizationConfig): An instance of QuantizationConfig to
                 specify the quantization behavior.
         """
-        conf = Config(quantization=conf, benchmark=None, pruning=None, distillation=None, nas=None)
-        self.conf = QuantConf()
-        self.conf.map_pyconfig_to_cfg(conf)
-        seed = self.conf.usr_cfg.tuning.random_seed
+        self.conf = Config(quantization=conf, benchmark=None, pruning=None, distillation=None, nas=None)
+        seed = self.conf.options.random_seed
         random.seed(seed)
         np.random.seed(seed)
         self._train_func = None
@@ -74,15 +71,16 @@ def __init__(self, conf, **kwargs):

     def pre_proccess(self):
         """Create strategy to optimize model."""
-        cfg = self.conf.usr_cfg
+        cfg = self.conf

         if os.environ.get("PERFORMANCE_ONLY") in ['0', '1']:
             performance_only = bool(int(os.environ.get("PERFORMANCE_ONLY")))
-            deep_set(cfg, 'tuning.exit_policy.performance_only', performance_only)
+            deep_set(cfg, 'quantization.performance_only', performance_only)
             logger.info("Get environ 'PERFORMANCE_ONLY={}'," \
-                " force setting 'tuning.exit_policy.performance_only = True'.".format(performance_only))
+                " force setting 'quantization.performance_only = True'.".format(performance_only))

-        strategy = cfg.tuning.strategy.name.lower()
+        strategy = cfg.quantization.tuning_criterion.strategy
+
         if cfg.quantization.quant_level == "auto":
             strategy = "auto"

@@ -91,9 +89,9 @@ def pre_proccess(self):
             strategy = "conservative"

         if strategy == "mse_v2":
-            if not (cfg.model.framework.startswith("tensorflow") or cfg.model.framework == 'pytorch_fx'):
+            if not (cfg.quantization._framework.startswith("tensorflow") or cfg.quantization._framework == 'pytorch_fx'):
                 strategy = "basic"
-                logger.warning(f"MSE_v2 does not support {cfg.model.framework} now, use basic instead.")
+                logger.warning(f"MSE_v2 does not support {cfg.quantization._framework} now, use basic instead.")
                 logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.")
         assert strategy in STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy)

         _resume = None
         # check if interrupted tuning procedure exists. if yes, it will resume the
         # whole auto tune process.
-        self.resume_file = os.path.abspath(os.path.expanduser(cfg.tuning.workspace.resume)) \
-            if cfg.tuning.workspace and cfg.tuning.workspace.resume else None
+        self.resume_file = os.path.abspath(os.path.expanduser(cfg.options.resume_from)) \
+            if cfg.options.workspace and cfg.options.resume_from else None
         if self.resume_file:
             assert os.path.exists(self.resume_file), \
                 "The specified resume file {} doesn't exist!".format(self.resume_file)
             with open(self.resume_file, 'rb') as f:
                 _resume = pickle.load(f).__dict__

         if self._eval_func is None and self._eval_dataloader is None:
-            self.conf.usr_cfg.tuning.exit_policy.performance_only = True
+            self.conf.quantization.performance_only = True
             logger.info("Quantize model without tuning!")

         self.strategy = STRATEGIES[strategy](
@@ -126,9 +124,9 @@ def pre_proccess(self):
     def execute(self):
         """Quantization execute routine based on strategy design."""
         try:
-            with time_limit(self.conf.usr_cfg.tuning.exit_policy.timeout):
+            with time_limit(self.conf.quantization.tuning_criterion.timeout):
                 logger.debug("Dump user configuration:")
-                logger.debug(self.conf.usr_cfg)
+                logger.debug(self.conf)
                 self.strategy.traverse()
         except KeyboardInterrupt:
             pass
@@ -183,53 +181,53 @@ def model(self, user_model):
             make sure the name is in supported slim model list.

         """
-        cfg = self.conf.usr_cfg
-        if cfg.model.framework == 'NA':
+        cfg = self.conf
+        if cfg.quantization._framework == None:
             if isinstance(user_model, BaseModel):
-                cfg.model.framework = list(MODELS.keys())[list(MODELS.values()).index(type(user_model))]
-                if cfg.model.backend == "ipex":
-                    assert cfg.model.framework == "pytorch_ipex", "Please wrap the model with correct Model class!"
-                if cfg.model.backend == "itex":
+                cfg.quantization._framework = list(MODELS.keys())[list(MODELS.values()).index(type(user_model))]
+                if cfg.quantization.backend == "ipex":
+                    assert cfg.quantization._framework == "pytorch_ipex", "Please wrap the model with correct Model class!"
+                if cfg.quantization.backend == "itex":
                     if get_model_type(user_model.model) == 'keras':
-                        assert cfg.model.framework == "keras", "Please wrap the model with KerasModel class!"
+                        assert cfg.quantization._framework == "keras", "Please wrap the model with KerasModel class!"
                     else:
-                        assert cfg.model.framework == "pytorch_itex", \
+                        assert cfg.quantization._framework == "pytorch_itex", \
                             "Please wrap the model with TensorflowModel class!"
else: framework = get_model_fwk_name(user_model) - cfg.model.framework = framework + cfg.quantization._framework = framework if framework == "tensorflow": - if get_model_type(user_model) == 'keras' and cfg.model.backend == 'itex': - cfg.model.framework = 'keras' + if get_model_type(user_model) == 'keras' and cfg.quantization.backend == 'itex': + cfg.quantization._framework = 'keras' if framework == "pytorch": - if cfg.model.backend == "default": - cfg.model.framework = "pytorch_fx" - elif cfg.model.backend == "ipex": - cfg.model.framework = "pytorch_ipex" + if cfg.quantization.backend == "default": + cfg.quantization._framework = "pytorch_fx" + elif cfg.quantization.backend == "ipex": + cfg.quantization._framework = "pytorch_ipex" if not isinstance(user_model, BaseModel): logger.warning("Force convert framework model to neural_compressor model.") - if "tensorflow" in cfg.model.framework or cfg.model.framework == "keras": - self._model = Model(user_model, backend=cfg.model.framework, device=cfg.device) + if "tensorflow" in cfg.quantization._framework or cfg.quantization._framework == "keras": + self._model = Model(user_model, backend=cfg.quantization._framework, device=cfg.quantization.device) else: - self._model = Model(user_model, backend=cfg.model.framework) + self._model = Model(user_model, backend=cfg.quantization._framework) else: - if cfg.model.framework == "pytorch_ipex": + if cfg.quantization._framework == "pytorch_ipex": from neural_compressor.model.torch_model import IPEXModel assert type(user_model) == IPEXModel, \ "The backend is ipex, please wrap the model with IPEXModel class!" - elif cfg.model.framework == "pytorch_fx": + elif cfg.quantization._framework == "pytorch_fx": from neural_compressor.model.torch_model import PyTorchFXModel assert type(user_model) == PyTorchFXModel, \ "The backend is default, please wrap the model with PyTorchFXModel class!" self._model = user_model - if 'tensorflow' in cfg.model.framework: - self._model.name = cfg.model.name - self._model.output_tensor_names = cfg.model.outputs - self._model.input_tensor_names = cfg.model.inputs - self._model.workspace_path = cfg.tuning.workspace.path + if 'tensorflow' in cfg.quantization._framework: + self._model.name = cfg.quantization.model_name + self._model.output_tensor_names = cfg.quantization.outputs + self._model.input_tensor_names = cfg.quantization.inputs + self._model.workspace_path = cfg.options.workspace @property def eval_func(self): @@ -304,7 +302,7 @@ def metric(self, user_metric): The object of Metric or a dict of built-in metric configurations. 
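+                For example, a dict such as {'topk': 1} selects a built-in top-1
+                accuracy metric.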
""" - if deep_get(self.conf.usr_cfg, "evaluation.accuracy.metric"): + if deep_get(self.conf, "quantization.metric"): logger.warning("Override the value of `metric` field defined in yaml file" \ " as user defines the value of `metric` attribute by code.") @@ -323,11 +321,11 @@ def metric(self, user_metric): metric_cls = type(user_metric).__name__ name = 'user_' + metric_cls metric_cfg = {name: id(user_metric)} - metrics = METRICS(self.conf.usr_cfg.model.framework) + metrics = METRICS(self.conf.quantization._framework) metrics.register(name, metric_cls) - deep_set(self.conf.usr_cfg, "evaluation.accuracy.metric", metric_cfg) - self.conf.usr_cfg = DotDict(self.conf.usr_cfg) + deep_set(self.conf, "quantization.metric", metric_cfg) + self.conf = DotDict(self.conf) self._metric = user_metric @@ -457,9 +455,9 @@ def eval_func(model): Example:: # Quantization code for PTQ - from neural_compressor import PostTrainingQuantConfig, set_workspace + from neural_compressor import Config, set_workspace from neural_compressor import quantization - conf = PostTrainingQuantConfig() + conf = Config() # saved intermediate files in ./saved folder set_workspace("./saved") From e456cf856e542676431aa2a7b294a84e024af25d Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Mon, 27 Mar 2023 16:50:52 +0800 Subject: [PATCH 003/103] minor fix Signed-off-by: Cheng, Zixuan --- neural_compressor/__init__.py | 3 +-- neural_compressor/config.py | 47 +++++++++++++++++++---------------- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/neural_compressor/__init__.py b/neural_compressor/__init__.py index 51cc9386f38..35c8cf0c357 100644 --- a/neural_compressor/__init__.py +++ b/neural_compressor/__init__.py @@ -22,8 +22,7 @@ # we need to set a global 'NA' backend, or Model can't be used from .utils.utility import set_random_seed, set_tensorboard, set_workspace from .utils import options -from .conf.config import conf -from .conf.pythonic_config import config +from .config import conf from .config import DistillationConfig, PostTrainingQuantConfig, \ WeightPruningConfig, QuantizationAwareTrainingConfig, \ MixedPrecisionConfig diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 813d8710876..9a93d523091 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -457,28 +457,7 @@ def strategy_kwargs(self, strategy_kwargs): class _BaseQuantizationConfig: - def __init__(self, - inputs=[], - outputs=[], - backend="default", - domain="auto", - model_name="", - metric={}, - recipes={}, - quant_format="default", - device="cpu", - calibration_sampling_size=[100], - op_type_dict=None, - op_name_dict=None, - performance_only=False, - reduce_range=None, - excluded_precisions=[], - quant_level="auto", - accuracy_criterion=accuracy_criterion, - tuning_criterion=tuning_criterion, - use_distributed_tuning=False): - """Initialize _BaseQuantizationConfig class. - Args: + """Args: inputs: inputs of model outputs: outputs of model backend: backend for model execution. Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep' @@ -514,6 +493,28 @@ def __init__(self, strategy, auto (default) is the combination of 0 and 1. 
accuracy_criterion: accuracy constraint settings use_distributed_tuning: whether use distributed tuning or not + """ + def __init__(self, + inputs=[], + outputs=[], + backend="default", + domain="auto", + model_name="", + metric={}, + recipes={}, + quant_format="default", + device="cpu", + calibration_sampling_size=[100], + op_type_dict=None, + op_name_dict=None, + performance_only=False, + reduce_range=None, + excluded_precisions=[], + quant_level="auto", + accuracy_criterion=accuracy_criterion, + tuning_criterion=tuning_criterion, + use_distributed_tuning=False): + """Initialize _BaseQuantizationConfig class. """ self.inputs = inputs self.outputs = outputs @@ -1617,3 +1618,5 @@ def mixed_precision(self): @property def onnxruntime(self): return self._onnxruntime + +config = Config() From 35f699c209e4473c5f7daff9af5ad7da88e2f847 Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Tue, 28 Mar 2023 16:38:44 +0800 Subject: [PATCH 004/103] fix quantcfg and add dict Signed-off-by: Cheng, Zixuan --- neural_compressor/conf/pythonic_config.py | 414 +++++++++++++++++++++- neural_compressor/config.py | 241 +++---------- 2 files changed, 466 insertions(+), 189 deletions(-) diff --git a/neural_compressor/conf/pythonic_config.py b/neural_compressor/conf/pythonic_config.py index fc990e61179..ccfd307448d 100644 --- a/neural_compressor/conf/pythonic_config.py +++ b/neural_compressor/conf/pythonic_config.py @@ -17,12 +17,424 @@ import logging from .dotdict import DotDict -from ..config import _BaseQuantizationConfig, accuracy_criterion, BenchmarkConfig, \ +from ..config import ops_schema, AccuracyCriterion, accuracy_criterion, BenchmarkConfig, \ check_value, DistillationConfig, options, WeightPruningConfig logger = logging.getLogger("neural_compressor") +class _BaseQuantizationConfig: + """Args: + inputs: inputs of model + outputs: outputs of model + backend: backend for model execution. Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep' + domain: model domain. Support 'auto', 'cv', 'object_detection', 'nlp' and 'recommendation_system'. + Adaptor will use specific quantization settings for different domains automatically, and + explicitly specified quantization settings will override the automatic setting. + If users set domain as auto, automatic detection for domain will be executed. + recipes: recipes for quantiztaion, support list is as below. 
+            'smooth_quant': whether do smooth quant
+            'smooth_quant_args': parameters for smooth_quant
+            'fast_bias_correction': whether do fast bias correction
+            'weight_correction': whether do weight correction
+            'gemm_to_matmul': whether convert gemm to matmul and add, only valid for onnx models
+            'graph_optimization_level': support 'DISABLE_ALL', 'ENABLE_BASIC', 'ENABLE_EXTENDED', 'ENABLE_ALL'
+                                        only valid for onnx models
+            'first_conv_or_matmul_quantization': whether quantize the first conv or matmul
+            'last_conv_or_matmul_quantization': whether quantize the last conv or matmul
+            'pre_post_process_quantization': whether quantize the ops in preprocess and postprocess
+            'add_qdq_pair_to_weight': whether add QDQ pair for weights, only valid for onnxrt_trt_ep
+            'optypes_to_exclude_output_quant': don't quantize output of specified optypes
+            'dedicated_qdq_pair': whether dedicate QDQ pair, only valid for onnxrt_trt_ep
+        quant_format: support 'default', 'QDQ' and 'QOperator'
+        device: support 'cpu' and 'gpu'
+        calibration_sampling_size: number of calibration sample
+        op_type_dict: tuning constraints on optype-wise
+        op_name_dict: tuning constraints on op-wise
+        strategy: strategy name
+        strategy_kwargs: parameters for strategy
+        objective: objective with accuracy constraint guaranteed, support 'performance', 'modelsize', 'footprint'
+        timeout: tuning timeout (seconds). default value is 0 which means early stop
+        max_trials: max tune times. default value is 100. Combine with timeout field to decide when to exit
+        performance_only: whether do evaluation
+        reduce_range: whether use 7 bit
+        example_inputs: used to trace PyTorch model with torch.jit/torch.fx
+        excluded_precisions: precisions to be excluded, support 'bf16'
+        quant_level: support auto, 0 and 1, 0 is conservative strategy, 1 is basic or user-specified
+                     strategy, auto (default) is the combination of 0 and 1.
+        accuracy_criterion: accuracy constraint settings
+        use_distributed_tuning: whether use distributed tuning or not
+    """
+    def __init__(self,
+                 inputs=[],
+                 outputs=[],
+                 backend="default",
+                 domain="auto",
+                 recipes={},
+                 quant_format="default",
+                 device="cpu",
+                 calibration_sampling_size=[100],
+                 op_type_dict=None,
+                 op_name_dict=None,
+                 strategy="basic",
+                 strategy_kwargs=None,
+                 objective="performance",
+                 timeout=0,
+                 max_trials=100,
+                 performance_only=False,
+                 reduce_range=None,
+                 example_inputs=None,
+                 excluded_precisions=[],
+                 quant_level="auto",
+                 accuracy_criterion=accuracy_criterion,
+                 use_distributed_tuning=False):
+        """Initialize _BaseQuantizationConfig class.
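+
+        All arguments are documented in the class-level docstring above.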
+ """ + self.inputs = inputs + self.outputs = outputs + self.backend = backend + self.domain = domain + self.recipes = recipes + self.quant_format = quant_format + self.device = device + self.op_type_dict = op_type_dict + self.op_name_dict = op_name_dict + self.strategy = strategy + self.strategy_kwargs = strategy_kwargs + self.objective = objective + self.timeout = timeout + self.max_trials = max_trials + self.performance_only = performance_only + self.reduce_range = reduce_range + self.excluded_precisions = excluded_precisions + self.use_bf16 = "bf16" not in self.excluded_precisions + self.accuracy_criterion = accuracy_criterion + self.calibration_sampling_size = calibration_sampling_size + self.quant_level = quant_level + self.use_distributed_tuning=use_distributed_tuning + self._example_inputs = example_inputs + + @property + def domain(self): + """Get domain.""" + return self._domain + + @domain.setter + def domain(self, domain): + """Set domain.""" + if check_value("domain", domain, str, + ["auto", "cv", "object_detection", "nlp", "recommendation_system"]): + self._domain = domain + + @property + def recipes(self): + """Get recipes.""" + return self._recipes + + @recipes.setter + def recipes(self, recipes): + """Set recipes.""" + if recipes is not None and not isinstance(recipes, dict): + raise ValueError("recipes should be a dict.") + + def smooth_quant(val=None): + if val is not None: + return check_value("smooth_quant", val, bool) + else: + return False + + def smooth_quant_args(val=None): + if val is not None: + check_value("smooth_quant_args", val, dict) + for k, v in val.items(): + if k == "alpha": + check_value("alpha", v, float) + return True + else: + return {} + + def fast_bias_correction(val=None): + if val is not None: + return check_value("fast_bias_correction", val, bool) + else: + return False + + def weight_correction(val=None): + if val is not None: + return check_value("weight_correction", val, bool) + else: + return False + + def gemm_to_matmul(val=None): + if val is not None: + return check_value("gemm_to_matmul", val, bool) + else: + return True + + def graph_optimization_level(val=None): + if val is not None: + return check_value("graph_optimization_level", val, str, + ["DISABLE_ALL", "ENABLE_BASIC", "ENABLE_EXTENDED", "ENABLE_ALL"]) + else: + return None + + def first_conv_or_matmul_quantization(val=None): + if val is not None: + return check_value("first_conv_or_matmul_quantization", val, bool) + else: + return True + + def last_conv_or_matmul_quantization(val=None): + if val is not None: + return check_value("last_conv_or_matmul_quantization", val, bool) + else: + return True + + def pre_post_process_quantization(val=None): + if val is not None: + return check_value("pre_post_process_quantization", val, bool) + else: + return True + + def add_qdq_pair_to_weight(val=None): + if val is not None: + return check_value("add_qdq_pair_to_weight", val, bool) + else: + return False + + def optypes_to_exclude_output_quant(val=None): + if val is not None: + return isinstance(val, list) + else: + return [] + + def dedicated_qdq_pair(val=None): + if val is not None: + return check_value("dedicated_qdq_pair", val, bool) + else: + return False + + RECIPES = {"smooth_quant": smooth_quant, + "smooth_quant_args": smooth_quant_args, + "fast_bias_correction": fast_bias_correction, + "weight_correction": weight_correction, + "gemm_to_matmul": gemm_to_matmul, + "graph_optimization_level": graph_optimization_level, + "first_conv_or_matmul_quantization": 
first_conv_or_matmul_quantization, + "last_conv_or_matmul_quantization": last_conv_or_matmul_quantization, + "pre_post_process_quantization": pre_post_process_quantization, + "add_qdq_pair_to_weight": add_qdq_pair_to_weight, + "optypes_to_exclude_output_quant": optypes_to_exclude_output_quant, + "dedicated_qdq_pair": dedicated_qdq_pair + } + self._recipes = {} + for k in RECIPES.keys(): + if k in recipes and RECIPES[k](recipes[k]): + self._recipes.update({k: recipes[k]}) + else: + self._recipes.update({k: RECIPES[k]()}) + + @property + def accuracy_criterion(self): + return self._accuracy_criterion + + @accuracy_criterion.setter + def accuracy_criterion(self, accuracy_criterion): + if check_value("accuracy_criterion", accuracy_criterion, AccuracyCriterion): + self._accuracy_criterion = accuracy_criterion + + @property + def excluded_precisions(self): + return self._excluded_precisions + + @excluded_precisions.setter + def excluded_precisions(self, excluded_precisions): + if check_value("excluded_precisions", excluded_precisions, str, ["bf16", "fp16"]): + self._excluded_precisions = excluded_precisions + self._use_bf16 = "bf16" not in excluded_precisions + + @property + def quant_level(self): + return self._quant_level + + @quant_level.setter + def quant_level(self, quant_level): + self._quant_level = quant_level + + @property + def use_distributed_tuning(self): + return self._use_distributed_tuning + + @use_distributed_tuning.setter + def use_distributed_tuning(self, use_distributed_tuning): + if check_value('use_distributed_tuning', use_distributed_tuning, bool): + self._use_distributed_tuning = use_distributed_tuning + + @property + def reduce_range(self): + return self._reduce_range + + @reduce_range.setter + def reduce_range(self, reduce_range): + if reduce_range is None or check_value('reduce_range', reduce_range, bool): + self._reduce_range = reduce_range + + @property + def performance_only(self): + return self._performance_only + + @performance_only.setter + def performance_only(self, performance_only): + if check_value('performance_only', performance_only, bool): + self._performance_only = performance_only + + @property + def max_trials(self): + return self._max_trials + + @max_trials.setter + def max_trials(self, max_trials): + if check_value('max_trials', max_trials, int): + self._max_trials = max_trials + + @property + def timeout(self): + return self._timeout + + @timeout.setter + def timeout(self, timeout): + if check_value('timeout', timeout, int): + self._timeout = timeout + + @property + def objective(self): + return self._objective + + @objective.setter + def objective(self, objective): + if check_value('objective', objective, str, + ['performance', 'accuracy', 'modelsize', 'footprint']): + self._objective = objective + + @property + def strategy(self): + return self._strategy + + @strategy.setter + def strategy(self, strategy): + if check_value('strategy', strategy, str, + ['basic', 'mse', 'bayesian', 'random', 'exhaustive', 'sigopt', 'tpe', 'mse_v2', 'hawq_v2']): + self._strategy = strategy + + @property + def strategy_kwargs(self): + return self._strategy_kwargs + + @strategy_kwargs.setter + def strategy_kwargs(self, strategy_kwargs): + self._strategy_kwargs = strategy_kwargs + + @property + def op_name_dict(self): + return self._op_name_dict + + @op_name_dict.setter + def op_name_dict(self, op_name_dict): + if op_name_dict is None: + self._op_name_dict = op_name_dict + elif isinstance(op_name_dict, dict): + for k, v in op_name_dict.items(): + ops_schema.validate(v) + 
self._op_name_dict = op_name_dict + else: + assert False, ("Type of op_name_dict should be dict but not {}, ".format( + type(op_name_dict))) + + @property + def op_type_dict(self): + return self._op_type_dict + + @op_type_dict.setter + def op_type_dict(self, op_type_dict): + if op_type_dict is None: + self._op_type_dict = op_type_dict + elif isinstance(op_type_dict, dict): + for k, v in op_type_dict.items(): + ops_schema.validate(v) + self._op_type_dict = op_type_dict + else: + assert False, ("Type of op_type_dict should be dict but not {}".format( + type(op_type_dict))) + + @property + def calibration_sampling_size(self): + return self._calibration_sampling_size + + @calibration_sampling_size.setter + def calibration_sampling_size(self, sampling_size): + if check_value('calibration_sampling_size', sampling_size, int): + if isinstance(sampling_size, int): + sampling_size =[sampling_size] + self._calibration_sampling_size = sampling_size + + @property + def device(self): + return self._device + + @device.setter + def device(self, device): + if check_value('device', device, str, ['cpu', 'gpu']): + self._device = device + + @property + def quant_format(self): + return self._quant_format + + @quant_format.setter + def quant_format(self, quant_format): + if check_value('quant_format', quant_format, str, + ['default', 'QDQ', 'QOperator']): + self._quant_format = quant_format + + @property + def backend(self): + return self._backend + + @backend.setter + def backend(self, backend): + if check_value('backend', backend, str, [ + 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep']): + self._backend = backend + + @property + def outputs(self): + return self._outputs + + @outputs.setter + def outputs(self, outputs): + if check_value('outputs', outputs, str): + self._outputs = outputs + + @property + def inputs(self): + return self._inputs + + @inputs.setter + def inputs(self, inputs): + if check_value('inputs', inputs, str): + self._inputs = inputs + + @property + def example_inputs(self): + """Get strategy_kwargs.""" + return self._example_inputs + + @example_inputs.setter + def example_inputs(self, example_inputs): + """Set example_inputs.""" + self._example_inputs = example_inputs + + class QuantizationConfig(_BaseQuantizationConfig): def __init__(self, inputs=[], diff --git a/neural_compressor/config.py b/neural_compressor/config.py index b8f773002b0..96bcb72691e 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -149,27 +149,11 @@ def tensorboard(self, tensorboard): class BenchmarkConfig: """Config Class for Benchmark. - Args: - inputs (List, optional): A list of strings containing the inputs of model. Default is an empty list. - outputs (List, optional): A list of strings containing the outputs of model. Default is an empty list. - backend (str, optional): backend name for model execution. Supported values include: 'default', 'itex', - 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep'. Default value is 'default'. - warmup (int, optional): The number of iterations to perform warmup before running performance tests. - Default value is 5. - iteration (int, optional): The number of iterations to run performance tests. Default is -1. - cores_per_instance (int, optional): The number of CPU cores to use per instance. Default value is None. - num_of_instance (int, optional): The number of instances to use for performance testing. - Default value is None. - inter_num_of_threads (int, optional): The number of threads to use for inter-thread operations. - Default value is None. 
- intra_num_of_threads (int, optional): The number of threads to use for intra-thread operations. - Default value is None. - Example:: # Run benchmark according to config - from neural_compressor.benchmark import fit + conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=7) fit(model='./int8.pb', config=conf, b_dataloader=eval_dataloader) """ @@ -380,6 +364,11 @@ def __str__(self): """Get criterion.""" return self.criterion + def keys(self): + return ('higher_is_better', 'criterion', 'tolerable_loss') + + def __getitem__(self, item): + return getattr(self, item) accuracy_criterion = AccuracyCriterion() @@ -404,12 +393,14 @@ class TuningCriterion: strategy_kwargs=None, ) """ - def __init__(self, strategy="basic", strategy_kwargs=None, timeout=0, max_trials=100, objective="performance"): + def __init__(self, strategy="basic", strategy_kwargs=None, timeout=0, + max_trials=100, objective="performance", multi_objectives={}): """Init a TuningCriterion object.""" self.strategy = strategy self.timeout = timeout self.max_trials = max_trials self.objective = objective + self.multi_objectives = multi_objectives self.strategy_kwargs = strategy_kwargs @property @@ -441,11 +432,24 @@ def objective(self): @objective.setter def objective(self, objective): - """Set objective.""" if check_value('objective', objective, str, ['performance', 'accuracy', 'modelsize', 'footprint']): self._objective = objective + @property + def multi_objectives(self): + """Get multi-objectives.""" + return self._multi_objectives + + @multi_objectives.setter + def multi_objectives(self, multi_objectives): + if check_value('multi_objectives', multi_objectives, dict): + for k, v in multi_objectives.items(): + check_value('multi_objectives', k, str, ['objective', 'weight', 'higher_is_better']) + if k == 'objective': + check_value('objective', v, str, ['performance', 'accuracy', 'modelsize', 'footprint']) + self._multi_objectives = multi_objectives + @property def strategy(self): """Get strategy.""" @@ -482,7 +486,6 @@ class _BaseQuantizationConfig: explicitly specified quantization settings will override the automatic setting. If users set domain as auto, automatic detection for domain will be executed. model_name: name of model - metric: dict of metric that will be used recipes: recipes for quantiztaion, support list is as below. 
'smooth_quant': whether do smooth quant 'smooth_quant_args': parameters for smooth_quant @@ -502,7 +505,6 @@ class _BaseQuantizationConfig: calibration_sampling_size: number of calibration sample op_type_dict: tuning constraints on optype-wise op_name_dict: tuning constraints on op-wise - performance_only: whether do evaluation reduce_range: whether use 7 bit excluded_precisions: precisions to be excluded, support 'bf16' quant_level: support auto, 0 and 1, 0 is conservative strategy, 1 is basic or user-specified @@ -516,14 +518,12 @@ def __init__(self, backend="default", domain="auto", model_name="", - metric={}, recipes={}, quant_format="default", device="cpu", calibration_sampling_size=[100], op_type_dict=None, op_name_dict=None, - performance_only=False, reduce_range=None, excluded_precisions=[], quant_level="auto", @@ -537,13 +537,11 @@ def __init__(self, self.backend = backend self.domain = domain self.model_name = model_name - self.metric=metric self.recipes = recipes self.quant_format = quant_format self.device = device self.op_type_dict = op_type_dict self.op_name_dict = op_name_dict - self.performance_only = performance_only self.reduce_range = reduce_range self.excluded_precisions = excluded_precisions self.use_bf16 = "bf16" not in self.excluded_precisions @@ -577,18 +575,6 @@ def model_name(self, model_name): if check_value("model_name", model_name, str): self._model_name = model_name - @property - def metric(self): - """Get metric.""" - return self._metric - - @metric.setter - def metric(self, metric): - """Set metric.""" - if metric is not None and not isinstance(metric, dict): - raise ValueError("metric should be a dict.") - self._metric = metric - @property def recipes(self): """Get recipes.""" @@ -753,61 +739,6 @@ def reduce_range(self, reduce_range): if reduce_range is None or check_value('reduce_range', reduce_range, bool): self._reduce_range = reduce_range - @property - def performance_only(self): - return self._performance_only - - @performance_only.setter - def performance_only(self, performance_only): - if check_value('performance_only', performance_only, bool): - self._performance_only = performance_only - - @property - def max_trials(self): - return self._max_trials - - @max_trials.setter - def max_trials(self, max_trials): - if check_value('max_trials', max_trials, int): - self._max_trials = max_trials - - @property - def timeout(self): - return self._timeout - - @timeout.setter - def timeout(self, timeout): - if check_value('timeout', timeout, int): - self._timeout = timeout - - @property - def objective(self): - return self._objective - - @objective.setter - def objective(self, objective): - if check_value('objective', objective, str, - ['performance', 'accuracy', 'modelsize', 'footprint']): - self._objective = objective - - @property - def strategy(self): - return self._strategy - - @strategy.setter - def strategy(self, strategy): - if check_value('strategy', strategy, str, - ['basic', 'mse', 'bayesian', 'random', 'exhaustive', 'sigopt', 'tpe', 'mse_v2', 'hawq_v2']): - self._strategy = strategy - - @property - def strategy_kwargs(self): - return self._strategy_kwargs - - @strategy_kwargs.setter - def strategy_kwargs(self, strategy_kwargs): - self._strategy_kwargs = strategy_kwargs - @property def op_name_dict(self): return self._op_name_dict @@ -897,6 +828,10 @@ def inputs(self): def inputs(self, inputs): if check_value('inputs', inputs, str): self._inputs = inputs + + @property + def framework(self): + return self._framework class 
PostTrainingQuantConfig(_BaseQuantizationConfig): @@ -987,7 +922,9 @@ def __init__(self, op_name_dict=None, reduce_range=None, excluded_precisions=[], - quant_level="auto"): + quant_level="auto", + accuracy_criterion=accuracy_criterion, + tuning_criterion=tuning_criterion): """Init a QuantizationAwareTrainingConfig object.""" super().__init__(inputs=inputs, outputs=outputs, @@ -997,7 +934,9 @@ def __init__(self, op_name_dict=op_name_dict, reduce_range=reduce_range, excluded_precisions=excluded_precisions, - quant_level=quant_level) + quant_level=quant_level, + accuracy_criterion=accuracy_criterion, + tuning_criterion=tuning_criterion) self._approach = 'quant_aware_training' @property @@ -1217,16 +1156,6 @@ def teacher_model(self, teacher_model): class MixedPrecisionConfig(_BaseQuantizationConfig): """Config Class for MixedPrecision. - - Args: - device (String, optional): device for execution. Support 'cpu' and 'gpu', default is 'cpu' - backend (String, optional): backend for model execution. Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep', default is 'default' - precision (String, optional): target precision for mix precision conversion. Support 'bf16' and 'fp16', default is 'bf16' - inputs (List, optional): inputs of model, default is [] - outputs (List, optional): outputs of model, default is [] - tuning_criterion (TuningCriterion object, optional): accuracy tuning settings, it won't work if there is no accuracy tuning process - accuracy_criterion (AccuracyCriterion object, optional): accuracy constraint settings, it won't work if there is no accuracy tuning process - excluded_precisions (List, optional): precisions to be excluded during mix precision conversion, default is [] Example:: @@ -1366,17 +1295,7 @@ def dynamic_axes(self, dynamic_axes): class ONNXQlinear2QDQConfig: - """Config Class for ONNXQlinear2QDQ. - - Example:: - - from neural_compressor.config import ONNXQlinear2QDQConfig - from neural_compressor.model import Model - - conf = ONNXQlinear2QDQConfig() - model = Model(model) - model.export('new_model.onnx', conf) - """ + """Config Class for ONNXQlinear2QDQ.""" def __init__(self): """Init an ONNXQlinear2QDQConfig object.""" pass @@ -1411,28 +1330,7 @@ def __init__( class TF2ONNXConfig(ExportConfig): - """Config Class for TF2ONNX. - - Args: - dtype (str, optional): The data type of export target model. Supports 'fp32' and 'int8'. - Defaults to "int8". - opset_version (int, optional): The version of the ONNX operator set to use. Defaults to 14. - quant_format (str, optional): The quantization format for the export target model. - Supports 'default', 'QDQ' and 'QOperator'. Defaults to "QDQ". - example_inputs (list, optional): A list example inputs to use for tracing the model. - Defaults to None. - input_names (list, optional): A list of model input names. Defaults to None. - output_names (list, optional): A list of model output names. Defaults to None. - dynamic_axes (dict, optional): A dictionary of dynamic axis information. Defaults to None. - **kwargs: Additional keyword arguments. 
- - Examples:: - - # tensorflow QDQ int8 model 'q_model' export to ONNX int8 model - from neural_compressor.config import TF2ONNXConfig - config = TF2ONNXConfig() - q_model.export(output_graph, config) - """ + """Config Class for TF2ONNX.""" def __init__( self, dtype="int8", @@ -1457,55 +1355,6 @@ def __init__( self.kwargs = kwargs -class QuantizationConfig(_BaseQuantizationConfig): - def __init__(self, - inputs=[], - outputs=[], - backend='default', - device='cpu', - approach='post_training_static_quant', - calibration_sampling_size=[100], - op_type_dict=None, - op_name_dict=None, - performance_only=False, - reduce_range=None, - use_bf16=True, - quant_level="auto", - accuracy_criterion=accuracy_criterion, - tuning_criterion=tuning_criterion, - use_distributed_tuning=False): - excluded_precisions = ["bf16"] if not use_bf16 else [] - super().__init__( - inputs=inputs, - outputs=outputs, - backend=backend, - device=device, - calibration_sampling_size=calibration_sampling_size, - op_type_dict=op_type_dict, - op_name_dict=op_name_dict, - performance_only=performance_only, - reduce_range=reduce_range, - excluded_precisions=excluded_precisions, - accuracy_criterion=accuracy_criterion, - tuning_criterion=tuning_criterion, - quant_level=quant_level, - use_distributed_tuning=use_distributed_tuning - ) - self.approach = approach - - @property - def approach(self): - return self._approach - - @approach.setter - def approach(self, approach): - if check_value( - 'approach', approach, str, - ['post_training_static_quant', 'post_training_dynamic_quant', 'quant_aware_training'] - ): - self._approach = approach - - class NASConfig: def __init__(self, approach=None, search_space=None, search_algorithm=None, metrics=[], higher_is_better=[], max_trials=3, seed=42, dynas=None): @@ -1586,7 +1435,17 @@ def __init__(self, precisions=None): super().__init__(precisions) -quantization = QuantizationConfig() +class Metric(): + def __init__(self, metric_cfg=None): + self._metric_cfg = metric_cfg + + @property + def metric_cfg(self): + return self._metric_cfg + + +quantization = PostTrainingQuantConfig() +qat_quantization = QuantizationAwareTrainingConfig() benchmark = BenchmarkConfig() options = Options() mixed_precision = MixedPrecisionConfig() @@ -1598,7 +1457,7 @@ def __init__(self, precisions=None): keras_config = Keras() pytorch_config = PyTorch() mxnet_config = MXNet() - +metric_config = Metric() class Config: def __init__(self, @@ -1614,6 +1473,7 @@ def __init__(self, pytorch=pytorch_config, mxnet=mxnet_config, keras=keras_config, + metric=metric_config ): self._quantization = quantization self._benchmark = benchmark @@ -1627,6 +1487,7 @@ def __init__(self, self._pytorch = pytorch self._mxnet = mxnet self._keras = keras + self._metric = metric @property def distillation(self): @@ -1675,5 +1536,9 @@ def mixed_precision(self): @property def onnxruntime(self): return self._onnxruntime + + @property + def metric(self): + return self._metric config = Config() From 1c3e2402d4756bfa35127f2bcbfef84aaae71d5f Mon Sep 17 00:00:00 2001 From: Yi30 <106061964+yiliu30@users.noreply.github.com> Date: Wed, 29 Mar 2023 13:42:29 +0800 Subject: [PATCH 005/103] Adjust strategy layer for new API (#766) * refactor conf Signed-off-by: yiliu30 * copy strategy layer into experimental Signed-off-by: yiliu30 * adjusted the import relationship Signed-off-by: yiliu30 * replace strategy with exp_strategy Signed-off-by: yiliu30 * fixed import bug Signed-off-by: yiliu30 * refactor ut Signed-off-by: yiliu30 * refactor strategy Signed-off-by: 
yiliu30 * update obj config Signed-off-by: yiliu30 * fixed ut Signed-off-by: yiliu30 * remove set framwork Signed-off-by: yiliu30 * remove some comments Signed-off-by: yiliu30 --------- Signed-off-by: yiliu30 --- neural_compressor/__init__.py | 2 +- neural_compressor/adaptor/mxnet.py | 2 +- neural_compressor/adaptor/onnxrt.py | 2 +- neural_compressor/contrib/strategy/tpe.py | 2 +- .../experimental/graph_optimization.py | 6 +- .../experimental/mixed_precision.py | 6 +- .../experimental/quantization.py | 6 +- .../experimental/strategy/__init__.py | 30 + .../strategy/auto_mixed_precision.py | 156 ++ .../experimental/strategy/basic.py | 295 ++++ .../experimental/strategy/bayesian.py | 444 +++++ .../experimental/strategy/conservative.py | 412 +++++ .../experimental/strategy/exhaustive.py | 49 + .../experimental/strategy/hawq_v2.py | 110 ++ .../experimental/strategy/mse.py | 197 +++ .../experimental/strategy/mse_v2.py | 209 +++ .../experimental/strategy/random.py | 55 + .../experimental/strategy/strategy.py | 1556 +++++++++++++++++ .../experimental/strategy/utils/__init__.py | 22 + .../experimental/strategy/utils/constant.py | 35 + .../strategy/utils/tuning_sampler.py | 463 +++++ .../strategy/utils/tuning_space.py | 728 ++++++++ .../strategy/utils/tuning_structs.py | 104 ++ .../experimental/strategy/utils/utility.py | 57 + neural_compressor/quantization.py | 4 +- neural_compressor/strategy/auto.py | 12 +- .../strategy/auto_mixed_precision.py | 2 +- neural_compressor/strategy/bayesian.py | 2 +- neural_compressor/strategy/strategy.py | 418 ++--- .../strategy/utils/tuning_space.py | 12 +- test/strategy/test_basic.py | 173 +- test/strategy/test_basic_1.x.py | 221 +++ 32 files changed, 5403 insertions(+), 389 deletions(-) create mode 100644 neural_compressor/experimental/strategy/__init__.py create mode 100644 neural_compressor/experimental/strategy/auto_mixed_precision.py create mode 100644 neural_compressor/experimental/strategy/basic.py create mode 100644 neural_compressor/experimental/strategy/bayesian.py create mode 100644 neural_compressor/experimental/strategy/conservative.py create mode 100644 neural_compressor/experimental/strategy/exhaustive.py create mode 100644 neural_compressor/experimental/strategy/hawq_v2.py create mode 100644 neural_compressor/experimental/strategy/mse.py create mode 100644 neural_compressor/experimental/strategy/mse_v2.py create mode 100644 neural_compressor/experimental/strategy/random.py create mode 100644 neural_compressor/experimental/strategy/strategy.py create mode 100644 neural_compressor/experimental/strategy/utils/__init__.py create mode 100644 neural_compressor/experimental/strategy/utils/constant.py create mode 100644 neural_compressor/experimental/strategy/utils/tuning_sampler.py create mode 100644 neural_compressor/experimental/strategy/utils/tuning_space.py create mode 100644 neural_compressor/experimental/strategy/utils/tuning_structs.py create mode 100644 neural_compressor/experimental/strategy/utils/utility.py create mode 100644 test/strategy/test_basic_1.x.py diff --git a/neural_compressor/__init__.py b/neural_compressor/__init__.py index 35c8cf0c357..bee0012eb81 100644 --- a/neural_compressor/__init__.py +++ b/neural_compressor/__init__.py @@ -22,7 +22,7 @@ # we need to set a global 'NA' backend, or Model can't be used from .utils.utility import set_random_seed, set_tensorboard, set_workspace from .utils import options -from .config import conf +# from .config import conf from .config import DistillationConfig, PostTrainingQuantConfig, \ 
WeightPruningConfig, QuantizationAwareTrainingConfig, \ MixedPrecisionConfig diff --git a/neural_compressor/adaptor/mxnet.py b/neural_compressor/adaptor/mxnet.py index 6a6e9087148..bf368651353 100644 --- a/neural_compressor/adaptor/mxnet.py +++ b/neural_compressor/adaptor/mxnet.py @@ -25,7 +25,7 @@ dump_elapsed_time, singleton) from neural_compressor.adaptor.mxnet_utils.util import * from collections import OrderedDict -from ..experimental.data.dataloaders.base_dataloader import BaseDataLoader +from neural_compressor.data.dataloaders.base_dataloader import BaseDataLoader from copy import deepcopy import math diff --git a/neural_compressor/adaptor/onnxrt.py b/neural_compressor/adaptor/onnxrt.py index 0ab92ada06d..5a1adb95175 100644 --- a/neural_compressor/adaptor/onnxrt.py +++ b/neural_compressor/adaptor/onnxrt.py @@ -31,7 +31,7 @@ from neural_compressor.utils.utility import LazyImport, dump_elapsed_time, \ GLOBAL_STATE, MODE from neural_compressor.utils.utility import Statistics -from neural_compressor.experimental.data.dataloaders.base_dataloader import BaseDataLoader +from neural_compressor.data.dataloaders.base_dataloader import BaseDataLoader from neural_compressor.conf.dotdict import deep_get from neural_compressor.utils.utility import CpuInfo import math diff --git a/neural_compressor/contrib/strategy/tpe.py b/neural_compressor/contrib/strategy/tpe.py index 2082fa8021b..8e8d1f653ac 100644 --- a/neural_compressor/contrib/strategy/tpe.py +++ b/neural_compressor/contrib/strategy/tpe.py @@ -505,7 +505,7 @@ def stop(self, timeout, trials_count): if timeout == 0 and self.best_tune_result: need_stop = True - elif trials_count >= self.cfg.tuning.exit_policy.max_trials: + elif trials_count >= self.conf.quantization.tuning_criterion.max_trials: need_stop = True else: need_stop = False diff --git a/neural_compressor/experimental/graph_optimization.py b/neural_compressor/experimental/graph_optimization.py index d1351a5b4d5..a47a221c02d 100644 --- a/neural_compressor/experimental/graph_optimization.py +++ b/neural_compressor/experimental/graph_optimization.py @@ -25,7 +25,7 @@ import yaml from ..conf.config import Graph_Optimization_Conf from ..conf.dotdict import deep_get, deep_set, DotDict -from ..strategy import STRATEGIES +from .strategy import EXP_STRATEGIES from ..utils import logger from ..utils.create_obj_from_config import create_dataloader from ..utils.utility import CpuInfo, time_limit @@ -139,7 +139,7 @@ def __call__(self): strategy = cfg.tuning.strategy.name.lower() - assert strategy in STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy) + assert strategy in EXP_STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy) _resume = None # check if interrupted tuning procedure exists. 
if yes, it will resume the @@ -152,7 +152,7 @@ def __call__(self): with open(self.resume_file, 'rb') as f: _resume = pickle.load(f).__dict__ - self.strategy = STRATEGIES[strategy]( + self.strategy = EXP_STRATEGIES[strategy]( self._model, self.conf, None, diff --git a/neural_compressor/experimental/mixed_precision.py b/neural_compressor/experimental/mixed_precision.py index 438f2e749bb..448e3bab6a8 100644 --- a/neural_compressor/experimental/mixed_precision.py +++ b/neural_compressor/experimental/mixed_precision.py @@ -24,7 +24,7 @@ from ..conf.config import MixedPrecision_Conf from ..conf.pythonic_config import Config from ..conf.dotdict import deep_get -from ..strategy import STRATEGIES +from .strategy import EXP_STRATEGIES from ..utils import logger from ..utils.create_obj_from_config import create_dataloader from ..utils.utility import CpuInfo, time_limit @@ -149,7 +149,7 @@ def __call__(self): strategy = cfg.tuning.strategy.name.lower() - assert strategy in STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy) + assert strategy in EXP_STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy) _resume = None # check if interrupted tuning procedure exists. if yes, it will resume the @@ -162,7 +162,7 @@ def __call__(self): with open(self.resume_file, 'rb') as f: _resume = pickle.load(f).__dict__ - self.strategy = STRATEGIES[strategy]( + self.strategy = EXP_STRATEGIES[strategy]( self._model, self.conf, None, diff --git a/neural_compressor/experimental/quantization.py b/neural_compressor/experimental/quantization.py index 3701f4e3def..8aa059242df 100644 --- a/neural_compressor/experimental/quantization.py +++ b/neural_compressor/experimental/quantization.py @@ -23,7 +23,7 @@ import numpy as np from .component import Component from ..conf.dotdict import deep_get, deep_set, DotDict -from ..strategy import STRATEGIES +from .strategy import EXP_STRATEGIES from ..utils import logger from ..utils.utility import time_limit from ..utils.create_obj_from_config import create_dataloader @@ -144,7 +144,7 @@ def pre_process(self): strategy = "basic" logger.warning(f"MSE_v2 does not support {self.framework} now, use basic instead.") logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.") - assert strategy in STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy) + assert strategy in EXP_STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy) _resume = None # check if interrupted tuning procedure exists. if yes, it will resume the @@ -157,7 +157,7 @@ def pre_process(self): with open(self.resume_file, 'rb') as f: _resume = pickle.load(f).__dict__ - self.strategy = STRATEGIES[strategy]( + self.strategy = EXP_STRATEGIES[strategy]( self._model, self.conf, self._calib_dataloader, diff --git a/neural_compressor/experimental/strategy/__init__.py b/neural_compressor/experimental/strategy/__init__.py new file mode 100644 index 00000000000..f4a137cb792 --- /dev/null +++ b/neural_compressor/experimental/strategy/__init__.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Intel Neural Compressor Strategy."""
+
+from .strategy import EXP_STRATEGIES
+from os.path import dirname, basename, isfile, join
+import glob
+
+modules = glob.glob(join(dirname(__file__), "*.py"))
+
+for f in modules:
+    if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'):
+        __import__(basename(f)[:-3], globals(), locals(), level=1)
+
+__all__ = ["EXP_STRATEGIES"]
diff --git a/neural_compressor/experimental/strategy/auto_mixed_precision.py b/neural_compressor/experimental/strategy/auto_mixed_precision.py
new file mode 100644
index 00000000000..76ef7c8bb7e
--- /dev/null
+++ b/neural_compressor/experimental/strategy/auto_mixed_precision.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""The auto-mixed precision strategy."""
+
+import copy
+import numpy as np
+from collections import OrderedDict
+from .strategy import strategy_registry, TuneStrategy
+from ...utils import logger
+
+from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler
+from .utils.tuning_structs import OpTuningConfig
+
+
+@strategy_registry
+class AutoMixedPrecisionTuneStrategy(TuneStrategy):
+    """Tuning strategy for auto mixed precision."""
+
+    def next_tune_cfg(self):
+        """Generate the next tuning config.
+
+        Tuning configurations are generated according to the following rules:
+        1. First, it tries to convert as many ops as possible into the target data type.
+        2. If the accuracy does not meet the requirements, it starts the fallback stage,
+            which converts ops back into higher precision.
+
+        Yields:
+            tune_config (dict): A dict containing the tuning configuration.
+        """
+        from copy import deepcopy
+
+        # filter quantization dtype
+        # TODO align with the old mixed-precision
+        target_dtypes = self.cfg.graph_optimization.precisions if self.cfg.graph_optimization \
+            else self.cfg.mixed_precision.precisions
+        target_dtypes = list(set(target_dtypes) - set(['fp32']))
+        tuning_space = self.tuning_space
+        initial_op_tuning_cfg = {}
+        for item in tuning_space.root_item.options:
+            if item.item_type == 'op':
+                op_name, op_type = item.name
+                initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space)
+
+        if not target_dtypes:
+            target_dtypes = ['bf16']
+        # step1.
convert to target_dtype as many as possible (AMAP); collect the ops that support target_dtype
+        bf16_items_name = []
+        op_tuning_cfg = {}
+        for idx, target_dtype in enumerate(target_dtypes):
+            bf16_items = tuning_space.query_items_by_quant_mode(target_dtype)
+            if len(bf16_items) == 0 and \
+                not (idx == len(target_dtypes) - 1 and len(bf16_items_name) == 0):
+                continue
+            bf16_items_name = [item.name for item in bf16_items]
+            op_tuning_cfg = deepcopy(initial_op_tuning_cfg)
+            for op_name_type in bf16_items_name:
+                op_tuning_cfg[op_name_type] = \
+                    OpTuningConfig(op_name_type[0], op_name_type[1], target_dtype, tuning_space)
+            calib_sampling_size = 1
+            op_tuning_cfg['calib_sampling_size'] = calib_sampling_size
+            yield op_tuning_cfg
+
+        # step2. fallback
+        target_dtype = 'fp32'
+        fallback_items_name_lst = bf16_items_name[::-1]
+        if fallback_items_name_lst:
+            logger.info(f"Start to fallback op to {target_dtype} one by one.")
+            self._fallback_started()
+        op_dtypes = OrderedDict(zip(fallback_items_name_lst, [target_dtype] * len(fallback_items_name_lst)))
+        initial_op_tuning_cfg = deepcopy(op_tuning_cfg)
+        fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[],
+                                                 initial_op_tuning_cfg=initial_op_tuning_cfg,
+                                                 op_dtypes=op_dtypes, accumulate=False)
+        op_fallback_acc_impact = OrderedDict()
+        for op_index, op_tuning_cfg in enumerate(fallback_sampler):
+            op_tuning_cfg['calib_sampling_size'] = calib_sampling_size
+            yield op_tuning_cfg
+            acc, _ = self.last_tune_result
+            op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc
+
+        # do accumulated fallback according to the order in the previous stage
+        if len(op_fallback_acc_impact) > 0:
+            ordered_ops = sorted(op_fallback_acc_impact.keys(), key=lambda key: op_fallback_acc_impact[key],
+                                 reverse=self.higher_is_better)
+            op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst)))
+            logger.info(f"Start to accumulate fallback to {target_dtype}.")
+            initial_op_tuning_cfg = deepcopy(op_tuning_cfg)
+            fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[],
+                                                     initial_op_tuning_cfg=initial_op_tuning_cfg,
+                                                     op_dtypes=op_dtypes, accumulate=True)
+            for op_tuning_cfg in fallback_sampler:
+                op_tuning_cfg['calib_sampling_size'] = calib_sampling_size
+                yield op_tuning_cfg
+
+    def traverse(self):
+        """Traverse the tuning space according to auto-mixed precision strategy."""
+        # get fp32 model baseline
+        self._eval_baseline()
+
+        trials_count = 0
+        for op_tuning_cfg in self.next_tune_cfg():
+            # add tune_cfg here as quantize uses tune_cfg
+            tune_cfg = self._tune_cfg_converter(op_tuning_cfg)
+            trials_count += 1
+            tuning_history = self._find_tuning_history(tune_cfg)
+            if tuning_history and trials_count < self.cfg.tuning.exit_policy.max_trials:
+                self.last_tune_result = tuning_history['last_tune_result']
+                self.best_tune_result = tuning_history['best_tune_result']
+                logger.warn("Find evaluated tuning config, skip.")
+                continue
+
+            logger.debug("Dump current mixed precision configuration:")
+            logger.debug(tune_cfg)
+            self.last_qmodel = self.adaptor.quantize(
+                tune_cfg, self.model, self.calib_dataloader, self.q_func)
+            assert self.last_qmodel
+            # Return the last quantized model as the result if performance_only is set.
+ if self.cfg.tuning.exit_policy.performance_only: + self.best_qmodel = self.last_qmodel + self._add_tuning_history(copy.deepcopy(tune_cfg), (-1, [0]), q_config=self.last_qmodel.q_config) + return + self.last_tune_cfg = copy.deepcopy(tune_cfg) + if self.eval_dataloader or self.eval_func: + q_config = copy.deepcopy(self.last_qmodel.q_config) + self.last_tune_result = self._evaluate(self.last_qmodel) + self.cur_best_acc, self.cur_best_tuning_cfg = self.update_best_op_tuning_cfg(op_tuning_cfg) + need_stop = self.stop(self.cfg.tuning.exit_policy.timeout, trials_count) + # record the tuning history + saved_tune_cfg = copy.deepcopy(tune_cfg) + saved_last_tune_result = copy.deepcopy(self.last_tune_result) + self._add_tuning_history(saved_tune_cfg, saved_last_tune_result, q_config=q_config) + else: + # If the eval_dataloader was not specified under the config yaml file, + # We only converted the model with customized precisions. + self.best_qmodel = self.last_qmodel + need_stop = True + + if need_stop: + break + + diff --git a/neural_compressor/experimental/strategy/basic.py b/neural_compressor/experimental/strategy/basic.py new file mode 100644 index 00000000000..33ea8c7d675 --- /dev/null +++ b/neural_compressor/experimental/strategy/basic.py @@ -0,0 +1,295 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""The basic tuning strategy.""" +import copy +import numpy as np +from collections import OrderedDict +from .strategy import strategy_registry, TuneStrategy +from ...utils import logger + +from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler, ModelWiseTuningSampler +from .utils.tuning_structs import OpTuningConfig +from .utils.constant import TUNING_ITEMS_LST + +@strategy_registry +class BasicTuneStrategy(TuneStrategy): + """The basic tuning strategy. + + There are three stages executed by Basic strategy sequentially, + and the tuning process ends once the condition meets the exit policy. + """ + + def distributed_next_tune_cfg_lst(self, comm): + """Generate and yield the next tuning config list with below order. + + 1. OP Type Wise Tuning + 2. Fallback OP One by One + 3. Fallback Multiple OPs Accumulated + + Yields: + tuning_config_list (list): A list containing dicts of the tuning configuration for quantization. 
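+
+        Example::
+            # A minimal consumption sketch; it assumes an mpi4py communicator and
+            # an already-initialized strategy instance (names are illustrative):
+            from mpi4py import MPI
+
+            comm = MPI.COMM_WORLD
+            for tune_cfg_lst in strategy.distributed_next_tune_cfg_lst(comm):
+                ...  # each rank quantizes and evaluates its share of tune_cfg_lst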
+        """
+        from copy import deepcopy
+        tuning_space = self.tuning_space
+        calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options
+        rank = comm.Get_rank()
+        for calib_sampling_size in calib_sampling_size_lst:
+            # Initialize the tuning config for each op according to the quantization approach
+            op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg()
+            # Optype-wise tuning items: the algorithm/scheme/granularity of activation(weight)
+            early_stop_tuning = False
+            stage1_cnt = 0
+            quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else []
+            quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else []
+            stage1_max = 1e9  # TODO set a more appropriate value
+            op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [],
+                                                             op_item_dtype_dict, initial_op_tuning_cfg)
+            ############ stage 1: yield op_tune_cfg_lst
+            op_tuning_cfg_lst_stage_1 = []
+            for op_tuning_cfg in op_wise_tuning_sampler:
+                stage1_cnt += 1
+                if early_stop_tuning and stage1_cnt > stage1_max:
+                    logger.info("Early stopping the stage 1.")
+                    break
+                op_tuning_cfg['calib_sampling_size'] = calib_sampling_size
+                op_tuning_cfg_lst_stage_1.append(deepcopy(op_tuning_cfg))
+            logger.info("yield op_tuning_cfg_lst_stage_1 with length {}".format(len(op_tuning_cfg_lst_stage_1)))
+            yield op_tuning_cfg_lst_stage_1
+
+            #### Coordinate: only master knows cur best tune cfg
+            cur_best_tuning_cfg = self.cur_best_tuning_cfg if rank == 0 else None
+            if rank == 0:
+                comm.bcast(cur_best_tuning_cfg, root=0)
+            else:
+                self.cur_best_tuning_cfg = comm.bcast(cur_best_tuning_cfg, root=0)
+
+            ############ stage 2: yield new_op_tuning_cfg_lst (length of 1)
+            # Fall back the ops that support both static and dynamic from static to dynamic
+            # Tuning items: None
+            if self.cfg.quantization.approach == 'post_training_auto_quant':
+                static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if
+                                        item in tuning_space.query_items_by_quant_mode('dynamic')]
+                if static_dynamic_items:
+                    logger.info("Fallback all ops that support both dynamic and static to dynamic.")
+                else:
+                    logger.info("No ops support both dynamic and static.")
+
+                new_op_tuning_cfg = deepcopy(self.cur_best_tuning_cfg)
+                for item in static_dynamic_items:
+                    new_op_tuning_cfg[item.name] = self._initial_dynamic_cfg_based_on_static_cfg(
+                        new_op_tuning_cfg[item.name])
+                new_op_tuning_cfg['calib_sampling_size'] = calib_sampling_size
+                op_tuning_cfg_lst_stage_2 = [deepcopy(new_op_tuning_cfg)]
+                logger.info("yield op_tuning_cfg_lst_stage_2 with length {}".format(len(op_tuning_cfg_lst_stage_2)))
+                yield op_tuning_cfg_lst_stage_2
+
+            #### Coordinate: only master knows cur best tune cfg
+            cur_best_tuning_cfg = self.cur_best_tuning_cfg if rank == 0 else None
+            if rank == 0:
+                comm.bcast(cur_best_tuning_cfg, root=0)
+            else:
+                self.cur_best_tuning_cfg = comm.bcast(cur_best_tuning_cfg, root=0)
+
+            best_op_tuning_cfg_stage1 = deepcopy(self.cur_best_tuning_cfg)
+
+            # Fallback
+            ############ stage 3, 4: yield op_tuning_cfg_lst
+            op_tuning_cfg_lst_stage_3 = []
+            op_tuning_cfg_lst_stage_4 = []
+            for target_dtype in ['bf16', 'fp32']:
+                target_type_lst = set(tuning_space.query_items_by_quant_mode(target_dtype))
+                fallback_items_lst = [item for item in quant_ops if item in target_type_lst]
+                if fallback_items_lst:
+                    logger.info(f"Start to fallback op to {target_dtype} one by one.")
+                    self._fallback_started()
+                fallback_items_name_lst = [item.name for item
in fallback_items_lst][::-1] # from bottom to up + op_dtypes = OrderedDict(zip(fallback_items_name_lst, [target_dtype] * len(fallback_items_name_lst))) + initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) + fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, accumulate=False) + op_fallback_acc_impact = OrderedDict() + for op_index, op_tuning_cfg in enumerate(fallback_sampler): + op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + # yield op_tuning_cfg + op_tuning_cfg_lst_stage_3.append(deepcopy(op_tuning_cfg)) + logger.info("yield op_tuning_cfg_lst_stage_3 with length {}".format(len(op_tuning_cfg_lst_stage_3))) + yield op_tuning_cfg_lst_stage_3 + + # Only master updates op_fallback_acc_impact + if rank == 0: + for op_index, op_tuning_cfg in enumerate(fallback_sampler): + acc, _ = self.eval_results[op_index] + op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc + + #### Coordinate: only master knows op_fallback_acc_impact + op_fallback_acc_impact = op_fallback_acc_impact if rank == 0 else None + if rank == 0: + comm.bcast(op_fallback_acc_impact, root=0) + else: + op_fallback_acc_impact = comm.bcast(op_fallback_acc_impact, root=0) + + # Fallback OPs accumulated according to the order in the previous stage + if len(op_fallback_acc_impact) > 0: + ordered_ops = sorted(op_fallback_acc_impact.keys(), + key=lambda key: op_fallback_acc_impact[key], + reverse=self.higher_is_better) + op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) + logger.info(f"Start to accumulate fallback to {target_dtype}.") + initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) + fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, accumulate=True) + for op_tuning_cfg in fallback_sampler: + op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + # yield op_tuning_cfg + op_tuning_cfg_lst_stage_4.append(deepcopy(op_tuning_cfg)) + logger.info("yield op_tuning_cfg_lst_stage_4 with length {}".format(len(op_tuning_cfg_lst_stage_4))) + yield op_tuning_cfg_lst_stage_4 + + def next_tune_cfg(self): + """Generate and yield the next tuning config with below order. + + 1. OP Type Wise Tuning: tries to quantize the OPs as many as possible + and traverse all OP type wise tuning configs + 2. Fallback OP One by One: it performs high-precision OP (FP32, BF16 ...) + fallbacks one by one based on the tuning config with the best result + in the previous stage, and records the impact of each OP. + 3. Fallback Multiple OPs Accumulated: first sorted the OPs list + according to the impact score in stage II, and tries to incrementally + fallback multiple OPs to high precision according to the sorted OP list. + + Returns: + tune_config (dict): A dict containing the tuning configuration for quantization. + """ + from copy import deepcopy + tuning_space = self.tuning_space + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + for calib_sampling_size in calib_sampling_size_lst: + # Initialize the tuning config for each op according to the quantization approach. 
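+            # (illustrative) initial_tuning_cfg maps each (op_name, op_type) pair to an
+            # OpTuningConfig, e.g. {('conv1', 'conv2d'): OpTuningConfig('conv1', 'conv2d',
+            # 'static', tuning_space), ...}; the op names here are hypothetical.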
+            op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg()
+            # Optype-wise tuning items: the algorithm/scheme/granularity of activation(weight)
+            early_stop_tuning = False
+            stage1_cnt = 0
+            quant_ops = quant_mode_wise_items.get('static', [])
+            quant_ops += quant_mode_wise_items.get('dynamic', [])
+            stage1_max = 1e9  # TODO set a more appropriate value
+            op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [],
+                                                             op_item_dtype_dict, initial_op_tuning_cfg)
+            for index, op_tuning_cfg in enumerate(op_wise_tuning_sampler):
+                op_tuning_cfg['calib_sampling_size'] = calib_sampling_size
+                # Apply all recipes; if the resulting qmodel does not meet the requirements, discard it.
+                if index == 1 and not self.applied_all_recipes_flag:
+                    logger.info("Apply all recipes.")
+                    self.applied_all_recipes_flag = True
+                    yield self.apply_all_tuning_recipes(deepcopy(self.cur_best_tuning_cfg))
+                stage1_cnt += 1
+                if early_stop_tuning and stage1_cnt > stage1_max:
+                    logger.info("Early stopping the stage 1.")
+                    break
+                yield op_tuning_cfg
+
+            # Apply all recipes; if the resulting qmodel does not meet the requirements, discard it.
+            if stage1_cnt == 1 and not self.applied_all_recipes_flag:
+                logger.info("Apply all recipes.")
+                self.applied_all_recipes_flag = True
+                yield self.apply_all_tuning_recipes(deepcopy(self.cur_best_tuning_cfg))
+
+            # Fall back the ops that support both static and dynamic from static to dynamic
+            # Tuning items: None
+            if self.cfg.quantization.approach == 'post_training_auto_quant':
+                static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if
+                                        item in tuning_space.query_items_by_quant_mode('dynamic')]
+                if static_dynamic_items:
+                    logger.info("Fallback all ops that support both dynamic and static to dynamic.")
+                else:
+                    logger.info("No ops support both dynamic and static.")
+
+                new_op_tuning_cfg = deepcopy(self.cur_best_tuning_cfg)
+                for item in static_dynamic_items:
+                    new_op_tuning_cfg[item.name] = self._initial_dynamic_cfg_based_on_static_cfg(
+                        new_op_tuning_cfg[item.name])
+                new_op_tuning_cfg['calib_sampling_size'] = calib_sampling_size
+                yield new_op_tuning_cfg
+
+            logger.info("Apply recipe one by one.")
+            for tune_cfg in self.apply_recipe_one_by_one(deepcopy(self.cur_best_tuning_cfg)):
+                yield tune_cfg
+            best_op_tuning_cfg_stage1 = deepcopy(self.cur_best_tuning_cfg)
+
+            # Fallback
+            for target_dtype in ['bf16', 'fp32']:
+                target_type_lst = set(tuning_space.query_items_by_quant_mode(target_dtype))
+                fallback_items_lst = [item for item in quant_ops if item in target_type_lst]
+                if fallback_items_lst:
+                    logger.info(f"Start to fallback op to {target_dtype} one by one.")
+                    self._fallback_started()
+                fallback_items_name_lst = [item.name for item in fallback_items_lst][::-1] # from bottom to up
+                op_dtypes = OrderedDict(zip(fallback_items_name_lst, [target_dtype] * len(fallback_items_name_lst)))
+                initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1)
+                fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[],
+                                                         initial_op_tuning_cfg=initial_op_tuning_cfg,
+                                                         op_dtypes=op_dtypes, accumulate=False)
+                op_fallback_acc_impact = OrderedDict()
+                for op_index, op_tuning_cfg in enumerate(fallback_sampler):
+                    op_tuning_cfg['calib_sampling_size'] = calib_sampling_size
+                    yield op_tuning_cfg
+                    acc, _ = self.last_tune_result
+                    op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc
+
+
+                # Fallback OPs accumulated according to the order in the previous stage
+                if len(op_fallback_acc_impact) > 0:
+                    ordered_ops =
sorted(op_fallback_acc_impact.keys(), + key=lambda key: op_fallback_acc_impact[key], + reverse=self.higher_is_better) + op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) + logger.info(f"Start to accumulate fallback to {target_dtype}.") + initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) + fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, accumulate=True) + for op_tuning_cfg in fallback_sampler: + op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + yield op_tuning_cfg + + def _initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg:OpTuningConfig): + op_state = op_static_cfg.get_state() + op_name = op_static_cfg.op_name + op_type = op_static_cfg.op_type + op_name_type = (op_name, op_type) + op_quant_mode = 'dynamic' + tuning_space = self.tuning_space + dynamic_state = {} + for att in ['weight', 'activation']: + if att not in op_state: continue + # Add dtype + full_path = self.tuning_space.get_op_default_path_by_pattern(op_name_type, op_quant_mode) + dynamic_state[att + '_dtype'] = self.tuning_space.ops_data_type[op_name_type][full_path[att]] + for method_name, method_val in op_state[att].items(): + att_and_method_name = (att, method_name) + if att_and_method_name not in TUNING_ITEMS_LST: continue + if tuning_space.query_item_option(op_name_type, full_path[att], att_and_method_name, method_val): + dynamic_state[att_and_method_name] = method_val + else: + quant_mode_item = tuning_space.get_item_by_path((op_name_type, *full_path[att])) + if quant_mode_item and quant_mode_item.get_option_by_name(att_and_method_name): + tuning_item = quant_mode_item.get_option_by_name(att_and_method_name) + dynamic_state[att_and_method_name] = tuning_item.options[0] if tuning_item else None + return OpTuningConfig(op_name, op_type, op_quant_mode, tuning_space, kwargs=dynamic_state) + + \ No newline at end of file diff --git a/neural_compressor/experimental/strategy/bayesian.py b/neural_compressor/experimental/strategy/bayesian.py new file mode 100644 index 00000000000..58edcdee024 --- /dev/null +++ b/neural_compressor/experimental/strategy/bayesian.py @@ -0,0 +1,444 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
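+
+# (editor's sketch) The helpers below follow the classic Bayesian-optimization loop;
+# the names in this sketch are illustrative, not part of the module's API:
+#
+#   opt = BayesianOptimization(pbounds={'op_a': (0, 3)})
+#   while tuning:
+#       params = opt.gen_next_params()       # GP + UCB suggestion
+#       acc = evaluate(params)               # caller-side evaluation
+#       opt._space.register(params, acc)     # feed the observation back to the GP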
+
+"""The Bayesian tuning strategy."""
+
+import copy
+import warnings
+import numpy as np
+from scipy.optimize import minimize
+from sklearn.gaussian_process.kernels import Matern
+from sklearn.gaussian_process import GaussianProcessRegressor
+
+from collections import OrderedDict
+from copy import deepcopy
+
+from ...utils import logger
+from .strategy import strategy_registry, TuneStrategy
+from .utils.tuning_sampler import OpWiseTuningSampler
+from .utils.tuning_structs import OpTuningConfig
+
+
+@strategy_registry
+class BayesianTuneStrategy(TuneStrategy):
+    """The Bayesian tuning strategy."""
+
+    def __init__(self, model, conf, q_dataloader, q_func=None, eval_dataloader=None,
+                 eval_func=None, dicts=None, q_hooks=None):
+        """Init the Bayesian tuning strategy."""
+        super().__init__(model, conf, q_dataloader, q_func, eval_dataloader,
+                         eval_func, dicts, q_hooks)
+        self.bayes_opt = None
+
+    def __getstate__(self):
+        """Magic method for pickle saving.
+
+        Returns:
+            dict: Saved dict for resuming
+        """
+        for history in self.tuning_history:
+            if self._same_yaml(history['cfg'], self.cfg):
+                history['bayes_opt'] = self.bayes_opt
+        save_dict = super().__getstate__()
+        return save_dict
+
+    def _params_to_tune_configs(self, params):
+        op_tuning_cfg = {}
+        calib_sampling_size_lst = self.tuning_space.root_item.get_option_by_name('calib_sampling_size').options
+        for op_name_type, configs in self.op_configs.items():
+            if len(configs) == 1:
+                op_tuning_cfg[op_name_type] = configs[0]
+            else:
+                op_tuning_cfg[op_name_type] = configs[min(len(configs) - 1, int(params[op_name_type[0]]))]
+        if len(calib_sampling_size_lst) > 1:
+            calib_sampling_size = calib_sampling_size_lst[min(len(configs) - 1, int(params['calib_sampling_size']))]
+        else:
+            calib_sampling_size = calib_sampling_size_lst[0]
+        op_tuning_cfg['calib_sampling_size'] = calib_sampling_size
+        return op_tuning_cfg
+
+    def next_tune_cfg(self):
+        """Generate the next tuning config according to the Bayesian search algorithm.
+
+        This strategy is adapted from the Bayesian optimization package, changed into a discrete version.
+        It uses Gaussian processes to define the prior/posterior distribution over the black-box
+        function with the tuning history and then finds the tuning configuration that maximizes
+        the expected improvement.
+
+        Returns:
+            tune_config (dict): A dict containing the tuning configuration for quantization.
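+
+        Example::
+            # Illustrative only: each continuous value suggested by the GP is
+            # truncated to an index into that op's list of candidate configs.
+            params = {'conv1': 1.7, 'calib_sampling_size': 0.2}   # hypothetical
+            # int(1.7) -> index 1, the second candidate config for op 'conv1'
+            # int(0.2) -> index 0, the first calibration sampling size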
+ """ + params = None + pbounds = {} + tuning_space = self.tuning_space + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() + op_wise_pool = OpWiseTuningSampler(tuning_space, [], [], + op_item_dtype_dict, initial_op_tuning_cfg) + self.op_configs = op_wise_pool.get_opwise_candidate() + + for op_name_type, configs in self.op_configs.items(): + if len(configs) > 1: + pbounds[op_name_type[0]] = (0, len(configs)) + if len(calib_sampling_size_lst) > 1: + pbounds['calib_sampling_size'] = (0, len(calib_sampling_size_lst)) + if len(pbounds) == 0: + yield self._params_to_tune_configs(params) + return + if self.bayes_opt is None: + self.bayes_opt = BayesianOptimization( + pbounds=pbounds, random_seed=self.cfg.tuning.random_seed) + while True: + params = self.bayes_opt.gen_next_params() + logger.debug("Dump current bayesian params:") + logger.debug(params) + yield self._params_to_tune_configs(params) + try: + self.bayes_opt._space.register(params, self.last_tune_result[0]) + except KeyError: + logger.debug("Find registered params, skip it.") + pass + +# Util part +# Bayesian opt acq function + + +def acq_max(ac, gp, y_max, bounds, random_seed, n_warmup=10000, n_iter=10): + """Find the maximum of the acquisition function parameters. + + Args: + ac: The acquisition function object that return its point-wise value. + gp: A gaussian process fitted to the relevant data. + y_max: The current maximum known value of the target function. + bounds: The variables bounds to limit the search of the acq max. + random_seed: instance of np.RandomState random number generator + n_warmup: number of times to randomly sample the acquisition function + n_iter: number of times to run scipy.minimize + + Returns: + x_max: The arg max of the acquisition function. + """ + # Warm up with random points + x_tries = np.random.uniform(bounds[:, 0], bounds[:, 1], + size=(n_warmup, bounds.shape[0])) + ys = ac(x_tries, gp=gp, y_max=y_max) + x_max = x_tries[ys.argmax()] + max_acq = ys.max() + + # Explore the parameter space more thoroughly + x_seeds = np.random.uniform(bounds[:, 0], bounds[:, 1], + size=(n_iter, bounds.shape[0])) + for x_try in x_seeds: + # Find the minimum of minus the acquisition function + res = minimize(lambda x: -ac(x.reshape(1, -1), gp=gp, y_max=y_max), + x_try.reshape(1, -1), + bounds=bounds, + method="L-BFGS-B") + + # See if success + if not res.success: + continue + + if isinstance(res.fun, float): + res.fun = np.array([res.fun]) + # Store it if better than previous minimum(maximum). + if max_acq is None or -res.fun[0] >= max_acq: + x_max = res.x + max_acq = -res.fun[0] + + # Clip output to make sure it lies within the bounds. Due to floating + # point technicalities this is not always the case. + return np.clip(x_max, bounds[:, 0], bounds[:, 1]) + + +def _hashable(x): + """Ensure that an point is hashable by a python dict.""" + return tuple(map(float, x)) + +# Target space part +class TargetSpace(object): + """Holds the param-space coordinates (X) and target values (Y). + + Allows for constant-time appends while ensuring no duplicates are added. + """ + + def __init__(self, pbounds, random_seed=9527): + """Construct a TargetSpace. + + Args: + target_func (function): Function to be maximized. + pbounds (dict): Dictionary with parameters names as keys and a tuple with minimum and maximum values. 
+ random_seed (int): Optionally specify a seed for a random number generator + """ + self.random_seed = random_seed + # Get the name of the parameters + names = list(pbounds.keys()) + self._keys = deepcopy(names) + # Create an array with parameters bounds + self._bounds = np.array( + [pbounds[name] for name in names], + dtype=np.float32 + ) + + # preallocated memory for X and Y points + self._params = np.empty(shape=(0, self.dim)) + self._target = np.empty(shape=(0)) + + # keep track of unique points we have seen so far + self._cache = {} + + def __contains__(self, x): + """Check if param x is cached in this space.""" + return _hashable(x) in self._cache + + def __len__(self): + """Get the total count of stored items.""" + assert len(self._params) == len(self._target) + return len(self._target) + + @property + def empty(self): + """Check if the space is empty.""" + return len(self) == 0 + + @property + def params(self): + """Get all params stored in this space.""" + return self._params + + @property + def target(self): + """Get all target values in this space.""" + return self._target + + @property + def dim(self): + """Get the dimension of this space.""" + return len(self._keys) + + @property + def keys(self): + """Get all keys of this space.""" + return self._keys + + @property + def bounds(self): + """Get the bounds of this space.""" + return self._bounds + + def params_to_array(self, params): + """Generate an array from params. + + Args: + params (Dict): The dict contains keys in `self.keys`, and + corresponding param. + + Returns: + np.array: An array contains all params. + """ + try: + assert set(params) == set(self.keys) + except AssertionError: + raise ValueError( + "Parameters' keys ({}) do ".format(list(params.keys())) + + "not match the expected set of keys ({}).".format(self.keys) + ) + return np.asarray([params[key] for key in self.keys]) + + def array_to_params(self, x): + """Generate an params' dict from array. + + Args: + x (np.array): The array contains all params. + + Returns: + dict: the dict contains keys and the params corresponding to it. + """ + try: + assert len(x) == len(self.keys) + except AssertionError: + raise ValueError( + "Size of array ({}) is different than the ".format(len(x)) + + "expected number of parameters ({}).".format(len(self.keys)) + ) + return dict(zip(self.keys, x)) + + def _as_array(self, x): + try: + x = np.asarray(x, dtype=float) + except TypeError: + x = self.params_to_array(x) + + x = x.ravel() + try: + assert x.size == self.dim + except AssertionError: + raise ValueError( + "Size of array ({}) is different than the ".format(len(x)) + + "expected number of parameters ({}).".format(len(self.keys)) + ) + return x + + def register(self, params, target): + """Append a point and its target value to the known data. + + Runs in amortized constant time. + + Args: + params (ndarray): a single point, with len(params) == self.dim + target (float): target function value + + Raises: + KeyError: if the point is not unique + """ + x = self._as_array(params) + if x in self: + raise KeyError('Params point {} is not unique'.format(x)) + + # Insert data into unique dictionary + self._cache[_hashable(x.ravel())] = target + + self._params = np.concatenate([self._params, x.reshape(1, -1)]) + self._target = np.concatenate([self._target, [target]]) + + def get_target(self, params): + """Get the target value of params. + + Args: + params (ndarray): a single point, with len(params) == self.dim + + Returns: + target (float): target function value. 
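+
+        Example::
+            # A self-contained sketch with made-up bounds and values:
+            import numpy as np
+
+            space = TargetSpace(pbounds={'x': (0, 4)})
+            space.register(np.array([1.0]), 0.75)
+            assert space.get_target(np.array([1.0])) == 0.75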
+        """
+        x = self._as_array(params)
+        target = self._cache[_hashable(x)]
+        return target
+
+    def random_sample(self):
+        """Create a random point within the bounds of the space.
+
+        Returns:
+            data (ndarray): a [dim] array with one random point, with dimensions corresponding to `self._keys`
+        """
+        # TODO: support integer, category, and basic scipy.optimize constraints
+        data = np.empty((1, self.dim))
+        for col, (lower, upper) in enumerate(self._bounds):
+            data.T[col] = np.random.uniform(  # pylint: disable=unsupported-assignment-operation
+                lower, upper, size=1)
+        return data.ravel()
+
+    def max(self):
+        """Get the maximum target value found and the corresponding parameters."""
+        try:
+            res = {
+                'target': self.target.max(),
+                'params': dict(
+                    zip(self.keys, self.params[self.target.argmax()])
+                )
+            }
+        except ValueError:
+            res = {}
+        return res
+
+    def res(self):
+        """Get all target values found and the corresponding parameters."""
+        params = [dict(zip(self.keys, p)) for p in self.params]
+
+        return [
+            {"target": target, "params": param}
+            for target, param in zip(self.target, params)
+        ]
+
+# Tuning part
+class BayesianOptimization():
+    """The class for Bayesian optimization.
+
+    This class takes the parameter bounds in order to find which values of
+    the parameters yield the maximum value using Bayesian optimization.
+    """
+
+    def __init__(self, pbounds, random_seed=9527, verbose=2):
+        """Init Bayesian optimization.
+
+        Args:
+            pbounds (dict): Dictionary with parameters names as keys and a tuple with
+                minimum and maximum values.
+            random_seed (int, optional): The seed for random searching. Default to 9527.
+            verbose (int, optional): The level of verbosity. Default to 2.
+        """
+        self._random_seed = random_seed
+        # Data structure containing the bounds of its domain,
+        # and a record of the points we have evaluated.
+        self._space = TargetSpace(pbounds, random_seed)
+
+        # Internal GP regressor
+        self._gp = GaussianProcessRegressor(
+            kernel=Matern(nu=2.5),
+            alpha=1e-6,
+            normalize_y=True,
+            n_restarts_optimizer=5,
+            random_state=self._random_seed,
+        )
+        self._verbose = verbose
+
+    @property
+    def space(self):
+        """Get the target space."""
+        return self._space
+
+    @property
+    def max(self):
+        """Get the maximum value of target space."""
+        return self._space.max()
+
+    @property
+    def res(self):
+        """Get all results of the target space."""
+        return self._space.res()
+
+    @staticmethod
+    def _ucb(x, gp, y_max, kappa=2.576):
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            mean, std = gp.predict(x, return_std=True)
+            return mean + kappa * std
+
+    def suggest(self):
+        """Suggest the most promising points."""
+        if len(set(self._space.target)) < 2:
+            return self._space.array_to_params(self._space.random_sample())
+
+        # Sklearn's GP throws a large number of warnings at times, but
+        # we don't really need to see them here.
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            self._gp.fit(self._space.params, self._space.target)
+
+        # Finding argmax of the acquisition function.
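+        # acq_max warm-starts with n_warmup random samples, then refines a handful
+        # of seeds with L-BFGS-B and clips the winner back into the bounds.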
+ suggestion = acq_max( + ac=self._ucb, + gp=self._gp, + y_max=self._space.target.max(), + bounds=self._space.bounds, + random_seed=self._random_seed + ) + return self._space.array_to_params(suggestion) + + def gen_next_params(self): + """Get the next parameter.""" + next_params = self.suggest() + return next_params diff --git a/neural_compressor/experimental/strategy/conservative.py b/neural_compressor/experimental/strategy/conservative.py new file mode 100644 index 00000000000..7608ca1a894 --- /dev/null +++ b/neural_compressor/experimental/strategy/conservative.py @@ -0,0 +1,412 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""The conservative tuning strategy for quantization level 0.""" +import copy +import os +import numpy as np + +from collections import deque +from collections import OrderedDict as COrderedDict +from copy import deepcopy +from typing import Dict, List, Tuple, OrderedDict + +from .strategy import strategy_registry, TuneStrategy +from .utils.tuning_space import TuningItem +from ...utils import logger +from ...utils.utility import Statistics +from ...algorithm import AlgorithmScheduler + +@strategy_registry +class ConservativeTuneStrategy(TuneStrategy): + """Tuning strategy with accuracy first, performance second. + + The quantization level O0 is designed for user who want to keep the accuracy + of the model after quantization. It starts with the original(fp32) model, + and then quantize the OPs to lower precision OP type wisely and OP wisely. + """ + + def __init__(self, model, conf, q_dataloader, q_func=None, eval_dataloader=None, + eval_func=None, dicts=None, q_hooks=None): + """Init conservative tuning strategy.""" + super().__init__(model, conf, q_dataloader, q_func, eval_dataloader, + eval_func, dicts, q_hooks) + self.acc_meet_flag = False + + def next_tune_cfg(self): + """Generate and yield the next tuning config with below order. + + 1. Query all quantifiable ops and save as a list of [(op_name, op_type), ...] + 2. Classify the op by its op type + 3. Add op to quant_queue according to the op type priority + 4. Go through the quant_queue and replace it with the fp32 config in tune_cfg if + accuracy meets the requirements else continue + 5. For bf16 and fp16 operators, do the same as int8 operators. + + Returns: + tune_config (dict): It's a dict containing the tuning configuration to run. 
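+
+        Example::
+            # Illustrative order of attempts (op types and dtypes are hypothetical):
+            #   bf16: all conv2d -> all linear -> ...
+            #   int8: all conv2d; if accuracy regresses, retry conv2d ops one by
+            #         one, keeping only conversions that preserve accuracy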
+ """ + tuning_space = self.tuning_space + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + calib_sampling_size = calib_sampling_size_lst[0] + tune_cfg = self._initialize_tune_cfg() + tune_cfg['calib_sampling_size'] = calib_sampling_size + op_type_priority = self._get_op_type_priority() + quant_items_pool = self._quant_items_pool(op_type_priority) + logger.info(f"*** Try to convert op into lower precision to improve performance.") + for dtype, op_items in quant_items_pool.items(): + logger.info(f"*** Start to convert op into {dtype}.") + for op_type, items_lst in op_items.items(): + logger.info(f"*** Try to convert all {op_type} ops into {dtype}.") + tmp_tune_cfg = deepcopy(tune_cfg) + for item, quant_mode in items_lst: + op_info = item.name + op_config = tuning_space.get_default_config(op_info, quant_mode) + tmp_tune_cfg[op_info] = op_config + yield tmp_tune_cfg + if self.acc_meet_flag: + logger.info(f"*** Convert all {op_type} ops to {dtype} and accuracy still meet the requirements") + tune_cfg = deepcopy(tmp_tune_cfg) + else: + tmp_tune_cfg = deepcopy(tune_cfg) + logger.info(f"*** Convert all {op_type} ops to {dtype} but accuracy not meet the requirements") + logger.info(f"*** Try to convert {op_type} op into {dtype} one by one.") + for item, quant_mode in items_lst: + op_info = item.name + op_config = tuning_space.get_default_config(op_info, quant_mode) + tmp_tune_cfg[op_info] = op_config + yield tmp_tune_cfg + if self.acc_meet_flag: + tune_cfg[op_info] = op_config + logger.info((f"*** Convert one {op_type} op({op_info}) " + f"into {dtype} and accuracy still meet the requirements")) + else: + tmp_tune_cfg[op_info] = tune_cfg[op_info] + logger.info(f"*** Skip convert {op_info}.") + logger.info(f"*** Ending tuning process due to no quantifiable op left.") + + def traverse(self): + """Traverse the tuning space.""" + self._eval_baseline() + + # Start tuning + trials_count = 0 + for op_tuning_cfg in self.next_tune_cfg(): + tune_cfg = self._tune_cfg_converter(op_tuning_cfg) + trials_count += 1 + tuning_history = self._find_tuning_history(tune_cfg) + if tuning_history and trials_count < self.cfg.tuning.exit_policy.max_trials: + self.last_tune_result = tuning_history['last_tune_result'] + self.best_tune_result = tuning_history['best_tune_result'] + logger.warn("Find evaluated tuning config, skip.") + continue + logger.debug("Dump current tuning configuration:") + logger.debug(tune_cfg) + self.tuning_times += 1 + # set the parameter for pre quantization algos and run + self.set_param_for_pre_quantization_algos(self.algo_scheduler, tune_cfg, self.model) + self.model = self.algo_scheduler('pre_quantization') + # quantize + q_model = self.adaptor.quantize(copy.deepcopy(tune_cfg), self.model, self.calib_dataloader, self.q_func) + assert self.adaptor.pre_optimized_model + # set the parameter for post quantization algos and run + self.set_param_for_post_quantization_algos(self.algo_scheduler, tune_cfg, self.adaptor.pre_optimized_model, + q_model) + self.last_qmodel = self.algo_scheduler('post_quantization') + self.last_tune_cfg = copy.deepcopy(tune_cfg) + # Remove the reference to model + self.algo_scheduler.reset_exec_algorithms() + assert self.last_qmodel + # Return the last quantized model as a result. if performance only. 
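+            # (note) in performance_only mode no accuracy evaluation happens; the
+            # tuning history records the placeholder result (-1, [0]) for this trial.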
+            if self.cfg.tuning.exit_policy.performance_only:
+                self.best_qmodel = self.last_qmodel
+                self._add_tuning_history(copy.deepcopy(tune_cfg), (-1, [0]), q_config=self.last_qmodel.q_config)
+                return
+            self.last_tune_cfg = copy.deepcopy(tune_cfg)
+            self.last_tune_result = self._evaluate(self.last_qmodel)
+            self.acc_meet_flag = self.objectives.accuracy_meets()
+            if self.acc_meet_flag:
+                # For the first tuning
+                if not self.best_tune_result:
+                    self.best_tune_result = self.last_tune_result
+                    self.best_qmodel = self.last_qmodel
+                    self.best_tune_result = self.last_tune_result
+                else:
+                    # Update the current tuning config and model with the best performance
+                    get_better_performance = self._compare_performace(self.last_tune_result, self.best_tune_result)
+                    if get_better_performance:
+                        logger.info(f"*** Update the model with better performance.")
+                        self.best_qmodel = self.last_qmodel
+                        self.best_tune_result = self.last_tune_result
+                    else:
+                        logger.info(f"*** The qmodel was not updated due to not achieving better performance.")
+            # Dump the current state to log
+            self._dump_tuning_state(trials_count, self.last_tune_result, self.best_tune_result, self.baseline)
+            # Judge stop or continue tuning
+            need_stop = self.stop(trials_count)
+            # Record the tuning history
+            saved_tune_cfg = copy.deepcopy(tune_cfg)
+            saved_last_tune_result = copy.deepcopy(self.last_tune_result)
+            self._add_tuning_history(saved_tune_cfg,
+                                     saved_last_tune_result,
+                                     q_config=q_model.q_config)
+            self.tune_result_record.append(copy.deepcopy(self.last_tune_result))
+            self.tune_cfg = tune_cfg
+            self._dump_tuning_process_statistics()
+            if need_stop:
+                if self.cfg.tuning.diagnosis and self.cfg.tuning.diagnosis.diagnosis_after_tuning:
+                    logger.debug(f'*** Start to do diagnosis (inspect tensor).')
+                    self._diagnosis()
+                self._recover_best_qmodel_from_tuning_cfg()
+                if self.use_multi_objective and len(self.tune_result_record) > 1 and \
+                    self.best_tune_result is not None:
+                    best_trail, best_result = self.objectives.best_result(self.tune_result_record,
+                                                                          copy.deepcopy(self.baseline))
+                    if best_result != self.best_tune_result:
+                        from neural_compressor.utils.utility import recover
+                        self.best_qmodel = recover(self.model.model,
+                                                   os.path.join(self.cfg.tuning.workspace.path, 'history.snapshot'),
+                                                   best_trail)
+                        self.best_tune_result = best_result
+                    self._dump_tuning_process_statistics()
+                break
+
+    def stop(self, trials_count):
+        """Check whether the traverse procedure should stop.
+
+        Args:
+            trials_count (int): current total count of tuning trials.
+
+        Returns:
+            bool: whether the traverse procedure should stop.
+        """
+        need_stop = False
+        if trials_count >= self.cfg.tuning.exit_policy.max_trials:
+            need_stop = True
+        return need_stop
+
+    def _compare_performace(self, last_tune_result, best_tune_result): # pragma: no cover
+        """Compare two tuning results by performance only.
+
+        Args:
+            last_tune_result (list): The last tuning result.
+            best_tune_result (list): The best tuning result so far.
+
+        Returns:
+            bool: whether the last tuning result outperforms the best tuning result
+                so far (i.e. has a smaller objective value, such as latency).
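+
+        Example::
+            # Tune results are (accuracy, [objective, ...]) pairs with latency-style
+            # objectives, so smaller is better (the numbers below are made up):
+            last_tune_result = (0.76, [10.5])
+            best_tune_result = (0.76, [12.0])
+            # returns True: the last result (10.5) beats the best so far (12.0)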
+ """ + _, last_perf = last_tune_result + _, best_perf = best_tune_result + return last_perf[0] < best_perf[0] + + def _dump_tuning_state(self, trials_count, last_tune_result, best_tune_result, baseline): + if last_tune_result: + last_tune = last_tune_result[0] if \ + isinstance(last_tune_result[0], list) else [last_tune_result[0]] + for name, data in zip(self.metric_name, last_tune): + if len(self.tune_data[name]) == 1: + self.tune_data[name].append(data) + else: + self.tune_data[name][1] = data + + if self.metric_weight and len(last_tune) > 1: + weighted_acc = np.mean(np.array(last_tune) * self.metric_weight) + if len(self.tune_data['Weighted accuracy']) == 1: + self.tune_data['Weighted accuracy'].append(weighted_acc) + else: + self.tune_data['Weighted accuracy'][1] = weighted_acc + last_tune = [weighted_acc] + + last_tune_msg = '[Accuracy (int8|fp32):' + \ + ''.join([' {:.4f}|{:.4f}'.format(last, base) for last, base in \ + zip(last_tune, self.tune_data['baseline'])]) + \ + ''.join([', {} (int8|fp32): {:.4f}|{:.4f}'.format( \ + x, y, z) for x, y, z in zip( \ + self.objectives.representation, last_tune_result[1], baseline[1]) \ + if x != 'Accuracy']) + ']' + else: # pragma: no cover + last_tune_msg = 'n/a' + for name in self.tune_data.keys() - {'baseline'}: + if len(self.tune_data[name]) == 1: + self.tune_data[name].append('n/a') + else: + self.tune_data[name][1] = 'n/a' + + if best_tune_result: + best_tune = best_tune_result[0] if isinstance(best_tune_result[0], list) \ + else [best_tune_result[0]] + + for name, data in zip(self.metric_name, best_tune): + if len(self.tune_data[name]) == 2: + self.tune_data[name].append(data) + else: + self.tune_data[name][2] = data + + if self.metric_weight and len(best_tune) > 1: + weighted_acc = np.mean(np.array(best_tune) * self.metric_weight) + + if len(self.tune_data['Weighted accuracy']) == 2: + self.tune_data['Weighted accuracy'].append(weighted_acc) + else: # pragma: no cover + self.tune_data['Weighted accuracy'][2] = weighted_acc + + best_tune = [weighted_acc] + + best_tune_msg = '[Accuracy:' + ''.join([' {:.4f}'.format(best) \ + for best in best_tune]) + ''.join([', {}: {:.4f}'.format(x,y) \ + for x,y in zip(self.objectives.representation, \ + best_tune_result[1]) if x != 'Accuracy']) + ']' + + else: + best_tune_msg = 'n/a' + for name in self.tune_data.keys() - {'baseline'}: + if len(self.tune_data[name]) == 2: + self.tune_data[name].append('n/a') + else: + self.tune_data[name][2] = 'n/a' + + logger.info("Tune {} result is: {}, Best tune result is: {}".format(trials_count, + last_tune_msg, + best_tune_msg)) + output_data = [[info_type, + '{:.4f} '.format(self.tune_data[info_type][0]) if \ + not isinstance(self.tune_data[info_type][0], str) else self.tune_data[info_type][0], + '{:.4f} '.format(self.tune_data[info_type][1]) if \ + not isinstance(self.tune_data[info_type][1], str) else self.tune_data[info_type][1], + '{:.4f} '.format(self.tune_data[info_type][2]) if \ + not isinstance(self.tune_data[info_type][2], str) else self.tune_data[info_type][2]] \ + for info_type in self.tune_data.keys() if info_type != 'baseline'] + + output_data.extend([[obj, + '{:.4f} '.format(baseline[1][i]) if baseline else 'n/a', + '{:.4f} '.format(last_tune_result[1][i]) if last_tune_result else 'n/a', + '{:.4f} '.format(best_tune_result[1][i]) if best_tune_result else 'n/a'] \ + for i, obj in enumerate(self.objectives.representation)]) + + Statistics(output_data, + header='Tune Result Statistics', + field_names=['Info Type', 'Baseline', 'Tune {} 
result'.format(trials_count), \
+                                      'Best tune result']).print_stat()
+
+    def _get_op_type_priority(self):
+        optypewise_cap = self.capability['optypewise']
+        op_type_priority = list(optypewise_cap.keys())
+        return op_type_priority
+
+    def _sorted_item_by_op_type(self,
+                                items_lst: List[Tuple[TuningItem, str]],
+                                op_type_priority: List[str]) -> OrderedDict[str, List]:
+        """Sort the tuning items according to their op type.
+
+        Args:
+            items_lst: The tuning item list.  # [(op_item, quant_mode), ... ]
+            op_type_priority: The op type list with the order.  # [optype_1, optype_2]
+
+        Returns:
+            The tuning item list sorted according to op type, for example:
+            OrderedDict:
+                # op_type: [(TuningItem, quant_mode), ...]
+                conv2d: [(TuningItem, static), (TuningItem, static)]
+                linear: [(TuningItem, static), (TuningItem, static)]
+        """
+        op_type_lst_from_items_lst = list(set([item[0].name[1] for item in items_lst]))
+        # For items whose op type does not exist in the priority list, assign them the lowest priority.
+        sorted_op_type_lst = [op_type for op_type in op_type_priority if op_type in op_type_lst_from_items_lst]
+        sorted_op_type_lst += list(set(op_type_lst_from_items_lst) - set(op_type_priority))
+        sorted_items = COrderedDict()
+        for op_type in sorted_op_type_lst:
+            sorted_items[op_type] = []
+        for op_item, quant_mode in items_lst:
+            op_type = op_item.name[1]
+            sorted_items[op_type].append((op_item, quant_mode))
+        return sorted_items
+
+    def _initialize_tune_cfg(self):
+        """Initialize the tuning config with fp32 ops as much as possible (AMAP).
+
+        Returns:
+            The initialized tuning config.
+        """
+        tuning_space = self.tuning_space
+        quant_mode_wise_items = tuning_space.quant_mode_wise_items
+        # Initialize the tuning config
+        initial_tuning_cfg = {}
+        all_ops = set()
+        fp32_ops = []
+        for quant_mode, items_lst in quant_mode_wise_items.items():
+            items_name_lst = [item.name for item in items_lst]
+            all_ops = all_ops.union(set(items_name_lst))
+            if quant_mode == "fp32":
+                fp32_ops += [item.name for item in items_lst]
+        non_fp32_ops_dtype = {}
+        fp32_ops_set = set(fp32_ops)
+        for quant_mode, items_lst in quant_mode_wise_items.items():
+            items_name_set = set([item.name for item in items_lst])
+            tmp_non_fp32_ops = items_name_set.difference(fp32_ops_set)
+            if tmp_non_fp32_ops:
+                for op_info in tmp_non_fp32_ops:
+                    non_fp32_ops_dtype[op_info] = quant_mode
+        for op_info in fp32_ops:
+            initial_tuning_cfg[op_info] = tuning_space.get_default_config(op_info, "fp32")
+        for op_info, quant_mode in non_fp32_ops_dtype.items():
+            initial_tuning_cfg[op_info] = tuning_space.get_default_config(op_info, quant_mode)
+        return initial_tuning_cfg
+
+    def _quant_items_pool(self, op_type_priority: List[str]) -> OrderedDict[
+            str, OrderedDict[str, List[Tuple[TuningItem, str]]]]:
+        """Create the op queue to be quantized.
+
+        --------------------------------------------------------------------------
+        | Level 1 | bf16               | fp16               | static/dynamic     |
+        | Level 2 | conv2d, linear, ...| conv2d, linear, ...| conv2d, linear, ...|
+
+        Args:
+            op_type_priority: The optype list with priority.
+
+        Returns:
+            The pool of op items to convert to lower precision, for example:
+            quant_items_pool(OrderedDict):
+                bf16:
+                    OrderedDict:
+                        conv2d: [(TuningItem, bf16), (TuningItem, bf16)]
+                        linear: [(TuningItem, bf16), (TuningItem, bf16)]
+                int8:
+                    OrderedDict:
+                        # (TuningItem, quant_mode)
+                        conv2d: [(TuningItem, static), (TuningItem, static)]
+                        linear: [(TuningItem, static), (TuningItem, static)]
+        """
+        quant_mode_wise_items = self.tuning_space.quant_mode_wise_items
+        # Add all quantized pairs into the queue
+        quant_items_pool = COrderedDict()
+        # Collect and sort all ops that support bf16 and fp16
+        for quant_mode in ['bf16', 'fp16']:
+            if quant_mode in quant_mode_wise_items:
+                op_item_pairs = [(op_item, quant_mode) for op_item in quant_mode_wise_items[quant_mode]]
+                op_item_pairs = self._sorted_item_by_op_type(op_item_pairs, op_type_priority)
+                quant_items_pool[quant_mode] = op_item_pairs
+        op_item_pairs = []
+        quant_ops_name_set = set()
+        # Collect and sort all ops that support int8
+        for quant_mode, items_lst in quant_mode_wise_items.items():
+            if "static" in quant_mode or 'dynamic' in quant_mode:
+                _quant_mode = "static" if "static" in quant_mode else "dynamic"
+                op_item_pairs += [(item, _quant_mode) for item in items_lst if item.name not in quant_ops_name_set]
+                quant_ops_name_set = quant_ops_name_set.union([item.name for item in items_lst])
+        op_item_pairs = self._sorted_item_by_op_type(op_item_pairs, op_type_priority)
+        quant_items_pool['int8'] = op_item_pairs
+        return quant_items_pool
diff --git a/neural_compressor/experimental/strategy/exhaustive.py b/neural_compressor/experimental/strategy/exhaustive.py
new file mode 100644
index 00000000000..b40d5b70397
--- /dev/null
+++ b/neural_compressor/experimental/strategy/exhaustive.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""The exhaustive tuning strategy."""
+from collections import OrderedDict
+from .strategy import strategy_registry, TuneStrategy
+
+from .utils.tuning_sampler import OpWiseTuningSampler, FallbackTuningSampler, ModelWiseTuningSampler
+from .utils.tuning_structs import OpTuningConfig
+from ...utils import logger
+
+@strategy_registry
+class ExhaustiveTuneStrategy(TuneStrategy):
+    """The exhaustive tuning strategy."""
+
+    def next_tune_cfg(self):
+        """Generate and yield the next tuning config using exhaustive search in the tuning space.
+
+        It sequentially traverses all possible quantization tuning configurations
+        in the tuning space. From the perspective of the impact on performance,
+        we currently only traverse all possible quantization tuning configs.
+        For the same reason as the Bayesian strategy, fallback data types are not
+        included for now.
+
+        Returns:
+            tune_config (dict): A dict containing the tuning configuration for quantization.
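+
+        Example (an illustrative sketch; ``strategy`` is assumed to be an
+        initialized ``ExhaustiveTuneStrategy``, which in real usage is driven
+        by ``traverse()`` rather than called directly)::
+
+            for op_tuning_cfg in strategy.next_tune_cfg():
+                print(op_tuning_cfg['calib_sampling_size'])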
+        """
+        tuning_space = self.tuning_space
+        calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options
+        for calib_sampling_size in calib_sampling_size_lst:
+            op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg()
+            op_wise_tuning_sampler = OpWiseTuningSampler(tuning_space, [], [],
+                                                         op_item_dtype_dict, initial_op_tuning_cfg)
+            for op_tuning_cfg in op_wise_tuning_sampler:
+                op_tuning_cfg['calib_sampling_size'] = calib_sampling_size
+                yield op_tuning_cfg
+        return
diff --git a/neural_compressor/experimental/strategy/hawq_v2.py b/neural_compressor/experimental/strategy/hawq_v2.py
new file mode 100644
index 00000000000..1fd76b9b7dd
--- /dev/null
+++ b/neural_compressor/experimental/strategy/hawq_v2.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""The HAWQ_V2 tuning strategy."""
+from collections import OrderedDict
+from copy import deepcopy
+
+from .strategy import strategy_registry, TuneStrategy
+
+from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler, ModelWiseTuningSampler
+from .utils.tuning_structs import OpTuningConfig
+from .utils.constant import TUNING_ITEMS_LST
+from ...utils import logger
+
+@strategy_registry
+class HAWQ_V2TuneStrategy(TuneStrategy):
+    """The HAWQ V2 tuning strategy.
+
+    HAWQ_V2 implements "HAWQ-V2: Hessian Aware trace-Weighted Quantization of Neural Networks".
+    We made a small change to it by using the Hessian trace to score the op impact and then
+    falling back the OPs according to the scoring result.
+    """
+
+    def next_tune_cfg(self):
+        """Generate and yield the next tuning config using HAWQ-V2 search in the tuning space.
+
+        Returns:
+            tune_config (dict): A dict containing the tuning configuration for quantization.
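+
+        Example (an illustrative sketch; HAWQ-V2 needs a model loss function to
+        compute gradients, which is assumed here to be wired through
+        ``strategy_kwargs({'hawq_v2_loss': hawq_v2_loss})`` as the commented
+        assertion below suggests)::
+
+            import torch
+
+            def hawq_v2_loss(output, target):
+                # standard classification loss used to compute Hessian traces
+                return torch.nn.CrossEntropyLoss()(output, target)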
+        """
+        tuning_space = self.tuning_space
+        calib_size = tuning_space.root_item.get_option_by_name('calib_sampling_size').options[0]
+
+        # Initialize the tuning config for each op according to the quantization approach
+        op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg()
+        # Optype-wise tuning items: the algorithm/scheme/granularity of activation (weight)
+        early_stop_tuning = True
+        stage1_cnt = 0
+        quant_ops = quant_mode_wise_items.get('static', [])
+        quant_ops += quant_mode_wise_items.get('dynamic', [])
+        stage1_max = 1  # TODO set a more appropriate value
+        op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [],
+                                                         op_item_dtype_dict, initial_op_tuning_cfg)
+        for op_tuning_cfg in op_wise_tuning_sampler:
+            stage1_cnt += 1
+            if early_stop_tuning and stage1_cnt > stage1_max:
+                logger.info("Early stopping the stage 1.")
+                break
+            op_tuning_cfg['calib_sampling_size'] = calib_size
+            yield op_tuning_cfg
+        # Start computing the Hessian trace
+        logger.info("************** Start computing the Hessian trace *****************")
+        target_dtype = "fp32"
+        hawq_v2_criterion = self.cfg.tuning.strategy.hawq_v2_loss
+        # assert hawq_v2_criterion is not None, "HAWQ-V2 strategy needs a model loss function to compute the gradient, \
+        #     Please assign it by strategy_kwargs({'hawq_v2_loss': hawq_v2_loss})."
+        op_to_traces = self.adaptor.calculate_hessian_trace(fp32_model=self._fp32_model,
+                                                            dataloader=self.calib_dataloader,
+                                                            q_model=self.last_qmodel,
+                                                            criterion=hawq_v2_criterion,
+                                                            enable_act=False)
+        sorted_op_to_traces = dict(sorted(op_to_traces.items(), key=lambda item: item[1], reverse=True))
+        logger.info("************** Hessian Trace *****************")
+        for op_name, trace in sorted_op_to_traces.items():
+            logger.info(f"*** op: {op_name}, hessian trace: {trace}")
+        logger.info("************************************************")
+        # Workaround for op mapping
+        ordered_ops_tmp = {}
+        for op_info in list(initial_op_tuning_cfg.keys()):
+            op_name, op_type = op_info
+            for op_trace_name in op_to_traces.keys():
+                if isinstance(op_trace_name, str) and op_trace_name.startswith(op_name):
+                    if op_name in ordered_ops_tmp:
+                        logger.info(f"*** Already assigned the hessian trace to {op_name}, "
+                                    f"update it with the value of {op_trace_name}")
+                    ordered_ops_tmp[op_name] = op_to_traces[op_trace_name]
+
+        ordered_ops_tmp = sorted(ordered_ops_tmp.keys(),
+                                 key=lambda key: ordered_ops_tmp[key],
+                                 reverse=self.higher_is_better)
+        # Workaround to add the op type
+        op_info_map = {}
+        for op_info in list(initial_op_tuning_cfg.keys()):
+            op_info_map[op_info[0]] = op_info  # op_name: (op_name, op_type)
+        tmp_ordered_ops = [op_info_map[op_name] for op_name in ordered_ops_tmp]
+        op_dtypes = OrderedDict(zip(tmp_ordered_ops, [target_dtype] * len(ordered_ops_tmp)))
+
+        logger.info(f"Start to accumulate fallback to {target_dtype}.")
+        initial_op_tuning_cfg = deepcopy(op_tuning_cfg)
+        fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[],
+                                                 initial_op_tuning_cfg=op_tuning_cfg,
+                                                 op_dtypes=op_dtypes, accumulate=True,
+                                                 skip_first=False)
+        for op_tuning_cfg in fallback_sampler:
+            op_tuning_cfg['calib_sampling_size'] = calib_size
+            yield op_tuning_cfg
+
diff --git a/neural_compressor/experimental/strategy/mse.py b/neural_compressor/experimental/strategy/mse.py
new file mode 100644
index 00000000000..55955774e74
--- /dev/null
+++ b/neural_compressor/experimental/strategy/mse.py
@@ -0,0 +1,197 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""MSE tuning strategy."""
+from copy import deepcopy
+import numpy as np
+from collections import OrderedDict
+from typing import Dict, Any, List
+from .strategy import strategy_registry, TuneStrategy
+from ...utils import logger
+from time import time
+
+from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler
+from .utils.tuning_structs import OpTuningConfig
+
+@strategy_registry
+class MSETuneStrategy(TuneStrategy):
+    """The tuning strategy using the MSE policy in the tuning space.
+
+    The MSE strategy needs to get the tensors for each OP of the raw FP32 model and the
+    quantized model based on the best model-wise tuning configuration. It then calculates
+    the MSE (Mean Squared Error) for each OP, sorts those OPs according to the MSE value,
+    and performs the op-wise fallback in this order.
+    """
+
+    def __init__(self, model, conf, q_dataloader, q_func=None, eval_dataloader=None,
+                 eval_func=None, dicts=None, q_hooks=None):
+        """Init an MSE tuning strategy."""
+        super().__init__(model, conf, q_dataloader, q_func, eval_dataloader,
+                         eval_func, dicts, q_hooks)
+        self.ordered_ops = None
+
+    def __getstate__(self):
+        """Magic method for pickle saving.
+
+        Returns:
+            save_dict: The saved dict for resuming.
+        """
+        for history in self.tuning_history:
+            if self._same_yaml(history['cfg'], self.cfg):
+                history['ordered_ops'] = self.ordered_ops
+        save_dict = super().__getstate__()
+        return save_dict
+
+    def _mse_metric_gap(self, fp32_tensor, dequantize_tensor):
+        """Calculate the mean squared gap between the min-max normalized FP32 tensor
+        and the dequantized INT8 tensor.
+
+        Args:
+            fp32_tensor (tensor): The FP32 tensor.
+            dequantize_tensor (tensor): The dequantized INT8 tensor.
+
+        Returns:
+            float: The mean squared error between the two normalized tensors.
+        """
+        fp32_max = np.max(fp32_tensor)
+        fp32_min = np.min(fp32_tensor)
+        dequantize_max = np.max(dequantize_tensor)
+        dequantize_min = np.min(dequantize_tensor)
+        fp32_tensor = (fp32_tensor - fp32_min) / (fp32_max - fp32_min)
+        dequantize_tensor = (dequantize_tensor - dequantize_min) / \
+            (dequantize_max - dequantize_min)
+        diff_tensor = fp32_tensor - dequantize_tensor
+        euclidean_dist = np.sum(diff_tensor ** 2)
+        return euclidean_dist / fp32_tensor.size
+
+    def mse_impact_lst(self, op_list: List, fp32_model, best_qmodel):
+        """Calculate and generate the MSE impact list.
+
+        Args:
+            op_list (List[Tuple(str, str)]): List of ops in the format of [(op_name, op_type), ...].
+            fp32_model (Model): The original FP32 model before quantization.
+            best_qmodel (Model): The current best quantized model.
+
+        Returns:
+            ordered_op_name_types (List[Tuple(str, str)]): The list of ops sorted by their
+                MSE impact, in the same format as 'op_list'.
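+
+        Example (an illustrative sketch; the op names and types are made up,
+        and this method is normally invoked from ``next_tune_cfg`` rather
+        than by hand)::
+
+            ordered = self.mse_impact_lst(
+                [('conv1', 'conv2d'), ('fc', 'linear')], self.model, self.best_qmodel)
+            # -> the same (op_name, op_type) pairs, reordered by activation MSE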
+        """
+        op_name_lst = [element[0] for element in op_list]
+        op_mapping = {}
+        for (op_name, op_type) in list(op_list):
+            op_mapping[op_name] = (op_name, op_type)
+        current_best_tune_cfg = self._tune_cfg_converter(self.cur_best_tuning_cfg)
+        fp32_dump_content = self.adaptor.inspect_tensor(fp32_model,
+            self.calib_dataloader, op_name_lst, [1], inspect_type='activation',
+            save_to_disk=True, save_path="./nc_workspace/",
+            quantization_cfg=current_best_tune_cfg)
+        fp32_tensor_dict = fp32_dump_content['activation'][0]
+        best_qmodel = self.adaptor.quantize(current_best_tune_cfg, self.model, self.calib_dataloader, self.q_func)
+        quant_dump_content = self.adaptor.inspect_tensor(best_qmodel,
+            self.calib_dataloader, op_name_lst, [1], inspect_type='activation',
+            save_to_disk=True, save_path="./nc_workspace/",
+            quantization_cfg=current_best_tune_cfg)
+        dequantize_tensor_dict = quant_dump_content['activation'][0]
+        ops_mse = {
+            op: self._mse_metric_gap(
+                list(fp32_tensor_dict[op].values())[0],
+                list(dequantize_tensor_dict[op].values())[0]) for op in fp32_tensor_dict}
+        ordered_op_names = sorted(ops_mse.keys(), key=lambda key: ops_mse[key], reverse=self.higher_is_better)
+
+        ordered_op_name_types = [op_mapping[name] for name in ordered_op_names]
+        return ordered_op_name_types
+
+    def next_tune_cfg(self):
+        """Generate and yield the next tuning config.
+
+        Returns:
+            tune_config (dict): A dict containing the tuning configuration for quantization.
+        """
+        tuning_space = self.tuning_space
+        calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options
+        for calib_sampling_size in calib_sampling_size_lst:
+            op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg()
+            # Optype-wise tuning
+            early_stop_tuning = True
+            stage1_cnt = 0
+            int8_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else []
+            int8_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else []
+            stage1_max = min(5, len(int8_ops))  # TODO set a more appropriate value
+            op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [],
+                                                             op_item_dtype_dict, initial_op_tuning_cfg)
+            for op_tuning_cfg in op_wise_tuning_sampler:
+                stage1_cnt += 1
+                if early_stop_tuning and stage1_cnt > stage1_max:
+                    logger.info("Early stopping the stage 1.")
+                    break
+                op_tuning_cfg['calib_sampling_size'] = calib_sampling_size
+                yield op_tuning_cfg
+
+            # Fall back ops that support both static and dynamic from static to dynamic
+            static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if
+                                    item in tuning_space.query_items_by_quant_mode('dynamic')]
+            if static_dynamic_items:
+                logger.info("Fall back all ops that support both dynamic and static to dynamic.")
+            else:
+                logger.info("No op supports both dynamic and static.")
+
+            def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig):
+                new_op_tuning_cfg = deepcopy(op_tuning_cfg)
+                new_op_tuning_cfg.op_quant_mode = 'dynamic'
+                return new_op_tuning_cfg
+
+            new_op_tuning_cfg = deepcopy(self.cur_best_tuning_cfg)
+            for item in static_dynamic_items:
+                new_op_tuning_cfg[item.name] = dynamic_op_tuning_cfg_from_static(new_op_tuning_cfg[item.name])
+            new_op_tuning_cfg['calib_sampling_size'] = calib_sampling_size
+            yield new_op_tuning_cfg
+
+            best_op_tuning_cfg_stage1 = deepcopy(self.cur_best_tuning_cfg)
+
+            # Fall back to floating-point data types ('bf16' or 'fp32')
+            for target_dtype in ['bf16', 'fp32']:
+                fallback_items_lst = [item for item in int8_ops if
+                                      item in tuning_space.query_items_by_quant_mode(target_dtype)]
+                if fallback_items_lst:
+                    logger.info(f"Start to fall back ops to {target_dtype} one by one.")
+                    # Replace it with the sorted items list
+                    fallback_items_name_lst = [item.name for item in fallback_items_lst]
+                    # TODO check the best_qmodel
+                    ordered_op_name_types = self.mse_impact_lst(fallback_items_name_lst, self.model, self.best_qmodel)
+                    self.ordered_ops = [op_name for (op_name, op_type) in ordered_op_name_types]
+                    op_dtypes = OrderedDict(zip(ordered_op_name_types, [target_dtype] * len(fallback_items_name_lst)))
+                initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1)
+                fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[],
+                                                         initial_op_tuning_cfg=initial_op_tuning_cfg,
+                                                         op_dtypes=op_dtypes, accumulate=False)
+                op_fallback_acc_impact = OrderedDict()
+                for op_index, op_tuning_cfg in enumerate(fallback_sampler):
+                    op_tuning_cfg['calib_sampling_size'] = calib_sampling_size
+                    yield op_tuning_cfg
+                    acc, _ = self.last_tune_result
+                    op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc
+
+                # Do the accumulated fallback according to the order in the previous stage
+                if len(op_fallback_acc_impact) > 0:
+                    ordered_ops = sorted(op_fallback_acc_impact.keys(),
+                                         key=lambda key: op_fallback_acc_impact[key],
+                                         reverse=self.higher_is_better)
+                    op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst)))
+                    logger.info(f"Start to accumulate fallback to {target_dtype}.")
+                    initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1)
+                    fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[],
+                                                             initial_op_tuning_cfg=initial_op_tuning_cfg,
+                                                             op_dtypes=op_dtypes, accumulate=True)
+                    for op_tuning_cfg in fallback_sampler:
+                        op_tuning_cfg['calib_sampling_size'] = calib_sampling_size
+                        yield op_tuning_cfg
diff --git a/neural_compressor/experimental/strategy/mse_v2.py b/neural_compressor/experimental/strategy/mse_v2.py
new file mode 100644
index 00000000000..6492ae26dca
--- /dev/null
+++ b/neural_compressor/experimental/strategy/mse_v2.py
@@ -0,0 +1,209 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""The MSE_V2 tuning strategy."""
+import copy
+from copy import deepcopy
+import numpy as np
+from collections import OrderedDict
+from typing import Dict, Any, List
+from .strategy import strategy_registry, TuneStrategy
+from ...utils import logger
+from time import time
+
+from .utils.tuning_sampler import OpTypeWiseTuningSampler
+from .utils.tuning_structs import OpTuningConfig
+
+@strategy_registry
+class MSE_V2TuneStrategy(TuneStrategy):
+    """The `mse_v2` tuning strategy.
+
+    MSE_V2 is a strategy with a two-stage flow: fallback, then revert-fallback.
+    Note that only the TensorFlow framework and the PyTorch FX backend are currently
+    supported by the mse_v2 tuning strategy.
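+
+    Example (an illustrative sketch of selecting this strategy through the
+    ``TuningCriterion`` config class rewritten in this patch; the
+    ``confidence_batches`` key is an assumption based on how this strategy
+    reads ``self.cfg.tuning.strategy.confidence_batches``)::
+
+        from neural_compressor.config import TuningCriterion
+
+        tuning_criterion = TuningCriterion(
+            strategy="mse_v2",
+            strategy_kwargs={"confidence_batches": 2},
+        )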
+    """
+
+    def _tuning_record_msg(self, records):
+        records_str_lst = [[str(e) for e in record] for record in records]
+        record_msg = '\n'.join(','.join(record) for record in records_str_lst)
+        return record_msg
+
+    def next_tune_cfg(self):
+        """Generate and yield the next tuning config in the order below.
+
+        1. In the fallback stage, it uses multi-batch data to score the op impact
+           and then falls back the op with the highest score until a quantized model
+           that meets the accuracy criteria is found.
+        2. In the revert-fallback stage, it also scores the impact of the OPs that
+           were fallen back in the previous stage and selects the op with the lowest
+           score to revert the fallback, until the quantized model no longer meets
+           the accuracy criteria.
+
+        Returns:
+            tune_config (dict): A dict containing the tuning configuration for quantization.
+        """
+        best_op_tuning_cfg = None
+        if len(self.metric_name) == 1 or self.metric_weight is not None:
+            best_acc = float('-inf') if self.higher_is_better else float('inf')
+        else:
+            best_acc = [float('-inf') if higher_is_better else float('inf') for \
+                        higher_is_better in self.metric_criterion]
+
+        tuning_space = self.tuning_space
+        initial_op_tuning_cfg = {}
+        for item in tuning_space.root_item.options:
+            if item.item_type == 'op':
+                op_name, op_type = item.name
+                initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space)
+        calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options
+        for calib_sampling_size in calib_sampling_size_lst:
+            # Collect the ops that support static and dynamic
+            quant_mode_wise_items = OrderedDict()
+            query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32']
+            pre_items = set()
+            for quant_mode in query_order:
+                items = tuning_space.query_items_by_quant_mode(quant_mode)
+                filtered_items = [item for item in items if item not in pre_items]
+                pre_items = pre_items.union(set(items))
+                quant_mode_wise_items[quant_mode] = filtered_items
+
+            def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict):
+                for item in items_lst:
+                    op_item_dtype_dict[item.name] = target_quant_mode
+
+            op_item_dtype_dict = OrderedDict()
+            for quant_mode, quant_mode_items in quant_mode_wise_items.items():
+                initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict)
+
+            # Optype-wise tuning
+            early_stop_tuning = True
+            stage1_cnt = 0
+            int8_ops = quant_mode_wise_items['dynamic'] + quant_mode_wise_items['static']
+            stage1_max = 2  # TODO set a more appropriate value
+            op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [],
+                                                             op_item_dtype_dict, initial_op_tuning_cfg)
+            for op_tuning_cfg in op_wise_tuning_sampler:
+                stage1_cnt += 1
+                if early_stop_tuning and stage1_cnt > stage1_max:
+                    logger.info("Early stopping the stage 1.")
+                    break
+                op_tuning_cfg['calib_sampling_size'] = calib_sampling_size
+                yield op_tuning_cfg
+
+            # Fall back ops that support both static and dynamic from static to dynamic
+            static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if
+                                    item in tuning_space.query_items_by_quant_mode('dynamic')]
+            if static_dynamic_items:
+                logger.info("Fall back all ops that support both dynamic and static to dynamic.")
+            else:
+                logger.info("No op supports both dynamic and static.")
+
+            def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig):
+                new_op_tuning_cfg = deepcopy(op_tuning_cfg)
+                new_op_tuning_cfg.op_quant_mode = 'dynamic'
+                return new_op_tuning_cfg
+
+            new_op_tuning_cfg = deepcopy(self.cur_best_tuning_cfg)
+            for item in static_dynamic_items:
+                new_op_tuning_cfg[item.name] = dynamic_op_tuning_cfg_from_static(new_op_tuning_cfg[item.name])
+            new_op_tuning_cfg['calib_sampling_size'] = calib_sampling_size
+            yield new_op_tuning_cfg
+
+            # Fall back one by one by op sensitivity (MSE)
+            # 1. While the accuracy requirements are not met:  # to improve the accuracy
+            #     1) calculate the sensitivity of the int8 ops in the current state.
+            #     2) fall back the op with the highest sensitivity accumulatively
+            # 2. After the accuracy requirements are met:  # to improve the performance
+            #     1) calculate the sensitivity of the fp32 ops in the current state
+            #     2) re-quantize the op with the lowest sensitivity accumulatively
+            tune_cfg = deepcopy(self.cur_best_tuning_cfg)
+            requantize_cfg = deepcopy(self._tune_cfg_converter(self.cur_best_tuning_cfg))
+            self.output_op_names = self.adaptor.get_output_op_names(self.last_qmodel)
+            self.confidence_batches = (self.cfg.tuning.strategy.confidence_batches
+                                       if self.cfg.tuning.strategy.confidence_batches is not None else 2)
+            tune_cfg_backup = deepcopy(tune_cfg)
+            quant_ops_in_tune_cfg = self._collect_ops_by_quant_mode(tune_cfg, 'dynamic') + \
+                                    self._collect_ops_by_quant_mode(tune_cfg, 'static')
+            op_quant_cfgs = {op_info: tune_cfg_backup[op_info] for op_info in quant_ops_in_tune_cfg}
+            fallback_records = []
+            self.re_quant = True
+            while not self.objectives.compare(self.last_tune_result, self.baseline):
+                # Record the time spent calculating the sensitivity
+                start = time()
+                ops_lst = self.adaptor.calculate_op_sensitivity(self.model,
+                                                                self.calib_dataloader,
+                                                                deepcopy(self._tune_cfg_converter(tune_cfg)),
+                                                                self.output_op_names,
+                                                                self.confidence_batches,
+                                                                fallback=True)
+                logger.debug(f"*** The op sensitivity analysis took {time() - start:.2f}s.")
+                select_op_info = ops_lst[0]
+                logger.info(f"*** The op {select_op_info} has the highest sensitivity in the current state, "
+                            "falling it back to fp32.")
+                tune_cfg[select_op_info] = OpTuningConfig(select_op_info[0],
+                                                          select_op_info[1],
+                                                          'fp32',
+                                                          self.tuning_space)
+                # Record the fallback history
+                if not fallback_records:
+                    fallback_records = [[select_op_info]]
+                else:
+                    fallback_records.append(fallback_records[-1] + [select_op_info])
+                logger.debug(f"*** The fallback ops record: \n{self._tuning_record_msg(fallback_records)}")
+                yield tune_cfg
+
+            logger.info("*** The accuracy now meets the requirement, stop falling back ops.")
+            while self.objectives.compare(self.last_tune_result, self.baseline):
+                if len(fallback_records) == 0 or len(fallback_records[-1]) <= 1:
+                    logger.info("*** Stop re-quantizing due to no int8 op or only 1 int8 op left.")
+                    break
+                logger.info("*** Start to re-quantize the ops fallen back in the previous stage.")
+                # Track the current fallback ops
+                tmp_fallback_ops = fallback_records[-1] if fallback_records else []
+                start = time()
+                ops_lst = self.adaptor.calculate_op_sensitivity(self.model,
+                                                                self.calib_dataloader,
+                                                                deepcopy(self._tune_cfg_converter(tune_cfg)),
+                                                                self.output_op_names,
+                                                                self.confidence_batches,
+                                                                fallback=False,
+                                                                requantize_cfgs=requantize_cfg['op'])
+                logger.debug(f"*** The op sensitivity analysis took {time() - start:.2f}s.")
+                if not ops_lst:
+                    logger.warning("No op to be requantized")
+                    break
+                for select_op_info in ops_lst:
+                    # assert select_op_info in tmp_fallback_ops, f"{select_op_info} not in fallback list."
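+                    # Only ops recorded in the latest fallback state are eligible for
+                    # re-quantization; reverting any other op would not change the
+                    # current tuning config.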
+                    if select_op_info not in tmp_fallback_ops:
+                        logger.debug(f"{select_op_info} not in fallback list.")
+                        continue
+
+                    new_fallback_ops = deepcopy(tmp_fallback_ops)
+                    new_fallback_ops.remove(select_op_info)
+                    if new_fallback_ops not in fallback_records:
+                        logger.info(f"*** The op {select_op_info} has the lowest sensitivity in the current state, "
+                                    "re-quantizing it.")
+                        tune_cfg[select_op_info] = op_quant_cfgs[select_op_info]
+                        fallback_records.append(new_fallback_ops)
+                        logger.debug(f"*** The fallback ops record: \n{self._tuning_record_msg(fallback_records)}")
+                        yield tune_cfg
+                        break
+                    else:
+                        logger.debug(f"*** Skip re-quantizing {select_op_info} because this config has already been evaluated.")
+                        continue
+            self.re_quant = False
+            logger.info("*** The accuracy no longer meets the requirement, stop re-quantizing ops.")
\ No newline at end of file
diff --git a/neural_compressor/experimental/strategy/random.py b/neural_compressor/experimental/strategy/random.py
new file mode 100644
index 00000000000..7148100a76a
--- /dev/null
+++ b/neural_compressor/experimental/strategy/random.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""The random tuning strategy."""
+import numpy as np
+from .strategy import strategy_registry, TuneStrategy
+from collections import OrderedDict
+
+from .utils.tuning_sampler import OpWiseTuningSampler, FallbackTuningSampler
+from .utils.tuning_structs import OpTuningConfig
+from ...utils import logger
+
+@strategy_registry
+class RandomTuneStrategy(TuneStrategy):
+    """The random tuning strategy."""
+
+    def next_tune_cfg(self):
+        """Generate and yield the next tuning config by random searching in the tuning space.
+
+        The random strategy randomly chooses quantization tuning configurations
+        from the tuning space. As with the exhaustive strategy, it also only considers
+        quantization tuning configs to generate a better-performing quantized model.
+
+        Returns:
+            tune_config (dict): A dict containing the tuning configuration for quantization.
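+
+        Example (an illustrative sketch; ``strategy`` is assumed to be an
+        initialized ``RandomTuneStrategy``, normally driven by ``traverse()``)::
+
+            cfg_gen = strategy.next_tune_cfg()
+            first_cfg = next(cfg_gen)  # one randomly chosen op-wise tuning config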
+        """
+        tuning_space = self.tuning_space
+        op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg()
+        op_wise_tuning_sampler = OpWiseTuningSampler(tuning_space, [], [],
+                                                     op_item_dtype_dict, initial_op_tuning_cfg)
+        op_tuning_cfg_lst = list(op_wise_tuning_sampler)
+        op_tuning_cfg_cnt = len(op_tuning_cfg_lst)
+        calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options
+        calib_sampling_size_cnt = len(calib_sampling_size_lst)
+        while True:
+            calib_index = np.random.choice(calib_sampling_size_cnt)
+            calib_sampling_size = calib_sampling_size_lst[calib_index]
+            op_tuning_cfg_index = np.random.choice(op_tuning_cfg_cnt)
+            op_tuning_cfg = op_tuning_cfg_lst[op_tuning_cfg_index]
+            op_tuning_cfg['calib_sampling_size'] = calib_sampling_size
+            yield op_tuning_cfg
+        return
diff --git a/neural_compressor/experimental/strategy/strategy.py b/neural_compressor/experimental/strategy/strategy.py
new file mode 100644
index 00000000000..b7abb59ed5f
--- /dev/null
+++ b/neural_compressor/experimental/strategy/strategy.py
@@ -0,0 +1,1556 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""The base class for tuning strategy."""
+
+from abc import abstractmethod
+from enum import EnumMeta
+import os
+import sys
+import math
+import copy
+from copy import deepcopy
+import pickle
+from collections import OrderedDict, defaultdict
+from pathlib import Path
+from time import time
+import yaml
+import numpy as np
+from typing import OrderedDict as T_OrderedDict
+
+from neural_compressor.adaptor.tensorflow import TensorFlowAdaptor
+from ...objective import MultiObjective
+from ...adaptor import FRAMEWORKS
+from ...utils.utility import Statistics, dump_data_to_local
+from ...utils.utility import fault_tolerant_file, equal_dicts, GLOBAL_STATE, MODE
+from ...utils.create_obj_from_config import create_eval_func, create_train_func
+from ...utils.utility import LazyImport
+from ...utils import logger
+from ...version import __version__
+from ...conf.dotdict import DotDict, deep_get, deep_set
+from ...algorithm import AlgorithmScheduler, ALGORITHMS
+
+from .utils.tuning_space import TuningItem, TuningSpace
+from .utils.tuning_structs import OpTuningConfig
+from .utils.constant import FALLBACK_RECIPES_SET
+
+
+EXP_STRATEGIES = {}
+
+
+def strategy_registry(cls):
+    """Class decorator used to register all TuneStrategy subclasses.
+
+    Args:
+        cls (class): The class to register.
+
+    Returns:
+        cls: The registered class.
+    """
+    assert cls.__name__.endswith(
+        'TuneStrategy'
+    ), "The name of subclass of TuneStrategy should end with \'TuneStrategy\' substring."
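+    # Example (illustrative): a subclass defined as
+    #     @strategy_registry
+    #     class FooTuneStrategy(TuneStrategy): ...
+    # is registered under the key 'foo' and can be looked up via EXP_STRATEGIES['foo'].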
+    if cls.__name__[:-len('TuneStrategy')].lower() in EXP_STRATEGIES:
+        raise ValueError('Cannot have two strategies with the same name')
+    EXP_STRATEGIES[cls.__name__[:-len('TuneStrategy')].lower()] = cls
+    return cls
+
+@strategy_registry
+class TuneStrategy(object):
+    """Basic class for tuning strategy."""
+
+    def __init__(self, model, conf, q_dataloader=None, q_func=None, eval_dataloader=None,
+                 eval_func=None, resume=None, q_hooks=None):
+        """Init the TuneStrategy.
+
+        Args:
+            model: The FP32 model specified for low precision tuning.
+            conf: The Conf class instance that includes all user configurations.
+            q_dataloader: Data loader for calibration, mandatory for post-training quantization. Defaults to None.
+            q_func: Training function for quantization aware training. Defaults to None.
+            eval_dataloader: Data loader for evaluation. Defaults to None.
+            eval_func: The evaluation function provided by the user. This function takes the model as its
+                parameter; the evaluation dataset and metrics should be encapsulated in the function
+                implementation, and it outputs a higher-is-better accuracy scalar value. Defaults to None.
+            resume: The dict containing resume information. Defaults to None.
+            q_hooks: The dict of training hooks; supported keys are: on_epoch_begin, on_epoch_end, on_step_begin,
+                on_step_end. Their values are functions to be executed in the adaptor layer. Defaults to None.
+            last_qmodel: The quantized model generated from the last tuning.
+            best_qmodel: The best quantized model generated during the tuning process.
+        """
+        self.model = model
+        self.cfg = conf.usr_cfg
+        self.cfg_bk = copy.deepcopy(self.cfg)
+        self.history_path = self._create_path(self.cfg.tuning.workspace.path, './history.snapshot')
+        self.deploy_path = self._create_path(self.cfg.tuning.workspace.path, 'deploy.yaml')
+        self.eval_dataloader = eval_dataloader
+        self.calib_dataloader = q_dataloader
+        self.q_func = q_func
+        self.q_hooks = q_hooks
+        self.eval_func = eval_func
+        GLOBAL_STATE.STATE = MODE.QUANTIZATION
+        framework, framework_specific_info = self._set_framework_info(q_dataloader, q_func)
+        self.adaptor = FRAMEWORKS[framework](framework_specific_info)
+        self.framework = framework
+
+        self.set_q_func()
+        self._set_objectives()
+        self.tune_data = {}
+        self.tune_result_record = []
+        self.tuning_history = []
+        self.tuning_result_data = []
+        # The tuning history ever made, structured like below:
+        # [
+        #   {
+        #     'version': __version__,
+        #     'cfg': cfg1,
+        #     'framework': tensorflow
+        #     'baseline': baseline1,
+        #     'last_tune_result': last_tune_result1,
+        #     'best_tune_result': best_tune_result1,
+        #     'history': [
+        #         # tuning history under the same yaml config
+        #         {'tune_cfg': tune_cfg1, 'tune_result': \
+        #          tune_result1, 'q_config': q_config1, ...},
+        #         ...,
+        #     ],
+        #     # new fields added by subclasses for resuming
+        #     ...,
+        #   },
+        #   # tuning history under different yaml configs
+        #   ...,
+        # ]
+
+        self.baseline = None
+        self.last_tune_result = None
+        self.last_qmodel = None
+        self.last_tune_cfg = None
+        self.best_qmodel = None
+        self.best_tune_result = None
+        self.best_tuning_cfg = None  # track the best tuning config corresponding to the best quantized model
+        self.cur_best_acc = self.initial_best_acc()  # track the current best accuracy
+        self.cur_best_tuning_cfg = {}  # track the tuning cfg with the current best accuracy
+        self.re_quant = False
+
+        self.capability = self.adaptor.query_fw_capability(model)
+        logger.debug(self.capability)
+        self.set_tuning_space(conf)
+
+        # For the algo scheduler
+        self.algo_scheduler = AlgorithmScheduler(self.cfg.quantization.recipes)
+        self.algo_scheduler.dataloader = self.calib_dataloader  # reuse the calibration iteration
+        self.algo_scheduler.origin_model = self.model
+        self.algo_scheduler.adaptor = self.adaptor
+
+        self._optype_statistics = None
+        self.fallback_stats_baseline = None
+        self.fallback_stats = None
+        self.tuning_times = 0
+        self.fallback_start_point = 0
+        self.metric_met_point = 0
+
+        # for recipes
+        # {recipe name: the list of supported values}
+        self._tuning_recipes = OrderedDict()
+        # {recipe name: the default value when not tuning}
+        self._tuning_recipes_default_values = {}
+        # {recipe name: the value specified by the user}
+        self._not_tuning_recipes_values = {}
+        self._initialize_recipe()
+        self.applied_all_recipes_flag = False
+        if resume is not None:
+            self.setup_resume(resume)
+
+    @abstractmethod
+    def next_tune_cfg(self):
+        """Interface for generating the next tuning config.
+
+        The generator yields the next tuning config to traverse, according to the last
+        tuning result and the traverse logic of the concrete strategy or quantization level.
+
+        It should be implemented by the sub-class.
+
+        Yields:
+            tune_config (dict): A dict containing the tuning configuration to traverse.
+        """
+        raise NotImplementedError
+
+    def _initialize_recipe(self):
+        """Divide the recipes into two categories: tuning and not tuning."""
+        from .utils.utility import get_adaptor_name
+        from ...utils.constant import RECIPES as fwk_recipes
+        from ...utils.constant import RECIPES_PRIORITY as fwk_recipes_priority
+        # get all recipes supported by the adaptor.
+        adaptor_name = get_adaptor_name(self.adaptor)
+        adaptor_recipes = fwk_recipes['common']
+        # TODO: workaround since smooth quant is only supported by ort/pt currently.
+        if adaptor_name not in ['onnx', 'pytorch']:
+            adaptor_recipes.pop('smooth_quant', None)
+        for adaptor_name_key, adaptor_recipes_val in fwk_recipes.items():
+            if adaptor_name_key.startswith(adaptor_name):
+                adaptor_recipes.update(adaptor_recipes_val)
+        # divide the recipes into two categories:
+        # tuning list: the value is equal to the default value
+        # not-tuning list: the value is not equal to the default value
+        logger.info(f"Adaptor has {len(adaptor_recipes)} recipes.")
+        logger.debug(adaptor_recipes)
+        usr_recipes_cfg = self.cfg_bk.quantization.recipes if self.cfg_bk.quantization.recipes else {}
+        for recipe_name, recipe_val in usr_recipes_cfg.items():
+            # for not-tuning recipes, use the value specified by the user.
+            if recipe_name in adaptor_recipes and recipe_val != adaptor_recipes[recipe_name][0]:
+                self._not_tuning_recipes_values[recipe_name] = recipe_val
+        # sort the recipes and set the default values to be used before recipe tuning
+        for recipe_name in fwk_recipes_priority:
+            if recipe_name in adaptor_recipes and recipe_name not in self._not_tuning_recipes_values:
+                # TODO skip tuning smooth_quant first
+                if recipe_name == 'smooth_quant':
+                    continue
+                self._tuning_recipes[recipe_name] = adaptor_recipes[recipe_name]
+                self._tuning_recipes_default_values[recipe_name] = adaptor_recipes[recipe_name][0]
+        logger.info(f"{len(self._not_tuning_recipes_values)} recipes specified by the user.")
+        logger.debug(self._not_tuning_recipes_values)
+        logger.info(f"{len(self._tuning_recipes)} recipes require further tuning.")
+        logger.debug(self._tuning_recipes)
+
+    def distributed_next_tune_cfg_lst(self, comm):
+        """Interface for generating the next list of tuning configs for distributed tuning.
+
+        The generator yields the next list of tuning configs for distributed traversal,
+        according to the tuning result and the traverse logic of the concrete strategy
+        or quantization level.
+
+        It should be implemented by the sub-class. Currently, it is only implemented in
+        the BasicTuneStrategy.
+        """
+        pass
+
+    def meet_acc_req(self, eval_res):
+        """Compare the result of the last tuning with the baseline to check whether it meets requirements.
+
+        Args:
+            eval_res: The evaluation result of tuning.
+
+        Returns:
+            True if the accuracy meets the requirements, else False.
+        """
+        self.last_tune_result = eval_res
+        return self.objectives.accuracy_meet_req(deepcopy(self.last_tune_result))
+
+    def master_worker_handle(self, comm):
+        """Master worker handles task assignment and result management.
+
+        The master node sends task ids to all free nodes and waits for results.
+        When receiving a result, it directly sends a new task id to the sender (which is free again).
+
+        Args:
+            comm (MPI.COMM): The communication instance for MPI.
+        """
+        MPI = LazyImport("mpi4py.MPI")
+        size = comm.Get_size()
+        for process_id in range(1, min(len(self.tune_cfg_lst) + 1, size)):
+            tune_cfg_id = process_id - 1
+            logger.info("~~~~~~master sending tune cfg: {} to rank {}".format(tune_cfg_id, process_id))
+            comm.send(
+                obj=tune_cfg_id,  # just sending the tune cfg id is enough
+                dest=process_id,  # rank 0 sends to rank 1, 2, ...
+                tag=tune_cfg_id   # tag: the index of the tune cfg (0, 1, 2, ...)
+            )
+            import time as ttime
+            ttime.sleep(0.5)  # workaround for UT
+
+        cur_cfg_id = min(len(self.tune_cfg_lst), size - 1)  # the master should be aware of the next config id to send
+        self.eval_results = {}  # record all results
+        self.num_acks = 0  # number of response acks; break when it equals len(self.tune_cfg_lst)
+        status = MPI.Status()  # used to obtain the source and the tag of each received message
+
+        self.already_ack_id_lst = set()
+        self.requirements_met_min_cfg_id = sys.maxsize
+
+        # block here to receive any result
+        while True:
+            eval_res = comm.recv(
+                source=MPI.ANY_SOURCE,
+                tag=MPI.ANY_TAG,
+                status=status  # get the MPI status object
+            )
+            self.num_acks += 1
+            sender_rank = status.Get_source()  # sender rank
+            tag = status.Get_tag()  # the task id that is finished
+
+            logger.info("~~~~~~master receiving eval result: {} from rank {}".format(eval_res, sender_rank))
+
+            self.last_tune_result = eval_res  # for context coordination of stage 3
+            self.eval_results[tag] = eval_res
+
+            self.overall_trials += 1
+            self.best_tune_cfg_id = None
+            self.already_ack_id_lst.add(tag)
+
+            # if the accuracy requirement is met, update the minimum id that met it
+            if self.meet_acc_req(eval_res):
+                logger.info("~~~~~~master has one tuning cfg meeting acc: {}".format(tag))
+                self.met_flag = True
+                self.requirements_met_min_cfg_id = min(self.requirements_met_min_cfg_id, tag)
+
+                # must ensure every id lower than the current min id has been acknowledged,
+                # because an as-yet-unacked tune cfg with a lower id can have better acc
+                for i in range(self.requirements_met_min_cfg_id):
+                    if i not in self.already_ack_id_lst:
+                        logger.info("~~~~~~master has one tuning cfg meeting acc: {} but not all earlier acks are collected"\
+                            .format(tag))
+                        self.met_flag = False  # not completely collected yet!
+                        break
+
+            if self.met_flag:
+                # found the best tune cfg!
+                logger.info("~~~~~~master has one tuning cfg meeting acc: {} with all earlier acks collected"\
+                    .format(tag))
+                self.best_tune_cfg_id = self.requirements_met_min_cfg_id
+            else:
+                # got the current best acc, but the requirements are not met yet
+                logger.info("~~~~~~master gets the current best acc: {} but the requirements are not met".format(tag))
+                self.cur_best_acc, self.cur_best_tuning_cfg = self.update_best_op_tuning_cfg(self.tune_cfg_lst[tag])
+
+            if self.best_tune_cfg_id is not None:
+                # we found the best tune cfg id that meets the requirements!
+                logger.info("~~~~~~master finds best tune cfg id~~~~~~~")
+                logger.info(self.best_tune_cfg_id)
+                logger.info(self.tune_cfg_lst[self.best_tune_cfg_id])
+                break
+
+            # send the next cfg if max trials is not exceeded
+            if self.overall_trials > self.cfg.tuning.exit_policy.max_trials:
+                self.max_trial_flag = True
+            # elif time.time() - self.overall_time_start > self.cfg.tuning.exit_policy.timeout:
+            #     self.max_time_flag = True
+            elif cur_cfg_id < len(self.tune_cfg_lst):
+                logger.info("~~~~~~master sends new tuning cfg {} to rank: {}".format(cur_cfg_id, sender_rank))
+                comm.send(obj=cur_cfg_id, dest=sender_rank, tag=cur_cfg_id)
+                cur_cfg_id += 1
+            else:
+                logger.info("All tune configs are sent, no more sending, just collecting...")
+
+            if len(self.tune_cfg_lst) == self.num_acks:  # all collected (every sent cfg has been acked)
+                # all processes ended;
+                # use self.requirements_met_min_cfg_id if it has been updated
+                if self.requirements_met_min_cfg_id == sys.maxsize:
+                    logger.info("~~~~~~Did not find any tune cfg that meets the requirements~~~~~~")
+                    self.cur_best_tuning_cfg = self.tune_cfg_lst[0]  # TODO select cur_best_tuning_cfg
+                else:
+                    logger.info("~~~~~~Found the best tune cfg id~~~~~~")
+                    logger.info(self.requirements_met_min_cfg_id)
+                    self.met_flag = True
+                    self.best_tune_cfg_id = self.requirements_met_min_cfg_id
+                    logger.info(self.tune_cfg_lst[self.best_tune_cfg_id])
+                break
+
+        # send the END signal to all other slaves
+        logger.info("~~~~~~master sends END signal to all other slaves~~~~")
+        for process_id in range(1, size):
+            logger.info("~~~~~~master sends END signal to rank: {}".format(process_id))
+            comm.send(
+                obj="MET" if self.met_flag else "NOT MET",  # send whether the criterion was met in the current stage
+                dest=process_id,  # rank 0 sends to rank 1, 2, ...
+                tag=len(self.tune_cfg_lst)
+            )
+
+        if self.best_tune_cfg_id is not None:
+            self.best_qmodel = self.adaptor.quantize(
+                copy.deepcopy(self.tune_cfg_lst[self.best_tune_cfg_id]), self.model, self.calib_dataloader, \
+                self.q_func)
+
+    def slave_worker_handle(self, comm):
+        """Slave worker handles the task processing.
+
+        When receiving a task id, the slave node looks it up in self.tune_cfg_lst and runs it.
+        Then the slave node sends the tuning result back to the master node.
+
+        Args:
+            comm (MPI.COMM): The communication instance for MPI.
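+
+        Example (an illustrative sketch; distributed tuning is launched with an
+        MPI runner such as ``mpirun -np 4 python main.py``, where rank 0 acts as
+        the master and the remaining ranks run this handler)::
+
+            from mpi4py import MPI
+
+            comm = MPI.COMM_WORLD
+            if comm.Get_rank() != 0:
+                strategy.slave_worker_handle(comm)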
+        """
+        MPI = LazyImport("mpi4py.MPI")
+        status = MPI.Status()
+        while True:
+            task = comm.recv(
+                source=MPI.ANY_SOURCE,
+                tag=MPI.ANY_TAG,
+                status=status  # sender (master)
+            )
+            cfg_idx = status.Get_tag()
+            if status.Get_tag() >= len(self.tune_cfg_lst):
+                logger.info("~~~~~~slave {} received END signal in the current stage".format(comm.Get_rank()))
+                if task == "MET":
+                    logger.info("~~~~~~met criterion in this stage!")
+                    self.met_flag = True
+                break
+            tune_cfg = self.tune_cfg_lst[cfg_idx]
+
+            # set the parameters for the pre-quantization algos and run
+            self.set_param_for_pre_quantization_algos(self.algo_scheduler, tune_cfg, self.model)
+            self.model = self.algo_scheduler('pre_quantization')
+            # quantize
+            q_model = self.adaptor.quantize(copy.deepcopy(tune_cfg), self.model, self.calib_dataloader, self.q_func)
+            assert self.adaptor.pre_optimized_model
+            # set the parameters for the post-quantization algos and run
+            self.set_param_for_post_quantization_algos(self.algo_scheduler, tune_cfg, self.adaptor.pre_optimized_model,
+                                                       q_model)
+            self.last_qmodel = self.algo_scheduler('post_quantization')
+            self.last_tune_cfg = copy.deepcopy(tune_cfg)
+            # Remove the reference to the model
+            self.algo_scheduler.reset_exec_algorithms()
+            assert self.last_qmodel
+            self.last_tune_result = self._evaluate(self.last_qmodel)
+
+            # send back the tuning statistics
+            logger.debug("##### Slave sends back the tuning statistics #########")
+            logger.debug(self.last_tune_result)
+            comm.send(
+                obj=self.last_tune_result,
+                dest=0,  # send back to the master (rank 0)
+                tag=cfg_idx
+            )
+
+    def distributed_traverse(self):
+        """Distributed traversal of the tuning space.
+
+        The main traverse logic, which can be overridden by concrete strategies that need more hooks.
+        """
+        MPI = LazyImport("mpi4py.MPI")
+        comm = MPI.COMM_WORLD
+        rank = comm.Get_rank()
+
+        self.met_flag = False
+        self.max_trial_flag = False  # whether max trials is exceeded
+        self.max_time_flag = False  # whether max time is exceeded
+        self.overall_trials = 0
+        self.overall_time_start = time()
+
+        # For all the stages, handle the tune cfg list, which is generated/yielded each
+        # time by distributed_next_tune_cfg_lst. We must pass the comm to the specific
+        # strategy because slaves may not know contexts such as the best_tune_cfg;
+        # the master should make sure slaves have all the contexts needed before going
+        # to the next computation stage.
+        for op_tuning_cfg_lst in self.distributed_next_tune_cfg_lst(comm):
+            self.tune_cfg_lst = [self._tune_cfg_converter(op_tuning_cfg) for op_tuning_cfg in op_tuning_cfg_lst]
+            if self.tune_cfg_lst == []:
+                # skip empty lists at some stages
+                continue
+            if rank == 0:
+                self.master_worker_handle(comm)
+            else:
+                self.slave_worker_handle(comm)
+            logger.debug("met_flag or max_trial_flag or max_time_flag: {}"\
+                .format(self.met_flag or self.max_trial_flag or self.max_time_flag))
+            if self.met_flag or self.max_trial_flag or self.max_time_flag:
+                break
+
+    def _open_all_recipes(self):
+        """Open all tunable recipes."""
+        opened_recipes = {}
+        for recipe_name, recipe_val_lst in self._tuning_recipes.items():
+            opened_recipes[recipe_name] = recipe_val_lst[-1]
+        logger.info("Opened all recipes.")
+        logger.info(opened_recipes)
+
+    def _fallback_ops(self, tune_cfg, recipe_op_lst, tuning_space):
+        """Fall back the ops in the recipe op list."""
+        for op_name_type in recipe_op_lst:
+            tune_cfg.update({op_name_type: OpTuningConfig(op_name_type[0], \
+                op_name_type[1], 'fp32', tuning_space)})
+        return tune_cfg
+
+    def apply_all_tuning_recipes(self, tune_cfg):
+        """Apply all tunable recipes with their values."""
+        tune_cfg['recipe_cfgs'] = tune_cfg.get('recipe_cfgs', {})
+        for recipe_name, recipe_val_lst in self._tuning_recipes.items():
+            tune_cfg['recipe_cfgs'][recipe_name] = recipe_val_lst[-1]
+            if recipe_name in FALLBACK_RECIPES_SET and 'recipes_ops' in self.capability and \
+                    len(self.capability['recipes_ops'].get(recipe_name, [])) > 0:
+                logger.info(f"Applied recipe {recipe_name}.")
+                tune_cfg = self._fallback_ops(tune_cfg, self.capability['recipes_ops'][recipe_name],\
+                                              self.tuning_space)
+        return tune_cfg
+
+    def apply_recipe_one_by_one(self, tune_cfg):
+        """Apply the tunable recipes one by one.
+
+        For recipes that only have two options, apply the last one.
+        For recipes with multiple values, such as the alpha of smooth quant, apply them one by one.
+        """
+        from .utils.tuning_sampler import TuningSamplerRegistry
+        all_registered_samplers = TuningSamplerRegistry.sampler_dict
+        for recipe_name, recipe_vals in self._tuning_recipes.items():
+            if recipe_name in FALLBACK_RECIPES_SET and 'recipes_ops' in self.capability and \
+                    len(self.capability['recipes_ops'].get(recipe_name, [])) > 0:
+                logger.info(f"Applied recipe {recipe_name} with value {recipe_vals[-1]}")
+                new_tune_cfg = self._fallback_ops(copy.deepcopy(tune_cfg), \
+                    self.capability['recipes_ops'][recipe_name], self.tuning_space)
+                yield new_tune_cfg
+            if recipe_name in all_registered_samplers:
+                recipe_sampler = all_registered_samplers[recipe_name](tuning_space=None,
+                                                                      tuning_order_lst=[],
+                                                                      initial_op_tuning_cfg=copy.deepcopy(tune_cfg),
+                                                                      kwargs={recipe_name: recipe_vals})
+                for new_tune_cfg in recipe_sampler:
+                    yield new_tune_cfg
+
+    def set_param_for_pre_quantization_algos(self, algo_scheduler, tune_cfg, fp32_model) -> None:
+        """Set the parameters for the pre-quantization algos, such as smooth quantization.
+
+        Args:
+            algo_scheduler: the algo scheduler
+            tune_cfg: the tuning config
+            fp32_model: the fp32 model
+        """
+        algo_scheduler.origin_model = fp32_model
+        algo_scheduler.calib_iter = tune_cfg['calib_iteration']
+        algo_scheduler.q_model = fp32_model
+
+        recipe_cfgs = tune_cfg.get('recipe_cfgs', None)
+        algo_scheduler.reset_exec_algorithms()
+        if recipe_cfgs and recipe_cfgs.get('smooth_quant', False):
+            # skip assigning alpha to smooth quant for now;
+            # the alpha is set to 0.5 by default
+            # smooth_quant_args = recipe_cfgs.get('smooth_quant_args', {'alpha': 0.5})
+            sq_algo = ALGORITHMS()['smooth_quant']
+            # sq_algo.alpha = smooth_quant_args['alpha']
+            # logger.debug(f"Set smooth quant with alpha {smooth_quant_args['alpha']} as the pre-quantization algo.")
+            algo_scheduler.append_algorithm('pre_quantization', sq_algo)
+
+    def set_param_for_post_quantization_algos(self, algo_scheduler, tune_cfg, pre_optimized_model, q_model) -> None:
+        """Set the parameters for the post-quantization algos, such as bias correction and weight correction.
+
+        Args:
+            algo_scheduler: the algo scheduler
+            tune_cfg: the tuning config.
+            pre_optimized_model: the pre-optimized model
+            q_model: the quantized model
+        """
+        algo_scheduler.origin_model = pre_optimized_model
+        # if there are no post-quantization algos, the quantized model is returned directly.
+        algo_scheduler.q_model = q_model
+
+        algo_scheduler.reset_exec_algorithms()
+        recipe_cfgs = tune_cfg.get('recipe_cfgs', None)
+        # for fast_bias_correction
+        if recipe_cfgs and recipe_cfgs.get('fast_bias_correction', False):
+            fbc_algo = ALGORITHMS()['fast_bias_correction']
+            fbc_algo.quantization_cfg = deepcopy(tune_cfg)
+            algo_scheduler.append_algorithm('post_quantization', fbc_algo)
+            logger.debug("Add fast bias correction as the post-quantization algo.")
+        # for weight correction
+        if recipe_cfgs and recipe_cfgs.get('weight_correction', False):
+            w_algo = ALGORITHMS()['weight_correction']
+            w_algo.quantization_cfg = deepcopy(tune_cfg)
+            algo_scheduler.append_algorithm('post_quantization', w_algo)
+            logger.debug("Add weight correction as the post-quantization algo.")
+
+    def traverse(self):
+        """Traverse the tuning space.
+
+        The main traverse logic, which can be overridden by concrete strategies that need more hooks.
+        """
+        self._eval_baseline()
+        logger.info("Use distributed traverse: {}".format(self.cfg.tuning.use_distributed_tuning))
+        if self.cfg.tuning.use_distributed_tuning:
+            return self.distributed_traverse()
+        trials_count = 0
+        traverse_start_time = time()
+        for op_tuning_cfg in self.next_tune_cfg():
+            tuning_start_time = time()
+            tune_cfg = self._tune_cfg_converter(op_tuning_cfg)
+            trials_count += 1
+            tuning_history = self._find_tuning_history(tune_cfg)
+            if tuning_history and trials_count < self.cfg.tuning.exit_policy.max_trials:
+                self.last_tune_result = tuning_history['last_tune_result']
+                self.best_tune_result = tuning_history['best_tune_result']
+                logger.warn("Found an already-evaluated tuning config, skip.")
+                continue
+            self._remove_redundant_qmodel()
+            logger.debug("Dump current tuning configuration:")
+            logger.debug(tune_cfg)
+            self.tuning_times += 1
+            # set the parameters for the pre-quantization algos and run
+            self.set_param_for_pre_quantization_algos(self.algo_scheduler, tune_cfg, self.model)
+            self.model = self.algo_scheduler('pre_quantization')
+            # quantize
+            q_model = self.adaptor.quantize(copy.deepcopy(tune_cfg), self.model, self.calib_dataloader, self.q_func)
+            assert self.adaptor.pre_optimized_model
+            # set the parameters for the post-quantization algos and run
+            self.set_param_for_post_quantization_algos(self.algo_scheduler, tune_cfg, self.adaptor.pre_optimized_model,
+                                                       q_model)
+            self.last_qmodel = self.algo_scheduler('post_quantization')
+            self.last_tune_cfg = copy.deepcopy(tune_cfg)
+            # Remove the reference to the model
+            self.algo_scheduler.reset_exec_algorithms()
+            assert self.last_qmodel
+            # Return the last quantized model as the result if performance_only is set.
+            if self.cfg.tuning.exit_policy.performance_only:
+                self.best_qmodel = self.last_qmodel
+                self._add_tuning_history(copy.deepcopy(tune_cfg), (-1, [0]), q_config=self.last_qmodel.q_config)
+                return
+            self.last_tune_result = self._evaluate(self.last_qmodel)
+            self.cur_best_acc, self.cur_best_tuning_cfg = self.update_best_op_tuning_cfg(op_tuning_cfg)
+            need_stop = self.stop(self.cfg.tuning.exit_policy.timeout, trials_count)
+
+            # record the tuning history
+            saved_tune_cfg = copy.deepcopy(tune_cfg)
+            saved_last_tune_result = copy.deepcopy(self.last_tune_result)
+            self._add_tuning_history(saved_tune_cfg,
+                                     saved_last_tune_result,
+                                     q_config=q_model.q_config)
+            self.tune_result_record.append(copy.deepcopy(self.last_tune_result))
+            self.tune_cfg = tune_cfg
+            now_time = time()
+            acc_res_msg = ""
+            performance_res_msg = ""
+            if self.tuning_result_data:
+                acc_res_msg = "[ " + "| ".join(self.tuning_result_data[0]) + " ]"
+                performance_res_msg = "[ " + "| ".join(self.tuning_result_data[1]) + " ]"
+            logger.debug(f"*** The accuracy of last tuning is: {acc_res_msg}")
+            logger.debug(f"*** The performance of last tuning is: {performance_res_msg}")
+            logger.debug(f"*** The last tuning time: {(now_time - tuning_start_time):.2f} s")
+            logger.debug(f"*** The tuning process lasted time: {(now_time - traverse_start_time):.2f} s")
+
+            self._dump_tuning_process_statistics()
+            if need_stop:
+                if self.re_quant:
+                    logger.info("*** Do not stop the tuning process, re-quantize the ops.")
+                    continue
+                # recover the best quantized model from tuning config
+                self._recover_best_qmodel_from_tuning_cfg()
+                if self.cfg.tuning.diagnosis and self.cfg.tuning.diagnosis.diagnosis_after_tuning:
+                    logger.debug('*** Start to do diagnosis (inspect tensor).')
+                    self._diagnosis()
+                if self.use_multi_objective and len(self.tune_result_record) > 1 and \
+                        self.best_tune_result is not None:
+                    best_trial, best_result = self.objectives.best_result(self.tune_result_record,
+                                                                          copy.deepcopy(self.baseline))
+                    if best_result != self.best_tune_result:
+                        from neural_compressor.utils.utility import recover
+                        self.best_qmodel = recover(self.model.model,
+                                                   os.path.join(self.cfg.tuning.workspace.path, 'history.snapshot'),
+                                                   best_trial)
+                        logger.debug("*** Update the best qmodel by recovering from history.")
+                        self.best_tune_result = best_result
+                    self._dump_tuning_process_statistics()
+                break
+        self._recover_best_qmodel_from_tuning_cfg()
+
+    def _remove_redundant_qmodel(self):
+        """Remove the redundant quantized model to reduce memory use.
+
+        During the tuning process, the strategy only keeps the best tuning config
+        instead of the best quantized model to reduce memory use.
+        """
+        self.last_qmodel = None
+        self.best_qmodel = None
+
+    def _can_create_eval_func_from_cfg(self):
+        """Determine whether an eval function can be created from cfg.
+
+        Returns:
+            Returns True if the eval func can be created from config, False otherwise.
+        """
+        if self.cfg.evaluation and self.cfg.evaluation.accuracy and \
+                (self.cfg.evaluation.accuracy.metric or self.cfg.evaluation.accuracy.multi_metrics) \
+                and self.eval_dataloader:
+            return True
+        return False
+
+    def _eval_baseline(self):
+        """Evaluate the fp32 model if needed."""
+        if not self._can_create_eval_func_from_cfg() and not self.eval_func:
+            logger.info("Neither evaluation function nor metric is defined."
\ + " Generate a quantized model with default quantization configuration.") + self.cfg.tuning.exit_policy.performance_only = True + logger.info("Force setting 'tuning.exit_policy.performance_only = True'.") + + if not self.cfg.tuning.exit_policy.performance_only: + # get fp32 model baseline + if self.baseline is None: + logger.info("Get FP32 model baseline.") + self._fp32_model = self.model + self.baseline = self._evaluate(self.model) + self.objectives.baseline = self.baseline + # record the FP32 baseline + self._add_tuning_history() + self.show_baseline_info() + + def _recover_best_qmodel_from_tuning_cfg(self): + """Recover the best quantized model from tuning config.""" + if self.best_tuning_cfg and not self.best_qmodel: + self.best_qmodel = self.adaptor.quantize(copy.deepcopy(self.best_tuning_cfg), self.model, + self.calib_dataloader, self.q_func) + + def _fallback_started(self): + self.fallback_start_point = self.tuning_times + + def _update_optype_statistics(self): + self._optype_statistics = defaultdict(lambda:defaultdict(int)) + + for op_name_type, op_tune_cfg in self.tune_cfg['op'].items(): + optype = op_name_type[1] + quant_mode = op_tune_cfg['activation']['quant_mode'] + if isinstance(quant_mode, tuple) or isinstance(quant_mode, list): + quant_mode = quant_mode[0] + dtype = 'INT8' if quant_mode in ('static', 'dynamic') \ + else quant_mode.upper() + self._optype_statistics[optype]['Total'] += 1 + self._optype_statistics[optype][dtype] += 1 + return + + def _dump_tuning_process_statistics(self): + self._update_optype_statistics() + + logger.debug("Current tuning process statistics:") + logger.debug(f"Total Tuning Times: {self.tuning_times}") + logger.debug("Fallback started at Tune {}".format(self.fallback_start_point)) + logger.debug("Objective(s) met at Tune {}".format(self.metric_met_point)) + + fallback_stats = self._calculate_fallback_op_count() + if self.fallback_stats_baseline == None: + self.fallback_stats_baseline = fallback_stats + logger.debug(f"Fallbacked ops count: {self.fallback_stats_baseline - fallback_stats}") + + if isinstance(self.adaptor, TensorFlowAdaptor): + self._compare_optype_statistics() + + return + + def _calculate_fallback_op_count(self, target_dtype='INT8'): + fallback_stats = defaultdict(int) + + for optype in self._optype_statistics: + for dtype, count in self._optype_statistics[optype].items(): + fallback_stats[dtype] += count + + return fallback_stats[target_dtype] + + + def _compare_optype_statistics(self, fields=None, optypes=None, + skip_fields=None, skip_optypes=None): + assert(fields == None or skip_fields == None) + assert(optypes == None or skip_optypes == None) + if not isinstance(self.adaptor, TensorFlowAdaptor): + logger.debug("OpType statistics comparation is only available for TensorFlow adaptor.") + return + + adaptor_statistics = self.adaptor.optype_statistics + + def _field_skipped(field): + if fields != None: + return field not in fields + elif skip_fields != None: + return field in skip_fields + + def _optype_skipped(optype): + if optypes != None: + return optype not in optypes + elif skip_optypes != None: + return optype in skip_optypes + + + field_names = adaptor_statistics[0][1:] + adaptor_data = { + line[0].lower() : {dtype : count for dtype, count in zip(field_names, line[1:])} + for line in adaptor_statistics[1]} + strategy_data = self._optype_statistics + + # compare adaptor statistics to strategy statistics + logger.debug("Statistics difference between adaptor and tuning config:") + has_difference = False + difference_count 
= 0 + for optype in adaptor_data: + if optype not in strategy_data or _optype_skipped(optype): continue + for field in field_names: + if _field_skipped(field): continue + adaptor_count = adaptor_data[optype][field] + strategy_count = strategy_data[optype][field] + if adaptor_count != strategy_count: + has_difference = True + if field == 'INT8': + difference_count += abs(strategy_count - adaptor_count) + logger.debug("\t{}: [adaptor: {} | tune_cfg: {}]".format( + (optype, field), adaptor_count, strategy_count)) + if not has_difference: + logger.debug("\tNone") + logger.debug(f"\tDifference(s) in total: {difference_count}") + return + + def initial_tuning_cfg(self): + """Init the tuning config. + + Initialize the tuning config according to the quantization approach. + + Returns: + op_item_dtype_dict (OrderedDict): key is (op_name, op_type); value is quantization mode. + quant_mode_wise_items (OrderedDict): key is quant_mode/precision; value is item list. + initial_op_tuning_cfg (OrderedDict): key is (op_name, op_type); value is the initialized tuning config. + """ + from .utils.constant import auto_query_order, static_query_order, dynamic_query_order + from .utils.tuning_space import initial_tuning_cfg_with_quant_mode + if self.cfg.quantization.approach == 'post_training_auto_quant': + query_order = auto_query_order + elif self.cfg.quantization.approach == 'post_training_dynamic_quant': + query_order = dynamic_query_order + elif self.cfg.quantization.approach == 'post_training_static_quant': + query_order = static_query_order + elif self.cfg.quantization.approach == 'quant_aware_training': + logger.info("!!! Currently, the qat tuning is not supported by strategy.") + query_order = auto_query_order + + quant_mode_wise_items = OrderedDict() # mode, op_item_lst + pre_items = set() + # Collect op items supported the specified mode. 
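+        # Worked example (hypothetical ops): with query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'],
+        # an op that supports both 'static' and 'fp32' is grouped under 'static' only, because
+        # pre_items already contains it by the time 'fp32' is queried.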
+        for quant_mode in query_order:
+            items = self.tuning_space.query_items_by_quant_mode(quant_mode)
+            filtered_items = list(filter(lambda item: item not in pre_items, items))
+            pre_items = pre_items.union(set(items))
+            quant_mode_wise_items[quant_mode] = filtered_items
+
+        def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict):
+            for item in items_lst:
+                op_item_dtype_dict[item.name] = target_quant_mode
+
+        op_item_dtype_dict = OrderedDict()
+        for quant_mode, quant_mode_items in quant_mode_wise_items.items():
+            initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict)
+
+        initial_op_tuning_cfg = {}
+        for op_name_type, quant_mode in op_item_dtype_dict.items():
+            initial_op_tuning_cfg[op_name_type] = initial_tuning_cfg_with_quant_mode(op_name_type,
+                                                                                     quant_mode,
+                                                                                     self.tuning_space)
+        return op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg
+
+    def show_baseline_info(self):
+        """Display the accuracy and duration of the baseline model."""
+        if self.baseline:
+            self.tune_data['baseline'] = self.baseline[0] if \
+                isinstance(self.baseline[0], list) else [self.baseline[0]]
+            for name, data in zip(self.metric_name, self.tune_data['baseline']):
+                self.tune_data[name] = [data]
+            if self.metric_weight:
+                # baseline is weighted accuracy
+                self.tune_data['Weighted accuracy'] = \
+                    [np.mean(np.array(self.tune_data['baseline']) * self.metric_weight)]
+                self.tune_data['baseline'] = self.tune_data['Weighted accuracy']
+            baseline_msg = '[Accuracy:' + \
+                ''.join([' {:.4f}'.format(i) for i in self.tune_data['baseline']]) + \
+                ''.join([', {}: {:.4f}'.format(x, y) for x, y in zip( \
+                    self.objectives.representation, self.baseline[1]) if x != 'Accuracy']) + ']'
+        else:  # pragma: no cover
+            if self.metric_weight:
+                self.tune_data['Weighted accuracy'] = ['n/a']
+            self.tune_data['baseline'] = ['n/a']
+
+            for name, data in zip(self.metric_name, self.tune_data['baseline']):
+                self.tune_data[name] = ['n/a']
+            baseline_msg = 'n/a'
+        logger.info("FP32 baseline is: {}".format(baseline_msg))
+
+    def initial_best_acc(self):
+        """Init the best accuracy.
+
+        Returns:
+            The initial value of best accuracy.
+        """
+        if len(self.metric_name) == 1 or self.metric_weight is not None:
+            best_acc = float('-inf') if self.higher_is_better else float('inf')
+        else:
+            best_acc = [float('-inf') if higher_is_better else float('inf') for \
+                        higher_is_better in self.metric_criterion]
+        return best_acc
+
+    def _tune_cfg_converter(self, op_tuning_cfg):
+        """Convert op_tuning_cfg for adaptor.
+
+        Args:
+            op_tuning_cfg (Dict): the op tuning config.
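+
+        Returns:
+            tune_cfg (dict): The converted tuning config consumed by the adaptor, including
+                the op-wise states, calibration settings, approach, and recipe configs.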
+ """ + tune_cfg = {'op': OrderedDict()} + for op_name_type, op_config in op_tuning_cfg.items(): + if isinstance(op_config, OpTuningConfig): + tune_cfg['op'][op_name_type] = op_config.get_state() + op_cap_lst = self.capability['opwise'][op_name_type] + # Add pattern for diagnosis + for op_cap in op_cap_lst: + if 'pattern' in op_cap: + op_pattern = {} + op_pattern['sequence'] = op_cap['pattern']['sequence'][0] if\ + 'sequence' in op_cap['pattern'] else None + op_pattern['precision'] = op_cap['pattern']['precision'][0] if\ + 'precision' in op_cap['pattern'] else None + tune_cfg['op'][op_name_type]['pattern'] = op_pattern + else: + tune_cfg[op_name_type] = op_config + tune_cfg['calib_sampling_size'] = op_tuning_cfg['calib_sampling_size'] + if self.calib_dataloader is not None: + tune_cfg['calib_iteration'] = math.ceil(int(tune_cfg['calib_sampling_size']) / \ + self.calib_dataloader.batch_size) + else: + tune_cfg['calib_iteration'] = 1 + tune_cfg['advance'] = self.cfg.quantization.advance + tune_cfg['approach'] = self.cfg.quantization.approach + # Add the recipe config + tune_cfg['recipe_cfgs'] = tune_cfg.get('recipe_cfgs', {}) + # For not tuning recipe, tune cfg use it directly + tune_cfg['recipe_cfgs'].update(self._not_tuning_recipes_values) + # WA for get the smooth quant args + if 'smooth_quant_args' in self.cfg_bk.quantization.recipes: + tune_cfg['recipe_cfgs']['smooth_quant_args'] = self.cfg_bk.quantization.recipes['smooth_quant_args'] + # For tuning recipe, use the default value if it not specified by recipe tuning sampler. + for recipe_name, recipe_val in self._tuning_recipes_default_values.items(): + if recipe_name not in tune_cfg['recipe_cfgs']: + tune_cfg['recipe_cfgs'][recipe_name] = recipe_val + return tune_cfg + + def set_tuning_space(self, conf): + """Create the tuning space. + + Create the tuning space based on the framework capability and user configuration. + + Args: + conf: The Conf class instance includes all user configurations. + """ + calib_sampling_size_lst = self.cfg.quantization.calibration.sampling_size + calib_sampling_size_lst = [int(calib_sampling_size) for calib_sampling_size in calib_sampling_size_lst] + if self.calib_dataloader: + self.calib_iter = [math.ceil(int(x) / self.calib_dataloader.batch_size) \ + for x in calib_sampling_size_lst] + else: + self.calib_iter = 1 + # create tuning space + adaptor_cap = { + 'calib': {'calib_sampling_size': calib_sampling_size_lst}, + 'op': self.capability['opwise'] + } + self.tuning_space = TuningSpace(adaptor_cap, conf=conf, framework=self.framework) + + def setup_resume(self, resume): + """Resume the best quantized model from tuning history. + + Args: + resume: The dict containing resume information. 
+ """ + self.__dict__.update(resume) + for history in self.tuning_history: + if self._same_yaml(history['cfg'], self.cfg): + self.__dict__.update({k: v for k, v in history.items() \ + if k not in ['version', 'history']}) + logger.info("Start to resume tuning process.") + # resume the best tuning model if needed + try: + index = history['id'] - 1 + resume_tuning_cfg = history['history'][index]['tune_cfg'] + self.best_qmodel = self.adaptor.quantize(resume_tuning_cfg, + self.model, + self.calib_dataloader, + self.q_func) + except: + logger.debug("Can not resume the best quantize model from history.") + + break + + def set_q_func(self): + """Set the training function for quantization aware training.""" + if self.q_func == None and self.cfg.quantization.approach == 'quant_aware_training': + train_cfg = self.cfg.quantization.train + assert train_cfg, "train field of quantization section in yaml file must " \ + "be configured for quantization aware training if q_func is NOT set." + assert self.calib_dataloader, "dataloader field of train field of quantization " \ + "section in yaml file must be configured." + self.q_func = create_train_func(self.framework, self.calib_dataloader, \ + self.adaptor, train_cfg, hooks=self.q_hooks) + + def _create_path(self, custom_path, filename): + new_path = os.path.join(os.path.abspath(os.path.expanduser(custom_path)),filename) + path = Path(os.path.dirname(new_path)) + path.mkdir(exist_ok=True, parents=True) + return new_path + + def _set_framework_info(self, q_dataloader, q_func=None): + framework_specific_info = {'device': self.cfg.device, + 'approach': self.cfg.quantization.approach, + 'random_seed': self.cfg.tuning.random_seed, + 'performance_only': self.cfg.tuning.exit_policy.performance_only,} + framework = self.cfg.model.framework.lower() + framework_specific_info.update({'backend': self.cfg.model.get('backend', 'default')}) + framework_specific_info.update({'format': self.cfg.model.get('quant_format', 'default')}) + framework_specific_info.update({'domain': self.cfg.model.get('domain', 'auto')}) + + self.mixed_precision_mode = bool('mixed_precision' in self.cfg) or \ + bool('graph_optimization' in self.cfg) + + if 'tensorflow' in framework: + framework_specific_info.update( + {"inputs": self.cfg.model.inputs, + "outputs": self.cfg.model.outputs, + 'workspace_path': self.cfg.tuning.workspace.path, + 'recipes': self.cfg.quantization.recipes, + 'use_bf16': self.cfg.use_bf16 if self.cfg.use_bf16 is not None else False}) + for item in ['scale_propagation_max_pooling', 'scale_propagation_concat']: + if item not in framework_specific_info['recipes']: + framework_specific_info['recipes'].update({item: True}) + if self.cfg.model.backend == 'itex': + self.cfg.model.framework = 'tensorflow_itex' + framework = 'tensorflow_itex' + if 'keras' in framework: + framework_specific_info.update({ + 'workspace_path': self.cfg.tuning.workspace.path, }) + if framework == 'mxnet': + framework_specific_info.update({"q_dataloader": q_dataloader}) + if 'onnx' in framework.lower(): + if self.mixed_precision_mode: + framework_specific_info.update({"approach": "post_training_dynamic_quant"}) + framework_specific_info.update({"deploy_path": os.path.dirname(self.deploy_path)}) + framework_specific_info.update({'workspace_path': self.cfg.tuning.workspace.path}) + framework_specific_info.update({'recipes': self.cfg.quantization.recipes}) + framework_specific_info.update({'reduce_range': self.cfg.reduce_range}) + framework_specific_info.update({'recipes': 
self.cfg.quantization.get('recipes', {})}) + if framework.lower() == 'onnxrt_qdq' or \ + framework_specific_info['backend'] == 'onnxrt_trt_ep': + framework_specific_info.update({'format': 'QDQ'}) + framework = 'onnxrt_qdq' + if framework == 'pytorch_ipex' or framework == 'pytorch' or framework == 'pytorch_fx': + if self.cfg.model.backend == 'ipex': + self.cfg.model.framework = 'pytorch_ipex' + framework = 'pytorch_ipex' + elif self.cfg.model.backend == 'default': + self.cfg.model.framework = 'pytorch_fx' + framework = 'pytorch_fx' + if self.mixed_precision_mode: + framework_specific_info.update({"approach": "post_training_dynamic_quant"}) + framework_specific_info.update({"q_dataloader": q_dataloader}) + framework_specific_info.update({"use_bf16": self.cfg.use_bf16 \ + if self.cfg.use_bf16 is not None else True}) + framework_specific_info.update( + {"workspace_path": os.path.dirname(self.deploy_path)}) + if self.cfg['quantization']['op_wise'] is not None \ + and 'default_qconfig' in self.cfg['quantization']['op_wise']: + framework_specific_info.update( + {"default_qconfig": self.cfg['quantization']['op_wise']['default_qconfig']}) + framework_specific_info.update({"q_func": q_func}) + framework_specific_info.update({"example_inputs": self.cfg.quantization.example_inputs}) + return framework, framework_specific_info + + def _set_objectives(self): + self.higher_is_better = bool(self.cfg.tuning.accuracy_criterion.higher_is_better) + self.use_multi_objective = deep_get(self.cfg, 'tuning.multi_objectives') and \ + len(self.cfg.tuning.multi_objectives.objective) > 1 + objectives = [i.lower() for i in self.cfg.tuning.multi_objectives.objective] if \ + self.use_multi_objective else [self.cfg.tuning.objective.lower()] + self.metric_weight = deep_get(self.cfg, 'evaluation.accuracy.multi_metrics.weight') + self.metric_name = ['Accuracy'] if \ + not deep_get(self.cfg, 'evaluation.accuracy.multi_metrics') else \ + self.cfg.evaluation.accuracy.multi_metrics.keys()-{'weight','higher_is_better'} + if len(self.metric_name) == 1: + self.metric_criterion = [self.higher_is_better] + elif not deep_get(self.cfg, 'evaluation.accuracy.multi_metrics.higher_is_better'): + # default is True + self.metric_criterion = [True] * len(self.metric_name) + else: + self.metric_criterion = \ + deep_get(self.cfg, 'evaluation.accuracy.multi_metrics.higher_is_better') + + self.objectives = MultiObjective(objectives, + self.cfg.tuning.accuracy_criterion, + self.metric_criterion, + self.metric_weight, + deep_get(self.cfg, 'tuning.multi_objectives.higher_is_better'), + deep_get(self.cfg, 'tuning.multi_objectives.weight')) + + def _same_yaml(self, src_yaml, dst_yaml): + """Check if the two yamls are the same. + + The check will exclude those keys which do not really impact the tuning result, such as + tensorboard, workspace, resume options under the tuning section of YAML. + """ + if equal_dicts(src_yaml, dst_yaml, ignore_keys=['tuning']) and \ + equal_dicts(src_yaml.tuning, src_yaml.tuning, compare_keys=['objective', + 'accuracy_criterion', + 'random_seed', + 'exit_policy']): + return True + + return False + + def update_best_op_tuning_cfg(self, op_tuning_cfg): + """Track and update the best tuning config with correspondence accuracy result. + + Args: + op_tuning_cfg: The tuning config. + + Returns: + The current best tuning results and corresponding configurations. 
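+
+        Example::
+            # hypothetical single-metric case with higher_is_better=True:
+            # last_tune_result = (0.76, [120.5]) and cur_best_acc = 0.75
+            # -> cur_best_acc becomes 0.76 and cur_best_tuning_cfg is updated.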
+ """ + acc, _ = self.last_tune_result + if self.cur_best_tuning_cfg is None: + self.cur_best_tuning_cfg = copy.deepcopy(op_tuning_cfg) + if not isinstance(acc, list) and ((self.higher_is_better and acc >= self.cur_best_acc) \ + or (not self.higher_is_better and acc <= self.cur_best_acc)): + self.cur_best_acc = acc + self.cur_best_tuning_cfg = copy.deepcopy(op_tuning_cfg) + elif len(self.metric_name) > 1 and self.metric_weight is not None: + acc = np.mean(np.array(acc) * self.metric_weight) + if (self.higher_is_better and acc >= self.cur_best_acc) or \ + (not self.higher_is_better and acc <= self.cur_best_acc): + self.cur_best_acc = acc + self.cur_best_tuning_cfg = copy.deepcopy(op_tuning_cfg) + elif len(self.metric_name) > 1 and self.metric_weight is None: + if all([acc_i >= best_i if higher_is_better else acc_i <= best_i for \ + acc_i, best_i, higher_is_better in \ + zip(acc, self.cur_best_acc, self.metric_criterion)]): + self.cur_best_acc = acc + self.cur_best_tuning_cfg = copy.deepcopy(op_tuning_cfg) + logger.debug(f"Best acc is {self.cur_best_acc}.") + return self.cur_best_acc, self.cur_best_tuning_cfg + + def deploy_config(self): + """Save the configuration locally for deployment.""" + acc_dataloader_cfg = deep_get(self.cfg, 'evaluation.accuracy.dataloader') + perf_dataloader_cfg = deep_get(self.cfg, 'evaluation.performance.dataloader') + # use acc dataloader if perf dataloader is not configured + if perf_dataloader_cfg is None: + perf_dataloader_cfg = acc_dataloader_cfg + + self.deploy_cfg = OrderedDict() + # int8 dataloader graph transform + if deep_get(perf_dataloader_cfg, 'transform.QuantizedInput') is not None \ + or deep_get(acc_dataloader_cfg, 'transform.QuantizedInput') is not None: + self.best_qmodel, scale = self.adaptor.quantize_input(self.best_qmodel) + deep_set(perf_dataloader_cfg, 'transform.QuantizedInput.dtype', 'int8') + deep_set(perf_dataloader_cfg, 'transform.QuantizedInput.scale', scale) + deep_set(acc_dataloader_cfg, 'transform.QuantizedInput.dtype', 'int8') + deep_set(acc_dataloader_cfg, 'transform.QuantizedInput.scale', scale) + + self.deploy_cfg['model'] = self.cfg.model + self.deploy_cfg['device'] = self.cfg.device + if self.cfg.evaluation is not None: + deep_set(self.cfg, 'evaluation.performance.dataloader',\ + perf_dataloader_cfg) + deep_set(self.cfg, 'evaluation.accuracy.dataloader', \ + acc_dataloader_cfg) + self.deploy_cfg['evaluation'] = self.cfg.evaluation + + def setup_yaml(): + represent_dict_order = lambda self, \ + data: self.represent_mapping('tag:yaml.org,2002:map', data.items()) + yaml.add_representer(OrderedDict, represent_dict_order) + yaml.add_representer(DotDict, represent_dict_order) + setup_yaml() + with open(self.deploy_path, 'w+') as f: + yaml.dump(self.deploy_cfg, f) + logger.info("Save deploy yaml to {}".format(self.deploy_path)) + + def _get_common_cfg(self, model_wise_cfg, op_wise_cfgs): + """Get the common parts from the model_wise_cfg. + + This function is focused on composing the configuration that consists of + model-wise field and op-wise unique field data. + + Args: + model_wise_cfg ([DotDict]): The model-wise configuration. + op_wise_cfgs ([List]): The list of each op's config in DotDict type. + + Returns: + [DotDict]: The combined configration with the op-wise unique field. 
+ """ + model_wise_keys = model_wise_cfg.keys() + + result = op_wise_cfgs[0] + for each_op_wise_cfg in op_wise_cfgs: + tmp_cfg = {} + for k in model_wise_keys: + tmp_cfg[k] = each_op_wise_cfg[k] + + if model_wise_cfg == tmp_cfg: + result = each_op_wise_cfg + break + + return result + + @property + def evaluation_result(self): + """Evaluate the given model. + + Returns: + The objective value evaluated. + """ + return self._evaluate(self.model) + + def _evaluate(self, model): + """Interface of evaluating model. + + Args: + model (object): The model to be evaluated. + + Returns: + Objective: The objective value evaluated. + """ + if self.eval_func: + if self.cfg.tuning.tensorboard: + # Pytorch can insert observer to model in this hook. + # Tensorflow don't support this mode for now + model = self.adaptor._pre_eval_hook(model) + val = self.objectives.evaluate( + self.eval_func, model if self.framework == "pytorch_ipex" else model.model + ) + if self.cfg.tuning.tensorboard: + # post_eval_hook to deal the tensor + self.adaptor._post_eval_hook(model, accuracy=val[0]) + else: + assert self.cfg.evaluation and self.cfg.evaluation.accuracy and \ + (self.cfg.evaluation.accuracy.metric or \ + self.cfg.evaluation.accuracy.multi_metrics), \ + "metric or multi_metrics field of accuracy field of evaluation" \ + " section should not be empty" + + postprocess_cfg = self.cfg.evaluation.accuracy.postprocess + metric_cfg = self.cfg.evaluation.accuracy.metric if \ + self.cfg.evaluation.accuracy.metric else \ + self.cfg.evaluation.accuracy.multi_metrics + iteration = -1 if self.cfg.evaluation.accuracy.iteration is None \ + else self.cfg.evaluation.accuracy.iteration + eval_func = create_eval_func(self.framework, + self.eval_dataloader, + self.adaptor, + metric_cfg, + postprocess_cfg, + iteration, + tensorboard = self.cfg.tuning.tensorboard, + fp32_baseline = self.baseline == None) + + if getattr(self.eval_dataloader, 'distributed', False): + if 'tensorflow' in self.framework: + import horovod.tensorflow as hvd + elif self.framework in ['pytorch_ipex','pytorch','pytorch_fx']: + import horovod.torch as hvd + else: + raise NotImplementedError("Currently only TensorFlow and PyTorch " + "support distributed inference in PTQ.") + hvd.init() + try: + len_dataloader = len(self.eval_dataloader) + except: + logger.info("The length of the distributed dataloader is unknown." + "When the iteration of evaluation dataloader in each " + "process is inconsistent, an error may occur.") + else: + list_len_dataloader = hvd.allgather_object(len_dataloader) + if hvd.rank() == 0: + for i in range(len(list_len_dataloader)-1): + if list_len_dataloader[i] != list_len_dataloader[i+1]: + raise AttributeError("The evaluation dataloader's iteration is" + "different between processes, please reset " + "dataloader's batch_size.") + val = self.objectives.evaluate(eval_func, model) + if isinstance(val[0], list): + assert all([np.isscalar(i) for i in val[0]]), \ + "The eval_func should return a scalar or list of scalar, " \ + "but not {}!".format(str([type(i) for i in val[0]])) + else: + assert np.isscalar(val[0]), \ + "The eval_func should return a scalar or list of scalar, " \ + "but not {}!".format(str(type(val[0]))) + + return val + + def __getstate__(self): + """Magic method for pickle saving. + + Returns: + dict: Saved dict for resuming + """ + return {'tuning_history': self.tuning_history} + + def __setstate__(self, d): + """Magic method for pickle loading. + + Args: + d (dict): The dict to load. 
+ """ + self.__dict__.update(d) + + def stop(self, timeout, trials_count): + """Check if need to stop traverse. + + Check if need to stop traversing the tuning space, either accuracy goal is met or timeout is reach. + + Returns: + bool: True if need stop, otherwise False + """ + need_stop = False + if self.cfg.tuning.exit_policy.performance_only or \ + self.objectives.compare(self.best_tune_result, self.baseline): + self.best_tune_result = self.last_tune_result + self.best_qmodel = self.last_qmodel + self.best_tuning_cfg = copy.deepcopy(self.last_tune_cfg) + logger.debug(f"*** Update the best qmodel with the result {self.best_tune_result}") + if self.metric_met_point == 0: + self.metric_met_point = self.tuning_times + + # track the model with highest acc + if self.best_tune_result and self.last_tune_result: # (acc, [perf]) + if self.re_quant and self.objectives.accuracy_meets(): + self.best_tune_result = self.last_tune_result + self.best_qmodel = self.last_qmodel + self.best_tuning_cfg = copy.deepcopy(self.last_tune_cfg) + logger.debug(f"*** Update the best qmodel with the result {self.best_tune_result}.") + else: + logger.debug(f"*** Accuracy not meets the requirements, do not update the best qmodel.") + + if self.last_tune_result: + last_tune = self.last_tune_result[0] if \ + isinstance(self.last_tune_result[0], list) else [self.last_tune_result[0]] + + for name, data in zip(self.metric_name, last_tune): + if len(self.tune_data[name]) == 1: + self.tune_data[name].append(data) + else: + self.tune_data[name][1] = data + + if self.metric_weight and len(last_tune) > 1: + weighted_acc = np.mean(np.array(last_tune) * self.metric_weight) + + if len(self.tune_data['Weighted accuracy']) == 1: + self.tune_data['Weighted accuracy'].append(weighted_acc) + else: + self.tune_data['Weighted accuracy'][1] = weighted_acc + + last_tune = [weighted_acc] + + last_tune_msg = '[Accuracy (int8|fp32):' + \ + ''.join([' {:.4f}|{:.4f}'.format(last, base) for last, base in \ + zip(last_tune, self.tune_data['baseline'])]) + \ + ''.join([', {} (int8|fp32): {:.4f}|{:.4f}'.format( \ + x, y, z) for x, y, z in zip( \ + self.objectives.representation, self.last_tune_result[1], self.baseline[1]) \ + if x != 'Accuracy']) + ']' + else: # pragma: no cover + last_tune_msg = 'n/a' + for name in self.tune_data.keys() - {'baseline'}: + if len(self.tune_data[name]) == 1: + self.tune_data[name].append('n/a') + else: + self.tune_data[name][1] = 'n/a' + + if self.best_tune_result: + best_tune = self.best_tune_result[0] if isinstance(self.best_tune_result[0], list) \ + else [self.best_tune_result[0]] + + for name, data in zip(self.metric_name, best_tune): + if len(self.tune_data[name]) == 2: + self.tune_data[name].append(data) + else: + self.tune_data[name][2] = data + + if self.metric_weight and len(best_tune) > 1: + weighted_acc = np.mean(np.array(best_tune) * self.metric_weight) + + if len(self.tune_data['Weighted accuracy']) == 2: + self.tune_data['Weighted accuracy'].append(weighted_acc) + else: # pragma: no cover + self.tune_data['Weighted accuracy'][2] = weighted_acc + + best_tune = [weighted_acc] + + best_tune_msg = '[Accuracy:' + ''.join([' {:.4f}'.format(best) \ + for best in best_tune]) + ''.join([', {}: {:.4f}'.format(x,y) \ + for x,y in zip(self.objectives.representation, \ + self.best_tune_result[1]) if x != 'Accuracy']) + ']' + + else: + best_tune_msg = 'n/a' + for name in self.tune_data.keys() - {'baseline'}: + if len(self.tune_data[name]) == 2: + self.tune_data[name].append('n/a') + else: + self.tune_data[name][2] = 
'n/a' + + logger.info("Tune {} result is: {}, Best tune result is: {}".format(trials_count, + last_tune_msg, + best_tune_msg)) + output_data = [[info_type, + '{:.4f} '.format(self.tune_data[info_type][0]) if \ + not isinstance(self.tune_data[info_type][0], str) else self.tune_data[info_type][0], + '{:.4f} '.format(self.tune_data[info_type][1]) if \ + not isinstance(self.tune_data[info_type][1], str) else self.tune_data[info_type][1], + '{:.4f} '.format(self.tune_data[info_type][2]) if \ + not isinstance(self.tune_data[info_type][2], str) else self.tune_data[info_type][2]] \ + for info_type in self.tune_data.keys() if info_type != 'baseline'] + + output_data.extend([[obj, + '{:.4f} '.format(self.baseline[1][i]) if self.baseline else 'n/a', + '{:.4f} '.format(self.last_tune_result[1][i]) if self.last_tune_result else 'n/a', + '{:.4f} '.format(self.best_tune_result[1][i]) if self.best_tune_result else 'n/a'] \ + for i, obj in enumerate(self.objectives.representation)]) + self.tuning_result_data = output_data + Statistics(output_data, + header='Tune Result Statistics', + field_names=['Info Type', 'Baseline', 'Tune {} result'.format(trials_count), \ + 'Best tune result']).print_stat() + + + if self.cfg.tuning.exit_policy.performance_only: + need_stop = True + elif timeout == 0 and self.best_tune_result: + need_stop = True + elif trials_count >= self.cfg.tuning.exit_policy.max_trials: + need_stop = True + else: + need_stop = False + + return need_stop + + def _save(self): + """Save current tuning state to snapshot for resuming.""" + logger.info("Save tuning history to {}.".format(self.history_path)) + with fault_tolerant_file(self.history_path) as f: + pickle.dump(self, f, protocol=pickle.HIGHEST_PROTOCOL) + + def _find_tuning_history(self, tune_cfg): + """Check if the specified tune_cfg is evaluated or not on same yaml config. + + Args: + tune_cfg (dict): The tune_cfg to check if evaluated before. + + Returns: + tuning_history or None: The tuning history containing evaluated tune_cfg. + """ + for tuning_history in self.tuning_history: + # only check if a tune_cfg is evaluated under same yam config, excluding + # some fields in tuning section of yaml, such as tensorboard, snapshot, resume. + if self._same_yaml(tuning_history['cfg'], self.cfg): + for history in tuning_history['history']: + if history and history['tune_cfg'] == tune_cfg: + return tuning_history + + return None + + def _find_history(self, tune_cfg): + """Check if the specified tune_cfg is evaluated or not on same yaml config. + + Returns: + history or None: The history containing evaluated tune_cfg. + """ + for tuning_history in self.tuning_history: + # only check if a tune_cfg is evaluated under same yam config, excluding + # some fields in tuning section of yaml, such as tensorboard, snapshot, resume. + if self._same_yaml(tuning_history['cfg'], self.cfg): + for history in tuning_history['history']: + if history and history['tune_cfg'] == tune_cfg: + return history + return None + + def _find_self_tuning_history(self): + """Find self history dict. + + Returns: + history or None: The history for self. + """ + for tuning_history in self.tuning_history: + # only check if a tune_cfg is evaluated under same yam config, excluding + # some fields in tuning section of yaml, such as tensorboard, snapshot, resume. + if self._same_yaml(tuning_history['cfg'], self.cfg): + return tuning_history + + return None + + def _add_tuning_history(self, tune_cfg=None, tune_result=None, **kwargs): + """Add tuning config to tuining history. 
+
+        Note that this record is added under the same yaml config.
+        """
+        found = False
+        d = {'tune_cfg': tune_cfg, 'tune_result': tune_result}
+        for tuning_history in self.tuning_history:
+            if self._same_yaml(tuning_history['cfg'], self.cfg):
+                d.update(kwargs)
+                tuning_history['history'].append(d)
+                tuning_history['last_tune_result'] = self.last_tune_result
+                tuning_history['best_tune_result'] = self.best_tune_result
+                tuning_history['cfg'] = self.cfg
+                found = True
+                break
+
+        if not found:
+            tuning_history = {}
+            tuning_history['version'] = __version__
+            tuning_history['cfg'] = self.cfg
+            tuning_history['baseline'] = self.baseline
+            tuning_history['last_tune_result'] = self.last_tune_result
+            tuning_history['best_tune_result'] = self.best_tune_result
+            tuning_history['history'] = []
+            if tune_cfg and tune_result:
+                d.update(kwargs)
+                tuning_history['history'].append(d)
+            self.tuning_history.append(tuning_history)
+
+        self._save()
+
+    def _collect_ops_by_quant_mode(self, tune_cfg, quant_mode):
+        ops_lst = []
+        for op_info, op_config in tune_cfg.items():
+            if isinstance(op_config, OpTuningConfig) and quant_mode in op_config.op_quant_mode:
+                ops_lst.append(op_info)
+        return ops_lst
+
+    def _diagnosis(self):
+        import logging
+        logger = logging.getLogger("neural_compressor")
+        iteration_list = self.cfg.tuning.diagnosis.iteration_list
+        inspect_type = self.cfg.tuning.diagnosis.inspect_type
+        save_to_disk = self.cfg.tuning.diagnosis.save_to_disk
+        save_path = self.cfg.tuning.diagnosis.save_path
+        inspect_node_lst, updated_cfg = self.adaptor.diagnosis_helper(self._fp32_model,
+                                                                      self.last_qmodel,
+                                                                      self.tune_cfg,
+                                                                      save_path=save_path)
+        op_list = self.cfg.tuning.diagnosis.op_list
+        if not op_list:
+            op_list = list(inspect_node_lst)
+        else:
+            op_list = list(set(op_list).intersection(inspect_node_lst))
+
+        logger.debug(f'*** Start to inspect tensor: {op_list} in the fp32 model.')
+        self.adaptor.inspect_tensor(self._fp32_model,
+                                    dataloader=self.calib_dataloader,
+                                    op_list=op_list,
+                                    iteration_list=iteration_list,
+                                    inspect_type=inspect_type,
+                                    save_to_disk=save_to_disk,
+                                    save_path=save_path + '/fp32/',
+                                    quantization_cfg=updated_cfg)
+
+        logger.debug(f'*** Start to inspect tensor: {op_list} in the quantized model.')
+        self.adaptor.inspect_tensor(self.last_qmodel,
+                                    dataloader=self.calib_dataloader,
+                                    op_list=op_list,
+                                    iteration_list=iteration_list,
+                                    inspect_type=inspect_type,
+                                    save_to_disk=save_to_disk,
+                                    save_path=save_path + '/quan/',
+                                    quantization_cfg=updated_cfg)
diff --git a/neural_compressor/experimental/strategy/utils/__init__.py b/neural_compressor/experimental/strategy/utils/__init__.py
new file mode 100644
index 00000000000..1b730c7ded2
--- /dev/null
+++ b/neural_compressor/experimental/strategy/utils/__init__.py
@@ -0,0 +1,22 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +"""Intel Neural Compressor Strategy Utils.""" + +from .tuning_sampler import TuningSampler, OpWiseTuningSampler, OpTypeWiseTuningSampler, FallbackTuningSampler +from .tuning_structs import OpTuningConfig +from .tuning_space import TuningItem, TuningSpace diff --git a/neural_compressor/experimental/strategy/utils/constant.py b/neural_compressor/experimental/strategy/utils/constant.py new file mode 100644 index 00000000000..9cbeaa00859 --- /dev/null +++ b/neural_compressor/experimental/strategy/utils/constant.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Strategy constant.""" + +PRECISION_SET = {'bf16', 'fp16' , 'fp32',} +QUANT_MODE_SET = {'static', 'dynamic'} +QUNAT_BIT_SET = {'int8', 'uint8', 'int4', 'uint4'} + +TUNING_ITEMS_LST = [('activation','scheme'), ('activation','algorithm'), ('activation','granularity'), + ('weight','scheme'), ('weight','algorithm'), ('weight','granularity'), 'sampling_size'] + +PRECISION_SET_V2_0 = {'fp32', 'bf16'} + +auto_query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] +static_query_order = ['static', 'bf16', 'fp16', 'fp32'] +dynamic_query_order = ['dynamic', 'bf16', 'fp16', 'fp32'] + + +FALLBACK_RECIPES_SET = {'first_conv_or_matmul_quantization', 'last_conv_or_matmul_quantization' \ + 'pre_post_process_quantization'} \ No newline at end of file diff --git a/neural_compressor/experimental/strategy/utils/tuning_sampler.py b/neural_compressor/experimental/strategy/utils/tuning_sampler.py new file mode 100644 index 00000000000..63984f600dd --- /dev/null +++ b/neural_compressor/experimental/strategy/utils/tuning_sampler.py @@ -0,0 +1,463 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tuning sampler.""" + +from itertools import product +import copy +from collections import deque, OrderedDict, defaultdict +from typing import List, Dict, Any +from .tuning_space import TuningSpace, pattern_to_internal, pattern_to_path, quant_mode_from_pattern +from .tuning_structs import OpTuningConfig +from ....utils import logger + +TUNING_ITEM_PRIORITY = [('activation','scheme'), ('activation','algorithm'),('activation','granularity'), + ('activation','compute_dtype'), ('weight','scheme'), ('weight','algorithm'), \ + ('weight','granularity')] + + + +class TuningSamplerRegistry: + """Class decorator used to register all TuningSampler subclasses.""" + + sampler_dict = {} + + @classmethod + def register(cls, name): + """Register new tuning sampler. + + Args: + name: the name of new tuning sampler. + """ + def decorator(sampler): + assert name not in cls.sampler_dict, "Cannot have two sampler with the same name." + cls.sampler_dict[name] = sampler + return decorator + +class TuningOrder: + """Not displayed in API Docs.""" + + def __init__(self): + """For future use.""" + pass + + +class TuningSampler: + """Not displayed in API Docs. + + Basic class of tuning sampler. + """ + + def __init__(self, + tuning_space: TuningSpace, + tuning_order_lst: List[TuningOrder], + initial_op_tuning_cfg: Dict, + kwargs: Dict = {}): + """Init tuning sampler. + + Args: + tuning_space: The tuning space. + tuning_order_lst: The traverse orders. + initial_op_tuning_cfg: The initialized tuning config. + kwargs: other args. + """ + self.tuning_space = tuning_space + self.tuning_order_lst = tuning_order_lst + self.initial_op_tuning_cfg = initial_op_tuning_cfg + self.queue = deque() + # (op_name, op_type): [full_path1, full_path2,...] + self.op_complete_path = {} + + def __iter__(self, tune_cfg=None): + """Interface for generate the next tuning config.""" + pass + + def _set_dtype(self, op_name_type, config_args): + has_weight = op_name_type in self.tuning_space.ops_attr['weight'] + path = self.op_complete_path[op_name_type].get('activation', None) + config_args['activation_dtype'] = self.tuning_space.ops_data_type[op_name_type][path] + if has_weight: + path = self.op_complete_path[op_name_type].get('weight', None) + config_args['weight_dtype'] = self.tuning_space.ops_data_type[op_name_type][path] + + +class ModelWiseTuningSampler(TuningSampler): + """Not displayed in API Docs.""" + + def __init__(self, + tuning_space: TuningSpace, + tuning_items_priority: List[str], + tuning_order_lst: List[TuningOrder], + op_dtype_dict: Dict[tuple, str], + initial_op_tuning_cfg: Dict[tuple, OpTuningConfig]): + """Model type wise tuning sampler. + + step1. create a default tuning config for each op + step2. collect all tuning items and options, and build the model-wise traverse order + step3. yield the tuning item with option one by one, query the existence of tuning item + and specific option for one op if exist, use the default tuning config if not exist + + Args: + tuning_space: Tuning space. + tuning_items_priority: The priority to traverse the tuning items. + tuning_order_lst: The tuning orders. + op_dtype_dict: The (op name, op type) and its target data type. + initial_op_tuning_cfg: The initial tuning config. 
+ + """ + super().__init__(tuning_space, tuning_order_lst, initial_op_tuning_cfg) + + self.op_dtype_dict = op_dtype_dict + self.tuning_space = tuning_space + self.default_op_config = {} + tuning_items = defaultdict(set) # item name: options + for op_name_type, quant_mode in op_dtype_dict.items(): + full_path = self.tuning_space.get_op_default_path_by_pattern(op_name_type, quant_mode) + self.op_complete_path[op_name_type] = copy.deepcopy(full_path) + # step1, set the default config for each op + self.default_op_config[op_name_type] = tuning_space.get_default_config(op_name_type, quant_mode) + if quant_mode[0] == 'precision': continue + mode_items = copy.deepcopy(full_path) # TODO refactor the initialization method + # step2, collect all tuning items and their options + for att in mode_items: + if att not in full_path: continue + quant_mode_item = self.tuning_space.query_quant_mode_item_by_full_path(op_name_type ,full_path[att]) + for tuning_item in quant_mode_item.options: + tuning_items[tuning_item.name] = tuning_items[tuning_item.name].union(tuning_item.options) + self.tuning_items = tuning_items + + def __iter__(self): + """Yield the next tuning config. + + Yields: + The next tuning config. + """ + keys = self.tuning_items.keys() + for vals in product(*self.tuning_items.values()): + # traverse all possible combinations by model-wise level + tune_cfg = copy.deepcopy(self.initial_op_tuning_cfg) + for op_name_type, quant_mode in self.op_dtype_dict.items(): + if quant_mode[0] == 'precision': continue + all_exist_flag = True + for method_name, method_val in zip(keys, vals): + full_path = self.op_complete_path[op_name_type] + if method_name[0] not in full_path: continue + if not self.tuning_space.query_item_option(op_name_type, + full_path[method_name[0]], + method_name, method_val): + all_exist_flag = False + tune_cfg[op_name_type] = self.default_op_config[op_name_type] + break + if all_exist_flag: + config_args = dict(zip(keys, vals)) + self._set_dtype( op_name_type, config_args) + internal_pattern = pattern_to_internal(quant_mode) + quant_mode = quant_mode_from_pattern(internal_pattern) + tune_cfg[op_name_type] = OpTuningConfig(op_name_type[0], + op_name_type[1], + quant_mode, + self.tuning_space, + kwargs=config_args) + yield tune_cfg + + +class OpTypeWiseTuningSampler(TuningSampler): + """Not displayed in API Docs.""" + + def __init__(self, + tuning_space: TuningSpace, + tuning_items_priority: List[str], + tuning_order_lst: List[TuningOrder], + op_dtype_dict: Dict[tuple, str], + initial_op_tuning_cfg: Dict[tuple, OpTuningConfig]): + """Op type wise tuning sampler. + + Args: + tuning_space: Tuning space. + tuning_items_priority: The priority to traverse the tuning items. + tuning_order_lst: The tuning orders. + op_dtype_dict: The (op name, op type) and its target data type. + initial_op_tuning_cfg: The initial tuning config. 
+ """ + super().__init__(tuning_space, tuning_order_lst, initial_op_tuning_cfg) + tuning_items_priority = TUNING_ITEM_PRIORITY + # (op_type, quant_mode) : {tuning_item_name : [option1, option2]} + # {('activation', 'scheme'): ['sym', 'sym'], ('activation', 'algorithm'): ['minmax', 'kl', 'minmax', 'kl']} + + self.optype_quant_mode_option = {} + self.optype_quant_mode_items_name = defaultdict(list) + self.op_type_quant_mode_wise_combination = {} + self.op_dtype_dict = op_dtype_dict + self.default_op_config = {} + + for op_name_type, quant_mode in op_dtype_dict.items(): + full_path = self.tuning_space.get_op_default_path_by_pattern(op_name_type, quant_mode) + self.op_complete_path[op_name_type] = copy.deepcopy(full_path) + self.default_op_config[op_name_type] = self.tuning_space.get_default_config(op_name_type, quant_mode) + op_name, op_type = op_name_type + if quant_mode[0] == 'precision': continue + mode_items = copy.deepcopy(full_path) # TODO refactor the initialization method + op_type_quant_mode = (op_type, quant_mode) + filtered_tuning_items = [] + for item_name in tuning_items_priority: + att, method_name = item_name + if att not in mode_items: + continue + quant_mode_item = self.tuning_space.query_quant_mode_item_by_full_path(op_name_type ,full_path[att]) + item = quant_mode_item.get_option_by_name(item_name) + if item: + if op_type_quant_mode not in self.optype_quant_mode_option: + self.optype_quant_mode_option[op_type_quant_mode] = defaultdict(list) + self.optype_quant_mode_option[op_type_quant_mode][item_name] += item.options + filtered_tuning_items.append(item) + self.optype_quant_mode_items_name[op_type_quant_mode] = filtered_tuning_items + + for op_type_quant_mode, val in self.optype_quant_mode_option.items(): + options_lst = [] + # remove the duplicate options + for _, item_options in val.items(): + seen = set() + filter_options = [option for option in item_options if not (option in seen or seen.add(option))] + options_lst.append(filter_options) + op_type_quant_mode_vals = product(*options_lst) + self.op_type_quant_mode_wise_combination[op_type_quant_mode] = op_type_quant_mode_vals + + def __iter__(self): + """Yield the next tuning config. + + Yields: + The next tuning config. 
+ """ + new_tune_cfg = copy.deepcopy(self.initial_op_tuning_cfg) + for options_lst in product(*self.op_type_quant_mode_wise_combination.values()): + for index, op_type_quant_mode in enumerate(self.op_type_quant_mode_wise_combination.keys()): + for op_name_type, quant_mode in self.op_dtype_dict.items(): + if op_name_type[1] == op_type_quant_mode[0] and quant_mode == op_type_quant_mode[1]: + op_tuning_items = [item.name for item in \ + self.optype_quant_mode_items_name[op_type_quant_mode]] + op_tuning_item_vals = options_lst[index] + all_exist_flag = True + for method_name, method_val in zip(op_tuning_items, op_tuning_item_vals): + full_path = self.op_complete_path[op_name_type] + if not self.tuning_space.query_item_option(op_name_type, + full_path[method_name[0]], + method_name, + method_val): + all_exist_flag = False + op_tuning_config = self.default_op_config[op_name_type] + break + if all_exist_flag: + config_args = dict(zip(op_tuning_items, op_tuning_item_vals)) + self._set_dtype( op_name_type, config_args) + internal_pattern = pattern_to_internal(quant_mode) + quant_mode = quant_mode_from_pattern(internal_pattern) + op_tuning_config = OpTuningConfig(op_name_type[0], + op_name_type[1], + quant_mode, + self.tuning_space, + kwargs=config_args) + new_tune_cfg.update({op_name_type: op_tuning_config}) + yield new_tune_cfg + +class OpWiseTuningSampler(TuningSampler): + """Not displayed in API Docs.""" + + def __init__(self, + tuning_space: TuningSpace, + tuning_items_priority: List[str], + tuning_order_lst: List[TuningOrder], + op_dtype_dict: Dict[tuple, str], + initial_op_tuning_cfg: Dict): + """Op wise tuning config sampler. + + Args: + tuning_space: Tuning space. + tuning_items_priority: The priority to traverse the tuning items. + tuning_order_lst: The tuning orders. + op_dtype_dict: The (op name, op type) and its target data type. + initial_op_tuning_cfg: The initial tuning config. + """ + super().__init__(tuning_space, tuning_order_lst, initial_op_tuning_cfg) + tuning_items_priority = TUNING_ITEM_PRIORITY + # query the combination of tuning items with according to the tuning items priority + self.op_dtype_dict = op_dtype_dict + self.op_options_combination = OrderedDict() + self.op_tuning_items = {} + for op_name_type, op_quant_mode in op_dtype_dict.items(): + full_path = self.tuning_space.get_op_default_path_by_pattern(op_name_type, op_quant_mode) + self.op_complete_path[op_name_type] = copy.deepcopy(full_path) + mode_items = copy.deepcopy(full_path) + internal_pattern = pattern_to_internal(op_quant_mode) + op_quant_mode = quant_mode_from_pattern(internal_pattern) + if internal_pattern[0] == 'precision': continue + filtered_tuning_items = [] + for item_name in tuning_items_priority: + att, method_name = item_name + if att not in mode_items: + continue + quant_mode_item = self.tuning_space.query_quant_mode_item_by_full_path(op_name_type ,full_path[att]) + item = quant_mode_item.get_option_by_name(item_name) + if item: + filtered_tuning_items.append(item) + self.op_tuning_items[op_name_type] = filtered_tuning_items + op_options_lst = product(*[item.options for item in filtered_tuning_items]) + self.op_options_combination[op_name_type] = op_options_lst + + def __iter__(self): + """Yield the next tuning config. + + Yields: + The next tuning config. 
+ """ + new_tune_cfg = copy.deepcopy(self.initial_op_tuning_cfg) + for op_options_lst in product(*self.op_options_combination.values()): + for index, op_name_type in enumerate(self.op_options_combination.keys()): + op_quant_mode = self.op_dtype_dict[op_name_type] + op_tuning_items = [item.name for item in self.op_tuning_items[op_name_type]] + op_tuning_item_vals = op_options_lst[index] + config_args = dict(zip(op_tuning_items, op_tuning_item_vals)) + self._set_dtype(op_name_type, config_args) + internal_pattern = pattern_to_internal(op_quant_mode) + quant_mode = quant_mode_from_pattern(internal_pattern) + op_tuning_config = OpTuningConfig(op_name_type[0], op_name_type[1], + quant_mode, self.tuning_space, + kwargs=config_args) + new_tune_cfg.update({op_name_type: op_tuning_config}) + yield new_tune_cfg + + def get_opwise_candidate(self): + """Collect all op-wise setting. + + Returns: + op_wise_configs: all op-wise setting. + """ + op_wise_configs = OrderedDict() + for op_name_type, op_quant_mode in self.op_dtype_dict.items(): + # For static/dynamic/fp32/bf16 + internal_pattern = pattern_to_internal(op_quant_mode) + quant_mode = quant_mode_from_pattern(internal_pattern) + full_path = self.tuning_space.get_op_default_path_by_pattern(op_name_type, op_quant_mode) + self.op_complete_path[op_name_type] = copy.deepcopy(full_path) + op_wise_configs[op_name_type] = [] + # For precision + if internal_pattern[0] == 'precision': + config_args = {} + self._set_dtype(op_name_type, config_args) + op_tuning_config = OpTuningConfig(op_name_type[0], op_name_type[1], + quant_mode, self.tuning_space, + kwargs=config_args) + op_wise_configs[op_name_type].append(op_tuning_config) + continue + # For quantization + op_tuning_items = [item.name for item in self.op_tuning_items.get(op_name_type, [])] + op_options = self.op_options_combination[op_name_type] + + for op_tuning_item_vals in op_options: + config_args = dict(zip(op_tuning_items, op_tuning_item_vals)) + self._set_dtype( op_name_type, config_args) + op_tuning_config = OpTuningConfig(op_name_type[0], op_name_type[1], + quant_mode, self.tuning_space, + kwargs=config_args) + op_wise_configs[op_name_type].append(op_tuning_config) + return op_wise_configs + +class FallbackTuningSampler(TuningSampler): + """Not displayed in API Docs.""" + + def __init__(self, + tuning_space: TuningSpace, + tuning_order_lst: List[TuningOrder], + initial_op_tuning_cfg: Dict[tuple, Any], + op_dtypes: Dict[str, str], + accumulate: bool, + skip_first: bool = True + ): + """Sampler for generate the tuning config of fallback stage. + + Args: + tuning_space: Tuning space. + tuning_order_lst: The tuning orders. + initial_op_tuning_cfg: The initial tuning config. + op_dtypes: The (op name, op type) and its target data type. + accumulate: Fallback accumulated or not. + skip_first: Skip fallback the first op or not. Defaults to True. + """ + super().__init__(tuning_space, tuning_order_lst, initial_op_tuning_cfg) + self.op_dtypes = op_dtypes + self.accumulate = accumulate + self.skip_first = skip_first + + def __iter__(self): + """Yield the next tuning config. + + Yields: + The next tuning config. + """ + new_tune_cfg = copy.deepcopy(self.initial_op_tuning_cfg) + skip_first = self.skip_first + for op_name_type, target_dtype in self.op_dtypes.items(): + # Only support fallback to lower precision. 
+            if not self.accumulate:
+                new_tune_cfg = copy.deepcopy(self.initial_op_tuning_cfg)
+            full_path = self.tuning_space.get_op_default_path_by_pattern(op_name_type, target_dtype)
+            self.op_complete_path[op_name_type] = copy.deepcopy(full_path)
+            config_args = {}
+            self._set_dtype(op_name_type, config_args)
+            internal_pattern = pattern_to_internal(target_dtype)
+            quant_mode = quant_mode_from_pattern(internal_pattern)
+            new_op_config = OpTuningConfig(op_name_type[0], op_name_type[1],
+                                           quant_mode, self.tuning_space,
+                                           kwargs=config_args)
+
+            new_tune_cfg.update({op_name_type: new_op_config})
+            if self.accumulate and skip_first:  # skip the first one
+                skip_first = False
+                continue
+            logger.debug(f"fallback {op_name_type} to {target_dtype}")
+            yield new_tune_cfg
+
+@TuningSamplerRegistry.register("smooth_quant")
+class SmoothQuantSampler(TuningSampler):
+    """Sampler for the hyperparameter tuning of smooth quantization."""
+
+    def __init__(self,
+                 tuning_space: TuningSpace,
+                 tuning_order_lst: List[TuningOrder],
+                 initial_op_tuning_cfg: Dict,
+                 kwargs: Dict = {}):
+        """Initialize the sampler."""
+        super().__init__(tuning_space, tuning_order_lst, initial_op_tuning_cfg, kwargs)
+        # TODO use the alpha list specified by user
+        self._kwargs = kwargs
+        self._alpha_lst = [0.5]
+        if kwargs.get('smooth_quant_args', {}):
+            self._alpha_lst = kwargs['smooth_quant_args'].get('alpha_lst', [0.5])
+
+    def __iter__(self, tune_cfg=None) -> OpTuningConfig:
+        """Yield the next tuning config with updated alpha.
+
+        Args:
+            tune_cfg: tuning config. Defaults to None.
+        """
+        for alpha in self._alpha_lst:
+            new_tune_cfg = copy.deepcopy(self.initial_op_tuning_cfg) if not tune_cfg else copy.deepcopy(tune_cfg)
+            sq_args = {'smooth_quant': True, 'smooth_quant_args': {'alpha': alpha}}
+            if 'recipe_cfgs' not in new_tune_cfg:
+                new_tune_cfg['recipe_cfgs'] = sq_args
+            else:
+                new_tune_cfg['recipe_cfgs'].update(sq_args)
+            yield new_tune_cfg
\ No newline at end of file
diff --git a/neural_compressor/experimental/strategy/utils/tuning_space.py b/neural_compressor/experimental/strategy/utils/tuning_space.py
new file mode 100644
index 00000000000..6ea1998dbb8
--- /dev/null
+++ b/neural_compressor/experimental/strategy/utils/tuning_space.py
@@ -0,0 +1,728 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tuning space."""
+
+from collections import defaultdict, OrderedDict
+import os
+import re
+from typing import Dict, Tuple
+from copy import deepcopy
+from ....utils import logger
+from .utility import OrderedDefaultDict
+from .tuning_structs import OpTuningConfig
+
+from .constant import TUNING_ITEMS_LST
+
+class TuningItem:
+    """Not displayed in API Docs."""
+
+    def __init__(self, name, options=[], item_type=None):
+        """Init the tuning item.
+
+        Args:
+            name: tuning item name.
+            options: The options. Defaults to [].
+            item_type: The item type. Defaults to None.
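+
+        Example::
+            # a hypothetical leaf item holding the scheme options of an activation
+            item = TuningItem(name=('activation', 'scheme'), options=['sym', 'asym'], item_type='method')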
+ """ + self.name = name + self._options = options + self.item_type = item_type + + @property + def options(self): + """Return all options. + + Returns: + All options. + """ + return self._options + + def get_options_name(self): + """Return the name list of the options.""" + return [o.name for o in self.options] + + def append(self, option): + """Append option. + + Args: + option: The option to add. + """ + self._options.append(option) + + def remove(self, option): + """Remove option. + + Args: + option: The option to remove. + """ + if option in self._options: + self._options.remove(option) + + def get_option_by_name(self, option_name): + """Get the option item by name. + + Args: + option_name: option name. + + Returns: + option: the queried option. + """ + for option in self.options: + if isinstance(option, TuningItem) and option.name == option_name: + return option + return None + + def get_details(self, depth=0): + """Get the tuning item and its options recursively. + + Args: + depth: recursion depth. Defaults to 0. + + Returns: + The tuning item and its options as a string. + """ + details = ['\t' * depth + f"{self.name}, {self.item_type}"] + for option in self.options: + if isinstance(option, int) or isinstance(option, str): + details.append("\t" * depth + str(option)) + else: + details.append(option.get_details(depth + 1)) + return "\n".join(details) + + +class TuningSpace: + """Not displayed in API Docs. + + 1) capability -> internal format -> merge -> tuning space (tree) + + """ + + def __init__(self, capability, conf, framework=None): + """Init the tuning space. + + Args: + capability: framework capability. + conf: user configuration + framework: framework name. Defaults to None. + """ + self.capability = capability + self.conf = conf + self.root_item = TuningItem(name='root', options=[], item_type='root') + self.quant_mode_wise_items = defaultdict(list) # quant_mode/precision_name: {(op_name, op_type),...} + self.op_type_wise_items = defaultdict(list) # op_type: {(op_name, op_type), ...} + self.framework = framework + self.ops_dtype = defaultdict(OrderedDict) + usr_cfg = conf.usr_cfg if conf else None + self.op_items = {} + # {(op_name, op_type): {(path): data type}} + self.ops_data_type = OrderedDefaultDict() + self.ops_attr = {'activation': set(), 'weight': set()} + # {(op_name, op_type): {path1, path2, ...} + self.ops_path_set = defaultdict(set) + + self._create_tuning_space(capability, usr_cfg) + + def _parse_capability(self, capability: Dict) -> None: + """Parse the capability and construct the tuning space(a tree). + + Args: + capability: merged framework capability. 
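+
+        Example::
+
+            # A hypothetical, heavily trimmed capability dict:
+            # {'calib': {'calib_sampling_size': [100]},
+            #  'op': {('conv1', 'Conv2D'): {...}}}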
+ """ + calib = TuningItem(name='calib_sampling_size', + options=capability['calib']['calib_sampling_size'], + item_type='calib_sampling_size') + self.root_item.append(calib) + def _parse(cap, root, path, op_name_type): + if isinstance(cap, dict): + for key, val in cap.items(): + if isinstance(val, dict): + if len(path) > 1 and path[-2] == 'precision': + self.ops_path_set[op_name_type].add(tuple(path + [key])) + tuning_item = TuningItem(name=key, options=[], item_type=key) + root.append(tuning_item) + _parse(val, tuning_item, path + [key], op_name_type) + elif isinstance(val, list): + new_key = ('activation', key) if 'activation' in path else ('weight', key) + tuning_item = TuningItem(name=new_key, options=val, item_type='method') + self.ops_path_set[op_name_type].add(tuple(path)) + root.append(tuning_item) + else: + return + + for op_name_type, op_cap in capability['op'].items(): + op_name, op_type = op_name_type + op_item = TuningItem(name=op_name_type, options=[], item_type='op') + self.op_type_wise_items[op_type].append(op_item) + self.root_item.append(op_item) + self.op_items[op_name_type] = op_item + _parse(op_cap, op_item, [], op_name_type) + for q_option in op_item.options: + if q_option and q_option.name == 'precision': + acc_item = q_option.get_option_by_name('activation') + if acc_item and acc_item.options: + for dtype_item in acc_item.options: + self.quant_mode_wise_items[dtype_item.name].append(op_item) + else: + self.quant_mode_wise_items[q_option.name].append(op_item) + + def _create_tuning_item(self, tuning_items: Dict, attr_name: str, quant_mode_item: TuningItem): + for tuning_item_name, options in tuning_items.items(): + if tuning_item_name not in ['dtype', 'quant_mode']: + name = (attr_name, tuning_item_name) + tuning_item = TuningItem(name=name, options=options, item_type=name) + quant_mode_item.append(tuning_item) + + def _merge_op_cfg(self, cur_op_cap, op_user_cfg, fw_op_cap): + """Merge the op cfg with user cfg. + + op_user_cfg:{ + 'activation':{ + 'dtype': ['fp32'] + }, + 'weight':{ + 'dtype': ['fp32'] + } + } + + Step1. merge dtype, get the intersection between fw_op_cap and op_user_cfg. + Step2. merge method options. + + # if dtype and type intersection with precision set -> only keep the intersection precision + # and remove the quantization. + # else(no dtype, or no intersection) -> merge the method + + Args: + cur_op_cap: current capability. + op_user_cfg: The user capability. + fw_op_cap: The fwk capability(baseline). + + Returns: + Return the merged capability. + """ + from .utility import extract_data_type, reverted_data_type + fw_op_cap = deepcopy(fw_op_cap) + new_op_cap = deepcopy(cur_op_cap) + for att in ['activation', 'weight']: + if op_user_cfg.get(att, None) is not None: + user_dtype_lst = op_user_cfg[att]['dtype'] if op_user_cfg[att]['dtype'] is not None else [] + # Merge the precision part. + fwk_att_precision_cap = fw_op_cap['precision'].get(att, {}) + fwk_precision_set = set(fwk_att_precision_cap.keys()) + # The intersection of user cfg and fwk capability. 
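+                # e.g. (hypothetical values) fwk_precision_set = {'fp32', 'bf16'} and
+                # user_dtype_lst = ['fp32', 'int8'] -> valid_precision_set = {'fp32'};
+                # only the fp32 precision option is then kept and the quantization paths are dropped.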
+ valid_precision_set = set(fwk_precision_set).intersection(set(user_dtype_lst)) + if len(valid_precision_set) != 0: + new_op_cap = dict(filter(lambda item: item[0] == 'precision', new_op_cap.items())) + new_op_cap['precision'][att] = dict(filter(lambda item: item[0] in valid_precision_set,\ + fw_op_cap['precision'][att].items())) + else: + # Filter the valid options for tuning item + for quant_mode in fw_op_cap: + if quant_mode not in new_op_cap: + new_op_cap[quant_mode] = deepcopy(fw_op_cap[quant_mode]) + if quant_mode == 'precision': continue + for data_type in new_op_cap[quant_mode][att]: + for signed_flag in new_op_cap[quant_mode][att][data_type]: + cur_items = new_op_cap[quant_mode][att][data_type][signed_flag] + fwk_items = fw_op_cap[quant_mode][att][data_type][signed_flag] + for method_name, method_options in op_user_cfg[att].items(): + if method_name not in ['dtype', 'quant_mode'] and method_options: + # filter the method options + options_intersection = set(fwk_items[method_name]\ + ).intersection(set(method_options)) + # merge with fwk, if intersection -> use intersection + if len(options_intersection) > 0: + cur_items[method_name] = [option for option in fwk_items[method_name] if\ + option in options_intersection] + return new_op_cap + + def _merge_optype_wise_cfg(self, cap: Dict, optype_wise_usr_cfg: Dict, fw_cap: Dict): + for op_type, op_user_cfg in optype_wise_usr_cfg.items(): + op_lst = [op_name_type for op_name_type in cap['op'] if op_name_type[1] == op_type] + for op_name_type in op_lst: + cap['op'][op_name_type] = self._merge_op_cfg(cap['op'][op_name_type], + op_user_cfg, + fw_cap['op'][op_name_type]) + + def _merge_model_wise_cfg(self, cap: Dict, model_wise_usr_cfg: Dict, fw_cap: Dict): + for op_name_type in cap['op'].keys(): + cap['op'][op_name_type] = self._merge_op_cfg(cap['op'][op_name_type], + model_wise_usr_cfg, + fw_cap['op'][op_name_type]) + + def _merge_op_wise_cfg(self, cap: Dict, op_wise_usr_cfg: Dict, fw_cap: Dict): + op_name_types = {key[0]: key for key in cap['op'].keys()} + for op_name_pattern, op_user_cfg in op_wise_usr_cfg.items(): + op_name_pattern = re.compile(op_name_pattern) + for op_name in op_name_types: + if op_name_pattern.fullmatch(op_name): + op_name_type = op_name_types[op_name] + cap['op'][op_name_type] = self._merge_op_cfg(cap['op'][op_name_type], + op_user_cfg, + fw_cap['op'][op_name_type]) + + def _merge_with_user_cfg(self, capability: Dict, user_cfg: Dict): + """Merge the capability with user config. + + Merge the capability queried from the adaptor with user config in the order of + model-wise, optype-wise, and op-wise if needed. + The optype-wise user config will override the model-wise user config for their + intersection parts, the same as the op-wise and optype-wise. + + Here is an example: + capability:{ + ('op1','type1'): { + 'item1': [item1_option1, item1_option2, item1_option3], + 'item2': [item2_option1, item2_option2, item2_option3], + } + ('op2','type1'): { + 'item1': [item1_option1, item1_option2, item1_option3], + 'item2': [item2_option1, item2_option2, item2_option3], + } + ('op3','type2'): { + 'item1': [item1_option1, item1_option2], + 'item2': [item2_option1, item2_option2], + } + ('op4','type2'): { + 'item1': [item1_option1, item1_option2], + 'item2': [item2_option1, item2_option2], + } + } + + user_config{ + model-wise:{ + 'item1': [item1_option1] + } + optype-wise: { + 'type1': { + 'item1': [item1_option1, item1_option2] + }} + op-wise: { + ('op3','type2'): { + 'item2': [item2_option1] + }} + } + + # step1. 
merged with model-wise
+        capability: {
+            ('op1','type1'): {
+                'item1': [item1_option1],
+                'item2': [item2_option1, item2_option2, item2_option3],
+            }
+            ('op2','type1'): {
+                'item1': [item1_option1],
+                'item2': [item2_option1, item2_option2, item2_option3],
+            }
+            ('op3','type2'): {
+                'item1': [item1_option1],
+                'item2': [item2_option1, item2_option2],
+            }
+            ('op4','type2'): {
+                'item1': [item1_option1],
+                'item2': [item2_option1, item2_option2],
+            }
+        }
+
+        # step2. merged with optype-wise
+        capability: {
+            ('op1','type1'): {
+                'item1': [item1_option1, item1_option2],
+                'item2': [item2_option1, item2_option2, item2_option3],
+            }
+            ('op2','type1'): {
+                'item1': [item1_option1, item1_option2],
+                'item2': [item2_option1, item2_option2, item2_option3],
+            }
+            ('op3','type2'): {
+                'item1': [item1_option1],
+                'item2': [item2_option1, item2_option2],
+            }
+            ('op4','type2'): {
+                'item1': [item1_option1],
+                'item2': [item2_option1, item2_option2],
+            }
+        }
+
+        # step3. merged with op-wise
+        capability: {
+            ('op1','type1'): {
+                'item1': [item1_option1, item1_option2],
+                'item2': [item2_option1, item2_option2, item2_option3],
+            }
+            ('op2','type1'): {
+                'item1': [item1_option1, item1_option2],
+                'item2': [item2_option1, item2_option2, item2_option3],
+            }
+            ('op3','type2'): {
+                'item1': [item1_option1],
+                'item2': [item2_option1],
+            }
+            ('op4','type2'): {
+                'item1': [item1_option1],
+                'item2': [item2_option1, item2_option2],
+            }
+        }
+
+        :param capability:
+        :param user_cfg:
+        :return:
+        """
+        fw_capability = deepcopy(capability)
+        if user_cfg['model_wise'] is not None:
+            self._merge_model_wise_cfg(capability, user_cfg['model_wise'], fw_capability)
+        if user_cfg['optype_wise'] is not None:
+            self._merge_optype_wise_cfg(capability, user_cfg['optype_wise'], fw_capability)
+        if user_cfg['op_wise'] is not None:
+            self._merge_op_wise_cfg(capability, user_cfg['op_wise'], fw_capability)
+
+    def _parse_cap_helper(self, cap):
+        """Convert the cap to internal format.
+
+        Parsed result:
+            (op_name, op_type):
+                {
+                    'static':{
+                        'act':{
+                            'int8':{
+                                'signed':{  # (op_name, op_type): ('static', (('int8', 'signed'),(...)))
+                                    'dtype': 'int8',
+                                    'scheme': ['sym'],
+                                    'algorithm': ['minmax', 'kl'],
+                                    'granularity': ['per_channel','per_tensor'],
+                                }
+                            }
+                            'int4':{
+                                ...
+                            }
+                        },
+                        'weight':{
+                            'int8':{
+                                ...
+                            }
+                            'int4':{
+                                'signed':{
+                                    'dtype': 'int4',
+                                    'scheme': ['asym'],
+                                    ...
+                                }
+                            }
+                        }
+                    },
+                    'dynamic':{
+                        ...
+                    }
+                    'precision':{
+                        'act':{
+                            'fp32':{}
+                            'bf16':{}
+                        },
+                        'weight':{
+                            'fp32':{
+                                'dtype': 'fp32',
+                            },
+                            'bf16':{
+                                'dtype': 'bf16',
+                            },
+                        }
+                    }
+                }
+        """
+        from .utility import OrderedDefaultDict, extract_data_type
+        cap = deepcopy(cap)
+        parsed_cap = OrderedDict()  # {(op_name, op_type): parsed_op_cap}
+        for op_name_type, op_cap_lst in cap.items():
+            parsed_op_cap = OrderedDefaultDict()  # {ptq_type/precision, {}}
+            parsed_op_cap['precision'] = OrderedDefaultDict()
+            # WA for some ops that have an extra weight dtype.
+            has_weight = all(['weight' in op_cap for op_cap in op_cap_lst])
+            if has_weight: self.ops_attr['weight'].add(op_name_type)
+            for op_cap in op_cap_lst:
+                if 'activation' in op_cap:
+                    self.ops_attr['activation'].add(op_name_type)
+                attrs_lst = ['activation', 'weight'] if has_weight else ['activation']
+                for att in attrs_lst:
+                    # Parse the data info for item that has options.
+                    if 'activation' in op_cap and 'quant_mode' in op_cap['activation']:
+                        quant_mode = op_cap['activation']['quant_mode']
+                        att_dtype = op_cap[att]['dtype'][0]
+                        signed_flag, _data_type = extract_data_type(att_dtype)
+                        for item_name, item_options in op_cap[att].items():
+                            if item_name == 'dtype':
+                                # The dtype should be a string, need to align with fwk.yaml.
+                                self.ops_data_type[op_name_type][(quant_mode, att, _data_type, signed_flag)] = \
+                                    item_options[0] if isinstance(item_options, list) else item_options
+                            if item_name not in ['dtype', 'quant_mode']:
+                                parsed_op_cap[quant_mode][att][_data_type][signed_flag][item_name] = item_options
+                    else:
+                        # Parse the data info for item with unique value.
+                        att_dtype = op_cap[att]['dtype']
+                        if isinstance(att_dtype, list):
+                            att_dtype = att_dtype[0]
+                        parsed_op_cap['precision'][att][att_dtype] = {'dtype': att_dtype}
+                        self.ops_data_type[op_name_type][('precision', att, att_dtype)] = att_dtype
+
+            parsed_cap[op_name_type] = parsed_op_cap
+        return parsed_cap
+
+    def _create_tuning_space(self, capability, usr_cfg):
+        """Create tuning space.
+
+        step1. convert the capability into internal format.
+        step2. merge the capability with usr_cfg.
+        step3. create the tuning space.
+        :param capability:
+        :param usr_cfg:
+        :return:
+        """
+        capability['op'] = self._parse_cap_helper(deepcopy(capability['op']))
+        if usr_cfg:
+            self._merge_with_user_cfg(capability, usr_cfg['quantization'])
+            logger.debug("*********** After merging with user cfg ***********")
+            logger.debug(capability)
+        self._parse_capability(capability)
+
+    def query_item_option(self, op_name_type, path, method_name, method_val):
+        """Query whether a method value exists, such as a scheme or algorithm option.
+
+        Args:
+            op_name_type: (op_name, op_type)
+            path: full path
+            method_name: method name
+            method_val: method value
+
+        Returns:
+            True if the method value exists under the given path, otherwise False
+            (None if the mode item itself is missing).
+        """
+        mode_item = self.get_item_by_path((op_name_type, *path))
+        if not mode_item: return None
+        method_item = mode_item.get_option_by_name(method_name)
+        return method_item is not None and method_val in method_item.options
+
+    def get_default_config(self, op_name_type, quant_mode):
+        """Get the default tuning config.
+
+        Args:
+            op_name_type: (op_name, op_type)
+            quant_mode: quantization mode.
+
+        Returns:
+            op_tuning_config: the default config according to the specified quantization mode.
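+
+        Example::
+
+            # A minimal usage sketch; the op name/type are hypothetical.
+            op_cfg = tuning_space.get_default_config(('conv2d_1', 'Conv2D'), 'static')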
+        """
+        from .tuning_structs import OpTuningConfig
+        # For quant_mode static/dynamic/((static, int8), (dynamic, int4))
+        # set the first option as the default if the required quant mode is not supported
+        full_path = self.get_op_default_path_by_pattern(op_name_type, quant_mode)
+        config_args = {}
+        has_weight = op_name_type in self.ops_attr['weight']
+        config_args['activation_dtype'] = self.ops_data_type[op_name_type].get(full_path['activation'])
+        if has_weight:
+            config_args['weight_dtype'] = self.ops_data_type[op_name_type].get(full_path['weight'])
+        for att in full_path:
+            mode_item = self.query_quant_mode_item_by_full_path(op_name_type, full_path[att])
+            if mode_item:
+                method_args = {method_item.name: method_item.options[0] for method_item in mode_item.options \
+                    if method_item.name in TUNING_ITEMS_LST}
+                config_args.update(method_args)
+
+        quant_mode = quant_mode if isinstance(quant_mode, str) else quant_mode[0]
+        # set the first option as the default for each tuning item
+        op_tuning_config = OpTuningConfig(op_name_type[0],
+                                          op_name_type[1],
+                                          quant_mode,
+                                          self,
+                                          kwargs=config_args)
+        return op_tuning_config
+
+    def get_item_by_path(self, path, default=None):
+        """Get the item according to the path."""
+        item = self.root_item
+        for val in path:
+            if item is None:
+                logger.warning(f"Did not find the item for the path {path}")
+                return default
+            item = item.get_option_by_name(val)
+        if item is None:
+            logger.warning(f"Did not find the item for the path {path}")
+        return item
+
+    def get_default_full_path(self, op_name_type, path):
+        """Complete the path.
+
+        Args:
+            op_name_type: (op_name, op_type)
+            path: incomplete path.
+
+        Returns:
+            new_path: the complete path.
+        """
+        # For precision
+        if path[0] == 'precision':
+            # If the path is ('precision', 'activation', dtype), return it directly.
+            if len(path) == 3: return path
+            assert len(path) == 2, f"Got the path: {path}, please provide a path that includes activation or weight."
+            att_item = self.get_item_by_path((op_name_type, *path))
+            if not att_item or len(att_item.options) == 0:
+                logger.debug(f"Could not find an item for {op_name_type} with path {path}")
+                return None
+            dtype = att_item.options[0].name
+            return (*path, dtype)
+        else:
+            # For quantization
+            assert len(path) >= 2, f"Got the path: {path}, please provide a path that includes activation or weight."
+            if path[-1] is None: path = path[:-1]
+            item = self.get_item_by_path((op_name_type, *path))
+            new_path = path
+            # For path ('static', 'activation', ...)
+            while item:
+                item_options = item.options
+                if len(item_options) > 0 and isinstance(item_options[0], TuningItem) and \
+                        item_options[0].item_type != 'method':
+                    new_path = new_path + (item_options[0].name,)
+                    item = item_options[0]
+                else:
+                    break
+            return new_path
+
+    def query_quant_mode_item_by_full_path(self, op_name_type, path) -> TuningItem:
+        """Query the mode item by full path."""
+        new_path = (op_name_type, *path)
+        item = self.get_item_by_path(new_path)
+        return item
+
+    def query_items_by_quant_mode(self, quant_mode):
+        """Collect all op items that support the specified mode.
+
+        Args:
+            quant_mode: dynamic/static/bf16/fp32/fp16
+
+        Returns:
+            The op item set that supports the specified quant mode.
+        """
+        return self.quant_mode_wise_items.get(quant_mode, [])
+
+    def get_op_default_path_by_pattern(self, op_name_type, pattern):
+        """Get the default path by quant mode.
+
+        Args:
+            op_name_type: (op_name, op_type)
+            pattern: 'static', 'dynamic', ('static', 'int8'), ('precision', 'fp32')
+
+        Returns:
+            result(Dict): The default full paths of activation and weight, if present.
+        """
+        internal_pattern = pattern_to_internal(pattern)
+        full_path = {'activation': None, 'weight': None}
+        full_path['activation'], full_path['weight'] = pattern_to_path(internal_pattern)
+        result = {}
+        has_weight = op_name_type in self.ops_attr['weight']
+        att_lst = ['activation', 'weight'] if has_weight else ['activation']
+        for att in att_lst:
+            result[att] = self.get_default_full_path(op_name_type, full_path[att])
+        return result
+
+def get_op_mode_by_query_order(tuning_space: TuningSpace, query_order):
+    """Get the op mode according to the query order."""
+    quant_mode_wise_items = OrderedDict()  # mode, op_item_lst
+    pre_items = set()
+    # Collect the op items that support the specified mode.
+    for quant_mode in query_order:
+        items = tuning_space.query_items_by_quant_mode(quant_mode)
+        filtered_items = list(filter(lambda item: item not in pre_items, items))
+        pre_items = pre_items.union(set(items))
+        quant_mode_wise_items[quant_mode] = filtered_items
+
+    def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict):
+        for item in items_lst:
+            op_item_dtype_dict[item.name] = target_quant_mode
+
+    op_item_dtype_dict = OrderedDict()
+    for quant_mode, quant_mode_items in quant_mode_wise_items.items():
+        initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict)
+
+    return op_item_dtype_dict
+
+def pattern_to_internal(pattern, default_dtype='int8'):
+    """Convert pattern to internal format.
+
+    'static' -> ('static', (('int8'),('int8')))
+    'dynamic' -> ('dynamic', (('int8'),('int8')))
+    'fp32' -> ('precision', (('fp32'), ('fp32')))
+    'bf16' -> ('precision', (('bf16'), ('bf16')))
+    ('static', 'int8') -> ('static', (('int8'),('int8')))
+    ('dynamic', 'int8') -> ('dynamic', (('int8'),('int8')))
+    ('precision', 'fp32') -> ('precision', (('fp32'), ('fp32')))  # (('fp32'), ('fp32')) or ('fp32', 'fp32')
+    #TODO to add the support for mixed data type of weight and activation
+    """
+    from .constant import PRECISION_SET_V2_0
+    pattern_bk = pattern
+    if isinstance(pattern, str):
+        pattern = ('precision', pattern) if pattern in PRECISION_SET_V2_0 else (pattern, (None))
+    internal_pattern = (pattern[0], ((pattern[1],), (pattern[1],)))
+    return internal_pattern
+
+def pattern_to_path(pattern):
+    """Convert pattern to path."""
+    act_path = (pattern[0], 'activation', *pattern[1][0])
+    weight_path = (pattern[0], 'weight', *pattern[1][1])
+    return act_path, weight_path
+
+def quant_mode_from_pattern(internal_pattern):
+    """Get quant mode from internal pattern."""
+    if internal_pattern[0] == 'precision':
+        return internal_pattern[1][0]
+    else:
+        return internal_pattern[0]
+
+def initial_tuning_cfg_with_quant_mode(op_name_type, quant_mode, tuning_space: TuningSpace) -> OpTuningConfig:
+    """Initialize the tuning cfg.
+
+    Args:
+        op_name_type: (op name, op type)
+        quant_mode: dynamic/static/fp32/bf16/fp16
+        tuning_space: tuning space.
+
+    step1, convert the quant_mode into the internal format.
+    step2, complete the path based on the default options.
+    step3, get the mode item.
+    step4, use the first option as the value for each method.
+    step5, create the op tuning config.
+
+    Returns:
+        The initial tuning config.
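+
+    Example::
+
+        # A minimal sketch; the op name/type are hypothetical.
+        op_cfg = initial_tuning_cfg_with_quant_mode(('fc1', 'MatMul'),
+                                                    ('dynamic', 'int8'),
+                                                    tuning_space)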
+ """ + internal_pattern = pattern_to_internal(quant_mode) + full_path = {'activation': None, 'weight': None} + full_path['activation'], full_path['weight'] = pattern_to_path(internal_pattern) + has_weight = op_name_type in tuning_space.ops_attr['weight'] + + config_args = {} + att_lst = ['activation', 'weight'] if has_weight else ['activation'] + for att in att_lst: + att_full_path = tuning_space.get_default_full_path(op_name_type, full_path[att]) + config_args[att + '_dtype'] = tuning_space.ops_data_type[op_name_type].get(att_full_path, None) + mode_item = tuning_space.get_item_by_path((op_name_type, *att_full_path)) + if mode_item: + method_args = {method_item.name: method_item.options[0] for method_item in mode_item.options \ + if method_item.name in TUNING_ITEMS_LST} + config_args.update(method_args) + quant_mode = internal_pattern[0] + # set the first option as the default for each tuning item + op_tuning_config = OpTuningConfig(op_name_type[0], + op_name_type[1], + quant_mode, + tuning_space, + kwargs=config_args) + return op_tuning_config \ No newline at end of file diff --git a/neural_compressor/experimental/strategy/utils/tuning_structs.py b/neural_compressor/experimental/strategy/utils/tuning_structs.py new file mode 100644 index 00000000000..b13f27cf0cd --- /dev/null +++ b/neural_compressor/experimental/strategy/utils/tuning_structs.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tuning structure.""" + +from typing import Dict +from .constant import QUANT_MODE_SET, TUNING_ITEMS_LST, PRECISION_SET +from ....utils import logger + +class OpTuningConfig: + """Op tuning config.""" + + def __init__(self, op_name, op_type, op_quant_mode, tuning_space, kwargs={}): + """Create the tuning config. + + Args: + op_name: op name. + op_type: op type. + op_quant_mode: quantization mode. + tuning_space: tuning space. + kwargs: other parameters. Defaults to {}. 
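+
+        Example::
+
+            # A minimal sketch; the op name/type and dtypes are hypothetical.
+            op_cfg = OpTuningConfig('conv2d_1', 'Conv2D', 'static', tuning_space,
+                                    kwargs={'activation_dtype': 'int8', 'weight_dtype': 'int8'})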
+        """
+        self.op_name = op_name
+        self.op_type = op_type
+        self.op_name_type = (self.op_name, self.op_type)
+        self.op_quant_mode = op_quant_mode  # static/dynamic/fp32/bf16/fp16
+        self.kwargs = kwargs
+        self.act_dtype = None
+        self.weight_dtype = None
+        self.has_weight = self.op_name_type in tuning_space.ops_attr['weight']
+        self._set_dtype()
+
+    def _set_dtype(self):
+        """Set the data type."""
+        if self.op_quant_mode in PRECISION_SET:
+            self.act_dtype, self.weight_dtype = self.op_quant_mode, self.op_quant_mode
+        else:
+            self.act_dtype = self.kwargs.get('activation_dtype', None)
+            self.weight_dtype = self.kwargs.get('weight_dtype', None)
+        assert self.act_dtype and isinstance(self.act_dtype, str),\
+            (f"Didn't assign the activation data type for {self.op_name, self.op_type}", \
+            f"with quant_mode {self.op_quant_mode}")
+        # if self.has_weight:
+        #     assert self.weight_dtype, \
+        #         (f"Didn't assign the weight data type for {self.op_name, self.op_type}", \
+        #         f"with quant_mode {self.op_quant_mode}")
+
+    def __str__(self) -> str:
+        """Display the tuning config as a string.
+
+        Returns:
+            msg: the tuning config as a string.
+        """
+        msg = f"op name: {self.op_name}, op type: {self.op_type} \n"
+        msg += f"\t activation dtype: {self.act_dtype} \n"
+        msg += f"\t weight dtype: {self.weight_dtype} \n" if self.has_weight else ""
+        for key, val in self.kwargs.items():
+            if key in TUNING_ITEMS_LST:
+                msg += f"\t {key[0]} {key[1]}: {val}\n"
+        return msg
+
+    def get_state(self):
+        """Return the op tuning configuration.
+
+        Returns:
+            Dict: The op tuning state.
+        """
+        result = {}
+        if self.has_weight:
+            result['weight'] = {
+                'dtype': self.weight_dtype,
+            }
+        result['activation'] = {
+            'dtype': self.act_dtype,
+            'quant_mode': self.op_quant_mode,
+        }
+        for key, val in self.kwargs.items():
+            if key in TUNING_ITEMS_LST:
+                result[key[0]][key[1]] = val
+        return result
+
+    @classmethod
+    def from_state(cls, config: Dict):
+        """Create the tuning config from a dict.
+
+        Args:
+            config: A dict that includes the tuning config.
+        """
+        return cls(**config)
diff --git a/neural_compressor/experimental/strategy/utils/utility.py b/neural_compressor/experimental/strategy/utils/utility.py
new file mode 100644
index 00000000000..22b95176e59
--- /dev/null
+++ b/neural_compressor/experimental/strategy/utils/utility.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tuning utility."""
+
+
+from collections import OrderedDict
+
+class OrderedDefaultDict(OrderedDict):
+    """Ordered default dict."""
+
+    def __missing__(self, key):
+        """Initialize value for the missing key."""
+        self[key] = value = OrderedDefaultDict()
+        return value
+
+def extract_data_type(data_type: str) -> tuple:
+    """Extract the signed flag and the base data type from a data type string.
+
+    Args:
+        data_type: The original data type such as uint8, int8.
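+            For example, 'uint8' -> ('unsigned', 'int8') and 'int8' -> ('signed', 'int8').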
+ + Returns: + (signed or unsigned, data type without signed) + """ + return ('signed', data_type) if data_type[0] != 'u' else ('unsigned', data_type[1:]) + +def reverted_data_type(signed_flag: str, data_type: str) -> str: + """Revert the data type.""" + return data_type if signed_flag == 'signed' else 'u' + data_type + +def get_adaptor_name(adaptor): + """Get adaptor name. + + Args: + adaptor: adaptor instance. + """ + adaptor_name = type(adaptor).__name__.lower() + adaptor_name_lst = ['onnx', 'tensorflow', 'pytorch'] + for name in adaptor_name_lst: + if adaptor_name.startswith(name): + return name + return "" \ No newline at end of file diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index 545b7de90cb..8061442b77d 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -116,8 +116,9 @@ def pre_proccess(self): self.conf, self._calib_dataloader, self._train_func, - self._eval_dataloader, self._eval_func, + self._eval_dataloader, + self._eval_metric, _resume, self.callbacks.hooks if self.callbacks is not None else None) @@ -289,6 +290,7 @@ def metric(self, user_metric): Multi-metrics: {topk: 1, MSE: {compare_label: False}, + } For the built-in metrics, please refer to below link: https://github.com/intel/neural-compressor/blob/master/docs/source/metric.md#supported-built-in-metric-matrix. diff --git a/neural_compressor/strategy/auto.py b/neural_compressor/strategy/auto.py index 3e8a1ef3072..26048f9aa30 100644 --- a/neural_compressor/strategy/auto.py +++ b/neural_compressor/strategy/auto.py @@ -34,8 +34,16 @@ class AutoTuneStrategy(TuneStrategy): and the tuning process ends once the condition meets the exit policy. """ - def __init__(self, model, conf, q_dataloader=None, q_func=None, \ - eval_dataloader=None, eval_func=None, resume=None, q_hooks=None): + def __init__(self, + model, + conf, + q_dataloader=None, + q_func=None, + eval_func=None, + eval_dataloader=None, + eval_metric=None, + resume=None, + q_hooks=None): """Init an auto tuning strategy. 
Args: diff --git a/neural_compressor/strategy/auto_mixed_precision.py b/neural_compressor/strategy/auto_mixed_precision.py index 04f0bc39307..44471670626 100644 --- a/neural_compressor/strategy/auto_mixed_precision.py +++ b/neural_compressor/strategy/auto_mixed_precision.py @@ -117,7 +117,7 @@ def traverse(self): tune_cfg = self._tune_cfg_converter(op_tuning_cfg) self.trials_count += 1 tuning_history = self._find_tuning_history(tune_cfg) - if tuning_history and self.trials_count < self.cfg.tuning.exit_policy.max_trials: + if tuning_history and self.trials_count < self.conf.quantization.tuning_criterion.max_trials: self.last_tune_result = tuning_history['last_tune_result'] self.best_tune_result = tuning_history['best_tune_result'] logger.warn("Find evaluated tuning config, skip.") diff --git a/neural_compressor/strategy/bayesian.py b/neural_compressor/strategy/bayesian.py index d267e8cdbc4..12ce0a23429 100644 --- a/neural_compressor/strategy/bayesian.py +++ b/neural_compressor/strategy/bayesian.py @@ -101,7 +101,7 @@ def next_tune_cfg(self): return if self.bayes_opt is None: self.bayes_opt = BayesianOptimization( - pbounds=pbounds, random_seed=self.cfg.tuning.random_seed) + pbounds=pbounds, random_seed=self.conf.options.random_seed) while True: params = self.bayes_opt.gen_next_params() logger.debug("Dump current bayesian params:") diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index b9e3187cb41..57cf6ec106f 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -18,7 +18,6 @@ """The base class for tuning strategy.""" from abc import abstractmethod -from enum import EnumMeta import os import math import copy @@ -31,6 +30,8 @@ from typing import OrderedDict as T_OrderedDict from neural_compressor.adaptor.tensorflow import TensorFlowAdaptor +from neural_compressor.config import PostTrainingQuantConfig +from ..config import MixedPrecisionConfig from ..objective import MultiObjective from ..adaptor import FRAMEWORKS from ..utils.utility import Statistics, dump_data_to_local @@ -78,9 +79,25 @@ def strategy_registry(cls): @strategy_registry class TuneStrategy(object): """Basic class for tuning strategy.""" - - def __init__(self, model, conf, q_dataloader=None, q_func=None, eval_dataloader=None, - eval_func=None, resume=None, q_hooks=None): + + def _check_tuning_status(self): + if self.eval_func: + self._not_tuning = False + return + elif self.eval_dataloader and self.eval_metric: + self._not_tuning = False + return + + def __init__(self, + model, + conf: PostTrainingQuantConfig, + q_dataloader=None, + q_func=None, + eval_func=None, + eval_dataloader=None, + eval_metric=None, + resume=None, + q_hooks=None): """Init the TuneStrategy. Args: @@ -99,15 +116,18 @@ def __init__(self, model, conf, q_dataloader=None, q_func=None, eval_dataloader= best_qmodel: The best quantized model that generated during the tuning process. 
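+
+        Example::
+
+            # A hypothetical sketch; strategies are normally created by fit().
+            strategy = TuneStrategy(model, conf, q_dataloader=calib_dataloader,
+                                    eval_dataloader=eval_dataloader,
+                                    eval_metric={'topk': 1})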
""" self.model = model - self.cfg = conf.usr_cfg - self.cfg_bk = copy.deepcopy(self.cfg) - self.history_path = self._create_path(self.cfg.tuning.workspace.path, './history.snapshot') - self.deploy_path = self._create_path(self.cfg.tuning.workspace.path, 'deploy.yaml') - self.eval_dataloader = eval_dataloader + self.conf = conf + self.history_path = self._create_path(self.conf.options.workspace, './history.snapshot') + self.deploy_path = self._create_path(self.conf.options.workspace, 'deploy.yaml') self.calib_dataloader = q_dataloader + self.eval_dataloader = eval_dataloader + self.eval_metric = eval_metric + self.eval_func = eval_func + # not tuning equals to performance only + self._not_tuning = True + self._check_tuning_status() self.q_func = q_func self.q_hooks = q_hooks - self.eval_func = eval_func GLOBAL_STATE.STATE = MODE.QUANTIZATION framework, framework_specific_info = self._set_framework_info(q_dataloader, q_func) self.adaptor = FRAMEWORKS[framework](framework_specific_info) @@ -119,29 +139,7 @@ def __init__(self, model, conf, q_dataloader=None, q_func=None, eval_dataloader= self.tune_result_record = [] self.tuning_history = [] self.tuning_result_data = [] - # The tuning history ever made, structured like below: - # [ - # { - # 'version': __version__, - # 'cfg': cfg1, - # 'framework': tensorflow - # 'baseline': baseline1, - # 'last_tune_result': last_tune_result1, - # 'best_tune_result': best_tune_result1, - # 'history': [ - # # tuning history under same yaml config - # {'tune_cfg': tune_cfg1, 'tune_result': \ - # tune_result1, 'q_config': q_config1, ...}, - - # ..., - # ], - # # new fields added by subclass for resuming - # ..., - # }, - # # tuning history under different yaml configs - # ..., - # ] - + self.baseline = None self.last_tune_result = None self.last_qmodel = None @@ -158,7 +156,7 @@ def __init__(self, model, conf, q_dataloader=None, q_func=None, eval_dataloader= self.set_tuning_space(conf) #For algo scheduler - self.algo_scheduler = AlgorithmScheduler(self.cfg.quantization.recipes) + self.algo_scheduler = AlgorithmScheduler(self.conf.quantization.recipes) self.algo_scheduler.dataloader = self.calib_dataloader # reuse the calibration iteration self.algo_scheduler.origin_model = self.model self.algo_scheduler.adaptor = self.adaptor @@ -179,6 +177,8 @@ def __init__(self, model, conf, q_dataloader=None, q_func=None, eval_dataloader= self._not_tuning_recipes_values = {} self._initialize_recipe() self.applied_all_recipes_flag = False + + if resume is not None: self.setup_resume(resume) @@ -215,7 +215,7 @@ def _initialize_recipe(self): # not tuning list: the value is not equal to the default value logger.info(f"Adaptor has {len(adaptor_recipes)} recipes.") logger.debug(adaptor_recipes) - usr_recipes_cfg = self.cfg_bk.quantization.recipes if self.cfg_bk.quantization.recipes else {} + usr_recipes_cfg = self.conf.quantization.recipes if self.conf.quantization.recipes else {} for recipe_name, recipe_val in usr_recipes_cfg.items(): # for not tuning recipes, use the value specified by user. 
if recipe_name in adaptor_recipes and recipe_val != adaptor_recipes[recipe_name][0]: @@ -350,9 +350,9 @@ def master_worker_handle(self, comm): break # send the next cfg if not exceed max trials - if self.overall_trials > self.cfg.tuning.exit_policy.max_trials: + if self.overall_trials > self.conf.quantization.tuning_criterion.max_trials: self.max_trial_flag = True - # elif time.time() - self.overall_time_start > self.cfg.tuning.exit_policy.timeout: + # elif time.time() - self.overall_time_start > self.conf.quantization.tuning_criterion.timeout: # self.max_time_flag = True elif cur_cfg_id < len(self.tune_cfg_lst): logger.info("[Rank {}]master sends new tuning cfg {} to rank: {}".format(comm.Get_rank(), \ @@ -590,8 +590,8 @@ def traverse(self): The main traverse logic which could be override by some concrete strategy which needs more hooks. """ self._eval_baseline() - if self.cfg.tuning.use_distributed_tuning: - logger.info("use distributed traverse: {}".format(self.cfg.tuning.use_distributed_tuning)) + if self.conf.quantization.use_distributed_tuning: + logger.info("use distributed traverse: {}".format(self.conf.quantization.use_distributed_tuning)) return self.distributed_traverse() traverse_start_time = time() for op_tuning_cfg in self.next_tune_cfg(): @@ -599,7 +599,7 @@ def traverse(self): tune_cfg = self._tune_cfg_converter(op_tuning_cfg) self.trials_count += 1 tuning_history = self._find_tuning_history(tune_cfg) - if tuning_history and self.trials_count < self.cfg.tuning.exit_policy.max_trials: + if tuning_history and self.trials_count < self.conf.quantization.tuning_criterion.max_trials: self.last_tune_result = tuning_history['last_tune_result'] self.best_tune_result = tuning_history['best_tune_result'] logger.warn("Find evaluated tuning config, skip.") @@ -623,13 +623,13 @@ def traverse(self): self.algo_scheduler.reset_exec_algorithms() assert self.last_qmodel # Return the last quantized model as a result. if performance only. 
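+            # _not_tuning is True when neither eval_func nor the
+            # (eval_dataloader, eval_metric) pair is provided, so the strategy
+            # keeps the first quantized model without accuracy-driven tuning.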
- if self.cfg.tuning.exit_policy.performance_only: + if self._not_tuning: self.best_qmodel = self.last_qmodel self._add_tuning_history(copy.deepcopy(tune_cfg), (-1, [0]), q_config=self.last_qmodel.q_config) return self.last_tune_result = self._evaluate(self.last_qmodel) self.cur_best_acc, self.cur_best_tuning_cfg = self.update_best_op_tuning_cfg(op_tuning_cfg) - need_stop = self.stop(self.cfg.tuning.exit_policy.timeout, self.trials_count) + need_stop = self.stop(self.conf.quantization.tuning_criterion.timeout, self.trials_count) # record the tuning history saved_tune_cfg = copy.deepcopy(tune_cfg) @@ -657,7 +657,7 @@ def traverse(self): continue # recover the best quantized model from tuning config self._recover_best_qmodel_from_tuning_cfg() - if self.cfg.tuning.diagnosis and self.cfg.tuning.diagnosis.diagnosis_after_tuning: + if self.conf.options.diagnosis: logger.debug(f'*** Start to do diagnosis (inspect tensor).') self._diagnosis() if self.use_multi_objective and len(self.tune_result_record) > 1 and \ @@ -667,7 +667,7 @@ def traverse(self): if best_result != self.best_tune_result: from neural_compressor.utils.utility import recover self.best_qmodel = recover(self.model.model, - os.path.join(self.cfg.tuning.workspace.path, 'history.snapshot'), + os.path.join(self.conf.options.workspace, 'history.snapshot'), best_trail) logger.debug(f"*** Update the best qmodel by recovering from history.") self.best_tune_result = best_result @@ -683,33 +683,20 @@ def _remove_redundant_qmodel(self): """ self.last_qmodel = None self.best_qmodel = None - - def _can_create_eval_func_from_cfg(self): - """Determine whether an eval function can be created from cfg. - - Returns: - Returns True if the eval func can be created from config, False otherwise. - """ - if self.cfg.evaluation and self.cfg.evaluation.accuracy and \ - (self.cfg.evaluation.accuracy.metric or self.cfg.evaluation.accuracy.multi_metrics)\ - and self.eval_dataloader: - return True - return False def _eval_baseline(self): """Evaluate the fp32 model if needed.""" - if not self._can_create_eval_func_from_cfg() and not self.eval_func: + if self._not_tuning: logger.info("Neither evaluation function nor metric is defined." 
\ " Generate a quantized model with default quantization configuration.") - self.cfg.tuning.exit_policy.performance_only = True - logger.info("Force setting 'tuning.exit_policy.performance_only = True'.") + self._not_tuning = True - if not self.cfg.tuning.exit_policy.performance_only: + if not self._not_tuning: # get fp32 model baseline if self.baseline is None: logger.info("Get FP32 model baseline.") self._fp32_model = self.model - self.baseline = self._evaluate(self.model) + self.baseline = self._evaluate(self.model) self.objectives.baseline = self.baseline # record the FP32 baseline self._add_tuning_history() @@ -828,14 +815,13 @@ def initial_tuning_cfg(self): """ from .utils.constant import auto_query_order, static_query_order, dynamic_query_order from .utils.tuning_space import initial_tuning_cfg_with_quant_mode - if self.cfg.quantization.approach == 'post_training_auto_quant': + if self.conf.quantization.approach == 'post_training_auto_quant': query_order = auto_query_order - elif self.cfg.quantization.approach == 'post_training_dynamic_quant': + elif self.conf.quantization.approach == 'post_training_dynamic_quant': query_order = dynamic_query_order - elif self.cfg.quantization.approach == 'post_training_static_quant': + elif self.conf.quantization.approach == 'post_training_static_quant': query_order = static_query_order - elif self.cfg.quantization.approach == 'quant_aware_training': - logger.info("!!! Currently, the qat tuning is not supported by strategy.") + elif self.conf.quantization.approach == 'quant_aware_training': query_order = auto_query_order quant_mode_wise_items = OrderedDict() # mode, op_item_lst @@ -929,15 +915,14 @@ def _tune_cfg_converter(self, op_tuning_cfg): self.calib_dataloader.batch_size) else: tune_cfg['calib_iteration'] = 1 - tune_cfg['advance'] = self.cfg.quantization.advance - tune_cfg['approach'] = self.cfg.quantization.approach + tune_cfg['approach'] = self.conf.quantization.approach # Add the recipe config tune_cfg['recipe_cfgs'] = tune_cfg.get('recipe_cfgs', {}) # For not tuning recipe, tune cfg use it directly tune_cfg['recipe_cfgs'].update(self._not_tuning_recipes_values) # WA for get the smooth quant args - if 'smooth_quant_args' in self.cfg_bk.quantization.recipes: - tune_cfg['recipe_cfgs']['smooth_quant_args'] = self.cfg_bk.quantization.recipes['smooth_quant_args'] + if 'smooth_quant_args' in self.conf.quantization.recipes: + tune_cfg['recipe_cfgs']['smooth_quant_args'] = self.conf.quantization.recipes['smooth_quant_args'] # For tuning recipe, use the default value if it not specified by recipe tuning sampler. for recipe_name, recipe_val in self._tuning_recipes_default_values.items(): if recipe_name not in tune_cfg['recipe_cfgs']: @@ -952,7 +937,7 @@ def set_tuning_space(self, conf): Args: conf: The Conf class instance includes all user configurations. 
""" - calib_sampling_size_lst = self.cfg.quantization.calibration.sampling_size + calib_sampling_size_lst = self.conf.quantization.calibration_sampling_size calib_sampling_size_lst = [int(calib_sampling_size) for calib_sampling_size in calib_sampling_size_lst] if self.calib_dataloader: self.calib_iter = [math.ceil(int(x) / self.calib_dataloader.batch_size) \ @@ -974,7 +959,7 @@ def setup_resume(self, resume): """ self.__dict__.update(resume) for history in self.tuning_history: - if self._same_yaml(history['cfg'], self.cfg): + if self._same_yaml(history['cfg'], self.conf): self.__dict__.update({k: v for k, v in history.items() \ if k not in ['version', 'history']}) logger.info("Start to resume tuning process.") @@ -993,14 +978,8 @@ def setup_resume(self, resume): def set_q_func(self): """Set the training function for quantization aware training.""" - if self.q_func == None and self.cfg.quantization.approach == 'quant_aware_training': - train_cfg = self.cfg.quantization.train - assert train_cfg, "train field of quantization section in yaml file must " \ - "be configured for quantization aware training if q_func is NOT set." - assert self.calib_dataloader, "dataloader field of train field of quantization " \ - "section in yaml file must be configured." - self.q_func = create_train_func(self.framework, self.calib_dataloader, \ - self.adaptor, train_cfg, hooks=self.q_hooks) + if self.conf.quantization.approach == 'quant_aware_training': + assert self.q_func != None, "Please set train func for quantization aware training" def _create_path(self, custom_path, filename): new_path = os.path.join(os.path.abspath(os.path.expanduser(custom_path)),filename) @@ -1009,95 +988,113 @@ def _create_path(self, custom_path, filename): return new_path def _set_framework_info(self, q_dataloader, q_func=None): - framework_specific_info = {'device': self.cfg.device, - 'approach': self.cfg.quantization.approach, - 'random_seed': self.cfg.tuning.random_seed, - 'performance_only': self.cfg.tuning.exit_policy.performance_only,} - framework = self.cfg.model.framework.lower() - framework_specific_info.update({'backend': self.cfg.model.get('backend', 'default')}) - framework_specific_info.update({'format': self.cfg.model.get('quant_format', 'default')}) - framework_specific_info.update({'domain': self.cfg.model.get('domain', 'auto')}) - - self.mixed_precision_mode = bool('mixed_precision' in self.cfg) or \ - bool('graph_optimization' in self.cfg) + framework_specific_info = {'device': self.conf.quantization.device, + 'approach': self.conf.quantization.approach, + 'random_seed': self.conf.options.random_seed, + 'performance_only': self._not_tuning} + framework = self.conf.quantization.framework.lower() + framework_specific_info.update({'backend': self.conf.quantization.backend}) + framework_specific_info.update({'format': self.conf.quantization.quant_format}) + framework_specific_info.update({'domain': self.conf.quantization.quant_format}) + + self.mixed_precision_mode = isinstance(self.conf.quantization, MixedPrecisionConfig) if 'tensorflow' in framework: framework_specific_info.update( - {"inputs": self.cfg.model.inputs, - "outputs": self.cfg.model.outputs, - 'workspace_path': self.cfg.tuning.workspace.path, - 'recipes': self.cfg.quantization.recipes, - 'use_bf16': self.cfg.use_bf16 if self.cfg.use_bf16 is not None else False}) + {"inputs": self.conf.quantization.inputs, + "outputs": self.conf.quantization.outputs, + 'workspace_path': self.conf.options.workspace, + 'recipes': self.conf.quantization.recipes, + 'use_bf16': 
self.conf.quantization.use_bf16 if self.conf.quantization.use_bf16 is not None else False})
             for item in ['scale_propagation_max_pooling', 'scale_propagation_concat']:
                 if item not in framework_specific_info['recipes']:
                     framework_specific_info['recipes'].update({item: True})
-            if self.cfg.model.backend == 'itex':
-                self.cfg.model.framework = 'tensorflow_itex'
+            if self.conf.quantization.backend == 'itex':
+                #TODO replace it when the config is ready
                 framework = 'tensorflow_itex'
         if 'keras' in framework:
             framework_specific_info.update({
-                'workspace_path': self.cfg.tuning.workspace.path, })
+                'workspace_path': self.conf.options.workspace, })
         if framework == 'mxnet':
            framework_specific_info.update({"q_dataloader": q_dataloader})
        if 'onnx' in framework.lower():
            if self.mixed_precision_mode:
                framework_specific_info.update({"approach": "post_training_dynamic_quant"})
            framework_specific_info.update({"deploy_path": os.path.dirname(self.deploy_path)})
-            framework_specific_info.update({'workspace_path': self.cfg.tuning.workspace.path})
-            framework_specific_info.update({'recipes': self.cfg.quantization.recipes})
-            framework_specific_info.update({'reduce_range': self.cfg.reduce_range})
-            framework_specific_info.update({'recipes': self.cfg.quantization.get('recipes', {})})
+            framework_specific_info.update({'workspace_path': self.conf.options.workspace})
+            framework_specific_info.update({'recipes': self.conf.quantization.recipes})
+            framework_specific_info.update({'reduce_range': self.conf.quantization.reduce_range})
            if framework.lower() == 'onnxrt_qdq' or \
                framework_specific_info['backend'] == 'onnxrt_trt_ep':
                framework_specific_info.update({'format': 'QDQ'})
                framework = 'onnxrt_qdq'
        if framework == 'pytorch_ipex' or framework == 'pytorch' or framework == 'pytorch_fx':
-            if self.cfg.model.backend == 'ipex':
-                self.cfg.model.framework = 'pytorch_ipex'
+            if self.conf.quantization.backend == 'ipex':
                framework = 'pytorch_ipex'
-            elif self.cfg.model.backend == 'default':
-                self.cfg.model.framework = 'pytorch_fx'
+            elif self.conf.quantization.backend == 'default':
                framework = 'pytorch_fx'
            if self.mixed_precision_mode:
                framework_specific_info.update({"approach": "post_training_dynamic_quant"})
            framework_specific_info.update({"q_dataloader": q_dataloader})
-            framework_specific_info.update({"use_bf16": self.cfg.use_bf16 \
-                if self.cfg.use_bf16 is not None else True})
+            framework_specific_info.update({"use_bf16": self.conf.quantization.use_bf16 \
+                if self.conf.quantization.use_bf16 is not None else True})
            framework_specific_info.update(
                {"workspace_path": os.path.dirname(self.deploy_path)})
-            if self.cfg['quantization']['op_wise'] is not None \
-                and 'default_qconfig' in self.cfg['quantization']['op_wise']:
+            if self.conf.quantization.op_name_dict is not None \
+                and 'default_qconfig' in self.conf.quantization.op_name_dict:
                framework_specific_info.update(
-                    {"default_qconfig": self.cfg['quantization']['op_wise']['default_qconfig']})
+                    {"default_qconfig": self.conf.quantization.op_name_dict['default_qconfig']})
            framework_specific_info.update({"q_func": q_func})
-            framework_specific_info.update({"example_inputs": self.cfg.quantization.example_inputs})
+            framework_specific_info.update({"example_inputs": self.conf.quantization.example_inputs})
        return framework, framework_specific_info
 
    def _set_objectives(self):
-        self.higher_is_better = bool(self.cfg.tuning.accuracy_criterion.higher_is_better)
-        self.use_multi_objective = deep_get(self.cfg,
'tuning.multi_objectives') and \ - len(self.cfg.tuning.multi_objectives.objective) > 1 - objectives = [i.lower() for i in self.cfg.tuning.multi_objectives.objective] if \ - self.use_multi_objective else [self.cfg.tuning.objective.lower()] - self.metric_weight = deep_get(self.cfg, 'evaluation.accuracy.multi_metrics.weight') - self.metric_name = ['Accuracy'] if \ - not deep_get(self.cfg, 'evaluation.accuracy.multi_metrics') else \ - self.cfg.evaluation.accuracy.multi_metrics.keys()-{'weight','higher_is_better'} - if len(self.metric_name) == 1: - self.metric_criterion = [self.higher_is_better] - elif not deep_get(self.cfg, 'evaluation.accuracy.multi_metrics.higher_is_better'): - # default is True - self.metric_criterion = [True] * len(self.metric_name) + # set objectives + self.higher_is_better = bool(self.conf.quantization.accuracy_criterion.higher_is_better) + obj_higher_is_better = None + obj_weight = None + if self.conf.quantization.tuning_criterion.multi_objectives: + obj_higher_is_better = self.conf.quantization.tuning_criterion.multi_objectives.get('higher_is_better', None) + obj_weight = self.conf.quantization.tuning_criterion.multi_objectives.get('weight', None) + obj_lst = self.conf.quantization.tuning_criterion.multi_objectives.get('objective', []) + self.use_multi_objective = len(obj_lst) > 0 + if self.use_multi_objective: + objectives = [i.lower() for i in obj_lst] else: - self.metric_criterion = \ - deep_get(self.cfg, 'evaluation.accuracy.multi_metrics.higher_is_better') - - self.objectives = MultiObjective(objectives, - self.cfg.tuning.accuracy_criterion, - self.metric_criterion, - self.metric_weight, - deep_get(self.cfg, 'tuning.multi_objectives.higher_is_better'), - deep_get(self.cfg, 'tuning.multi_objectives.weight')) + objectives = [self.conf.quantization.tuning_criterion.objective.lower()] + + # set metric + self.metric_name = ['Accuracy'] + self.metric_criterion = [self.higher_is_better] + self.metric_weight = None + use_multi_metrics = False + if self.eval_metric: + # metric name + # 'weight','higher_is_better', 'metric1', 'metric2', ... + if len(self.eval_metric.keys()) >= 4: + self.metric_name = self.eval_metric.keys() - {'weight','higher_is_better'} + use_multi_metrics = True + metric_higher_is_better = self.eval_metric.get('higher_is_better', None) + # metric criterion + if use_multi_metrics: + if metric_higher_is_better is not None: + self.metric_criterion = [metric_higher_is_better] * len(self.metric_name) + else: + self.metric_criterion = [True] * len(self.metric_name) + # metric weight + self.metric_weight = self.eval_metric.get('weight', None) + + accuracy_criterion = {'relative': 0.01, 'higher_is_better': True} + accuracy_criterion_conf = self.conf.quantization.accuracy_criterion + accuracy_criterion[accuracy_criterion_conf.criterion] = accuracy_criterion_conf.tolerable_loss + accuracy_criterion['higher_is_better'] = accuracy_criterion_conf.higher_is_better + self.objectives = MultiObjective(objectives=objectives, + accuracy_criterion=accuracy_criterion, + metric_criterion=self.metric_criterion, + metric_weight=self.metric_weight, + obj_criterion=obj_higher_is_better, + obj_weight=obj_weight) def _same_yaml(self, src_yaml, dst_yaml): """Check if the two yamls are the same. 
@@ -1146,41 +1143,43 @@ def update_best_op_tuning_cfg(self, op_tuning_cfg): return self.cur_best_acc, self.cur_best_tuning_cfg def deploy_config(self): - """Save the configuration locally for deployment.""" - acc_dataloader_cfg = deep_get(self.cfg, 'evaluation.accuracy.dataloader') - perf_dataloader_cfg = deep_get(self.cfg, 'evaluation.performance.dataloader') - # use acc dataloader if perf dataloader is not configured - if perf_dataloader_cfg is None: - perf_dataloader_cfg = acc_dataloader_cfg - - self.deploy_cfg = OrderedDict() - # int8 dataloader graph transform - if deep_get(perf_dataloader_cfg, 'transform.QuantizedInput') is not None \ - or deep_get(acc_dataloader_cfg, 'transform.QuantizedInput') is not None: - self.best_qmodel, scale = self.adaptor.quantize_input(self.best_qmodel) - deep_set(perf_dataloader_cfg, 'transform.QuantizedInput.dtype', 'int8') - deep_set(perf_dataloader_cfg, 'transform.QuantizedInput.scale', scale) - deep_set(acc_dataloader_cfg, 'transform.QuantizedInput.dtype', 'int8') - deep_set(acc_dataloader_cfg, 'transform.QuantizedInput.scale', scale) - - self.deploy_cfg['model'] = self.cfg.model - self.deploy_cfg['device'] = self.cfg.device - if self.cfg.evaluation is not None: - deep_set(self.cfg, 'evaluation.performance.dataloader',\ - perf_dataloader_cfg) - deep_set(self.cfg, 'evaluation.accuracy.dataloader', \ - acc_dataloader_cfg) - self.deploy_cfg['evaluation'] = self.cfg.evaluation - - def setup_yaml(): - represent_dict_order = lambda self, \ - data: self.represent_mapping('tag:yaml.org,2002:map', data.items()) - yaml.add_representer(OrderedDict, represent_dict_order) - yaml.add_representer(DotDict, represent_dict_order) - setup_yaml() - with open(self.deploy_path, 'w+') as f: - yaml.dump(self.deploy_cfg, f) - logger.info("Save deploy yaml to {}".format(self.deploy_path)) + return + #TODO uncomment it after config ready + # """Save the configuration locally for deployment.""" + # acc_dataloader_cfg = deep_get(self.cfg, 'evaluation.accuracy.dataloader') + # perf_dataloader_cfg = deep_get(self.cfg, 'evaluation.performance.dataloader') + # # use acc dataloader if perf dataloader is not configured + # if perf_dataloader_cfg is None: + # perf_dataloader_cfg = acc_dataloader_cfg + + # self.deploy_cfg = OrderedDict() + # # int8 dataloader graph transform + # if deep_get(perf_dataloader_cfg, 'transform.QuantizedInput') is not None \ + # or deep_get(acc_dataloader_cfg, 'transform.QuantizedInput') is not None: + # self.best_qmodel, scale = self.adaptor.quantize_input(self.best_qmodel) + # deep_set(perf_dataloader_cfg, 'transform.QuantizedInput.dtype', 'int8') + # deep_set(perf_dataloader_cfg, 'transform.QuantizedInput.scale', scale) + # deep_set(acc_dataloader_cfg, 'transform.QuantizedInput.dtype', 'int8') + # deep_set(acc_dataloader_cfg, 'transform.QuantizedInput.scale', scale) + + # self.deploy_cfg['model'] = self.cfg.model + # self.deploy_cfg['device'] = self.conf.quantization.device + # if self.cfg.evaluation is not None: + # deep_set(self.cfg, 'evaluation.performance.dataloader',\ + # perf_dataloader_cfg) + # deep_set(self.cfg, 'evaluation.accuracy.dataloader', \ + # acc_dataloader_cfg) + # self.deploy_cfg['evaluation'] = self.cfg.evaluation + + # def setup_yaml(): + # represent_dict_order = lambda self, \ + # data: self.represent_mapping('tag:yaml.org,2002:map', data.items()) + # yaml.add_representer(OrderedDict, represent_dict_order) + # yaml.add_representer(DotDict, represent_dict_order) + # setup_yaml() + # with open(self.deploy_path, 'w+') as f: + # 
yaml.dump(self.deploy_cfg, f) + # logger.info("Save deploy yaml to {}".format(self.deploy_path)) def _get_common_cfg(self, model_wise_cfg, op_wise_cfgs): """Get the common parts from the model_wise_cfg. @@ -1228,36 +1227,29 @@ def _evaluate(self, model): Objective: The objective value evaluated. """ if self.eval_func: - if self.cfg.tuning.tensorboard: + if self.conf.options.tensorboard: # Pytorch can insert observer to model in this hook. # Tensorflow don't support this mode for now model = self.adaptor._pre_eval_hook(model) val = self.objectives.evaluate( self.eval_func, model if self.framework == "pytorch_ipex" else model.model ) - if self.cfg.tuning.tensorboard: + if self.conf.options.tensorboard: # post_eval_hook to deal the tensor self.adaptor._post_eval_hook(model, accuracy=val[0]) else: - assert self.cfg.evaluation and self.cfg.evaluation.accuracy and \ - (self.cfg.evaluation.accuracy.metric or \ - self.cfg.evaluation.accuracy.multi_metrics), \ - "metric or multi_metrics field of accuracy field of evaluation" \ - " section should not be empty" - - postprocess_cfg = self.cfg.evaluation.accuracy.postprocess - metric_cfg = self.cfg.evaluation.accuracy.metric if \ - self.cfg.evaluation.accuracy.metric else \ - self.cfg.evaluation.accuracy.multi_metrics - iteration = -1 if self.cfg.evaluation.accuracy.iteration is None \ - else self.cfg.evaluation.accuracy.iteration + assert self._not_tuning, "Please set eval_dataloader and eval_metric for create eval_func" + + postprocess_cfg = None + metric_cfg = self.eval_metric + iteration = -1 eval_func = create_eval_func(self.framework, self.eval_dataloader, self.adaptor, metric_cfg, postprocess_cfg, iteration, - tensorboard = self.cfg.tuning.tensorboard, + tensorboard = self.conf.options.tensorboard, fp32_baseline = self.baseline == None) if getattr(self.eval_dataloader, 'distributed', False): @@ -1320,7 +1312,7 @@ def stop(self, timeout, trials_count): bool: True if need stop, otherwise False """ need_stop = False - if self.cfg.tuning.exit_policy.performance_only or \ + if self._not_tuning or \ self.objectives.compare(self.best_tune_result, self.baseline): self.best_tune_result = self.last_tune_result self.best_qmodel = self.last_qmodel @@ -1431,11 +1423,11 @@ def stop(self, timeout, trials_count): 'Best tune result']).print_stat() - if self.cfg.tuning.exit_policy.performance_only: + if self._not_tuning: need_stop = True elif timeout == 0 and self.best_tune_result: need_stop = True - elif self.trials_count >= self.cfg.tuning.exit_policy.max_trials: + elif self.trials_count >= self.conf.quantization.tuning_criterion.max_trials: need_stop = True else: need_stop = False @@ -1460,7 +1452,8 @@ def _find_tuning_history(self, tune_cfg): for tuning_history in self.tuning_history: # only check if a tune_cfg is evaluated under same yam config, excluding # some fields in tuning section of yaml, such as tensorboard, snapshot, resume. - if self._same_yaml(tuning_history['cfg'], self.cfg): + # TODO double check + if self._same_yaml(tuning_history['cfg'], self.conf): for history in tuning_history['history']: if history and history['tune_cfg'] == tune_cfg: return tuning_history @@ -1476,7 +1469,8 @@ def _find_history(self, tune_cfg): for tuning_history in self.tuning_history: # only check if a tune_cfg is evaluated under same yam config, excluding # some fields in tuning section of yaml, such as tensorboard, snapshot, resume. 
@@ -1499,24 +1493,48 @@ def _find_self_tuning_history(self):
 
     def _add_tuning_history(self, tune_cfg=None, tune_result=None, **kwargs):
         """Add tuning config to tuning history.
+
+        All the tuning history made so far, structured as below:
+        [
+          {
+            'version': __version__,
+            'cfg': cfg1,
+            'framework': 'tensorflow',
+            'baseline': baseline1,
+            'last_tune_result': last_tune_result1,
+            'best_tune_result': best_tune_result1,
+            'history': [
+                # tuning history under same yaml config
+                {'tune_cfg': tune_cfg1, 'tune_result': \
+                    tune_result1, 'q_config': q_config1, ...},
+
+                ...,
+            ],
+            # new fields added by subclass for resuming
+            ...,
+          },
+          # tuning history under different yaml configs
+          ...,
+        ]
+
+        Note that this record is added under the same yaml config.
         """
         found = False
         d = {'tune_cfg': tune_cfg, 'tune_result': tune_result}
         for tuning_history in self.tuning_history:
-            if self._same_yaml(tuning_history['cfg'], self.cfg):
+            if self._same_yaml(tuning_history['cfg'], self.conf):
                 d.update(kwargs)
                 tuning_history['history'].append(d)
                 tuning_history['last_tune_result'] = self.last_tune_result
                 tuning_history['best_tune_result'] = self.best_tune_result
-                tuning_history['cfg'] = self.cfg
+                tuning_history['cfg'] = self.conf
                 found = True
                 break
 
         if not found:
             tuning_history = {}
             tuning_history['version'] = __version__
-            tuning_history['cfg'] = self.cfg
+            tuning_history['cfg'] = self.conf
             tuning_history['baseline'] = self.baseline
             tuning_history['last_tune_result'] = self.last_tune_result
             tuning_history['best_tune_result'] = self.best_tune_result
@@ -1538,15 +1556,15 @@ def _collect_ops_by_quant_mode(self, tune_cfg, quant_mode):
 
     def _diagnosis(self):
         import logging
         logger = logging.getLogger("neural_compressor")
-        iteration_list = self.cfg.tuning.diagnosis.iteration_list
-        inspect_type = self.cfg.tuning.diagnosis.inspect_type
-        save_to_disk = self.cfg.tuning.diagnosis.save_to_disk
-        save_path = self.cfg.tuning.diagnosis.save_path
+        iteration_list = [1]
+        inspect_type = 'all'
+        save_to_disk = True
+        save_path = './nc_workspace/inspect_saved/'
         inspect_node_lst, updated_cfg = self.adaptor.diagnosis_helper(self._fp32_model,
                                                                       self.last_qmodel,
                                                                       self.tune_cfg,
                                                                       save_path = save_path)
-        op_list = self.cfg.tuning.diagnosis.op_list
+        op_list = []
         if not op_list:
             op_list = list(inspect_node_lst)
         else:
diff --git a/neural_compressor/strategy/utils/tuning_space.py b/neural_compressor/strategy/utils/tuning_space.py
index 505fdef7a15..07909d7f711 100644
--- a/neural_compressor/strategy/utils/tuning_space.py
+++ b/neural_compressor/strategy/utils/tuning_space.py
@@ -127,7 +127,7 @@ def __init__(self, capability, conf, framework=None):
         self.op_type_wise_items = defaultdict(list)  # op_type: {(op_name, op_type), ...}
         self.framework = framework
         self.ops_dtype = defaultdict(OrderedDict)
-        usr_cfg = conf.usr_cfg if conf else None
+        self._usr_cfg = self._init_usr_cfg()
         self.op_items = {} # {(op_name, op_type): {(path): data
type}} self.ops_data_type = OrderedDefaultDict() @@ -135,7 +135,15 @@ def __init__(self, capability, conf, framework=None): # {(op_name, op_type): {path1, path2, ...} self.ops_path_set = defaultdict(set) - self._create_tuning_space(capability, usr_cfg) + self._create_tuning_space(capability, self._usr_cfg) + + def _init_usr_cfg(self): + """Init user config.""" + usr_cfg = {'quantization': {}} + usr_cfg['quantization']['model_wise'] = None + usr_cfg['quantization']['optype_wise'] = self.conf.quantization.op_type_dict + usr_cfg['quantization']['op_wise'] = self.conf.quantization.op_name_dict + return usr_cfg def _parse_capability(self, capability: Dict) -> None: """Parse the capability and construct the tuning space(a tree). diff --git a/test/strategy/test_basic.py b/test/strategy/test_basic.py index b110238c3d1..781a7ee333f 100644 --- a/test/strategy/test_basic.py +++ b/test/strategy/test_basic.py @@ -2,121 +2,6 @@ import numpy as np import unittest import shutil -import os -import yaml - -def build_fake_yaml(): - fake_yaml = ''' - model: - name: fake_yaml - framework: tensorflow - inputs: x - outputs: op2_to_store - device: cpu - evaluation: - accuracy: - metric: - topk: 1 - tuning: - strategy: - name: basic - accuracy_criterion: - relative: 0.01 - workspace: - path: saved - ''' - y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) - f.close() - -def build_fake_yaml2(): - fake_yaml = ''' - model: - name: fake_yaml - framework: tensorflow - inputs: x - outputs: op2_to_store - device: cpu - evaluation: - accuracy: - metric: - topk: 1 - tuning: - strategy: - name: basic - exit_policy: - max_trials: 3 - accuracy_criterion: - relative: -0.01 - workspace: - path: saved - ''' - y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml2.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) - f.close() - -def build_fake_yaml3(): - fake_yaml = ''' - model: - name: fake_yaml - framework: tensorflow - inputs: x - outputs: op2_to_store - device: cpu - evaluation: - accuracy: - multi_metrics: - topk: 1 - MSE: - compare_label: False - tuning: - strategy: - name: basic - exit_policy: - max_trials: 3 - timeout: 50 - accuracy_criterion: - relative: -0.01 - workspace: - path: saved - ''' - y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml3.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) - f.close() - -def build_fake_yaml4(): - fake_yaml = ''' - model: - name: fake_yaml - framework: tensorflow - inputs: x - outputs: op2_to_store - device: cpu - evaluation: - accuracy: - multi_metrics: - topk: 1 - MSE: - compare_label: False - weight: [1, 0] - tuning: - strategy: - name: basic - exit_policy: - max_trials: 3 - timeout: 50 - accuracy_criterion: - relative: -0.01 - workspace: - path: saved - ''' - y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml4.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) - f.close() def build_fake_model(): import tensorflow as tf @@ -160,63 +45,10 @@ class TestBasicTuningStrategy(unittest.TestCase): @classmethod def setUpClass(self): self.constant_graph = build_fake_model() - build_fake_yaml() - build_fake_yaml2() - build_fake_yaml3() - build_fake_yaml4() @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') - os.remove('fake_yaml2.yaml') - os.remove('fake_yaml3.yaml') - os.remove('fake_yaml4.yaml') shutil.rmtree('saved', ignore_errors=True) - - def test_run_basic_one_trial(self): - from neural_compressor.experimental import Quantization, common 
- - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.eval_dataloader = common.DataLoader(dataset) - quantizer.model = self.constant_graph - quantizer.fit() - - # resume tuning history - quantizer.conf.usr_cfg.tuning.workspace.resume = 'saved/history.snapshot' - quantizer.fit() - - def test_run_basic_max_trials(self): - from neural_compressor.experimental import Quantization, common - - quantizer = Quantization('fake_yaml2.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.eval_dataloader = common.DataLoader(dataset) - quantizer.model = self.constant_graph - quantizer.fit() - - def test_run_basic_max_trials_multimetric(self): - from neural_compressor.experimental import Quantization, common - - quantizer = Quantization('fake_yaml3.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.eval_dataloader = common.DataLoader(dataset) - quantizer.model = self.constant_graph - quantizer.fit() - - def test_run_basic_max_trials_multimetric_weight(self): - from neural_compressor.experimental import Quantization, common - - quantizer = Quantization('fake_yaml4.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.eval_dataloader = common.DataLoader(dataset) - quantizer.model = self.constant_graph - quantizer.fit() - def test_run_basic_one_trial_new_api(self): from neural_compressor.quantization import fit @@ -227,9 +59,12 @@ def test_run_basic_one_trial_new_api(self): dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) dataloader = DATALOADERS["tensorflow"](dataset) + def fake_eval(model): + return 1 + # tuning and accuracy criterion conf = PostTrainingQuantConfig() - q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader= dataloader, eval_dataloader=dataloader) + q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader= dataloader, eval_func=fake_eval) self.assertIsNotNone(q_model) def test_no_tuning(self): diff --git a/test/strategy/test_basic_1.x.py b/test/strategy/test_basic_1.x.py new file mode 100644 index 00000000000..89b47ffa722 --- /dev/null +++ b/test/strategy/test_basic_1.x.py @@ -0,0 +1,221 @@ +"""Tests for quantization""" +import numpy as np +import unittest +import shutil +import os +import yaml + +def build_fake_yaml(): + fake_yaml = ''' + model: + name: fake_yaml + framework: tensorflow + inputs: x + outputs: op2_to_store + device: cpu + evaluation: + accuracy: + metric: + topk: 1 + tuning: + strategy: + name: basic + accuracy_criterion: + relative: 0.01 + workspace: + path: saved + ''' + y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) + with open('fake_yaml.yaml',"w",encoding="utf-8") as f: + yaml.dump(y,f) + f.close() + +def build_fake_yaml2(): + fake_yaml = ''' + model: + name: fake_yaml + framework: tensorflow + inputs: x + outputs: op2_to_store + device: cpu + evaluation: + accuracy: + metric: + topk: 1 + tuning: + strategy: + name: basic + exit_policy: + max_trials: 3 + accuracy_criterion: + relative: -0.01 + workspace: + path: saved + ''' + y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) + with open('fake_yaml2.yaml',"w",encoding="utf-8") as f: + yaml.dump(y,f) + f.close() + +def build_fake_yaml3(): + fake_yaml = ''' + model: + name: fake_yaml + 
framework: tensorflow + inputs: x + outputs: op2_to_store + device: cpu + evaluation: + accuracy: + multi_metrics: + topk: 1 + MSE: + compare_label: False + tuning: + strategy: + name: basic + exit_policy: + max_trials: 3 + timeout: 50 + accuracy_criterion: + relative: -0.01 + workspace: + path: saved + ''' + y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) + with open('fake_yaml3.yaml',"w",encoding="utf-8") as f: + yaml.dump(y,f) + f.close() + +def build_fake_yaml4(): + fake_yaml = ''' + model: + name: fake_yaml + framework: tensorflow + inputs: x + outputs: op2_to_store + device: cpu + evaluation: + accuracy: + multi_metrics: + topk: 1 + MSE: + compare_label: False + weight: [1, 0] + tuning: + strategy: + name: basic + exit_policy: + max_trials: 3 + timeout: 50 + accuracy_criterion: + relative: -0.01 + workspace: + path: saved + ''' + y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) + with open('fake_yaml4.yaml',"w",encoding="utf-8") as f: + yaml.dump(y,f) + f.close() + +def build_fake_model(): + import tensorflow as tf + try: + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') + y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') + z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') + op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID', ) + last_identity = tf.identity(op2, name='op2_to_store') + sess.run(tf.compat.v1.global_variables_initializer()) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + except: + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') + y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') + z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') + op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID') + last_identity = tf.identity(op2, name='op2_to_store') + + sess.run(tf.compat.v1.global_variables_initializer()) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + return graph + +class TestBasicTuningStrategy(unittest.TestCase): + + @classmethod + def setUpClass(self): + self.constant_graph = build_fake_model() + build_fake_yaml() + build_fake_yaml2() + build_fake_yaml3() + build_fake_yaml4() + + @classmethod + def tearDownClass(self): + os.remove('fake_yaml.yaml') + os.remove('fake_yaml2.yaml') + os.remove('fake_yaml3.yaml') + os.remove('fake_yaml4.yaml') + shutil.rmtree('saved', ignore_errors=True) + + def test_run_basic_one_trial(self): + from neural_compressor.experimental import Quantization, common + + quantizer = Quantization('fake_yaml.yaml') + dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.eval_dataloader = common.DataLoader(dataset) + 
quantizer.model = self.constant_graph
+        quantizer.fit()
+
+        # resume tuning history
+        quantizer.conf.usr_cfg.tuning.workspace.resume = 'saved/history.snapshot'
+        quantizer.fit()
+
+    def test_run_basic_max_trials(self):
+        from neural_compressor.experimental import Quantization, common
+
+        quantizer = Quantization('fake_yaml2.yaml')
+        dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True)
+        quantizer.calib_dataloader = common.DataLoader(dataset)
+        quantizer.eval_dataloader = common.DataLoader(dataset)
+        quantizer.model = self.constant_graph
+        quantizer.fit()
+
+    def test_run_basic_max_trials_multimetric(self):
+        from neural_compressor.experimental import Quantization, common
+
+        quantizer = Quantization('fake_yaml3.yaml')
+        dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True)
+        quantizer.calib_dataloader = common.DataLoader(dataset)
+        quantizer.eval_dataloader = common.DataLoader(dataset)
+        quantizer.model = self.constant_graph
+        quantizer.fit()
+
+    def test_run_basic_max_trials_multimetric_weight(self):
+        from neural_compressor.experimental import Quantization, common
+
+        quantizer = Quantization('fake_yaml4.yaml')
+        dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True)
+        quantizer.calib_dataloader = common.DataLoader(dataset)
+        quantizer.eval_dataloader = common.DataLoader(dataset)
+        quantizer.model = self.constant_graph
+        quantizer.fit()
+
+if __name__ == "__main__":
+    unittest.main()

From 53dc10d0abc07e2bee3f05c4479221ca23af32ef Mon Sep 17 00:00:00 2001
From: yiliu30 
Date: Wed, 29 Mar 2023 13:23:27 +0800
Subject: [PATCH 006/103] remove some comments

---
 neural_compressor/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/neural_compressor/__init__.py b/neural_compressor/__init__.py
index bee0012eb81..51cc9386f38 100644
--- a/neural_compressor/__init__.py
+++ b/neural_compressor/__init__.py
@@ -22,7 +22,8 @@
 # we need to set a global 'NA' backend, or Model can't be used
 from .utils.utility import set_random_seed, set_tensorboard, set_workspace
 from .utils import options
-# from .config import conf
+from .conf.config import conf
+from .conf.pythonic_config import config
 from .config import DistillationConfig, PostTrainingQuantConfig, \
                     WeightPruningConfig, QuantizationAwareTrainingConfig, \
                     MixedPrecisionConfig

From 9ec6ef41f34f0832ccda77ace6b5c36ffffcf559 Mon Sep 17 00:00:00 2001
From: "Cheng, Zixuan" 
Date: Wed, 29 Mar 2023 17:47:04 +0800
Subject: [PATCH 007/103] rewrite configs for config classes

Signed-off-by: Cheng, Zixuan 
---
 neural_compressor/__init__.py      |  4 --
 neural_compressor/benchmark.py     | 93 ++++++++++++------------------
 neural_compressor/config.py        | 10 ++--
 neural_compressor/mix_precision.py | 85 +++++++++++++--------------
 neural_compressor/quantization.py  | 57 +++++++-----------
 neural_compressor/training.py      |  2 +
 6 files changed, 106 insertions(+), 145 deletions(-)

diff --git a/neural_compressor/__init__.py b/neural_compressor/__init__.py
index 88f74b88ccb..82f88fb4d92 100644
--- a/neural_compressor/__init__.py
+++ b/neural_compressor/__init__.py
@@ -16,14 +16,10 @@
 # limitations under the License.
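Taken together, PATCH 006 and the trim below leave the package root exporting the pythonic config classes and the utility setters. A small sketch of the resulting user-facing surface (names taken from the import lines above; the seed value is arbitrary):

    from neural_compressor import PostTrainingQuantConfig, set_random_seed

    set_random_seed(9527)
    conf = PostTrainingQuantConfig()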
"""Intel® Neural Compressor: An open-source Python library supporting popular model compression techniques.""" -from .benchmark import _Benchmark from .version import __version__ -from .contrib import * # we need to set a global 'NA' backend, or Model can't be used from .utils.utility import set_random_seed, set_tensorboard, set_workspace from .utils import options -from .conf.config import conf -from .conf.pythonic_config import config from .config import DistillationConfig, PostTrainingQuantConfig, \ WeightPruningConfig, QuantizationAwareTrainingConfig, \ MixedPrecisionConfig diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py index 8e47ba7b64e..a1f13c01452 100644 --- a/neural_compressor/benchmark.py +++ b/neural_compressor/benchmark.py @@ -26,18 +26,14 @@ from threading import Thread from .adaptor import FRAMEWORKS from .objective import MultiObjective -from .conf.config import BenchmarkConf +from .config import Config, BenchmarkConfig from .utils import logger from .utils import OPTIONS from .utils.utility import GLOBAL_STATE, MODE -from .conf.dotdict import deep_get, deep_set from .model import BaseModel from .model import Model as NCModel from .model.model import get_model_fwk_name -from .conf.pythonic_config import Config from .utils import logger -from .conf.pythonic_config import Config -from .config import BenchmarkConfig from .utils.utility import Statistics @@ -146,7 +142,7 @@ class _Benchmark(object): With the objective setting, user can get the data of what they configured in yaml. Args: - conf (obj): The config.BenchmarkConfig class containing accuracy goal, tuning objective etc. + conf (obj): The BenchmarkConfig class containing accuracy goal, tuning objective etc. """ def __init__(self, conf): @@ -158,11 +154,9 @@ def __init__(self, conf): self._results = {} assert isinstance(conf, BenchmarkConfig), \ "The config object should be config.BenchmarkConfig, not {}".format(type(conf)) - conf = Config(quantization=None, benchmark=conf, pruning=None, distillation=None, nas=None) - self.conf = BenchmarkConf() - self.conf.map_pyconfig_to_cfg(conf) - if self.conf.usr_cfg.model.framework != 'NA': - self.framework = self.conf.usr_cfg.model.framework.lower() + self.conf = Config(quantization=None, benchmark=conf, pruning=None, distillation=None, nas=None) + if self.conf.quantization.framework is not None: + self.framework = self.conf.quantization.framework.lower() def __call__(self, raw_cmd=None): """Directly call a Benchmark object. @@ -170,12 +164,10 @@ def __call__(self, raw_cmd=None): Args: raw_cmd: raw command used for benchmark """ - cfg = self.conf.usr_cfg - assert cfg.evaluation is not None, 'benchmark evaluation filed should not be None...' + cfg = self.conf assert sys.platform in ['linux', 'win32'], 'only support platform windows and linux...' 
-        set_all_env_var(deep_get(cfg, 'evaluation.performance.configs'))
         # disable multi-instance for running benchmark on GPU device
-        if cfg.device == 'gpu':
+        if cfg.quantization.device == 'gpu':
             set_env_var('NC_ENV_CONF', True, overwrite_existing=True)
 
         logger.info("Start to run Benchmark.")
@@ -328,53 +320,44 @@ def run_instance(self):
            by user config and returns model performance
         """
         if self._b_func is None:
-            cfg = self.conf.usr_cfg
+            cfg = self.conf
             GLOBAL_STATE.STATE = MODE.BENCHMARK
-            framework_specific_info = {'device': cfg.device, \
+            framework_specific_info = {'device': cfg.quantization.device, \
                                        'approach': cfg.quantization.approach, \
-                                       'random_seed': cfg.tuning.random_seed,
-                                       'backend': cfg.model.get('backend', 'default'),
-                                       'format': cfg.model.get('quant_format', 'default')}
+                                       'random_seed': cfg.options.random_seed,
+                                       'backend': cfg.quantization.backend \
+                                           if cfg.quantization.backend is not None else 'default',
+                                       'format': cfg.quantization.quant_format \
+                                           if cfg.quantization.quant_format is not None else 'default'}
-            framework = cfg.model.framework.lower()
+            framework = cfg.quantization.framework.lower()
             if 'tensorflow' in framework:
-                framework_specific_info.update({"inputs": cfg.model.inputs, \
-                                                "outputs": cfg.model.outputs, \
-                                                "recipes": cfg.model.recipes, \
-                                                'workspace_path': cfg.tuning.workspace.path})
+                framework_specific_info.update({"inputs": cfg.quantization.inputs, \
+                                                "outputs": cfg.quantization.outputs, \
+                                                "recipes": cfg.quantization.recipes, \
+                                                'workspace_path': cfg.options.workspace})
             if framework == 'keras':
-                framework_specific_info.update({'workspace_path': cfg.tuning.workspace.path})
+                framework_specific_info.update({'workspace_path': cfg.options.workspace})
             if framework == 'mxnet':
                 framework_specific_info.update({"b_dataloader": self._b_dataloader})
-            if 'onnx' in framework.lower():
+            if 'onnx' in framework:
                 framework_specific_info.update(
-                    {'workspace_path': cfg.tuning.workspace.path, \
+                    {'workspace_path': cfg.options.workspace, \
                      'graph_optimization': OPTIONS[framework].graph_optimization})
             if framework == 'pytorch_ipex' or framework == 'pytorch' or framework == 'pytorch_fx':
-                framework_specific_info.update({"workspace_path": cfg.tuning.workspace.path,
+                framework_specific_info.update({"workspace_path": cfg.options.workspace,
                                                 "q_dataloader": None})
 
             assert isinstance(self._model, BaseModel), 'need set neural_compressor Model for quantization....'
adaptor = FRAMEWORKS[framework](framework_specific_info) - if deep_get(cfg, 'evaluation.performance.iteration') == -1 and 'dummy_v2' in \ - deep_get(cfg, 'evaluation.performance.dataloader.dataset', {}): - deep_set(cfg, 'evaluation.performance.iteration', 10) - - iteration = -1 if deep_get(cfg, 'evaluation.performance.iteration') is None \ - else deep_get(cfg, 'evaluation.performance.iteration') - - b_postprocess_cfg = deep_get(cfg, 'evaluation.performance.postprocess') - assert self._b_dataloader is not None, "dataloader should not be None" from neural_compressor.utils.create_obj_from_config import create_eval_func self._b_func = create_eval_func(self.framework, \ self._b_dataloader, \ adaptor, \ - None, \ - b_postprocess_cfg, - iteration=iteration) + None) self.objectives = MultiObjective(["performance"], {'relative': 0.1}, @@ -385,7 +368,7 @@ def run_instance(self): # also measurer have result list among steps acc, _ = val batch_size = self._b_dataloader.batch_size - warmup = deep_get(cfg, "evaluation.performance.warmup") + warmup = cfg.benchmark.warmup if len(self.objectives.objectives[0].result_list()) < warmup: if len(self.objectives.objectives[0].result_list()) > 1 and warmup != 0: warmup = 1 @@ -489,43 +472,43 @@ def model(self, user_model): be careful of the name of the model configured in the yaml file, make sure the name is in the supported slim model list. """ - cfg = self.conf.usr_cfg - if cfg.model.framework == 'NA': + cfg = self.conf + if cfg.quantization.framework is None: assert not isinstance(user_model, BaseModel), \ "Please pass an original framework model but not neural compressor model!" self.framework = get_model_fwk_name(user_model) if self.framework == "tensorflow": from .model.tensorflow_model import get_model_type - if get_model_type(user_model) == 'keras' and cfg.model.backend == 'itex': + if get_model_type(user_model) == 'keras' and cfg.quantization.backend == 'itex': self.framework = 'keras' if self.framework == "pytorch": - if cfg.model.backend == "default": + if cfg.quantization.backend == "default": self.framework = "pytorch_fx" - elif cfg.model.backend == "ipex": + elif cfg.quantization.backend == "ipex": self.framework = "pytorch_ipex" import intel_extension_for_pytorch - cfg.model.framework = self.framework + cfg.quantization.framework = self.framework if not isinstance(user_model, BaseModel): logger.warning("Force convert framework model to neural_compressor model.") if "tensorflow" in self.framework or self.framework == "keras": - self._model = NCModel(user_model, backend=self.framework, device=cfg.device) + self._model = NCModel(user_model, backend=self.framework, device=cfg.quantization.device) else: self._model = NCModel(user_model, backend=self.framework) else: # It is config of neural_compressor version < 2.0, no need in 2.0 - if cfg.model.framework == "pytorch_ipex": + if cfg.quantization.framework == "pytorch_ipex": from neural_compressor.model.torch_model import IPEXModel if not isinstance(user_model, IPEXModel): - self._model = NCModel(user_model.model, framework=cfg.model.framework) + self._model = NCModel(user_model.model, framework=cfg.quantization.framework) return self._model = user_model if 'tensorflow' in self.framework: - self._model.name = cfg.model.name - self._model.output_tensor_names = cfg.model.outputs - self._model.input_tensor_names = cfg.model.inputs - self._model.workspace_path = cfg.tuning.workspace.path + self._model.name = cfg.quantization.model_name + self._model.output_tensor_names = cfg.quantization.outputs + 
self._model.input_tensor_names = cfg.quantization.inputs + self._model.workspace_path = cfg.options.workspace def __repr__(self): """Get the object representation in string format.""" diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 370b9245409..3cfe38dd1eb 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -500,6 +500,9 @@ def multi_objectives(self): @multi_objectives.setter def multi_objectives(self, multi_objectives): if _check_value('multi_objectives', multi_objectives, dict): + if 'weight' in multi_objectives.keys() and isinstance(multi_objectives['weight'], list): + assert len(multi_objectives['objective']) == len(multi_objectives['weight']) + for k, v in multi_objectives.items(): _check_value('multi_objectives', k, str, ['objective', 'weight', 'higher_is_better']) if k == 'objective': @@ -1036,6 +1039,7 @@ def __init__(self, op_type_dict=None, op_name_dict=None, reduce_range=None, + example_inputs=None, excluded_precisions=[], quant_level="auto", accuracy_criterion=accuracy_criterion, @@ -1054,6 +1058,7 @@ def __init__(self, op_type_dict=op_type_dict, op_name_dict=op_name_dict, reduce_range=reduce_range, + example_inputs=example_inputs, excluded_precisions=excluded_precisions, quant_level=quant_level, accuracy_criterion=accuracy_criterion, @@ -1152,11 +1157,6 @@ def __init__(self, backend=backend, op_type_dict=op_type_dict, op_name_dict=op_name_dict, - strategy=tuning_criterion.strategy, - strategy_kwargs=tuning_criterion.strategy_kwargs, - objective=tuning_criterion.objective, - timeout=tuning_criterion.timeout, - max_trials=tuning_criterion.max_trials, reduce_range=reduce_range, excluded_precisions=excluded_precisions, quant_level=quant_level, diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py index 49712b429ee..f1a29045c46 100644 --- a/neural_compressor/mix_precision.py +++ b/neural_compressor/mix_precision.py @@ -22,10 +22,8 @@ import random from .utils.utility import time_limit, CpuInfo from .strategy import STRATEGIES -from .conf.pythonic_config import Config -from .conf.config import MixedPrecision_Conf +from .config import Config from .utils import logger -from .conf.dotdict import deep_get, deep_set, DotDict from .model.model import BaseModel, get_model_fwk_name, Model, MODELS class _MixedPrecision: @@ -39,7 +37,7 @@ class _MixedPrecision: from neural_compressor.config import MixedPrecisionConfig def eval_func(model): ... - return accuracy + return accuracy conf = MixedPrecisionConfig() output_model = mix_precision.fit( @@ -54,10 +52,8 @@ def __init__(self, conf=None): Args: conf (obj): The MixedPrecisionConfig class containing accuracy goal, tuning objective etc. """ - conf = Config(quantization=conf, benchmark=None, pruning=None, distillation=None, nas=None) - self.conf = MixedPrecision_Conf() - self.conf.map_pyconfig_to_cfg(conf) - seed = self.conf.usr_cfg.tuning.random_seed + self.conf = Config(quantization=conf, benchmark=None, pruning=None, distillation=None, nas=None) + seed = self.conf.options.random_seed random.seed(seed) np.random.seed(seed) @@ -68,13 +64,13 @@ def __init__(self, conf=None): def pre_process(self): """Create strategy object for tuning.""" - cfg = self.conf.usr_cfg + cfg = self.conf strategy = 'automixedprecision' _resume = None # check if interrupted tuning procedure exists. if yes, it will resume the # whole auto tune process. 
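Stepping back to the config.py hunk above: the new length check on multi_objectives means the weight list must line up one-to-one with the objective list. A valid setting would look like the dict below (higher_is_better is optional; only these three keys pass the check):

    multi_objectives = {
        'objective': ['performance', 'accuracy'],
        'weight': [0.7, 0.3],
        'higher_is_better': [True, True],
    }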
-        self.resume_file = os.path.abspath(os.path.expanduser(cfg.tuning.workspace.resume)) \
-                           if cfg.tuning.workspace and cfg.tuning.workspace.resume else None
+        self.resume_file = os.path.abspath(os.path.expanduser(cfg.options.resume_from)) \
+                           if cfg.options.workspace and cfg.options.resume_from else None
         if self.resume_file:
             assert os.path.exists(self.resume_file), \
                 "The specified resume file {} doesn't exist!".format(self.resume_file)
@@ -88,12 +84,13 @@ def pre_process(self):
             None,
             self._eval_dataloader,
             self._eval_func,
+            self._eval_metric,
             _resume)
 
     def execute(self):
         """Execute routine based on strategy design."""
         try:
-            with time_limit(self.conf.usr_cfg.tuning.exit_policy.timeout):
+            with time_limit(self.conf.quantization.tuning_criterion.timeout):
                 self.strategy.traverse()
         except KeyboardInterrupt:
             pass
@@ -137,7 +134,7 @@ def precisions(self, customized_precisions):
             self._precisions = sorted([i.strip() for i in customized_precisions])
         elif isinstance(customized_precisions, str):
             self._precisions = sorted([i.strip() for i in customized_precisions.split(',')])
-        self.conf.usr_cfg.mixed_precision.precisions = self._precisions
+        self.conf.mixed_precision.precision = self._precisions
 
     @property
     def eval_dataloader(self):
@@ -193,55 +190,55 @@ def model(self, user_model):
            of tensorflow, be careful of the name of model configured in yaml file,
            make sure the name is in supported slim model list.
         """
-        cfg = self.conf.usr_cfg
-        if cfg.model.framework == 'NA':
+        cfg = self.conf
+        if cfg.quantization.framework == 'NA':
             if isinstance(user_model, BaseModel):
-                cfg.model.framework = list(MODELS.keys())[list(MODELS.values()).index(type(user_model))]
-                if cfg.model.backend == "ipex":
-                    assert cfg.model.framework == "pytorch_ipex", "Please wrap the model with correct Model class!"
-                if cfg.model.backend == "itex":
+                cfg.quantization.framework = list(MODELS.keys())[list(MODELS.values()).index(type(user_model))]
+                if cfg.quantization.backend == "ipex":
+                    assert cfg.quantization.framework == "pytorch_ipex", "Please wrap the model with correct Model class!"
+                if cfg.quantization.backend == "itex":
                     from .model.tensorflow_model import get_model_type
                     if get_model_type(user_model.model) == 'keras':
-                        assert cfg.model.framework == "keras", "Please wrap the model with KerasModel class!"
+                        assert cfg.quantization.framework == "keras", "Please wrap the model with KerasModel class!"
                     else:
-                        assert cfg.model.framework == "pytorch_itex", \
+                        assert cfg.quantization.framework == "pytorch_itex", \
                             "Please wrap the model with TensorflowModel class!"
else: framework = get_model_fwk_name(user_model) if framework == "tensorflow": from .model.tensorflow_model import get_model_type - if get_model_type(user_model) == 'keras' and cfg.model.backend == 'itex': + if get_model_type(user_model) == 'keras' and cfg.quantization.backend == 'itex': framework = 'keras' if framework == "pytorch": - if cfg.model.backend == "default": + if cfg.quantization.backend == "default": framework = "pytorch_fx" - elif cfg.model.backend == "ipex": + elif cfg.quantization.backend == "ipex": framework = "pytorch_ipex" - cfg.model.framework = framework + cfg.quantization.framework = framework if not isinstance(user_model, BaseModel): logger.warning("Force convert framework model to neural_compressor model.") - if "tensorflow" in cfg.model.framework or cfg.model.framework == "keras": - self._model = Model(user_model, backend=cfg.model.framework, device=cfg.device) + if "tensorflow" in cfg.quantization.framework or cfg.quantization.framework == "keras": + self._model = Model(user_model, backend=cfg.quantization.framework, device=cfg.quantization.device) else: - self._model = Model(user_model, backend=cfg.model.framework) + self._model = Model(user_model, backend=cfg.quantization.framework) else: - if cfg.model.framework == "pytorch_ipex": + if cfg.quantization.framework == "pytorch_ipex": from neural_compressor.model.torch_model import IPEXModel assert type(user_model) == IPEXModel, \ "The backend is ipex, please wrap the model with IPEXModel class!" - elif cfg.model.framework == "pytorch_fx": + elif cfg.quantization.framework == "pytorch_fx": from neural_compressor.model.torch_model import PyTorchFXModel assert type(user_model) == PyTorchFXModel, \ "The backend is default, please wrap the model with PyTorchFXModel class!" self._model = user_model - if 'tensorflow' in cfg.model.framework: - self._model.name = cfg.model.name - self._model.output_tensor_names = cfg.model.outputs - self._model.input_tensor_names = cfg.model.inputs - self._model.workspace_path = cfg.tuning.workspace.path + if 'tensorflow' in cfg.quantization.framework: + self._model.name = cfg.quantization.model_name + self._model.output_tensor_names = cfg.quantization.outputs + self._model.input_tensor_names = cfg.quantization.inputs + self._model.workspace_path = cfg.options.workspace @property def metric(self): @@ -261,6 +258,8 @@ def metric(self, user_metric): Multi-metrics: {topk: 1, MSE: {compare_label: False}, + weight: [0.5, 0.5], + higher_is_better: [True, False] } Refer to this [file](../docs/source/metric.md#supported-built-in-metric-matrix) for built-in metric list 2. User also can set specific metric through this api. The metric class should take the outputs of the model or @@ -271,10 +270,6 @@ def metric(self, user_metric): user_metric(neural_compressor.metric.Metric or a dict of built-in metric configures): The object of Metric or a dict of built-in metric configurations. 
""" - if deep_get(self.conf.usr_cfg, "evaluation.accuracy.metric"): - logger.warning("Override the value of `metric` field defined in yaml file" \ - " as user defines the value of `metric` attribute by code.") - from .metric import Metric as NCMetric, METRICS if isinstance(user_metric, dict): metric_cfg = user_metric @@ -290,12 +285,12 @@ def metric(self, user_metric): metric_cls = type(user_metric).__name__ name = 'user_' + metric_cls metric_cfg = {name: id(user_metric)} - metrics = METRICS(self.conf.usr_cfg.model.framework) + metrics = METRICS(self.conf.quantization.framework) metrics.register(name, metric_cls) - deep_set(self.conf.usr_cfg, "evaluation.accuracy.metric", metric_cfg) - self.conf.usr_cfg = DotDict(self.conf.usr_cfg) + self._metric = user_metric + @property def eval_func(self): """Get evaluation function.""" @@ -380,14 +375,14 @@ def fit(model, converter.precisions = precisions converter.model = model - if ('bf16' in precisions or 'fp16' in precisions) and converter.model.framework() == "onnxruntime": + if ('bf16' in precisions or 'fp16' in precisions) and converter.framework() == "onnxruntime": if config.device == "cpu": logger.warning("Mix precision exits due to device isn't gpu for onnx models.") sys.exit(0) elif config.backend != "onnxrt_cuda_ep": logger.warning("Mix precision exits due to backend isn't onnxrt_cuda_ep for onnx models.") sys.exit(0) - elif 'bf16' in precisions and not CpuInfo().bf16 and converter.model.framework() != "onnxruntime": + elif 'bf16' in precisions and not CpuInfo().bf16 and converter.framework() != "onnxruntime": if os.getenv('FORCE_BF16') == '1': logger.warning("Mix precision will generate bf16 graph although " \ "the hardware doesn't support bf16 instruction.") @@ -395,7 +390,7 @@ def fit(model, logger.warning("Mix precision exits due to the hardware " \ "doesn't support bf16 instruction.") sys.exit(0) - elif 'fp16' in precisions and converter.model.framework() != "onnxruntime": + elif 'fp16' in precisions and converter.framework() != "onnxruntime": logger.warning("Currently mix precision only supports fp16 for onnx models.") sys.exit(0) if eval_func is not None: diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index d0b113dae9e..7640703c49c 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -20,7 +20,6 @@ import pickle import random import numpy as np -from .conf.dotdict import deep_get, deep_set, DotDict from .config import Config from .model.model import BaseModel, get_model_fwk_name, get_model_type, Model, MODELS from .strategy import STRATEGIES @@ -52,7 +51,7 @@ def __init__(self, conf, **kwargs): """Initialize the parameters. Args: - conf (QuantizationConfig): A instance of QuantizationConfig to + conf (PostTrainingQuantConfig): A instance of PostTrainingQuantConfig to specify the quantization behavior. 
""" self.conf = Config(quantization=conf, benchmark=None, pruning=None, distillation=None, nas=None) @@ -73,15 +72,8 @@ def pre_proccess(self): """Create strategy to optimize model.""" cfg = self.conf - if os.environ.get("PERFORMANCE_ONLY") in ['0', '1']: - performance_only = bool(int(os.environ.get("PERFORMANCE_ONLY"))) - deep_set(cfg, 'quantization.performance_only', performance_only) - logger.info("Get environ 'PERFORMANCE_ONLY={}'," \ - " force setting 'quantization.performance_only = True'.".format(performance_only)) - strategy = cfg.quantization.tuning_criterion.strategy - if cfg.quantization.quant_level == "auto": strategy = "auto" @@ -89,9 +81,9 @@ def pre_proccess(self): strategy = "conservative" if strategy == "mse_v2": - if not (cfg.quantization._framework.startswith("tensorflow") or cfg.quantization._framework == 'pytorch_fx'): + if not (cfg.quantization.framework.startswith("tensorflow") or cfg.quantization.framework == 'pytorch_fx'): strategy = "basic" - logger.warning(f"MSE_v2 does not support {cfg.quantization._framework} now, use basic instead.") + logger.warning(f"MSE_v2 does not support {cfg.quantization.framework} now, use basic instead.") logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.") assert strategy in STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy) @@ -108,7 +100,6 @@ def pre_proccess(self): _resume = pickle.load(f).__dict__ if self._eval_func is None and self._eval_dataloader is None: - self.conf.quantization.performance_only = True logger.info("Quantize model without tuning!") self.strategy = STRATEGIES[strategy]( @@ -183,48 +174,48 @@ def model(self, user_model): """ cfg = self.conf - if cfg.quantization._framework == None: + if cfg.quantization.framework == None: if isinstance(user_model, BaseModel): - cfg.quantization._framework = list(MODELS.keys())[list(MODELS.values()).index(type(user_model))] + cfg.quantization.framework = list(MODELS.keys())[list(MODELS.values()).index(type(user_model))] if cfg.quantization.backend == "ipex": - assert cfg.quantization._framework == "pytorch_ipex", "Please wrap the model with correct Model class!" + assert cfg.quantization.framework == "pytorch_ipex", "Please wrap the model with correct Model class!" if cfg.quantization.backend == "itex": if get_model_type(user_model.model) == 'keras': - assert cfg.quantization._framework == "keras", "Please wrap the model with KerasModel class!" + assert cfg.quantization.framework == "keras", "Please wrap the model with KerasModel class!" else: - assert cfg.quantization._framework == "pytorch_itex", \ + assert cfg.quantization.framework == "pytorch_itex", \ "Please wrap the model with TensorflowModel class!" 
else: framework = get_model_fwk_name(user_model) - cfg.quantization._framework = framework + cfg.quantization.framework = framework if framework == "tensorflow": if get_model_type(user_model) == 'keras' and cfg.quantization.backend == 'itex': - cfg.quantization._framework = 'keras' + cfg.quantization.framework = 'keras' if framework == "pytorch": if cfg.quantization.backend == "default": - cfg.quantization._framework = "pytorch_fx" + cfg.quantization.framework = "pytorch_fx" elif cfg.quantization.backend == "ipex": - cfg.quantization._framework = "pytorch_ipex" + cfg.quantization.framework = "pytorch_ipex" if not isinstance(user_model, BaseModel): logger.warning("Force convert framework model to neural_compressor model.") - if "tensorflow" in cfg.quantization._framework or cfg.quantization._framework == "keras": - self._model = Model(user_model, backend=cfg.quantization._framework, device=cfg.quantization.device) + if "tensorflow" in cfg.quantization.framework or cfg.quantization.framework == "keras": + self._model = Model(user_model, backend=cfg.quantization.framework, device=cfg.quantization.device) else: - self._model = Model(user_model, backend=cfg.quantization._framework) + self._model = Model(user_model, backend=cfg.quantization.framework) else: - if cfg.quantization._framework == "pytorch_ipex": + if cfg.quantization.framework == "pytorch_ipex": from neural_compressor.model.torch_model import IPEXModel assert type(user_model) == IPEXModel, \ "The backend is ipex, please wrap the model with IPEXModel class!" - elif cfg.quantization._framework == "pytorch_fx": + elif cfg.quantization.framework == "pytorch_fx": from neural_compressor.model.torch_model import PyTorchFXModel assert type(user_model) == PyTorchFXModel, \ "The backend is default, please wrap the model with PyTorchFXModel class!" self._model = user_model - if 'tensorflow' in cfg.quantization._framework: + if 'tensorflow' in cfg.quantization.framework: self._model.name = cfg.quantization.model_name self._model.output_tensor_names = cfg.quantization.outputs self._model.input_tensor_names = cfg.quantization.inputs @@ -290,7 +281,8 @@ def metric(self, user_metric): Multi-metrics: {topk: 1, MSE: {compare_label: False}, - + weight: [0.5, 0.5], + higher_is_better: [True, False] } For the built-in metrics, please refer to below link: https://github.com/intel/neural-compressor/blob/master/docs/source/metric.md#supported-built-in-metric-matrix. @@ -304,10 +296,6 @@ def metric(self, user_metric): The object of Metric or a dict of built-in metric configurations. 
""" - if deep_get(self.conf, "quantization.metric"): - logger.warning("Override the value of `metric` field defined in yaml file" \ - " as user defines the value of `metric` attribute by code.") - from .metric import Metric as NCMetric, METRICS if isinstance(user_metric, dict): metric_cfg = user_metric @@ -323,12 +311,9 @@ def metric(self, user_metric): metric_cls = type(user_metric).__name__ name = 'user_' + metric_cls metric_cfg = {name: id(user_metric)} - metrics = METRICS(self.conf.quantization._framework) + metrics = METRICS(self.conf.quantization.framework) metrics.register(name, metric_cls) - deep_set(self.conf, "quantization.metric", metric_cfg) - self.conf = DotDict(self.conf) - self._metric = user_metric @property diff --git a/neural_compressor/training.py b/neural_compressor/training.py index 0b3a387757d..040cf6e8804 100644 --- a/neural_compressor/training.py +++ b/neural_compressor/training.py @@ -166,6 +166,8 @@ def metric(self, user_metric): Multi-metrics: {topk: 1, MSE: {compare_label: False}, + weight: [0.5, 0.5], + higher_is_better: [True, False] } For the built-in metrics, please refer to below link: https://github.com/intel/neural-compressor/blob/master/docs/source/metric.md#supported-built-in-metric-matrix. From eae000160a961d9c93af64c4712283fdd6691ee5 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 29 Mar 2023 20:28:24 +0800 Subject: [PATCH 008/103] add deploy config Signed-off-by: yiliu30 --- neural_compressor/config.py | 4 ++ neural_compressor/strategy/strategy.py | 59 ++++++++++---------------- 2 files changed, 26 insertions(+), 37 deletions(-) diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 3cfe38dd1eb..5cab1132274 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -925,6 +925,10 @@ def inputs(self, inputs): def framework(self): return self._framework + @framework.setter + def framework(self, framework): + self._framework = framework + @property def example_inputs(self): """Get strategy_kwargs.""" diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 9ed67c979d4..2fb60b405a4 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -1142,43 +1142,28 @@ def update_best_op_tuning_cfg(self, op_tuning_cfg): return self.cur_best_acc, self.cur_best_tuning_cfg def deploy_config(self): - return - #TODO uncomment it after config ready - # """Save the configuration locally for deployment.""" - # acc_dataloader_cfg = deep_get(self.cfg, 'evaluation.accuracy.dataloader') - # perf_dataloader_cfg = deep_get(self.cfg, 'evaluation.performance.dataloader') - # # use acc dataloader if perf dataloader is not configured - # if perf_dataloader_cfg is None: - # perf_dataloader_cfg = acc_dataloader_cfg - - # self.deploy_cfg = OrderedDict() - # # int8 dataloader graph transform - # if deep_get(perf_dataloader_cfg, 'transform.QuantizedInput') is not None \ - # or deep_get(acc_dataloader_cfg, 'transform.QuantizedInput') is not None: - # self.best_qmodel, scale = self.adaptor.quantize_input(self.best_qmodel) - # deep_set(perf_dataloader_cfg, 'transform.QuantizedInput.dtype', 'int8') - # deep_set(perf_dataloader_cfg, 'transform.QuantizedInput.scale', scale) - # deep_set(acc_dataloader_cfg, 'transform.QuantizedInput.dtype', 'int8') - # deep_set(acc_dataloader_cfg, 'transform.QuantizedInput.scale', scale) - - # self.deploy_cfg['model'] = self.cfg.model - # self.deploy_cfg['device'] = self.conf.quantization.device - # if self.cfg.evaluation is not None: 
-        #     deep_set(self.cfg, 'evaluation.performance.dataloader',\
-        #         perf_dataloader_cfg)
-        #     deep_set(self.cfg, 'evaluation.accuracy.dataloader', \
-        #         acc_dataloader_cfg)
-        #     self.deploy_cfg['evaluation'] = self.cfg.evaluation
-
-        # def setup_yaml():
-        #     represent_dict_order = lambda self, \
-        #         data: self.represent_mapping('tag:yaml.org,2002:map', data.items())
-        #     yaml.add_representer(OrderedDict, represent_dict_order)
-        #     yaml.add_representer(DotDict, represent_dict_order)
-        # setup_yaml()
-        # with open(self.deploy_path, 'w+') as f:
-        #     yaml.dump(self.deploy_cfg, f)
-        # logger.info("Save deploy yaml to {}".format(self.deploy_path))
+        """Save the configuration locally for deployment."""
+        # TODO: need to double-check
+        self.deploy_cfg = OrderedDict()
+        model_cfg = dict()
+        model_cfg['inputs'] = self.conf.quantization.inputs
+        model_cfg['outputs'] = self.conf.quantization.outputs
+        model_cfg['backend'] = self.conf.quantization.backend
+        model_cfg['quant_format'] = self.conf.quantization.quant_format
+        model_cfg['domain'] = self.conf.quantization.domain
+        self.deploy_cfg['model'] = model_cfg
+        self.deploy_cfg['device'] = self.conf.quantization.device
+
+        def setup_yaml():
+            represent_dict_order = lambda self, \
+                data: self.represent_mapping('tag:yaml.org,2002:map', data.items())
+            yaml.add_representer(OrderedDict, represent_dict_order)
+            yaml.add_representer(DotDict, represent_dict_order)
+        setup_yaml()
+        with open(self.deploy_path, 'w+') as f:
+            yaml.dump(self.deploy_cfg, f)
+        logger.info("Save deploy yaml to {}".format(self.deploy_path))
 
     def _get_common_cfg(self, model_wise_cfg, op_wise_cfgs):
         """Get the common parts from the model_wise_cfg.

From a8860257dbd08a10a8595ea1bc386810cfd0c160 Mon Sep 17 00:00:00 2001
From: yiliu30 
Date: Thu, 30 Mar 2023 08:38:43 +0800
Subject: [PATCH 009/103] adjust other strategies

Signed-off-by: yiliu30 
---
 neural_compressor/config.py                      | 16 +++++++++++++++-
 .../strategy/auto_mixed_precision.py             |  4 ++--
 neural_compressor/strategy/basic.py              |  4 ++--
 neural_compressor/strategy/bayesian.py           |  2 +-
 neural_compressor/strategy/hawq_v2.py            |  5 ++++-
 neural_compressor/strategy/mse.py                |  2 +-
 neural_compressor/strategy/mse_v2.py             | 11 +++++++----
 neural_compressor/strategy/strategy.py           |  2 ++
 neural_compressor/strategy/utils/tuning_space.py |  4 ++--
 9 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/neural_compressor/config.py b/neural_compressor/config.py
index 5cab1132274..285e7ba2d13 100644
--- a/neural_compressor/config.py
+++ b/neural_compressor/config.py
@@ -114,6 +114,8 @@ class Options:
         tensorboard(bool): This flag indicates whether to save the weights of the model and the inputs of each layer
             for visual display.
             Default value is False.
+        diagnosis(bool): This flag indicates whether to do diagnosis.
+            Default value is False.
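For reference, with all-default quantization settings the rewritten deploy_config above would emit a deploy yaml shaped roughly like the sample below (values assumed from the _BaseQuantizationConfig defaults):

    model:
      inputs: []
      outputs: []
      backend: default
      quant_format: default
      domain: auto
    device: cpu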
Example:: @@ -122,14 +124,16 @@ class Options: set_workspace("workspace_path") set_resume_from("workspace_path") set_tensorboard(True) + """ def __init__(self, random_seed=1978, workspace=default_workspace, - resume_from=None, tensorboard=False): + resume_from=None, tensorboard=False, diagnosis=False): """Init an Option object.""" self.random_seed = random_seed self.workspace = workspace self.resume_from = resume_from self.tensorboard = tensorboard + self.diagnosis = diagnosis # TODO expose the diagnosis to user @property def random_seed(self): @@ -174,7 +178,17 @@ def tensorboard(self, tensorboard): """Set tensorboard.""" if _check_value('tensorboard', tensorboard, bool): self._tensorboard = tensorboard + + @property + def diagnosis(self): + """Get diagnosis.""" + return self._diagnosis + @diagnosis.setter + def diagnosis(self, diagnosis): + """Set diagnosis.""" + if _check_value('diagnosis', diagnosis, bool): + self._diagnosis = diagnosis class BenchmarkConfig: """Config Class for Benchmark. diff --git a/neural_compressor/strategy/auto_mixed_precision.py b/neural_compressor/strategy/auto_mixed_precision.py index 44471670626..f9d6dfb5007 100644 --- a/neural_compressor/strategy/auto_mixed_precision.py +++ b/neural_compressor/strategy/auto_mixed_precision.py @@ -129,7 +129,7 @@ def traverse(self): tune_cfg, self.model, self.calib_dataloader, self.q_func) assert self.last_qmodel # Return the last quantized model as a result. if performance only. - if self.cfg.tuning.exit_policy.performance_only: + if self._not_tuning: self.best_qmodel = self.last_qmodel self._add_tuning_history(copy.deepcopy(tune_cfg), (-1, [0]), q_config=self.last_qmodel.q_config) return @@ -138,7 +138,7 @@ def traverse(self): q_config = copy.deepcopy(self.last_qmodel.q_config) self.last_tune_result = self._evaluate(self.last_qmodel) self.cur_best_acc, self.cur_best_tuning_cfg = self.update_best_op_tuning_cfg(op_tuning_cfg) - need_stop = self.stop(self.cfg.tuning.exit_policy.timeout, self.trials_count) + need_stop = self.stop(self.conf.quantization.tuning_criterion.timeout, self.trials_count) # record the tuning history saved_tune_cfg = copy.deepcopy(tune_cfg) saved_last_tune_result = copy.deepcopy(self.last_tune_result) diff --git a/neural_compressor/strategy/basic.py b/neural_compressor/strategy/basic.py index ca0cb118b24..e211db95436 100644 --- a/neural_compressor/strategy/basic.py +++ b/neural_compressor/strategy/basic.py @@ -80,7 +80,7 @@ def distributed_next_tune_cfg_lst(self, comm): # stage 2: yield new_op_tuning_cfg_lst (length of stage 1) # Fallback the ops supported both static and dynamic from static to dynamic # Tuning items: None - if self.cfg.quantization.approach == 'post_training_auto_quant': + if self.conf.quantization.approach == 'post_training_auto_quant': static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if item in tuning_space.query_items_by_quant_mode('dynamic')] if static_dynamic_items: @@ -211,7 +211,7 @@ def next_tune_cfg(self): # Fallback the ops supported both static and dynamic from static to dynamic # Tuning items: None - if self.cfg.quantization.approach == 'post_training_auto_quant': + if self.conf.quantization.approach == 'post_training_auto_quant': static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if item in tuning_space.query_items_by_quant_mode('dynamic')] if static_dynamic_items: diff --git a/neural_compressor/strategy/bayesian.py b/neural_compressor/strategy/bayesian.py index 12ce0a23429..ceefd26f7ae 100644 
--- a/neural_compressor/strategy/bayesian.py +++ b/neural_compressor/strategy/bayesian.py @@ -51,7 +51,7 @@ def __getstate__(self): dict: Saved dict for resuming """ for history in self.tuning_history: - if self._same_yaml(history['cfg'], self.cfg): + if self._same_yaml(history['cfg'], self.conf): history['bayes_opt'] = self.bayes_opt save_dict = super().__getstate__() return save_dict diff --git a/neural_compressor/strategy/hawq_v2.py b/neural_compressor/strategy/hawq_v2.py index 5fbdb59151f..de005885bee 100644 --- a/neural_compressor/strategy/hawq_v2.py +++ b/neural_compressor/strategy/hawq_v2.py @@ -64,7 +64,10 @@ def next_tune_cfg(self): # Start compute the hessian trace logger.info(f"************** Start compute the hessian trace *****************") target_dtype = "fp32" - hawq_v2_criterion =self.cfg.tuning.strategy.hawq_v2_loss + hawq_v2_criterion = None + strategy_kwargs = self.conf.quantization.tuning_criterion.strategy_kwargs + if strategy_kwargs: + hawq_v2_criterion = strategy_kwargs.get('hawq_v2_loss', None) # assert hawq_v2_criterion is not None, "HAWQ-V2 strategy needs model loss function to compute the gradient, \ # Please assign it by strategy_kwargs({'hawq_v2_loss': hawq_v2_loss})." op_to_traces = self.adaptor.calculate_hessian_trace(fp32_model = self._fp32_model, diff --git a/neural_compressor/strategy/mse.py b/neural_compressor/strategy/mse.py index a2eea943f2a..3bcf100cec9 100644 --- a/neural_compressor/strategy/mse.py +++ b/neural_compressor/strategy/mse.py @@ -50,7 +50,7 @@ def __getstate__(self): save_dict: Saved dict for resuming """ for history in self.tuning_history: - if self._same_yaml(history['cfg'], self.cfg): + if self._same_yaml(history['cfg'], self.conf): history['ordered_ops'] = self.ordered_ops save_dict = super().__getstate__() return save_dict diff --git a/neural_compressor/strategy/mse_v2.py b/neural_compressor/strategy/mse_v2.py index c2bee011d59..91d181f547b 100644 --- a/neural_compressor/strategy/mse_v2.py +++ b/neural_compressor/strategy/mse_v2.py @@ -125,8 +125,11 @@ def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): tune_cfg = deepcopy(self.cur_best_tuning_cfg) requantize_cfg = deepcopy(self._tune_cfg_converter(self.cur_best_tuning_cfg)) self.output_op_names = self.adaptor.get_output_op_names(self.last_qmodel) - self.confidence_batches = (self.cfg.tuning.strategy.confidence_batches - if self.cfg.tuning.strategy.confidence_batches != None else 2) + confidence_batches = 2 + strategy_kwargs = self.conf.quantization.tuning_criterion.strategy_kwargs + if strategy_kwargs and strategy_kwargs.get('confidence_batches', None): + confidence_batches = strategy_kwargs.get('confidence_batches', None) + tune_cfg_backup = deepcopy(tune_cfg) quant_ops_in_tune_cfg = self._collect_ops_by_quant_mode(tune_cfg, 'dynamic') + \ self._collect_ops_by_quant_mode(tune_cfg, 'static') @@ -140,7 +143,7 @@ def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): self.calib_dataloader, deepcopy(self._tune_cfg_converter(tune_cfg)), self.output_op_names, - self.confidence_batches, + confidence_batches, fallback=True) logger.debug(f"*** The op sensitivity analysis took {time() - start:.2f}s.") select_op_info = ops_lst[0] @@ -171,7 +174,7 @@ def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): self.calib_dataloader, deepcopy(self._tune_cfg_converter(tune_cfg)), self.output_op_names, - self.confidence_batches, + confidence_batches, fallback=False, requantize_cfgs=requantize_cfg['op']) logger.debug(f"*** The op sensitivity analysis took 
{time() - start:.2f}s.") diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 2fb60b405a4..c503c56340a 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -1101,6 +1101,8 @@ def _same_yaml(self, src_yaml, dst_yaml): The check will exclude those keys which do not really impact the tuning result, such as tensorboard, workspace, resume options under the tuning section of YAML. """ + return False + # TODO rewrite the compare method for new API if equal_dicts(src_yaml, dst_yaml, ignore_keys=['tuning']) and \ equal_dicts(src_yaml.tuning, src_yaml.tuning, compare_keys=['objective', 'accuracy_criterion', diff --git a/neural_compressor/strategy/utils/tuning_space.py b/neural_compressor/strategy/utils/tuning_space.py index 07909d7f711..21c24647c28 100644 --- a/neural_compressor/strategy/utils/tuning_space.py +++ b/neural_compressor/strategy/utils/tuning_space.py @@ -141,8 +141,8 @@ def _init_usr_cfg(self): """Init user config.""" usr_cfg = {'quantization': {}} usr_cfg['quantization']['model_wise'] = None - usr_cfg['quantization']['optype_wise'] = self.conf.quantization.op_type_dict - usr_cfg['quantization']['op_wise'] = self.conf.quantization.op_name_dict + usr_cfg['quantization']['optype_wise'] = self.conf.quantization.op_type_dict if self.conf else None + usr_cfg['quantization']['op_wise'] = self.conf.quantization.op_name_dict if self.conf else None return usr_cfg def _parse_capability(self, capability: Dict) -> None: From 3e11863d35bbee37f695dabd306a2538ac49a5db Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 30 Mar 2023 09:27:19 +0800 Subject: [PATCH 010/103] replace conf.config with config Signed-off-by: yiliu30 --- neural_compressor/model/mxnet_model.py | 2 +- neural_compressor/model/tensorflow_model.py | 2 +- neural_compressor/model/torch_model.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/neural_compressor/model/mxnet_model.py b/neural_compressor/model/mxnet_model.py index 9a16797c5e0..481ac962282 100644 --- a/neural_compressor/model/mxnet_model.py +++ b/neural_compressor/model/mxnet_model.py @@ -54,7 +54,7 @@ def model(self, model): def save(self, root=None): """Save MXNet model.""" if root is None: - from neural_compressor.conf import config as cfg + from neural_compressor import config as cfg root = cfg.default_workspace root = os.path.abspath(os.path.expanduser(root)) os.makedirs(os.path.dirname(root), exist_ok=True) diff --git a/neural_compressor/model/tensorflow_model.py b/neural_compressor/model/tensorflow_model.py index 83c4cb9ff54..322f8ae3c3d 100644 --- a/neural_compressor/model/tensorflow_model.py +++ b/neural_compressor/model/tensorflow_model.py @@ -27,7 +27,7 @@ from neural_compressor.utils.utility import LazyImport, compute_sparsity from neural_compressor.utils.utility import version1_lt_version2, version1_gt_version2, version1_gte_version2 from neural_compressor.utils import logger -from neural_compressor.conf import config as cfg +from neural_compressor import config as cfg from neural_compressor.model.base_model import BaseModel tf = LazyImport('tensorflow') diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py index e357162aeaa..43efcbbcaad 100644 --- a/neural_compressor/model/torch_model.py +++ b/neural_compressor/model/torch_model.py @@ -24,7 +24,7 @@ from collections import OrderedDict, UserDict from neural_compressor.utils.utility import LazyImport, compute_sparsity from neural_compressor.utils import logger 
-from neural_compressor.conf import config as cfg +from neural_compressor import config as cfg from neural_compressor.model.base_model import BaseModel torch = LazyImport('torch') From 4629174dcc243005fbc76048ad196c6779746aaf Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 30 Mar 2023 09:28:44 +0800 Subject: [PATCH 011/103] update strategy uts Signed-off-by: yiliu30 --- .../strategy/utils/tuning_space.py | 2 +- test/strategy/test_tuning_sampler_1.x.py | 244 ++++++++++++ test/strategy/test_tuning_space.py | 71 +--- test/strategy/test_tuning_space_1.x.py | 371 ++++++++++++++++++ test/strategy/test_tuning_space_v2.py | 8 +- test/strategy/test_tuning_space_v2_1.x.py | 279 +++++++++++++ 6 files changed, 901 insertions(+), 74 deletions(-) create mode 100644 test/strategy/test_tuning_sampler_1.x.py create mode 100644 test/strategy/test_tuning_space_1.x.py create mode 100644 test/strategy/test_tuning_space_v2_1.x.py diff --git a/neural_compressor/strategy/utils/tuning_space.py b/neural_compressor/strategy/utils/tuning_space.py index 21c24647c28..51a742e002a 100644 --- a/neural_compressor/strategy/utils/tuning_space.py +++ b/neural_compressor/strategy/utils/tuning_space.py @@ -140,7 +140,7 @@ def __init__(self, capability, conf, framework=None): def _init_usr_cfg(self): """Init user config.""" usr_cfg = {'quantization': {}} - usr_cfg['quantization']['model_wise'] = None + usr_cfg['quantization']['model_wise'] = None usr_cfg['quantization']['optype_wise'] = self.conf.quantization.op_type_dict if self.conf else None usr_cfg['quantization']['op_wise'] = self.conf.quantization.op_name_dict if self.conf else None return usr_cfg diff --git a/test/strategy/test_tuning_sampler_1.x.py b/test/strategy/test_tuning_sampler_1.x.py new file mode 100644 index 00000000000..fc0dfdd341b --- /dev/null +++ b/test/strategy/test_tuning_sampler_1.x.py @@ -0,0 +1,244 @@ +from neural_compressor.experimental.strategy.utils.tuning_sampler import OpTypeWiseTuningSampler, ModelWiseTuningSampler +from neural_compressor.experimental.strategy.utils.tuning_sampler import OpWiseTuningSampler, FallbackTuningSampler +from neural_compressor.experimental.strategy.utils.tuning_structs import OpTuningConfig +from neural_compressor.experimental.strategy.utils.tuning_space import TuningSpace +from collections import OrderedDict +from copy import deepcopy +import unittest + +op_cap = { + ('op_name1', 'op_type1'): [ + { + 'activation': + { + 'dtype': ['int8'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'], + 'algorithm': ['minmax', 'kl'] + }, + 'weight': + { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'] + } + }, + { + 'activation': + { + 'dtype': ['int8'], + 'quant_mode': 'dynamic', + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'], + 'algorithm': ['minmax', 'kl'] + }, + 'weight': + { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'] + } + }, + { + 'activation': + { + 'dtype': 'fp32' + }, + 'weight': + { + 'dtype': 'fp32' + } + }, + ], + ('op_name2', 'op_type1'): [ + { + 'activation': + { + 'dtype': ['int8'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'], + 'algorithm': ['minmax', 'kl'] + }, + 'weight': + { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'] + } + }, + { + 'activation': + { + 'dtype': ['int8'], + 'quant_mode': 'dynamic', + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'], + 
'algorithm': ['minmax', 'kl'] + }, + 'weight': + { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'] + } + }, + { + 'activation': + { + 'dtype': 'fp32' + }, + 'weight': + { + 'dtype': 'fp32' + } + }, + ], + ('op_name3', 'op_type2'): [ + { + 'activation': + { + 'dtype': ['int8'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'granularity': ['per_channel'] + }, + 'weight': + { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_channel'] + } + }, + { + 'activation': + { + 'dtype': 'fp32' + }, + 'weight': + { + 'dtype': 'fp32' + } + }, + ], + ('op_name4', 'op_type3'): [ + { + 'activation': + { + 'dtype': ['int8'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'] + }, + }, + { + 'activation': + { + 'dtype': ['int8'], + 'quant_mode': 'dynamic', + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'] + }, + }, + { + 'activation': + { + 'dtype': 'fp32' + }, + }, + ] +} + + +class TestTuningSampler(unittest.TestCase): + def test_tuning_sampler(self): + capability = { + 'calib': {'calib_sampling_size': [1, 10, 50]}, + 'op': op_cap + } + conf = None + tuning_space = TuningSpace(capability, conf) + + initial_op_tuning_cfg = {} + for item in tuning_space.root_item.options: + if item.item_type == 'op': + op_name, op_type = item.name + initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) + quant_mode_wise_items = OrderedDict() + from neural_compressor.experimental.strategy.utils.constant import auto_query_order as query_order + pre_items = set() + for quant_mode in query_order: + items = tuning_space.query_items_by_quant_mode(quant_mode) + filtered_items = [item for item in items if item not in pre_items] + pre_items = pre_items.union(set(items)) + quant_mode_wise_items[quant_mode] = filtered_items + + def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): + for item in items_lst: + op_item_dtype_dict[item.name] = target_quant_mode + + op_item_dtype_dict = OrderedDict() + for quant_mode, quant_mode_items in quant_mode_wise_items.items(): + initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) + + op_wise_tuning_sampler = OpWiseTuningSampler(deepcopy(tuning_space), [], [], + op_item_dtype_dict, initial_op_tuning_cfg) + self.assertEqual(len(list(op_wise_tuning_sampler)), 128) + optype_wise_tuning_sampler = OpTypeWiseTuningSampler(deepcopy(tuning_space), [], [], + op_item_dtype_dict, initial_op_tuning_cfg) + cfg_lst = list(optype_wise_tuning_sampler) + self.assertEqual(len(cfg_lst), 16) + model_wise_tuning_sampler = ModelWiseTuningSampler(deepcopy(tuning_space), [], [], + op_item_dtype_dict, initial_op_tuning_cfg) + model_wise_pool = [] + best_tune_cfg = None + for tune_cfg in model_wise_tuning_sampler: + best_tune_cfg = tune_cfg + model_wise_pool.append(tune_cfg) + self.assertEqual(len(model_wise_pool), 8) + + # fallback test + quant_ops = quant_mode_wise_items.get('static', []) + quant_ops += quant_mode_wise_items.get('dynamic', []) + target_dtype = 'fp32' + target_type_lst = tuning_space.query_items_by_quant_mode(target_dtype) + fallback_items_lst = [item for item in quant_ops if item in target_type_lst] + if fallback_items_lst: + print(f"Start to fallback op to {target_dtype} one by one.") + fallback_items_name_lst = [item.name for item in fallback_items_lst] + op_dtypes = OrderedDict(zip(fallback_items_name_lst[::-1], [target_dtype] * len(fallback_items_name_lst))) + initial_op_tuning_cfg = deepcopy(best_tune_cfg) + 
fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, accumulate=False) + fallback_cnt = [] + fp32_lst = [] + for op_cfgs in fallback_sampler: + cnt = 0 + for op_name, op_cfg in op_cfgs.items(): + op_state = op_cfg.get_state() + if 'fp32' == op_state['activation']['dtype'] and\ + ('fp32' == op_state['weight']['dtype'] if 'weight' in op_state else True): + cnt = cnt + 1 + fp32_lst.append(op_name) + fallback_cnt.append(cnt) + self.assertListEqual(fallback_cnt, [1, 1, 1, 1]) + self.assertListEqual(fp32_lst, fallback_items_name_lst[::-1]) + + fallback_sampler_acc = FallbackTuningSampler(tuning_space, tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_dtypes=op_dtypes, accumulate=True) + fallback_cnt = [] + for op_cfgs in fallback_sampler_acc: + cnt = 0 + for op_name, op_cfg in op_cfgs.items(): + op_state = op_cfg.get_state() + if 'fp32' == op_state['activation']['dtype'] and\ + ('fp32' == op_state['weight']['dtype'] if 'weight' in op_state else True): + cnt = cnt + 1 + fallback_cnt.append(cnt) + self.assertListEqual(fallback_cnt, [2, 3, 4]) + +if __name__ == "__main__": + unittest.main() diff --git a/test/strategy/test_tuning_space.py b/test/strategy/test_tuning_space.py index 89f25d6e7fa..6da04ebaf29 100644 --- a/test/strategy/test_tuning_space.py +++ b/test/strategy/test_tuning_space.py @@ -194,11 +194,6 @@ def setUp(self) -> None: } } } - self.model_wise_user_config = { - 'activation': { - 'granularity': ['per_channel'], - } - } # fallback op_name4 self.op_wise_user_config = { 'op_name4': { @@ -224,11 +219,9 @@ def setUp(self) -> None: def test_tuning_space_merge_op_wise_not_exist(self): # op-wise conf = { - 'usr_cfg': { 'quantization': { - 'op_wise': deepcopy(self.op_wise_user_config2), + 'op_type_dict': deepcopy(self.op_wise_user_config2), } - } } conf = DotDict(conf) tuning_space2 = TuningSpace(deepcopy(self.capability2), deepcopy(conf)) @@ -257,42 +250,12 @@ def test_tuning_space_creation(self): # all optype self.assertEqual(list(tuning_space.op_type_wise_items.keys()), ['op_type1', 'op_type2', 'op_type3']) - def test_tuning_space_merge_model_wise(self): - # Test merge with user config, model-wise, optype-wise, op-wise - # model-wise - self.capability = { - 'calib': {'calib_sampling_size': [1, 10, 50]}, - 'op': op_cap - } - conf = { - 'usr_cfg': { - 'quantization': { - 'model_wise': self.model_wise_user_config, - } - } - } - conf = DotDict(conf) - tuning_space2 = TuningSpace(deepcopy(self.capability), deepcopy(conf)) - logger.debug(tuning_space2.root_item.get_details()) - found_per_tensor = False - for quant_mode in ['static', 'dynamic']: - for op_item in tuning_space2.query_items_by_quant_mode(quant_mode): - for path in tuning_space2.ops_path_set[op_item.name]: - mode_item = tuning_space2.query_quant_mode_item_by_full_path(op_item.name, path) - act_algo_item = mode_item.get_option_by_name(('activation', 'granularity')) - if act_algo_item and 'per_tensor' in act_algo_item.options: - found_per_tensor = True - break - self.assertFalse(found_per_tensor) - def test_tuning_space_merge_optype_wise(self): # optype-wise conf = { - 'usr_cfg': { 'quantization': { - 'optype_wise': self.optype_wise_user_config, + 'op_type_dict': self.optype_wise_user_config, } - } } conf = DotDict(conf) tuning_space2 = TuningSpace(deepcopy(self.capability), deepcopy(conf)) @@ -315,11 +278,9 @@ def test_tuning_space_merge_optype_wise(self): def test_tuning_space_merge_op_wise(self): # op-wise conf = { - 
'usr_cfg': { 'quantization': { - 'op_wise': self.op_wise_user_config, + 'op_name_dict': self.op_wise_user_config, } - } } conf = DotDict(conf) @@ -340,32 +301,6 @@ def test_tuning_space_merge_op_wise(self): self.assertFalse(found_quant_op_name4) self.assertTrue(found_fp32_op_name4) - def test_tuning_space_merge_model_wise_and_opty_wise(self): - # Test mode-wise + optype-wise - conf = { - 'usr_cfg': { - 'quantization': { - 'model_wise': self.model_wise_user_config, - 'optype_wise': self.optype_wise_user_config, - } - } - - } - # the optype_wise config will overwrite the model-wise config - conf = DotDict(conf) - tuning_space2 = TuningSpace(deepcopy(self.capability), deepcopy(conf)) - logger.debug(tuning_space2.root_item.get_details()) - found_per_tensor = False - for quant_mode in ['static', 'dynamic']: - for op_item in tuning_space2.query_items_by_quant_mode(quant_mode): - for path in tuning_space2.ops_path_set[op_item.name]: - mode_item = tuning_space2.query_quant_mode_item_by_full_path(op_item.name, path) - act_algo_item = mode_item.get_option_by_name(('activation', 'granularity')) - if act_algo_item and 'per_tensor' in act_algo_item.options: - found_per_tensor = True - break - self.assertTrue(found_per_tensor) - if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_tuning_space_1.x.py b/test/strategy/test_tuning_space_1.x.py new file mode 100644 index 00000000000..cad910441f6 --- /dev/null +++ b/test/strategy/test_tuning_space_1.x.py @@ -0,0 +1,371 @@ +from neural_compressor.experimental.strategy.utils.tuning_space import TuningItem, TuningSpace +from neural_compressor.conf.dotdict import DotDict +from neural_compressor.utils import logger +from copy import deepcopy +import unittest + +op_cap = { + # op have both weight and activation and support static/dynamic/fp32 + ('op_name1', 'op_type1'): [ + { + 'activation': + { + 'dtype': ['int8'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'], + 'algorithm': ['minmax', 'kl'] + }, + 'weight': + { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'] + } + }, + { + 'activation': + { + 'dtype': ['int8'], + 'quant_mode': 'dynamic', + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'], + 'algorithm': ['minmax', 'kl'] + }, + 'weight': + { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'] + } + }, + { + 'activation': + { + 'dtype': 'fp32' + }, + 'weight': + { + 'dtype': 'fp32' + } + }, + ], + # op have both weight and activation and support static/dynamic/fp32 + ('op_name2', 'op_type1'): [ + { + 'activation': + { + 'dtype': ['int8'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'], + 'algorithm': ['minmax', 'kl'] + }, + 'weight': + { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'] + } + }, + { + 'activation': + { + 'dtype': ['int8'], + 'quant_mode': 'dynamic', + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'], + 'algorithm': ['minmax', 'kl'] + }, + 'weight': + { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'] + } + }, + { + 'activation': + { + 'dtype': 'fp32' + }, + 'weight': + { + 'dtype': 'fp32' + } + }, + ], + # op have both weight and activation and support static/fp32 + ('op_name3', 'op_type2'): [ + { + 'activation': + { + 'dtype': ['int8'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'granularity': ['per_channel'], + 'algorithm': ['minmax', 
'kl'] + }, + 'weight': + { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_channel'], + 'algorithm': ['minmax', 'kl'] + } + }, + { + 'activation': + { + 'dtype': 'fp32' + }, + 'weight': + { + 'dtype': 'fp32' + } + }, + ], + # op only have activation and support dynamic/fp32 + ('op_name4', 'op_type3'): [ + { + 'activation': + { + 'dtype': ['int8'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'], + 'algorithm': ['minmax', 'kl'] + }, + }, + { + 'activation': + { + 'dtype': ['int8'], + 'quant_mode': 'dynamic', + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'], + 'algorithm': ['minmax'] + }, + }, + { + 'activation': + { + 'dtype': 'fp32' + }, + }, + ] +} + + +op_cap2 = { + # The granularity of op activation do not support per_tensor. + ('op_name4', 'op_type1'): [ + { + 'activation': + { + 'dtype': ['int8'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'granularity': ['per_channel'], + 'algorithm': ['minmax', 'kl'] + }, + 'weight': + { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'] + } + },] +} + + +class TestTuningSampler(unittest.TestCase): + def setUp(self) -> None: + self.capability = { + 'calib': {'calib_sampling_size': [1, 10, 50]}, + 'op': deepcopy(op_cap) + } + # for optype1,'algorithm': ['minmax', 'kl'] -> ['minmax'] + self.optype_wise_user_config = { + 'op_type1': { + 'activation': { + 'algorithm': ['minmax'], + 'granularity': ['per_channel', 'per_tensor'], + } + } + } + self.model_wise_user_config = { + 'activation': { + 'granularity': ['per_channel'], + } + } + # fallback op_name4 + self.op_wise_user_config = { + 'op_name4': { + 'activation': { + 'dtype': ['fp32'], + } + } + } + + self.op_wise_user_config2 = { + 'op_name4': { + 'activation': { + 'granularity': ['per_tensor'], + } + } + } + + self.capability2 = { + 'calib': {'calib_sampling_size': [1, 10]}, + 'op': deepcopy(op_cap2) + } + + def test_tuning_space_merge_op_wise_not_exist(self): + # op-wise + conf = { + 'usr_cfg': { + 'quantization': { + 'op_wise': deepcopy(self.op_wise_user_config2), + } + } + } + conf = DotDict(conf) + tuning_space2 = TuningSpace(deepcopy(self.capability2), deepcopy(conf)) + logger.debug(tuning_space2.root_item.get_details()) + + + def test_tuning_space_creation(self): + conf = None + # Test the creation of tuning space + tuning_space = TuningSpace(self.capability, conf) + logger.debug(tuning_space.root_item.get_details()) + # ops supported static + static_items = tuning_space.query_items_by_quant_mode('static') + static_items_name = [item.name for item in static_items] + self.assertEqual(set(static_items_name), set(op_cap.keys())) + # ops supported dynamic + dynamic_items = tuning_space.query_items_by_quant_mode('dynamic') + dynamic_items_name = [item.name for item in dynamic_items] + all_items_name = list(op_cap.keys()) + all_items_name.remove(('op_name3', 'op_type2')) + self.assertEqual(set(dynamic_items_name), set(all_items_name)) + # ops supported fp32 + fp32_items = tuning_space.query_items_by_quant_mode('fp32') + fp32_items_name = [item.name for item in fp32_items] + self.assertEqual(set(fp32_items_name), set(op_cap.keys())) + # all optype + self.assertEqual(list(tuning_space.op_type_wise_items.keys()), ['op_type1', 'op_type2', 'op_type3']) + + def test_tuning_space_merge_model_wise(self): + # Test merge with user config, model-wise, optype-wise, op-wise + # model-wise + self.capability = { + 'calib': {'calib_sampling_size': [1, 10, 50]}, + 'op': op_cap + } + conf = { + 
'usr_cfg': { + 'quantization': { + 'model_wise': self.model_wise_user_config, + } + } + } + conf = DotDict(conf) + tuning_space2 = TuningSpace(deepcopy(self.capability), deepcopy(conf)) + logger.debug(tuning_space2.root_item.get_details()) + found_per_tensor = False + for quant_mode in ['static', 'dynamic']: + for op_item in tuning_space2.query_items_by_quant_mode(quant_mode): + for path in tuning_space2.ops_path_set[op_item.name]: + mode_item = tuning_space2.query_quant_mode_item_by_full_path(op_item.name, path) + act_algo_item = mode_item.get_option_by_name(('activation', 'granularity')) + if act_algo_item and 'per_tensor' in act_algo_item.options: + found_per_tensor = True + break + self.assertFalse(found_per_tensor) + + def test_tuning_space_merge_optype_wise(self): + # optype-wise + conf = { + 'usr_cfg': { + 'quantization': { + 'optype_wise': self.optype_wise_user_config, + } + } + } + conf = DotDict(conf) + tuning_space2 = TuningSpace(deepcopy(self.capability), deepcopy(conf)) + logger.debug(tuning_space2.root_item.get_details()) + found_act_algo_kl_optype1 = False + found_act_algo_kl_others = False + for quant_mode in ['static', 'dynamic']: + for op_item in tuning_space2.query_items_by_quant_mode(quant_mode): + for path in tuning_space2.ops_path_set[op_item.name]: + mode_item = tuning_space2.query_quant_mode_item_by_full_path(op_item.name, path) + act_algo_item = mode_item.get_option_by_name(('activation', 'algorithm')) + if act_algo_item and op_item.name[1] == 'op_type1' and 'kl' in act_algo_item.options: + found_act_algo_kl_optype1 = True + break + if act_algo_item and op_item.name[1] != 'op_type1' and 'kl' in act_algo_item.options: + found_act_algo_kl_others = True + self.assertFalse(found_act_algo_kl_optype1) + self.assertTrue(found_act_algo_kl_others) + + def test_tuning_space_merge_op_wise(self): + # op-wise + conf = { + 'usr_cfg': { + 'quantization': { + 'op_wise': self.op_wise_user_config, + } + } + + } + conf = DotDict(conf) + tuning_space2 = TuningSpace(deepcopy(self.capability), deepcopy(conf)) + logger.debug(tuning_space2.root_item.get_details()) + found_quant_op_name4 = False + found_fp32_op_name4 = False + for quant_mode in ['static', 'dynamic']: + for item in tuning_space2.query_items_by_quant_mode(quant_mode): + if 'op_name4' in item.name: + found_quant_op_name4 = True + break + + for item in tuning_space2.query_items_by_quant_mode('fp32'): + if 'op_name4' in item.name: + found_fp32_op_name4 = True + break + self.assertFalse(found_quant_op_name4) + self.assertTrue(found_fp32_op_name4) + + def test_tuning_space_merge_model_wise_and_opty_wise(self): + # Test mode-wise + optype-wise + conf = { + 'usr_cfg': { + 'quantization': { + 'model_wise': self.model_wise_user_config, + 'optype_wise': self.optype_wise_user_config, + } + } + + } + # the optype_wise config will overwrite the model-wise config + conf = DotDict(conf) + tuning_space2 = TuningSpace(deepcopy(self.capability), deepcopy(conf)) + logger.debug(tuning_space2.root_item.get_details()) + found_per_tensor = False + for quant_mode in ['static', 'dynamic']: + for op_item in tuning_space2.query_items_by_quant_mode(quant_mode): + for path in tuning_space2.ops_path_set[op_item.name]: + mode_item = tuning_space2.query_quant_mode_item_by_full_path(op_item.name, path) + act_algo_item = mode_item.get_option_by_name(('activation', 'granularity')) + if act_algo_item and 'per_tensor' in act_algo_item.options: + found_per_tensor = True + break + self.assertTrue(found_per_tensor) + + +if __name__ == "__main__": + unittest.main() 
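The new *_1.x.py test files above pin down the legacy DotDict layout, while the rewritten tests pass the flat 2.x keys straight to TuningSpace. A minimal sketch of the same op-wise constraint expressed both ways, built only from the keys and imports that appear in these diffs (illustrative, not part of the patch)::

    from copy import deepcopy
    from neural_compressor.conf.dotdict import DotDict

    # Fall back op_name4 to fp32, as in the tests above.
    op_wise_user_config = {'op_name4': {'activation': {'dtype': ['fp32']}}}

    # 1.x layout, still exercised by the *_1.x.py copies:
    conf_old = DotDict({'usr_cfg': {'quantization': {'op_wise': deepcopy(op_wise_user_config)}}})

    # 2.x layout, what the rewritten tests hand to TuningSpace:
    conf_new = DotDict({'quantization': {'op_name_dict': deepcopy(op_wise_user_config)}})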
diff --git a/test/strategy/test_tuning_space_v2.py b/test/strategy/test_tuning_space_v2.py index d5b4f176c85..8ef5cd05f08 100644 --- a/test/strategy/test_tuning_space_v2.py +++ b/test/strategy/test_tuning_space_v2.py @@ -189,7 +189,7 @@ def setUp(self) -> None: def test_tuning_sampler_int4(self): # op-wise - conf = {'usr_cfg': { } } + conf = {} conf = DotDict(conf) # test space construction tuning_space = TuningSpace(deepcopy(self.capability), deepcopy(conf)) @@ -215,7 +215,7 @@ def test_sampler_int4(self): from neural_compressor.strategy.utils.tuning_structs import OpTuningConfig from neural_compressor.strategy.utils.tuning_sampler import OpWiseTuningSampler # op-wise - conf = {'usr_cfg': { } } + conf = {} conf = DotDict(conf) # test space construction tuning_space = TuningSpace(deepcopy(self.capability), deepcopy(conf)) @@ -255,11 +255,9 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): def test_tuning_space_merge_op_wise(self): # op-wise conf = { - 'usr_cfg': { 'quantization': { - 'op_wise': self.op_wise_user_cfg_for_fallback, + 'op_name_dict': self.op_wise_user_cfg_for_fallback, } - } } conf = DotDict(conf) diff --git a/test/strategy/test_tuning_space_v2_1.x.py b/test/strategy/test_tuning_space_v2_1.x.py new file mode 100644 index 00000000000..b087daad3db --- /dev/null +++ b/test/strategy/test_tuning_space_v2_1.x.py @@ -0,0 +1,279 @@ +from neural_compressor.experimental.strategy.utils.tuning_space import TuningItem, TuningSpace +from neural_compressor.conf.dotdict import DotDict +from neural_compressor.utils import logger +from copy import deepcopy +import unittest + +op_cap = { + # op1 have both weight and activation and support static/dynamic/fp32/b16 + ('op_name1', 'op_type1'): [ + { + 'activation': + { + 'dtype': ['int8'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'], + 'algorithm': ['minmax', 'kl'] + }, + 'weight': + { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'] + } + }, + { + 'activation': + { + 'dtype': ['int4'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'], + 'algorithm': ['minmax', 'kl'] + }, + 'weight': + { + 'dtype': ['uint4'], + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'] + } + }, + { + 'activation': + { + 'dtype': ['int8'], + 'quant_mode': 'dynamic', + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'], + 'algorithm': ['minmax', 'kl'] + }, + 'weight': + { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'] + } + }, + { + 'activation': + { + 'dtype': 'bf16' + }, + 'weight': + { + 'dtype': 'bf16' + } + }, + { + 'activation': + { + 'dtype': 'fp32' + }, + 'weight': + { + 'dtype': 'fp32' + } + }, + ], + # op2 have both weight and activation and support static/dynamic/fp32 + ('op_name2', 'op_type1'): [ + { + 'activation': + { + 'dtype': ['int8'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'], + 'algorithm': ['minmax', 'kl'] + }, + 'weight': + { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'] + } + }, + { + 'activation': + { + 'dtype': ['int8'], + 'quant_mode': 'dynamic', + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'], + 'algorithm': ['minmax', 'kl'] + }, + 'weight': + { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'] + } + }, + { + 'activation': + { + 'dtype': 'fp32' + }, + 'weight': + { 
+ 'dtype': 'fp32' + } + }, + ], + # op3 have both weight and activation and support int4 + ('op_name3', 'op_type3'): [ + { + 'activation': + { + 'dtype': ['int4'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'], + 'algorithm': ['minmax', 'kl'] + }, + 'weight': + { + 'dtype': ['int4'], + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'] + } + }, + { + 'activation': + { + 'dtype': ['int8'], + 'quant_mode': 'static', + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'], + 'algorithm': ['minmax', 'kl'] + }, + 'weight': + { + 'dtype': ['int8'], + 'scheme': ['sym'], + 'granularity': ['per_channel', 'per_tensor'] + } + }, + { + 'activation': + { + 'dtype': 'fp32' + }, + 'weight': + { + 'dtype': 'fp32' + } + }, + ], +} + +class TestTuningSpaceV2(unittest.TestCase): + def setUp(self) -> None: + self.capability = { + 'calib': {'calib_sampling_size': [1, 10, 50]}, + 'op': deepcopy(op_cap) + } + + self.op_wise_user_cfg_for_fallback = { + 'op_name1': { + 'activation': { + 'dtype': ['fp32'] + }, + 'weight': { + 'dtype': ['fp32'] + } + }, + } + + + def test_tuning_sampler_int4(self): + # op-wise + conf = {'usr_cfg': { } } + conf = DotDict(conf) + # test space construction + tuning_space = TuningSpace(deepcopy(self.capability), deepcopy(conf)) + logger.debug(tuning_space.root_item.get_details()) + found_int4_activation = False + found_int4_weight = False + op3_act_item = tuning_space.query_quant_mode_item_by_full_path(('op_name3', 'op_type3'),\ + ('static', 'activation')) + for dtype_item in op3_act_item.options: + if dtype_item.name == 'int4': + found_int4_activation = True + self.assertTrue(found_int4_activation) + op3_weight_item = tuning_space.query_quant_mode_item_by_full_path(('op_name3', 'op_type3'), \ + ('static', 'weight')) + for dtype_item in op3_weight_item.options: + if dtype_item.name == 'int4': + found_int4_weight = True + self.assertTrue(found_int4_weight) + + def test_sampler_int4(self): + # test sampler + from collections import OrderedDict + from neural_compressor.strategy.utils.tuning_structs import OpTuningConfig + from neural_compressor.strategy.utils.tuning_sampler import OpWiseTuningSampler + # op-wise + conf = {'usr_cfg': { } } + conf = DotDict(conf) + # test space construction + tuning_space = TuningSpace(deepcopy(self.capability), deepcopy(conf)) + logger.debug(tuning_space.root_item.get_details()) + initial_op_tuning_cfg = {} + for item in tuning_space.root_item.options: + if item.item_type == 'op': + op_name, op_type = item.name + initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) + quant_mode_wise_items = OrderedDict() + from neural_compressor.strategy.utils.constant import auto_query_order as query_order + pre_items = set() + for quant_mode in query_order: + items = tuning_space.query_items_by_quant_mode(quant_mode) + filtered_items = [item for item in items if item not in pre_items] + pre_items = pre_items.union(set(items)) + quant_mode_wise_items[quant_mode] = filtered_items + + def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): + for item in items_lst: + op_item_dtype_dict[item.name] = target_quant_mode + + op_item_dtype_dict = OrderedDict() + for quant_mode, quant_mode_items in quant_mode_wise_items.items(): + initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) + + op_wise_tuning_sampler = OpWiseTuningSampler(deepcopy(tuning_space), [], [], + op_item_dtype_dict, initial_op_tuning_cfg) + op3 = ('op_name3', 
'op_type3') + for tune_cfg in op_wise_tuning_sampler: + op_cfg = tune_cfg[op3].get_state() + act_dtype = op_cfg['activation']['dtype'] + weight_dtype = op_cfg['weight']['dtype'] + self.assertTrue(act_dtype == weight_dtype == 'int4') + + + def test_tuning_space_merge_op_wise(self): + # op-wise + conf = { + 'usr_cfg': { + 'quantization': { + 'op_wise': self.op_wise_user_cfg_for_fallback, + } + } + + } + conf = DotDict(conf) + # test fallback + tuning_space2 = TuningSpace(deepcopy(self.capability), deepcopy(conf)) + logger.debug(tuning_space2.root_item.get_details()) + op_name1_only_fp32 = True + for quant_mode in ['static', 'dynamic']: + for item in tuning_space2.query_items_by_quant_mode(quant_mode): + if item.name[0] == 'op_name1': + op_name1_only_fp32 = False + self.assertTrue(op_name1_only_fp32) + + + +if __name__ == "__main__": + unittest.main() From 7243bd276a4b8b61b9bdaeaec42ab75492102a5d Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Thu, 30 Mar 2023 11:01:57 +0800 Subject: [PATCH 012/103] update for mixed precision config Signed-off-by: Cheng, Zixuan --- neural_compressor/config.py | 12 +++++++++++- neural_compressor/mix_precision.py | 14 ++++++++------ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 285e7ba2d13..f8d76c08c30 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -1552,6 +1552,7 @@ def __init__(self, device="cpu", backend="default", precision="bf16", + model=None, inputs=[], outputs=[], tuning_criterion=tuning_criterion, @@ -1567,6 +1568,7 @@ def __init__(self, excluded_precisions=excluded_precisions, ) self.precision = precision + self.model = model @property def precision(self): @@ -1582,7 +1584,15 @@ def precision(self, precision): elif isinstance(precision, list): assert all([i in ["fp16", "bf16"] for i in precision]), "Only " \ "support 'fp16' and 'bf16' for mix precision." - self._precision = precision + self._precision = precision + + @property + def model(self): + return self._model + + @model.setter + def model(self, model): + self._model = model class ExportConfig: diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py index f1a29045c46..0ab580e541c 100644 --- a/neural_compressor/mix_precision.py +++ b/neural_compressor/mix_precision.py @@ -191,7 +191,7 @@ def model(self, user_model): make sure the name is in supported slim model list. 
""" cfg = self.conf - if cfg.quantization.framework == 'NA': + if cfg.quantization.framework is None: if isinstance(user_model, BaseModel): cfg.quantization.framework = list(MODELS.keys())[list(MODELS.values()).index(type(user_model))] if cfg.quantization.backend == "ipex": @@ -372,17 +372,19 @@ def fit(model, "please modify precision or excluded_precisions to make it understandable.") sys.exit(0) precisions = list(set(config.precision) - set(config.excluded_precisions)) - converter.precisions = precisions - converter.model = model + converter.conf.quantization.precisions = precisions + converter.conf.quantization.model = model - if ('bf16' in precisions or 'fp16' in precisions) and converter.framework() == "onnxruntime": + if ('bf16' in precisions or 'fp16' in precisions) and \ + converter.conf.quantization.framework == "onnxruntime": if config.device == "cpu": logger.warning("Mix precision exits due to device isn't gpu for onnx models.") sys.exit(0) elif config.backend != "onnxrt_cuda_ep": logger.warning("Mix precision exits due to backend isn't onnxrt_cuda_ep for onnx models.") sys.exit(0) - elif 'bf16' in precisions and not CpuInfo().bf16 and converter.framework() != "onnxruntime": + elif 'bf16' in precisions and not CpuInfo().bf16 and \ + converter.conf.quantization.framework != "onnxruntime": if os.getenv('FORCE_BF16') == '1': logger.warning("Mix precision will generate bf16 graph although " \ "the hardware doesn't support bf16 instruction.") @@ -390,7 +392,7 @@ def fit(model, logger.warning("Mix precision exits due to the hardware " \ "doesn't support bf16 instruction.") sys.exit(0) - elif 'fp16' in precisions and converter.framework() != "onnxruntime": + elif 'fp16' in precisions and converter.conf.quantization.framework != "onnxruntime": logger.warning("Currently mix precision only supports fp16 for onnx models.") sys.exit(0) if eval_func is not None: From 410a793842586154e734786e211b48702bed6785 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 30 Mar 2023 14:27:30 +0800 Subject: [PATCH 013/103] fixed mix precision Signed-off-by: yiliu30 --- neural_compressor/config.py | 6 ++++++ neural_compressor/mix_precision.py | 19 ++++++++++--------- neural_compressor/quantization.py | 18 +++++++++--------- .../strategy/auto_mixed_precision.py | 3 +-- neural_compressor/strategy/strategy.py | 10 +++++----- 5 files changed, 31 insertions(+), 25 deletions(-) diff --git a/neural_compressor/config.py b/neural_compressor/config.py index f8d76c08c30..f89d5072ad9 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -1569,6 +1569,8 @@ def __init__(self, ) self.precision = precision self.model = model + # For align with quant config + self._approach = None @property def precision(self): @@ -1593,6 +1595,10 @@ def model(self): @model.setter def model(self, model): self._model = model + + @property + def approach(self): + return self._approach class ExportConfig: diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py index 0ab580e541c..3c89702c3b3 100644 --- a/neural_compressor/mix_precision.py +++ b/neural_compressor/mix_precision.py @@ -78,14 +78,15 @@ def pre_process(self): _resume = pickle.load(f).__dict__ self.strategy = STRATEGIES[strategy]( - self._model, - self.conf, - None, - None, - self._eval_dataloader, - self._eval_func, - self._eval_metric, - _resume) + model = self.model, + conf = self.conf, + q_dataloader=None, + q_func=None, + eval_func=self._eval_func, + eval_dataloader=self._eval_dataloader, + eval_metric=self._eval_metric, + 
resume=_resume, + q_hooks=None) def execute(self): """Execute routinue based on strategy design.""" @@ -373,7 +374,7 @@ def fit(model, sys.exit(0) precisions = list(set(config.precision) - set(config.excluded_precisions)) converter.conf.quantization.precisions = precisions - converter.conf.quantization.model = model + converter.model = model if ('bf16' in precisions or 'fp16' in precisions) and \ converter.conf.quantization.framework == "onnxruntime": diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index 7640703c49c..f1815f7e1cf 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -103,15 +103,15 @@ def pre_proccess(self): logger.info("Quantize model without tuning!") self.strategy = STRATEGIES[strategy]( - self._model, - self.conf, - self._calib_dataloader, - self._train_func, - self._eval_func, - self._eval_dataloader, - self._eval_metric, - _resume, - self.callbacks.hooks if self.callbacks is not None else None) + model = self.model, + conf = self.conf, + q_dataloader=self._calib_dataloader, + q_func=self._train_func, + eval_func=self._eval_func, + eval_dataloader=self._eval_dataloader, + eval_metric=self._eval_metric, + resume=_resume, + q_hooks=self.callbacks.hooks if self.callbacks is not None else None) def execute(self): """Quantization execute routinue based on strategy design.""" diff --git a/neural_compressor/strategy/auto_mixed_precision.py b/neural_compressor/strategy/auto_mixed_precision.py index f9d6dfb5007..bee43873601 100644 --- a/neural_compressor/strategy/auto_mixed_precision.py +++ b/neural_compressor/strategy/auto_mixed_precision.py @@ -46,8 +46,7 @@ def next_tune_cfg(self): # filter quantization dtype # TODO align with the old mixed-precison - target_dtypes = self.cfg.graph_optimization.precisions if self.cfg.graph_optimization \ - else self.cfg.mixed_precision.precisions + target_dtypes = self.conf.quantization.precisions target_dtypes = list(set(target_dtypes) - set(['fp32'])) tuning_space = self.tuning_space initial_op_tuning_cfg = {} diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index c503c56340a..8b2e1aa380b 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -88,15 +88,15 @@ def _check_tuning_status(self): self._not_tuning = False return - def __init__(self, - model, - conf: PostTrainingQuantConfig, + def __init__(self, + model, + conf, q_dataloader=None, - q_func=None, + q_func=None, eval_func=None, eval_dataloader=None, eval_metric=None, - resume=None, + resume=None, q_hooks=None): """Init the TuneStrategy. From 1643038f6cae2e8b6d3a181d56672ad6cb910c43 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 30 Mar 2023 15:20:29 +0800 Subject: [PATCH 014/103] update strategies Signed-off-by: yiliu30 --- neural_compressor/strategy/auto.py | 47 +++++++++++++--------- neural_compressor/strategy/bayesian.py | 40 +++++++++++++++--- neural_compressor/strategy/conservative.py | 41 ++++++++++++++++--- neural_compressor/strategy/mse.py | 41 ++++++++++++++++--- neural_compressor/strategy/strategy.py | 11 +++-- 5 files changed, 140 insertions(+), 40 deletions(-) diff --git a/neural_compressor/strategy/auto.py b/neural_compressor/strategy/auto.py index 26048f9aa30..e779a55d945 100644 --- a/neural_compressor/strategy/auto.py +++ b/neural_compressor/strategy/auto.py @@ -34,15 +34,15 @@ class AutoTuneStrategy(TuneStrategy): and the tuning process ends once the condition meets the exit policy. 
""" - def __init__(self, - model, + def __init__(self, + model, conf, q_dataloader=None, - q_func=None, + q_func=None, eval_func=None, - eval_dataloader=None, - eval_metric=None, - resume=None, + eval_dataloader=None, + eval_metric=None, + resume=None, q_hooks=None): """Init an auto tuning strategy. @@ -51,25 +51,25 @@ def __init__(self, conf: The Conf class instance includes all user configurations. q_dataloader: Data loader for calibration, mandatory for post-training quantization. Defaults to None. q_func: Training function for quantization aware training. Defaults to None. Defaults to None. - eval_dataloader: Data loader for evaluation. Defaults to None. eval_func: The evaluation function provided by user. This function takes model as parameter, and evaluation dataset and metrics should be encapsulated in this function implementation and outputs a higher-is-better accuracy scalar value. + eval_dataloader: Data loader for evaluation. Defaults to None. + eval_metric: Metric for evaluation. Defaults to None. resume: The dict containing resume information. Defaults to None. q_hooks: The dict of training hooks, supported keys are: on_epoch_begin, on_epoch_end, on_step_begin, on_step_end. Their values are functions to be executed in adaptor layer.. Defaults to None. """ - super().__init__(model, conf, q_dataloader, q_func, eval_dataloader,\ - eval_func, resume, q_hooks) + super().__init__(model=model, + conf=conf, + q_dataloader=q_dataloader, + q_func=q_func, + eval_func=eval_func, + eval_dataloader=eval_dataloader, + eval_metric=eval_metric, + resume=resume, + q_hooks=q_hooks) logger.info(f"*** Initialize auto tuning") - self.model = model - self.conf = conf - self.q_dataloader = q_dataloader - self.q_func = q_func - self.eval_dataloader = eval_dataloader - self.eval_func = eval_func - self.resume = resume - self.q_hooks = q_hooks self.strategies_sequence = ['conservative', 'basic'] def sequential_traverse(self): @@ -77,8 +77,17 @@ def sequential_traverse(self): pre_strategy = self for strategy_name in self.strategies_sequence: logger.info(f"*** Start {strategy_name} tuning.") - strategy = STRATEGIES[strategy_name](self.model, self.conf, self.q_dataloader, self.q_func, \ - self.eval_dataloader, self.eval_func, self.resume, self.q_hooks) + strategy = STRATEGIES[strategy_name]( + model = self.model, + conf = self.conf, + q_dataloader=self.calib_dataloader, + q_func=self.q_func, + eval_func=self.eval_func, + eval_dataloader=self.eval_dataloader, + eval_metric=self.eval_metric, + resume=self._resume, + q_hooks=self.q_hooks) + if pre_strategy: #TODO add tuning history from the previous stage to current stage. 
strategy.baseline = deepcopy(pre_strategy.baseline) diff --git a/neural_compressor/strategy/bayesian.py b/neural_compressor/strategy/bayesian.py index ceefd26f7ae..73421b5e11c 100644 --- a/neural_compressor/strategy/bayesian.py +++ b/neural_compressor/strategy/bayesian.py @@ -37,11 +37,41 @@ class BayesianTuneStrategy(TuneStrategy): """The Bayesian tuning strategy.""" - def __init__(self, model, conf, q_dataloader, q_func=None, eval_dataloader=None, - eval_func=None, dicts=None, q_hooks=None): - """Init the BaySian tuning strategy.""" - super().__init__(model, conf, q_dataloader, q_func, eval_dataloader, - eval_func, dicts, q_hooks) + def __init__(self, + model, + conf, + q_dataloader=None, + q_func=None, + eval_func=None, + eval_dataloader=None, + eval_metric=None, + resume=None, + q_hooks=None): + """Init the Bayesian tuning strategy. + + Args: + model: The FP32 model specified for low precision tuning. + conf: The Conf class instance includes all user configurations. + q_dataloader: Data loader for calibration, mandatory for post-training quantization. Defaults to None. + q_func: Training function for quantization aware training. Defaults to None. + eval_func: The evaluation function provided by user. This function takes model as parameter, and + evaluation dataset and metrics should be encapsulated in this function implementation and + outputs a higher-is-better accuracy scalar value. + eval_dataloader: Data loader for evaluation. Defaults to None. + eval_metric: Metric for evaluation. Defaults to None. + resume: The dict containing resume information. Defaults to None. + q_hooks: The dict of training hooks, supported keys are: on_epoch_begin, on_epoch_end, on_step_begin, + on_step_end. Their values are functions to be executed in adaptor layer. Defaults to None. + """ + super().__init__(model=model, + conf=conf, + q_dataloader=q_dataloader, + q_func=q_func, + eval_func=eval_func, + eval_dataloader=eval_dataloader, + eval_metric=eval_metric, + resume=resume, + q_hooks=q_hooks) self.bayes_opt = None def __getstate__(self): diff --git a/neural_compressor/strategy/conservative.py b/neural_compressor/strategy/conservative.py index 643661aeaff..979e2b9f479 100644 --- a/neural_compressor/strategy/conservative.py +++ b/neural_compressor/strategy/conservative.py @@ -38,12 +38,43 @@ class ConservativeTuneStrategy(TuneStrategy): of the model after quantization. It starts with the original(fp32) model, and then quantize the OPs to lower precision OP type wisely and OP wisely. """ + + def __init__(self, + model, + conf, + q_dataloader=None, + q_func=None, + eval_func=None, + eval_dataloader=None, + eval_metric=None, + resume=None, + q_hooks=None): + """Init conservative tuning strategy. - def __init__(self, model, conf, q_dataloader, q_func=None, eval_dataloader=None, - eval_func=None, dicts=None, q_hooks=None): - """Init conservative tuning strategy.""" - super().__init__(model, conf, q_dataloader, q_func, eval_dataloader, - eval_func, dicts, q_hooks) + Args: + model: The FP32 model specified for low precision tuning. + conf: The Conf class instance includes all user configurations. + q_dataloader: Data loader for calibration, mandatory for post-training quantization. Defaults to None. + q_func: Training function for quantization aware training. Defaults to None. + eval_func: The evaluation function provided by user.
This function takes model as parameter, and + evaluation dataset and metrics should be encapsulated in this function implementation and + outputs a higher-is-better accuracy scalar value. + eval_dataloader: Data loader for evaluation. Defaults to None. + eval_metric: Metric for evaluation. Defaults to None. + resume: The dict containing resume information. Defaults to None. + q_hooks: The dict of training hooks, supported keys are: on_epoch_begin, on_epoch_end, on_step_begin, + on_step_end. Their values are functions to be executed in adaptor layer. Defaults to None. + """ + super().__init__(model=model, + conf=conf, + q_dataloader=q_dataloader, + q_func=q_func, + eval_func=eval_func, + eval_dataloader=eval_dataloader, + eval_metric=eval_metric, + resume=resume, + q_hooks=q_hooks) + logger.info(f"*** Initialize conservative tuning") self.acc_meet_flag = False self.quant_op_type_lst = ['conv', 'matmul', 'linear'] res_lst = [None] * len(self.quant_op_type_lst) diff --git a/neural_compressor/strategy/mse.py b/neural_compressor/strategy/mse.py index 3bcf100cec9..14803e04b1d 100644 --- a/neural_compressor/strategy/mse.py +++ b/neural_compressor/strategy/mse.py @@ -35,11 +35,42 @@ class MSETuneStrategy(TuneStrategy): those OPs according to the MSE value, and performs the op-wise fallback in this order. """ - def __init__(self, model, conf, q_dataloader, q_func=None, eval_dataloader=None, - eval_func=None, dicts=None, q_hooks=None): - """Init an mse tuning strategy.""" - super().__init__(model, conf, q_dataloader, q_func, eval_dataloader, - eval_func, dicts, q_hooks) + def __init__(self, + model, + conf, + q_dataloader=None, + q_func=None, + eval_func=None, + eval_dataloader=None, + eval_metric=None, + resume=None, + q_hooks=None): + """Init MSE tuning strategy. + + Args: + model: The FP32 model specified for low precision tuning. + conf: The Conf class instance includes all user configurations. + q_dataloader: Data loader for calibration, mandatory for post-training quantization. Defaults to None. + q_func: Training function for quantization aware training. Defaults to None. + eval_func: The evaluation function provided by user. This function takes model as parameter, and + evaluation dataset and metrics should be encapsulated in this function implementation and + outputs a higher-is-better accuracy scalar value. + eval_dataloader: Data loader for evaluation. Defaults to None. + eval_metric: Metric for evaluation. Defaults to None. + resume: The dict containing resume information. Defaults to None. + q_hooks: The dict of training hooks, supported keys are: on_epoch_begin, on_epoch_end, on_step_begin, + on_step_end. Their values are functions to be executed in adaptor layer. Defaults to None. + """ + super().__init__(model=model, + conf=conf, + q_dataloader=q_dataloader, + q_func=q_func, + eval_func=eval_func, + eval_dataloader=eval_dataloader, + eval_metric=eval_metric, + resume=resume, + q_hooks=q_hooks) + logger.info(f"*** Initialize MSE tuning") self.ordered_ops = None diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 8b2e1aa380b..5607b79c7dd 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -105,24 +105,23 @@ def __init__(self, conf: The Conf class instance includes all user configurations. q_dataloader: Data loader for calibration, mandatory for post-training quantization. Defaults to None. q_func: Training function for quantization aware training. Defaults to None.
Defaults to None. - eval_dataloader: Data loader for evaluation. Defaults to None. eval_func: The evaluation function provided by user. This function takes model as parameter, and evaluation dataset and metrics should be encapsulated in this function implementation and outputs a higher-is-better accuracy scalar value. + eval_dataloader: Data loader for evaluation. Defaults to None. + eval_metric: Metric for evaluation. Defaults to None. resume: The dict containing resume information. Defaults to None. q_hooks: The dict of training hooks, supported keys are: on_epoch_begin, on_epoch_end, on_step_begin, on_step_end. Their values are functions to be executed in adaptor layer.. Defaults to None. - last_qmodel: The quantized model that generated from the last tuning. - best_qmodel: The best quantized model that generated during the tuning process. """ self.model = model self.conf = conf self.history_path = self._create_path(self.conf.options.workspace, './history.snapshot') self.deploy_path = self._create_path(self.conf.options.workspace, 'deploy.yaml') self.calib_dataloader = q_dataloader + self.eval_func = eval_func self.eval_dataloader = eval_dataloader self.eval_metric = eval_metric - self.eval_func = eval_func # not tuning equals to performance only self._not_tuning = True self._check_tuning_status() @@ -178,8 +177,8 @@ def __init__(self, self._initialize_recipe() self.applied_all_recipes_flag = False - - if resume is not None: self.setup_resume(resume) + self._resume = resume + if self._resume is not None: self.setup_resume(resume) @abstractmethod From 6997b7d41e27aca635648ab12ea31cea6fc1cb80 Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Thu, 30 Mar 2023 15:39:17 +0800 Subject: [PATCH 015/103] update benchmark config Signed-off-by: Cheng, Zixuan --- neural_compressor/benchmark.py | 67 +++++++++++++++++----------------- neural_compressor/config.py | 53 ++++++++++++++++++++++++++- 2 files changed, 85 insertions(+), 35 deletions(-) diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py index a1f13c01452..6aca7ff5134 100644 --- a/neural_compressor/benchmark.py +++ b/neural_compressor/benchmark.py @@ -54,18 +54,17 @@ def set_all_env_var(conf, overwrite_existing=False): Neural Compressor only uses physical cores """ cpu_counts = psutil.cpu_count(logical=False) - if not conf: - conf = {} - conf['num_of_instance'] = 1 - conf['cores_per_instance'] = cpu_counts - if 'cores_per_instance' in conf: - assert conf['cores_per_instance'] * conf['num_of_instance'] <= cpu_counts,\ + assert isinstance(conf, Config), \ + 'input has to be a Config object' + + if conf.benchmark.cores_per_instance is not None: + assert conf.benchmark.cores_per_instance * conf.benchmark.num_of_instance <= cpu_counts, \ 'num_of_instance * cores_per_instance should <= cpu physical cores' else: - assert conf['num_of_instance'] <= cpu_counts, 'num_of_instance should <= cpu counts' - conf['cores_per_instance'] = int(cpu_counts / conf['num_of_instance']) - - for var, value in conf.items(): + assert conf.benchmark.num_of_instance <= cpu_counts, \ + 'num_of_instance should <= cpu counts' + conf.benchmark.cores_per_instance = int(cpu_counts / conf.benchmark.num_of_instance) + for var, value in dict(conf.benchmark).items(): set_env_var(var.upper(), value, overwrite_existing) @@ -155,8 +154,8 @@ def __init__(self, conf): assert isinstance(conf, BenchmarkConfig), \ "The config object should be config.BenchmarkConfig, not {}".format(type(conf)) self.conf = Config(quantization=None, benchmark=conf, pruning=None, 
distillation=None, nas=None) - if self.conf.quantization.framework is not None: - self.framework = self.conf.quantization.framework.lower() + if self.conf.benchmark.framework is not None: + self.framework = self.conf.benchmark.framework def __call__(self, raw_cmd=None): """Directly call a Benchmark object. @@ -167,7 +166,8 @@ def __call__(self, raw_cmd=None): cfg = self.conf assert sys.platform in ['linux', 'win32'], 'only support platform windows and linux...' # disable multi-instance for running bechmark on GPU device - if cfg.quantization.device == 'gpu': + set_all_env_var(cfg) + if cfg.benchmark.device == 'gpu': set_env_var('NC_ENV_CONF', True, overwrite_existing=True) logger.info("Start to run Benchmark.") @@ -322,18 +322,17 @@ def run_instance(self): if self._b_func is None: cfg = self.conf GLOBAL_STATE.STATE = MODE.BENCHMARK - framework_specific_info = {'device': cfg.quantization.device, \ - 'approach': cfg.quantization.approach, \ + framework_specific_info = {'device': cfg.benchmark.device, \ + 'approach': None, \ 'random_seed': cfg.options.random_seed, - 'backend': cfg.quantization.backend \ - if cfg.quantization.backend is not None else 'default', - 'format': cfg.quantization.quant_format \ - if cfg.quantization.backend is not None else 'default'} - framework = cfg.quantization.framework.lower() + 'backend': cfg.benchmark.backend \ + if cfg.benchmark.backend is not None else 'default', + 'format': None} + framework = cfg.benchmark.framework if 'tensorflow' in framework: - framework_specific_info.update({"inputs": cfg.quantization.inputs, \ - "outputs": cfg.quantization.outputs, \ - "recipes": cfg.quantization.recipes, \ + framework_specific_info.update({"inputs": cfg.benchmark.inputs, \ + "outputs": cfg.benchmark.outputs, \ + "recipes": None, \ 'workspace_path': cfg.options.workspace}) if framework == 'keras': framework_specific_info.update({'workspace_path': cfg.options.workspace}) @@ -473,41 +472,41 @@ def model(self, user_model): make sure the name is in the supported slim model list. """ cfg = self.conf - if cfg.quantization.framework is None: + if cfg.benchmark.framework is None: assert not isinstance(user_model, BaseModel), \ "Please pass an original framework model but not neural compressor model!" 
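            # No framework was set in the config, so detect it from the raw model object.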
self.framework = get_model_fwk_name(user_model) if self.framework == "tensorflow": from .model.tensorflow_model import get_model_type - if get_model_type(user_model) == 'keras' and cfg.quantization.backend == 'itex': + if get_model_type(user_model) == 'keras' and cfg.benchmark.backend == 'itex': self.framework = 'keras' if self.framework == "pytorch": - if cfg.quantization.backend == "default": + if cfg.benchmark.backend == "default": self.framework = "pytorch_fx" - elif cfg.quantization.backend == "ipex": + elif cfg.benchmark.backend == "ipex": self.framework = "pytorch_ipex" import intel_extension_for_pytorch - cfg.quantization.framework = self.framework + cfg.benchmark.framework = self.framework if not isinstance(user_model, BaseModel): logger.warning("Force convert framework model to neural_compressor model.") if "tensorflow" in self.framework or self.framework == "keras": - self._model = NCModel(user_model, backend=self.framework, device=cfg.quantization.device) + self._model = NCModel(user_model, backend=self.framework, device=cfg.benchmark.device) else: self._model = NCModel(user_model, backend=self.framework) else: # It is config of neural_compressor version < 2.0, no need in 2.0 - if cfg.quantization.framework == "pytorch_ipex": + if cfg.benchmark.framework == "pytorch_ipex": from neural_compressor.model.torch_model import IPEXModel if not isinstance(user_model, IPEXModel): - self._model = NCModel(user_model.model, framework=cfg.quantization.framework) + self._model = NCModel(user_model.model, framework=cfg.benchmark.framework) return self._model = user_model if 'tensorflow' in self.framework: - self._model.name = cfg.quantization.model_name - self._model.output_tensor_names = cfg.quantization.outputs - self._model.input_tensor_names = cfg.quantization.inputs + self._model.name = cfg.benchmark.model_name + self._model.output_tensor_names = cfg.benchmark.outputs + self._model.input_tensor_names = cfg.benchmark.inputs self._model.workspace_path = cfg.options.workspace def __repr__(self): diff --git a/neural_compressor/config.py b/neural_compressor/config.py index f89d5072ad9..34e33fc87ad 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -221,8 +221,11 @@ def __init__(self, inputs=[], outputs=[], backend='default', + device='cpu', warmup=5, iteration=-1, + model=None, + model_name='', cores_per_instance=None, num_of_instance=None, inter_num_of_threads=None, @@ -231,13 +234,25 @@ def __init__(self, self.inputs = inputs self.outputs = outputs self.backend = backend + self.device=device self.warmup = warmup self.iteration = iteration + self.model = model + self.model_name = model_name self.cores_per_instance = cores_per_instance self.num_of_instance = num_of_instance self.inter_num_of_threads = inter_num_of_threads self.intra_num_of_threads = intra_num_of_threads + self._framework=None + + def keys(self): + return ('inputs', 'outputs', 'backend', 'device', 'warmup', 'iteration', 'model', \ + 'model_name', 'cores_per_instance', 'num_of_instance', 'framework', \ + 'inter_num_of_threads','intra_num_of_threads') + def __getitem__(self, item): + return getattr(self, item) + @property def backend(self): """Get backend.""" @@ -250,6 +265,15 @@ def backend(self, backend): 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep']): self._backend = backend + @property + def device(self): + return self._device + + @device.setter + def device(self, device): + if _check_value('device', device, str, ['cpu', 'gpu']): + self._device = device + @property def 
outputs(self):
         """Get outputs."""
@@ -341,6 +365,33 @@ def intra_num_of_threads(self, intra_num_of_threads):
                        intra_num_of_threads, int):
             self._intra_num_of_threads = intra_num_of_threads

+    @property
+    def model(self):
+        return self._model
+
+    @model.setter
+    def model(self, model):
+        self._model = model
+
+    @property
+    def model_name(self):
+        """Get model name."""
+        return self._model_name
+
+    @model_name.setter
+    def model_name(self, model_name):
+        """Set model name."""
+        if _check_value("model_name", model_name, str):
+            self._model_name = model_name
+
+    @property
+    def framework(self):
+        return self._framework
+
+    @framework.setter
+    def framework(self, framework):
+        self._framework = framework
+

 class AccuracyCriterion:
     """Class of Accuracy Criterion.

@@ -1595,7 +1646,7 @@ def model(self):
     @model.setter
     def model(self, model):
         self._model = model
-    
+
     @property
     def approach(self):
         return self._approach

From 459d323a8045d0b6c211b5ebeed0f547698cbe7e Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Thu, 30 Mar 2023 15:48:09 +0800
Subject: [PATCH 016/103] update tpe and sigopt

Signed-off-by: yiliu30
---
 neural_compressor/contrib/strategy/sigopt.py | 14 +++++++-------
 neural_compressor/contrib/strategy/tpe.py    |  8 ++++----
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/neural_compressor/contrib/strategy/sigopt.py b/neural_compressor/contrib/strategy/sigopt.py
index 500db31d64e..3b8317dd9d5 100644
--- a/neural_compressor/contrib/strategy/sigopt.py
+++ b/neural_compressor/contrib/strategy/sigopt.py
@@ -158,10 +158,11 @@ def next_tune_cfg(self):

     def get_acc_target(self, base_acc):
         """Get the tuning target of the accuracy criterion."""
-        if self.cfg.tuning.accuracy_criterion.relative:
-            return base_acc * (1. - self.cfg.tuning.accuracy_criterion.relative)
+        accuracy_criterion_conf = self.conf.quantization.accuracy_criterion
+        if accuracy_criterion_conf.criterion == 'relative':
+            return base_acc * (1. - accuracy_criterion_conf.tolerable_loss)
         else:
-            return base_acc - self.cfg.tuning.accuracy_criterion.absolute
+            return base_acc - accuracy_criterion_conf.tolerable_loss

     def traverse(self):
         """The main traverse logic, which can be overridden by a concrete strategy that needs more hooks.
@@ -179,10 +180,9 @@ def traverse(self):
         trials_count = 0
         for tune_cfg in self.next_tune_cfg():
             # add tune_cfg here as quantize uses tune_cfg
-            tune_cfg['advance'] = self.cfg.quantization.advance
             trials_count += 1
             tuning_history = self._find_tuning_history(tune_cfg)
-            if tuning_history and trials_count < self.cfg.tuning.exit_policy.max_trials:
+            if tuning_history and trials_count < self.conf.quantization.tuning_criterion.max_trials:
                 self.last_tune_result = tuning_history['last_tune_result']
                 self.best_tune_result = tuning_history['best_tune_result']
                 logger.warn("Find evaluated tuning config, skip.")
@@ -194,14 +194,14 @@ def traverse(self):
                 tune_cfg, self.model, self.calib_dataloader, self.q_func)
             assert self.last_qmodel  # Return the last quantized model as a result if performance only.
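get_acc_target above now reads the 2.x AccuracyCriterion object instead of the old tuning.accuracy_criterion yaml node. A small worked sketch of that arithmetic, with an illustrative baseline accuracy of 0.80::

    from neural_compressor.config import AccuracyCriterion

    acc = AccuracyCriterion(criterion='relative', tolerable_loss=0.01)
    base_acc = 0.80
    if acc.criterion == 'relative':
        acc_target = base_acc * (1. - acc.tolerable_loss)   # 0.80 * 0.99 = 0.792
    else:                                                   # 'absolute'
        acc_target = base_acc - acc.tolerable_loss          # 0.80 - 0.01 = 0.79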
- if self.cfg.tuning.exit_policy.performance_only: + if self._not_tuning: self.best_qmodel = self.last_qmodel self._add_tuning_history(copy.deepcopy(tune_cfg), (-1, [0]), q_config=self.last_qmodel.q_config) return self.last_tune_cfg = copy.deepcopy(tune_cfg) self.last_tune_result = self._evaluate(self.last_qmodel) - need_stop = self.stop(self.cfg.tuning.exit_policy.timeout, trials_count) + need_stop = self.stop(self.conf.quantization.tuning_criterion.timeout, trials_count) # record the tuning history saved_tune_cfg = copy.deepcopy(tune_cfg) diff --git a/neural_compressor/contrib/strategy/tpe.py b/neural_compressor/contrib/strategy/tpe.py index 8e8d1f653ac..349518a240a 100644 --- a/neural_compressor/contrib/strategy/tpe.py +++ b/neural_compressor/contrib/strategy/tpe.py @@ -144,7 +144,7 @@ def __getstate__(self): dict: Saved dict for resuming """ for history in self.tuning_history: - if self._same_yaml(history['cfg'], self.cfg): + if self._same_yaml(history['cfg'], self.conf): history['warm_start'] = True history['hpopt_trials'] = self.hpopt_trials history['loss_function_config'] = self.loss_function_config @@ -293,7 +293,7 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): self._save_trials(trials_file) self._update_best_result(best_result_file) self._save() - if self.stop(self.cfg.tuning.exit_policy.timeout, trials_count): + if self.stop(self.conf.quantization.tuning_criterion.timeout, trials_count): exit = True else: logger.warn("Can't create search space for input model.") @@ -488,8 +488,8 @@ def stop(self, timeout, trials_count): del self.last_qmodel last_tune_msg = '[Accuracy ({}|fp32): {:.4f}|{:.4f}'.format( \ - self.cfg.quantization.dtype, self.last_tune_result[0], self.baseline[0]) + \ - ''.join([', {} ({}|fp32): {:.4f}|{:.4f}'.format(x,self.cfg.quantization.dtype,y,z) \ + 'int8', self.last_tune_result[0], self.baseline[0]) + \ + ''.join([', {} ({}|fp32): {:.4f}|{:.4f}'.format(x,'int8',y,z) \ for x,y,z in zip(self.objectives.representation, \ self.last_tune_result[1], self.baseline[1]) if x != 'Accuracy']) + ']' \ if self.last_tune_result else 'n/a' From e922b3f3c3e809075933c3d117e1e189100ba770 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 30 Mar 2023 16:26:26 +0800 Subject: [PATCH 017/103] update tpe ut Signed-off-by: yiliu30 --- neural_compressor/__init__.py | 1 + neural_compressor/config.py | 6 +++ neural_compressor/contrib/strategy/sigopt.py | 50 ++++++++++++++----- neural_compressor/contrib/strategy/tpe.py | 51 ++++++++++++-------- test/strategy/test_tpe.py | 37 +++++++++++--- 5 files changed, 105 insertions(+), 40 deletions(-) diff --git a/neural_compressor/__init__.py b/neural_compressor/__init__.py index 82f88fb4d92..47a11d76ee8 100644 --- a/neural_compressor/__init__.py +++ b/neural_compressor/__init__.py @@ -23,3 +23,4 @@ from .config import DistillationConfig, PostTrainingQuantConfig, \ WeightPruningConfig, QuantizationAwareTrainingConfig, \ MixedPrecisionConfig +from .contrib import * \ No newline at end of file diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 34e33fc87ad..179f083c06a 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -1591,6 +1591,12 @@ class MixedPrecisionConfig(_BaseQuantizationConfig): it won't work if there is no accuracy tuning process. excluded_precisions (list, optional): Precisions to be excluded during mix precision conversion, default is []. 
     Example::

         from neural_compressor import mix_precision
diff --git a/neural_compressor/contrib/strategy/sigopt.py b/neural_compressor/contrib/strategy/sigopt.py
index 3b8317dd9d5..8c9b230ee17 100644
--- a/neural_compressor/contrib/strategy/sigopt.py
+++ b/neural_compressor/contrib/strategy/sigopt.py
@@ -73,19 +73,43 @@ def eval_func(model):

     """

-    def __init__(self, model, conf, q_dataloader, q_func=None,
-                 eval_dataloader=None, eval_func=None, dicts=None, q_hooks=None):
-        """Initialize the SigOpt tuning strategy if the user specified to use it."""
-        super().__init__(
-            model,
-            conf,
-            q_dataloader,
-            q_func,
-            eval_dataloader,
-            eval_func,
-            dicts,
-            q_hooks)
-        strategy_name = conf.usr_cfg.tuning.strategy.name
+    def __init__(self,
+                 model,
+                 conf,
+                 q_dataloader=None,
+                 q_func=None,
+                 eval_func=None,
+                 eval_dataloader=None,
+                 eval_metric=None,
+                 resume=None,
+                 q_hooks=None):
+        """Initialize the SigOpt tuning strategy if the user specified to use it.
+
+        Args:
+            model: The FP32 model specified for low precision tuning.
+            conf: The Conf class instance that includes all user configurations.
+            q_dataloader: Data loader for calibration, mandatory for post-training quantization. Defaults to None.
+            q_func: Training function for quantization aware training. Defaults to None.
+            eval_func: The evaluation function provided by user. This function takes model as parameter, and
+                evaluation dataset and metrics should be encapsulated in this function implementation and
+                outputs a higher-is-better accuracy scalar value.
+            eval_dataloader: Data loader for evaluation. Defaults to None.
+            eval_metric: Metric for evaluation. Defaults to None.
+            resume: The dict containing resume information. Defaults to None.
+            q_hooks: The dict of training hooks, supported keys are: on_epoch_begin, on_epoch_end, on_step_begin,
+                on_step_end. Their values are functions to be executed in adaptor layer. Defaults to None.
+        """
+        super().__init__(model=model,
+                         conf=conf,
+                         q_dataloader=q_dataloader,
+                         q_func=q_func,
+                         eval_func=eval_func,
+                         eval_dataloader=eval_dataloader,
+                         eval_metric=eval_metric,
+                         resume=resume,
+                         q_hooks=q_hooks)
+        logger.info(f"*** Initialize SigOpt tuning")
+        strategy_name = conf.quantization.tuning_criterion.strategy
         if strategy_name.lower() == "sigopt":
             try:
                 import sigopt
diff --git a/neural_compressor/contrib/strategy/tpe.py b/neural_compressor/contrib/strategy/tpe.py
index 349518a240a..abafa483bb7 100644
--- a/neural_compressor/contrib/strategy/tpe.py
+++ b/neural_compressor/contrib/strategy/tpe.py
@@ -82,13 +82,21 @@ def eval_func(model):
                 Defaults to None.
     """

-    def __init__(self, model, conf, q_dataloader, q_func=None,
-                 eval_dataloader=None, eval_func=None, dicts=None, q_hooks=None):
+    def __init__(self,
+                 model,
+                 conf,
+                 q_dataloader=None,
+                 q_func=None,
+                 eval_func=None,
+                 eval_dataloader=None,
+                 eval_metric=None,
+                 resume=None,
+                 q_hooks=None):
         """Initialize the tpe tuning strategy if the user specified to use it."""
-        assert conf.usr_cfg.quantization.approach == 'post_training_static_quant', \
+        assert conf.quantization.approach == 'post_training_static_quant', \
             "TPE strategy is only for post training static quantization!"
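Both contrib strategies now take their name and knobs from conf.quantization.tuning_criterion rather than the old usr_cfg yaml tree. A hedged sketch of a config that satisfies the new entry points, mirroring the updated tests later in this series::

    from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion

    tune_cri = TuningCriterion(strategy='tpe', max_trials=5)
    # TPE is only valid for post-training static quantization (see the assert above)
    conf = PostTrainingQuantConfig(approach='static', tuning_criterion=tune_cri)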
"""Initialize the tpe tuning strategy if the user specified to use it.""" - strategy_name = conf.usr_cfg.tuning.strategy.name + strategy_name = conf.quantization.tuning_criterion.strategy if strategy_name.lower() == "tpe": try: import hyperopt @@ -106,14 +114,19 @@ def __init__(self, model, conf, q_dataloader, q_func=None, self.warm_start = False self.cfg_evaluated = False self.hpopt_trials = hyperopt.Trials() - self.max_trials = conf.usr_cfg.tuning.exit_policy.get('max_trials', 200) + self.max_trials = 200 + if conf.quantization.tuning_criterion.max_trials: + self.max_trials = conf.quantization.tuning_criterion.max_trials + self.loss_function_config = { - 'acc_th': conf.usr_cfg.tuning.accuracy_criterion.relative if \ - conf.usr_cfg.tuning.accuracy_criterion and \ - conf.usr_cfg.tuning.accuracy_criterion.relative else 0.01, - 'acc_weight': conf.usr_cfg.tuning.strategy.get('accuracy_weight', 1.0), - 'lat_weight': conf.usr_cfg.tuning.strategy.get('latency_weight', 1.0) + 'acc_th': 0.01, + 'acc_weight': 1.0, + 'lat_weight': 1.0 } + accuracy_criterion = conf.quantization.accuracy_criterion + if accuracy_criterion.criterion == 'relative': + self.loss_function_config['acc_th'] = accuracy_criterion.tolerable_loss + self.tpe_params = { 'n_initial_point': 10, 'gamma': 0.3, @@ -127,15 +140,15 @@ def __init__(self, model, conf, q_dataloader, q_func=None, } self._algo = None - super().__init__( - model, - conf, - q_dataloader, - q_func, - eval_dataloader, - eval_func, - dicts, - q_hooks) + super().__init__(model=model, + conf=conf, + q_dataloader=q_dataloader, + q_func=q_func, + eval_func=eval_func, + eval_dataloader=eval_dataloader, + eval_metric=eval_metric, + resume=resume, + q_hooks=q_hooks) def __getstate__(self): """Magic method for pickle saving. diff --git a/test/strategy/test_tpe.py b/test/strategy/test_tpe.py index 2ee5b2e7f7c..2c0dc5eac67 100644 --- a/test/strategy/test_tpe.py +++ b/test/strategy/test_tpe.py @@ -109,14 +109,35 @@ def tearDownClass(self): print("Error while deleting file ") def test_run_tpe_one_trial(self): - from neural_compressor.experimental import Quantization, common - - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.eval_dataloader = common.DataLoader(dataset) - quantizer.model = self.constant_graph - quantizer.fit() + # from neural_compressor.experimental import Quantization, common + + # quantizer = Quantization('fake_yaml.yaml') + # dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + # quantizer.calib_dataloader = common.DataLoader(dataset) + # quantizer.eval_dataloader = common.DataLoader(dataset) + # quantizer.model = self.constant_graph + # quantizer.fit() + + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion + from neural_compressor.data import Datasets, DATALOADERS + + # dataset and dataloader + dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) + dataloader = DATALOADERS["tensorflow"](dataset) + + # tuning and accuracy criterion + tune_cri = TuningCriterion(strategy='tpe') + acc_cri = AccuracyCriterion(tolerable_loss=0.01) + def eval_func(model): + return 1 + conf = PostTrainingQuantConfig(quant_level=1, tuning_criterion=tune_cri, accuracy_criterion=acc_cri) + q_model = fit(model=self.constant_graph, + conf=conf, + calib_dataloader=dataloader, + eval_func=eval_func) + + def 
test_run_tpe_max_trials(self): from neural_compressor.experimental import Quantization, common From 772be16d1a0e7a3a9799cb553f3e8c663c07f27e Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 30 Mar 2023 16:27:58 +0800 Subject: [PATCH 018/103] keep old ut Signed-off-by: yiliu30 --- test/strategy/test_sigopt_1.x.py | 167 +++++++++++++++++++++++++++++++ test/strategy/test_tpe_1.x.py | 153 ++++++++++++++++++++++++++++ 2 files changed, 320 insertions(+) create mode 100644 test/strategy/test_sigopt_1.x.py create mode 100644 test/strategy/test_tpe_1.x.py diff --git a/test/strategy/test_sigopt_1.x.py b/test/strategy/test_sigopt_1.x.py new file mode 100644 index 00000000000..650a24273d0 --- /dev/null +++ b/test/strategy/test_sigopt_1.x.py @@ -0,0 +1,167 @@ +"""Tests for quantization""" +import numpy as np +import unittest +import shutil +import os +import yaml +if os.getenv('SIGOPT_API_TOKEN') is None or os.getenv('SIGOPT_PROJECT_ID') is None: + CONDITION = True +else: + CONDITION = False + +def build_fake_yaml(sigopt_api_token,sigopt_project_id): + fake_yaml = ''' + model: + name: fake_yaml + framework: tensorflow + inputs: x + outputs: op2_to_store + device: cpu + evaluation: + accuracy: + metric: + topk: 1 + tuning: + strategy: + name: sigopt + sigopt_api_token: {} + sigopt_project_id: {} + sigopt_experiment_name: nc-tune + accuracy_criterion: + relative: 0.01 + workspace: + path: saved + '''.format(sigopt_api_token, sigopt_project_id) + y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) + with open('fake_yaml.yaml',"w",encoding="utf-8") as f: + yaml.dump(y,f) + f.close() + +def build_fake_yaml2(sigopt_api_token,sigopt_project_id): + fake_yaml = ''' + model: + name: fake_yaml + framework: tensorflow + inputs: x + outputs: op2_to_store + device: cpu + evaluation: + accuracy: + metric: + topk: 1 + tuning: + strategy: + name: sigopt + sigopt_api_token: {} + sigopt_project_id: {} + sigopt_experiment_name: nc-tune + exit_policy: + max_trials: 3 + accuracy_criterion: + relative: -0.01 + workspace: + path: saved + '''.format(sigopt_api_token, sigopt_project_id) + y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) + with open('fake_yaml2.yaml',"w",encoding="utf-8") as f: + yaml.dump(y,f) + f.close() + +def build_fake_model(): + import tensorflow as tf + try: + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') + y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') + z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') + op = tf.nn.conv2d(input=tf.nn.relu(x), filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + op2 = tf.nn.conv2d(input=tf.nn.relu(op), filters=z, strides=[1,1,1,1], padding='VALID', name='op2_to_store') + + sess.run(tf.compat.v1.global_variables_initializer()) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + except: + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') + y = tf.constant(np.random.random((2,2,1,1)).astype(np.float32), name='y') + z = tf.constant(np.random.random((1,1,1,1)).astype(np.float32), name='z') + op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + 
op2 = tf.nn.conv2d(input=op, filters=z, strides=[1,1,1,1], padding='VALID', name='op2_to_store') + + sess.run(tf.compat.v1.global_variables_initializer()) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op2_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + return graph + +@unittest.skipIf(CONDITION , "missing the env variables 'SIGOPT_API_TOKEN' or 'SIGOPT_PROJECT_ID'") +class TestSigoptTuningStrategy(unittest.TestCase): + + @classmethod + def setUpClass(self): + sigopt_api_token = os.getenv('SIGOPT_API_TOKEN') + sigopt_project_id = os.getenv('SIGOPT_PROJECT_ID') + self.constant_graph = build_fake_model() + build_fake_yaml(sigopt_api_token,sigopt_project_id) + build_fake_yaml2(sigopt_api_token,sigopt_project_id) + + @classmethod + def tearDownClass(self): + os.remove('fake_yaml.yaml') + os.remove('fake_yaml2.yaml') + shutil.rmtree('saved', ignore_errors=True) + + def test_run_basic_one_trial(self): + from neural_compressor.experimental import Quantization, common + + quantizer = Quantization('fake_yaml.yaml') + dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.model = self.constant_graph + quantizer.fit() + + + def test_run_basic_max_trials(self): + from neural_compressor.experimental import Quantization, common + + quantizer = Quantization('fake_yaml2.yaml') + dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.model = self.constant_graph + quantizer.fit() + + def test_run_sigopt_one_trial_new_api(self): + from neural_compressor.quantization import fit + from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion + from neural_compressor.data import Datasets, DATALOADERS + + # dataset and dataloader + dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) + dataloader = DATALOADERS["tensorflow"](dataset) + + # tuning and accuracy criterion + accuracy_criterion = AccuracyCriterion(criterion='relative') + strategy_kwargs = {'sigopt_api_token': 'sigopt_api_token_test', + 'sigopt_project_id': 'sigopt_project_id_test', + 'sigopt_experiment_name': 'nc-tune'} + tuning_criterion = TuningCriterion(strategy='sigopt', strategy_kwargs=strategy_kwargs, max_trials=3) + conf = PostTrainingQuantConfig(approach="static", + tuning_criterion=tuning_criterion, + accuracy_criterion=accuracy_criterion) + self.assertEqual(conf.strategy_kwargs, strategy_kwargs) + q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader= dataloader, eval_dataloader=dataloader) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/strategy/test_tpe_1.x.py b/test/strategy/test_tpe_1.x.py new file mode 100644 index 00000000000..2ee5b2e7f7c --- /dev/null +++ b/test/strategy/test_tpe_1.x.py @@ -0,0 +1,153 @@ +"""Tests for quantization""" +import numpy as np +import unittest +import os +import shutil +import yaml + +def build_fake_yaml(): + fake_yaml = ''' + model: + name: fake_yaml + framework: tensorflow + inputs: x + outputs: op_to_store + device: cpu + evaluation: + accuracy: + metric: + topk: 1 + tuning: + strategy: + name: tpe + accuracy_criterion: + relative: 0.01 + workspace: + path: saved + ''' + y = yaml.load(fake_yaml, 
Loader=yaml.SafeLoader) + with open('fake_yaml.yaml',"w",encoding="utf-8") as f: + yaml.dump(y,f) + f.close() + +def build_fake_yaml2(): + fake_yaml = ''' + model: + name: fake_yaml + framework: tensorflow + inputs: x + outputs: op_to_store + device: cpu + evaluation: + accuracy: + metric: + topk: 1 + tuning: + strategy: + name: tpe + exit_policy: + max_trials: 5 + accuracy_criterion: + relative: -0.01 + workspace: + path: saved + ''' + y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) + with open('fake_yaml2.yaml',"w",encoding="utf-8") as f: + yaml.dump(y,f) + f.close() + +def build_fake_model(): + import tensorflow as tf + try: + graph = tf.Graph() + graph_def = tf.GraphDef() + + with tf.Session() as sess: + x = tf.placeholder(tf.float32, shape=(1,3,3,1), name='x') + y = tf.constant(np.random.random((2,2,1,1)), name='y', dtype=tf.float32) + op = tf.nn.conv2d(input=x, filter=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + + sess.run(tf.global_variables_initializer()) + constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + except: + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(tf.float32, shape=(1,3,3,1), name='x') + y = tf.compat.v1.constant(np.random.random((2,2,1,1)), name='y', dtype=tf.float32) + op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + + sess.run(tf.compat.v1.global_variables_initializer()) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + return graph + +class TestQuantization(unittest.TestCase): + + @classmethod + def setUpClass(self): + self.constant_graph = build_fake_model() + build_fake_yaml() + build_fake_yaml2() + + @classmethod + def tearDownClass(self): + try: + os.remove('fake_yaml.yaml') + os.remove('fake_yaml2.yaml') + + shutil.rmtree("saved", ignore_errors=True) + except: + print("Error while deleting file ") + + def test_run_tpe_one_trial(self): + from neural_compressor.experimental import Quantization, common + + quantizer = Quantization('fake_yaml.yaml') + dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.model = self.constant_graph + quantizer.fit() + + def test_run_tpe_max_trials(self): + from neural_compressor.experimental import Quantization, common + + quantizer = Quantization('fake_yaml2.yaml') + dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.model = self.constant_graph + quantizer.fit() + + def test_loss_calculation(self): + from neural_compressor.contrib.strategy.tpe import TpeTuneStrategy + from neural_compressor.experimental import Quantization, common + + quantizer = Quantization('fake_yaml.yaml') + dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.model = self.constant_graph + + testObject = 
TpeTuneStrategy(quantizer.model, quantizer.conf, quantizer.calib_dataloader) + testObject._calculate_loss_function_scaling_components(0.01, 2, testObject.loss_function_config) + # check if latency difference between min and max corresponds to 10 points of loss function + tmp_val = testObject.calculate_loss(0.01, 2, testObject.loss_function_config) + tmp_val2 = testObject.calculate_loss(0.01, 1, testObject.loss_function_config) + self.assertTrue(True if int(tmp_val2 - tmp_val) == 10 else False) + # check if 1% of acc difference corresponds to 10 points of loss function + tmp_val = testObject.calculate_loss(0.02, 2, testObject.loss_function_config) + tmp_val2 = testObject.calculate_loss(0.03, 2, testObject.loss_function_config) + self.assertTrue(True if int(tmp_val2 - tmp_val) == 10 else False) + +if __name__ == "__main__": + unittest.main() From 3820c45505d80dbd13e7eefe26bbd175540293ce Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 30 Mar 2023 21:00:31 +0800 Subject: [PATCH 019/103] fix ut Signed-off-by: yiliu30 --- test/quantization/test_quantization.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/quantization/test_quantization.py b/test/quantization/test_quantization.py index 029a0bad469..2a62e5eb2e7 100644 --- a/test/quantization/test_quantization.py +++ b/test/quantization/test_quantization.py @@ -190,13 +190,13 @@ def build_fake_model(): return graph def build_fake_strategy(): - with open(os.path.join(os.path.dirname(importlib.util.find_spec('neural_compressor').origin), 'strategy/fake.py'), 'w', encoding='utf-8') as f: + with open(os.path.join(os.path.dirname(importlib.util.find_spec('neural_compressor').origin), 'experimental/strategy/fake.py'), 'w', encoding='utf-8') as f: seq = ["import time \n", "import copy \n", "import numpy as np \n", "from collections import OrderedDict \n", "from .strategy import strategy_registry, TuneStrategy \n", - "from ..utils import logger \n", + "from ...utils import logger \n", "from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler \n", "from .utils.tuning_structs import OpTuningConfig \n", "import copy \n", @@ -296,7 +296,7 @@ def tearDownClass(self): os.remove('fake_yaml4.yaml') os.remove('fake_yaml5.yaml') os.remove('fake_yaml6.yaml') - os.remove(os.path.join(os.path.dirname(importlib.util.find_spec('neural_compressor').origin), 'strategy/fake.py')) + os.remove(os.path.join(os.path.dirname(importlib.util.find_spec('neural_compressor').origin), 'experimental/strategy/fake.py')) shutil.rmtree('./saved', ignore_errors=True) def test_resume(self): From 84e69a0549c77feee29ae37bdcab722cec7f97f0 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Fri, 31 Mar 2023 08:18:18 +0800 Subject: [PATCH 020/103] update strategy UTs Signed-off-by: yiliu30 --- test/strategy/test_bayesian_1.x.py | 309 +++++++++++++++++++++++++++++ test/strategy/test_random_1.x.py | 137 +++++++++++++ 2 files changed, 446 insertions(+) create mode 100644 test/strategy/test_bayesian_1.x.py create mode 100644 test/strategy/test_random_1.x.py diff --git a/test/strategy/test_bayesian_1.x.py b/test/strategy/test_bayesian_1.x.py new file mode 100644 index 00000000000..e2d54d9b3e2 --- /dev/null +++ b/test/strategy/test_bayesian_1.x.py @@ -0,0 +1,309 @@ +"""Tests for quantization""" +import numpy as np +import unittest +import os +import shutil +import yaml + +def build_fake_yaml(): + fake_yaml = ''' + model: + name: fake_yaml + framework: tensorflow + inputs: x + outputs: op_to_store + device: cpu + quantization: + calibration: + 
sampling_size: 10 + evaluation: + accuracy: + metric: + topk: 1 + tuning: + strategy: + name: bayesian + exit_policy: + max_trials: 1 + accuracy_criterion: + relative: 0.01 + workspace: + path: saved + ''' + y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) + with open('fake_yaml.yaml',"w",encoding="utf-8") as f: + yaml.dump(y,f) + f.close() + +def build_fake_yaml2(): + fake_yaml = ''' + model: + name: fake_yaml + framework: tensorflow + inputs: input + outputs: final + device: cpu + quantization: + calibration: + sampling_size: 10, 20 + op_wise: { + \"conv1\": { + \"activation\": {\"dtype\": [\"fp32\"]}, + }, + } + evaluation: + accuracy: + metric: + topk: 1 + tuning: + strategy: + name: bayesian + exit_policy: + max_trials: 3 + accuracy_criterion: + relative: 0.01 + workspace: + path: saved + ''' + with open('fake_yaml2.yaml',"w",encoding="utf-8") as f: + f.write(fake_yaml) + f.close() + +def build_fake_model(): + import tensorflow as tf + try: + graph = tf.Graph() + graph_def = tf.GraphDef() + with tf.Session() as sess: + x = tf.placeholder(tf.float64, shape=(1,3,3,1), name='x') + y = tf.constant(np.random.random((2,2,1,1)), name='y') + op = tf.nn.conv2d(input=x, filter=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + + sess.run(tf.global_variables_initializer()) + constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + except: + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(tf.float64, shape=(1,3,3,1), name='x') + y = tf.compat.v1.constant(np.random.random((2,2,1,1)), name='y') + op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + + sess.run(tf.compat.v1.global_variables_initializer()) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + return graph + +def create_test_graph(): + from tensorflow.core.framework import attr_value_pb2 + from tensorflow.core.framework import graph_pb2 + from tensorflow.core.framework import node_def_pb2 + from tensorflow.python.framework import tensor_util + from tensorflow.python.framework import dtypes + input_node = node_def_pb2.NodeDef() + input_node.name = "input" + input_node.op = "Placeholder" + input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue( + type=dtypes.float32.as_datatype_enum)) + + conv1_weight_node = node_def_pb2.NodeDef() + conv1_weight_node.name = "conv1_weights" + conv1_weight_node.op = "Const" + conv1_weight_value = np.float32(np.abs(np.random.randn(3,3,3,32))) + conv1_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv1_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape))) + + conv1_node = node_def_pb2.NodeDef() + conv1_node.name = "conv1" + conv1_node.op = "Conv2D" + conv1_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( + type=dtypes.float32.as_datatype_enum)) + conv1_node.input.extend([input_node.name, conv1_weight_node.name]) + conv1_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( + 
list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) + conv1_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( + list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) + conv1_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) + conv1_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + + bias_node = node_def_pb2.NodeDef() + bias_node.name = "conv1_bias" + bias_node.op = "Const" + bias_value = np.float32(np.abs(np.random.randn(32))) + bias_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( + bias_value, bias_value.dtype.type, bias_value.shape))) + + bias_add_node = node_def_pb2.NodeDef() + bias_add_node.name = "conv1_bias_add" + bias_add_node.op = "BiasAdd" + bias_add_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_add_node.input.extend([conv1_node.name, bias_node.name]) + bias_add_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + + relu_node = node_def_pb2.NodeDef() + relu_node.op = "Relu" + relu_node.name = "relu" + relu_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + relu_node.input.extend([bias_add_node.name]) + + conv2_weight_node = node_def_pb2.NodeDef() + conv2_weight_node.name = "conv2_weights" + conv2_weight_node.op = "Const" + conv2_weight_value = np.float32(np.abs(np.random.randn(3,3,32,32))) + conv2_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv2_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv2_weight_value, conv2_weight_value.dtype.type, conv2_weight_value.shape))) + + conv2_node = node_def_pb2.NodeDef() + conv2_node.name = "conv2" + conv2_node.op = "Conv2D" + conv2_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( + type=dtypes.float32.as_datatype_enum)) + conv2_node.input.extend([relu_node.name, conv2_weight_node.name]) + conv2_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( + list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) + conv2_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( + list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) + conv2_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) + conv2_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + + bias_node2 = node_def_pb2.NodeDef() + bias_node2.name = "conv2_bias" + bias_node2.op = "Const" + bias_value2 = np.float32(np.abs(np.random.randn(32))) + bias_node2.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node2.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( + bias_value2, bias_value2.dtype.type, bias_value2.shape))) + + bias_add_node2 = node_def_pb2.NodeDef() + bias_add_node2.name = "conv2_bias_add" + bias_add_node2.op = "BiasAdd" + bias_add_node2.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_add_node2.input.extend([conv2_node.name, bias_node2.name]) + bias_add_node2.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + + relu_node2 = node_def_pb2.NodeDef() + relu_node2.op = "Relu" + relu_node2.name = "relu2" + relu_node2.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + relu_node2.input.extend([bias_add_node2.name]) + + conv3_weight_node = node_def_pb2.NodeDef() + 
conv3_weight_node.name = "conv3_weights" + conv3_weight_node.op = "Const" + conv3_weight_value = np.float32(np.abs(np.random.randn(3,3,32,32))) + conv3_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv3_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv3_weight_value, conv3_weight_value.dtype.type, conv3_weight_value.shape))) + + conv3_node = node_def_pb2.NodeDef() + conv3_node.name = "conv3" + conv3_node.op = "Conv2D" + conv3_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( + type=dtypes.float32.as_datatype_enum)) + conv3_node.input.extend([relu_node2.name, conv3_weight_node.name]) + conv3_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( + list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) + conv3_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( + list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) + conv3_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) + conv3_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + + identity_node = node_def_pb2.NodeDef() + identity_node.name = "final" + identity_node.op = "Identity" + identity_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( + type=dtypes.float32.as_datatype_enum)) + identity_node.input.extend([conv3_node.name]) + + test_graph = graph_pb2.GraphDef() + + test_graph.node.extend([input_node, + conv1_weight_node, + conv1_node, + bias_node, + bias_add_node, + relu_node, + conv2_weight_node, + conv2_node, + bias_node2, + bias_add_node2, + relu_node2, + conv3_weight_node, + conv3_node, + identity_node + ]) + return test_graph + +def objective_func(params): + return params['x1']**2 + params['x2'] + +class TestQuantization(unittest.TestCase): + + @classmethod + def setUpClass(self): + self.constant_graph = build_fake_model() + self.test_graph = create_test_graph() + build_fake_yaml() + build_fake_yaml2() + + @classmethod + def tearDownClass(self): + os.remove('fake_yaml.yaml') + os.remove('fake_yaml2.yaml') + + shutil.rmtree("saved", ignore_errors=True) + + def test_run_bayesian_one_trial(self): + + from neural_compressor.experimental import Quantization, common + quantizer = Quantization('fake_yaml.yaml') + dataset = quantizer.dataset('dummy', shape=(100, 3, 3, 1), label=True) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.model = self.constant_graph + output_graph = quantizer.fit() + self.assertNotEqual(output_graph, None) + + def test_run_bayesian_max_trials(self): + + from neural_compressor.experimental import Quantization, common + quantizer = Quantization('fake_yaml2.yaml') + dataset = quantizer.dataset('dummy', shape=(1, 224, 224, 3), label=True) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.model = self.test_graph + output_graph = quantizer.fit() + self.assertNotEqual(output_graph, None) + + def test_bayesian_opt_class(self): + from neural_compressor.strategy.bayesian import BayesianOptimization + pbounds = {} + pbounds['x1'] = (0, 1) + pbounds['x2'] = (0, 1) + np.random.seed(9527) + bayes_opt = BayesianOptimization(pbounds=pbounds, + random_seed=9527) + for i in range(10): + params = bayes_opt.gen_next_params() + try: + bayes_opt._space.register(params, objective_func(params)) + except KeyError: + pass + self.assertTrue(bayes_opt._space.max()['target'] == 2.0) + self.assertTrue(len(bayes_opt._space.res()) == 
8) + +if __name__ == "__main__": + unittest.main() diff --git a/test/strategy/test_random_1.x.py b/test/strategy/test_random_1.x.py new file mode 100644 index 00000000000..d0f007fd7f2 --- /dev/null +++ b/test/strategy/test_random_1.x.py @@ -0,0 +1,137 @@ +"""Tests for quantization""" +import numpy as np +import unittest +import os +import shutil +import yaml + + +def build_fake_yaml(): + fake_yaml = ''' + model: + name: fake_yaml + framework: tensorflow + inputs: x + outputs: op_to_store + device: cpu + evaluation: + accuracy: + metric: + topk: 1 + tuning: + strategy: + name: random + accuracy_criterion: + relative: 0.01 + workspace: + path: saved + ''' + y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) + with open('fake_yaml.yaml', "w", encoding="utf-8") as f: + yaml.dump(y, f) + f.close() + + +def build_fake_yaml2(): + fake_yaml = ''' + model: + name: fake_yaml + framework: tensorflow + inputs: x + outputs: op_to_store + device: cpu + evaluation: + accuracy: + metric: + topk: 1 + tuning: + strategy: + name: random + exit_policy: + max_trials: 3 + accuracy_criterion: + relative: -0.01 + workspace: + path: saved + ''' + y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) + with open('fake_yaml2.yaml', "w", encoding="utf-8") as f: + yaml.dump(y, f) + f.close() + + +def build_fake_model(): + import tensorflow as tf + try: + graph = tf.Graph() + graph_def = tf.GraphDef() + with tf.Session() as sess: + x = tf.placeholder(tf.float64, shape=(1, 3, 3, 1), name='x') + y = tf.constant(np.random.random((2, 2, 1, 1)), name='y') + op = tf.nn.conv2d(input=x, filter=y, strides=[ + 1, 1, 1, 1], padding='VALID', name='op_to_store') + + sess.run(tf.global_variables_initializer()) + constant_graph = tf.graph_util.convert_variables_to_constants( + sess, sess.graph_def, ['op_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + except: + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(tf.float64, shape=(1, 3, 3, 1), name='x') + y = tf.compat.v1.constant(np.random.random((2, 2, 1, 1)), name='y') + op = tf.nn.conv2d(input=x, filters=y, strides=[ + 1, 1, 1, 1], padding='VALID', name='op_to_store') + + sess.run(tf.compat.v1.global_variables_initializer()) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, [ + 'op_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + return graph + + +class TestQuantization(unittest.TestCase): + + @classmethod + def setUpClass(self): + self.constant_graph = build_fake_model() + build_fake_yaml() + build_fake_yaml2() + + @classmethod + def tearDownClass(self): + os.remove('fake_yaml.yaml') + os.remove('fake_yaml2.yaml') + + shutil.rmtree("saved", ignore_errors=True) + + def test_ru_random_one_trial(self): + from neural_compressor.experimental import Quantization, common + + quantizer = Quantization('fake_yaml.yaml') + dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.model = self.constant_graph + quantizer.fit() + + def test_ru_random_max_trials(self): + from neural_compressor.experimental import Quantization, common + + quantizer = Quantization('fake_yaml2.yaml') + dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + 
quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.model = self.constant_graph + quantizer.fit() + + +if __name__ == "__main__": + unittest.main() From 410cbe968ab0e6330e49328c336277c8a27950c6 Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Fri, 31 Mar 2023 14:42:28 +0800 Subject: [PATCH 021/103] update callbacks for qat config Signed-off-by: Cheng, Zixuan --- neural_compressor/benchmark.py | 4 +- neural_compressor/compression/callbacks.py | 126 +++++++++------------ neural_compressor/config.py | 73 +++++++++++- neural_compressor/quantization.py | 2 +- 4 files changed, 131 insertions(+), 74 deletions(-) diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py index 6aca7ff5134..7a5b58beb58 100644 --- a/neural_compressor/benchmark.py +++ b/neural_compressor/benchmark.py @@ -155,7 +155,7 @@ def __init__(self, conf): "The config object should be config.BenchmarkConfig, not {}".format(type(conf)) self.conf = Config(quantization=None, benchmark=conf, pruning=None, distillation=None, nas=None) if self.conf.benchmark.framework is not None: - self.framework = self.conf.benchmark.framework + self.framework = self.conf.benchmark.framework.lower() def __call__(self, raw_cmd=None): """Directly call a Benchmark object. @@ -328,7 +328,7 @@ def run_instance(self): 'backend': cfg.benchmark.backend \ if cfg.benchmark.backend is not None else 'default', 'format': None} - framework = cfg.benchmark.framework + framework = cfg.benchmark.framework.lower() if 'tensorflow' in framework: framework_specific_info.update({"inputs": cfg.benchmark.inputs, \ "outputs": cfg.benchmark.outputs, \ diff --git a/neural_compressor/compression/callbacks.py b/neural_compressor/compression/callbacks.py index d075e435ad2..395e54ab1c7 100644 --- a/neural_compressor/compression/callbacks.py +++ b/neural_compressor/compression/callbacks.py @@ -28,9 +28,7 @@ import random from .distillation.criterions import Criterions from ..adaptor import FRAMEWORKS -from ..conf.config import QuantConf, DistillationConf, PruningConf -from ..conf.dotdict import deep_get, deep_set, DotDict -from ..conf.pythonic_config import Config +from ..config import Config, PostTrainingQuantConfig, DistillationConfig, WeightPruningConfig from ..utils import logger from ..utils.utility import time_limit, LazyImport from ..model import BaseModel, Model @@ -56,7 +54,7 @@ def __init__(self, conf=None, model=None): Args: conf: A Config object which definds the compressor behavior. - Just like:QuantizationAwareTrainingConfig, WeightPruningConfig and DistillationConfig. + Just like: QuantizationAwareTrainingConfig, WeightPruningConfig and DistillationConfig. model: Model to be compressed in this object. """ self.conf = None @@ -68,6 +66,7 @@ def __init__(self, conf=None, model=None): self._train_dataloader = None self._eval_func = None self._eval_dataloader = None + self._eval_metric = None self._train_distributed = False self._evaluation_distributed = False self.adaptor = None @@ -198,7 +197,7 @@ def model(self, user_model): user_model: user are supported to set model from original framework model format (eg, tensorflow frozen_pb or path to a saved model), but not recommended. Best practice is to set from a initialized - neural_compressor.experimental.common.Model. + neural_compressor.model.Model. 
If tensorflow model is used, model's inputs/outputs will be auto inferenced, but sometimes auto inferenced inputs/outputs will not meet your requests, @@ -211,22 +210,22 @@ def model(self, user_model): if user_model is None: return - if self.cfg.model.framework == 'NA': + if self.cfg.qat_quantization.framework is None: self.framework = get_model_fwk_name( user_model.model if isinstance(user_model, BaseModel) else user_model) if self.framework == "tensorflow": - if self.cfg.quantization.approach == "quant_aware_training": + if self.cfg.qat_quantization.approach == "quant_aware_training": self.framework = 'tensorflow_itex' else: from ..model.tensorflow_model import get_model_type - if get_model_type(user_model) == 'keras' and self.cfg.model.backend == 'itex': + if get_model_type(user_model) == 'keras' and self.cfg.qat_quantization.backend == 'itex': self.framework = 'keras' if self.framework == "pytorch": - if self.cfg.model.backend == "default": + if self.cfg.qat_quantization.backend == "default": self.framework = "pytorch_fx" - elif self.cfg.model.backend == "ipex": + elif self.cfg.qat_quantization.backend == "ipex": self.framework = "pytorch_ipex" - self.cfg.model.framework = self.framework + self.cfg.qat_quantization.framework = self.framework if not isinstance(user_model, BaseModel): logger.warning("Force convert framework model to neural_compressor model.") @@ -236,17 +235,17 @@ def model(self, user_model): else: self._model = TensorflowQATModel(user_model._model) elif "tensorflow" in self.framework or self.framework == "keras": - self._model = Model(user_model, backend=self.framework, device=self.cfg.device) + self._model = Model(user_model, backend=self.framework, device=self.cfg.qat_quantization.device) else: self._model = Model(user_model, backend=self.framework) else: self._model = user_model if 'tensorflow' in self.framework: - self._model.name = self.cfg.model.name - self._model.output_tensor_names = self.cfg.model.outputs - self._model.input_tensor_names = self.cfg.model.inputs - self._model.workspace_path = self.cfg.tuning.workspace.path + self._model.name = self.cfg.qat_quantization.model_name + self._model.output_tensor_names = self.cfg.qat_quantization.outputs + self._model.input_tensor_names = self.cfg.qat_quantization.inputs + self._model.workspace_path = self.cfg.options.workspace def pre_process(self): """Create strategy to optimize model.""" @@ -255,45 +254,46 @@ def pre_process(self): self.remove_hook("on_train_begin", self.adaptor._pre_hook_for_qat) self.remove_hook("on_train_end", self.adaptor._post_hook_for_qat) - strategy = self.cfg.tuning.strategy.name.lower() - if self.cfg.quantization.quant_level == 0: + strategy = self.cfg.qat_quantization.tuning_criterion.strategy.lower() + if self.cfg.qat_quantization.quant_level == 0: strategy = "conservative" logger.info(f"On the premise that the accuracy meets the conditions, improve the performance.") if strategy == "mse_v2": - if not (self.cfg.model.framework.startswith("tensorflow") or self.cfg.model.framework == 'pytorch_fx'): + if not (self.cfg.qat_quantization.framework.startswith("tensorflow") or self.cfg.qat_quantization.framework == 'pytorch_fx'): strategy = "basic" - logger.warning(f"MSE_v2 does not support {self.cfg.model.framework} now, use basic instead.") + logger.warning(f"MSE_v2 does not support {self.cfg.qat_quantization.framework} now, use basic instead.") logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.") assert strategy in STRATEGIES, "Tuning strategy {} is NOT 
supported".format(strategy) _resume = None # check if interrupted tuning procedure exists. if yes, it will resume the # whole auto tune process. - self.resume_file = os.path.abspath(os.path.expanduser(self.cfg.tuning.workspace.resume)) \ - if self.cfg.tuning.workspace and self.cfg.tuning.workspace.resume else None + self.resume_file = os.path.abspath(os.path.expanduser(self.cfg.options.resume_from)) \ + if self.cfg.options.workspace and self.cfg.options.resume_from else None if self.resume_file: assert os.path.exists(self.resume_file), \ "The specified resume file {} doesn't exist!".format(self.resume_file) with open(self.resume_file, 'rb') as f: _resume = pickle.load(f).__dict__ - + self.strategy = STRATEGIES[strategy]( - self._model, - self.conf, - None, - self._train_func, - self._eval_dataloader, - self._eval_func, - _resume, - None) + model = self.model, + conf = self.conf, + q_dataloader=None, + q_func=self._train_func, + eval_func=self._eval_func, + eval_dataloader=self._eval_dataloader, + eval_metric=self._eval_metric, + resume=_resume, + q_hooks=None) def execute(self): """Quantization Aware Training execute routinue based on strategy design.""" try: - with time_limit(self.conf.usr_cfg.tuning.exit_policy.timeout): + with time_limit(self.conf.qat_quantization.tuning_criterion.timeout): logger.debug("Dump user yaml configuration:") - logger.debug(self.conf.usr_cfg) + logger.debug(self.conf) self.strategy.traverse() except KeyboardInterrupt: pass @@ -417,10 +417,11 @@ def metric(self, user_metric): You can set multi-metrics to evaluate the performance of a specific model. Single metric: {topk: 1} - Multi-metrics: {topk: 1, MSE: {compare_label: False}, + weight: [0.5, 0.5], + higher_is_better: [True, False] } For the built-in metrics, please refer to below link: https://github.com/intel/neural-compressor/blob/master/docs/source/metric.md#supported-built-in-metric-matrix. @@ -433,10 +434,6 @@ def metric(self, user_metric): user_metric(neural_compressor.metric.Metric or a dict of built-in metric configures): """ - if deep_get(self.conf.usr_cfg, "evaluation.accuracy.metric"): - logger.warning("Override the value of `metric` field defined in yaml file" \ - " as user defines the value of `metric` attribute by code.") - from ..metric import Metric as NCMetric, METRICS if isinstance(user_metric, dict): metric_cfg = user_metric @@ -452,12 +449,9 @@ def metric(self, user_metric): metric_cls = type(user_metric).__name__ name = 'user_' + metric_cls metric_cfg = {name: id(user_metric)} - metrics = METRICS(self.conf.usr_cfg.model.framework) + metrics = METRICS(self.conf.qat_quantization.framework) metrics.register(name, metric_cls) - deep_set(self.conf.usr_cfg, "evaluation.accuracy.metric", metric_cfg) - self.conf.usr_cfg = DotDict(self.conf.usr_cfg) - self._metric = user_metric def remove_hook(self, scope, hook): @@ -482,29 +476,26 @@ def __init__(self, conf=None, model=None): model: Model to be quantized in this object. 
""" super(QuantizationAwareTrainingCallbacks, self).__init__(conf=None) - conf = Config(quantization=conf, benchmark=None, pruning=None, distillation=None, nas=None) - self.conf = QuantConf() - self.conf.map_pyconfig_to_cfg(conf) - self.cfg = self.conf.usr_cfg + self.conf = Config(quantization=conf, benchmark=None, pruning=None, distillation=None, nas=None) + self.cfg = self.conf self.model = model - seed = self.conf.usr_cfg.tuning.random_seed + seed = self.conf.options.random_seed random.seed(seed) np.random.seed(seed) - framework_specific_info = {'device': self.cfg.device, - 'random_seed': self.cfg.tuning.random_seed, - 'workspace_path': self.cfg.tuning.workspace.path, + framework_specific_info = {'device': self.cfg.qat_quantization.device, + 'random_seed': self.cfg.options.random_seed, + 'workspace_path': self.cfg.options.workspace, 'q_dataloader': None, - 'backend': self.cfg.model.get('backend', 'default'), - 'format': self.cfg.model.get('quant_format', 'default'), - 'performance_only': self.cfg.model.get('tuning.exit_policy.performance_only', False)} - if self.cfg.quantization.approach is not None: - framework_specific_info['approach'] = self.cfg.quantization.approach + 'backend': self.cfg.qat_quantization.backend if self.cfg.qat_quantization.backend is not None else 'default', + 'format': self.cfg.qat_quantization.quant_format if self.cfg.qat_quantization.quant_format is not None else 'default'} + if self.cfg.qat_quantization.approach is not None: + framework_specific_info['approach'] = self.cfg.qat_quantization.approach if 'tensorflow' in self.framework: framework_specific_info.update( - {"inputs": self.cfg.model.inputs, "outputs": self.cfg.model.outputs}) + {"inputs": self.cfg.qat_quantization.inputs, "outputs": self.cfg.qat_quantization.outputs}) self.adaptor = FRAMEWORKS[self.framework](framework_specific_info) self.adaptor.model = self.model self.register_hook('on_train_begin', self.adaptor._pre_hook_for_qat) @@ -529,13 +520,10 @@ def __init__(self, conf=None, model=None): model: Model to be Pruning in this object. """ super(PruningCallbacks, self).__init__(conf=None) - conf_ = Config(pruning=conf, quantization=None, benchmark=None, distillation=None, nas=None) - self.cfg = PruningConf() - self.cfg.map_pyconfig_to_cfg(conf_) - self.cfg = self.cfg.usr_cfg - self.conf = conf_.pruning + self.conf = Config(pruning=conf, quantization=None, benchmark=None, distillation=None, nas=None) + self.cfg = self.conf.pruning self.model = model - self.pruners_info = process_config(self.conf) + self.pruners_info = process_config(self.cfg) self.pruners = [] self._generate_pruners() self.generate_hooks() @@ -595,10 +583,8 @@ class DistillationCallbacks(BaseCallbacks): def __init__(self, conf=None, model=None): """Initialize the attributes.""" super(DistillationCallbacks, self).__init__() - conf = Config(quantization=None, benchmark=None, pruning=None, distillation=conf, nas=None) - self.conf = DistillationConf() - self.conf.map_pyconfig_to_cfg(conf) - self.cfg = self.conf.usr_cfg + self.conf = Config(quantization=None, benchmark=None, pruning=None, distillation=conf, nas=None) + self.cfg = self.conf.distillation self.model = model self._teacher_model = None @@ -609,8 +595,8 @@ def __init__(self, conf=None, model=None): self.best_score = 0 self.best_model = None self.hooks_registered = False - assert hasattr(conf.distillation, "teacher_model"), "Please assign teacher model in DistillationConfig." 
- self.teacher_model = conf.distillation.teacher_model + assert hasattr(self.cfg, "teacher_model"), "Please assign teacher model in DistillationConfig." + self.teacher_model = self.cfg.teacher_model self.generate_hooks() self.create_criterion() @@ -646,13 +632,13 @@ def init_train_cfg(self): """Initialize the training configuration.""" if self._train_cfg is None: # train section of distillation section in yaml file should be configured. - self._train_cfg = self.cfg.distillation.train + self._train_cfg = self.cfg.train assert self._train_cfg, "train field of distillation section in yaml file must " \ "be configured for distillation if train_func is NOT set." def create_criterion(self): """Create the criterion for training.""" - self.init_train_cfg() + #self.init_train_cfg() if self.criterion is None: assert 'criterion' in self._train_cfg.keys(), \ "criterion part in train field of distillation section in yaml file " \ diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 179f083c06a..e8d5f49a7c5 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -18,7 +18,6 @@ import datetime import logging from schema import Schema, And, Optional -from .conf.dotdict import DotDict logger = logging.getLogger("neural_compressor") default_workspace = './nc_workspace/{}/'.format( @@ -91,6 +90,47 @@ def datatype(self, datatype): return True +class DotDict(dict): + """access yaml using attributes instead of using the dictionary notation. + + Args: + value (dict): The dict object to access. + + """ + + def __init__(self, value=None): + if value is None: + pass + elif isinstance(value, dict): + for key in value: + self.__setitem__(key, value[key]) + else: + raise TypeError('expected dict') + + def __getitem__(self, key): + value = self.get(key, None) + return value + + def __setitem__(self, key, value): + if isinstance(value, dict) and not isinstance(value, DotDict): + value = DotDict(value) + if isinstance(value, list) and len(value) == 1 and isinstance( + value[0], dict): + value = DotDict(value[0]) + if isinstance(value, list) and len(value) > 1 and all(isinstance( + v, dict) for v in value): + value = DotDict({k: v for d in value for k, v in d.items()}) + super(DotDict, self).__setitem__(key, value) + + def __getstate__(self): + return self.__dict__ + + def __setstate__(self, d): + self.__dict__.update(d) + + __setattr__, __getattr__ = __setitem__, __getitem__ + + class Options: """Option Class for configs. 
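The DotDict helper added above gives nested config sections attribute-style access on top of plain dict storage. A small sketch of the resulting behavior, with illustrative keys only, assuming the class exactly as defined in this hunk::

    d = DotDict({'tuning': {'strategy': {'name': 'basic'}}})
    assert d.tuning.strategy.name == 'basic'    # attribute access walks nested dicts
    d.tuning.strategy = {'name': 'mse'}         # plain dicts are re-wrapped on set
    assert d['tuning']['strategy']['name'] == 'mse'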
@@ -489,6 +529,7 @@ def keys(self): def __getitem__(self, item): return getattr(self, item) + accuracy_criterion = AccuracyCriterion() @@ -1215,6 +1256,8 @@ def __init__(self, op_type_dict=None, op_name_dict=None, reduce_range=None, + model_name="", + quant_format="default", excluded_precisions=[], quant_level="auto", accuracy_criterion=accuracy_criterion, @@ -1227,16 +1270,27 @@ def __init__(self, op_type_dict=op_type_dict, op_name_dict=op_name_dict, reduce_range=reduce_range, + model_name=model_name, + quant_format=quant_format, excluded_precisions=excluded_precisions, quant_level=quant_level, accuracy_criterion=accuracy_criterion, tuning_criterion=tuning_criterion) self._approach = 'quant_aware_training' + self._framework = None @property def approach(self): """Get approach.""" return self._approach + + @property + def framework(self): + return self._framework + + @framework.setter + def framework(self, framework): + self._framework = framework class WeightPruningConfig: @@ -1972,6 +2026,7 @@ def __init__(self, precisions=None): class Config: def __init__(self, quantization=quantization, + qat_quantization=qat_quantization, benchmark=benchmark, options=options, mixed_precision=mixed_precision, @@ -1983,8 +2038,11 @@ def __init__(self, pytorch=pytorch_config, mxnet=mxnet_config, keras=keras_config, + accuracy_criterion=accuracy_criterion, + tuning_criterion=tuning_criterion ): self._quantization = quantization + self._qat_quantization = qat_quantization self._benchmark = benchmark self._options = options self._mixed_precision=mixed_precision @@ -1996,6 +2054,8 @@ def __init__(self, self._pytorch = pytorch self._mxnet = mxnet self._keras = keras + self._accuracy = accuracy_criterion + self._tuning = tuning_criterion @property def distillation(self): @@ -2028,6 +2088,10 @@ def pruning(self): @property def quantization(self): return self._quantization + + @property + def qat_quantization(self): + return self._qat_quantization @property def benchmark(self): @@ -2045,5 +2109,12 @@ def mixed_precision(self): def onnxruntime(self): return self._onnxruntime + @property + def accuracy(self): + return self._accuracy + + @property + def tuning(self): + return self._tuning config = Config() diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index f1815f7e1cf..21180fd5802 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -174,7 +174,7 @@ def model(self, user_model): """ cfg = self.conf - if cfg.quantization.framework == None: + if cfg.quantization.framework is None: if isinstance(user_model, BaseModel): cfg.quantization.framework = list(MODELS.keys())[list(MODELS.values()).index(type(user_model))] if cfg.quantization.backend == "ipex": From 60f425670b76a43323423bf865c8885d159d6851 Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Fri, 31 Mar 2023 14:57:43 +0800 Subject: [PATCH 022/103] fix format Signed-off-by: Cheng, Zixuan --- neural_compressor/compression/callbacks.py | 32 ++++++++++++++-------- neural_compressor/mix_precision.py | 9 ++++-- neural_compressor/quantization.py | 12 +++++--- 3 files changed, 35 insertions(+), 18 deletions(-) diff --git a/neural_compressor/compression/callbacks.py b/neural_compressor/compression/callbacks.py index 395e54ab1c7..dc44ea74d48 100644 --- a/neural_compressor/compression/callbacks.py +++ b/neural_compressor/compression/callbacks.py @@ -28,14 +28,15 @@ import random from .distillation.criterions import Criterions from ..adaptor import FRAMEWORKS -from ..config import Config, 
PostTrainingQuantConfig, DistillationConfig, WeightPruningConfig +from ..config import Config from ..utils import logger from ..utils.utility import time_limit, LazyImport from ..model import BaseModel, Model from ..model.model import get_model_fwk_name from ..model.tensorflow_model import TensorflowQATModel from ..strategy import STRATEGIES -from .pruner.utils import process_config, parse_to_prune, generate_pruner_config, get_sparsity_ratio +from .pruner.utils import process_config, parse_to_prune, \ + generate_pruner_config, get_sparsity_ratio from .pruner.pruners import get_pruner, PRUNERS LazyImport('torch.nn') torch = LazyImport('torch') @@ -54,7 +55,8 @@ def __init__(self, conf=None, model=None): Args: conf: A Config object which definds the compressor behavior. - Just like: QuantizationAwareTrainingConfig, WeightPruningConfig and DistillationConfig. + Just like: QuantizationAwareTrainingConfig, WeightPruningConfig \ + and DistillationConfig. model: Model to be compressed in this object. """ self.conf = None @@ -123,7 +125,8 @@ def on_step_begin(self, batch_id): else: return None - def on_after_compute_loss(self, input, student_output, student_loss, teacher_output=None): + def on_after_compute_loss(self, input, student_output, \ + student_loss, teacher_output=None): """Be called on the end of loss computation.""" if len(self.hooks_dict['on_after_compute_loss']) > 0: loss = student_loss @@ -476,7 +479,8 @@ def __init__(self, conf=None, model=None): model: Model to be quantized in this object. """ super(QuantizationAwareTrainingCallbacks, self).__init__(conf=None) - self.conf = Config(quantization=conf, benchmark=None, pruning=None, distillation=None, nas=None) + self.conf = Config(quantization=conf, benchmark=None, \ + pruning=None, distillation=None, nas=None) self.cfg = self.conf self.model = model @@ -488,14 +492,17 @@ def __init__(self, conf=None, model=None): 'random_seed': self.cfg.options.random_seed, 'workspace_path': self.cfg.options.workspace, 'q_dataloader': None, - 'backend': self.cfg.qat_quantization.backend if self.cfg.qat_quantization.backend is not None else 'default', - 'format': self.cfg.qat_quantization.quant_format if self.cfg.qat_quantization.quant_format is not None else 'default'} + 'backend': self.cfg.qat_quantization.backend if \ + self.cfg.qat_quantization.backend is not None else 'default', + 'format': self.cfg.qat_quantization.quant_format if \ + self.cfg.qat_quantization.quant_format is not None else 'default'} if self.cfg.qat_quantization.approach is not None: framework_specific_info['approach'] = self.cfg.qat_quantization.approach if 'tensorflow' in self.framework: framework_specific_info.update( - {"inputs": self.cfg.qat_quantization.inputs, "outputs": self.cfg.qat_quantization.outputs}) + {"inputs": self.cfg.qat_quantization.inputs, \ + "outputs": self.cfg.qat_quantization.outputs}) self.adaptor = FRAMEWORKS[self.framework](framework_specific_info) self.adaptor.model = self.model self.register_hook('on_train_begin', self.adaptor._pre_hook_for_qat) @@ -520,7 +527,8 @@ def __init__(self, conf=None, model=None): model: Model to be Pruning in this object. 
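For context, a minimal sketch of how PruningCallbacks ends up being driven through the 2.x training API (this is outside the diff; the toy torch model, the step count, and the absence of a real optimizer are all placeholders)::

    import torch
    from neural_compressor.training import prepare_compression
    from neural_compressor.config import WeightPruningConfig

    model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.Linear(8, 2))
    config = WeightPruningConfig(target_sparsity=0.8, pattern="4x1",
                                 start_step=1, end_step=10)
    compression_manager = prepare_compression(model, config)

    compression_manager.callbacks.on_train_begin()
    for step in range(12):
        compression_manager.callbacks.on_step_begin(step)
        loss = model(torch.randn(4, 8)).sum()
        loss.backward()
        # optimizer.step() would go here in a real training loop
        compression_manager.callbacks.on_step_end()
    compression_manager.callbacks.on_train_end()
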
""" super(PruningCallbacks, self).__init__(conf=None) - self.conf = Config(pruning=conf, quantization=None, benchmark=None, distillation=None, nas=None) + self.conf = Config(pruning=conf, quantization=None, benchmark=None + , distillation=None, nas=None) self.cfg = self.conf.pruning self.model = model self.pruners_info = process_config(self.cfg) @@ -583,7 +591,8 @@ class DistillationCallbacks(BaseCallbacks): def __init__(self, conf=None, model=None): """Initialize the attributes.""" super(DistillationCallbacks, self).__init__() - self.conf = Config(quantization=None, benchmark=None, pruning=None, distillation=conf, nas=None) + self.conf = Config(quantization=None, benchmark=None, pruning=None + , distillation=conf, nas=None) self.cfg = self.conf.distillation self.model = model @@ -595,7 +604,8 @@ def __init__(self, conf=None, model=None): self.best_score = 0 self.best_model = None self.hooks_registered = False - assert hasattr(self.cfg, "teacher_model"), "Please assign teacher model in DistillationConfig." + assert hasattr(self.cfg, "teacher_model"),\ + "Please assign teacher model in DistillationConfig." self.teacher_model = self.cfg.teacher_model self.generate_hooks() self.create_criterion() diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py index 3c89702c3b3..acf9a64c276 100644 --- a/neural_compressor/mix_precision.py +++ b/neural_compressor/mix_precision.py @@ -196,11 +196,13 @@ def model(self, user_model): if isinstance(user_model, BaseModel): cfg.quantization.framework = list(MODELS.keys())[list(MODELS.values()).index(type(user_model))] if cfg.quantization.backend == "ipex": - assert cfg.quantization.framework == "pytorch_ipex", "Please wrap the model with correct Model class!" + assert cfg.quantization.framework == "pytorch_ipex",\ + "Please wrap the model with correct Model class!" if cfg.quantization.backend == "itex": from .model.tensorflow_model import get_model_type if get_model_type(user_model.model) == 'keras': - assert cfg.quantization.framework == "keras", "Please wrap the model with KerasModel class!" + assert cfg.quantization.framework == "keras",\ + "Please wrap the model with KerasModel class!" else: assert cfg.quantization.framework == "pytorch_itex", \ "Please wrap the model with TensorflowModel class!" @@ -220,7 +222,8 @@ def model(self, user_model): if not isinstance(user_model, BaseModel): logger.warning("Force convert framework model to neural_compressor model.") if "tensorflow" in cfg.quantization.framework or cfg.quantization.framework == "keras": - self._model = Model(user_model, backend=cfg.quantization.framework, device=cfg.quantization.device) + self._model = Model(user_model, backend=cfg.quantization.framework + , device=cfg.quantization.device) else: self._model = Model(user_model, backend=cfg.quantization.framework) else: diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index 21180fd5802..750a2c52cc6 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -54,7 +54,8 @@ def __init__(self, conf, **kwargs): conf (PostTrainingQuantConfig): A instance of PostTrainingQuantConfig to specify the quantization behavior. 
""" - self.conf = Config(quantization=conf, benchmark=None, pruning=None, distillation=None, nas=None) + self.conf = Config(quantization=conf, benchmark=None + , pruning=None, distillation=None, nas=None) seed = self.conf.options.random_seed random.seed(seed) np.random.seed(seed) @@ -81,7 +82,8 @@ def pre_proccess(self): strategy = "conservative" if strategy == "mse_v2": - if not (cfg.quantization.framework.startswith("tensorflow") or cfg.quantization.framework == 'pytorch_fx'): + if not (cfg.quantization.framework.startswith("tensorflow")\ + or cfg.quantization.framework == 'pytorch_fx'): strategy = "basic" logger.warning(f"MSE_v2 does not support {cfg.quantization.framework} now, use basic instead.") logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.") @@ -178,10 +180,12 @@ def model(self, user_model): if isinstance(user_model, BaseModel): cfg.quantization.framework = list(MODELS.keys())[list(MODELS.values()).index(type(user_model))] if cfg.quantization.backend == "ipex": - assert cfg.quantization.framework == "pytorch_ipex", "Please wrap the model with correct Model class!" + assert cfg.quantization.framework == "pytorch_ipex",\ + "Please wrap the model with correct Model class!" if cfg.quantization.backend == "itex": if get_model_type(user_model.model) == 'keras': - assert cfg.quantization.framework == "keras", "Please wrap the model with KerasModel class!" + assert cfg.quantization.framework == "keras",\ + "Please wrap the model with KerasModel class!" else: assert cfg.quantization.framework == "pytorch_itex", \ "Please wrap the model with TensorflowModel class!" From 435ba6d08cd24e33c7a0001a158fd492a0b51bef Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Fri, 31 Mar 2023 16:37:43 +0800 Subject: [PATCH 023/103] update log Signed-off-by: yiliu30 --- neural_compressor/strategy/strategy.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 5607b79c7dd..b1cbe2f655b 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -685,11 +685,10 @@ def _remove_redundant_qmodel(self): def _eval_baseline(self): """Evaluate the fp32 model if needed.""" if self._not_tuning: - logger.info("Neither evaluation function nor metric is defined." \ + logger.info("Neither evaluation function nor metric and evaluation dataloader is defined." \ " Generate a quantized model with default quantization configuration.") - self._not_tuning = True - - if not self._not_tuning: + return + else: # get fp32 model baseline if self.baseline is None: logger.info("Get FP32 model baseline.") From bb2026cdcef291cbb0cf1d000cca939acf1f497a Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Fri, 31 Mar 2023 17:43:58 +0800 Subject: [PATCH 024/103] add dump config Signed-off-by: yiliu30 --- neural_compressor/utils/utility.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/neural_compressor/utils/utility.py b/neural_compressor/utils/utility.py index 4370d826045..9d1bed04bc6 100644 --- a/neural_compressor/utils/utility.py +++ b/neural_compressor/utils/utility.py @@ -561,4 +561,25 @@ def show_memory_info(hint): info = p.memory_full_info() memory = info.uss / 1024. / 1024 - print('{} memory used: {} MB'.format(hint, memory)) \ No newline at end of file + print('{} memory used: {} MB'.format(hint, memory)) + + +def dump_class_attrs(obj, result = {}): + """ + Dump the attributes and values of a config class. 
+ + Args: + obj: An instance of a config class + """ + obj_name = obj.__class__.__name__ + if obj_name not in result: + result[obj_name] = {} + for attr in dir(obj): + if not attr.startswith("__"): + value = getattr(obj, attr) + value_class_name = value.__class__.__name__ + if 'Config' in value_class_name or 'Criterion' in value_class_name: + dump_class_attrs(value, result=result[obj_name]) + else: + attr = attr[1:] if attr.startswith('_') else attr + result[obj_name][attr] = value \ No newline at end of file From f35d53ad38d2874b7eda910b69e3057fe8adea1b Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Fri, 31 Mar 2023 17:44:24 +0800 Subject: [PATCH 025/103] add ut for metric + dataloader Signed-off-by: yiliu30 --- neural_compressor/quantization.py | 8 +++++--- neural_compressor/strategy/strategy.py | 4 +--- test/strategy/test_basic.py | 17 +++++++++++++++++ 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index f1815f7e1cf..08ceca784e3 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -24,7 +24,7 @@ from .model.model import BaseModel, get_model_fwk_name, get_model_type, Model, MODELS from .strategy import STRATEGIES from .utils import logger -from .utils.utility import time_limit +from .utils.utility import time_limit, dump_class_attrs class _PostTrainingQuant: @@ -109,7 +109,7 @@ def pre_proccess(self): q_func=self._train_func, eval_func=self._eval_func, eval_dataloader=self._eval_dataloader, - eval_metric=self._eval_metric, + eval_metric=self._metric, resume=_resume, q_hooks=self.callbacks.hooks if self.callbacks is not None else None) @@ -118,7 +118,9 @@ def execute(self): try: with time_limit(self.conf.quantization.tuning_criterion.timeout): logger.debug("Dump user configuration:") - logger.debug(self.conf) + conf_dict = {} + dump_class_attrs(self.conf, conf_dict) + logger.info(conf_dict) self.strategy.traverse() except KeyboardInterrupt: pass diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index b1cbe2f655b..d8e68d679b8 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -83,10 +83,8 @@ class TuneStrategy(object): def _check_tuning_status(self): if self.eval_func: self._not_tuning = False - return - elif self.eval_dataloader and self.eval_metric: + if self.eval_dataloader and self.eval_metric: self._not_tuning = False - return def __init__(self, model, diff --git a/test/strategy/test_basic.py b/test/strategy/test_basic.py index 781a7ee333f..365a012be88 100644 --- a/test/strategy/test_basic.py +++ b/test/strategy/test_basic.py @@ -66,6 +66,23 @@ def fake_eval(model): conf = PostTrainingQuantConfig() q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader= dataloader, eval_func=fake_eval) self.assertIsNotNone(q_model) + + def test_run_create_eval_from_metric_and_dataloader(self): + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.data import Datasets, DATALOADERS + + # dataset and dataloader + dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) + dataloader = DATALOADERS["tensorflow"](dataset) + from neural_compressor.metric import METRICS + metrics = METRICS('tensorflow') + top1 = metrics['topk']() + + # tuning and accuracy criterion + conf = PostTrainingQuantConfig() + q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader= dataloader,\ + 
eval_dataloader=dataloader, eval_metric=top1) def test_no_tuning(self): import torchvision From c00bbd72308310761c6413fbde8cbcaf3ef666f0 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Fri, 31 Mar 2023 17:48:23 +0800 Subject: [PATCH 026/103] fixed metric Signed-off-by: yiliu30 --- neural_compressor/quantization.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index 523bd99f8a2..f4a344c3c50 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -111,7 +111,7 @@ def pre_proccess(self): q_func=self._train_func, eval_func=self._eval_func, eval_dataloader=self._eval_dataloader, - eval_metric=self._metric, + eval_metric=self._eval_metric, resume=_resume, q_hooks=self.callbacks.hooks if self.callbacks is not None else None) @@ -320,7 +320,7 @@ def metric(self, user_metric): metrics = METRICS(self.conf.quantization.framework) metrics.register(name, metric_cls) - self._metric = user_metric + self._eval_metric = user_metric @property def calib_func(self): From bb36abb60ac937b86688c15b47e1546dc4de7d43 Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Mon, 3 Apr 2023 11:48:25 +0800 Subject: [PATCH 027/103] fix for pydocstyle and pylint scan Signed-off-by: Cheng, Zixuan --- neural_compressor/compression/callbacks.py | 3 +- neural_compressor/config.py | 76 +++++++++++++++++++--- neural_compressor/strategy/bayesian.py | 2 +- neural_compressor/strategy/strategy.py | 4 +- neural_compressor/utils/utility.py | 3 +- 5 files changed, 72 insertions(+), 16 deletions(-) diff --git a/neural_compressor/compression/callbacks.py b/neural_compressor/compression/callbacks.py index dc44ea74d48..5ee6df574ce 100644 --- a/neural_compressor/compression/callbacks.py +++ b/neural_compressor/compression/callbacks.py @@ -263,7 +263,8 @@ def pre_process(self): logger.info(f"On the premise that the accuracy meets the conditions, improve the performance.") if strategy == "mse_v2": - if not (self.cfg.qat_quantization.framework.startswith("tensorflow") or self.cfg.qat_quantization.framework == 'pytorch_fx'): + if not (self.cfg.qat_quantization.framework.startswith("tensorflow") \ + or self.cfg.qat_quantization.framework == 'pytorch_fx'): strategy = "basic" logger.warning(f"MSE_v2 does not support {self.cfg.qat_quantization.framework} now, use basic instead.") logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.") diff --git a/neural_compressor/config.py b/neural_compressor/config.py index e8d5f49a7c5..83f55bd4c4c 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -99,6 +99,7 @@ class DotDict(dict): """ def __init__(self, value=None): + """Init a DotDict object.""" if value is None: pass elif isinstance(value, dict): @@ -108,10 +109,12 @@ def __init__(self, value=None): raise TypeError('expected dict') def __getitem__(self, key): + """Get the key.""" value = self.get(key, None) return value def __setitem__(self, key, value): + """Set the value to the key.""" if isinstance(value, dict) and not isinstance(value, DotDict): value = DotDict(value) if isinstance(value, list) and len(value) == 1 and isinstance( @@ -123,9 +126,11 @@ def __setitem__(self, key, value): super(DotDict, self).__setitem__(key, value) def __getstate__(self): + """Get the dict.""" return self.__dict__ def __setstate__(self, d): + """Set the dict.""" self.__dict__.update(d) __setattr__, __getattr__ = __setitem__, __getitem__ @@ -286,11 +291,13 @@ def __init__(self, self._framework=None def 
keys(self): + """Returns keys of the dict.""" return ('inputs', 'outputs', 'backend', 'device', 'warmup', 'iteration', 'model', \ 'model_name', 'cores_per_instance', 'num_of_instance', 'framework', \ 'inter_num_of_threads','intra_num_of_threads') def __getitem__(self, item): + """Get the dict.""" return getattr(self, item) @property @@ -307,6 +314,7 @@ def backend(self, backend): @property def device(self): + """Get device name.""" return self._device @device.setter @@ -407,10 +415,12 @@ def intra_num_of_threads(self, intra_num_of_threads): @property def model(self): + """Get model.""" return self._model @model.setter def model(self, model): + """Set model.""" self._model = model @property @@ -426,10 +436,12 @@ def model_name(self, model_name): @property def framework(self): + """Set framework.""" return self._framework @framework.setter def framework(self, framework): + """Get framework.""" self._framework = framework @@ -524,9 +536,11 @@ def __str__(self): return self.criterion def keys(self): + """Returns keys of the dict.""" return ('higher_is_better', 'criterion', 'tolerable_loss') def __getitem__(self, item): + """Get the dict.""" return getattr(self, item) @@ -1286,10 +1300,12 @@ def approach(self): @property def framework(self): + """Get framework.""" return self._framework @framework.setter def framework(self, framework): + """Set framework.""" self._framework = framework @@ -1645,19 +1661,19 @@ class MixedPrecisionConfig(_BaseQuantizationConfig): it won't work if there is no accuracy tuning process. excluded_precisions (list, optional): Precisions to be excluded during mix precision conversion, default is []. -yaml - quantization: - diagxx -== -QuantizationConfig(diagnosis=True) + yaml + quantization: + diagxx + == + QuantizationConfig(diagnosis=True) - Example:: + Example:: - from neural_compressor import mix_precision - from neural_compressor.config import MixedPrecisionConfig + from neural_compressor import mix_precision + from neural_compressor.config import MixedPrecisionConfig - conf = MixedPrecisionConfig() - converted_model = mix_precision.fit(model, config=conf) + conf = MixedPrecisionConfig() + converted_model = mix_precision.fit(model, config=conf) """ def __init__(self, device="cpu", @@ -1701,14 +1717,17 @@ def precision(self, precision): @property def model(self): + """Get model.""" return self._model @model.setter def model(self, model): + """Set model.""" self._model = model @property def approach(self): + """Get approach.""" return self._approach @@ -1929,8 +1948,10 @@ def __init__( class NASConfig: + """Config class for NAS approaches.""" def __init__(self, approach=None, search_space=None, search_algorithm=None, metrics=[], higher_is_better=[], max_trials=3, seed=42, dynas=None): + """Init a NASConfig object.""" self._approach = approach self._search = DotDict({ 'search_space': search_space, @@ -1946,31 +1967,39 @@ def __init__(self, approach=None, search_space=None, search_algorithm=None, @property def approach(self): + """Get approach.""" return self._approach @approach.setter def approach(self, approach): + """Set approach.""" self._approach = approach @property def search(self): + """Get the setting dict for search.""" return self._search @search.setter def search(self, search): + """Set the setting dict for search.""" self._search = search class MXNet: + """Base config class for MXNet.""" def __init__(self, precisions=None): + """Init an MXNet object.""" self._precisions = precisions @property def precisions(self): + """Get precision.""" return 
self._precisions @precisions.setter def precisions(self, precisions): + """Set precision.""" if not isinstance(precisions, list): precisions = [precisions] if _check_value('precisions', precisions, str, ['int8', 'uint8', 'fp32', 'bf16', 'fp16']): @@ -1978,33 +2007,43 @@ def precisions(self, precisions): class ONNX(MXNet): + """Config class for ONNX.""" def __init__(self, graph_optimization_level=None, precisions=None): + """Init an ONNX object.""" super().__init__(precisions) self._graph_optimization_level = graph_optimization_level @property def graph_optimization_level(self): + """Get graph optimization level.""" return self._graph_optimization_level @graph_optimization_level.setter def graph_optimization_level(self, graph_optimization_level): + """Set graph optimization level.""" if _check_value('graph_optimization_level', graph_optimization_level, str, ['DISABLE_ALL', 'ENABLE_BASIC', 'ENABLE_EXTENDED', 'ENABLE_ALL']): self._graph_optimization_level = graph_optimization_level class TensorFlow(MXNet): + """Config class for TensorFlow.""" def __init__(self, precisions=None): + """Init a TensorFlow object.""" super().__init__(precisions) class Keras(MXNet): + """Config class for Keras.""" def __init__(self, precisions=None): + """Init a Keras object.""" super().__init__(precisions) class PyTorch(MXNet): + """Config class for PyTorch.""" def __init__(self, precisions=None): + """Init a PyTorch object.""" super().__init__(precisions) @@ -2024,6 +2063,7 @@ def __init__(self, precisions=None): class Config: + """Main config class.""" def __init__(self, quantization=quantization, qat_quantization=qat_quantization, @@ -2041,6 +2081,7 @@ def __init__(self, accuracy_criterion=accuracy_criterion, tuning_criterion=tuning_criterion ): + """Init a config object.""" self._quantization = quantization self._qat_quantization = qat_quantization self._benchmark = benchmark @@ -2059,62 +2100,77 @@ def __init__(self, @property def distillation(self): + """Get the distillation object.""" return self._distillation @property def nas(self): + """Get the nas object.""" return self._nas @property def tensorflow(self): + """Get the tensorflow object.""" return self._tensorflow @property def keras(self): + """Get the keras object.""" return self._keras @property def pytorch(self): + """Get the pytorch object.""" return self._pytorch @property def mxnet(self): + """Get the mxnet object.""" return self._mxnet @property def pruning(self): + """Get the pruning object.""" return self._pruning @property def quantization(self): + """Get the quantization object.""" return self._quantization @property def qat_quantization(self): + """Get the qat quantization object.""" return self._qat_quantization @property def benchmark(self): + """Get the benchmark object.""" return self._benchmark @property def options(self): + """Get the options object.""" return self._options @property def mixed_precision(self): + """Get the mixed_precision object.""" return self._mixed_precision @property def onnxruntime(self): + """Get the onnxruntime object.""" return self._onnxruntime @property def accuracy(self): + """Get the accuracy object.""" return self._accuracy @property def tuning(self): + """Get the tuning object.""" return self._tuning config = Config() diff --git a/neural_compressor/strategy/bayesian.py b/neural_compressor/strategy/bayesian.py index 73421b5e11c..eed2ef84cbf 100644 --- a/neural_compressor/strategy/bayesian.py +++ b/neural_compressor/strategy/bayesian.py @@ -47,7 +47,7 @@ def __init__(self, eval_metric=None, resume=None, 
                 q_hooks=None):
-        """Init the BaySian tuning strategy
+        """Init the Bayesian tuning strategy.

         Args:
             model: The FP32 model specified for low precision tuning.
diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py
index d8e68d679b8..622814cbcef 100644
--- a/neural_compressor/strategy/strategy.py
+++ b/neural_compressor/strategy/strategy.py
@@ -1050,7 +1050,8 @@ def _set_objectives(self):
         obj_higher_is_better = None
         obj_weight = None
         if self.conf.quantization.tuning_criterion.multi_objectives:
-            obj_higher_is_better = self.conf.quantization.tuning_criterion.multi_objectives.get('higher_is_better', None)
+            obj_higher_is_better = self.conf.quantization.tuning_criterion\
+                .multi_objectives.get('higher_is_better', None)
             obj_weight = self.conf.quantization.tuning_criterion.multi_objectives.get('weight', None)
             obj_lst = self.conf.quantization.tuning_criterion.multi_objectives.get('objective', [])
             self.use_multi_objective = len(obj_lst) > 0
@@ -1475,7 +1476,6 @@ def _find_self_tuning_history(self):

     def _add_tuning_history(self, tune_cfg=None, tune_result=None, **kwargs):
         """Add tuning config to tuning history.
-
         The tuning history ever made, structured like below:
         [
           {
diff --git a/neural_compressor/utils/utility.py b/neural_compressor/utils/utility.py
index 9d1bed04bc6..f60045f69a1 100644
--- a/neural_compressor/utils/utility.py
+++ b/neural_compressor/utils/utility.py
@@ -565,8 +565,7 @@ def show_memory_info(hint):


 def dump_class_attrs(obj, result = {}):
-    """
-    Dump the attributes and values of a config class.
+    """Dump the attributes and values of a config class.

     Args:
         obj: An instance of a config class

From 203db1151520250f51dfd95131d0ee4b7bca0dac Mon Sep 17 00:00:00 2001
From: "Cheng, Zixuan"
Date: Mon, 3 Apr 2023 11:53:03 +0800
Subject: [PATCH 028/103] minor fix

Signed-off-by: Cheng, Zixuan

---
 neural_compressor/utils/utility.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/neural_compressor/utils/utility.py b/neural_compressor/utils/utility.py
index f60045f69a1..f560f45fe4a 100644
--- a/neural_compressor/utils/utility.py
+++ b/neural_compressor/utils/utility.py
@@ -568,7 +568,8 @@ def dump_class_attrs(obj, result = {}):
     """Dump the attributes and values of a config class.

     Args:
-        obj: An instance of a config class
+        obj: An instance of a config class.
+        result: A dict for recording attributes and values.
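A usage sketch for this helper, outside the diff (the printed shape is indicative only). Since `result` defaults to a shared mutable dict, callers should pass a fresh dict explicitly, as the quantization entry point in this series does::

    from neural_compressor.config import PostTrainingQuantConfig
    from neural_compressor.utils.utility import dump_class_attrs

    result = {}
    dump_class_attrs(PostTrainingQuantConfig(), result=result)
    # result nests by class name, expanding *Config/*Criterion members
    # recursively, roughly:
    # {'PostTrainingQuantConfig': {'device': 'cpu', 'approach': ...,
    #                              'AccuracyCriterion': {...},
    #                              'TuningCriterion': {...}, ...}}
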
""" obj_name = obj.__class__.__name__ if obj_name not in result: From 464fe1fe962e036e43feb6c6aa55b6cc19070c60 Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Mon, 3 Apr 2023 17:07:16 +0800 Subject: [PATCH 029/103] fix for distillation and pruning 2.x UT Signed-off-by: Cheng, Zixuan --- neural_compressor/compression/callbacks.py | 52 ++++++++++++++-------- neural_compressor/config.py | 6 +-- 2 files changed, 37 insertions(+), 21 deletions(-) diff --git a/neural_compressor/compression/callbacks.py b/neural_compressor/compression/callbacks.py index 5ee6df574ce..9702f080643 100644 --- a/neural_compressor/compression/callbacks.py +++ b/neural_compressor/compression/callbacks.py @@ -28,7 +28,7 @@ import random from .distillation.criterions import Criterions from ..adaptor import FRAMEWORKS -from ..config import Config +from ..config import Config, QuantizationAwareTrainingConfig, DistillationConfig, WeightPruningConfig from ..utils import logger from ..utils.utility import time_limit, LazyImport from ..model import BaseModel, Model @@ -213,22 +213,28 @@ def model(self, user_model): if user_model is None: return - if self.cfg.qat_quantization.framework is None: - self.framework = get_model_fwk_name( - user_model.model if isinstance(user_model, BaseModel) else user_model) - if self.framework == "tensorflow": + self.framework = get_model_fwk_name( + user_model.model if isinstance(user_model, BaseModel) else user_model) + if self.framework == "tensorflow": + try: if self.cfg.qat_quantization.approach == "quant_aware_training": self.framework = 'tensorflow_itex' else: from ..model.tensorflow_model import get_model_type if get_model_type(user_model) == 'keras' and self.cfg.qat_quantization.backend == 'itex': self.framework = 'keras' - if self.framework == "pytorch": + except Exception as e: + pass + + if self.framework == "pytorch": + try: if self.cfg.qat_quantization.backend == "default": self.framework = "pytorch_fx" elif self.cfg.qat_quantization.backend == "ipex": self.framework = "pytorch_ipex" - self.cfg.qat_quantization.framework = self.framework + self.cfg.qat_quantization.framework = self.framework + except Exception as e: + pass if not isinstance(user_model, BaseModel): logger.warning("Force convert framework model to neural_compressor model.") @@ -238,17 +244,26 @@ def model(self, user_model): else: self._model = TensorflowQATModel(user_model._model) elif "tensorflow" in self.framework or self.framework == "keras": - self._model = Model(user_model, backend=self.framework, device=self.cfg.qat_quantization.device) + try: + self._model = Model(user_model, backend=self.framework, device=self.cfg.qat_quantization.device) + except Exception as e: + self._model = Model(user_model, backend=self.framework, device=None) else: self._model = Model(user_model, backend=self.framework) else: self._model = user_model if 'tensorflow' in self.framework: - self._model.name = self.cfg.qat_quantization.model_name - self._model.output_tensor_names = self.cfg.qat_quantization.outputs - self._model.input_tensor_names = self.cfg.qat_quantization.inputs - self._model.workspace_path = self.cfg.options.workspace + try: + self._model.name = self.cfg.qat_quantization.model_name + self._model.output_tensor_names = self.cfg.qat_quantization.outputs + self._model.input_tensor_names = self.cfg.qat_quantization.inputs + self._model.workspace_path = self.cfg.options.workspace + except Exception as e: + self._model.name = None + self._model.output_tensor_names = None + self._model.input_tensor_names = None + 
self._model.workspace_path = None def pre_process(self): """Create strategy to optimize model.""" @@ -266,7 +281,8 @@ def pre_process(self): if not (self.cfg.qat_quantization.framework.startswith("tensorflow") \ or self.cfg.qat_quantization.framework == 'pytorch_fx'): strategy = "basic" - logger.warning(f"MSE_v2 does not support {self.cfg.qat_quantization.framework} now, use basic instead.") + logger.warning(f"MSE_v2 does not support \ + {self.cfg.qat_quantization.framework} now, use basic instead.") logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.") assert strategy in STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy) @@ -643,21 +659,21 @@ def init_train_cfg(self): """Initialize the training configuration.""" if self._train_cfg is None: # train section of distillation section in yaml file should be configured. - self._train_cfg = self.cfg.train + self._train_cfg = self.cfg.criterion assert self._train_cfg, "train field of distillation section in yaml file must " \ "be configured for distillation if train_func is NOT set." def create_criterion(self): """Create the criterion for training.""" - #self.init_train_cfg() + self.init_train_cfg() if self.criterion is None: - assert 'criterion' in self._train_cfg.keys(), \ + assert self._train_cfg.config is not None, \ "criterion part in train field of distillation section in yaml file " \ "must be configured for distillation if criterion is NOT set." - criterion_cfg = self._train_cfg.criterion + criterion_cfg = self._train_cfg.config assert len(criterion_cfg) == 1, "There must be exactly one loss in " \ "criterion part, instead got {} loss.".format(len(criterion_cfg)) - loss = list(criterion_cfg.keys())[0] + loss = [i for i in criterion_cfg.keys()][0] loss_cfg = criterion_cfg[loss] criterion_builder = Criterions(self.framework)[loss](loss_cfg) criterion_tuple = criterion_builder() diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 83f55bd4c4c..9cf1d0e36e5 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -1591,9 +1591,9 @@ class DistillationConfig: Example:: from neural_compressor.training import prepare_compression - from neural_compressor.config import DistillationConfig, SelfKnowledgeDistillationLossConfig + from neural_compressor.config import DistillationConfig, KnowledgeDistillationLossConfig - distil_loss = SelfKnowledgeDistillationLossConfig() + distil_loss = KnowledgeDistillationLossConfig() conf = DistillationConfig(teacher_model=model, criterion=distil_loss) criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) @@ -1607,7 +1607,7 @@ def __init__(self, 'learning_rate': 0.0001 }}): """Init a DistillationConfig object.""" - self.criterion = criterion.config + self.criterion = criterion self.optimizer = optimizer self.teacher_model = teacher_model From 0d662d802c1b58ef6a904227a2fd6b643485c657 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Mon, 3 Apr 2023 21:49:01 +0800 Subject: [PATCH 030/103] adjust diagnosis Signed-off-by: yiliu30 --- neural_compressor/config.py | 34 +++++++++++++------------- neural_compressor/strategy/strategy.py | 2 +- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 9cf1d0e36e5..05064b97476 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -159,8 +159,6 @@ class Options: tensorboard(bool): This flag indicates whether to save the weights of the model and the inputs of each layer 
for visual display. Default value is False. - diagnosis(bool): This flag indicates whether to do diagnosis. - Default value is False. Example:: @@ -172,13 +170,12 @@ class Options: """ def __init__(self, random_seed=1978, workspace=default_workspace, - resume_from=None, tensorboard=False, diagnosis=False): + resume_from=None, tensorboard=False): """Init an Option object.""" self.random_seed = random_seed self.workspace = workspace self.resume_from = resume_from self.tensorboard = tensorboard - self.diagnosis = diagnosis # TODO expose the diagnosis to user @property def random_seed(self): @@ -223,17 +220,6 @@ def tensorboard(self, tensorboard): """Set tensorboard.""" if _check_value('tensorboard', tensorboard, bool): self._tensorboard = tensorboard - - @property - def diagnosis(self): - """Get diagnosis.""" - return self._diagnosis - - @diagnosis.setter - def diagnosis(self, diagnosis): - """Set diagnosis.""" - if _check_value('diagnosis', diagnosis, bool): - self._diagnosis = diagnosis class BenchmarkConfig: """Config Class for Benchmark. @@ -1137,7 +1123,8 @@ class PostTrainingQuantConfig(_BaseQuantizationConfig): criterion and tolerable_loss. Please refer to docstring of AccuracyCriterion class. use_distributed_tuning: Whether use distributed tuning or not. - + diagnosis(bool): This flag indicates whether to do diagnosis. + Default value is False. Example:: from neural_compressor.config PostTrainingQuantConfig, TuningCriterion @@ -1168,7 +1155,8 @@ def __init__(self, quant_level="auto", accuracy_criterion=accuracy_criterion, tuning_criterion=tuning_criterion, - use_distributed_tuning=False + use_distributed_tuning=False, + diagnosis=False ): """Init a PostTrainingQuantConfig object.""" super().__init__(inputs=inputs, @@ -1189,6 +1177,7 @@ def __init__(self, tuning_criterion=tuning_criterion, use_distributed_tuning=use_distributed_tuning) self.approach = approach + self.diagnosis = diagnosis @property def approach(self): @@ -1201,6 +1190,17 @@ def approach(self, approach): if _check_value("approach", approach, str, ["static", "dynamic", "auto"]): self._approach = QUANTMAPPING[approach] + @property + def diagnosis(self): + """Get diagnosis.""" + return self._diagnosis + + @diagnosis.setter + def diagnosis(self, diagnosis): + """Set diagnosis.""" + if _check_value('diagnosis', diagnosis, bool): + self._diagnosis = diagnosis + class QuantizationAwareTrainingConfig(_BaseQuantizationConfig): """Config Class for Quantization Aware Training. 
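With this commit the diagnosis switch moves off the global Options object and onto the quantization config itself; the strategy change in the next hunk reads `self.conf.quantization.diagnosis` accordingly. A minimal sketch of the new call site (not part of the patch)::

    from neural_compressor.config import PostTrainingQuantConfig

    conf = PostTrainingQuantConfig(diagnosis=True)  # request the inspect-tensor pass
    assert conf.diagnosis is True
    # Non-bool values are rejected by the _check_value assertion in the setter.
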
diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 622814cbcef..0a8d9aae2a4 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -653,7 +653,7 @@ def traverse(self): continue # recover the best quantized model from tuning config self._recover_best_qmodel_from_tuning_cfg() - if self.conf.options.diagnosis: + if self.conf.quantization.diagnosis: logger.debug(f'*** Start to do diagnosis (inspect tensor).') self._diagnosis() if self.use_multi_objective and len(self.tune_result_record) > 1 and \ From 7a688f9c75c109000ad0dcb1384f0906f631b2bf Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Tue, 4 Apr 2023 14:21:06 +0800 Subject: [PATCH 031/103] split old config and new Signed-off-by: Cheng, Zixuan --- neural_compressor/conf/pythonic_config.py | 645 +++++++++++++++++++++- neural_compressor/config.py | 5 +- 2 files changed, 646 insertions(+), 4 deletions(-) diff --git a/neural_compressor/conf/pythonic_config.py b/neural_compressor/conf/pythonic_config.py index 7bb8fea75dc..b0168e81ce8 100644 --- a/neural_compressor/conf/pythonic_config.py +++ b/neural_compressor/conf/pythonic_config.py @@ -16,13 +16,158 @@ # limitations under the License. import logging +from schema import Schema, And, Optional from .dotdict import DotDict -from ..config import ops_schema, AccuracyCriterion, accuracy_criterion, BenchmarkConfig, \ - _check_value, DistillationConfig, options, WeightPruningConfig + logger = logging.getLogger("neural_compressor") +ops_schema = Schema({ + Optional('weight', default=None): { + Optional('granularity'): And( + list, + lambda s: all(i in ['per_channel', 'per_tensor'] for i in s)), + Optional('scheme'): And( + list, + lambda s: all(i in ['asym', 'sym', 'asym_float'] for i in s)), + Optional('dtype'): And( + list, + lambda s: all(i in ['int8', 'uint8', 'fp32', 'bf16', 'fp16'] for i in s)), + Optional('algorithm'): And( + list, + lambda s: all(i in ['minmax'] for i in s))}, + Optional('activation', default=None): { + Optional('granularity'): And( + list, + lambda s: all(i in ['per_channel', 'per_tensor'] for i in s)), + Optional('scheme'): And( + list, + lambda s: all(i in ['asym', 'sym'] for i in s)), + Optional('dtype'): And( + list, + lambda s: all(i in ['int8', 'uint8', 'fp32', 'bf16', 'fp16', 'None'] for i in s)), + Optional('algorithm'): And( + list, + lambda s: all(i in ['minmax', 'kl', 'placeholder'] for i in s))}}) + + +def _check_value(name, src, supported_type, supported_value=[]): + """Check if the given object is the given supported type and in the given supported value. + + Example:: + + from neural_compressor.config import _check_value + + def datatype(self, datatype): + if _check_value('datatype', datatype, list, ['fp32', 'bf16', 'uint8', 'int8']): + self._datatype = datatype + """ + if isinstance(src, list) and any([not isinstance(i, supported_type) for i in src]): + assert False, ("Type of {} items should be {} but not {}".format( + name, str(supported_type), [type(i) for i in src])) + elif not isinstance(src, list) and not isinstance(src, supported_type): + assert False, ("Type of {} should be {} but not {}".format( + name, str(supported_type), type(src))) + + if len(supported_value) > 0: + if isinstance(src, str) and src not in supported_value: + assert False, ("{} is not in supported {}: {}. 
Skip setting it.".format( + src, name, str(supported_value))) + elif isinstance(src, list) and all([isinstance(i, str) for i in src]) and \ + any([i not in supported_value for i in src]): + assert False, ("{} is not in supported {}: {}. Skip setting it.".format( + src, name, str(supported_value))) + + return True + + +class Options: + """Option Class for configs. + + This class is used for configuring global variables. The global variable options is created with this class. + If you want to change global variables, you should use functions from utils.utility.py: + set_random_seed(seed: int) + set_workspace(workspace: str) + set_resume_from(resume_from: str) + set_tensorboard(tensorboard: bool) + + Args: + random_seed(int): Random seed used in neural compressor. + Default value is 1978. + workspace(str): The directory where intermediate files and tuning history file are stored. + Default value is: + './nc_workspace/{}/'.format(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')). + resume_from(str): The directory you want to resume tuning history file from. + The tuning history was automatically saved in the workspace directory + during the last tune process. + Default value is None. + tensorboard(bool): This flag indicates whether to save the weights of the model and the inputs of each layer + for visual display. + Default value is False. + + Example:: + + from neural_compressor.utils.utility import set_random_seed, set_workspace, set_resume_from, set_tensorboard + set_random_seed(2022) + set_workspace("workspace_path") + set_resume_from("workspace_path") + set_tensorboard(True) + + """ + def __init__(self, random_seed=1978, workspace=default_workspace, + resume_from=None, tensorboard=False): + """Init an Option object.""" + self.random_seed = random_seed + self.workspace = workspace + self.resume_from = resume_from + self.tensorboard = tensorboard + + @property + def random_seed(self): + """Get random seed.""" + return self._random_seed + + @random_seed.setter + def random_seed(self, random_seed): + """Set random seed.""" + if _check_value('random_seed', random_seed, int): + self._random_seed = random_seed + + @property + def workspace(self): + """Get workspace.""" + return self._workspace + + @workspace.setter + def workspace(self, workspace): + """Set workspace.""" + if _check_value('workspace', workspace, str): + self._workspace = workspace + + @property + def resume_from(self): + """Get resume_from.""" + return self._resume_from + + @resume_from.setter + def resume_from(self, resume_from): + """Set resume_from.""" + if resume_from is None or _check_value('resume_from', resume_from, str): + self._resume_from = resume_from + + @property + def tensorboard(self): + """Get tensorboard.""" + return self._tensorboard + + @tensorboard.setter + def tensorboard(self, tensorboard): + """Set tensorboard.""" + if _check_value('tensorboard', tensorboard, bool): + self._tensorboard = tensorboard + + class _BaseQuantizationConfig: """Args: inputs: inputs of model @@ -435,6 +580,318 @@ def example_inputs(self, example_inputs): self._example_inputs = example_inputs +class BenchmarkConfig: + """Config Class for Benchmark. + + Args: + inputs (list, optional): A list of strings containing the inputs of model. Default is an empty list. + outputs (list, optional): A list of strings containing the outputs of model. Default is an empty list. + backend (str, optional): Backend name for model execution. Supported values include: 'default', 'itex', + 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep'. 
Default value is 'default'. + warmup (int, optional): The number of iterations to perform warmup before running performance tests. + Default value is 5. + iteration (int, optional): The number of iterations to run performance tests. Default is -1. + cores_per_instance (int, optional): The number of CPU cores to use per instance. Default value is None. + num_of_instance (int, optional): The number of instances to use for performance testing. + Default value is None. + inter_num_of_threads (int, optional): The number of threads to use for inter-thread operations. + Default value is None. + intra_num_of_threads (int, optional): The number of threads to use for intra-thread operations. + Default value is None. + + Example:: + + # Run benchmark according to config + from neural_compressor.benchmark import fit + + conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=7) + fit(model='./int8.pb', config=conf, b_dataloader=eval_dataloader) + """ + def __init__(self, + inputs=[], + outputs=[], + backend='default', + device='cpu', + warmup=5, + iteration=-1, + model=None, + model_name='', + cores_per_instance=None, + num_of_instance=None, + inter_num_of_threads=None, + intra_num_of_threads=None): + """Init a BenchmarkConfig object.""" + self.inputs = inputs + self.outputs = outputs + self.backend = backend + self.device=device + self.warmup = warmup + self.iteration = iteration + self.model = model + self.model_name = model_name + self.cores_per_instance = cores_per_instance + self.num_of_instance = num_of_instance + self.inter_num_of_threads = inter_num_of_threads + self.intra_num_of_threads = intra_num_of_threads + self._framework=None + + def keys(self): + """Returns keys of the dict.""" + return ('inputs', 'outputs', 'backend', 'device', 'warmup', 'iteration', 'model', \ + 'model_name', 'cores_per_instance', 'num_of_instance', 'framework', \ + 'inter_num_of_threads','intra_num_of_threads') + + def __getitem__(self, item): + """Get the dict.""" + return getattr(self, item) + + @property + def backend(self): + """Get backend.""" + return self._backend + + @backend.setter + def backend(self, backend): + """Set backend.""" + if _check_value('backend', backend, str, [ + 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep']): + self._backend = backend + + @property + def device(self): + """Get device name.""" + return self._device + + @device.setter + def device(self, device): + if _check_value('device', device, str, ['cpu', 'gpu']): + self._device = device + + @property + def outputs(self): + """Get outputs.""" + return self._outputs + + @outputs.setter + def outputs(self, outputs): + """Set outputs.""" + if _check_value('outputs', outputs, str): + self._outputs = outputs + + @property + def inputs(self): + """Get inputs.""" + return self._inputs + + @inputs.setter + def inputs(self, inputs): + """Set inputs.""" + if _check_value('inputs', inputs, str): + self._inputs = inputs + + @property + def warmup(self): + """Get warmup.""" + return self._warmup + + @warmup.setter + def warmup(self, warmup): + """Set warmup.""" + if _check_value('warmup', warmup, int): + self._warmup = warmup + + @property + def iteration(self): + """Get iteration.""" + return self._iteration + + @iteration.setter + def iteration(self, iteration): + """Set iteration.""" + if _check_value('iteration', iteration, int): + self._iteration = iteration + + @property + def cores_per_instance(self): + """Get cores_per_instance.""" + return self._cores_per_instance + + @cores_per_instance.setter + def 
cores_per_instance(self, cores_per_instance): + """Set cores_per_instance.""" + if cores_per_instance is None or _check_value('cores_per_instance', cores_per_instance, + int): + self._cores_per_instance = cores_per_instance + + @property + def num_of_instance(self): + """Get num_of_instance.""" + return self._num_of_instance + + @num_of_instance.setter + def num_of_instance(self, num_of_instance): + """Set num_of_instance.""" + if num_of_instance is None or _check_value('num_of_instance', num_of_instance, int): + self._num_of_instance = num_of_instance + + @property + def inter_num_of_threads(self): + """Get inter_num_of_threads.""" + return self._inter_num_of_threads + + @inter_num_of_threads.setter + def inter_num_of_threads(self, inter_num_of_threads): + """Set inter_num_of_threads.""" + if inter_num_of_threads is None or _check_value('inter_num_of_threads', + inter_num_of_threads, int): + self._inter_num_of_threads = inter_num_of_threads + + @property + def intra_num_of_threads(self): + """Get intra_num_of_threads.""" + return self._intra_num_of_threads + + @intra_num_of_threads.setter + def intra_num_of_threads(self, intra_num_of_threads): + """Get intra_num_of_threads.""" + if intra_num_of_threads is None or _check_value('intra_num_of_threads', + intra_num_of_threads, int): + self._intra_num_of_threads = intra_num_of_threads + + @property + def model(self): + """Get model.""" + return self._model + + @model.setter + def model(self, model): + """Set model.""" + self._model = model + + @property + def model_name(self): + """Get model name.""" + return self._model_name + + @model_name.setter + def model_name(self, model_name): + """Set model name.""" + if _check_value("model_name", model_name, str): + self._model_name = model_name + + @property + def framework(self): + """Set framework.""" + return self._framework + + @framework.setter + def framework(self, framework): + """Get framework.""" + self._framework = framework + + +class AccuracyCriterion: + """Class of Accuracy Criterion. + + Args: + higher_is_better(bool, optional): This flag indicates whether the metric higher is the better. + Default value is True. + criterion:(str, optional): This flag indicates whether the metric loss is 'relative' or 'absolute'. + Default value is 'relative'. + tolerable_loss(float, optional): This float indicates how much metric loss we can accept. + Default value is 0.01. + + Example:: + + from neural_compressor.config import AccuracyCriterion + + accuracy_criterion = AccuracyCriterion( + higher_is_better=True, # optional. + criterion='relative', # optional. Available values are 'relative' and 'absolute'. + tolerable_loss=0.01, # optional. 
+ ) + """ + def __init__(self, higher_is_better=True, criterion='relative', tolerable_loss=0.01): + """Init an AccuracyCriterion object.""" + self.higher_is_better = higher_is_better + self.criterion = criterion + self.tolerable_loss = tolerable_loss + + @property + def higher_is_better(self): + """Get higher_is_better.""" + return self._higher_is_better + + @higher_is_better.setter + def higher_is_better(self, higher_is_better): + """Set higher_is_better.""" + if _check_value('higher_is_better', higher_is_better, bool): + self._higher_is_better = higher_is_better + + @property + def relative(self): + """Get tolerable_loss when criterion is relative.""" + if self.criterion != 'relative': + return None + return self.tolerable_loss + + @relative.setter + def relative(self, relative): + """Set tolerable_loss and criterion to relative.""" + self.criterion = 'relative' + self.tolerable_loss = relative + + @property + def absolute(self): + """Get tolerable_loss when criterion is absolute.""" + if self.criterion != 'absolute': + return None + return self.tolerable_loss + + @absolute.setter + def absolute(self, absolute): + """Set tolerable_loss and criterion to absolute.""" + self.criterion = 'absolute' + self.tolerable_loss = absolute + + @property + def criterion(self): + """Get criterion.""" + return self._criterion + + @criterion.setter + def criterion(self, criterion): + """Set criterion.""" + if _check_value('criterion', criterion, str, ['relative', 'absolute']): + self._criterion = criterion + + @property + def tolerable_loss(self): + """Get tolerable_loss.""" + return self._tolerable_loss + + @tolerable_loss.setter + def tolerable_loss(self, tolerable_loss): + """Set tolerable_loss.""" + if _check_value('tolerable_loss', tolerable_loss, float): + self._tolerable_loss = tolerable_loss + + def __str__(self): + """Get criterion.""" + return self.criterion + + def keys(self): + """Returns keys of the dict.""" + return ('higher_is_better', 'criterion', 'tolerable_loss') + + def __getitem__(self, item): + """Get the dict.""" + return getattr(self, item) + + +accuracy_criterion = AccuracyCriterion() + + class QuantizationConfig(_BaseQuantizationConfig): def __init__(self, inputs=[], @@ -491,6 +948,116 @@ def approach(self, approach): ): self._approach = approach + +class WeightPruningConfig: + """Config Class for Pruning. Define a single or a sequence of pruning configs. + + Args: + pruning_configs (list of dicts, optional): Local pruning configs only valid to linked layers. + Parameters defined out of pruning_configs are valid for all layers. + By defining dicts in pruning_config, users can set different pruning strategies for corresponding layers. + Defaults to [{}]. + target_sparsity (float, optional): Sparsity ratio the model can reach after pruning. + Supports a float between 0 and 1. + Default to 0.90. + pruning_type (str, optional): A string define the criteria for pruning. + Supports "magnitude", "snip", "snip_momentum", + "magnitude_progressive", "snip_progressive", "snip_momentum_progressive", "pattern_lock" + Default to "snip_momentum", which is the most feasible pruning criteria under most situations. + pattern (str, optional): Sparsity's structure (or unstructure) types. + Supports "NxM" (e.g "4x1", "8x1"), "channelx1" & "1xchannel"(channel-wise), "N:M" (e.g "2:4"). + Default to "4x1", which can be directly processed by our kernels in ITREX. + op_names (list of str, optional): Layers contains some specific names to be included for pruning. + Defaults to []. 
+        excluded_op_names: Layers containing specific names to be excluded from pruning.
+            Defaults to [].
+        start_step (int, optional): The step to start pruning.
+            Supports an integer.
+            Defaults to 0.
+        end_step (int, optional): The step to end pruning.
+            Supports an integer.
+            Defaults to 0.
+        pruning_scope (str, optional): Determines whether layers' scores are gathered together for sorting.
+            Supports "global" and "local".
+            Defaults to "global", since this leads to less accuracy loss.
+        pruning_frequency (int, optional): The frequency of the pruning operation.
+            Supports an integer.
+            Defaults to 1.
+        min_sparsity_ratio_per_op (float, optional): Minimum restriction for every layer's sparsity.
+            Supports a float between 0 and 1.
+            Defaults to 0.0.
+        max_sparsity_ratio_per_op (float, optional): Maximum restriction for every layer's sparsity.
+            Supports a float between 0 and 1.
+            Defaults to 0.98.
+        sparsity_decay_type (str, optional): How to schedule the increase of sparsity.
+            Supports "exp", "cube", "linear".
+            Defaults to "exp".
+        pruning_op_types (list of str): Operator types currently supported for pruning.
+            Supports ['Conv', 'Linear'].
+            Defaults to ['Conv', 'Linear'].
+
+    Example::
+
+        from neural_compressor.config import WeightPruningConfig
+        local_configs = [
+            {
+                "pruning_scope": "local",
+                "target_sparsity": 0.6,
+                "op_names": ["query", "key", "value"],
+                "pattern": "channelx1",
+            },
+            {
+                "pruning_type": "snip_momentum_progressive",
+                "target_sparsity": 0.5,
+                "op_names": ["self.attention.dense"],
+            }
+        ]
+        config = WeightPruningConfig(
+            pruning_configs=local_configs,
+            target_sparsity=0.8
+        )
+        prune = Pruning(config)
+        prune.update_config(start_step=1, end_step=10)
+        prune.model = self.model
+    """
+
+    def __init__(self, pruning_configs=[{}],  # an empty dict will use the global values
+                 target_sparsity=0.9, pruning_type="snip_momentum", pattern="4x1", op_names=[],
+                 excluded_op_names=[],
+                 start_step=0, end_step=0, pruning_scope="global", pruning_frequency=1,
+                 min_sparsity_ratio_per_op=0.0, max_sparsity_ratio_per_op=0.98,
+                 sparsity_decay_type="exp", pruning_op_types=['Conv', 'Linear'],
+                 **kwargs):
+        """Init a WeightPruningConfig object."""
+        self.pruning_configs = pruning_configs
+        self._weight_compression = DotDict({
+            'target_sparsity': target_sparsity,
+            'pruning_type': pruning_type,
+            'pattern': pattern,
+            'op_names': op_names,
+            'excluded_op_names': excluded_op_names,  # global only
+            'start_step': start_step,
+            'end_step': end_step,
+            'pruning_scope': pruning_scope,
+            'pruning_frequency': pruning_frequency,
+            'min_sparsity_ratio_per_op': min_sparsity_ratio_per_op,
+            'max_sparsity_ratio_per_op': max_sparsity_ratio_per_op,
+            'sparsity_decay_type': sparsity_decay_type,
+            'pruning_op_types': pruning_op_types,
+        })
+        self._weight_compression.update(kwargs)
+
+    @property
+    def weight_compression(self):
+        """Get weight_compression."""
+        return self._weight_compression
+
+    @weight_compression.setter
+    def weight_compression(self, weight_compression):
+        """Set weight_compression."""
+        self._weight_compression = weight_compression
+
+
 class WeightConf:
     def __init__(self, datatype=None, scheme=None, granularity=None, algorithm=None):
         self._datatype = datatype
@@ -534,13 +1101,80 @@ def algorithm(self, algorithm):
         if _check_value('algorithm', algorithm, str, ['minmax', 'kl']):
             self._algorithm = algorithm if isinstance(algorithm, list) else [algorithm]

+
+class DistillationConfig:
+    """Config of distillation.
+
+    Args:
+        teacher_model (Callable): Teacher model for distillation. Defaults to None.
+        features (optional): Teacher features for distillation; features and teacher_model are alternatives.
+                             Defaults to None.
+        criterion (Callable, optional): Distillation loss configuration.
+        optimizer (dictionary, optional): Optimizer configuration.
+
+    Example::
+
+        from neural_compressor.training import prepare_compression
+        from neural_compressor.config import DistillationConfig, KnowledgeDistillationLossConfig
+
+        distil_loss = KnowledgeDistillationLossConfig()
+        conf = DistillationConfig(teacher_model=model, criterion=distil_loss)
+        criterion = nn.CrossEntropyLoss()
+        optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
+        compression_manager = prepare_compression(model, conf)
+        model = compression_manager.model
+    """
+    def __init__(self,
+                 teacher_model=None,
+                 criterion=criterion,
+                 optimizer={'SGD': {
+                     'learning_rate': 0.0001
+                 }}):
+        """Init a DistillationConfig object."""
+        self.criterion = criterion
+        self.optimizer = optimizer
+        self.teacher_model = teacher_model
+
+    @property
+    def criterion(self):
+        """Get criterion."""
+        return self._criterion
+
+    @criterion.setter
+    def criterion(self, criterion):
+        """Set criterion."""
+        self._criterion = criterion
+
+    @property
+    def optimizer(self):
+        """Get optimizer."""
+        return self._optimizer
+
+    @optimizer.setter
+    def optimizer(self, optimizer):
+        """Set optimizer."""
+        self._optimizer = optimizer
+
+    @property
+    def teacher_model(self):
+        """Get teacher_model."""
+        return self._teacher_model
+
+    @teacher_model.setter
+    def teacher_model(self, teacher_model):
+        """Set teacher_model."""
+        self._teacher_model = teacher_model
+
+
 class ActivationConf(WeightConf):
     def __init__(self, datatype=None, scheme=None, granularity=None, algorithm=None):
         super().__init__(datatype, scheme, granularity, algorithm)
 
+
 weight = WeightConf()
 activation = ActivationConf()
 
+
 class OpQuantConf:
     def __init__(self, op_type=None, weight=weight, activation=activation):
         self._op_type = op_type
@@ -564,6 +1198,7 @@ def weight(self):
     def activation(self):
         return self._activation
 
+
 class MXNet:
     def __init__(self, precisions=None):
         self._precisions = precisions
@@ -579,6 +1214,7 @@ def precisions(self, precisions):
         if _check_value('precisions', precisions, str, ['int8', 'uint8', 'fp32', 'bf16', 'fp16']):
             self._precisions = precisions
 
+
 class ONNX(MXNet):
     def __init__(self, graph_optimization_level=None, precisions=None):
         super().__init__(precisions)
@@ -594,14 +1230,17 @@ def graph_optimization_level(self, graph_optimization_level):
                            ['DISABLE_ALL', 'ENABLE_BASIC', 'ENABLE_EXTENDED', 'ENABLE_ALL']):
             self._graph_optimization_level = graph_optimization_level
 
+
 class TensorFlow(MXNet):
     def __init__(self, precisions=None):
         super().__init__(precisions)
 
+
 class Keras(MXNet):
     def __init__(self, precisions=None):
         super().__init__(precisions)
 
+
 class PyTorch(MXNet):
     def __init__(self, precisions=None):
         super().__init__(precisions)
@@ -620,6 +1259,7 @@ def __init__(self, supernet=None, metrics=None, population=50, num_evals=100000,
             'batch_size': batch_size,
         }
 
+
 class NASConfig:
     def __init__(self, approach=None, search_space=None, search_algorithm=None,
                  metrics=[], higher_is_better=[], max_trials=3, seed=42, dynas=None):
@@ -734,4 +1374,5 @@ def options(self):
     def onnxruntime(self):
         return self._onnxruntime
 
+
 config = Config()
diff --git a/neural_compressor/config.py b/neural_compressor/config.py
index 05064b97476..c39ecc5e944 100644
--- a/neural_compressor/config.py
+++ b/neural_compressor/config.py
@@ -221,6 +221,7 @@ def tensorboard(self, tensorboard):
         if
_check_value('tensorboard', tensorboard, bool): self._tensorboard = tensorboard + class BenchmarkConfig: """Config Class for Benchmark. @@ -1187,7 +1188,8 @@ def approach(self): @approach.setter def approach(self, approach): """Set approach.""" - if _check_value("approach", approach, str, ["static", "dynamic", "auto"]): + if _check_value("approach", approach, str, ["static", "dynamic", "auto",\ + "post_training_static_quant"]): self._approach = QUANTMAPPING[approach] @property @@ -1577,7 +1579,6 @@ def __init__(self, criterion = KnowledgeDistillationLossConfig() - class DistillationConfig: """Config of distillation. From be1f5261f54e96092f4d3cba626a6de41c2a88b1 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 4 Apr 2023 14:31:22 +0800 Subject: [PATCH 032/103] fixed metric setting Signed-off-by: yiliu30 --- neural_compressor/quantization.py | 12 ++++++------ neural_compressor/strategy/strategy.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index f4a344c3c50..b77b4003890 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -65,6 +65,7 @@ def __init__(self, conf, **kwargs): self._eval_dataloader = None self._eval_metric = None self._model = None + self._metric = None self.callbacks = None if "model" in kwargs: self.model = kwargs["model"] @@ -111,7 +112,7 @@ def pre_proccess(self): q_func=self._train_func, eval_func=self._eval_func, eval_dataloader=self._eval_dataloader, - eval_metric=self._eval_metric, + eval_metric=self.metric, resume=_resume, q_hooks=self.callbacks.hooks if self.callbacks is not None else None) @@ -272,8 +273,7 @@ def eval_dataloader(self, dataloader): @property def metric(self): """Get `metric` attribute.""" - assert False, 'Should not try to get the value of `metric` attribute.' - return None + return self._metric @metric.setter def metric(self, user_metric): @@ -302,7 +302,8 @@ def metric(self, user_metric): The object of Metric or a dict of built-in metric configurations. 
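        An editor's sketch of the two accepted forms described above (the object
        name `quantizer` and the built-in metric name 'topk' are assumed here,
        not taken from this patch)::

            # a dict of a built-in metric configuration
            quantizer.metric = {'topk': 1}
            # or a user-defined metric object, registered under the hood
            quantizer.metric = MyMetric()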
""" - from .metric import Metric as NCMetric, METRICS + from .metric import Metric as NCMetric + from .metric import METRICS if isinstance(user_metric, dict): metric_cfg = user_metric else: @@ -319,8 +320,7 @@ def metric(self, user_metric): metric_cfg = {name: id(user_metric)} metrics = METRICS(self.conf.quantization.framework) metrics.register(name, metric_cls) - - self._eval_metric = user_metric + self._metric = metric_cfg @property def calib_func(self): diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 0a8d9aae2a4..096aa1c44a9 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -1221,7 +1221,7 @@ def _evaluate(self, model): # post_eval_hook to deal the tensor self.adaptor._post_eval_hook(model, accuracy=val[0]) else: - assert self._not_tuning, "Please set eval_dataloader and eval_metric for create eval_func" + assert not self._not_tuning, "Please set eval_dataloader and eval_metric for create eval_func" postprocess_cfg = None metric_cfg = self.eval_metric From 3d250c00752a35a9a56beacc0193b850ba89a523 Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Tue, 4 Apr 2023 14:36:09 +0800 Subject: [PATCH 033/103] minor fix --- neural_compressor/conf/pythonic_config.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/neural_compressor/conf/pythonic_config.py b/neural_compressor/conf/pythonic_config.py index b0168e81ce8..16612465cb4 100644 --- a/neural_compressor/conf/pythonic_config.py +++ b/neural_compressor/conf/pythonic_config.py @@ -14,13 +14,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +"""Configs for Neural Compressor 1.x.""" import logging +import datetime from schema import Schema, And, Optional from .dotdict import DotDict logger = logging.getLogger("neural_compressor") +default_workspace = './nc_workspace/{}/'.format( + datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')) ops_schema = Schema({ From 7c56af13bf54599d17da237f5b85acfabaaf6a12 Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Tue, 4 Apr 2023 14:40:54 +0800 Subject: [PATCH 034/103] minor fix --- neural_compressor/conf/pythonic_config.py | 204 +++++++++++----------- 1 file changed, 102 insertions(+), 102 deletions(-) diff --git a/neural_compressor/conf/pythonic_config.py b/neural_compressor/conf/pythonic_config.py index 16612465cb4..765129accb9 100644 --- a/neural_compressor/conf/pythonic_config.py +++ b/neural_compressor/conf/pythonic_config.py @@ -171,6 +171,108 @@ def tensorboard(self, tensorboard): self._tensorboard = tensorboard +class AccuracyCriterion: + """Class of Accuracy Criterion. + + Args: + higher_is_better(bool, optional): This flag indicates whether the metric higher is the better. + Default value is True. + criterion:(str, optional): This flag indicates whether the metric loss is 'relative' or 'absolute'. + Default value is 'relative'. + tolerable_loss(float, optional): This float indicates how much metric loss we can accept. + Default value is 0.01. + + Example:: + + from neural_compressor.config import AccuracyCriterion + + accuracy_criterion = AccuracyCriterion( + higher_is_better=True, # optional. + criterion='relative', # optional. Available values are 'relative' and 'absolute'. + tolerable_loss=0.01, # optional. 
+ ) + """ + def __init__(self, higher_is_better=True, criterion='relative', tolerable_loss=0.01): + """Init an AccuracyCriterion object.""" + self.higher_is_better = higher_is_better + self.criterion = criterion + self.tolerable_loss = tolerable_loss + + @property + def higher_is_better(self): + """Get higher_is_better.""" + return self._higher_is_better + + @higher_is_better.setter + def higher_is_better(self, higher_is_better): + """Set higher_is_better.""" + if _check_value('higher_is_better', higher_is_better, bool): + self._higher_is_better = higher_is_better + + @property + def relative(self): + """Get tolerable_loss when criterion is relative.""" + if self.criterion != 'relative': + return None + return self.tolerable_loss + + @relative.setter + def relative(self, relative): + """Set tolerable_loss and criterion to relative.""" + self.criterion = 'relative' + self.tolerable_loss = relative + + @property + def absolute(self): + """Get tolerable_loss when criterion is absolute.""" + if self.criterion != 'absolute': + return None + return self.tolerable_loss + + @absolute.setter + def absolute(self, absolute): + """Set tolerable_loss and criterion to absolute.""" + self.criterion = 'absolute' + self.tolerable_loss = absolute + + @property + def criterion(self): + """Get criterion.""" + return self._criterion + + @criterion.setter + def criterion(self, criterion): + """Set criterion.""" + if _check_value('criterion', criterion, str, ['relative', 'absolute']): + self._criterion = criterion + + @property + def tolerable_loss(self): + """Get tolerable_loss.""" + return self._tolerable_loss + + @tolerable_loss.setter + def tolerable_loss(self, tolerable_loss): + """Set tolerable_loss.""" + if _check_value('tolerable_loss', tolerable_loss, float): + self._tolerable_loss = tolerable_loss + + def __str__(self): + """Get criterion.""" + return self.criterion + + def keys(self): + """Returns keys of the dict.""" + return ('higher_is_better', 'criterion', 'tolerable_loss') + + def __getitem__(self, item): + """Get the dict.""" + return getattr(self, item) + + +accuracy_criterion = AccuracyCriterion() + + class _BaseQuantizationConfig: """Args: inputs: inputs of model @@ -793,108 +895,6 @@ def framework(self, framework): self._framework = framework -class AccuracyCriterion: - """Class of Accuracy Criterion. - - Args: - higher_is_better(bool, optional): This flag indicates whether the metric higher is the better. - Default value is True. - criterion:(str, optional): This flag indicates whether the metric loss is 'relative' or 'absolute'. - Default value is 'relative'. - tolerable_loss(float, optional): This float indicates how much metric loss we can accept. - Default value is 0.01. - - Example:: - - from neural_compressor.config import AccuracyCriterion - - accuracy_criterion = AccuracyCriterion( - higher_is_better=True, # optional. - criterion='relative', # optional. Available values are 'relative' and 'absolute'. - tolerable_loss=0.01, # optional. 
- ) - """ - def __init__(self, higher_is_better=True, criterion='relative', tolerable_loss=0.01): - """Init an AccuracyCriterion object.""" - self.higher_is_better = higher_is_better - self.criterion = criterion - self.tolerable_loss = tolerable_loss - - @property - def higher_is_better(self): - """Get higher_is_better.""" - return self._higher_is_better - - @higher_is_better.setter - def higher_is_better(self, higher_is_better): - """Set higher_is_better.""" - if _check_value('higher_is_better', higher_is_better, bool): - self._higher_is_better = higher_is_better - - @property - def relative(self): - """Get tolerable_loss when criterion is relative.""" - if self.criterion != 'relative': - return None - return self.tolerable_loss - - @relative.setter - def relative(self, relative): - """Set tolerable_loss and criterion to relative.""" - self.criterion = 'relative' - self.tolerable_loss = relative - - @property - def absolute(self): - """Get tolerable_loss when criterion is absolute.""" - if self.criterion != 'absolute': - return None - return self.tolerable_loss - - @absolute.setter - def absolute(self, absolute): - """Set tolerable_loss and criterion to absolute.""" - self.criterion = 'absolute' - self.tolerable_loss = absolute - - @property - def criterion(self): - """Get criterion.""" - return self._criterion - - @criterion.setter - def criterion(self, criterion): - """Set criterion.""" - if _check_value('criterion', criterion, str, ['relative', 'absolute']): - self._criterion = criterion - - @property - def tolerable_loss(self): - """Get tolerable_loss.""" - return self._tolerable_loss - - @tolerable_loss.setter - def tolerable_loss(self, tolerable_loss): - """Set tolerable_loss.""" - if _check_value('tolerable_loss', tolerable_loss, float): - self._tolerable_loss = tolerable_loss - - def __str__(self): - """Get criterion.""" - return self.criterion - - def keys(self): - """Returns keys of the dict.""" - return ('higher_is_better', 'criterion', 'tolerable_loss') - - def __getitem__(self, item): - """Get the dict.""" - return getattr(self, item) - - -accuracy_criterion = AccuracyCriterion() - - class QuantizationConfig(_BaseQuantizationConfig): def __init__(self, inputs=[], From 972cf3965287a29989d06ca50b385b8d85752818 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 4 Apr 2023 15:05:14 +0800 Subject: [PATCH 035/103] update tpe ut Signed-off-by: yiliu30 --- test/strategy/test_tpe.py | 127 +++++++------------------------------- 1 file changed, 24 insertions(+), 103 deletions(-) diff --git a/test/strategy/test_tpe.py b/test/strategy/test_tpe.py index 2c0dc5eac67..79ad7bcc20a 100644 --- a/test/strategy/test_tpe.py +++ b/test/strategy/test_tpe.py @@ -3,59 +3,7 @@ import unittest import os import shutil -import yaml -def build_fake_yaml(): - fake_yaml = ''' - model: - name: fake_yaml - framework: tensorflow - inputs: x - outputs: op_to_store - device: cpu - evaluation: - accuracy: - metric: - topk: 1 - tuning: - strategy: - name: tpe - accuracy_criterion: - relative: 0.01 - workspace: - path: saved - ''' - y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) - f.close() - -def build_fake_yaml2(): - fake_yaml = ''' - model: - name: fake_yaml - framework: tensorflow - inputs: x - outputs: op_to_store - device: cpu - evaluation: - accuracy: - metric: - topk: 1 - tuning: - strategy: - name: tpe - exit_policy: - max_trials: 5 - accuracy_criterion: - relative: -0.01 - workspace: - path: saved - ''' - y = yaml.load(fake_yaml, 
Loader=yaml.SafeLoader) - with open('fake_yaml2.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) - f.close() def build_fake_model(): import tensorflow as tf @@ -95,29 +43,12 @@ class TestQuantization(unittest.TestCase): @classmethod def setUpClass(self): self.constant_graph = build_fake_model() - build_fake_yaml() - build_fake_yaml2() @classmethod def tearDownClass(self): - try: - os.remove('fake_yaml.yaml') - os.remove('fake_yaml2.yaml') - - shutil.rmtree("saved", ignore_errors=True) - except: - print("Error while deleting file ") - - def test_run_tpe_one_trial(self): - # from neural_compressor.experimental import Quantization, common + shutil.rmtree("saved", ignore_errors=True) - # quantizer = Quantization('fake_yaml.yaml') - # dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) - # quantizer.calib_dataloader = common.DataLoader(dataset) - # quantizer.eval_dataloader = common.DataLoader(dataset) - # quantizer.model = self.constant_graph - # quantizer.fit() - + def test_run_tpe_one_trial(self): from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion from neural_compressor.data import Datasets, DATALOADERS @@ -128,7 +59,7 @@ def test_run_tpe_one_trial(self): # tuning and accuracy criterion tune_cri = TuningCriterion(strategy='tpe') - acc_cri = AccuracyCriterion(tolerable_loss=0.01) + acc_cri = AccuracyCriterion(tolerable_loss=-0.01) def eval_func(model): return 1 conf = PostTrainingQuantConfig(quant_level=1, tuning_criterion=tune_cri, accuracy_criterion=acc_cri) @@ -137,38 +68,28 @@ def eval_func(model): calib_dataloader=dataloader, eval_func=eval_func) - - def test_run_tpe_max_trials(self): - from neural_compressor.experimental import Quantization, common - - quantizer = Quantization('fake_yaml2.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.eval_dataloader = common.DataLoader(dataset) - quantizer.model = self.constant_graph - quantizer.fit() - - def test_loss_calculation(self): - from neural_compressor.contrib.strategy.tpe import TpeTuneStrategy - from neural_compressor.experimental import Quantization, common - - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.eval_dataloader = common.DataLoader(dataset) - quantizer.model = self.constant_graph - - testObject = TpeTuneStrategy(quantizer.model, quantizer.conf, quantizer.calib_dataloader) - testObject._calculate_loss_function_scaling_components(0.01, 2, testObject.loss_function_config) - # check if latency difference between min and max corresponds to 10 points of loss function - tmp_val = testObject.calculate_loss(0.01, 2, testObject.loss_function_config) - tmp_val2 = testObject.calculate_loss(0.01, 1, testObject.loss_function_config) - self.assertTrue(True if int(tmp_val2 - tmp_val) == 10 else False) - # check if 1% of acc difference corresponds to 10 points of loss function - tmp_val = testObject.calculate_loss(0.02, 2, testObject.loss_function_config) - tmp_val2 = testObject.calculate_loss(0.03, 2, testObject.loss_function_config) - self.assertTrue(True if int(tmp_val2 - tmp_val) == 10 else False) + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion + from neural_compressor.data import Datasets, DATALOADERS + + # 
dataset and dataloader
+        dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True)
+        dataloader = DATALOADERS["tensorflow"](dataset)
+
+        # tuning and accuracy criterion
+        tune_cri = TuningCriterion(strategy='tpe', max_trials=5)
+        acc_cri = AccuracyCriterion(tolerable_loss=-0.01)
+
+        from neural_compressor.metric import METRICS
+        metrics = METRICS('tensorflow')
+        top1 = metrics['topk']()
+        conf = PostTrainingQuantConfig(quant_level=1, tuning_criterion=tune_cri, accuracy_criterion=acc_cri)
+        q_model = fit(model=self.constant_graph,
+                      conf=conf,
+                      calib_dataloader=dataloader,
+                      eval_dataloader=dataloader,
+                      eval_metric=top1)

 if __name__ == "__main__":
     unittest.main()

From 8f66bc8168024f700f3e986fe493bccf572e405b Mon Sep 17 00:00:00 2001
From: "Cheng, Zixuan"
Date: Tue, 4 Apr 2023 15:19:43 +0800
Subject: [PATCH 036/103] config refine

Signed-off-by: Cheng, Zixuan
---
 neural_compressor/compression/callbacks.py |  9 ++---
 neural_compressor/conf/pythonic_config.py  | 41 ++++++++++++++++++++++
 neural_compressor/config.py                | 39 ++++++++------------
 neural_compressor/mix_precision.py         | 10 +++---
 neural_compressor/quantization.py          |  1 -
 5 files changed, 63 insertions(+), 37 deletions(-)

diff --git a/neural_compressor/compression/callbacks.py b/neural_compressor/compression/callbacks.py
index f009d6cb4f7..3bfda9697d0 100644
--- a/neural_compressor/compression/callbacks.py
+++ b/neural_compressor/compression/callbacks.py
@@ -71,7 +71,6 @@ def __init__(self, conf=None, model=None):
         self._train_dataloader = None
         self._eval_func = None
         self._eval_dataloader = None
-        self._eval_metric = None
         self._train_distributed = False
         self._evaluation_distributed = False
         self.adaptor = None
@@ -307,7 +306,7 @@ def pre_process(self):
                             q_func=self._train_func,
                             eval_func=self._eval_func,
                             eval_dataloader=self._eval_dataloader,
-                            eval_metric=self._eval_metric,
+                            eval_metric=self.metric,
                             resume=_resume,
                             q_hooks=None)
@@ -427,8 +426,7 @@ def eval_dataloader(self, dataloader):
     @property
     def metric(self):
         """Get `metric` attribute."""
-        assert False, 'Should not try to get the value of `metric` attribute.'
-        return None
+        return self._metric
 
     @metric.setter
     def metric(self, user_metric):
@@ -474,8 +472,7 @@ def metric(self, user_metric):
                 metric_cfg = {name: id(user_metric)}
                 metrics = METRICS(self.conf.qat_quantization.framework)
                 metrics.register(name, metric_cls)
-
-        self._metric = user_metric
+        self._metric = metric_cfg
 
     def remove_hook(self, scope, hook):
         """Remove hooks if user want to tune accuracy with train_func."""
diff --git a/neural_compressor/conf/pythonic_config.py b/neural_compressor/conf/pythonic_config.py
index 765129accb9..22167ae1477 100644
--- a/neural_compressor/conf/pythonic_config.py
+++ b/neural_compressor/conf/pythonic_config.py
@@ -1105,6 +1105,47 @@ def algorithm(self, algorithm):
         self._algorithm = algorithm if isinstance(algorithm, list) else [algorithm]
 
 
+class KnowledgeDistillationLossConfig:
+    """Config Class for Knowledge Distillation Loss.
+
+    Args:
+        temperature (float, optional): Hyperparameter that controls the entropy
+            of probability distributions. Defaults to 1.0.
+        loss_types (list[str], optional): Loss types; should be a list of length 2.
+            The first item is the loss type for the student model output and the ground-truth label,
+            the second item is the loss type for the student model output and the teacher model output.
+            Supported types for the first item are "CE", "MSE".
+            Supported types for the second item are "CE", "MSE", "KL".
+            Defaults to ['CE', 'CE'].
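+
+            A hedged sketch (editor's addition; the chosen values are illustrative)::
+
+                # CE between student logits and labels, KL between student and teacher
+                loss_conf = KnowledgeDistillationLossConfig(
+                    temperature=2.0, loss_types=['CE', 'KL'])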
+        loss_weights (list[float], optional): Loss weights; should be a list of length 2 whose items sum to 1.0.
+            The first item is the weight multiplied to the loss of the student model output and the ground-truth label,
+            the second item is the weight multiplied to the loss of the student model output and the teacher model output.
+            Defaults to [0.5, 0.5].
+
+    Example::
+
+        from neural_compressor.config import DistillationConfig, KnowledgeDistillationLossConfig
+        from neural_compressor.training import prepare_compression
+
+        criterion_conf = KnowledgeDistillationLossConfig()
+        d_conf = DistillationConfig(teacher_model=teacher_model, criterion=criterion_conf)
+        compression_manager = prepare_compression(model, d_conf)
+        model = compression_manager.model
+    """
+    def __init__(self, temperature=1.0, loss_types=['CE', 'CE'], loss_weights=[0.5, 0.5]):
+        """Init a KnowledgeDistillationLossConfig object."""
+        self.config = DotDict({
+            'KnowledgeDistillationLoss': {
+                'temperature': temperature,
+                'loss_types': loss_types,
+                'loss_weights': loss_weights
+            }
+        })
+
+
+criterion = KnowledgeDistillationLossConfig()
+
+
 class DistillationConfig:
     """Config of distillation.
 
diff --git a/neural_compressor/config.py b/neural_compressor/config.py
index c39ecc5e944..914bd2f2ce6 100644
--- a/neural_compressor/config.py
+++ b/neural_compressor/config.py
@@ -14,7 +14,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Configs for Neural Compressor."""
+"""Configs for Neural Compressor 2.x."""
 import datetime
 import logging
 from schema import Schema, And, Optional
@@ -538,13 +538,13 @@ class TuningCriterion:
     """Class for Tuning Criterion.
 
     Args:
-        strategy: Strategy name used in tuning, Please refer to docs/source/tuning_strategies.md.
-        strategy_kwargs: Parameters for strategy, Please refer to docs/source/tuning_strategies.md.
-        objective: Objective with accuracy constraint guaranteed, support 'performance', 'modelsize', 'footprint'.
+        strategy: Strategy name used in tuning. Please refer to docs/source/tuning_strategies.md.
+        strategy_kwargs: Parameters for strategy. Please refer to docs/source/tuning_strategies.md.
+        objective: String or dict. Objective with accuracy constraint guaranteed. String value supports
+            'performance', 'modelsize', 'footprint'. Default value is 'performance'.
             Please refer to docs/source/objective.md.
-            Default value is 'performance'.
-        timeout: Tuning timeout (seconds). default value is 0 which means early stop
-        max_trials: Max tune times. default value is 100. Combine with timeout field to decide when to exit
+        timeout: Tuning timeout (seconds). Default value is 0 which means early stop.
+        max_trials: Max tune times. Default value is 100. Combine with timeout field to decide when to exit.
Example:: from neural_compressor.config import TuningCriterion @@ -557,13 +557,12 @@ class TuningCriterion: ) """ def __init__(self, strategy="basic", strategy_kwargs=None, timeout=0, - max_trials=100, objective="performance", multi_objectives={}): + max_trials=100, objective="performance"): """Init a TuningCriterion object.""" self.strategy = strategy self.timeout = timeout self.max_trials = max_trials self.objective = objective - self.multi_objectives = multi_objectives self.strategy_kwargs = strategy_kwargs @property @@ -598,23 +597,15 @@ def objective(self, objective): if _check_value('objective', objective, str, ['performance', 'accuracy', 'modelsize', 'footprint']): self._objective = objective - - @property - def multi_objectives(self): - """Get multi-objectives.""" - return self._multi_objectives - - @multi_objectives.setter - def multi_objectives(self, multi_objectives): - if _check_value('multi_objectives', multi_objectives, dict): - if 'weight' in multi_objectives.keys() and isinstance(multi_objectives['weight'], list): - assert len(multi_objectives['objective']) == len(multi_objectives['weight']) - - for k, v in multi_objectives.items(): - _check_value('multi_objectives', k, str, ['objective', 'weight', 'higher_is_better']) + + if _check_value('objective', objective, dict): + if 'weight' in objective.keys() and isinstance(objective['weight'], list): + assert len(objective['objective']) == len(objective['weight']) + for k, v in objective.items(): + _check_value('objective', k, str, ['objective', 'weight', 'higher_is_better']) if k == 'objective': _check_value('objective', v, str, ['performance', 'accuracy', 'modelsize', 'footprint']) - self._multi_objectives = multi_objectives + self._objective = objective @property def strategy(self): diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py index b822a08b52f..b95979f602d 100644 --- a/neural_compressor/mix_precision.py +++ b/neural_compressor/mix_precision.py @@ -59,7 +59,6 @@ def __init__(self, conf=None): self._eval_func = None self._eval_dataloader = None - self._eval_metric = None self._model = None def pre_process(self): @@ -84,7 +83,7 @@ def pre_process(self): q_func=None, eval_func=self._eval_func, eval_dataloader=self._eval_dataloader, - eval_metric=self._eval_metric, + eval_metric=self.metric, resume=_resume, q_hooks=None) @@ -250,8 +249,8 @@ def model(self, user_model): @property def metric(self): - """Get metric.""" - assert False, 'Should not try to get the value of `metric` attribute.' 
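# Editor's note: a minimal sketch of the dict form accepted by the `objective`
# setter shown above; keys 'objective', 'weight' and 'higher_is_better' are the
# ones it validates, and the weight list must match the objective list in length.
# The concrete values here are illustrative, not taken from this patch series.
#
#     from neural_compressor.config import TuningCriterion
#     tune_cri = TuningCriterion(objective={
#         'objective': ['performance', 'accuracy'],
#         'weight': [0.8, 0.2],
#         'higher_is_better': [True, True],
#     })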
+ """Get `metric` attribute.""" + return self._metric @metric.setter def metric(self, user_metric): @@ -295,8 +294,7 @@ def metric(self, user_metric): metric_cfg = {name: id(user_metric)} metrics = METRICS(self.conf.quantization.framework) metrics.register(name, metric_cls) - - self._metric = user_metric + self._metric = metric_cfg @property diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index b77b4003890..0315a856b12 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -63,7 +63,6 @@ def __init__(self, conf, **kwargs): self._calib_dataloader = None self._eval_func = None self._eval_dataloader = None - self._eval_metric = None self._model = None self._metric = None self.callbacks = None From 36fc40a9f13bac6b8b086ccf4e4656e763917b8c Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Tue, 4 Apr 2023 15:41:27 +0800 Subject: [PATCH 037/103] minor fix Signed-off-by: Cheng, Zixuan --- neural_compressor/config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 914bd2f2ce6..3e8783ded8b 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -597,6 +597,7 @@ def objective(self, objective): if _check_value('objective', objective, str, ['performance', 'accuracy', 'modelsize', 'footprint']): self._objective = objective + return if _check_value('objective', objective, dict): if 'weight' in objective.keys() and isinstance(objective['weight'], list): From 535376c1e7ba4603cce1975cc0fd7dc9e7a0148c Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Tue, 4 Apr 2023 16:28:36 +0800 Subject: [PATCH 038/103] fix for ut Signed-off-by: Cheng, Zixuan --- neural_compressor/conf/pythonic_config.py | 1 + neural_compressor/strategy/mse_v2.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/neural_compressor/conf/pythonic_config.py b/neural_compressor/conf/pythonic_config.py index 22167ae1477..6e2dcfb206c 100644 --- a/neural_compressor/conf/pythonic_config.py +++ b/neural_compressor/conf/pythonic_config.py @@ -1339,6 +1339,7 @@ def search(self, search): quantization = QuantizationConfig() benchmark = BenchmarkConfig() +options = Options() pruning = WeightPruningConfig() distillation = DistillationConfig(teacher_model=None) nas = NASConfig() diff --git a/neural_compressor/strategy/mse_v2.py b/neural_compressor/strategy/mse_v2.py index a1e95f3cecd..9b9fef6d8dc 100644 --- a/neural_compressor/strategy/mse_v2.py +++ b/neural_compressor/strategy/mse_v2.py @@ -154,7 +154,7 @@ def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): self.calib_dataloader, deepcopy(self._tune_cfg_converter(tune_cfg)), self.output_op_names, - self.confidence_batches, + confidence_batches, fallback=True) if not ops_lst: logger.debug(f" Try to fallback to next data type.") From 6ef6d368ee38fc72cf30025f12ebaa345850c3e2 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 6 Apr 2023 09:57:43 +0800 Subject: [PATCH 039/103] udapte obj Signed-off-by: yiliu30 --- neural_compressor/strategy/strategy.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 096aa1c44a9..e1e2a01f7f8 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -1049,17 +1049,18 @@ def _set_objectives(self): self.higher_is_better = bool(self.conf.quantization.accuracy_criterion.higher_is_better) obj_higher_is_better = None 
obj_weight = None - if self.conf.quantization.tuning_criterion.multi_objectives: - obj_higher_is_better = self.conf.quantization.tuning_criterion\ - .multi_objectives.get('higher_is_better', None) - obj_weight = self.conf.quantization.tuning_criterion.multi_objectives.get('weight', None) - obj_lst = self.conf.quantization.tuning_criterion.multi_objectives.get('objective', []) - self.use_multi_objective = len(obj_lst) > 0 - if self.use_multi_objective: + obj = self.conf.quantization.tuning_criterion.objective + use_multi_objs = isinstance(obj, dict) + self.use_multi_objective = False + if use_multi_objs: + obj_higher_is_better = obj.get('higher_is_better', None) + obj_weight = obj.get('weight', None) + obj_lst = obj.get('objective', []) objectives = [i.lower() for i in obj_lst] + self.use_multi_objective = True else: - objectives = [self.conf.quantization.tuning_criterion.objective.lower()] - + objectives = [obj.lower()] + # set metric self.metric_name = ['Accuracy'] self.metric_criterion = [self.higher_is_better] From 6e5cb516ed205c737345f09c47bc8a6ab2d21005 Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Thu, 6 Apr 2023 16:13:38 +0800 Subject: [PATCH 040/103] fix for ut Signed-off-by: Cheng, Zixuan --- neural_compressor/conf/pythonic_config.py | 123 ++++++++++++++-------- neural_compressor/config.py | 4 + neural_compressor/quantization.py | 10 +- neural_compressor/utils/utility.py | 5 +- 4 files changed, 92 insertions(+), 50 deletions(-) diff --git a/neural_compressor/conf/pythonic_config.py b/neural_compressor/conf/pythonic_config.py index 6e2dcfb206c..a2cddd25b46 100644 --- a/neural_compressor/conf/pythonic_config.py +++ b/neural_compressor/conf/pythonic_config.py @@ -274,46 +274,76 @@ def __getitem__(self, item): class _BaseQuantizationConfig: - """Args: - inputs: inputs of model - outputs: outputs of model - backend: backend for model execution. Support 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep' - domain: model domain. Support 'auto', 'cv', 'object_detection', 'nlp' and 'recommendation_system'. - Adaptor will use specific quantization settings for different domains automatically, and - explicitly specified quantization settings will override the automatic setting. - If users set domain as auto, automatic detection for domain will be executed. - recipes: recipes for quantiztaion, support list is as below. 
-            'smooth_quant': whether do smooth quant
-            'smooth_quant_args': parameters for smooth_quant
-            'fast_bias_correction': whether do fast bias correction
-            'weight_correction': whether do weight correction
-            'gemm_to_matmul': whether convert gemm to matmul and add, only valid for onnx models
-            'graph_optimization_level': support 'DISABLE_ALL', 'ENABLE_BASIC', 'ENABLE_EXTENDED', 'ENABLE_ALL'
-                                        only valid for onnx models
-            'first_conv_or_matmul_quantization': whether quantize the first conv or matmul
-            'last_conv_or_matmul_quantization': whether quantize the last conv or matmul
-            'pre_post_process_quantization': whether quantize the ops in preprocess and postprocess
-            'add_qdq_pair_to_weight': whether add QDQ pair for weights, only vaild for onnxrt_trt_ep
-            'optypes_to_exclude_output_quant': don't quantize output of specified optypes
-            'dedicated_qdq_pair': whether dedicate QDQ pair, only vaild for onnxrt_trt_ep
-        quant_format: support 'default', 'QDQ' and 'QOperator'
-        device: support 'cpu' and 'gpu'
-        calibration_sampling_size: number of calibration sample
-        op_type_dict: tuning constraints on optype-wise
-        op_name_dict: tuning constraints on op-wise
-        strategy: strategy name
-        strategy_kwargs: parameters for strategy
-        objective: objective with accuracy constraint guaranteed, support 'performance', 'modelsize', 'footprint'
-        timeout: tuning timeout (seconds). default value is 0 which means early stop
-        max_trials: max tune times. default value is 100. Combine with timeout field to decide when to exit
-        performance_only: whether do evaluation
-        reduce_range: whether use 7 bit
-        example_inputs: used to trace PyTorch model with torch.jit/torch.fx
-        excluded_precisions: precisions to be excluded, support 'bf16'
-        quant_level: support auto, 0 and 1, 0 is conservative strategy, 1 is basic or user-specified
-            strategy, auto (default) is the combination of 0 and 1.
-        accuracy_criterion: accuracy constraint settings
-        use_distributed_tuning: whether use distributed tuning or not
+    """Basic class for quantization config. Inherited by PostTrainingQuantConfig and QuantizationAwareTrainingConfig.
+
+    Args:
+        inputs: Inputs of the model; only required in TensorFlow.
+        outputs: Outputs of the model; only required in TensorFlow.
+        backend: Backend for model execution. Supports 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep'.
+        domain: Model domain. Supports 'auto', 'cv', 'object_detection', 'nlp' and 'recommendation_system'.
+            Adaptor will use specific quantization settings for different domains automatically, and
+            explicitly specified quantization settings will override the automatic setting.
+            If users set domain as auto, automatic detection for domain will be executed.
+        recipes: Recipes for quantization; the supported list is as follows.
+            'smooth_quant': whether to do smooth quant
+            'smooth_quant_args': parameters for smooth_quant
+            'fast_bias_correction': whether to do fast bias correction
+            'weight_correction': whether to do weight correction
+            'gemm_to_matmul': whether to convert gemm to matmul and add; only valid for onnx models
+            'graph_optimization_level': supports 'DISABLE_ALL', 'ENABLE_BASIC', 'ENABLE_EXTENDED', 'ENABLE_ALL';
+                                        only valid for onnx models
+            'first_conv_or_matmul_quantization': whether to quantize the first conv or matmul
+            'last_conv_or_matmul_quantization': whether to quantize the last conv or matmul
+            'pre_post_process_quantization': whether to quantize the ops in preprocess and postprocess
+            'add_qdq_pair_to_weight': whether to add a QDQ pair for weights; only valid for onnxrt_trt_ep
+            'optypes_to_exclude_output_quant': don't quantize the output of specified optypes
+            'dedicated_qdq_pair': whether to dedicate a QDQ pair; only valid for onnxrt_trt_ep
+        quant_format: Supports 'default', 'QDQ' and 'QOperator'; only required in ONNXRuntime.
+        device: Supports 'cpu' and 'gpu'.
+        calibration_sampling_size: Number of calibration samples.
+        op_type_dict: Tuning constraints on optype-wise quantization for advanced users to reduce the tuning space.
+            Users can specify the quantization config by op type:
+            example:
+            {
+                'Conv': {
+                    'weight': {
+                        'dtype': ['fp32']
+                    },
+                    'activation': {
+                        'dtype': ['fp32']
+                    }
+                }
+            }
+        op_name_dict: Tuning constraints on op-wise quantization for advanced users to reduce the tuning space.
+            Users can specify the quantization config by op name:
+            example:
+            {
+                "layer1.0.conv1": {
+                    "activation": {
+                        "dtype": ["fp32"]
+                    },
+                    "weight": {
+                        "dtype": ["fp32"]
+                    }
+                },
+            }
+        strategy: Strategy name used in tuning. Please refer to docs/source/tuning_strategies.md.
+        strategy_kwargs: Parameters for the strategy. Please refer to docs/source/tuning_strategies.md.
+        objective: Objective with accuracy constraint guaranteed; supports 'performance', 'modelsize', 'footprint'.
+            Please refer to docs/source/objective.md.
+            Default value is 'performance'.
+        timeout: Tuning timeout (seconds). Default value is 0, which means early stop.
+        max_trials: Max tune times. Default value is 100. Combined with the timeout field to decide when to exit.
+        performance_only: Whether to do evaluation.
+        reduce_range: Whether to use 7-bit quantization.
+        example_inputs: Used to trace a PyTorch model with torch.jit/torch.fx.
+        excluded_precisions: Precisions to be excluded. Default value is an empty list.
+            Neural Compressor enables mixed precision with fp32 + bf16 + int8 by default.
+            If you want to disable the bf16 data type, you can specify excluded_precisions = ['bf16'].
+        quant_level: Supports auto, 0 and 1; 0 is the conservative strategy, 1 is the basic or user-specified
+            strategy, and auto (default) is the combination of 0 and 1.
+        accuracy_criterion: Accuracy constraint settings.
+        use_distributed_tuning: Whether to use distributed tuning or not.
+    """
     def __init__(self,
                  inputs=[],
                  outputs=[],
                  backend="default",
                  domain="auto",
                  recipes={},
                  quant_format="default",
                  device="cpu",
                  calibration_sampling_size=[100],
                  op_type_dict=None,
                  op_name_dict=None,
                  strategy="basic",
                  strategy_kwargs=None,
                  objective="performance",
                  timeout=0,
                  max_trials=100,
                  performance_only=False,
                  reduce_range=None,
                  example_inputs=None,
                  excluded_precisions=[],
                  quant_level="auto",
                  accuracy_criterion=accuracy_criterion,
                  use_distributed_tuning=False):
-        """Initialize _BaseQuantizationConfig class.
- """ + """Initialize _BaseQuantizationConfig class.""" self.inputs = inputs self.outputs = outputs self.backend = backend @@ -398,7 +427,11 @@ def smooth_quant_args(val=None): _check_value("smooth_quant_args", val, dict) for k, v in val.items(): if k == "alpha": - _check_value("alpha", v, float) + if isinstance(v, str): + assert v == "auto", "the alpha of sq only supports float and 'auto'" + else: + _check_value("alpha", v, float) + return True else: return {} @@ -463,7 +496,7 @@ def dedicated_qdq_pair(val=None): return _check_value("dedicated_qdq_pair", val, bool) else: return False - + RECIPES = {"smooth_quant": smooth_quant, "smooth_quant_args": smooth_quant_args, "fast_bias_correction": fast_bias_correction, @@ -624,7 +657,7 @@ def calibration_sampling_size(self): def calibration_sampling_size(self, sampling_size): if _check_value('calibration_sampling_size', sampling_size, int): if isinstance(sampling_size, int): - sampling_size =[sampling_size] + sampling_size = [sampling_size] self._calibration_sampling_size = sampling_size @property diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 3e8783ded8b..362988ff2b3 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -1180,6 +1180,10 @@ def approach(self): @approach.setter def approach(self, approach): """Set approach.""" + if 'static' in approach: + approach = 'static' + if 'dynamic' in approach: + approach = 'dynamic' if _check_value("approach", approach, str, ["static", "dynamic", "auto",\ "post_training_static_quant"]): self._approach = QUANTMAPPING[approach] diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index 0315a856b12..1e5108aef75 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -185,6 +185,7 @@ def model(self, user_model): assert cfg.quantization.framework == "pytorch_ipex",\ "Please wrap the model with correct Model class!" if cfg.quantization.backend == "itex": + from .model.tensorflow_model import get_model_type if get_model_type(user_model.model) == 'keras': assert cfg.quantization.framework == "keras",\ "Please wrap the model with KerasModel class!" @@ -193,15 +194,16 @@ def model(self, user_model): "Please wrap the model with TensorflowModel class!" 
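# Editor's aside: the smooth_quant_args validation earlier in this patch accepts
# either a float alpha or the literal string 'auto'. A hedged sketch of a config
# that exercises it (illustrative, not taken from the patch itself):
#
#     conf = PostTrainingQuantConfig(
#         recipes={'smooth_quant': True, 'smooth_quant_args': {'alpha': 'auto'}})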
else: framework = get_model_fwk_name(user_model) - cfg.quantization.framework = framework if framework == "tensorflow": + from .model.tensorflow_model import get_model_type if get_model_type(user_model) == 'keras' and cfg.quantization.backend == 'itex': - cfg.quantization.framework = 'keras' + framework = 'keras' if framework == "pytorch": if cfg.quantization.backend == "default": - cfg.quantization.framework = "pytorch_fx" + framework = "pytorch_fx" elif cfg.quantization.backend == "ipex": - cfg.quantization.framework = "pytorch_ipex" + framework = "pytorch_ipex" + cfg.quantization.framework = framework if not isinstance(user_model, BaseModel): logger.warning("Force convert framework model to neural_compressor model.") diff --git a/neural_compressor/utils/utility.py b/neural_compressor/utils/utility.py index f560f45fe4a..7e7b8d2b02d 100644 --- a/neural_compressor/utils/utility.py +++ b/neural_compressor/utils/utility.py @@ -363,7 +363,10 @@ def recover(fp32_model, tuning_history_path, num, **kwargs): tuning_history = get_tuning_history(tuning_history_path) target_history = tuning_history[0]['history'] q_config = target_history[num]['q_config'] - framework = tuning_history[0]['cfg']['model']['framework'] + try: + framework = tuning_history[0]['cfg']['model']['framework'] + except Exception as e: + framework = tuning_history[0]['cfg'].quantization.framework if 'pytorch' in framework: from neural_compressor.utils.pytorch import load From 414ae308ccf6d1dd11fec6f91722ca4a3de67a08 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 6 Apr 2023 16:35:46 +0800 Subject: [PATCH 041/103] fixed sigopt and tpe Signed-off-by: yiliu30 --- neural_compressor/contrib/strategy/sigopt.py | 8 +- neural_compressor/experimental/__init__.py | 1 + test/strategy/test_sigopt.py | 92 ++------------------ test/strategy/test_sigopt_1.x.py | 23 +---- 4 files changed, 12 insertions(+), 112 deletions(-) diff --git a/neural_compressor/contrib/strategy/sigopt.py b/neural_compressor/contrib/strategy/sigopt.py index 8c9b230ee17..7e1039b90dd 100644 --- a/neural_compressor/contrib/strategy/sigopt.py +++ b/neural_compressor/contrib/strategy/sigopt.py @@ -1,4 +1,3 @@ - #!/usr/bin/env python # -*- coding: utf-8 -*- # @@ -124,9 +123,10 @@ def __init__(self, else: pass # SigOpt init - client_token = conf.usr_cfg.tuning.strategy.sigopt_api_token - self.project_id = conf.usr_cfg.tuning.strategy.sigopt_project_id - self.experiment_name = conf.usr_cfg.tuning.strategy.sigopt_experiment_name + strategy_kwargs = conf.quantization.tuning_criterion.strategy_kwargs + client_token = strategy_kwargs.get('sigopt_api_token', None) + self.project_id = strategy_kwargs.get('sigopt_project_id', None) + self.experiment_name = strategy_kwargs.get('sigopt_experiment_name', None) try: assert client_token != None except(AssertionError): diff --git a/neural_compressor/experimental/__init__.py b/neural_compressor/experimental/__init__.py index 265db4851ba..6b1e78ac6ac 100644 --- a/neural_compressor/experimental/__init__.py +++ b/neural_compressor/experimental/__init__.py @@ -28,6 +28,7 @@ from .distillation import Distillation from .nas import NAS from . 
import export +from .contrib import * __all__ = ['Component', 'Quantization', 'Pruning', 'Benchmark', 'Graph_Optimization', \ 'GraphOptimization', 'ModelConversion', 'Distillation', 'NAS', 'MixedPrecision', \ diff --git a/test/strategy/test_sigopt.py b/test/strategy/test_sigopt.py index 650a24273d0..db5990ca1ef 100644 --- a/test/strategy/test_sigopt.py +++ b/test/strategy/test_sigopt.py @@ -9,64 +9,6 @@ else: CONDITION = False -def build_fake_yaml(sigopt_api_token,sigopt_project_id): - fake_yaml = ''' - model: - name: fake_yaml - framework: tensorflow - inputs: x - outputs: op2_to_store - device: cpu - evaluation: - accuracy: - metric: - topk: 1 - tuning: - strategy: - name: sigopt - sigopt_api_token: {} - sigopt_project_id: {} - sigopt_experiment_name: nc-tune - accuracy_criterion: - relative: 0.01 - workspace: - path: saved - '''.format(sigopt_api_token, sigopt_project_id) - y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) - f.close() - -def build_fake_yaml2(sigopt_api_token,sigopt_project_id): - fake_yaml = ''' - model: - name: fake_yaml - framework: tensorflow - inputs: x - outputs: op2_to_store - device: cpu - evaluation: - accuracy: - metric: - topk: 1 - tuning: - strategy: - name: sigopt - sigopt_api_token: {} - sigopt_project_id: {} - sigopt_experiment_name: nc-tune - exit_policy: - max_trials: 3 - accuracy_criterion: - relative: -0.01 - workspace: - path: saved - '''.format(sigopt_api_token, sigopt_project_id) - y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml2.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) - f.close() - def build_fake_model(): import tensorflow as tf try: @@ -111,36 +53,11 @@ def setUpClass(self): sigopt_api_token = os.getenv('SIGOPT_API_TOKEN') sigopt_project_id = os.getenv('SIGOPT_PROJECT_ID') self.constant_graph = build_fake_model() - build_fake_yaml(sigopt_api_token,sigopt_project_id) - build_fake_yaml2(sigopt_api_token,sigopt_project_id) @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') - os.remove('fake_yaml2.yaml') shutil.rmtree('saved', ignore_errors=True) - def test_run_basic_one_trial(self): - from neural_compressor.experimental import Quantization, common - - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.eval_dataloader = common.DataLoader(dataset) - quantizer.model = self.constant_graph - quantizer.fit() - - - def test_run_basic_max_trials(self): - from neural_compressor.experimental import Quantization, common - - quantizer = Quantization('fake_yaml2.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.eval_dataloader = common.DataLoader(dataset) - quantizer.model = self.constant_graph - quantizer.fit() - def test_run_sigopt_one_trial_new_api(self): from neural_compressor.quantization import fit from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion @@ -156,11 +73,14 @@ def test_run_sigopt_one_trial_new_api(self): 'sigopt_project_id': 'sigopt_project_id_test', 'sigopt_experiment_name': 'nc-tune'} tuning_criterion = TuningCriterion(strategy='sigopt', strategy_kwargs=strategy_kwargs, max_trials=3) - conf = PostTrainingQuantConfig(approach="static", + conf = PostTrainingQuantConfig(quant_level=1, + approach="static", tuning_criterion=tuning_criterion, 
accuracy_criterion=accuracy_criterion) - self.assertEqual(conf.strategy_kwargs, strategy_kwargs) - q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader= dataloader, eval_dataloader=dataloader) + self.assertEqual(conf.tuning_criterion.strategy_kwargs, strategy_kwargs) + def fake_eval(model): + return 1 + q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader=dataloader, eval_func=fake_eval) if __name__ == "__main__": diff --git a/test/strategy/test_sigopt_1.x.py b/test/strategy/test_sigopt_1.x.py index 650a24273d0..4a2e077dbfd 100644 --- a/test/strategy/test_sigopt_1.x.py +++ b/test/strategy/test_sigopt_1.x.py @@ -5,7 +5,7 @@ import os import yaml if os.getenv('SIGOPT_API_TOKEN') is None or os.getenv('SIGOPT_PROJECT_ID') is None: - CONDITION = True + CONDITION = False else: CONDITION = False @@ -140,27 +140,6 @@ def test_run_basic_max_trials(self): quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph quantizer.fit() - - def test_run_sigopt_one_trial_new_api(self): - from neural_compressor.quantization import fit - from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion - from neural_compressor.data import Datasets, DATALOADERS - - # dataset and dataloader - dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) - dataloader = DATALOADERS["tensorflow"](dataset) - - # tuning and accuracy criterion - accuracy_criterion = AccuracyCriterion(criterion='relative') - strategy_kwargs = {'sigopt_api_token': 'sigopt_api_token_test', - 'sigopt_project_id': 'sigopt_project_id_test', - 'sigopt_experiment_name': 'nc-tune'} - tuning_criterion = TuningCriterion(strategy='sigopt', strategy_kwargs=strategy_kwargs, max_trials=3) - conf = PostTrainingQuantConfig(approach="static", - tuning_criterion=tuning_criterion, - accuracy_criterion=accuracy_criterion) - self.assertEqual(conf.strategy_kwargs, strategy_kwargs) - q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader= dataloader, eval_dataloader=dataloader) if __name__ == "__main__": From ebaab08416f5dec381ef20e02877b194f1c7bcf7 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 6 Apr 2023 16:39:58 +0800 Subject: [PATCH 042/103] fixed fake startegy Signed-off-by: yiliu30 --- test/objective/test_objective.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/objective/test_objective.py b/test/objective/test_objective.py index cd4cea35464..ca61327ede5 100644 --- a/test/objective/test_objective.py +++ b/test/objective/test_objective.py @@ -165,7 +165,7 @@ def build_fake_model1(): def build_fake_strategy(): with open(os.path.join(os.path.dirname(importlib.util.find_spec('neural_compressor').origin), \ - 'strategy/fake.py'), 'w', encoding='utf-8') as f: + 'experimental/strategy/fake.py'), 'w', encoding='utf-8') as f: seq = [ "import time\n", "from .strategy import strategy_registry, TuneStrategy\n", @@ -221,7 +221,7 @@ def tearDownClass(self): os.remove('fake_yaml.yaml') os.remove('fake_yaml_model_size.yaml') os.remove('fake_yaml_footprint.yaml') - os.remove(os.path.join(os.path.dirname(importlib.util.find_spec('neural_compressor').origin), 'strategy/fake.py')) + os.remove(os.path.join(os.path.dirname(importlib.util.find_spec('neural_compressor').origin), 'experimental/strategy/fake.py')) shutil.rmtree('./saved', ignore_errors=True) def test_performance(self): From 019ba9cf914d5d62d861ac8e2d480412064bc3fd Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 6 Apr 2023 16:46:34 +0800 Subject: [PATCH 043/103] fixed 
obj Signed-off-by: yiliu30 --- test/objective/test_objective.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/objective/test_objective.py b/test/objective/test_objective.py index ca61327ede5..f2a3a80ffa3 100644 --- a/test/objective/test_objective.py +++ b/test/objective/test_objective.py @@ -282,7 +282,7 @@ def eval(model): model = build_matmul_model() - from neural_compressor import conf + from neural_compressor.conf.config import conf from neural_compressor.experimental import Quantization conf.model.framework = 'onnxrt_integerops' From 21b3de6606d20ed1982a694bb8909a24e8b01449 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 6 Apr 2023 16:56:56 +0800 Subject: [PATCH 044/103] fixed obj Signed-off-by: yiliu30 --- neural_compressor/conf/config.py | 4 +- test/objective/test_objective.py | 108 ++++++++++++++++++++----------- 2 files changed, 73 insertions(+), 39 deletions(-) diff --git a/neural_compressor/conf/config.py b/neural_compressor/conf/config.py index 5f077203138..1561137b79c 100644 --- a/neural_compressor/conf/config.py +++ b/neural_compressor/conf/config.py @@ -18,7 +18,7 @@ import yaml from schema import Schema, And, Use, Optional, Or, Hook from ..adaptor import FRAMEWORKS -from ..strategy import STRATEGIES +from ..experimental.strategy import EXP_STRATEGIES from ..objective import OBJECTIVES from ..utils import logger from ..version import __version__ @@ -914,7 +914,7 @@ def percent_to_float(data): 'diagnosis': False, }): { Optional('strategy', default={'name': 'basic'}): { - 'name': And(str, lambda s: s in STRATEGIES), + 'name': And(str, lambda s: s in EXP_STRATEGIES), Optional('sigopt_api_token'): str, Optional('sigopt_project_id'): str, Optional('sigopt_experiment_name', default='nc-tune'): str, diff --git a/test/objective/test_objective.py b/test/objective/test_objective.py index f2a3a80ffa3..7da0dbc530d 100644 --- a/test/objective/test_objective.py +++ b/test/objective/test_objective.py @@ -166,43 +166,77 @@ def build_fake_model1(): def build_fake_strategy(): with open(os.path.join(os.path.dirname(importlib.util.find_spec('neural_compressor').origin), \ 'experimental/strategy/fake.py'), 'w', encoding='utf-8') as f: - seq = [ - "import time\n", - "from .strategy import strategy_registry, TuneStrategy\n", - "from collections import OrderedDict\n", - "import copy\n", - "@strategy_registry\n", - "class FakeTuneStrategy(TuneStrategy):\n", - " def __init__(self, model, cfg, q_dataloader, q_func=None, eval_dataloader=None, eval_func=None, dicts=None, q_hooks=None):\n", - " self.id = 0\n", - " self.resume = True if dicts else False\n", - " super(FakeTuneStrategy, self).__init__(model, cfg, q_dataloader, q_func, eval_dataloader, eval_func, dicts)\n", - " def __getstate__(self):\n", - " for history in self.tuning_history:\n", - " if self._same_yaml(history['cfg'], self.cfg):\n", - " history['id'] = self.id\n", - " save_dict = super(FakeTuneStrategy, self).__getstate__()\n", - " return save_dict\n", - " def next_tune_cfg(self):\n", - " if self.resume:\n", - " assert self.id == 1\n", - " assert len(self.tuning_history) == 1\n", - " history = self.tuning_history[0]\n", - " assert self._same_yaml(history['cfg'], self.cfg)\n", - " assert len(history['history'])\n", - " for h in history['history']:\n", - " assert h\n", - " op_cfgs = {}\n", - " for iterations in self.calib_iter:\n", - " op_cfgs['calib_iteration'] = int(iterations)\n", - " op_cfgs['op'] = OrderedDict()\n", - " for op in self.opwise_quant_cfgs:\n", - " op_cfgs['op'][op] = copy.deepcopy(\n", - " 
self.opwise_tune_cfgs[op][0])\n", - " self.id += 1\n", - " yield op_cfgs\n", - " return\n" - ] + seq = ["import time \n", + "import copy \n", + "import numpy as np \n", + "from collections import OrderedDict \n", + "from .strategy import strategy_registry, TuneStrategy \n", + "from ...utils import logger \n", + "from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler \n", + "from .utils.tuning_structs import OpTuningConfig \n", + "import copy \n", + "@strategy_registry \n", + "class FakeTuneStrategy(TuneStrategy): \n", + " def __init__(self, model, cfg, q_dataloader, q_func=None, eval_dataloader=None, \n", + " eval_func=None, dicts=None, q_hooks=None): \n", + " self.id = 0 \n", + " self.resume = True if dicts else False \n", + " super(FakeTuneStrategy, self).__init__(model, cfg, q_dataloader, \n", + " q_func, eval_dataloader, eval_func, dicts) \n", + " def __getstate__(self): \n", + " for history in self.tuning_history: \n", + " if self._same_yaml(history['cfg'], self.cfg): \n", + " history['id'] = self.id \n", + " save_dict = super(FakeTuneStrategy, self).__getstate__() \n", + " return save_dict \n", + " def next_tune_cfg(self): \n", + " if self.resume: \n", + " #assert self.id == 1 \n", + " assert len(self.tuning_history) == 1 \n", + " history = self.tuning_history[0] \n", + " assert self._same_yaml(history['cfg'], self.cfg) \n", + " assert len(history['history']) \n", + " for h in history['history']: \n", + " assert h \n", + " from copy import deepcopy \n", + " tuning_space = self.tuning_space \n", + " initial_op_tuning_cfg = {} \n", + " for item in tuning_space.root_item.options: \n", + " if item.item_type == 'op': \n", + " op_name, op_type = item.name \n", + " initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) \n", + " calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options \n", + " for calib_sampling_size in calib_sampling_size_lst: \n", + " # step1. collect the ops that support static and dynamic \n", + " quant_mode_wise_items = OrderedDict() \n", + " query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] \n", + " pre_items = set() \n", + " for quant_mode in query_order: \n", + " items = tuning_space.query_items_by_quant_mode(quant_mode) \n", + " filtered_items = [item for item in items if item not in pre_items] \n", + " pre_items = pre_items.union(set(items)) \n", + " quant_mode_wise_items[quant_mode] = filtered_items \n", + " def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): \n", + " for item in items_lst: \n", + " op_item_dtype_dict[item.name] = target_quant_mode \n", + " op_item_dtype_dict = OrderedDict() \n", + " for quant_mode, quant_mode_items in quant_mode_wise_items.items(): \n", + " initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) \n", + " # step3. 
optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) \n", + " early_stop_tuning = False \n", + " stage1_cnt = 0 \n", + " int8_ops = quant_mode_wise_items['dynamic'] + quant_mode_wise_items['static'] \n", + " stage1_max = min(5, len(int8_ops)) # TODO set a more appropriate value \n", + " op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], \n", + " op_item_dtype_dict, initial_op_tuning_cfg) \n", + " for op_tuning_cfg in op_wise_tuning_sampler: \n", + " stage1_cnt += 1 \n", + " if early_stop_tuning and stage1_cnt > stage1_max: \n", + " logger.info('Early stopping the stage 1.') \n", + " break \n", + " op_tuning_cfg['calib_sampling_size'] = calib_sampling_size \n", + " self.id += 1 \n", + " yield op_tuning_cfg \n",] f.writelines(seq) f.close() From 4f29ea68ab67260c10b4995a351ecc4ee2efc207 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 6 Apr 2023 17:15:07 +0800 Subject: [PATCH 045/103] fixed ut Signed-off-by: yiliu30 --- neural_compressor/mix_precision.py | 2 ++ test/distillation/test_distillation_1.x.py | 2 +- test/mixed_precision/test_mixed_precision.py | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py index b95979f602d..502ee9435b6 100644 --- a/neural_compressor/mix_precision.py +++ b/neural_compressor/mix_precision.py @@ -60,6 +60,7 @@ def __init__(self, conf=None): self._eval_func = None self._eval_dataloader = None self._model = None + self._metric = None def pre_process(self): """Create strategy object for tuning.""" @@ -221,6 +222,7 @@ def model(self, user_model): if not isinstance(user_model, BaseModel): logger.warning("Force convert framework model to neural_compressor model.") if "tensorflow" in cfg.quantization.framework or cfg.quantization.framework == "keras": + from .model.tensorflow_model import get_model_type if get_model_type(user_model) == 'keras': self._model = Model(user_model, backend=cfg.quantization.framework, device=cfg.quantization.device, modelType="saved_model") diff --git a/test/distillation/test_distillation_1.x.py b/test/distillation/test_distillation_1.x.py index 672adf86d5c..32fc28504af 100644 --- a/test/distillation/test_distillation_1.x.py +++ b/test/distillation/test_distillation_1.x.py @@ -171,7 +171,7 @@ def test_distillation(self): distiller = Distillation(conf) distiller = Distillation() - from neural_compressor import conf + from neural_compressor.conf.config import conf conf.model.framework = 'pytorch' conf.distillation.train.end_epoch = 3 conf.distillation.train.iteration = 10 diff --git a/test/mixed_precision/test_mixed_precision.py b/test/mixed_precision/test_mixed_precision.py index 4f459dcfbc8..938530a7bcd 100644 --- a/test/mixed_precision/test_mixed_precision.py +++ b/test/mixed_precision/test_mixed_precision.py @@ -315,7 +315,8 @@ def test_mixed_precision_with_evaluation(self): eval_dataloader=self.matmul_dataloader, eval_metric=ONNXRT_QL_METRICS["MSE"]()) self.assertTrue(any([i.op_type == 'Cast' for i in output_model.nodes()])) - + + def test_mixed_precision_with_evaluation_old_api(self): from neural_compressor.conf.config import MixedPrecision_Conf from neural_compressor.experimental import MixedPrecision converter = MixedPrecision(MixedPrecision_Conf('test.yaml')) From 11a23b566da3b105e8e58fc5b8a4e711a109d04e Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 6 Apr 2023 17:46:26 +0800 Subject: [PATCH 046/103] fixed circular import Signed-off-by: yiliu30 --- neural_compressor/conf/config.py | 5 
++++- test/config/test_config.py | 2 +- test/graph_optimization/test_graph_optimization.py | 2 +- test/strategy/test_tpe_1.x.py | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/neural_compressor/conf/config.py b/neural_compressor/conf/config.py index 1561137b79c..be39f055db7 100644 --- a/neural_compressor/conf/config.py +++ b/neural_compressor/conf/config.py @@ -18,7 +18,6 @@ import yaml from schema import Schema, And, Use, Optional, Or, Hook from ..adaptor import FRAMEWORKS -from ..experimental.strategy import EXP_STRATEGIES from ..objective import OBJECTIVES from ..utils import logger from ..version import __version__ @@ -28,6 +27,10 @@ from collections import OrderedDict from .dotdict import DotDict, deep_set import os, datetime +# TODO WA for avoid circular import +# from ..experimental.strategy import EXP_STRATEGIES +EXP_STRATEGIES = ['basic', 'auto_mixed_precision', 'bayesian', 'conservative',\ + 'exhaustive', 'hawq_v2', 'mse', 'mse_v2', 'random', 'sigopt', 'tpe', 'fake'] def constructor_register(cls): yaml_key = "!{}".format(cls.__name__) diff --git a/test/config/test_config.py b/test/config/test_config.py index 326710dce6f..16088b9e907 100644 --- a/test/config/test_config.py +++ b/test/config/test_config.py @@ -21,7 +21,7 @@ def test_config(self): class TestPyConf(unittest.TestCase): def test_config(self): - from neural_compressor import conf + from neural_compressor.conf.config import conf from neural_compressor.conf.config import QuantConf, PruningConf, \ GraphOptConf, BenchmarkConf, DistillationConf diff --git a/test/graph_optimization/test_graph_optimization.py b/test/graph_optimization/test_graph_optimization.py index 20eb96d45ab..0c8f9e74471 100644 --- a/test/graph_optimization/test_graph_optimization.py +++ b/test/graph_optimization/test_graph_optimization.py @@ -301,7 +301,7 @@ def test_not_supported_model_with_conf(self): pass else: from neural_compressor.experimental import Graph_Optimization - from neural_compressor import conf + from neural_compressor.conf.config import conf import torchvision model = torchvision.models.resnet18() diff --git a/test/strategy/test_tpe_1.x.py b/test/strategy/test_tpe_1.x.py index 2ee5b2e7f7c..c6b921a4b31 100644 --- a/test/strategy/test_tpe_1.x.py +++ b/test/strategy/test_tpe_1.x.py @@ -129,7 +129,7 @@ def test_run_tpe_max_trials(self): quantizer.fit() def test_loss_calculation(self): - from neural_compressor.contrib.strategy.tpe import TpeTuneStrategy + from neural_compressor.experimental.contrib.strategy.tpe import TpeTuneStrategy from neural_compressor.experimental import Quantization, common quantizer = Quantization('fake_yaml.yaml') From 53465889dacc161bcfff9c0f3b3ae1747b497d37 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 6 Apr 2023 19:10:17 +0800 Subject: [PATCH 047/103] add contrib Signed-off-by: yiliu30 --- .../experimental/contrib/__init__.py | 19 + .../experimental/contrib/strategy/__init__.py | 26 + .../experimental/contrib/strategy/sigopt.py | 269 +++++++++ .../experimental/contrib/strategy/tpe.py | 511 ++++++++++++++++++ 4 files changed, 825 insertions(+) create mode 100644 neural_compressor/experimental/contrib/__init__.py create mode 100644 neural_compressor/experimental/contrib/strategy/__init__.py create mode 100644 neural_compressor/experimental/contrib/strategy/sigopt.py create mode 100644 neural_compressor/experimental/contrib/strategy/tpe.py diff --git a/neural_compressor/experimental/contrib/__init__.py b/neural_compressor/experimental/contrib/__init__.py new file mode 100644 index 
00000000000..c2b506951b7 --- /dev/null +++ b/neural_compressor/experimental/contrib/__init__.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Built-in strategy for multiple framework backends.""" +from .strategy import * \ No newline at end of file diff --git a/neural_compressor/experimental/contrib/strategy/__init__.py b/neural_compressor/experimental/contrib/strategy/__init__.py new file mode 100644 index 00000000000..807ff72c28d --- /dev/null +++ b/neural_compressor/experimental/contrib/strategy/__init__.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Built-in strategy for multiple framework backends.""" +from os.path import dirname, basename, isfile, join +import glob + +modules = glob.glob(join(dirname(__file__), "*.py")) +for f in modules: + if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'): + __import__(basename(f)[:-3], globals(), locals(), level=1) + diff --git a/neural_compressor/experimental/contrib/strategy/sigopt.py b/neural_compressor/experimental/contrib/strategy/sigopt.py new file mode 100644 index 00000000000..e6342d975ed --- /dev/null +++ b/neural_compressor/experimental/contrib/strategy/sigopt.py @@ -0,0 +1,269 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""The SigOpt Tuning Strategy provides support for the quantization process.""" +import copy +from neural_compressor.utils import logger +from neural_compressor.utils.utility import LazyImport +from neural_compressor.experimental.strategy.strategy import strategy_registry, TuneStrategy +from collections import OrderedDict +from neural_compressor.experimental.strategy.utils.tuning_sampler import OpWiseTuningSampler +from neural_compressor.experimental.strategy.utils.tuning_structs import OpTuningConfig + +sigopt = LazyImport('sigopt') + +@strategy_registry +class SigOptTuneStrategy(TuneStrategy): + """The tuning strategy using SigOpt HPO search in tuning space. + + Args: + model (object): The FP32 model specified for low precision tuning. + conf (Conf): The Conf class instance initialized from user yaml + config file. + q_dataloader (generator): Data loader for calibration, mandatory for + post-training quantization. + It is iterable and should yield a tuple (input, + label) for calibration dataset containing label, + or yield (input, _) for label-free calibration + dataset. The input could be a object, list, tuple or + dict, depending on user implementation, as well as + it can be taken as model input. + q_func (function, optional): Reserved for future use. + eval_dataloader (generator, optional): Data loader for evaluation. It is iterable + and should yield a tuple of (input, label). + The input could be a object, list, tuple or dict, + depending on user implementation, as well as it can + be taken as model input. The label should be able + to take as input of supported metrics. If this + parameter is not None, user needs to specify + pre-defined evaluation metrics through configuration + file and should set "eval_func" parameter as None. + Tuner will combine model, eval_dataloader and + pre-defined metrics to run evaluation process. + eval_func (function, optional): The evaluation function provided by user. + This function takes model as parameter, and + evaluation dataset and metrics should be + encapsulated in this function implementation and + outputs a higher-is-better accuracy scalar value. + + The pseudo code should be something like: + + def eval_func(model): + input, label = dataloader() + output = model(input) + accuracy = metric(output, label) + return accuracy + dicts (dict, optional): The dict containing resume information. + Defaults to None. + + """ + + def __init__(self, model, conf, q_dataloader, q_func=None, + eval_dataloader=None, eval_func=None, dicts=None, q_hooks=None): + """Initialize the SigOpt tuning strategy if the user specified to use it.""" + super().__init__( + model, + conf, + q_dataloader, + q_func, + eval_dataloader, + eval_func, + dicts, + q_hooks) + strategy_name = conf.usr_cfg.tuning.strategy.name + if strategy_name.lower() == "sigopt": + try: + import sigopt + except ImportError: + try: + import subprocess + import sys + subprocess.check_call([sys.executable, "-m", "pip", "install", "sigopt"]) + import sigopt # pylint: disable=import-error + except: + assert False, "Unable to import sigopt from the local environment." + else: + pass + # SigOpt init + client_token = conf.usr_cfg.tuning.strategy.sigopt_api_token + self.project_id = conf.usr_cfg.tuning.strategy.sigopt_project_id + self.experiment_name = conf.usr_cfg.tuning.strategy.sigopt_experiment_name + try: + assert client_token != None + except(AssertionError): + logger.error("`sigopt_api_token` field in yaml file is required. 
" \ + "Please refer to details in /docs/sigopt_strategy.md.") + exit(0) + try: + assert self.project_id != None + logger.warning('Project id is {}, ' \ + 'Please check whether it is created in the sigopt account.'\ + .format(self.project_id)) + except(AssertionError): + logger.error("`sigopt_project_id` field in yaml file is required. " \ + "Please refer to details in /docs/sigopt_strategy.md.") + exit(0) + if self.experiment_name == 'nc-tune': + logger.info("Default experiment name `nc-tune` is used, " \ + "Please refer to details in /docs/sigopt_strategy.md " \ + "if user wants to modify it.") + else: + logger.info("Experiment name is {}.".format(self.experiment_name)) + + self.conn = sigopt.Connection(client_token) + self.experiment = None + + def params_to_tune_configs(self, params): + """Get the parameters of the tuning strategy.""" + op_tuning_cfg = {} + calib_sampling_size_lst = self.tuning_space.root_item.get_option_by_name('calib_sampling_size').options + for op_name_type, configs in self.op_configs.items(): + if len(configs) == 1: + op_tuning_cfg[op_name_type] = configs[0] + else: + op_tuning_cfg[op_name_type] = configs[min(len(configs) - 1, int(params[op_name_type[0]]))] + calib_sampling_size = calib_sampling_size_lst[min(len(configs) - 1, int(params['calib_sampling_size']))] + op_tuning_cfg['calib_sampling_size'] = calib_sampling_size + return op_tuning_cfg + + def next_tune_cfg(self): + """Yielding the tuning config to traverse by concreting strategies according to last tuning result.""" + while self.experiment.progress.observation_count < self.experiment.observation_budget: + suggestion = self.conn.experiments(self.experiment.id).suggestions().create() + yield self.params_to_tune_configs(suggestion.assignments) + values = [ + dict(name='accuracy', value=self.last_tune_result[0]), + dict(name='latency', value=self.last_tune_result[1]) + ] + obs = self.conn.experiments(self.experiment.id).observations().create( + suggestion=suggestion.id, values=values) + logger.debug("`suggestion_id` is {}, `observation_id` is {}.". + format(suggestion.id, obs.id)) + self.experiment = self.conn.experiments(self.experiment.id).fetch() + + def get_acc_target(self, base_acc): + """Get the tuning target of the accuracy ceiterion.""" + if self.cfg.tuning.accuracy_criterion.relative: + return base_acc * (1. - self.cfg.tuning.accuracy_criterion.relative) + else: + return base_acc - self.cfg.tuning.accuracy_criterion.absolute + + def traverse(self): + """The main traverse logic, which could be override by some concrete strategy which needs more hooks. + + This is SigOpt version of traverse -- with additional constraints setting to HPO. 
+ """ + self._eval_baseline() + + baseline_msg = '[Accuracy: {:.4f}'.format(self.baseline[0]) + \ + ''.join([', {}: {:.4f}'.format(x,y) for x,y in zip( \ + self.objectives.representation, self.baseline[1]) if x != 'Accuracy']) + ']' \ + if self.baseline else 'n/a' + logger.info("FP32 baseline is: {}".format(baseline_msg)) + self.experiment = self.create_exp(acc_target=self.get_acc_target(self.baseline[0])) + trials_count = 0 + for tune_cfg in self.next_tune_cfg(): + # add tune_cfg here as quantize use tune_cfg + tune_cfg['advance'] = self.cfg.quantization.advance + trials_count += 1 + tuning_history = self._find_tuning_history(tune_cfg) + if tuning_history and trials_count < self.cfg.tuning.exit_policy.max_trials: + self.last_tune_result = tuning_history['last_tune_result'] + self.best_tune_result = tuning_history['best_tune_result'] + logger.warn("Find evaluated tuning config, skip.") + continue + + logger.debug("Dump current tuning configuration:") + logger.debug(tune_cfg) + self.last_qmodel = self.adaptor.quantize( + tune_cfg, self.model, self.calib_dataloader, self.q_func) + assert self.last_qmodel + # Return the last quantized model as a result. if performance only. + if self.cfg.tuning.exit_policy.performance_only: + self.best_qmodel = self.last_qmodel + self._add_tuning_history(copy.deepcopy(tune_cfg), (-1, [0]), q_config=self.last_qmodel.q_config) + return + self.last_tune_cfg = copy.deepcopy(tune_cfg) + self.last_tune_result = self._evaluate(self.last_qmodel) + + need_stop = self.stop(self.cfg.tuning.exit_policy.timeout, trials_count) + + # record the tuning history + saved_tune_cfg = copy.deepcopy(tune_cfg) + saved_last_tune_result = copy.deepcopy(self.last_tune_result) + self._add_tuning_history(saved_tune_cfg, saved_last_tune_result) + + if need_stop: + break + + def create_exp(self, acc_target): + """Set the config for the experiment.""" + params = [] + from copy import deepcopy + tuning_space = self.tuning_space + initial_op_tuning_cfg = {} + for item in tuning_space.root_item.options: + if item.item_type == 'op': + op_name, op_type = item.name + initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + # step1. 
collect the ops that support static and dynamic + quant_mode_wise_items = OrderedDict() + query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] + pre_items = set() + for quant_mode in query_order: + items = tuning_space.query_items_by_quant_mode(quant_mode) + filtered_items = [item for item in items if item not in pre_items] + pre_items = pre_items.union(set(items)) + quant_mode_wise_items[quant_mode] = filtered_items + + def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): + """Initialize the op tuning mode.""" + for item in items_lst: + op_item_dtype_dict[item.name] = target_quant_mode + + op_item_dtype_dict = OrderedDict() + for quant_mode, quant_mode_items in quant_mode_wise_items.items(): + initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) + + op_wise_pool = OpWiseTuningSampler(tuning_space, [], [], + op_item_dtype_dict, initial_op_tuning_cfg) + self.op_configs = op_wise_pool.get_opwise_candidate() + for op, configs in self.op_configs.items(): + if len(configs) > 1: + params.append(dict(name=op[0], type='int', + bounds=dict(min=0, max=len(configs) - 1))) + params.append(dict(name='calib_sampling_size', type='int', + bounds=dict(min=0, max=len(calib_sampling_size_lst) - 1))) + experiment = self.conn.experiments().create( + name=self.experiment_name, + parameters=params, + metrics=[ + dict(name='accuracy', objective='maximize', strategy='constraint', \ + threshold=acc_target), + dict(name='latency', objective='minimize', strategy='optimize'), + ], + parallel_bandwidth=1, + # Define an Observation Budget for your experiment + observation_budget=100, + project=self.project_id, + ) + + logger.debug("Create experiment at https://app.sigopt.com/experiment/{}". + format(experiment.id)) + + return experiment diff --git a/neural_compressor/experimental/contrib/strategy/tpe.py b/neural_compressor/experimental/contrib/strategy/tpe.py new file mode 100644 index 00000000000..8b6fb33ac96 --- /dev/null +++ b/neural_compressor/experimental/contrib/strategy/tpe.py @@ -0,0 +1,511 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Fefine the tuning strategy that uses tpe search in tuning space.""" +import copy +import os +from pathlib import Path +from functools import partial +import numpy as np +from neural_compressor.utils import logger +from neural_compressor.utils.utility import LazyImport +from neural_compressor.experimental.strategy.strategy import strategy_registry, TuneStrategy +from collections import OrderedDict +from neural_compressor.experimental.strategy.utils.tuning_sampler import OpWiseTuningSampler +from neural_compressor.experimental.strategy.utils.tuning_structs import OpTuningConfig + +hyperopt = LazyImport('hyperopt') + +try: + import pandas as pd +except ImportError: + pd = None + logger.info("Pandas package is required for best result and CSV files generation.") + + +@strategy_registry +class TpeTuneStrategy(TuneStrategy): + """The tuning strategy using tpe search in tuning space. + + Args: + model (object): The FP32 model specified for low precision tuning. + conf (Conf): The Conf class instance initialized from user yaml + config file. + q_dataloader (generator): Data loader for calibration, mandatory for + post-training quantization. + It is iterable and should yield a tuple (input, + label) for calibration dataset containing label, + or yield (input, _) for label-free calibration + dataset. The input could be a object, list, tuple or + dict, depending on user implementation, as well as + it can be taken as model input. + q_func (function, optional): Reserved for future use. + eval_dataloader (generator, optional): Data loader for evaluation. It is iterable + and should yield a tuple of (input, label). + The input could be a object, list, tuple or dict, + depending on user implementation, as well as it can + be taken as model input. The label should be able + to take as input of supported metrics. If this + parameter is not None, user needs to specify + pre-defined evaluation metrics through configuration + file and should set "eval_func" parameter as None. + Tuner will combine model, eval_dataloader and + pre-defined metrics to run evaluation process. + eval_func (function, optional): The evaluation function provided by user. + This function takes model as parameter, and + evaluation dataset and metrics should be + encapsulated in this function implementation and + outputs a higher-is-better accuracy scalar value. + + The pseudo code should be something like: + + def eval_func(model): + input, label = dataloader() + output = model(input) + accuracy = metric(output, label) + return accuracy + dicts (dict, optional): The dict containing resume information. + Defaults to None. + + """ + def __init__(self, model, conf, q_dataloader, q_func=None, + eval_dataloader=None, eval_func=None, dicts=None, q_hooks=None): + """Initialize the tpe tuning strategy if the user specified to use it.""" + assert conf.usr_cfg.quantization.approach == 'post_training_static_quant', \ + "TPE strategy is only for post training static quantization!" + """Initialize the tpe tuning strategy if the user specified to use it.""" + strategy_name = conf.usr_cfg.tuning.strategy.name + if strategy_name.lower() == "tpe": + try: + import hyperopt + except ImportError: + try: + import subprocess + import sys + subprocess.check_call([sys.executable, "-m", "pip", "install", "hyperopt"]) + import hyperopt # pylint: disable=import-error + except: + assert False, "Unable to import hyperopt from the local environment." 
+ else: + pass + self.hpopt_search_space = None + self.warm_start = False + self.cfg_evaluated = False + self.hpopt_trials = hyperopt.Trials() + self.max_trials = conf.usr_cfg.tuning.exit_policy.get('max_trials', 200) + self.loss_function_config = { + 'acc_th': conf.usr_cfg.tuning.accuracy_criterion.relative if \ + conf.usr_cfg.tuning.accuracy_criterion and \ + conf.usr_cfg.tuning.accuracy_criterion.relative else 0.01, + 'acc_weight': conf.usr_cfg.tuning.strategy.get('accuracy_weight', 1.0), + 'lat_weight': conf.usr_cfg.tuning.strategy.get('latency_weight', 1.0) + } + self.tpe_params = { + 'n_initial_point': 10, + 'gamma': 0.3, + 'n_EI_candidates': 100, + 'prior_weight': 1.0 + } + self.best_result = { + 'best_loss': float('inf'), + 'best_acc_loss': float('inf'), + 'best_lat_diff': 0.0 + } + self._algo = None + + super().__init__( + model, + conf, + q_dataloader, + q_func, + eval_dataloader, + eval_func, + dicts, + q_hooks) + + def __getstate__(self): + """Magic method for pickle saving. + + Returns: + dict: Saved dict for resuming + """ + for history in self.tuning_history: + if self._same_yaml(history['cfg'], self.cfg): + history['warm_start'] = True + history['hpopt_trials'] = self.hpopt_trials + history['loss_function_config'] = self.loss_function_config + history['tpe_params'] = self.tpe_params + history['hpopt_search_space'] = self.hpopt_search_space + history['_algo'] = self._algo + save_dict = super().__getstate__() + return save_dict + + def _configure_hpopt_search_space_and_params(self, search_space): + """Set the configuration of hpopt searching strategy.""" + self.hpopt_search_space = {} + for param, configs in search_space.items(): + self.hpopt_search_space[(param)] = hyperopt.hp.choice((param[0]), configs) + # Find minimum number of choices for params with more than one choice + multichoice_params = [len(configs) for param, configs in search_space.items() + if len(configs) > 1] + if not multichoice_params: + return False + min_param_size = min(multichoice_params) if len(multichoice_params) > 0 else 1 + self.tpe_params['n_EI_candidates'] = min_param_size + self.tpe_params['prior_weight'] = 1 / min_param_size + self._algo = partial(hyperopt.tpe.suggest, + n_startup_jobs=self.tpe_params['n_initial_point'], + gamma=self.tpe_params['gamma'], + n_EI_candidates=self.tpe_params['n_EI_candidates'], + prior_weight=self.tpe_params['prior_weight']) + return True + + def traverse(self): + """Tpe traverse logic.""" + logger.info("Start to run tpe strategy.") + # prepare log file + trials_file = os.path.join(os.path.dirname(self.history_path), 'tpe_trials.csv') + best_result_file = os.path.join(os.path.dirname(self.history_path), 'tpe_best_result.csv') + logger.debug("trials_file: {} ".format(trials_file) + \ + "best_result_file: {}".format(best_result_file)) + if Path(trials_file).exists(): + os.remove(trials_file) + status = True + tuning_history = self._find_self_tuning_history() + + from copy import deepcopy + tuning_space = self.tuning_space + initial_op_tuning_cfg = {} + for item in tuning_space.root_item.options: + if item.item_type == 'op': + op_name, op_type = item.name + initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) + calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options + # step1. 
collect the ops that support static and dynamic + quant_mode_wise_items = OrderedDict() + query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] + pre_items = set() + for quant_mode in query_order: + items = tuning_space.query_items_by_quant_mode(quant_mode) + filtered_items = [item for item in items if item not in pre_items] + pre_items = pre_items.union(set(items)) + quant_mode_wise_items[quant_mode] = filtered_items + + def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): + for item in items_lst: + op_item_dtype_dict[item.name] = target_quant_mode + + op_item_dtype_dict = OrderedDict() + for quant_mode, quant_mode_items in quant_mode_wise_items.items(): + initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) + op_wise_pool = OpWiseTuningSampler(tuning_space, [], [], + op_item_dtype_dict, initial_op_tuning_cfg) + self.op_configs = op_wise_pool.get_opwise_candidate() + self.opwise_tune_cfgs = {} + for key, val in self.op_configs.items(): + self.opwise_tune_cfgs[key[0]] =val + self.opwise_tune_cfgs['calib_sampling_size'] = \ + self.tuning_space.root_item.get_option_by_name('calib_sampling_size').options + + if tuning_history and not self.warm_start: + # prepare loss function scaling (best result from basic can be used) + best_lat, worse_acc_loss = 0, 0 + for history in tuning_history['history']: + acc_loss, lat_diff = self._calculate_acc_lat_diff( + history['tune_result'][0], + history['tune_result'][1]) + if lat_diff > best_lat: + best_lat = lat_diff + if acc_loss > worse_acc_loss: + worse_acc_loss = acc_loss + self._calculate_loss_function_scaling_components( + worse_acc_loss, + best_lat, + self.loss_function_config) + first_run_cfg = self.add_loss_to_tuned_history_and_find_best(tuning_history['history']) + # Prepare hpopt config with best cfg from history + self._configure_hpopt_search_space_and_params(first_run_cfg) + # Run first iteration with best result from history + trials_count = len(self.hpopt_trials.trials) + 1 + hyperopt.fmin(partial(self.object_evaluation, model=self.model), + space=self.hpopt_search_space, + algo=self._algo, + max_evals=trials_count, + trials=self.hpopt_trials, + show_progressbar=False) + if pd is not None: + self._save_trials(trials_file) + self._update_best_result(best_result_file) + # Prepare full hpopt search space + new_tune_cfgs = self._prepare_final_searchspace( + first_run_cfg, + self.opwise_tune_cfgs) + status = self._configure_hpopt_search_space_and_params(new_tune_cfgs) + elif not self.warm_start: + self._calculate_loss_function_scaling_components(0.01, 2, self.loss_function_config) + status = self._configure_hpopt_search_space_and_params(self.opwise_tune_cfgs) + + if status: + trials_count = len(self.hpopt_trials.trials) + 1 + # get fp32 model baseline + if self.baseline is None: + logger.info("Get FP32 model baseline.") + self.baseline = self._evaluate(self.model) + self._add_tuning_history() + + baseline_msg = '[Accuracy: {:.4f}'.format(self.baseline[0]) + \ + ''.join([', {}: {:.4f}'.format(x,y) for x,y in zip( \ + self.objectives.representation, self.baseline[1]) if x != 'Accuracy']) \ + + ']' if self.baseline else 'n/a' + logger.info("FP32 baseline is: {}".format(baseline_msg)) + + if not self.objectives.relative: + self.loss_function_config['acc_th'] =\ + (self.baseline[0] - self.objectives.acc_goal) / self.baseline[0] + # start trials + exit = False + while not exit: + self.cfg_evaluated = False + logger.debug("Trial iteration start: {} / {}.".format( + trials_count, self.max_trials)) + 
hyperopt.fmin(partial(self.object_evaluation, model=self.model),
+                              space=self.hpopt_search_space,
+                              algo=self._algo,
+                              max_evals=trials_count,
+                              trials=self.hpopt_trials,
+                              show_progressbar=False)
+                trials_count += 1
+                if pd is not None:
+                    self._save_trials(trials_file)
+                    self._update_best_result(best_result_file)
+                self._save()
+                if self.stop(self.cfg.tuning.exit_policy.timeout, trials_count):
+                    exit = True
+        else:
+            logger.warn("Can't create search space for input model.")
+
+    def _prepare_final_searchspace(self, first, second):
+        """Set the final search space."""
+        for key, cfgs in second.items():
+            new_cfg = []
+            for cfg in cfgs:
+                if cfg != first[key][0]:
+                    new_cfg.append(cfg)
+            first[key] = first[key] + new_cfg
+        return first
+
+    def add_loss_to_tuned_history_and_find_best(self, tuning_history_list):
+        """Find the best tuned history."""
+        logger.debug("Number of resumed configs is {}.".format(len(tuning_history_list)))
+        best_loss = None
+        first_run_cfg = None
+        for history in tuning_history_list:
+            result = self._compute_metrics(
+                history['tune_cfg']['op'],
+                history['tune_result'][0],
+                history['tune_result'][1])
+            if best_loss is None or result['loss'] < best_loss:
+                best_loss = result['loss']
+                first_run_cfg = history['tune_cfg']['op'].copy()
+            result['source'] = 'finetune'
+            history['result'] = result
+            logger.debug(
+                "Resumed iteration loss is {}, acc_loss is {}, lat_diff is {}, " \
+                "quantization_ratio is {}.".format(result['loss'],
+                                                   result['acc_loss'],
+                                                   result['lat_diff'],
+                                                   result['quantization_ratio']))
+        for op, cfg in first_run_cfg.items():
+            first_run_cfg[op] = [cfg,]
+        return first_run_cfg
+
+    def object_evaluation(self, tune_cfg, model):
+        """Quantize the model with the given config, evaluate it, and record the result."""
+        op_cfgs = self._tune_cfg_converter(tune_cfg)
+        self.last_qmodel = self.adaptor.quantize(op_cfgs, self.model, self.calib_dataloader)
+        self.last_tune_cfg = copy.deepcopy(tune_cfg)
+        self.last_tune_result = self._evaluate(self.last_qmodel)
+        logger.info("The last tune result is {}.".format(
+            (self.last_tune_result[0], self.last_tune_result[1][0])))
+        saved_tune_cfg = copy.deepcopy(op_cfgs)
+        saved_last_tune_result = copy.deepcopy(self.last_tune_result)
+        # prepare result
+        result = self._compute_metrics(
+            op_cfgs['op'],
+            self.last_tune_result[0],
+            self.last_tune_result[1][0])
+        result['source'] = 'tpe'
+        self._add_tuning_history(saved_tune_cfg, saved_last_tune_result, result=result)
+        logger.info("Current iteration loss is {}, acc_loss is {}, lat_diff is {}, " \
+                    "quantization_ratio is {}.".format(result['loss'],
+                                                       result['acc_loss'],
+                                                       result['lat_diff'],
+                                                       result['quantization_ratio']))
+        return result
+
+    def _compute_metrics(self, tune_cfg, acc, lat):
+        quantization_ratio = 1 - len([param for param in tune_cfg.values()
+                                      if param['activation']['dtype'] == 'fp32']) / len(tune_cfg)
+        acc_diff, lat_diff = self._calculate_acc_lat_diff(acc, lat)
+        return {
+            'loss': self.calculate_loss(acc_diff, lat_diff, self.loss_function_config),
+            'acc': acc,
+            'lat': lat,
+            'acc_loss': acc_diff,
+            'lat_diff': lat_diff,
+            'quantization_ratio': quantization_ratio,
+            'status': hyperopt.STATUS_OK}
+
+    def _calculate_acc_lat_diff(self, acc, lat):
+        int8_acc = acc
+        int8_lat = lat
+        fp32_acc = self.baseline[0]
+        fp32_lat = self.baseline[1][0]
+        acc_diff = (fp32_acc - int8_acc) / fp32_acc
+        lat_diff = fp32_lat / int8_lat
+        return acc_diff, lat_diff
+
+    def calculate_loss(self, acc_diff, lat_diff, config):
+        """Calculate the weighted loss from the accuracy and latency differences."""
+        gamma_penalty = 40  # penalty term
+        acc_loss_component = 
self._calculate_acc_loss_component(acc_diff) + lat_loss_component = self._calculate_lat_diff_component(lat_diff) + acc_weight = config['acc_weight'] if acc_diff > config['acc_th'] else 0.0 + if acc_weight == 0 and config['lat_weight'] == 0: + acc_weight = 1.0 + loss = acc_weight * (config['acc_scale'] * (acc_loss_component - config['acc_min'])) \ + + config['lat_weight']\ + * (config['lat_scale'] * (lat_loss_component - config['lat_min'])) + if acc_diff > config['acc_th']: + loss += 2 * gamma_penalty + return loss + + def _calculate_acc_loss_component(self, acc_loss): + return np.exp(acc_loss) + + def _calculate_lat_diff_component(self, lat_diff): + return np.log(np.power((1 / (1000 * lat_diff)), 8)) + + def _calculate_loss_function_scaling_components(self, acc_loss, lat_diff, config): + acc_min = self._calculate_acc_loss_component(0) + acc_max = self._calculate_acc_loss_component(acc_loss) + if acc_max == acc_min: + acc_max = self._calculate_acc_loss_component(config['acc_th']) + config['acc_min'] = acc_min + config['acc_scale'] = 10 / np.abs(acc_max - acc_min) + + lat_min = self._calculate_lat_diff_component(lat_diff) + lat_max = self._calculate_lat_diff_component(1) + if lat_min == lat_max: + lat_min = self._calculate_lat_diff_component(2) + config['lat_min'] = lat_min + config['lat_scale'] = 10 / np.abs(lat_max - lat_min) + + def _save_trials(self, trials_log): + """Save the trial result to the log file.""" + tpe_trials_results = pd.DataFrame(self.hpopt_trials.results) + csv_file = trials_log + tpe_trials_results.to_csv(csv_file) + + def _update_best_result(self, best_result_file): + if not self.hpopt_trials: + raise Exception( + 'No trials loaded to get best result') + trials_results = pd.DataFrame(self.hpopt_trials.results) + + if not trials_results[trials_results.acc_loss <= + self.loss_function_config['acc_th']].empty: + # If accuracy threshold reached, choose best latency + best_result = trials_results[trials_results.acc_loss <= + self.loss_function_config['acc_th']] \ + .reset_index(drop=True).sort_values(by=['lat_diff', 'acc_loss'], + ascending=[False, True]) \ + .reset_index(drop=True).loc[0] + else: + # If accuracy threshold is not reached, choose based on loss function + best_result = \ + trials_results.sort_values('loss', ascending=True).reset_index(drop=True).loc[0] + + update_best_result = False + if not self.best_result['best_loss']: + update_best_result = True + elif self.best_result['best_acc_loss'] <= self.loss_function_config['acc_th']: + if best_result['acc_loss'] <= self.loss_function_config['acc_th'] \ + and best_result['lat_diff'] > self.best_result['best_lat_diff']: + update_best_result = True + else: + if best_result['acc_loss'] <= self.loss_function_config['acc_th'] or \ + best_result['loss'] < self.best_result['best_loss']: + update_best_result = True + + if update_best_result: + best_result.to_csv(best_result_file, header=False) + self.best_result['best_loss'] = best_result['loss'] + self.best_result['best_acc_loss'] = best_result['acc_loss'] + self.best_result['best_lat_diff'] = best_result['lat_diff'] + self.best_result['quantization_ratio'] = best_result['quantization_ratio'] + + logger.info("Trial iteration end is {} / {}, best loss is {}, acc_loss is {}, " \ + "lat_diff is {}, quantization_ratio is {}.".format( + len(self.hpopt_trials.trials), + self.max_trials, + self.best_result['best_loss'], + self.best_result['best_acc_loss'], + self.best_result['best_lat_diff'], + self.best_result['quantization_ratio'])) + + def stop(self, timeout, trials_count): + 
"""Check if need to stop traversing the tuning space, either accuracy goal is met or timeout is reach. + + Returns: + bool: True if need stop, otherwise False. + """ + need_stop = False + if not self.cfg_evaluated: + if self.objectives.compare(self.best_tune_result, self.baseline): + del self.best_tune_result + del self.best_qmodel + self.best_tune_result = self.last_tune_result + self.best_qmodel = self.last_qmodel + self.adaptor.save(self.best_qmodel, os.path.dirname(self.deploy_path)) + else: + del self.last_qmodel + + last_tune_msg = '[Accuracy ({}|fp32): {:.4f}|{:.4f}'.format( \ + self.cfg.quantization.dtype, self.last_tune_result[0], self.baseline[0]) + \ + ''.join([', {} ({}|fp32): {:.4f}|{:.4f}'.format(x,self.cfg.quantization.dtype,y,z) \ + for x,y,z in zip(self.objectives.representation, \ + self.last_tune_result[1], self.baseline[1]) if x != 'Accuracy']) + ']' \ + if self.last_tune_result else 'n/a' + + best_tune_msg = '[Accuracy: {:.4f}'.format(self.best_tune_result[0]) + \ + ''.join([', {}: {:.4f}'.format(x,y) for x,y in zip( \ + self.objectives.representation, self.best_tune_result[1]) if x != 'Accuracy']) \ + + ']' if self.best_tune_result else 'n/a' + + logger.info("Tune {} result is: {}, Best tune result is: {}".format(trials_count, + last_tune_msg, + best_tune_msg)) + + if timeout == 0 and self.best_tune_result: + need_stop = True + elif trials_count >= self.cfg.tuning.exit_policy.max_trials: + need_stop = True + else: + need_stop = False + + return need_stop From b01ae68f2c91c48b1e888f37b085cee525fcc058 Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Fri, 7 Apr 2023 13:53:24 +0800 Subject: [PATCH 048/103] fix for ut Signed-off-by: Cheng, Zixuan --- neural_compressor/__init__.py | 2 ++ neural_compressor/compression/pruner/utils.py | 2 +- neural_compressor/experimental/distillation.py | 4 ++-- test/config/test_config.py | 5 +++-- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/neural_compressor/__init__.py b/neural_compressor/__init__.py index 47a11d76ee8..1ac3a75c4af 100644 --- a/neural_compressor/__init__.py +++ b/neural_compressor/__init__.py @@ -20,6 +20,8 @@ # we need to set a global 'NA' backend, or Model can't be used from .utils.utility import set_random_seed, set_tensorboard, set_workspace from .utils import options +from .conf.config import conf +from .conf.pythonic_config import config from .config import DistillationConfig, PostTrainingQuantConfig, \ WeightPruningConfig, QuantizationAwareTrainingConfig, \ MixedPrecisionConfig diff --git a/neural_compressor/compression/pruner/utils.py b/neural_compressor/compression/pruner/utils.py index 6bd739dd345..c5134f6ec36 100644 --- a/neural_compressor/compression/pruner/utils.py +++ b/neural_compressor/compression/pruner/utils.py @@ -20,7 +20,7 @@ import yaml try: - from ...config import WeightPruningConfig + from ...conf.pythonic_config import WeightPruningConfig from ...conf.config import PrunerV2 from ...utils.utility import LazyImport from neural_compressor.conf.dotdict import DotDict diff --git a/neural_compressor/experimental/distillation.py b/neural_compressor/experimental/distillation.py index e1f2270b14e..de9d50ba3c9 100644 --- a/neural_compressor/experimental/distillation.py +++ b/neural_compressor/experimental/distillation.py @@ -165,10 +165,10 @@ def create_criterion(self): assert 'criterion' in self._train_cfg.keys(), \ "criterion part in train field of distillation section in yaml file " \ "must be configured for distillation if criterion is NOT set." 
- criterion_cfg = self._train_cfg.criterion + criterion_cfg = self._train_cfg.criterion.config assert len(criterion_cfg) == 1, "There must be exactly one loss in " \ "criterion part, instead got {} loss.".format(len(criterion_cfg)) - loss = list(criterion_cfg.keys())[0] + loss = [i for i in criterion_cfg.keys()][0] loss_cfg = criterion_cfg[loss] criterion_builder = Criterions(self.framework)[loss](loss_cfg) criterion_tuple = criterion_builder() diff --git a/test/config/test_config.py b/test/config/test_config.py index 16088b9e907..36c09ad05e8 100644 --- a/test/config/test_config.py +++ b/test/config/test_config.py @@ -21,10 +21,10 @@ def test_config(self): class TestPyConf(unittest.TestCase): def test_config(self): - from neural_compressor.conf.config import conf + from neural_compressor import conf from neural_compressor.conf.config import QuantConf, PruningConf, \ GraphOptConf, BenchmarkConf, DistillationConf - + conf.tuning.accuracy_criterion.relative = 0.2 a = QuantConf(conf) self.assertEqual(a.usr_cfg.tuning.accuracy_criterion.relative, 0.2) @@ -69,6 +69,7 @@ def test_config(self): self.assertEqual(a.usr_cfg.graph_optimization.op_wise, {'weight': {'dtype': ['bf16']}, 'activation': {'dtype': ['bf16']}}) conf.distillation.train.iteration = 900 + import pdb; pdb.set_trace() a = DistillationConf(conf) self.assertEqual(a.usr_cfg.distillation.train.iteration, 900) From 1cfb3e961eeadc14b54b2837a2d1690c717cd945 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Fri, 7 Apr 2023 14:47:04 +0800 Subject: [PATCH 049/103] update conf compare Signed-off-by: yiliu30 --- neural_compressor/strategy/strategy.py | 68 +++++++++-------------- neural_compressor/utils/utility.py | 74 +++++++++++++++++++++++++- 2 files changed, 98 insertions(+), 44 deletions(-) diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index e1e2a01f7f8..31718c40e52 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -27,20 +27,18 @@ from pathlib import Path import yaml import numpy as np -from typing import OrderedDict as T_OrderedDict from neural_compressor.adaptor.tensorflow import TensorFlowAdaptor -from neural_compressor.config import PostTrainingQuantConfig from ..config import MixedPrecisionConfig from ..objective import MultiObjective from ..adaptor import FRAMEWORKS -from ..utils.utility import Statistics, dump_data_to_local -from ..utils.utility import fault_tolerant_file, equal_dicts, GLOBAL_STATE, MODE -from ..utils.create_obj_from_config import create_eval_func, create_train_func +from ..utils.utility import Statistics +from ..utils.utility import fault_tolerant_file, GLOBAL_STATE, MODE +from ..utils.create_obj_from_config import create_eval_func from ..utils.utility import LazyImport from ..utils import logger from ..version import __version__ -from ..conf.dotdict import DotDict, deep_get, deep_set +from ..utils.utility import DotDict from ..algorithm import AlgorithmScheduler, ALGORITHMS import copy @@ -954,7 +952,7 @@ def setup_resume(self, resume): """ self.__dict__.update(resume) for history in self.tuning_history: - if self._same_yaml(history['cfg'], self.conf): + if self._same_conf(history['cfg'], self.conf): self.__dict__.update({k: v for k, v in history.items() \ if k not in ['version', 'history']}) logger.info("Start to resume tuning process.") @@ -1005,7 +1003,6 @@ def _set_framework_info(self, q_dataloader, q_func=None): if item not in framework_specific_info['recipes']: framework_specific_info['recipes'].update({item: True}) 
if self.conf.quantization.backend == 'itex': - #TODO replace it with when config ready framework = 'tensorflow_itex' if 'keras' in framework: framework_specific_info.update({ @@ -1092,23 +1089,11 @@ def _set_objectives(self): metric_weight=self.metric_weight, obj_criterion=obj_higher_is_better, obj_weight=obj_weight) - - def _same_yaml(self, src_yaml, dst_yaml): - """Check if the two yamls are the same. - - The check will exclude those keys which do not really impact the tuning result, such as - tensorboard, workspace, resume options under the tuning section of YAML. - """ - return False - # TODO rewrite the compare method for new API - if equal_dicts(src_yaml, dst_yaml, ignore_keys=['tuning']) and \ - equal_dicts(src_yaml.tuning, src_yaml.tuning, compare_keys=['objective', - 'accuracy_criterion', - 'random_seed', - 'exit_policy']): - return True - - return False + + def _same_conf(self, src_conf, dst_conf): + """Check if the two configs are the same.""" + from ..utils.utility import compare_objects + return compare_objects(src_conf, dst_conf, {'_options', '_tuning', '_accuracy'}) def update_best_op_tuning_cfg(self, op_tuning_cfg): """Track and update the best tuning config with correspondence accuracy result. @@ -1143,7 +1128,6 @@ def update_best_op_tuning_cfg(self, op_tuning_cfg): def deploy_config(self): """Save the configuration locally for deployment.""" - # TODO need to double check self.deploy_cfg = OrderedDict() model_cfg = dict() model_cfg['inputs'] = self.conf.quantization.inputs @@ -1425,7 +1409,7 @@ def _save(self): pickle.dump(self, f, protocol=pickle.HIGHEST_PROTOCOL) def _find_tuning_history(self, tune_cfg): - """Check if the specified tune_cfg is evaluated or not on same yaml config. + """Check if the specified tune_cfg is evaluated or not on same config. Args: tune_cfg (dict): The tune_cfg to check if evaluated before. @@ -1434,10 +1418,9 @@ def _find_tuning_history(self, tune_cfg): tuning_history or None: The tuning history containing evaluated tune_cfg. """ for tuning_history in self.tuning_history: - # only check if a tune_cfg is evaluated under same yam config, excluding - # some fields in tuning section of yaml, such as tensorboard, snapshot, resume. - # TODO double check - if self._same_yaml(tuning_history['cfg'], self.conf): + # only check if a tune_cfg is evaluated under same config, excluding + # some fields in tuning section of config, such as tensorboard, snapshot, resume. + if self._same_conf(tuning_history['cfg'], self.conf): for history in tuning_history['history']: if history and history['tune_cfg'] == tune_cfg: return tuning_history @@ -1445,16 +1428,15 @@ def _find_tuning_history(self, tune_cfg): return None def _find_history(self, tune_cfg): - """Check if the specified tune_cfg is evaluated or not on same yaml config. + """Check if the specified tune_cfg is evaluated or not on same config. Returns: history or None: The history containing evaluated tune_cfg. """ for tuning_history in self.tuning_history: - # only check if a tune_cfg is evaluated under same yam config, excluding - # some fields in tuning section of yaml, such as tensorboard, snapshot, resume. - # TODO double check - if self._same_yaml(tuning_history['cfg'], self.conf): + # only check if a tune_cfg is evaluated under same config, excluding + # some fields in tuning section of config, such as tensorboard, snapshot, resume. 
+ if self._same_conf(tuning_history['cfg'], self.conf): for history in tuning_history['history']: if history and history['tune_cfg'] == tune_cfg: return history @@ -1467,9 +1449,9 @@ def _find_self_tuning_history(self): history or None: The history for self. """ for tuning_history in self.tuning_history: - # only check if a tune_cfg is evaluated under same yam config, excluding - # some fields in tuning section of yaml, such as tensorboard, snapshot, resume. - if self._same_yaml(tuning_history['cfg'], self.conf): + # only check if a tune_cfg is evaluated under same config, excluding + # some fields in tuning section of config, such as tensorboard, snapshot, resume. + if self._same_conf(tuning_history['cfg'], self.conf): return tuning_history return None @@ -1487,7 +1469,7 @@ def _add_tuning_history(self, tune_cfg=None, tune_result=None, **kwargs): 'last_tune_result': last_tune_result1, 'best_tune_result': best_tune_result1, 'history': [ - # tuning history under same yaml config + # tuning history under same config {'tune_cfg': tune_cfg1, 'tune_result': \ tune_result1, 'q_config': q_config1, ...}, @@ -1496,16 +1478,16 @@ def _add_tuning_history(self, tune_cfg=None, tune_result=None, **kwargs): # new fields added by subclass for resuming ..., }, - # tuning history under different yaml configs + # tuning history under different configs ..., ] - Note this record is added under same yaml config. + Note this record is added under same config. """ found = False d = {'tune_cfg': tune_cfg, 'tune_result': tune_result} for tuning_history in self.tuning_history: - if self._same_yaml(tuning_history['cfg'], self.conf): + if self._same_conf(tuning_history['cfg'], self.conf): d.update(kwargs) tuning_history['history'].append(d) tuning_history['last_tune_result'] = self.last_tune_result diff --git a/neural_compressor/utils/utility.py b/neural_compressor/utils/utility.py index 7e7b8d2b02d..189f9e5fa4a 100644 --- a/neural_compressor/utils/utility.py +++ b/neural_compressor/utils/utility.py @@ -585,4 +585,76 @@ def dump_class_attrs(obj, result = {}): dump_class_attrs(value, result=result[obj_name]) else: attr = attr[1:] if attr.startswith('_') else attr - result[obj_name][attr] = value \ No newline at end of file + result[obj_name][attr] = value + + + + +class DotDict(dict): + """access yaml using attributes instead of using the dictionary notation. + + Args: + value (dict): The dict object to access. + + """ + + def __init__(self, value=None): + if value is None: + pass + elif isinstance(value, dict): + for key in value: + self.__setitem__(key, value[key]) + else: + raise TypeError('expected dict') + + def __getitem__(self, key): + value = self.get(key, None) + return value + + def __setitem__(self, key, value): + if isinstance(value, dict) and not isinstance(value, DotDict): + value = DotDict(value) + if isinstance(value, list) and len(value) == 1 and isinstance( + value[0], dict): + value = DotDict(value[0]) + if isinstance(value, list) and len(value) > 1 and all(isinstance( + v, dict) for v in value): + value = DotDict({k: v for d in value for k, v in d.items()}) + super(DotDict, self).__setitem__(key, value) + + def __getstate__(self): + return self.__dict__ + + def __setstate__(self, d): + self.__dict__.update(d) + + __setattr__, __getattr__ = __setitem__, __getitem__ + + + +def compare_objects(obj1, obj2, ignore_attrs): + """ + Compare two objects and ignore the specified attributes. + + Args: + obj1: The first object to compare. + obj2: The second object to compare. 
+        ignore_attrs: A list of attribute names to ignore during the comparison.
+
+    Returns:
+        True if the objects are equal ignoring the specified attributes, False otherwise.
+    """
+
+    # Check if the objects are of the same type
+    if type(obj1) != type(obj2):
+        return False
+
+    # Check if the objects have the same set of attributes
+    attrs1 = set(obj1.__dict__.keys())
+    attrs2 = set(obj2.__dict__.keys())
+    if attrs1 != attrs2:
+        return False
+    # Compare the attributes, ignoring the specified ones
+    for attr in attrs1 - set(ignore_attrs):
+        if getattr(obj1, attr) != getattr(obj2, attr):
+            return False
+    # All non-ignored attributes matched
+    return True
\ No newline at end of file

From a2c5620b0c0a9053bd588be2e69aa2986e4d700d Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Fri, 7 Apr 2023 15:22:36 +0800
Subject: [PATCH 050/103] fix ut

Signed-off-by: yiliu30
---
 test/config/test_config.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/config/test_config.py b/test/config/test_config.py
index 36c09ad05e8..e0eaf98bcc5 100644
--- a/test/config/test_config.py
+++ b/test/config/test_config.py
@@ -69,7 +69,6 @@ def test_config(self):
         self.assertEqual(a.usr_cfg.graph_optimization.op_wise,
                          {'weight': {'dtype': ['bf16']}, 'activation': {'dtype': ['bf16']}})
         conf.distillation.train.iteration = 900
-        import pdb; pdb.set_trace()
         a = DistillationConf(conf)
         self.assertEqual(a.usr_cfg.distillation.train.iteration, 900)

From e387d79629bd1a4bc372f1e9d1d495b8c89d2b81 Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Fri, 7 Apr 2023 15:44:21 +0800
Subject: [PATCH 051/103] refine the log hints

Signed-off-by: yiliu30
---
 neural_compressor/strategy/strategy.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py
index 31718c40e52..5c9e584df82 100644
--- a/neural_compressor/strategy/strategy.py
+++ b/neural_compressor/strategy/strategy.py
@@ -81,8 +81,15 @@ class TuneStrategy(object):
     def _check_tuning_status(self):
         if self.eval_func:
             self._not_tuning = False
+            logger.info("Execute the tuning process since an evaluation function was detected.")
         if self.eval_dataloader and self.eval_metric:
             self._not_tuning = False
+            logger.info("Create an evaluation function based on the evaluation dataloader and metric\
+                and execute the tuning process.")
+        if self._not_tuning:
+            logger.info("Quantize the model with default configuration without evaluating the model.\
+                To perform the tuning process, please either provide an eval_func or provide an\
+                eval_dataloader and eval_metric.")
+
     def __init__(self,
                  model,
@@ -681,8 +688,8 @@ def _remove_redundant_qmodel(self):
     def _eval_baseline(self):
         """Evaluate the fp32 model if needed."""
         if self._not_tuning:
-            logger.info("Neither evaluation function nor metric and evaluation dataloader is defined." \
\ - " Generate a quantized model with default quantization configuration.") + + logger.info("Do not evaluate the baseline and quantize the model with default configuration.") return else: # get fp32 model baseline From ce763db52cf03d9ed508c2bf21ba2809e15072e5 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Fri, 7 Apr 2023 16:13:08 +0800 Subject: [PATCH 052/103] add docstring Signed-off-by: yiliu30 --- neural_compressor/utils/utility.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/neural_compressor/utils/utility.py b/neural_compressor/utils/utility.py index 189f9e5fa4a..cc991865fbc 100644 --- a/neural_compressor/utils/utility.py +++ b/neural_compressor/utils/utility.py @@ -599,6 +599,11 @@ class DotDict(dict): """ def __init__(self, value=None): + """Init DotDict. + + Args: + value: The value to be initialized. Defaults to None. + """ if value is None: pass elif isinstance(value, dict): @@ -608,10 +613,21 @@ def __init__(self, value=None): raise TypeError('expected dict') def __getitem__(self, key): + """Get value by key. + + Args: + key: The query item. + """ value = self.get(key, None) return value def __setitem__(self, key, value): + """Add new key and value pair. + + Args: + key: something like key in dict. + value: value assigned to key. + """ if isinstance(value, dict) and not isinstance(value, DotDict): value = DotDict(value) if isinstance(value, list) and len(value) == 1 and isinstance( @@ -623,9 +639,11 @@ def __setitem__(self, key, value): super(DotDict, self).__setitem__(key, value) def __getstate__(self): + """Return self dict.""" return self.__dict__ def __setstate__(self, d): + """Update self dict.""" self.__dict__.update(d) __setattr__, __getattr__ = __setitem__, __getitem__ @@ -633,8 +651,7 @@ def __setstate__(self, d): def compare_objects(obj1, obj2, ignore_attrs): - """ - Compare two objects and ignore the specified attributes. + """Compare two objects and ignore the specified attributes. Args: obj1: The first object to compare. 
From 3b572af21175847511bca318f796b3241dedf4cc Mon Sep 17 00:00:00 2001
From: yiliu30 
Date: Fri, 7 Apr 2023 16:16:03 +0800
Subject: [PATCH 053/103] update conf compare

Signed-off-by: yiliu30 
---
 neural_compressor/contrib/strategy/tpe.py | 2 +-
 neural_compressor/strategy/bayesian.py    | 2 +-
 neural_compressor/strategy/mse.py         | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/neural_compressor/contrib/strategy/tpe.py b/neural_compressor/contrib/strategy/tpe.py
index abafa483bb7..ff9c777006d 100644
--- a/neural_compressor/contrib/strategy/tpe.py
+++ b/neural_compressor/contrib/strategy/tpe.py
@@ -157,7 +157,7 @@ def __getstate__(self):
             dict: Saved dict for resuming
         """
         for history in self.tuning_history:
-            if self._same_yaml(history['cfg'], self.conf):
+            if self._same_conf(history['cfg'], self.conf):
                 history['warm_start'] = True
                 history['hpopt_trials'] = self.hpopt_trials
                 history['loss_function_config'] = self.loss_function_config
diff --git a/neural_compressor/strategy/bayesian.py b/neural_compressor/strategy/bayesian.py
index eed2ef84cbf..5f6232ce1cd 100644
--- a/neural_compressor/strategy/bayesian.py
+++ b/neural_compressor/strategy/bayesian.py
@@ -81,7 +81,7 @@ def __getstate__(self):
             dict: Saved dict for resuming
         """
         for history in self.tuning_history:
-            if self._same_yaml(history['cfg'], self.conf):
+            if self._same_conf(history['cfg'], self.conf):
                 history['bayes_opt'] = self.bayes_opt
         save_dict = super().__getstate__()
         return save_dict
diff --git a/neural_compressor/strategy/mse.py b/neural_compressor/strategy/mse.py
index 14803e04b1d..5a769ee4159 100644
--- a/neural_compressor/strategy/mse.py
+++ b/neural_compressor/strategy/mse.py
@@ -81,7 +81,7 @@ def __getstate__(self):
             save_dict: Saved dict for resuming
         """
         for history in self.tuning_history:
-            if self._same_yaml(history['cfg'], self.conf):
+            if self._same_conf(history['cfg'], self.conf):
                 history['ordered_ops'] = self.ordered_ops
         save_dict = super().__getstate__()
         return save_dict

From d9193acb1ed9348e249e4bf99f18426eb5b1d84d Mon Sep 17 00:00:00 2001
From: yiliu30 
Date: Fri, 7 Apr 2023 16:29:24 +0800
Subject: [PATCH 054/103] update the condition for not tuning

Signed-off-by: yiliu30 
---
 neural_compressor/strategy/strategy.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py
index 5c9e584df82..3488dd14a92 100644
--- a/neural_compressor/strategy/strategy.py
+++ b/neural_compressor/strategy/strategy.py
@@ -82,10 +82,23 @@ def _check_tuning_status(self):
         if self.eval_func:
             self._not_tuning = False
             logger.info("Execute the tuning process since an evaluation function was detected.")
+            if self.eval_dataloader:
+                logger.warning("Ignore the evaluation dataloader because an evaluation function exists.")
+            if self.eval_metric:
+                logger.warning("Ignore the evaluation metric because an evaluation function exists.")
+            return
         if self.eval_dataloader and self.eval_metric:
             self._not_tuning = False
             logger.info("Create the evaluation function from the evaluation dataloader "
                         "and metric, and execute the tuning process.")
+            return
+        else:
+            if self.eval_dataloader:
+                assert self.eval_metric, "Detected an evaluation dataloader but no evaluation metric. " \
+                    "Please provide both to perform tuning, or neither for the default quantization."
+            if self.eval_metric:
+                assert self.eval_dataloader, "Detected an evaluation metric but no evaluation dataloader. " \
+                    "Please provide both to perform tuning, or neither for the default quantization."
         if self._not_tuning:
             logger.info("Quantize the model with the default configuration without evaluating it. "
                         "To perform the tuning process, please provide either an eval_func, or "
                         "both an eval_dataloader and an eval_metric.")

From 34d8926bd08238c41fa861d94b5d17ba592d2f64 Mon Sep 17 00:00:00 2001
From: "Cheng, Zixuan" 
Date: Fri, 7 Apr 2023 17:03:31 +0800
Subject: [PATCH 055/103] fix for ut

Signed-off-by: Cheng, Zixuan 
---
 neural_compressor/__init__.py                 |  3 --
 neural_compressor/compression/callbacks.py    | 50 +++++++++----------
 neural_compressor/compression/pruner/utils.py |  5 +-
 neural_compressor/config.py                   | 10 +---
 test/benchmark/test_benchmark.py              |  4 +-
 test/config/test_pythonic_config.py           |  2 +-
 test/pruning_1.x_v2/test_pruning.py           |  3 +-
 test/pruning_1.x_v2/test_pruning_config.py    |  3 +-
 test/pruning_1.x_v2/test_pruning_criteria.py  |  3 +-
 test/pruning_1.x_v2/test_pruning_patterns.py  |  3 +-
 test/pruning_1.x_v2/test_pruning_regs.py      |  3 +-
 .../pruning_1.x_v2/test_pruning_schedulers.py |  3 +-
 test/pruning_1.x_v2/test_pruning_types.py     |  3 +-
 13 files changed, 39 insertions(+), 56 deletions(-)

diff --git a/neural_compressor/__init__.py b/neural_compressor/__init__.py
index 1ac3a75c4af..82f88fb4d92 100644
--- a/neural_compressor/__init__.py
+++ b/neural_compressor/__init__.py
@@ -20,9 +20,6 @@
 # we need to set a global 'NA' backend, or Model can't be used
 from .utils.utility import set_random_seed, set_tensorboard, set_workspace
 from .utils import options
-from .conf.config import conf
-from .conf.pythonic_config import config
 from .config import DistillationConfig, PostTrainingQuantConfig, \
     WeightPruningConfig, QuantizationAwareTrainingConfig, \
     MixedPrecisionConfig
-from .contrib import *
\ No newline at end of file
diff --git a/neural_compressor/compression/callbacks.py b/neural_compressor/compression/callbacks.py
index 3bfda9697d0..ee813185669 100644
--- a/neural_compressor/compression/callbacks.py
+++ b/neural_compressor/compression/callbacks.py
@@ -219,22 +219,22 @@ def model(self, user_model):
             user_model.model if isinstance(user_model, BaseModel) else user_model)
         if self.framework == "tensorflow":
             try:
-                if self.cfg.qat_quantization.approach == "quant_aware_training":
+                if self.cfg.quantization.approach == "quant_aware_training":
                     self.framework = 'tensorflow_itex'
                 else:
                     from ..model.tensorflow_model import get_model_type
-                    if get_model_type(user_model) == 'keras' and self.cfg.qat_quantization.backend == 'itex':
+                    if get_model_type(user_model) == 'keras' and self.cfg.quantization.backend == 'itex':
                         self.framework = 'keras'
             except Exception as e:
                 pass
 
         if self.framework == "pytorch":
             try:
-                if self.cfg.qat_quantization.backend == "default":
+                if self.cfg.quantization.backend == "default":
                     self.framework = "pytorch_fx"
-                elif self.cfg.qat_quantization.backend == "ipex":
+                elif self.cfg.quantization.backend == "ipex":
                     self.framework = "pytorch_ipex"
-                self.cfg.qat_quantization.framework = self.framework
+                self.cfg.quantization.framework = self.framework
             except Exception as e:
                 pass
 
@@ -247,7 +247,7 @@ def model(self, user_model):
             self._model = TensorflowQATModel(user_model._model)
         elif "tensorflow" in self.framework or self.framework == "keras":
             try:
-                self._model = Model(user_model, backend=self.framework, device=self.cfg.qat_quantization.device)
+                self._model = Model(user_model, backend=self.framework, device=self.cfg.quantization.device)
             except Exception as e:
                 self._model = Model(user_model, backend=self.framework, device=None)
         else:
@@ -257,9 +257,9 @@ def model(self, user_model):
 
         if 'tensorflow' in self.framework:
             try:
-
self._model.name = self.cfg.qat_quantization.model_name - self._model.output_tensor_names = self.cfg.qat_quantization.outputs - self._model.input_tensor_names = self.cfg.qat_quantization.inputs + self._model.name = self.cfg.quantization.model_name + self._model.output_tensor_names = self.cfg.quantization.outputs + self._model.input_tensor_names = self.cfg.quantization.inputs self._model.workspace_path = self.cfg.options.workspace except Exception as e: self._model.name = None @@ -274,17 +274,17 @@ def pre_process(self): self.remove_hook("on_train_begin", self.adaptor._pre_hook_for_qat) self.remove_hook("on_train_end", self.adaptor._post_hook_for_qat) - strategy = self.cfg.qat_quantization.tuning_criterion.strategy.lower() - if self.cfg.qat_quantization.quant_level == 0: + strategy = self.cfg.quantization.tuning_criterion.strategy.lower() + if self.cfg.quantization.quant_level == 0: strategy = "conservative" logger.info(f"On the premise that the accuracy meets the conditions, improve the performance.") if strategy == "mse_v2": - if not (self.cfg.qat_quantization.framework.startswith("tensorflow") \ - or self.cfg.qat_quantization.framework == 'pytorch_fx'): + if not (self.cfg.quantization.framework.startswith("tensorflow") \ + or self.cfg.quantization.framework == 'pytorch_fx'): strategy = "basic" logger.warning(f"MSE_v2 does not support \ - {self.cfg.qat_quantization.framework} now, use basic instead.") + {self.cfg.quantization.framework} now, use basic instead.") logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.") assert strategy in STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy) @@ -313,7 +313,7 @@ def pre_process(self): def execute(self): """Quantization Aware Training execute routinue based on strategy design.""" try: - with time_limit(self.conf.qat_quantization.tuning_criterion.timeout): + with time_limit(self.conf.quantization.tuning_criterion.timeout): logger.debug("Dump user yaml configuration:") logger.debug(self.conf) self.strategy.traverse() @@ -470,7 +470,7 @@ def metric(self, user_metric): metric_cls = type(user_metric).__name__ name = 'user_' + metric_cls metric_cfg = {name: id(user_metric)} - metrics = METRICS(self.conf.qat_quantization.framework) + metrics = METRICS(self.conf.quantization.framework) metrics.register(name, metric_cls) self._metric = metric_cfg @@ -505,21 +505,21 @@ def __init__(self, conf=None, model=None): random.seed(seed) np.random.seed(seed) - framework_specific_info = {'device': self.cfg.qat_quantization.device, + framework_specific_info = {'device': self.cfg.quantization.device, 'random_seed': self.cfg.options.random_seed, 'workspace_path': self.cfg.options.workspace, 'q_dataloader': None, - 'backend': self.cfg.qat_quantization.backend if \ - self.cfg.qat_quantization.backend is not None else 'default', - 'format': self.cfg.qat_quantization.quant_format if \ - self.cfg.qat_quantization.quant_format is not None else 'default'} - if self.cfg.qat_quantization.approach is not None: - framework_specific_info['approach'] = self.cfg.qat_quantization.approach + 'backend': self.cfg.quantization.backend if \ + self.cfg.quantization.backend is not None else 'default', + 'format': self.cfg.quantization.quant_format if \ + self.cfg.quantization.quant_format is not None else 'default'} + if self.cfg.quantization.approach is not None: + framework_specific_info['approach'] = self.cfg.quantization.approach if 'tensorflow' in self.framework: framework_specific_info.update( - {"inputs": self.cfg.qat_quantization.inputs, \ - 
"outputs": self.cfg.qat_quantization.outputs}) + {"inputs": self.cfg.quantization.inputs, \ + "outputs": self.cfg.quantization.outputs}) self.adaptor = FRAMEWORKS[self.framework](framework_specific_info) self.adaptor.model = self.model self.register_hook('on_train_begin', self.adaptor._pre_hook_for_qat) diff --git a/neural_compressor/compression/pruner/utils.py b/neural_compressor/compression/pruner/utils.py index c5134f6ec36..90896d09176 100644 --- a/neural_compressor/compression/pruner/utils.py +++ b/neural_compressor/compression/pruner/utils.py @@ -18,6 +18,7 @@ import re import yaml +from ...config import WeightPruningConfig as WeightPruningConf try: from ...conf.pythonic_config import WeightPruningConfig @@ -349,7 +350,7 @@ def process_and_check_config(val): default_config.update(default_global_config) default_config.update(default_local_config) default_config.update(params_default_config) - if isinstance(val, WeightPruningConfig): + if isinstance(val, WeightPruningConfig) or isinstance(val, WeightPruningConf): global_configs = val.weight_compression pruning_configs = val.pruning_configs check_key_validity(default_config, pruning_configs) @@ -390,7 +391,7 @@ def process_config(config): "The yaml file format is not correct. Please refer to document." ) - if isinstance(config, WeightPruningConfig): + if isinstance(config, WeightPruningConfig) or isinstance(config, WeightPruningConf): return process_and_check_config(config) else: assert False, f"not supported type {config}" diff --git a/neural_compressor/config.py b/neural_compressor/config.py index a6c7e9c9b63..9a99689119d 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -2045,7 +2045,6 @@ def __init__(self, precisions=None): quantization = PostTrainingQuantConfig() -qat_quantization = QuantizationAwareTrainingConfig() benchmark = BenchmarkConfig() options = Options() mixed_precision = MixedPrecisionConfig() @@ -2063,7 +2062,6 @@ class Config: """Main config class.""" def __init__(self, quantization=quantization, - qat_quantization=qat_quantization, benchmark=benchmark, options=options, mixed_precision=mixed_precision, @@ -2079,8 +2077,7 @@ def __init__(self, tuning_criterion=tuning_criterion ): """Init a config object.""" - self._quantization = quantization - self._qat_quantization = qat_quantization + self._quantization = None self._benchmark = benchmark self._options = options self._mixed_precision=mixed_precision @@ -2134,11 +2131,6 @@ def pruning(self): def quantization(self): """Get the quantization object.""" return self._quantization - - @property - def qat_quantization(self): - """Get the qat quantization object.""" - return self._qat_quantization @property def benchmark(self): diff --git a/test/benchmark/test_benchmark.py b/test/benchmark/test_benchmark.py index 1a0450b4425..4181d4fab39 100644 --- a/test/benchmark/test_benchmark.py +++ b/test/benchmark/test_benchmark.py @@ -110,7 +110,7 @@ def build_benchmark2(): arg_parser.add_argument('--input_model', dest='input_model', default='input_model', help='input odel') args = arg_parser.parse_args() -from neural_compressor import conf +from neural_compressor.conf.config import conf from neural_compressor.experimental import Benchmark, common conf.evaluation.performance.dataloader.dataset = {'dummy': {'shape': [100,32,32,1], 'label':True}} benchmarker = Benchmark(conf) @@ -134,7 +134,7 @@ def reset(self): def result(self): return 1. 
-from neural_compressor import conf +from neural_compressor.conf.config import conf from neural_compressor.experimental import Benchmark, common conf.evaluation.accuracy.dataloader.dataset = {'dummy': {'shape': [100,32,32,1], 'label':True}} benchmarker = Benchmark(conf) diff --git a/test/config/test_pythonic_config.py b/test/config/test_pythonic_config.py index 4437e0ee3f8..5f10adf1dd6 100644 --- a/test/config/test_pythonic_config.py +++ b/test/config/test_pythonic_config.py @@ -2,7 +2,7 @@ import copy import unittest import os -from neural_compressor import config +from neural_compressor.conf.pythonic_config import config import onnxruntime as ort from onnx import helper, TensorProto import numpy as np diff --git a/test/pruning_1.x_v2/test_pruning.py b/test/pruning_1.x_v2/test_pruning.py index 2eea6600800..947c44c6348 100644 --- a/test/pruning_1.x_v2/test_pruning.py +++ b/test/pruning_1.x_v2/test_pruning.py @@ -5,8 +5,7 @@ import torch.nn as nn from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.conf.pythonic_config import Config -from neural_compressor.config import WeightPruningConfig +from neural_compressor.conf.pythonic_config import Config, WeightPruningConfig from neural_compressor.experimental.pruning_v2 import Pruning diff --git a/test/pruning_1.x_v2/test_pruning_config.py b/test/pruning_1.x_v2/test_pruning_config.py index 3a7f62735aa..ec43a0e409a 100644 --- a/test/pruning_1.x_v2/test_pruning_config.py +++ b/test/pruning_1.x_v2/test_pruning_config.py @@ -6,8 +6,7 @@ from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.conf.pythonic_config import Config -from neural_compressor.config import WeightPruningConfig +from neural_compressor.conf.pythonic_config import Config, WeightPruningConfig from neural_compressor.experimental.pruning_v2 import Pruning diff --git a/test/pruning_1.x_v2/test_pruning_criteria.py b/test/pruning_1.x_v2/test_pruning_criteria.py index 100024840a3..d99d156da37 100644 --- a/test/pruning_1.x_v2/test_pruning_criteria.py +++ b/test/pruning_1.x_v2/test_pruning_criteria.py @@ -6,8 +6,7 @@ from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.conf.pythonic_config import Config -from neural_compressor.config import WeightPruningConfig +from neural_compressor.conf.pythonic_config import Config, WeightPruningConfig from neural_compressor.experimental.pruning_v2 import Pruning diff --git a/test/pruning_1.x_v2/test_pruning_patterns.py b/test/pruning_1.x_v2/test_pruning_patterns.py index 86d16ade6a5..8704ead9bd5 100644 --- a/test/pruning_1.x_v2/test_pruning_patterns.py +++ b/test/pruning_1.x_v2/test_pruning_patterns.py @@ -6,8 +6,7 @@ from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.conf.pythonic_config import Config -from neural_compressor.config import WeightPruningConfig +from neural_compressor.conf.pythonic_config import Config, WeightPruningConfig from neural_compressor.experimental.pruning_v2 import Pruning diff --git a/test/pruning_1.x_v2/test_pruning_regs.py b/test/pruning_1.x_v2/test_pruning_regs.py index 468768ddec9..2b123a27241 100644 --- a/test/pruning_1.x_v2/test_pruning_regs.py +++ 
b/test/pruning_1.x_v2/test_pruning_regs.py @@ -6,8 +6,7 @@ from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.conf.pythonic_config import Config -from neural_compressor.config import WeightPruningConfig +from neural_compressor.conf.pythonic_config import Config, WeightPruningConfig from neural_compressor.experimental.pruning_v2 import Pruning local_regs_config = [ diff --git a/test/pruning_1.x_v2/test_pruning_schedulers.py b/test/pruning_1.x_v2/test_pruning_schedulers.py index e8a263f4aa7..cdb8e235ed2 100644 --- a/test/pruning_1.x_v2/test_pruning_schedulers.py +++ b/test/pruning_1.x_v2/test_pruning_schedulers.py @@ -6,8 +6,7 @@ from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.conf.pythonic_config import Config -from neural_compressor.config import WeightPruningConfig +from neural_compressor.conf.pythonic_config import Config, WeightPruningConfig from neural_compressor.experimental.pruning_v2 import Pruning local_schedulers_config = [ diff --git a/test/pruning_1.x_v2/test_pruning_types.py b/test/pruning_1.x_v2/test_pruning_types.py index 7eb43e58eb8..1a5bfeef270 100644 --- a/test/pruning_1.x_v2/test_pruning_types.py +++ b/test/pruning_1.x_v2/test_pruning_types.py @@ -6,8 +6,7 @@ from neural_compressor.data import Datasets from neural_compressor.experimental.data.dataloaders.pytorch_dataloader import PyTorchDataLoader -from neural_compressor.conf.pythonic_config import Config -from neural_compressor.config import WeightPruningConfig +from neural_compressor.conf.pythonic_config import Config, WeightPruningConfig from neural_compressor.experimental.pruning_v2 import Pruning local_types_config = [ From d782c9e28b0a6a0a419a84738bef508915f55ef9 Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Fri, 7 Apr 2023 17:31:47 +0800 Subject: [PATCH 056/103] minor fix Signed-off-by: Cheng, Zixuan --- neural_compressor/config.py | 4 ++-- neural_compressor/utils/utility.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 9a99689119d..6df8059fb9a 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -2077,10 +2077,10 @@ def __init__(self, tuning_criterion=tuning_criterion ): """Init a config object.""" - self._quantization = None + self._quantization = quantization self._benchmark = benchmark self._options = options - self._mixed_precision=mixed_precision + self._mixed_precision = mixed_precision self._onnxruntime = onnxruntime self._pruning = pruning self._distillation = distillation diff --git a/neural_compressor/utils/utility.py b/neural_compressor/utils/utility.py index cc991865fbc..43d95c26e0d 100644 --- a/neural_compressor/utils/utility.py +++ b/neural_compressor/utils/utility.py @@ -661,7 +661,6 @@ def compare_objects(obj1, obj2, ignore_attrs): Returns: True if the objects are equal ignoring the specified attributes, False otherwise. 
""" - # Check if the objects are of the same type if type(obj1) != type(obj2): return False From 41ce7f10779a65cea1c19e3feaae8548c2a2159f Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Fri, 7 Apr 2023 18:34:25 +0800 Subject: [PATCH 057/103] fix for ut Signed-off-by: Cheng, Zixuan --- test/adaptor/mxnet_adaptor/test_mxnet_query_fwk.py | 3 ++- test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/test/adaptor/mxnet_adaptor/test_mxnet_query_fwk.py b/test/adaptor/mxnet_adaptor/test_mxnet_query_fwk.py index 2c3723c0ad9..c378d01c783 100644 --- a/test/adaptor/mxnet_adaptor/test_mxnet_query_fwk.py +++ b/test/adaptor/mxnet_adaptor/test_mxnet_query_fwk.py @@ -10,6 +10,7 @@ import mxnet as mx import neural_compressor +import neural_compressor.adaptor class TestMXNetQuery(unittest.TestCase): @@ -17,7 +18,7 @@ class TestMXNetQuery(unittest.TestCase): def setUpClass(self): if platform.system().lower() == "windows": self.skipTest(self, "not support mxnet on windows yet") - self.yaml_path = os.path.join(os.getcwd() + "/../neural_compressor/adaptor/mxnet.yaml") + self.yaml_path = os.path.abspath(os.path.join(os.getcwd(), "../../../neural_compressor/adaptor/mxnet.yaml")) self.Queryhandler = neural_compressor.adaptor.mxnet.MXNetQuery(self.yaml_path) self.version = mx.__version__ diff --git a/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py b/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py index 744c18ef93c..e7a37dac263 100644 --- a/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py +++ b/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py @@ -14,7 +14,7 @@ from neural_compressor.experimental import Quantization, common from neural_compressor.experimental import Benchmark, common from neural_compressor.adaptor.pytorch import get_torch_version -from neural_compressor import conf +from neural_compressor.conf.config import conf from packaging.version import Version from neural_compressor import quantization, PostTrainingQuantConfig From cb81d6984436313f48331a787ed09fa4fde8dcc9 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Sun, 9 Apr 2023 18:02:26 +0800 Subject: [PATCH 058/103] fixed tpe Signed-off-by: yiliu30 --- neural_compressor/__init__.py | 1 + test/strategy/test_tpe.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/neural_compressor/__init__.py b/neural_compressor/__init__.py index 82f88fb4d92..ffd79a5bb06 100644 --- a/neural_compressor/__init__.py +++ b/neural_compressor/__init__.py @@ -23,3 +23,4 @@ from .config import DistillationConfig, PostTrainingQuantConfig, \ WeightPruningConfig, QuantizationAwareTrainingConfig, \ MixedPrecisionConfig +from .contrib import * diff --git a/test/strategy/test_tpe.py b/test/strategy/test_tpe.py index 79ad7bcc20a..61e30ca249a 100644 --- a/test/strategy/test_tpe.py +++ b/test/strategy/test_tpe.py @@ -58,7 +58,7 @@ def test_run_tpe_one_trial(self): dataloader = DATALOADERS["tensorflow"](dataset) # tuning and accuracy criterion - tune_cri = TuningCriterion(strategy='tpe') + tune_cri = TuningCriterion(strategy='tpe', max_trials=200) acc_cri = AccuracyCriterion(tolerable_loss=-0.01) def eval_func(model): return 1 From 894c3261bb48892cb400b307f2dc1675306f8639 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Sun, 9 Apr 2023 18:07:01 +0800 Subject: [PATCH 059/103] fixed resnet50 example Signed-off-by: yiliu30 --- .../tensorflow_models/resnet50_v1_5/quantization/ptq/main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py index ceaa73eb549..86cb6b2efd9 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py @@ -116,9 +116,12 @@ def run(self): 'filter': None } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) + + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) conf = PostTrainingQuantConfig(outputs=['softmax_tensor'], calibration_sampling_size=[50, 100]) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader) + eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) if args.benchmark: From 22e161dbb742108c1b5f9ec3f31c4e61ad6593a9 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Sun, 9 Apr 2023 18:10:03 +0800 Subject: [PATCH 060/103] fixed mxnet query Signed-off-by: yiliu30 --- test/adaptor/mxnet_adaptor/test_mxnet_query_fwk.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/adaptor/mxnet_adaptor/test_mxnet_query_fwk.py b/test/adaptor/mxnet_adaptor/test_mxnet_query_fwk.py index c378d01c783..227b03d8312 100644 --- a/test/adaptor/mxnet_adaptor/test_mxnet_query_fwk.py +++ b/test/adaptor/mxnet_adaptor/test_mxnet_query_fwk.py @@ -18,7 +18,9 @@ class TestMXNetQuery(unittest.TestCase): def setUpClass(self): if platform.system().lower() == "windows": self.skipTest(self, "not support mxnet on windows yet") - self.yaml_path = os.path.abspath(os.path.join(os.getcwd(), "../../../neural_compressor/adaptor/mxnet.yaml")) + import importlib + nc_path = os.path.dirname(importlib.util.find_spec('neural_compressor').origin) + self.yaml_path = os.path.join(nc_path, 'adaptor/mxnet.yaml') self.Queryhandler = neural_compressor.adaptor.mxnet.MXNetQuery(self.yaml_path) self.version = mx.__version__ From 08d339066c8b9694bb757912a752e152677fcdb5 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Sun, 9 Apr 2023 18:20:56 +0800 Subject: [PATCH 061/103] fixed ipex ut Signed-off-by: yiliu30 --- test/ipex/test_adaptor_ipex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/ipex/test_adaptor_ipex.py b/test/ipex/test_adaptor_ipex.py index 9573d995d0e..3244ccf4a43 100644 --- a/test/ipex/test_adaptor_ipex.py +++ b/test/ipex/test_adaptor_ipex.py @@ -6,7 +6,7 @@ from neural_compressor.experimental import common from packaging.version import Version from neural_compressor.utils.utility import LazyImport -from neural_compressor import config +from neural_compressor.conf.pythonic_config import config from neural_compressor.utils.pytorch import load torch_utils = LazyImport("neural_compressor.adaptor.torch_utils") From 7b1542bff9b15d057230f214f1bc02cea8ba6a7d Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Mon, 10 Apr 2023 10:49:45 +0800 Subject: [PATCH 062/103] fix for 1.x distillation Signed-off-by: Cheng, Zixuan --- neural_compressor/experimental/distillation.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/neural_compressor/experimental/distillation.py b/neural_compressor/experimental/distillation.py index de9d50ba3c9..eb267905a98 100644 --- a/neural_compressor/experimental/distillation.py +++ b/neural_compressor/experimental/distillation.py @@ -26,7 +26,7 @@ from ..adaptor import 
FRAMEWORKS from neural_compressor.experimental.common import Criterions, Optimizers from ..conf.config import DistillationConf -from ..conf.pythonic_config import Config +from ..conf.pythonic_config import Config, DotDict class Distillation(Component): """Distillation class derived from Component class. @@ -165,7 +165,12 @@ def create_criterion(self): assert 'criterion' in self._train_cfg.keys(), \ "criterion part in train field of distillation section in yaml file " \ "must be configured for distillation if criterion is NOT set." - criterion_cfg = self._train_cfg.criterion.config + + if isinstance(self._train_cfg.criterion, DotDict): + criterion_cfg = self._train_cfg.criterion + else: + criterion_cfg = self._train_cfg.criterion.config + assert len(criterion_cfg) == 1, "There must be exactly one loss in " \ "criterion part, instead got {} loss.".format(len(criterion_cfg)) loss = [i for i in criterion_cfg.keys()][0] From 5688795fffdc4a0163f3302d2b595d529fa896d9 Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Mon, 10 Apr 2023 12:09:23 +0800 Subject: [PATCH 063/103] fix for 1.x config ut Signed-off-by: Cheng, Zixuan --- test/config/test_config.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/config/test_config.py b/test/config/test_config.py index e0eaf98bcc5..032792b73b1 100644 --- a/test/config/test_config.py +++ b/test/config/test_config.py @@ -21,8 +21,7 @@ def test_config(self): class TestPyConf(unittest.TestCase): def test_config(self): - from neural_compressor import conf - from neural_compressor.conf.config import QuantConf, PruningConf, \ + from neural_compressor.conf.config import conf, QuantConf, PruningConf, \ GraphOptConf, BenchmarkConf, DistillationConf conf.tuning.accuracy_criterion.relative = 0.2 @@ -289,7 +288,7 @@ def test_tuning(self): framework: mxnet tuning: accuracy_criterion: - relative: 0.01 + relative: 0.01 strategy: name: fake ''' From c389cb4e880247c6305c1dc41a16771580736900 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Mon, 10 Apr 2023 13:16:34 +0800 Subject: [PATCH 064/103] add more ut for strategy Signed-off-by: yiliu30 --- test/strategy/test_bayesian.py | 141 ++++++++++----------------- test/strategy/test_exhaustive.py | 119 +++++++++------------- test/strategy/test_exhaustive_1.x.py | 128 ++++++++++++++++++++++++ test/strategy/test_random.py | 121 +++++++++-------------- test/strategy/test_sigopt.py | 1 - test/strategy/test_tpe.py | 4 +- 6 files changed, 270 insertions(+), 244 deletions(-) create mode 100644 test/strategy/test_exhaustive_1.x.py diff --git a/test/strategy/test_bayesian.py b/test/strategy/test_bayesian.py index e2d54d9b3e2..46fbc9ee674 100644 --- a/test/strategy/test_bayesian.py +++ b/test/strategy/test_bayesian.py @@ -1,73 +1,7 @@ """Tests for quantization""" import numpy as np import unittest -import os import shutil -import yaml - -def build_fake_yaml(): - fake_yaml = ''' - model: - name: fake_yaml - framework: tensorflow - inputs: x - outputs: op_to_store - device: cpu - quantization: - calibration: - sampling_size: 10 - evaluation: - accuracy: - metric: - topk: 1 - tuning: - strategy: - name: bayesian - exit_policy: - max_trials: 1 - accuracy_criterion: - relative: 0.01 - workspace: - path: saved - ''' - y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) - f.close() - -def build_fake_yaml2(): - fake_yaml = ''' - model: - name: fake_yaml - framework: tensorflow - inputs: input - outputs: final - device: cpu - quantization: - calibration: 
- sampling_size: 10, 20 - op_wise: { - \"conv1\": { - \"activation\": {\"dtype\": [\"fp32\"]}, - }, - } - evaluation: - accuracy: - metric: - topk: 1 - tuning: - strategy: - name: bayesian - exit_policy: - max_trials: 3 - accuracy_criterion: - relative: 0.01 - workspace: - path: saved - ''' - with open('fake_yaml2.yaml',"w",encoding="utf-8") as f: - f.write(fake_yaml) - f.close() def build_fake_model(): import tensorflow as tf @@ -256,37 +190,66 @@ class TestQuantization(unittest.TestCase): def setUpClass(self): self.constant_graph = build_fake_model() self.test_graph = create_test_graph() - build_fake_yaml() - build_fake_yaml2() @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') - os.remove('fake_yaml2.yaml') - shutil.rmtree("saved", ignore_errors=True) def test_run_bayesian_one_trial(self): - - from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', shape=(100, 3, 3, 1), label=True) - quantizer.eval_dataloader = common.DataLoader(dataset) - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.model = self.constant_graph - output_graph = quantizer.fit() - self.assertNotEqual(output_graph, None) + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion + from neural_compressor.data import Datasets, DATALOADERS + + # dataset and dataloader + dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) + dataloader = DATALOADERS["tensorflow"](dataset) + + # tuning and accuracy criterion + tune_cri = TuningCriterion(strategy='bayesian', max_trials=1) + acc_cri = AccuracyCriterion(tolerable_loss=0.01) + + conf = PostTrainingQuantConfig(quant_level=1, tuning_criterion=tune_cri, accuracy_criterion=acc_cri) + def fake_eval(model): + return 1 + + q_model = fit(model=self.constant_graph, + conf=conf, + calib_dataloader=dataloader, + eval_func=fake_eval) + self.assertNotEqual(q_model, None) def test_run_bayesian_max_trials(self): - - from neural_compressor.experimental import Quantization, common - quantizer = Quantization('fake_yaml2.yaml') - dataset = quantizer.dataset('dummy', shape=(1, 224, 224, 3), label=True) - quantizer.eval_dataloader = common.DataLoader(dataset) - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.model = self.test_graph - output_graph = quantizer.fit() - self.assertNotEqual(output_graph, None) + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion + from neural_compressor.data import Datasets, DATALOADERS + + # dataset and dataloader + dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) + dataloader = DATALOADERS["tensorflow"](dataset) + + # tuning and accuracy criterion + tune_cri = TuningCriterion(strategy='bayesian', max_trials=3) + acc_cri = AccuracyCriterion(tolerable_loss=0.01) + + op_name_dict = { + "conv1": { + "activation": {"dtype": ["fp32"]}, + }, + } + + acc = [0, 1, 0.9, 0.9, 1] + def fake_eval(model): + acc.pop(0) + return acc[0] + + conf = PostTrainingQuantConfig(quant_level=1, op_name_dict = op_name_dict,\ + tuning_criterion=tune_cri, accuracy_criterion=acc_cri) + q_model = fit(model=self.constant_graph, + conf=conf, + calib_dataloader=dataloader, + eval_func=fake_eval) + self.assertNotEqual(q_model, None) + def test_bayesian_opt_class(self): from neural_compressor.strategy.bayesian import BayesianOptimization 
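
The rewritten tests above (and the ones that follow) share one pattern worth calling out: the evaluation stub is stateful, popping the head of a list on each call, so the tuner observes a fixed accuracy trajectory. A standalone sketch of what `acc = [0, 1, 0.9, 0.9, 1]` encodes, with the tolerable loss at 1% as in the tests:

```python
# One call is consumed by the FP32 baseline measurement, then one per trial.
acc = [0, 1, 0.9, 0.9, 1]

def fake_eval(model):
    acc.pop(0)
    return acc[0]

print(fake_eval(None))  # 1   -> recorded as the baseline accuracy
print(fake_eval(None))  # 0.9 -> trial 1 misses the 1% tolerable loss
print(fake_eval(None))  # 0.9 -> trial 2 misses again
print(fake_eval(None))  # 1   -> trial 3 meets the criterion, tuning stops
```

The leading 0 is discarded by the first pop, which is why the list carries one more element than the expected number of evaluations.
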
diff --git a/test/strategy/test_exhaustive.py b/test/strategy/test_exhaustive.py index 40e3160be4b..1bb01b95c70 100644 --- a/test/strategy/test_exhaustive.py +++ b/test/strategy/test_exhaustive.py @@ -1,61 +1,7 @@ """Tests for quantization""" import numpy as np import unittest -import os import shutil -import yaml - -def build_fake_yaml(): - fake_yaml = ''' - model: - name: fake_yaml - framework: tensorflow - inputs: x - outputs: op_to_store - device: cpu - evaluation: - accuracy: - metric: - topk: 1 - tuning: - strategy: - name: exhaustive - accuracy_criterion: - relative: 0.01 - workspace: - path: saved - ''' - y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) - f.close() - -def build_fake_yaml2(): - fake_yaml = ''' - model: - name: fake_yaml - framework: tensorflow - inputs: x - outputs: op_to_store - device: cpu - evaluation: - accuracy: - metric: - topk: 1 - tuning: - strategy: - name: exhaustive - exit_policy: - max_trials: 5 - accuracy_criterion: - relative: -0.01 - workspace: - path: saved - ''' - y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml2.yaml',"w",encoding="utf-8") as f: - yaml.dump(y,f) - f.close() def build_fake_model(): import tensorflow as tf @@ -94,35 +40,58 @@ class TestQuantization(unittest.TestCase): @classmethod def setUpClass(self): self.constant_graph = build_fake_model() - build_fake_yaml() - build_fake_yaml2() @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') - os.remove('fake_yaml2.yaml') - shutil.rmtree("saved", ignore_errors=True) def test_ru_exhaustive_one_trial(self): - from neural_compressor.experimental import Quantization, common - - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) - quantizer.eval_dataloader = common.DataLoader(dataset) - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.model = self.constant_graph - quantizer.fit() + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion + from neural_compressor.data import Datasets, DATALOADERS + + # dataset and dataloader + dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) + dataloader = DATALOADERS["tensorflow"](dataset) + + # tuning and accuracy criterion + tune_cri = TuningCriterion(strategy='exhaustive', max_trials=1) + acc_cri = AccuracyCriterion(tolerable_loss=0.01) + + conf = PostTrainingQuantConfig(quant_level=1, tuning_criterion=tune_cri, accuracy_criterion=acc_cri) + def fake_eval(model): + return 1 + + q_model = fit(model=self.constant_graph, + conf=conf, + calib_dataloader=dataloader, + eval_func=fake_eval) + self.assertNotEqual(q_model, None) def test_ru_exhaustive_max_trials(self): - from neural_compressor.experimental import Quantization, common - - quantizer = Quantization('fake_yaml2.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) - quantizer.eval_dataloader = common.DataLoader(dataset) - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.model = self.constant_graph - quantizer.fit() + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion + from neural_compressor.data import Datasets, DATALOADERS + + # dataset and dataloader + dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) + dataloader = DATALOADERS["tensorflow"](dataset) + + # 
tuning and accuracy criterion + tune_cri = TuningCriterion(strategy='exhaustive', max_trials=3) + acc_cri = AccuracyCriterion(tolerable_loss=0.01) + + acc = [0, 1, 0.9, 0.9, 1] + def fake_eval(model): + acc.pop(0) + return acc[0] + + conf = PostTrainingQuantConfig(quant_level=1, tuning_criterion=tune_cri, accuracy_criterion=acc_cri) + q_model = fit(model=self.constant_graph, + conf=conf, + calib_dataloader=dataloader, + eval_func=fake_eval) + self.assertNotEqual(q_model, None) if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_exhaustive_1.x.py b/test/strategy/test_exhaustive_1.x.py new file mode 100644 index 00000000000..40e3160be4b --- /dev/null +++ b/test/strategy/test_exhaustive_1.x.py @@ -0,0 +1,128 @@ +"""Tests for quantization""" +import numpy as np +import unittest +import os +import shutil +import yaml + +def build_fake_yaml(): + fake_yaml = ''' + model: + name: fake_yaml + framework: tensorflow + inputs: x + outputs: op_to_store + device: cpu + evaluation: + accuracy: + metric: + topk: 1 + tuning: + strategy: + name: exhaustive + accuracy_criterion: + relative: 0.01 + workspace: + path: saved + ''' + y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) + with open('fake_yaml.yaml',"w",encoding="utf-8") as f: + yaml.dump(y,f) + f.close() + +def build_fake_yaml2(): + fake_yaml = ''' + model: + name: fake_yaml + framework: tensorflow + inputs: x + outputs: op_to_store + device: cpu + evaluation: + accuracy: + metric: + topk: 1 + tuning: + strategy: + name: exhaustive + exit_policy: + max_trials: 5 + accuracy_criterion: + relative: -0.01 + workspace: + path: saved + ''' + y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) + with open('fake_yaml2.yaml',"w",encoding="utf-8") as f: + yaml.dump(y,f) + f.close() + +def build_fake_model(): + import tensorflow as tf + try: + graph = tf.Graph() + graph_def = tf.GraphDef() + with tf.Session() as sess: + x = tf.placeholder(tf.float64, shape=(1,3,3,1), name='x') + y = tf.constant(np.random.random((2,2,1,1)), name='y') + op = tf.nn.conv2d(input=x, filter=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + + sess.run(tf.global_variables_initializer()) + constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + except: + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(tf.float64, shape=(1,3,3,1), name='x') + y = tf.compat.v1.constant(np.random.random((2,2,1,1)), name='y') + op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + + sess.run(tf.compat.v1.global_variables_initializer()) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + return graph + +class TestQuantization(unittest.TestCase): + + @classmethod + def setUpClass(self): + self.constant_graph = build_fake_model() + build_fake_yaml() + build_fake_yaml2() + + @classmethod + def tearDownClass(self): + os.remove('fake_yaml.yaml') + os.remove('fake_yaml2.yaml') + + shutil.rmtree("saved", ignore_errors=True) + + def test_ru_exhaustive_one_trial(self): + from neural_compressor.experimental import Quantization, common + + quantizer = Quantization('fake_yaml.yaml') + dataset = 
quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.model = self.constant_graph + quantizer.fit() + + def test_ru_exhaustive_max_trials(self): + from neural_compressor.experimental import Quantization, common + + quantizer = Quantization('fake_yaml2.yaml') + dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.model = self.constant_graph + quantizer.fit() + +if __name__ == "__main__": + unittest.main() diff --git a/test/strategy/test_random.py b/test/strategy/test_random.py index d0f007fd7f2..8f4a35d93f6 100644 --- a/test/strategy/test_random.py +++ b/test/strategy/test_random.py @@ -3,62 +3,6 @@ import unittest import os import shutil -import yaml - - -def build_fake_yaml(): - fake_yaml = ''' - model: - name: fake_yaml - framework: tensorflow - inputs: x - outputs: op_to_store - device: cpu - evaluation: - accuracy: - metric: - topk: 1 - tuning: - strategy: - name: random - accuracy_criterion: - relative: 0.01 - workspace: - path: saved - ''' - y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml.yaml', "w", encoding="utf-8") as f: - yaml.dump(y, f) - f.close() - - -def build_fake_yaml2(): - fake_yaml = ''' - model: - name: fake_yaml - framework: tensorflow - inputs: x - outputs: op_to_store - device: cpu - evaluation: - accuracy: - metric: - topk: 1 - tuning: - strategy: - name: random - exit_policy: - max_trials: 3 - accuracy_criterion: - relative: -0.01 - workspace: - path: saved - ''' - y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) - with open('fake_yaml2.yaml', "w", encoding="utf-8") as f: - yaml.dump(y, f) - f.close() - def build_fake_model(): import tensorflow as tf @@ -102,35 +46,58 @@ class TestQuantization(unittest.TestCase): @classmethod def setUpClass(self): self.constant_graph = build_fake_model() - build_fake_yaml() - build_fake_yaml2() @classmethod def tearDownClass(self): - os.remove('fake_yaml.yaml') - os.remove('fake_yaml2.yaml') - shutil.rmtree("saved", ignore_errors=True) def test_ru_random_one_trial(self): - from neural_compressor.experimental import Quantization, common - - quantizer = Quantization('fake_yaml.yaml') - dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.eval_dataloader = common.DataLoader(dataset) - quantizer.model = self.constant_graph - quantizer.fit() + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion + from neural_compressor.data import Datasets, DATALOADERS + + # dataset and dataloader + dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) + dataloader = DATALOADERS["tensorflow"](dataset) + + # tuning and accuracy criterion + tune_cri = TuningCriterion(strategy='random', max_trials=1) + acc_cri = AccuracyCriterion(tolerable_loss=0.01) + + conf = PostTrainingQuantConfig(quant_level=1, tuning_criterion=tune_cri, accuracy_criterion=acc_cri) + def fake_eval(model): + return 1 + + q_model = fit(model=self.constant_graph, + conf=conf, + calib_dataloader=dataloader, + eval_func=fake_eval) + self.assertNotEqual(q_model, None) def test_ru_random_max_trials(self): - from neural_compressor.experimental import Quantization, common - - quantizer = Quantization('fake_yaml2.yaml') - 
dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) - quantizer.calib_dataloader = common.DataLoader(dataset) - quantizer.eval_dataloader = common.DataLoader(dataset) - quantizer.model = self.constant_graph - quantizer.fit() + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion + from neural_compressor.data import Datasets, DATALOADERS + + # dataset and dataloader + dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) + dataloader = DATALOADERS["tensorflow"](dataset) + + # tuning and accuracy criterion + tune_cri = TuningCriterion(strategy='random', max_trials=3) + acc_cri = AccuracyCriterion(tolerable_loss=0.01) + + acc = [0, 1, 0.9, 0.9, 1] + def fake_eval(model): + acc.pop(0) + return acc[0] + + conf = PostTrainingQuantConfig(quant_level=1, tuning_criterion=tune_cri, accuracy_criterion=acc_cri) + q_model = fit(model=self.constant_graph, + conf=conf, + calib_dataloader=dataloader, + eval_func=fake_eval) + self.assertNotEqual(q_model, None) if __name__ == "__main__": diff --git a/test/strategy/test_sigopt.py b/test/strategy/test_sigopt.py index db5990ca1ef..72b4e659b99 100644 --- a/test/strategy/test_sigopt.py +++ b/test/strategy/test_sigopt.py @@ -3,7 +3,6 @@ import unittest import shutil import os -import yaml if os.getenv('SIGOPT_API_TOKEN') is None or os.getenv('SIGOPT_PROJECT_ID') is None: CONDITION = True else: diff --git a/test/strategy/test_tpe.py b/test/strategy/test_tpe.py index 61e30ca249a..d367bdd5c1e 100644 --- a/test/strategy/test_tpe.py +++ b/test/strategy/test_tpe.py @@ -59,7 +59,7 @@ def test_run_tpe_one_trial(self): # tuning and accuracy criterion tune_cri = TuningCriterion(strategy='tpe', max_trials=200) - acc_cri = AccuracyCriterion(tolerable_loss=-0.01) + acc_cri = AccuracyCriterion(tolerable_loss=0.01) def eval_func(model): return 1 conf = PostTrainingQuantConfig(quant_level=1, tuning_criterion=tune_cri, accuracy_criterion=acc_cri) @@ -79,7 +79,7 @@ def test_run_tpe_max_trials(self): # tuning and accuracy criterion tune_cri = TuningCriterion(strategy='tpe', max_trials=5) - acc_cri = AccuracyCriterion(tolerable_loss=-0.01) + acc_cri = AccuracyCriterion(tolerable_loss=0.01) from neural_compressor.metric import METRICS metrics = METRICS('tensorflow') From 282c3fca97b85d785aec38fb8315b956856ff365 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Mon, 10 Apr 2023 13:28:45 +0800 Subject: [PATCH 065/103] update cfg intialization Signed-off-by: yiliu30 --- neural_compressor/strategy/mse_v2.py | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/neural_compressor/strategy/mse_v2.py b/neural_compressor/strategy/mse_v2.py index 9b9fef6d8dc..b3ad336d07d 100644 --- a/neural_compressor/strategy/mse_v2.py +++ b/neural_compressor/strategy/mse_v2.py @@ -58,30 +58,9 @@ def next_tune_cfg(self): from copy import deepcopy tuning_space = self.tuning_space initial_op_tuning_cfg = {} - for item in tuning_space.root_item.options: - if item.item_type == 'op': - op_name, op_type = item.name - initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options for calib_sampling_size in calib_sampling_size_lst: - # Collect the ops that support static and dynamic - quant_mode_wise_items = OrderedDict() - query_order = ['static', 'dynamic', 'bf16', 'fp16', 'fp32'] - pre_items = set() - for quant_mode in query_order: - items 
= tuning_space.query_items_by_quant_mode(quant_mode) - filtered_items = [item for item in items if item not in pre_items] - pre_items = pre_items.union(set(items)) - quant_mode_wise_items[quant_mode] = filtered_items - - def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): - for item in items_lst: - op_item_dtype_dict[item.name] = target_quant_mode - - op_item_dtype_dict = OrderedDict() - for quant_mode, quant_mode_items in quant_mode_wise_items.items(): - initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) - + op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() quant_ops = quant_mode_wise_items.get('static', []) quant_ops += quant_mode_wise_items.get('dynamic', []) # Optype-wise tuning From 15cb16ded8ab99377d322357fef9cc187b4924a9 Mon Sep 17 00:00:00 2001 From: "Lv, Liang1" Date: Tue, 11 Apr 2023 16:50:55 +0800 Subject: [PATCH 066/103] add metric for tf examples Signed-off-by: Lv, Liang1 --- examples/helloworld/tf_example1/README.md | 4 +++- examples/helloworld/tf_example1/test.py | 5 ++++- examples/helloworld/tf_example3/README.md | 5 ++++- examples/helloworld/tf_example3/test.py | 5 ++++- examples/helloworld/tf_example5/README.md | 5 ++++- examples/helloworld/tf_example5/test.py | 5 ++++- examples/helloworld/tf_example6/README.md | 4 +++- examples/helloworld/tf_example6/test.py | 5 ++++- .../efficientnet-b0/quantization/ptq/main.py | 4 +++- .../inception_resnet_v2/quantization/ptq/main.py | 4 +++- .../tensorflow_models/inception_v1/quantization/ptq/main.py | 4 +++- .../tensorflow_models/inception_v2/quantization/ptq/main.py | 4 +++- .../tensorflow_models/inception_v3/quantization/ptq/main.py | 4 +++- .../tensorflow_models/inception_v4/quantization/ptq/main.py | 4 +++- .../tensorflow_models/mobilenet_v1/quantization/ptq/main.py | 4 +++- .../tensorflow_models/mobilenet_v2/export/main.py | 4 +++- .../tensorflow_models/mobilenet_v2/quantization/ptq/main.py | 4 +++- .../tensorflow_models/mobilenet_v3/quantization/ptq/main.py | 4 +++- .../tensorflow_models/resnet50_v1/quantization/ptq/main.py | 4 +++- .../tensorflow_models/resnet_v2_101/quantization/ptq/main.py | 4 +++- .../tensorflow_models/resnet_v2_152/quantization/ptq/main.py | 4 +++- .../tensorflow_models/resnet_v2_50/quantization/ptq/main.py | 4 +++- 22 files changed, 72 insertions(+), 22 deletions(-) diff --git a/examples/helloworld/tf_example1/README.md b/examples/helloworld/tf_example1/README.md index c8776a28512..1dcdc146d96 100644 --- a/examples/helloworld/tf_example1/README.md +++ b/examples/helloworld/tf_example1/README.md @@ -30,12 +30,14 @@ python test.py --dataset_location=/path/to/imagenet/ ### 2. Introduction We only need to add the following lines for quantization to create an int8 model. 
```python + top1 = TensorflowTopK(k=1) config = PostTrainingQuantConfig(calibration_sampling_size=[20]) q_model = fit( model="./mobilenet_v1_1.0_224_frozen.pb", conf=config, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader) + eval_dataloader=eval_dataloader, + eval_metric=top1) ``` diff --git a/examples/helloworld/tf_example1/test.py b/examples/helloworld/tf_example1/test.py index 8ef42a5ed30..1e602a34fea 100644 --- a/examples/helloworld/tf_example1/test.py +++ b/examples/helloworld/tf_example1/test.py @@ -6,6 +6,7 @@ from neural_compressor.data import DefaultDataLoader from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig +from neural_compressor.metric import TensorflowTopK flags = tf.compat.v1.flags FLAGS = flags.FLAGS @@ -21,12 +22,14 @@ eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=1) def main(): + top1 = TensorflowTopK(k=1) config = PostTrainingQuantConfig(calibration_sampling_size=[20]) q_model = fit( model="./mobilenet_v1_1.0_224_frozen.pb", conf=config, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader) + eval_dataloader=eval_dataloader, + eval_metric=top1) if __name__ == "__main__": main() diff --git a/examples/helloworld/tf_example3/README.md b/examples/helloworld/tf_example3/README.md index 5559b97fe32..2cc37bf41ab 100644 --- a/examples/helloworld/tf_example3/README.md +++ b/examples/helloworld/tf_example3/README.md @@ -29,9 +29,12 @@ We can get a BF16 model using the Mixed Precision API. ```python from neural_compressor.config import MixedPrecisionConfig from neural_compressor import mix_precision + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) config = MixedPrecisionConfig() mix_precision_model = mix_precision.fit( model="./mobilenet_v1_1.0_224_frozen.pb", config=config, - eval_dataloader=eval_dataloader) + eval_dataloader=eval_dataloader, + eval_metric=top1) ``` diff --git a/examples/helloworld/tf_example3/test.py b/examples/helloworld/tf_example3/test.py index 1fd3c9c13cf..28c47fe9bb5 100644 --- a/examples/helloworld/tf_example3/test.py +++ b/examples/helloworld/tf_example3/test.py @@ -16,11 +16,14 @@ def main(): from neural_compressor.config import MixedPrecisionConfig from neural_compressor import mix_precision + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) config = MixedPrecisionConfig() mix_precision_model = mix_precision.fit( model="./mobilenet_v1_1.0_224_frozen.pb", config=config, - eval_dataloader=eval_dataloader) + eval_dataloader=eval_dataloader, + eval_metric=top1) if __name__ == "__main__": main() diff --git a/examples/helloworld/tf_example5/README.md b/examples/helloworld/tf_example5/README.md index 6453215374e..1ab0b765170 100644 --- a/examples/helloworld/tf_example5/README.md +++ b/examples/helloworld/tf_example5/README.md @@ -37,12 +37,15 @@ python test.py --benchmark --dataset_location=/path/to/imagenet/ * We only need to add the following lines for quantization to create an int8 model. ```python from neural_compressor.quantization import fit + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) config = PostTrainingQuantConfig(calibration_sampling_size=[20]) q_model = fit( model="./mobilenet_v1_1.0_224_frozen.pb", conf=config, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader) + eval_dataloader=eval_dataloader, + eval_metric=top1) q_model.save('./int8.pb') ``` * Run benchmark according to config. 
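
Every example touched by this patch follows the same recipe: now that `fit` only tunes when it can build an evaluation function, each call passes an explicit `eval_metric` next to the `eval_dataloader`. A condensed sketch of the pattern with a dummy dataset standing in for ImageNet (the model paths are placeholders):

```python
from neural_compressor import quantization
from neural_compressor.config import PostTrainingQuantConfig
from neural_compressor.data import Datasets, DATALOADERS
from neural_compressor.metric import TensorflowTopK

# Dummy data in place of the real ImageNet dataloaders.
dataset = Datasets("tensorflow")["dummy"]((100, 224, 224, 3), label=True)
dataloader = DATALOADERS["tensorflow"](dataset)

top1 = TensorflowTopK(k=1)  # the metric each example now passes explicitly
conf = PostTrainingQuantConfig(calibration_sampling_size=[20])
q_model = quantization.fit("./model_fp32.pb",  # placeholder model path
                           conf=conf,
                           calib_dataloader=dataloader,
                           eval_dataloader=dataloader,
                           eval_metric=top1)
q_model.save("./int8.pb")
```
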
diff --git a/examples/helloworld/tf_example5/test.py b/examples/helloworld/tf_example5/test.py index 3008418b6ba..e36aefcc3ef 100644 --- a/examples/helloworld/tf_example5/test.py +++ b/examples/helloworld/tf_example5/test.py @@ -24,12 +24,15 @@ def main(): if args.tune: from neural_compressor.quantization import fit + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) config = PostTrainingQuantConfig(calibration_sampling_size=[20]) q_model = fit( model="./mobilenet_v1_1.0_224_frozen.pb", conf=config, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader) + eval_dataloader=eval_dataloader, + eval_metric=top1) q_model.save('./int8.pb') if args.benchmark: diff --git a/examples/helloworld/tf_example6/README.md b/examples/helloworld/tf_example6/README.md index 1fb714346bf..50b7e64811a 100644 --- a/examples/helloworld/tf_example6/README.md +++ b/examples/helloworld/tf_example6/README.md @@ -36,11 +36,13 @@ python test.py --benchmark --dataset_location=/path/to/imagenet/ ### 2. Introduction * We only need to add the following lines for quantization to create an int8 model. ```python + top1 = TensorflowTopK(k=1) quantized_model = fit( model="./mobilenet_v1_1.0_224_frozen.pb", conf=config, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader) + eval_dataloader=eval_dataloader, + eval_metric=top1) tf.io.write_graph(graph_or_graph_def=quantized_model.model, logdir='./', name='int8.pb', diff --git a/examples/helloworld/tf_example6/test.py b/examples/helloworld/tf_example6/test.py index 1a930f2996a..91ee319469b 100644 --- a/examples/helloworld/tf_example6/test.py +++ b/examples/helloworld/tf_example6/test.py @@ -76,12 +76,15 @@ def main(): if args.tune: from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) config = PostTrainingQuantConfig(calibration_sampling_size=[20]) quantized_model = fit( model="./mobilenet_v1_1.0_224_frozen.pb", conf=config, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader) + eval_dataloader=eval_dataloader, + eval_metric=top1) tf.io.write_graph(graph_or_graph_def=quantized_model.model, logdir='./', name='int8.pb', diff --git a/examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/main.py index 492771bc3c5..1f8f3fe6ffe 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/main.py @@ -132,8 +132,10 @@ def run(self): conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100], inputs=['truediv'], outputs=['Squeeze'], op_name_dict=op_name_dict) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader) + eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) if args.benchmark: diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/main.py index 7b4207515e3..04aaab2a85d 100644 --- 
a/examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/main.py @@ -112,8 +112,10 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader) + eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) if args.benchmark: diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/main.py index 7aac7df844b..da22e136f84 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/main.py @@ -112,8 +112,10 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader) + eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) if args.benchmark: diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/main.py index 6ba2b4b436a..aa713d0b0f1 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/main.py @@ -112,8 +112,10 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader) + eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) if args.benchmark: diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/main.py index 277ecf5f1ef..6ca34ea6180 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/main.py @@ -114,8 +114,10 @@ def run(self): op_name_dict = {'v0/cg/conv0/conv2d/Conv2D': { 'activation': {'dtype': ['fp32']}}} conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100], op_name_dict=op_name_dict) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader) + eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) if args.benchmark: diff --git
a/examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/main.py index 02947e5fc1e..de098c98a09 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/main.py @@ -112,8 +112,10 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader) + eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) if args.benchmark: diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/main.py index 9c88fbeb599..e8c04fa9713 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/main.py @@ -112,8 +112,10 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[20, 50]) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader) + eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) if args.benchmark: diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/main.py b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/main.py index 326f89afff5..a0b3479721b 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/main.py @@ -96,8 +96,10 @@ def run(self): } dataloader = create_dataloader('tensorflow', dataloader_args) conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100]) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) q_model = quantization.fit(self.args.input_graph, conf=conf, calib_dataloader=dataloader, - eval_dataloader=dataloader) + eval_dataloader=dataloader, eval_metric=top1) q_model.save("./tf-quant.pb") from neural_compressor.config import TF2ONNXConfig config = TF2ONNXConfig(dtype=self.args.dtype, input_names='input[-1,224,224,3]') diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/main.py index 6ba382280a9..ec7ea64fa4d 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/main.py @@ -112,8 +112,10 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[20, 50]) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) 
q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader) + eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) if args.benchmark: diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py index a9223196638..94f39bb7654 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py @@ -247,8 +247,10 @@ def run(self): } conf = PostTrainingQuantConfig(calibration_sampling_size=[20, 50], op_name_dict=op_name_dict) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader) + eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) if args.benchmark: diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/main.py index 754dc1fea41..96acbd2a71c 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/main.py @@ -112,8 +112,10 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader) + eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) if args.benchmark: diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/main.py index 6ba2b4b436a..aa713d0b0f1 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/main.py @@ -112,8 +112,10 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader) + eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) if args.benchmark: diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/main.py index 037c988e9b6..b63e0eb1160 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/main.py @@ -112,8 +112,10 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = 
PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader) + eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) if args.benchmark: diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py index 6ba2b4b436a..aa713d0b0f1 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py @@ -112,8 +112,10 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) + from neural_compressor.metric import TensorflowTopK + top1 = TensorflowTopK(k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader) + eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) if args.benchmark: From faf1ce4b728fc0db2138af9639012249d2101191 Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Tue, 11 Apr 2023 17:19:46 +0800 Subject: [PATCH 067/103] fix for test Signed-off-by: Cheng, Zixuan --- .azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt | 3 ++- neural_compressor/adaptor/pytorch.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt b/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt index 562eeea0326..1f9309432a8 100644 --- a/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt +++ b/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt @@ -1331,6 +1331,8 @@ OTHpart Ott oup outdir +Outlier +outliers OutputData outputfile ov @@ -2595,5 +2597,4 @@ llamanorm nbias pc cdrdv -NeuralCompressor zp diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index 957714f4ed5..12d6d50c979 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -822,7 +822,8 @@ def __init__(self, framework_specific_info): self.q_mapping = \ tq.quantization_mappings.get_default_dynamic_quant_module_mappings() else: - assert False, "Unsupport approach: {}".format(self.approach) + if not self.benchmark: + assert False, "Unsupport approach: {}".format(self.approach) self.fp32_results = [] self.fp32_preds_as_label = False From f4b8dce29741d897ff9beaef26c4fa91041d39d1 Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Tue, 11 Apr 2023 17:26:10 +0800 Subject: [PATCH 068/103] fix dict Signed-off-by: Cheng, Zixuan --- .azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt b/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt index 1f9309432a8..62cea0de411 100644 --- a/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt +++ b/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt @@ -2598,3 +2598,4 @@ nbias pc cdrdv zp +TensorflowTopK From 6f138e8d2d649cd3abaa30c2a348aa4360ce9986 Mon Sep 17 00:00:00 2001 From: Zixuan Cheng <110808245+violetch24@users.noreply.github.com> Date: Fri, 14 Apr 2023 14:52:52 +0800 Subject: [PATCH 069/103] Update 
inc_dict.txt --- .azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt b/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt index 62cea0de411..00b61e10200 100644 --- a/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt +++ b/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt @@ -2597,5 +2597,6 @@ llamanorm nbias pc cdrdv +NeuralCompressor zp TensorflowTopK From 01be4322ce5e3fb119e3aceb06c36adf91af2a51 Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Fri, 14 Apr 2023 15:53:30 +0800 Subject: [PATCH 070/103] fix for merge Signed-off-by: Cheng, Zixuan --- neural_compressor/strategy/strategy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index a4c19cc4b89..3b55533d673 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -1055,7 +1055,7 @@ def _set_framework_info(self, q_dataloader, q_func=None): framework = 'pytorch_fx' if self.mixed_precision_mode: framework_specific_info.update({"approach": "post_training_dynamic_quant"}) - framework_specific_info.update({'recipes': self.cfg.quantization.get('recipes', {})}) + framework_specific_info.update({'recipes': self.conf.quantization.recipes}) framework_specific_info.update({"q_dataloader": q_dataloader}) framework_specific_info.update({"use_bf16": self.conf.quantization.use_bf16 \ if self.conf.quantization.use_bf16 is not None else True}) From 6d0129815d4864e9133d49c0dc4411389dd90feb Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Mon, 17 Apr 2023 12:02:48 +0800 Subject: [PATCH 071/103] fix for ipex recipes Signed-off-by: Cheng, Zixuan --- neural_compressor/adaptor/pytorch.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index b785f787879..c32eda80462 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -791,6 +791,8 @@ def __init__(self, framework_specific_info): self.example_inputs = framework_specific_info.get("example_inputs", None) if 'recipes' in framework_specific_info: self.recipes = framework_specific_info['recipes'] + else: + self.recipes = None if 'approach' in framework_specific_info: # pragma: no cover self.approach = framework_specific_info['approach'] From 022282ccaddfd69426494d486e2e34bbce00cfc8 Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Wed, 19 Apr 2023 15:04:24 +0800 Subject: [PATCH 072/103] add config 2.x ut Signed-off-by: Cheng, Zixuan --- .../{test_config.py => test_config_1.x.py} | 2 +- test/config/test_config_2.x.py | 68 +++++++++++++++++++ 2 files changed, 69 insertions(+), 1 deletion(-) rename test/config/{test_config.py => test_config_1.x.py} (99%) create mode 100644 test/config/test_config_2.x.py diff --git a/test/config/test_config.py b/test/config/test_config_1.x.py similarity index 99% rename from test/config/test_config.py rename to test/config/test_config_1.x.py index 032792b73b1..03adc0153d0 100644 --- a/test/config/test_config.py +++ b/test/config/test_config_1.x.py @@ -1,4 +1,4 @@ -"""Tests for config file""" +"""Tests for 1.x config file""" import unittest import os from neural_compressor.conf import config as conf diff --git a/test/config/test_config_2.x.py b/test/config/test_config_2.x.py new file mode 100644 index 00000000000..a942733b0cc --- /dev/null +++ b/test/config/test_config_2.x.py @@ 
-0,0 +1,68 @@ +"""Tests for 2.x config file""" +import unittest +import os +from neural_compressor.config import Config as conf +from neural_compressor.utils.constant import * +from neural_compressor.config import PostTrainingQuantConfig, BenchmarkConfig, Options +from neural_compressor.config import MixedPrecisionConfig, DotDict + + +def helper(content): + with open('fake_conf.yaml', 'w', encoding="utf-8") as f: + f.write(content) + +class TestConfig(unittest.TestCase): + def test_config(self): + config = PostTrainingQuantConfig() + self.assertEqual(config.recipes['smooth_quant'], False) + self.assertEqual(config.recipes['fast_bias_correction'], False) + self.assertEqual(config.recipes['weight_correction'], False) + self.assertEqual(config.recipes['dedicated_qdq_pair'], False) + self.assertEqual(config.recipes['add_qdq_pair_to_weight'], False) + self.assertEqual(config.recipes['graph_optimization_level'], None) + +class TestGeneralConf(unittest.TestCase): + def test_config(self): + cfg = PostTrainingQuantConfig() + cfg.accuracy_criterion.tolerable_loss = 0.2 + a = conf(quantization=cfg) + self.assertEqual(a.accuracy.tolerable_loss, 0.2) + + cfg.op_type_dict = {'Conv': { + 'weight': { + 'dtype': ['fp32']}, + 'activation': { + 'dtype': ['fp32']}} + } + cfg.op_name_dict = {"layer1.0.conv1": { + "activation": { + "dtype": ["fp32"]}, + "weight": { + "dtype": ["fp32"]}}, + } + a = conf(quantization=cfg) + self.assertEqual(a.quantization.op_type_dict['Conv']['weight']['dtype'], ['fp32']) + + cfg.tuning_criterion.strategy = 'mse' + a = conf(quantization=cfg) + self.assertEqual(a.tuning.strategy, 'mse') + + cfg = BenchmarkConfig() + cfg.cores_per_instance = 4 + cfg.iteration = 100 + cfg.num_of_instance = 7 + a = conf(benchmark=cfg) + self.assertEqual(a.benchmark.iteration, 100) + + cfg = Options() + cfg.workspace = "workspace_path" + a = conf(options=cfg) + self.assertEqual(a.options.workspace, "workspace_path") + + cfg = MixedPrecisionConfig() + a = conf(mixed_precision=cfg) + self.assertEqual(a.mixed_precision.precision, ["bf16"]) + + +if __name__ == "__main__": + unittest.main() From f025a2edf490fd8f8c02a40edd83efff6ecdb641 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 20 Apr 2023 11:19:52 +0800 Subject: [PATCH 073/103] improve code coverage Signed-off-by: yiliu30 --- .../experimental/strategy/basic.py | 128 ------ .../experimental/strategy/conservative.py | 412 ------------------ .../experimental/strategy/hawq_v2.py | 110 ----- .../experimental/strategy/strategy.py | 232 ---------- 4 files changed, 882 deletions(-) delete mode 100644 neural_compressor/experimental/strategy/conservative.py delete mode 100644 neural_compressor/experimental/strategy/hawq_v2.py diff --git a/neural_compressor/experimental/strategy/basic.py b/neural_compressor/experimental/strategy/basic.py index 33ea8c7d675..68267808d05 100644 --- a/neural_compressor/experimental/strategy/basic.py +++ b/neural_compressor/experimental/strategy/basic.py @@ -33,134 +33,6 @@ class BasicTuneStrategy(TuneStrategy): and the tuning process ends once the condition meets the exit policy. """ - def distributed_next_tune_cfg_lst(self, comm): - """Generate and yield the next tuning config list with below order. - - 1. OP Type Wise Tuning - 2. Fallback OP One by One - 3. Fallback Multiple OPs Accumulated - - Yields: - tuning_config_list (list): A list containing dicts of the tuning configuration for quantization. 
- """ - from copy import deepcopy - tuning_space = self.tuning_space - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options - rank = comm.Get_rank() - for calib_sampling_size in calib_sampling_size_lst: - # Initialize the tuning config for each op according to the quantization approach - op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() - # Optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) - early_stop_tuning = False - stage1_cnt = 0 - quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] - quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] - stage1_max = 1e9 # TODO set a more appropriate value - op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) - ############ stage 1: yield op_tune_cfg_lst - op_tuning_cfg_lst_stage_1 = [] - for op_tuning_cfg in op_wise_tuning_sampler: - stage1_cnt += 1 - if early_stop_tuning and stage1_cnt > stage1_max: - logger.info("Early stopping the stage 1.") - break - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size - op_tuning_cfg_lst_stage_1.append(deepcopy(op_tuning_cfg)) - logger.info("yield op_tuning_cfg_lst_stage_1 with length {}".format(len(op_tuning_cfg_lst_stage_1))) - yield op_tuning_cfg_lst_stage_1 - - #### Coordinate: only master knows cur best tune cfg - cur_best_tuning_cfg = self.cur_best_tuning_cfg if rank == 0 else None - if rank == 0: - comm.bcast(cur_best_tuning_cfg, root=0) - else: - self.cur_best_tuning_cfg = comm.bcast(cur_best_tuning_cfg, root=0) - - ############ stage 2: yield new_op_tuning_cfg_lst (length of 1) - # Fallback the ops supported both static and dynamic from static to dynamic - # Tuning items: None - if self.cfg.quantization.approach == 'post_training_auto_quant': - static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if - item in tuning_space.query_items_by_quant_mode('dynamic')] - if static_dynamic_items: - logger.info("Fallback all ops that support both dynamic and static to dynamic.") - else: - logger.info("Non ops that support both dynamic") - - new_op_tuning_cfg = deepcopy(self.cur_best_tuning_cfg) - for item in static_dynamic_items: - new_op_tuning_cfg[item.name] = self._initial_dynamic_cfg_based_on_static_cfg( - new_op_tuning_cfg[item.name]) - new_op_tuning_cfg['calib_sampling_size'] = calib_sampling_size - op_tuning_cfg_lst_stage_2 = [deepcopy(new_op_tuning_cfg)] - logger.info("yield op_tuning_cfg_lst_stage_2 with length {}".format(len(op_tuning_cfg_lst_stage_2))) - yield op_tuning_cfg_lst_stage_2 - - #### Coordinate: only master knows cur best tune cfg - cur_best_tuning_cfg = self.cur_best_tuning_cfg if rank == 0 else None - if rank == 0: - comm.bcast(cur_best_tuning_cfg, root=0) - else: - self.cur_best_tuning_cfg = comm.bcast(cur_best_tuning_cfg, root=0) - - best_op_tuning_cfg_stage1 = deepcopy(self.cur_best_tuning_cfg) - - # Fallback - ############ stage 3, 4: yield op_tuning_cfg_lst - op_tuning_cfg_lst_stage_3 = [] - op_tuning_cfg_lst_stage_4 = [] - for target_dtype in ['bf16', 'fp32']: - target_type_lst = set(tuning_space.query_items_by_quant_mode(target_dtype)) - fallback_items_lst = [item for item in quant_ops if item in target_type_lst] - if fallback_items_lst: - logger.info(f"Start to fallback op to {target_dtype} one by one.") - self._fallback_started() - fallback_items_name_lst = [item.name for item 
in fallback_items_lst][::-1] # from bottom to up - op_dtypes = OrderedDict(zip(fallback_items_name_lst, [target_dtype] * len(fallback_items_name_lst))) - initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=False) - op_fallback_acc_impact = OrderedDict() - for op_index, op_tuning_cfg in enumerate(fallback_sampler): - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size - # yield op_tuning_cfg - op_tuning_cfg_lst_stage_3.append(deepcopy(op_tuning_cfg)) - logger.info("yield op_tuning_cfg_lst_stage_3 with length {}".format(len(op_tuning_cfg_lst_stage_3))) - yield op_tuning_cfg_lst_stage_3 - - # Only master updates op_fallback_acc_impact - if rank == 0: - for op_index, op_tuning_cfg in enumerate(fallback_sampler): - acc, _ = self.eval_results[op_index] - op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc - - #### Coordinate: only master knows op_fallback_acc_impact - op_fallback_acc_impact = op_fallback_acc_impact if rank == 0 else None - if rank == 0: - comm.bcast(op_fallback_acc_impact, root=0) - else: - op_fallback_acc_impact = comm.bcast(op_fallback_acc_impact, root=0) - - # Fallback OPs accumulated according to the order in the previous stage - if len(op_fallback_acc_impact) > 0: - ordered_ops = sorted(op_fallback_acc_impact.keys(), - key=lambda key: op_fallback_acc_impact[key], - reverse=self.higher_is_better) - op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) - logger.info(f"Start to accumulate fallback to {target_dtype}.") - initial_op_tuning_cfg = deepcopy(best_op_tuning_cfg_stage1) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=initial_op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=True) - for op_tuning_cfg in fallback_sampler: - op_tuning_cfg['calib_sampling_size'] = calib_sampling_size - # yield op_tuning_cfg - op_tuning_cfg_lst_stage_4.append(deepcopy(op_tuning_cfg)) - logger.info("yield op_tuning_cfg_lst_stage_4 with length {}".format(len(op_tuning_cfg_lst_stage_4))) - yield op_tuning_cfg_lst_stage_4 - def next_tune_cfg(self): """Generate and yield the next tuning config with below order. diff --git a/neural_compressor/experimental/strategy/conservative.py b/neural_compressor/experimental/strategy/conservative.py deleted file mode 100644 index 7608ca1a894..00000000000 --- a/neural_compressor/experimental/strategy/conservative.py +++ /dev/null @@ -1,412 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""The conservative tuning strategy for quantization level 0.""" -import copy -import os -import numpy as np - -from collections import deque -from collections import OrderedDict as COrderedDict -from copy import deepcopy -from typing import Dict, List, Tuple, OrderedDict - -from .strategy import strategy_registry, TuneStrategy -from .utils.tuning_space import TuningItem -from ...utils import logger -from ...utils.utility import Statistics -from ...algorithm import AlgorithmScheduler - -@strategy_registry -class ConservativeTuneStrategy(TuneStrategy): - """Tuning strategy with accuracy first, performance second. - - The quantization level O0 is designed for user who want to keep the accuracy - of the model after quantization. It starts with the original(fp32) model, - and then quantize the OPs to lower precision OP type wisely and OP wisely. - """ - - def __init__(self, model, conf, q_dataloader, q_func=None, eval_dataloader=None, - eval_func=None, dicts=None, q_hooks=None): - """Init conservative tuning strategy.""" - super().__init__(model, conf, q_dataloader, q_func, eval_dataloader, - eval_func, dicts, q_hooks) - self.acc_meet_flag = False - - def next_tune_cfg(self): - """Generate and yield the next tuning config with below order. - - 1. Query all quantifiable ops and save as a list of [(op_name, op_type), ...] - 2. Classify the op by its op type - 3. Add op to quant_queue according to the op type priority - 4. Go through the quant_queue and replace it with the fp32 config in tune_cfg if - accuracy meets the requirements else continue - 5. For bf16 and fp16 operators, do the same as int8 operators. - - Returns: - tune_config (dict): It's a dict containing the tuning configuration to run. - """ - tuning_space = self.tuning_space - calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options - calib_sampling_size = calib_sampling_size_lst[0] - tune_cfg = self._initialize_tune_cfg() - tune_cfg['calib_sampling_size'] = calib_sampling_size - op_type_priority = self._get_op_type_priority() - quant_items_pool = self._quant_items_pool(op_type_priority) - logger.info(f"*** Try to convert op into lower precision to improve performance.") - for dtype, op_items in quant_items_pool.items(): - logger.info(f"*** Start to convert op into {dtype}.") - for op_type, items_lst in op_items.items(): - logger.info(f"*** Try to convert all {op_type} ops into {dtype}.") - tmp_tune_cfg = deepcopy(tune_cfg) - for item, quant_mode in items_lst: - op_info = item.name - op_config = tuning_space.get_default_config(op_info, quant_mode) - tmp_tune_cfg[op_info] = op_config - yield tmp_tune_cfg - if self.acc_meet_flag: - logger.info(f"*** Convert all {op_type} ops to {dtype} and accuracy still meet the requirements") - tune_cfg = deepcopy(tmp_tune_cfg) - else: - tmp_tune_cfg = deepcopy(tune_cfg) - logger.info(f"*** Convert all {op_type} ops to {dtype} but accuracy not meet the requirements") - logger.info(f"*** Try to convert {op_type} op into {dtype} one by one.") - for item, quant_mode in items_lst: - op_info = item.name - op_config = tuning_space.get_default_config(op_info, quant_mode) - tmp_tune_cfg[op_info] = op_config - yield tmp_tune_cfg - if self.acc_meet_flag: - tune_cfg[op_info] = op_config - logger.info((f"*** Convert one {op_type} op({op_info}) " - f"into {dtype} and accuracy still meet the requirements")) - else: - tmp_tune_cfg[op_info] = tune_cfg[op_info] - logger.info(f"*** Skip convert {op_info}.") - logger.info(f"*** Ending tuning process due to no quantifiable op 
left.") - - def traverse(self): - """Traverse the tuning space.""" - self._eval_baseline() - - # Start tuning - trials_count = 0 - for op_tuning_cfg in self.next_tune_cfg(): - tune_cfg = self._tune_cfg_converter(op_tuning_cfg) - trials_count += 1 - tuning_history = self._find_tuning_history(tune_cfg) - if tuning_history and trials_count < self.cfg.tuning.exit_policy.max_trials: - self.last_tune_result = tuning_history['last_tune_result'] - self.best_tune_result = tuning_history['best_tune_result'] - logger.warn("Find evaluated tuning config, skip.") - continue - logger.debug("Dump current tuning configuration:") - logger.debug(tune_cfg) - self.tuning_times += 1 - # set the parameter for pre quantization algos and run - self.set_param_for_pre_quantization_algos(self.algo_scheduler, tune_cfg, self.model) - self.model = self.algo_scheduler('pre_quantization') - # quantize - q_model = self.adaptor.quantize(copy.deepcopy(tune_cfg), self.model, self.calib_dataloader, self.q_func) - assert self.adaptor.pre_optimized_model - # set the parameter for post quantization algos and run - self.set_param_for_post_quantization_algos(self.algo_scheduler, tune_cfg, self.adaptor.pre_optimized_model, - q_model) - self.last_qmodel = self.algo_scheduler('post_quantization') - self.last_tune_cfg = copy.deepcopy(tune_cfg) - # Remove the reference to model - self.algo_scheduler.reset_exec_algorithms() - assert self.last_qmodel - # Return the last quantized model as a result. if performance only. - if self.cfg.tuning.exit_policy.performance_only: - self.best_qmodel = self.last_qmodel - self._add_tuning_history(copy.deepcopy(tune_cfg), (-1, [0]), q_config=self.last_qmodel.q_config) - return - self.last_tune_cfg = copy.deepcopy(tune_cfg) - self.last_tune_result = self._evaluate(self.last_qmodel) - self.acc_meet_flag = self.objectives.accuracy_meets() - if self.acc_meet_flag: - # For the first tuning - if not self.best_tune_result: - self.best_tune_result = self.last_tune_result - self.best_qmodel = self.last_qmodel - self.best_tune_result = self.last_tune_result - else: - # Update current tuning config and model with best performance - get_better_performance = self._compare_performace(self.last_tune_result, self.best_tune_result) - if get_better_performance: - logger.info(f"*** Update the model with better performance.") - self.best_qmodel = self.last_qmodel - self.best_tune_result = self.last_tune_result - else: - logger.info(f"*** The qmodel was not updated due to not achieving better performance.") - # Dump the current state to log - self._dump_tuning_state(trials_count, self.last_tune_result, self.best_tune_result, self.baseline) - # Judge stop or continue tuning - need_stop = self.stop(trials_count) - # Record the tuning history - saved_tune_cfg = copy.deepcopy(tune_cfg) - saved_last_tune_result = copy.deepcopy(self.last_tune_result) - self._add_tuning_history(saved_tune_cfg, - saved_last_tune_result, - q_config=q_model.q_config) - self.tune_result_record.append(copy.deepcopy(self.last_tune_result)) - self.tune_cfg = tune_cfg - self._dump_tuning_process_statistics() - if need_stop: - if self.cfg.tuning.diagnosis and self.cfg.tuning.diagnosis.diagnosis_after_tuning: - logger.debug(f'*** Start to do diagnosis (inspect tensor).') - self._diagnosis() - self._recover_best_qmodel_from_tuning_cfg() - if self.use_multi_objective and len(self.tune_result_record) > 1 and \ - self.best_tune_result is not None: - best_trail, best_result = self.objectives.best_result(self.tune_result_record, - copy.deepcopy(self.baseline)) - 
if best_result != self.best_tune_result: - from neural_compressor.utils.utility import recover - self.best_qmodel = recover(self.model.model, - os.path.join(self.cfg.tuning.workspace.path, 'history.snapshot'), - best_trail) - self.best_tune_result = best_result - self._dump_tuning_process_statistics() - break - - def stop(self, trials_count): - """Check whether needed to stop the traverse procedure. - - Args: - trials_count (int): current total count of tuning trails. - - Returns: - bool: whether needed to stop the traverse procedure. - """ - need_stop = False - if trials_count >= self.cfg.tuning.exit_policy.max_trials: - need_stop = True - return need_stop - - def _compare_performace(self, last_tune_result, best_tune_result): # pragma: no cover - """Compare the tuning result with performance only. - - Args: - last_tune_result (list): The list of last tuning result. - best_tune_result (list): The list of best tuning result. - - Returns: - bool: whether the best tuning result is better than last tuning result - in performance. - """ - _, last_perf = last_tune_result - _, best_perf = best_tune_result - return last_perf[0] < best_perf[0] - - def _dump_tuning_state(self, trials_count, last_tune_result, best_tune_result, baseline): - if last_tune_result: - last_tune = last_tune_result[0] if \ - isinstance(last_tune_result[0], list) else [last_tune_result[0]] - for name, data in zip(self.metric_name, last_tune): - if len(self.tune_data[name]) == 1: - self.tune_data[name].append(data) - else: - self.tune_data[name][1] = data - - if self.metric_weight and len(last_tune) > 1: - weighted_acc = np.mean(np.array(last_tune) * self.metric_weight) - if len(self.tune_data['Weighted accuracy']) == 1: - self.tune_data['Weighted accuracy'].append(weighted_acc) - else: - self.tune_data['Weighted accuracy'][1] = weighted_acc - last_tune = [weighted_acc] - - last_tune_msg = '[Accuracy (int8|fp32):' + \ - ''.join([' {:.4f}|{:.4f}'.format(last, base) for last, base in \ - zip(last_tune, self.tune_data['baseline'])]) + \ - ''.join([', {} (int8|fp32): {:.4f}|{:.4f}'.format( \ - x, y, z) for x, y, z in zip( \ - self.objectives.representation, last_tune_result[1], baseline[1]) \ - if x != 'Accuracy']) + ']' - else: # pragma: no cover - last_tune_msg = 'n/a' - for name in self.tune_data.keys() - {'baseline'}: - if len(self.tune_data[name]) == 1: - self.tune_data[name].append('n/a') - else: - self.tune_data[name][1] = 'n/a' - - if best_tune_result: - best_tune = best_tune_result[0] if isinstance(best_tune_result[0], list) \ - else [best_tune_result[0]] - - for name, data in zip(self.metric_name, best_tune): - if len(self.tune_data[name]) == 2: - self.tune_data[name].append(data) - else: - self.tune_data[name][2] = data - - if self.metric_weight and len(best_tune) > 1: - weighted_acc = np.mean(np.array(best_tune) * self.metric_weight) - - if len(self.tune_data['Weighted accuracy']) == 2: - self.tune_data['Weighted accuracy'].append(weighted_acc) - else: # pragma: no cover - self.tune_data['Weighted accuracy'][2] = weighted_acc - - best_tune = [weighted_acc] - - best_tune_msg = '[Accuracy:' + ''.join([' {:.4f}'.format(best) \ - for best in best_tune]) + ''.join([', {}: {:.4f}'.format(x,y) \ - for x,y in zip(self.objectives.representation, \ - best_tune_result[1]) if x != 'Accuracy']) + ']' - - else: - best_tune_msg = 'n/a' - for name in self.tune_data.keys() - {'baseline'}: - if len(self.tune_data[name]) == 2: - self.tune_data[name].append('n/a') - else: - self.tune_data[name][2] = 'n/a' - - logger.info("Tune {} result 
is: {}, Best tune result is: {}".format(trials_count, - last_tune_msg, - best_tune_msg)) - output_data = [[info_type, - '{:.4f} '.format(self.tune_data[info_type][0]) if \ - not isinstance(self.tune_data[info_type][0], str) else self.tune_data[info_type][0], - '{:.4f} '.format(self.tune_data[info_type][1]) if \ - not isinstance(self.tune_data[info_type][1], str) else self.tune_data[info_type][1], - '{:.4f} '.format(self.tune_data[info_type][2]) if \ - not isinstance(self.tune_data[info_type][2], str) else self.tune_data[info_type][2]] \ - for info_type in self.tune_data.keys() if info_type != 'baseline'] - - output_data.extend([[obj, - '{:.4f} '.format(baseline[1][i]) if baseline else 'n/a', - '{:.4f} '.format(last_tune_result[1][i]) if last_tune_result else 'n/a', - '{:.4f} '.format(best_tune_result[1][i]) if best_tune_result else 'n/a'] \ - for i, obj in enumerate(self.objectives.representation)]) - - Statistics(output_data, - header='Tune Result Statistics', - field_names=['Info Type', 'Baseline', 'Tune {} result'.format(trials_count), \ - 'Best tune result']).print_stat() - - def _get_op_type_priority(self): - optypewise_cap = self.capability['optypewise'] - op_type_priority = list(optypewise_cap.keys()) - return op_type_priority - - def _sorted_item_by_op_type(self, - items_lst: List[Tuple[TuningItem, str]], - op_type_priority: List[str]) -> OrderedDict[str, List]: - """Scoring the tuning items according to their op type. - - Args: - items_lst: The tuning item list. # [(op_item, quant_mode), ... ] - op_type_priority: The op type list with the order. # [optype_1, optype_2] - - Returns: - The tuning items list sorted according to op type. - OrderDict: - # op_type: [(TuningItem, quant_mode), ...] - conv2d: [(TuningItem, static), (TuningItem, static)] - linear: [(TuningItem, static), (TuningItem, static)] - """ - op_type_lst_from_items_lst = list(set([item[0].name[1] for item in items_lst])) - # For items whose op type does not exist in the priority list, assign it with lowest priority. - sorted_op_type_lst = [op_type for op_type in op_type_priority if op_type in op_type_lst_from_items_lst] - sorted_op_type_lst += list(set(op_type_lst_from_items_lst) - set(op_type_priority)) - sorted_items = COrderedDict() - for op_type in sorted_op_type_lst: - sorted_items[op_type] = [] - for op_item, quant_mode in items_lst: - op_type = op_item.name[1] - sorted_items[op_type].append((op_item, quant_mode)) - return sorted_items - - def _initialize_tune_cfg(self): - """Initialize the tuning config with fp32 AMAP. - - Returns: - The initialized tuning config.
- """ - tuning_space = self.tuning_space - quant_mode_wise_items = tuning_space.quant_mode_wise_items - # Initialize the tuning config - initial_tuning_cfg = {} - all_ops = set() - fp32_ops = [] - for quant_mode, items_lst in quant_mode_wise_items.items(): - items_name_lst = [item.name for item in items_lst] - all_ops = all_ops.union(set(items_name_lst)) - if quant_mode == "fp32": - fp32_ops += [item.name for item in items_lst] - non_fp32_ops_dtype = {} - fp32_ops_set = set(fp32_ops) - for quant_mode, items_lst in quant_mode_wise_items.items(): - items_name_set = set([item.name for item in items_lst]) - tmp_non_fp32_ops = items_name_set.difference(fp32_ops_set) - if tmp_non_fp32_ops: - for op_info in tmp_non_fp32_ops: - non_fp32_ops_dtype[op_info] = quant_mode - for op_info in fp32_ops: - initial_tuning_cfg[op_info] = tuning_space.get_default_config(op_info, "fp32") - for op_info, quant_mode in non_fp32_ops_dtype.items(): - initial_tuning_cfg[op_info] = tuning_space.get_default_config(op_info, quant_mode) - return initial_tuning_cfg - - def _quant_items_pool(self, op_type_priority: List[str]) -> OrderedDict[ - str, OrderedDict[str, List[Tuple[TuningItem, str]]]]: - """Create the op queue to be quantized. - - -------------------------------------------------------------------------- - | Level 1 | bf16 | fp16 | static/dynamic | - | Level 2 | conv2d, linear, ...| conv2d, linear, ...| conv2d, linear, ...| - - Args: - op_type_priority: The optype list with priority. - - Returns: - The op item pool to convert into lower precision. - quant_items_pool(OrderDict): - bf16: - OrderDict: - conv2d: [(TuningItem, bf16), (TuningItem, bf16)] - linear: [(TuningItem, bf16), (TuningItem, bf16)] - int8: - OrderDict: - # (TuningItem, quant_mode) - conv2d: [(TuningItem, static), (TuningItem, static)] - linear: [(TuningItem, static), (TuningItem, static)] - """ - quant_mode_wise_items = self.tuning_space.quant_mode_wise_items - # Add all quantized pair into queue - quant_items_pool = COrderedDict() - # collect and sorted all ops that support bf16 and fp16 - for quant_mode in ['bf16', 'fp16']: - if quant_mode in quant_mode_wise_items: - op_item_pairs = [(op_item, quant_mode) for op_item in quant_mode_wise_items[quant_mode]] - op_item_pairs = self._sorted_item_by_op_type(op_item_pairs, op_type_priority) - quant_items_pool[quant_mode] = op_item_pairs - op_item_pairs = [] - quant_ops_name_set = set() - # collect and sorted all ops that support int8 - for quant_mode, items_lst in quant_mode_wise_items.items(): - if "static" in quant_mode or 'dynamic' in quant_mode: - _quant_mode = "static" if "static" in quant_mode else "dynamic" - op_item_pairs += [(item, _quant_mode) for item in items_lst if item.name not in quant_ops_name_set] - quant_ops_name_set = quant_ops_name_set.union([item.name for item in items_lst]) - op_item_pairs = self._sorted_item_by_op_type(op_item_pairs, op_type_priority) - quant_items_pool['int8'] = op_item_pairs - return quant_items_pool diff --git a/neural_compressor/experimental/strategy/hawq_v2.py b/neural_compressor/experimental/strategy/hawq_v2.py deleted file mode 100644 index 1fd76b9b7dd..00000000000 --- a/neural_compressor/experimental/strategy/hawq_v2.py +++ /dev/null @@ -1,110 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""The HAWQ_V2 tuning strategy.""" -from collections import OrderedDict -from copy import deepcopy - -from .strategy import strategy_registry, TuneStrategy - -from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler, ModelWiseTuningSampler -from .utils.tuning_structs import OpTuningConfig -from .utils.constant import TUNING_ITEMS_LST -from ...utils import logger - -@strategy_registry -class HAWQ_V2TuneStrategy(TuneStrategy): - """The HAWQ V2 tuning strategy. - - HAWQ_V2 implements the "Hawq-v2: Hessian aware trace-weighted quantization of neural networks". - We made a small change to it by using the hessian trace to score the op impact and then - fallback the OPs according to the scoring result. - - """ - - def next_tune_cfg(self): - """Generate and yield the next tuning config using HAWQ v2 search in tuning space. - - Returns: - tune_config (dict): A dict containing the tuning configuration for quantization. - """ - tuning_space = self.tuning_space - calib_size = tuning_space.root_item.get_option_by_name('calib_sampling_size').options[0] - - # Initialize the tuning config for each op according to the quantization approach - op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() - # Optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) - early_stop_tuning = True - stage1_cnt = 0 - quant_ops = quant_mode_wise_items.get('static', []) - quant_ops += quant_mode_wise_items.get('dynamic', []) - stage1_max = 1 # TODO set a more appropriate value - op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) - for op_tuning_cfg in op_wise_tuning_sampler: - stage1_cnt += 1 - if early_stop_tuning and stage1_cnt > stage1_max: - logger.info("Early stopping the stage 1.") - break - op_tuning_cfg['calib_sampling_size'] = calib_size - yield op_tuning_cfg - # Start compute the hessian trace - logger.info(f"************** Start compute the hessian trace *****************") - target_dtype = "fp32" - hawq_v2_criterion =self.cfg.tuning.strategy.hawq_v2_loss - # assert hawq_v2_criterion is not None, "HAWQ-V2 strategy needs model loss function to compute the gradient, \ - # Please assign it by strategy_kwargs({'hawq_v2_loss': hawq_v2_loss})." 
- op_to_traces = self.adaptor.calculate_hessian_trace(fp32_model = self._fp32_model, - dataloader = self.calib_dataloader, - q_model = self.last_qmodel, - criterion =hawq_v2_criterion, - enable_act = False) - sorted_op_to_traces = dict(sorted(op_to_traces.items(), key=lambda item: item[1], reverse=True)) - logger.info(f"************** Hessian Trace *****************") - for op_name, trace in sorted_op_to_traces.items(): - logger.info(f"*** op: {op_name}, hessian trace : {trace}") - logger.info(f"************************************************") - # WA for op mapping - ordered_ops_tmp = {} - for op_info in list(initial_op_tuning_cfg.keys()): - op_name, op_type = op_info - for op_trace_name in op_to_traces.keys(): - if isinstance(op_trace_name, str) and op_trace_name.startswith(op_name): - if op_name in ordered_ops_tmp: - logger.info((f"*** Already assigned the hessian trace to {op_name}", - f"update it with the value of {op_trace_name}")) - ordered_ops_tmp[op_name] = op_to_traces[op_trace_name] - - ordered_ops_tmp = sorted(ordered_ops_tmp.keys(), - key=lambda key: ordered_ops_tmp[key], - reverse=self.higher_is_better) - # WA for add op type - op_info_map = {} - for op_info in list(initial_op_tuning_cfg.keys()): - op_info_map[op_info[0]] = op_info # op_name: (op_name, op_type) - tmp_ordered_ops = [op_info_map[op_name] for op_name in ordered_ops_tmp] - op_dtypes = OrderedDict(zip(tmp_ordered_ops, [target_dtype] * len(ordered_ops_tmp))) - - logger.info(f"Start to accumulate fallback to {target_dtype}.") - initial_op_tuning_cfg = deepcopy(op_tuning_cfg) - fallback_sampler = FallbackTuningSampler(tuning_space, tuning_order_lst=[], - initial_op_tuning_cfg=op_tuning_cfg, - op_dtypes=op_dtypes, accumulate=True, - skip_first=False) - for op_tuning_cfg in fallback_sampler: - op_tuning_cfg['calib_sampling_size'] = calib_size - yield op_tuning_cfg - diff --git a/neural_compressor/experimental/strategy/strategy.py b/neural_compressor/experimental/strategy/strategy.py index b7abb59ed5f..36019e13260 100644 --- a/neural_compressor/experimental/strategy/strategy.py +++ b/neural_compressor/experimental/strategy/strategy.py @@ -231,238 +231,6 @@ def _initialize_recipe(self): logger.debug(self._not_tuning_recipes_values) logger.info(f"{len(self._tuning_recipes)} recipes require future tuning.") logger.debug(self._tuning_recipes) - - - def distributed_next_tune_cfg_lst(self, comm): - """Interface for generating the distributed next tuning config list. - - The generator of yielding next tuning config list to distributed traverse by concrete strategies or - quantization level according to tuning result and traverse logic. - - It should be implemented by the sub-class. Currently, it is only implemented in the BasicTuneStrategy. - """ - pass - - def meet_acc_req(self, eval_res): - """Compare the result of last tuning with baseline to check whether the result meets requirements. - - Args: - eval_res: The evaluation result of tuning. - - Returns: - Return True if the accuracy meets requirements else False. - """ - self.last_tune_result = eval_res - return self.objectives.accuracy_meet_req(deepcopy(self.last_tune_result)) - - def master_worker_handle(self, comm): - """Master worker handles the task assignment and result management. - - Master node sends all task ids to all free nodes, and waits until any result. - When receiving any result, directly send a new task id to the sender (it's free). - - Args: - comm (MPI.COMM): The instance of communication for MPI.
- """ - MPI = LazyImport("mpi4py.MPI") - size = comm.Get_size() - for process_id in range(1, min(len(self.tune_cfg_lst) + 1, size)): - tune_cfg_id = process_id - 1 - logger.info("~~~~~~master sending tune cfg: {} to rank {}".format(tune_cfg_id, process_id)) - comm.send( - obj=tune_cfg_id, # just send the tune cfg id is enough - dest=process_id, # rank 0 send to rank 1, 2, ... - tag=tune_cfg_id # tag, the index of tune cfg 0,1,2,3 - ) - import time as ttime - ttime.sleep(0.5) # WA for UT - - cur_cfg_id = min(len(self.tune_cfg_lst), size - 1) # 4 master should be aware of the next config id to send - self.eval_results = {} # record all results - self.num_acks = 0 # number of all response acks, break when it equals to len() - status = MPI.Status() # used to obtain the source and the tag for each received message - - self.already_ack_id_lst = set() - self.requirements_met_min_cfg_id = sys.maxsize - - # stuck here to receive any result - while True: - eval_res = comm.recv( - source=MPI.ANY_SOURCE, - tag=MPI.ANY_TAG, - status=status # get MPI status object - ) - self.num_acks += 1 - sender_rank = status.Get_source() # sender rank - tag = status.Get_tag() # the task id that is finished - - logger.info("~~~~~~master receiving eval result: {} from rank {}".format(eval_res, sender_rank)) - - self.last_tune_result = eval_res # for context coordination of stage 3 - self.eval_results[tag] = eval_res - - self.overall_trials += 1 - self.best_tune_cfg_id = None - self.already_ack_id_lst.add(tag) - - # if meet accuracy requirement, then update minimum id that met requirement - if(self.meet_acc_req(eval_res)): - logger.info("~~~~~~master has one tuning cfg meet acc: {}".format(tag)) - self.met_flag = True - self.requirements_met_min_cfg_id = min(self.requirements_met_min_cfg_id, tag) - - # must ensure every id lower than current min_id has been acknowledged - # because a tune cfg (not acked yet) with lower id can have better acc - for i in range(self.requirements_met_min_cfg_id): - if i not in self.already_ack_id_lst: - logger.info("~~~~~~master has one tuning cfg meet acc: {} but not collect all acks before"\ - .format(tag)) - self.met_flag = False # not completely collected yet! - break - - if self.met_flag: - # found the best tune cfg! - logger.info("~~~~~~master has one tuning cfg meet acc: {} and also collect all acks before"\ - .format(tag)) - self.best_tune_cfg_id = self.requirements_met_min_cfg_id - else: - # get the current best acc but not meet requirements - logger.info("~~~~~~master gets the current best acc: {} but not meet requirements".format(tag)) - self.cur_best_acc, self.cur_best_tuning_cfg = self.update_best_op_tuning_cfg(self.tune_cfg_lst[tag]) - - if self.best_tune_cfg_id is not None: - #### we find the best tune cfg id that meet requirements!! 
- logger.info("~~~~~~master finds best tune cfg id~~~~~~~") - logger.info(self.best_tune_cfg_id) - logger.info(self.tune_cfg_lst[self.best_tune_cfg_id]) - break - - # send the next cfg if not exceed max trials - if self.overall_trials > self.cfg.tuning.exit_policy.max_trials: - self.max_trial_flag = True - # elif time.time() - self.overall_time_start > self.cfg.tuning.exit_policy.timeout: - # self.max_time_flag = True - elif cur_cfg_id < len(self.tune_cfg_lst): - logger.info("~~~~~~master sends new tuning cfg {} to rank: {}".format(cur_cfg_id, sender_rank)) - comm.send(obj=cur_cfg_id, dest=sender_rank, tag=cur_cfg_id) - cur_cfg_id += 1 - else: - logger.info("All tune configs are sent, no more sending, just collecting...") - - if len(self.tune_cfg_lst) == self.num_acks: # all collected (ack should collected == acks) - # all processes ended - # return self.requirements_met_min_cfg_id if it has been updated - if self.requirements_met_min_cfg_id == sys.maxsize: - logger.info("~~~~~~Not found any tune cfg that meet requirements~~~~~~") - self.cur_best_tuning_cfg = self.tune_cfg_lst[0] # TODO select cur_best_tuning_cfg - else: - logger.info("~~~~~~Find best tune cfg id~~~~~~") - logger.info(self.requirements_met_min_cfg_id) - self.met_flag = True - self.best_tune_cfg_id = self.requirements_met_min_cfg_id - logger.info(self.tune_cfg_lst[self.best_tune_cfg_id]) - break - - # send END signal to all other slaves - logger.info("~~~~~~master sends END signal to all other slaves~~~~") - for process_id in range(1, size): - logger.info("~~~~~~master sends END signal to rank: {}".format(process_id)) - comm.send( - obj="MET" if self.met_flag else "NOT MET", # send whether met criterion in the current stage - dest=process_id, # rank 0 send to rank 1, 2, ... - tag=len(self.tune_cfg_lst) - ) - - if self.best_tune_cfg_id is not None: - self.best_qmodel = self.adaptor.quantize( - copy.deepcopy(self.tune_cfg_lst[self.best_tune_cfg_id]), self.model, self.calib_dataloader, \ - self.q_func) - - - def slave_worker_handle(self, comm): - """Slave worker handles the task processing. - - When receiving any task id, slave node finds it in self.tune_cfg_lst and runs it. - Then slave node sends back the tune result to master node. - - Args: - comm (MPI.COMM): The instance of communication for MPI.
- """ - MPI = LazyImport("mpi4py.MPI") - status = MPI.Status() - while True: - task = comm.recv( - source=MPI.ANY_SOURCE, - tag=MPI.ANY_TAG, - status=status # sender (master) - ) - cfg_idx = status.Get_tag() - if status.Get_tag() >= len(self.tune_cfg_lst): - logger.info("~~~~~~slave {} receiving END signal in the current stage".format(comm.Get_rank())) - if task == "MET": - logger.info("~~~~~~met criterion in this stage!") - self.met_flag = True - break - tune_cfg = self.tune_cfg_lst[cfg_idx] - - # set the parameter for pre quantization algos and run - self.set_param_for_pre_quantization_algos(self.algo_scheduler, tune_cfg, self.model) - self.model = self.algo_scheduler('pre_quantization') - # quantize - q_model = self.adaptor.quantize(copy.deepcopy(tune_cfg), self.model, self.calib_dataloader, self.q_func) - assert self.adaptor.pre_optimized_model - # set the parameter for post quantization algos and run - self.set_param_for_post_quantization_algos(self.algo_scheduler, tune_cfg, self.adaptor.pre_optimized_model, - q_model) - self.last_qmodel = self.algo_scheduler('post_quantization') - self.last_tune_cfg = copy.deepcopy(tune_cfg) - # Remove the reference to model - self.algo_scheduler.reset_exec_algorithms() - assert self.last_qmodel - self.last_tune_result = self._evaluate(self.last_qmodel) - - ##### send back the tuning statistics ######### - logger.debug("##### Slave sends back the tuning statistics #########") - logger.debug(self.last_tune_result) - comm.send( - obj=self.last_tune_result, - dest=0, # rank 0 send to rank 1, 2, ... - tag=cfg_idx - ) - - def distributed_traverse(self): - """Distributed traverse the tuning space. - - The main traverse logic which could be override by some concrete strategy which needs more hooks. - """ - MPI = LazyImport("mpi4py.MPI") - comm = MPI.COMM_WORLD - rank = comm.Get_rank() - - self.met_flag = False - self.max_trial_flag = False # whether exceed max trials - self.max_time_flag = False # whether exceed max time - self.overall_trials = 0 - self.overall_time_start = time() - - # for all the stages, handle the tune cfg lst - # the tune cfg lst is generated/yielded each time by distributed_next_self.tune_cfg_lst - # we must pass the comm to the specific strategy because slaves may not know - # contexts such as the best_tune_cfg - # master should make sure slaves have all the contexts needed before going to the next computation stage - for op_tuning_cfg_lst in self.distributed_next_tune_cfg_lst(comm): - self.tune_cfg_lst = [self._tune_cfg_converter(op_tuning_cfg) for op_tuning_cfg in op_tuning_cfg_lst] - if self.tune_cfg_lst == []: - # skip empty list at some stages - continue - if rank == 0: - self.master_worker_handle(comm) - else: - self.slave_worker_handle(comm) - logger.debug("# if self.met_flag or self.max_trial_flag or self.max_time_flag:" \ - .format(self.met_flag or self.max_trial_flag or self.max_time_flag)) - if self.met_flag or self.max_trial_flag or self.max_time_flag: - break From 98b724fe4c323bd25d5b7b0f4b3c644208f5a3c2 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 20 Apr 2023 12:11:59 +0800 Subject: [PATCH 074/103] fixed pylint check Signed-off-by: yiliu30 --- neural_compressor/experimental/strategy/strategy.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/neural_compressor/experimental/strategy/strategy.py b/neural_compressor/experimental/strategy/strategy.py index 36019e13260..3d8810e6490 100644 --- a/neural_compressor/experimental/strategy/strategy.py +++
b/neural_compressor/experimental/strategy/strategy.py @@ -340,9 +340,6 @@ def traverse(self): The main traverse logic which could be override by some concrete strategy which needs more hooks. """ self._eval_baseline() - logger.info("use distributed traverse: {}".format(self.cfg.tuning.use_distributed_tuning)) - if self.cfg.tuning.use_distributed_tuning: - return self.distributed_traverse() trials_count = 0 traverse_start_time = time() for op_tuning_cfg in self.next_tune_cfg(): From a486a29afe144d193f1e8e12861dc03223ea05c8 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 20 Apr 2023 15:29:27 +0800 Subject: [PATCH 075/103] improve code coverage Signed-off-by: yiliu30 --- .../experimental/strategy/basic.py | 7 +- .../strategy/utils/tuning_space.py | 3 +- neural_compressor/strategy/basic.py | 10 +- .../strategy/utils/tuning_space.py | 2 +- .../onnxrt_adaptor/test_adaptor_onnxrt.py | 19 +- test/strategy/test_basic_1.x.py | 42 +++ test/strategy/test_mse.py | 255 +++++++++++++++ test/strategy/test_mse_1.x.py | 293 ++++++++++++++++++ 8 files changed, 620 insertions(+), 11 deletions(-) create mode 100644 test/strategy/test_mse.py create mode 100644 test/strategy/test_mse_1.x.py diff --git a/neural_compressor/experimental/strategy/basic.py b/neural_compressor/experimental/strategy/basic.py index 68267808d05..6841c197137 100644 --- a/neural_compressor/experimental/strategy/basic.py +++ b/neural_compressor/experimental/strategy/basic.py @@ -60,9 +60,10 @@ def next_tune_cfg(self): quant_ops = quant_mode_wise_items.get('static', []) quant_ops += quant_mode_wise_items.get('dynamic', []) stage1_max = 1e9 # TODO set a more appropriate value - op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) - for index, op_tuning_cfg in enumerate(op_wise_tuning_sampler): + op_type_wise_tuning_sampler = OpTypeWiseTuningSampler( + tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) + for index, op_tuning_cfg in enumerate(op_type_wise_tuning_sampler): + logger.debug(f"[OP TYPE WISE STAGE], Trial {index + 1}") op_tuning_cfg['calib_sampling_size'] = calib_sampling_size # Apply all recipes, if not got the qmodel that meet the requirements, discard it. 
if index == 1 and not self.applied_all_recipes_flag: diff --git a/neural_compressor/experimental/strategy/utils/tuning_space.py b/neural_compressor/experimental/strategy/utils/tuning_space.py index 6ea1998dbb8..e6b83c452d0 100644 --- a/neural_compressor/experimental/strategy/utils/tuning_space.py +++ b/neural_compressor/experimental/strategy/utils/tuning_space.py @@ -252,7 +252,8 @@ def _merge_op_cfg(self, cur_op_cap, op_user_cfg, fw_op_cap): def _merge_optype_wise_cfg(self, cap: Dict, optype_wise_usr_cfg: Dict, fw_cap: Dict): for op_type, op_user_cfg in optype_wise_usr_cfg.items(): - op_lst = [op_name_type for op_name_type in cap['op'] if op_name_type[1] == op_type] + op_type_pattern = re.compile(op_type) + op_lst = [op_name_type for op_name_type in cap['op'] if op_type_pattern.fullmatch(op_name_type[1])] for op_name_type in op_lst: cap['op'][op_name_type] = self._merge_op_cfg(cap['op'][op_name_type], op_user_cfg, diff --git a/neural_compressor/strategy/basic.py b/neural_compressor/strategy/basic.py index 3beee9cfabf..0e348d8ed05 100644 --- a/neural_compressor/strategy/basic.py +++ b/neural_compressor/strategy/basic.py @@ -57,11 +57,11 @@ def distributed_next_tune_cfg_lst(self, comm): quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] stage1_max = 1e9 # TODO set a more appropriate value - op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], + op_type_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) # stage 1: yield op_tune_cfg_lst op_tuning_cfg_lst_stage_1 = [] - for op_tuning_cfg in op_wise_tuning_sampler: + for op_tuning_cfg in op_type_wise_tuning_sampler: stage1_cnt += 1 if early_stop_tuning and stage1_cnt > stage1_max: logger.info("Early stopping the stage 1.") @@ -233,9 +233,9 @@ def next_tune_cfg(self): quant_ops = quant_mode_wise_items.get('static', []) quant_ops += quant_mode_wise_items.get('dynamic', []) stage1_max = 1e9 # TODO set a more appropriate value - op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], - op_item_dtype_dict, initial_op_tuning_cfg) - for index, op_tuning_cfg in enumerate(op_wise_tuning_sampler): + op_type_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [],\ + op_item_dtype_dict, initial_op_tuning_cfg) + for index, op_tuning_cfg in enumerate(op_type_wise_tuning_sampler): op_tuning_cfg['calib_sampling_size'] = calib_sampling_size # Apply all recipes, if not got the qmodel that meet the requirements, discard it. if index == 1 and not self.applied_all_recipes_flag: diff --git a/neural_compressor/strategy/utils/tuning_space.py b/neural_compressor/strategy/utils/tuning_space.py index 51a742e002a..f19cd410c6b 100644 --- a/neural_compressor/strategy/utils/tuning_space.py +++ b/neural_compressor/strategy/utils/tuning_space.py @@ -227,7 +227,7 @@ def _merge_op_cfg(self, cur_op_cap, op_user_cfg, fw_op_cap): new_op_cap = deepcopy(cur_op_cap) for att in ['activation', 'weight']: if op_user_cfg.get(att, None) is not None: - user_dtype_lst = op_user_cfg[att]['dtype'] if op_user_cfg[att]['dtype'] is not None else [] + user_dtype_lst = op_user_cfg[att]['dtype'] if op_user_cfg[att].get('dtype', None) is not None else [] # Merge the precision part. 
fwk_att_precision_cap = fw_op_cap['precision'].get(att, {}) fwk_precision_set = set(fwk_att_precision_cap.keys()) diff --git a/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py b/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py index e7a37dac263..11cb65d2ae8 100644 --- a/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py +++ b/test/adaptor/onnxrt_adaptor/test_adaptor_onnxrt.py @@ -802,9 +802,11 @@ def test_set_tensor(self): def test_auto_quant(self): conf.model.framework = 'onnxrt_qlinearops' conf.quantization.approach = 'post_training_auto_quant' + conf.quantization.optype_wise ={"Add|MatMul|Conv": {'weight': {'algorithm': ['minmax']}, \ + 'activation': {'algorithm': ['minmax']}}} conf.quantization.calibration.sampling_size = 1 conf.tuning.exit_policy.timeout = 1000000 - conf.tuning.exit_policy.max_trials = 5 + conf.tuning.exit_policy.max_trials = 8 conf.evaluation.accuracy.metric = {'MSE': {'compare_label': False}} quantizer = Quantization(conf) quantizer.calib_dataloader = self.cv_dataloader @@ -821,6 +823,21 @@ def test_auto_quant(self): q_model = quantizer.fit() self.assertNotEqual(q_model, None) + def test_auto_quant_v2(self): + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion + tuning_criterion = TuningCriterion(max_trials=8, timeout=10000) + accuracy_criterion = AccuracyCriterion(tolerable_loss=0.01) + conf = PostTrainingQuantConfig(quant_level=1, approach="auto", + op_type_dict={"Add|MatMul|Conv": {'weight': {'algorithm': ['minmax']},\ + 'activation': {'algorithm': ['minmax']}}}, + tuning_criterion=tuning_criterion, + accuracy_criterion=accuracy_criterion) + conf.framework = "onnxrt_qlinearops" + q_model = fit(model=self.rn50_model, conf=conf, calib_dataloader=self.cv_dataloader, eval_func=lambda model: 1) + self.assertIsNotNone(q_model) + + def test_quantize_data_per_channel(self): from neural_compressor.adaptor.ox_utils.util import quantize_data_per_channel tensor_value = np.ones([2, 1]) diff --git a/test/strategy/test_basic_1.x.py b/test/strategy/test_basic_1.x.py index 89b47ffa722..435a5d96d9c 100644 --- a/test/strategy/test_basic_1.x.py +++ b/test/strategy/test_basic_1.x.py @@ -30,6 +30,36 @@ def build_fake_yaml(): yaml.dump(y,f) f.close() +def build_fake_yaml_recipe(): + fake_yaml = ''' + model: + name: fake_yaml + framework: tensorflow + inputs: x + outputs: op2_to_store + device: cpu + evaluation: + accuracy: + metric: + topk: 1 + quantization: + approach: + post_training_auto_quant + tuning: + strategy: + name: basic + exit_policy: + max_trials: 10 + accuracy_criterion: + relative: -0.01 + workspace: + path: saved + ''' + y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) + with open('fake_yaml_recipe.yaml',"w",encoding="utf-8") as f: + yaml.dump(y,f) + f.close() + def build_fake_yaml2(): fake_yaml = ''' model: @@ -164,6 +194,7 @@ def setUpClass(self): build_fake_yaml2() build_fake_yaml3() build_fake_yaml4() + build_fake_yaml_recipe() @classmethod def tearDownClass(self): @@ -171,6 +202,7 @@ def tearDownClass(self): os.remove('fake_yaml2.yaml') os.remove('fake_yaml3.yaml') os.remove('fake_yaml4.yaml') + os.remove('fake_yaml_recipe.yaml') shutil.rmtree('saved', ignore_errors=True) def test_run_basic_one_trial(self): @@ -196,6 +228,16 @@ def test_run_basic_max_trials(self): quantizer.eval_dataloader = common.DataLoader(dataset) quantizer.model = self.constant_graph quantizer.fit() + + def test_run_basic_recipe(self): + from neural_compressor.experimental import Quantization, 
common + + quantizer = Quantization('fake_yaml_recipe.yaml') + dataset = quantizer.dataset('dummy', (100, 3, 3, 1), label=True) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.model = self.constant_graph + quantizer.fit() def test_run_basic_max_trials_multimetric(self): from neural_compressor.experimental import Quantization, common diff --git a/test/strategy/test_mse.py b/test/strategy/test_mse.py new file mode 100644 index 00000000000..0c0d84ea9dc --- /dev/null +++ b/test/strategy/test_mse.py @@ -0,0 +1,255 @@ +"""Tests for quantization""" +import numpy as np +import unittest +import shutil + +def build_fake_model(): + import tensorflow as tf + try: + graph = tf.Graph() + graph_def = tf.GraphDef() + with tf.Session() as sess: + x = tf.placeholder(tf.float64, shape=(1,3,3,1), name='x') + y = tf.constant(np.random.random((2,2,1,1)), name='y') + op = tf.nn.conv2d(input=x, filter=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + + sess.run(tf.global_variables_initializer()) + constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + except: + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(tf.float64, shape=(1,3,3,1), name='x') + y = tf.compat.v1.constant(np.random.random((2,2,1,1)), name='y') + op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + + sess.run(tf.compat.v1.global_variables_initializer()) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + return graph + +def create_test_graph(): + from tensorflow.core.framework import attr_value_pb2 + from tensorflow.core.framework import graph_pb2 + from tensorflow.core.framework import node_def_pb2 + from tensorflow.python.framework import tensor_util + from tensorflow.python.framework import dtypes + input_node = node_def_pb2.NodeDef() + input_node.name = "input" + input_node.op = "Placeholder" + input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue( + type=dtypes.float32.as_datatype_enum)) + + conv1_weight_node = node_def_pb2.NodeDef() + conv1_weight_node.name = "conv1_weights" + conv1_weight_node.op = "Const" + conv1_weight_value = np.float32(np.abs(np.random.randn(3,3,3,32))) + conv1_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv1_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape))) + + conv1_node = node_def_pb2.NodeDef() + conv1_node.name = "conv1" + conv1_node.op = "Conv2D" + conv1_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( + type=dtypes.float32.as_datatype_enum)) + conv1_node.input.extend([input_node.name, conv1_weight_node.name]) + conv1_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( + list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) + conv1_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( + list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) + conv1_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) + 
conv1_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + + bias_node = node_def_pb2.NodeDef() + bias_node.name = "conv1_bias" + bias_node.op = "Const" + bias_value = np.float32(np.abs(np.random.randn(32))) + bias_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( + bias_value, bias_value.dtype.type, bias_value.shape))) + + bias_add_node = node_def_pb2.NodeDef() + bias_add_node.name = "conv1_bias_add" + bias_add_node.op = "BiasAdd" + bias_add_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_add_node.input.extend([conv1_node.name, bias_node.name]) + bias_add_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + + relu_node = node_def_pb2.NodeDef() + relu_node.op = "Relu" + relu_node.name = "relu" + relu_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + relu_node.input.extend([bias_add_node.name]) + + conv2_weight_node = node_def_pb2.NodeDef() + conv2_weight_node.name = "conv2_weights" + conv2_weight_node.op = "Const" + conv2_weight_value = np.float32(np.abs(np.random.randn(3,3,32,32))) + conv2_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv2_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv2_weight_value, conv2_weight_value.dtype.type, conv2_weight_value.shape))) + + conv2_node = node_def_pb2.NodeDef() + conv2_node.name = "conv2" + conv2_node.op = "Conv2D" + conv2_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( + type=dtypes.float32.as_datatype_enum)) + conv2_node.input.extend([relu_node.name, conv2_weight_node.name]) + conv2_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( + list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) + conv2_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( + list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) + conv2_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) + conv2_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + + bias_node2 = node_def_pb2.NodeDef() + bias_node2.name = "conv2_bias" + bias_node2.op = "Const" + bias_value2 = np.float32(np.abs(np.random.randn(32))) + bias_node2.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node2.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( + bias_value2, bias_value2.dtype.type, bias_value2.shape))) + + bias_add_node2 = node_def_pb2.NodeDef() + bias_add_node2.name = "conv2_bias_add" + bias_add_node2.op = "BiasAdd" + bias_add_node2.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_add_node2.input.extend([conv2_node.name, bias_node2.name]) + bias_add_node2.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + + relu_node2 = node_def_pb2.NodeDef() + relu_node2.op = "Relu" + relu_node2.name = "relu2" + relu_node2.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + relu_node2.input.extend([bias_add_node2.name]) + + conv3_weight_node = node_def_pb2.NodeDef() + conv3_weight_node.name = "conv3_weights" + conv3_weight_node.op = "Const" + conv3_weight_value = np.float32(np.abs(np.random.randn(3,3,32,32))) + conv3_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + 
conv3_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv3_weight_value, conv3_weight_value.dtype.type, conv3_weight_value.shape))) + + conv3_node = node_def_pb2.NodeDef() + conv3_node.name = "conv3" + conv3_node.op = "Conv2D" + conv3_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( + type=dtypes.float32.as_datatype_enum)) + conv3_node.input.extend([relu_node2.name, conv3_weight_node.name]) + conv3_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( + list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) + conv3_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( + list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) + conv3_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) + conv3_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + + identity_node = node_def_pb2.NodeDef() + identity_node.name = "final" + identity_node.op = "Identity" + identity_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( + type=dtypes.float32.as_datatype_enum)) + identity_node.input.extend([conv3_node.name]) + + test_graph = graph_pb2.GraphDef() + + test_graph.node.extend([input_node, + conv1_weight_node, + conv1_node, + bias_node, + bias_add_node, + relu_node, + conv2_weight_node, + conv2_node, + bias_node2, + bias_add_node2, + relu_node2, + conv3_weight_node, + conv3_node, + identity_node + ]) + return test_graph + +def objective_func(params): + return params['x1']**2 + params['x2'] + +class TestQuantization(unittest.TestCase): + + @classmethod + def setUpClass(self): + self.constant_graph = build_fake_model() + self.test_graph = create_test_graph() + + @classmethod + def tearDownClass(self): + shutil.rmtree("saved", ignore_errors=True) + + def test_run_mse_one_trial(self): + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion + from neural_compressor.data import Datasets, DATALOADERS + + # dataset and dataloader + dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) + dataloader = DATALOADERS["tensorflow"](dataset) + + # tuning and accuracy criterion + tune_cri = TuningCriterion(strategy='mse', max_trials=1) + acc_cri = AccuracyCriterion(tolerable_loss=0.01) + + conf = PostTrainingQuantConfig(quant_level=1, tuning_criterion=tune_cri, accuracy_criterion=acc_cri) + def fake_eval(model): + return 1 + + q_model = fit(model=self.constant_graph, + conf=conf, + calib_dataloader=dataloader, + eval_func=fake_eval) + self.assertNotEqual(q_model, None) + + def test_run_mse_max_trials(self): + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion + from neural_compressor.data import Datasets, DATALOADERS + + # dataset and dataloader + dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) + dataloader = DATALOADERS["tensorflow"](dataset) + + # tuning and accuracy criterion + tune_cri = TuningCriterion(strategy='mse', max_trials=3) + acc_cri = AccuracyCriterion(tolerable_loss=0.01) + + op_name_dict = { + "conv1": { + "activation": {"dtype": ["fp32"]}, + }, + } + + acc = [0, 1, 0.9, 0.9, 1] + def fake_eval(model): + acc.pop(0) + return acc[0] + + conf = PostTrainingQuantConfig(quant_level=1, op_name_dict = op_name_dict,\ + tuning_criterion=tune_cri, accuracy_criterion=acc_cri) + q_model = fit(model=self.constant_graph, + conf=conf, + calib_dataloader=dataloader, + eval_func=fake_eval) + 
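+        # Note: fake_eval walks through acc, so the baseline evaluation (run first,
+        # as in traverse's _eval_baseline) sees 1 and the following trials see
+        # 0.9, 0.9 and then 1; with tolerable_loss=0.01 only the last trial should
+        # satisfy the accuracy goal, exercising the mse strategy up to max_trials=3.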
self.assertNotEqual(q_model, None) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/strategy/test_mse_1.x.py b/test/strategy/test_mse_1.x.py new file mode 100644 index 00000000000..eab15e5fc7a --- /dev/null +++ b/test/strategy/test_mse_1.x.py @@ -0,0 +1,293 @@ +"""Tests for quantization""" +import numpy as np +import unittest +import os +import shutil +import yaml + +def build_fake_yaml(): + fake_yaml = ''' + model: + name: fake_yaml + framework: tensorflow + inputs: x + outputs: op_to_store + device: cpu + quantization: + calibration: + sampling_size: 10 + evaluation: + accuracy: + metric: + topk: 1 + tuning: + strategy: + name: mse + exit_policy: + max_trials: 1 + accuracy_criterion: + relative: 0.01 + workspace: + path: saved + ''' + y = yaml.load(fake_yaml, Loader=yaml.SafeLoader) + with open('fake_yaml.yaml',"w",encoding="utf-8") as f: + yaml.dump(y,f) + f.close() + +def build_fake_yaml2(): + fake_yaml = ''' + model: + name: fake_yaml + framework: tensorflow + inputs: input + outputs: final + device: cpu + quantization: + calibration: + sampling_size: 10, 20 + op_wise: { + \"conv1\": { + \"activation\": {\"dtype\": [\"fp32\"]}, + }, + } + evaluation: + accuracy: + metric: + topk: 1 + tuning: + strategy: + name: mse + exit_policy: + max_trials: 3 + accuracy_criterion: + relative: 0.01 + workspace: + path: saved + ''' + with open('fake_yaml2.yaml',"w",encoding="utf-8") as f: + f.write(fake_yaml) + f.close() + +def build_fake_model(): + import tensorflow as tf + try: + graph = tf.Graph() + graph_def = tf.GraphDef() + with tf.Session() as sess: + x = tf.placeholder(tf.float64, shape=(1,3,3,1), name='x') + y = tf.constant(np.random.random((2,2,1,1)), name='y') + op = tf.nn.conv2d(input=x, filter=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + + sess.run(tf.global_variables_initializer()) + constant_graph = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + except: + graph = tf.Graph() + graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(tf.float64, shape=(1,3,3,1), name='x') + y = tf.compat.v1.constant(np.random.random((2,2,1,1)), name='y') + op = tf.nn.conv2d(input=x, filters=y, strides=[1,1,1,1], padding='VALID', name='op_to_store') + + sess.run(tf.compat.v1.global_variables_initializer()) + constant_graph = tf.compat.v1.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['op_to_store']) + + graph_def.ParseFromString(constant_graph.SerializeToString()) + with graph.as_default(): + tf.import_graph_def(graph_def, name='') + return graph + +def create_test_graph(): + from tensorflow.core.framework import attr_value_pb2 + from tensorflow.core.framework import graph_pb2 + from tensorflow.core.framework import node_def_pb2 + from tensorflow.python.framework import tensor_util + from tensorflow.python.framework import dtypes + input_node = node_def_pb2.NodeDef() + input_node.name = "input" + input_node.op = "Placeholder" + input_node.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue( + type=dtypes.float32.as_datatype_enum)) + + conv1_weight_node = node_def_pb2.NodeDef() + conv1_weight_node.name = "conv1_weights" + conv1_weight_node.op = "Const" + conv1_weight_value = np.float32(np.abs(np.random.randn(3,3,3,32))) + conv1_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + 
conv1_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv1_weight_value, conv1_weight_value.dtype.type, conv1_weight_value.shape))) + + conv1_node = node_def_pb2.NodeDef() + conv1_node.name = "conv1" + conv1_node.op = "Conv2D" + conv1_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( + type=dtypes.float32.as_datatype_enum)) + conv1_node.input.extend([input_node.name, conv1_weight_node.name]) + conv1_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( + list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) + conv1_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( + list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) + conv1_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) + conv1_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + + bias_node = node_def_pb2.NodeDef() + bias_node.name = "conv1_bias" + bias_node.op = "Const" + bias_value = np.float32(np.abs(np.random.randn(32))) + bias_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( + bias_value, bias_value.dtype.type, bias_value.shape))) + + bias_add_node = node_def_pb2.NodeDef() + bias_add_node.name = "conv1_bias_add" + bias_add_node.op = "BiasAdd" + bias_add_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_add_node.input.extend([conv1_node.name, bias_node.name]) + bias_add_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + + relu_node = node_def_pb2.NodeDef() + relu_node.op = "Relu" + relu_node.name = "relu" + relu_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + relu_node.input.extend([bias_add_node.name]) + + conv2_weight_node = node_def_pb2.NodeDef() + conv2_weight_node.name = "conv2_weights" + conv2_weight_node.op = "Const" + conv2_weight_value = np.float32(np.abs(np.random.randn(3,3,32,32))) + conv2_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv2_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv2_weight_value, conv2_weight_value.dtype.type, conv2_weight_value.shape))) + + conv2_node = node_def_pb2.NodeDef() + conv2_node.name = "conv2" + conv2_node.op = "Conv2D" + conv2_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( + type=dtypes.float32.as_datatype_enum)) + conv2_node.input.extend([relu_node.name, conv2_weight_node.name]) + conv2_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( + list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) + conv2_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( + list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) + conv2_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) + conv2_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + + bias_node2 = node_def_pb2.NodeDef() + bias_node2.name = "conv2_bias" + bias_node2.op = "Const" + bias_value2 = np.float32(np.abs(np.random.randn(32))) + bias_node2.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_node2.attr['value'].CopyFrom(attr_value_pb2.AttrValue(tensor=tensor_util.make_tensor_proto( + bias_value2, bias_value2.dtype.type, bias_value2.shape))) + + bias_add_node2 = node_def_pb2.NodeDef() + bias_add_node2.name = "conv2_bias_add" + bias_add_node2.op = "BiasAdd" + 
bias_add_node2.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + bias_add_node2.input.extend([conv2_node.name, bias_node2.name]) + bias_add_node2.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + + relu_node2 = node_def_pb2.NodeDef() + relu_node2.op = "Relu" + relu_node2.name = "relu2" + relu_node2.attr['T'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + relu_node2.input.extend([bias_add_node2.name]) + + conv3_weight_node = node_def_pb2.NodeDef() + conv3_weight_node.name = "conv3_weights" + conv3_weight_node.op = "Const" + conv3_weight_value = np.float32(np.abs(np.random.randn(3,3,32,32))) + conv3_weight_node.attr['dtype'].CopyFrom(attr_value_pb2.AttrValue(type=dtypes.float32.as_datatype_enum)) + conv3_weight_node.attr['value'].CopyFrom(attr_value_pb2.AttrValue( + tensor=tensor_util.make_tensor_proto( + conv3_weight_value, conv3_weight_value.dtype.type, conv3_weight_value.shape))) + + conv3_node = node_def_pb2.NodeDef() + conv3_node.name = "conv3" + conv3_node.op = "Conv2D" + conv3_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( + type=dtypes.float32.as_datatype_enum)) + conv3_node.input.extend([relu_node2.name, conv3_weight_node.name]) + conv3_node.attr['strides'].CopyFrom(attr_value_pb2.AttrValue( + list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) + conv3_node.attr['dilations'].CopyFrom(attr_value_pb2.AttrValue( + list=attr_value_pb2.AttrValue.ListValue(i=[1,1,1,1]))) + conv3_node.attr['padding'].CopyFrom(attr_value_pb2.AttrValue(s=b'SAME')) + conv3_node.attr['data_format'].CopyFrom(attr_value_pb2.AttrValue(s=b'NHWC')) + + identity_node = node_def_pb2.NodeDef() + identity_node.name = "final" + identity_node.op = "Identity" + identity_node.attr['T'].CopyFrom(attr_value_pb2.AttrValue( + type=dtypes.float32.as_datatype_enum)) + identity_node.input.extend([conv3_node.name]) + + test_graph = graph_pb2.GraphDef() + + test_graph.node.extend([input_node, + conv1_weight_node, + conv1_node, + bias_node, + bias_add_node, + relu_node, + conv2_weight_node, + conv2_node, + bias_node2, + bias_add_node2, + relu_node2, + conv3_weight_node, + conv3_node, + identity_node + ]) + return test_graph + +def objective_func(params): + return params['x1']**2 + params['x2'] + +class TestQuantization(unittest.TestCase): + + @classmethod + def setUpClass(self): + self.constant_graph = build_fake_model() + self.test_graph = create_test_graph() + build_fake_yaml() + build_fake_yaml2() + + @classmethod + def tearDownClass(self): + os.remove('fake_yaml.yaml') + os.remove('fake_yaml2.yaml') + + shutil.rmtree("saved", ignore_errors=True) + + def test_run_mse_one_trial(self): + + from neural_compressor.experimental import Quantization, common + quantizer = Quantization('fake_yaml.yaml') + dataset = quantizer.dataset('dummy', shape=(100, 3, 3, 1), label=True) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.model = self.constant_graph + output_graph = quantizer.fit() + self.assertNotEqual(output_graph, None) + + def test_run_mse_max_trials(self): + + from neural_compressor.experimental import Quantization, common + quantizer = Quantization('fake_yaml2.yaml') + dataset = quantizer.dataset('dummy', shape=(1, 224, 224, 3), label=True) + quantizer.eval_dataloader = common.DataLoader(dataset) + quantizer.calib_dataloader = common.DataLoader(dataset) + quantizer.model = self.test_graph + output_graph = quantizer.fit() + self.assertNotEqual(output_graph, None) + + +if 
__name__ == "__main__": + unittest.main() From 87691716bac348a711c820d721064f88e1f18cb6 Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Thu, 20 Apr 2023 15:41:16 +0800 Subject: [PATCH 076/103] edit ut for coverage Signed-off-by: Cheng, Zixuan --- neural_compressor/config.py | 5 +++-- test/config/test_config_2.x.py | 7 ++++++- test/config/test_pythonic_config.py | 9 ++++++++- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/neural_compressor/config.py b/neural_compressor/config.py index b01ffe9f023..47aec414f06 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -1991,8 +1991,9 @@ def precisions(self, precisions): """Set precision.""" if not isinstance(precisions, list): precisions = [precisions] - if _check_value('precisions', precisions, str, ['int8', 'uint8', 'fp32', 'bf16', 'fp16']): - self._precisions = precisions + for pr in precisions: + _check_value('precision', pr, str, ['int8', 'uint8', 'fp32', 'bf16', 'fp16']) + self._precisions = precisions class ONNX(MXNet): diff --git a/test/config/test_config_2.x.py b/test/config/test_config_2.x.py index a942733b0cc..914e28d2a0f 100644 --- a/test/config/test_config_2.x.py +++ b/test/config/test_config_2.x.py @@ -4,7 +4,7 @@ from neural_compressor.config import Config as conf from neural_compressor.utils.constant import * from neural_compressor.config import PostTrainingQuantConfig, BenchmarkConfig, Options -from neural_compressor.config import MixedPrecisionConfig, DotDict +from neural_compressor.config import MixedPrecisionConfig, MXNet def helper(content): @@ -63,6 +63,11 @@ def test_config(self): a = conf(mixed_precision=cfg) self.assertEqual(a.mixed_precision.precision, ["bf16"]) + cfg = MXNet() + cfg.precisions = "bf16" + a = conf(mxnet=cfg) + self.assertEqual(a.mxnet.precisions, ["bf16"]) + if __name__ == "__main__": unittest.main() diff --git a/test/config/test_pythonic_config.py b/test/config/test_pythonic_config.py index 5f10adf1dd6..44f13857531 100644 --- a/test/config/test_pythonic_config.py +++ b/test/config/test_pythonic_config.py @@ -143,10 +143,14 @@ def test_config_setting(self): config.quantization.outputs = ['out'] config.quantization.approach = 'post_training_dynamic_quant' config.quantization.device = 'gpu' - config.quantization.op_type_dict = {'Conv': {'weight': {'dtype': ['fp32']}, 'activation': {'dtype': ['fp32']}}} + config.quantization.op_type_dict = {'Conv': {'weight': {'dtype': ['fp32']} + , 'activation': {'dtype': ['fp32']}}} + config.quantization.op_name_dict = {"layer1.0.conv1": {"activation": {"dtype": ["fp32"]} + ,"weight": {"dtype": ["fp32"]}}} config.quantization.strategy = 'mse' config.quantization.objective = 'accuracy' config.quantization.timeout = 100 + config.quantization.max_trials = 100 config.quantization.accuracy_criterion.relative = 0.5 config.quantization.reduce_range = False config.quantization.use_bf16 = False @@ -158,9 +162,12 @@ def test_config_setting(self): self.assertEqual(config.quantization.device, 'gpu') self.assertEqual(config.quantization.op_type_dict, {'Conv': {'weight': {'dtype': ['fp32']}, 'activation': {'dtype': ['fp32']}}}) + self.assertEqual(config.quantization.op_name_dict, + {"layer1.0.conv1": {"activation": {"dtype": ["fp32"]},"weight": {"dtype": ["fp32"]}}}) self.assertEqual(config.quantization.strategy, 'mse') self.assertEqual(config.quantization.objective, 'accuracy') self.assertEqual(config.quantization.timeout, 100) + self.assertEqual(config.quantization.max_trials, 100) 
self.assertEqual(config.quantization.accuracy_criterion.relative, 0.5) self.assertEqual(config.benchmark.cores_per_instance, 10) From fdc566997e0609416cc24c1bce6956937656b09a Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 20 Apr 2023 17:16:47 +0800 Subject: [PATCH 077/103] enhance ut Signed-off-by: yiliu30 --- .../experimental/strategy/bayesian.py | 3 - .../experimental/strategy/exhaustive.py | 6 +- neural_compressor/strategy/exhaustive.py | 6 +- test/strategy/test_bayesian_1.x.py | 2 +- test/strategy/test_sigopt.py | 29 ++++++++- test/strategy/test_tuning_sampler.py | 65 ++++++++++++++++++- test/strategy/test_tuning_sampler_1.x.py | 11 +++- 7 files changed, 101 insertions(+), 21 deletions(-) diff --git a/neural_compressor/experimental/strategy/bayesian.py b/neural_compressor/experimental/strategy/bayesian.py index 58edcdee024..3f684f2c102 100644 --- a/neural_compressor/experimental/strategy/bayesian.py +++ b/neural_compressor/experimental/strategy/bayesian.py @@ -17,20 +17,17 @@ """The Bayesian tuning strategy.""" -import copy import warnings import numpy as np from scipy.optimize import minimize from sklearn.gaussian_process.kernels import Matern from sklearn.gaussian_process import GaussianProcessRegressor -from collections import OrderedDict from copy import deepcopy from ...utils import logger from .strategy import strategy_registry, TuneStrategy from .utils.tuning_sampler import OpWiseTuningSampler -from .utils.tuning_structs import OpTuningConfig @strategy_registry diff --git a/neural_compressor/experimental/strategy/exhaustive.py b/neural_compressor/experimental/strategy/exhaustive.py index b40d5b70397..6cbfdd985bd 100644 --- a/neural_compressor/experimental/strategy/exhaustive.py +++ b/neural_compressor/experimental/strategy/exhaustive.py @@ -15,12 +15,8 @@ # See the License for the specific language governing permissions and # limitations under the License. """The exhaustive tuning strategy.""" -from collections import OrderedDict from .strategy import strategy_registry, TuneStrategy - -from .utils.tuning_sampler import OpWiseTuningSampler, FallbackTuningSampler, ModelWiseTuningSampler -from .utils.tuning_structs import OpTuningConfig -from ...utils import logger +from .utils.tuning_sampler import OpWiseTuningSampler @strategy_registry class ExhaustiveTuneStrategy(TuneStrategy): diff --git a/neural_compressor/strategy/exhaustive.py b/neural_compressor/strategy/exhaustive.py index fa75f0d0638..f192481efc4 100644 --- a/neural_compressor/strategy/exhaustive.py +++ b/neural_compressor/strategy/exhaustive.py @@ -15,12 +15,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""The exhaustive tuning strategy.""" -from collections import OrderedDict from .strategy import strategy_registry, TuneStrategy -from .utils.tuning_sampler import OpWiseTuningSampler, FallbackTuningSampler, ModelWiseTuningSampler -from .utils.tuning_structs import OpTuningConfig -from ..utils import logger +from .utils.tuning_sampler import OpWiseTuningSampler @strategy_registry class ExhaustiveTuneStrategy(TuneStrategy): @@ -46,4 +43,3 @@ def next_tune_cfg(self): for op_tuning_cfg in op_wise_tuning_sampler: op_tuning_cfg['calib_sampling_size'] = calib_sampling_size yield op_tuning_cfg - return diff --git a/test/strategy/test_bayesian_1.x.py b/test/strategy/test_bayesian_1.x.py index e2d54d9b3e2..4864b840763 100644 --- a/test/strategy/test_bayesian_1.x.py +++ b/test/strategy/test_bayesian_1.x.py @@ -289,7 +289,7 @@ def test_run_bayesian_max_trials(self): self.assertNotEqual(output_graph, None) def test_bayesian_opt_class(self): - from neural_compressor.strategy.bayesian import BayesianOptimization + from neural_compressor.experimental.strategy.bayesian import BayesianOptimization pbounds = {} pbounds['x1'] = (0, 1) pbounds['x2'] = (0, 1) diff --git a/test/strategy/test_sigopt.py b/test/strategy/test_sigopt.py index 72b4e659b99..4fd5e3ac271 100644 --- a/test/strategy/test_sigopt.py +++ b/test/strategy/test_sigopt.py @@ -44,7 +44,7 @@ def build_fake_model(): tf.import_graph_def(graph_def, name='') return graph -@unittest.skipIf(CONDITION , "missing the env variables 'SIGOPT_API_TOKEN' or 'SIGOPT_PROJECT_ID'") + class TestSigoptTuningStrategy(unittest.TestCase): @classmethod @@ -56,7 +56,8 @@ def setUpClass(self): @classmethod def tearDownClass(self): shutil.rmtree('saved', ignore_errors=True) - + + @unittest.skipIf(CONDITION , "missing the env variables 'SIGOPT_API_TOKEN' or 'SIGOPT_PROJECT_ID'") def test_run_sigopt_one_trial_new_api(self): from neural_compressor.quantization import fit from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion @@ -80,6 +81,30 @@ def test_run_sigopt_one_trial_new_api(self): def fake_eval(model): return 1 q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader=dataloader, eval_func=fake_eval) + + def test_run_sigopt_one_trial_fake_token(self): + from neural_compressor.quantization import fit + from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion + from neural_compressor.data import Datasets, DATALOADERS + + # dataset and dataloader + dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) + dataloader = DATALOADERS["tensorflow"](dataset) + + # tuning and accuracy criterion + accuracy_criterion = AccuracyCriterion(criterion='relative') + strategy_kwargs = {'sigopt_api_token': 'sigopt_api_token_test', + 'sigopt_project_id': 'sigopt_project_id_test', + 'sigopt_experiment_name': 'nc-tune'} + tuning_criterion = TuningCriterion(strategy='sigopt', strategy_kwargs=strategy_kwargs, max_trials=3) + conf = PostTrainingQuantConfig(quant_level=1, + approach="static", + tuning_criterion=tuning_criterion, + accuracy_criterion=accuracy_criterion) + self.assertEqual(conf.tuning_criterion.strategy_kwargs, strategy_kwargs) + def fake_eval(model): + return 1 + q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader=dataloader, eval_func=fake_eval) if __name__ == "__main__": diff --git a/test/strategy/test_tuning_sampler.py b/test/strategy/test_tuning_sampler.py index 6e795aa99c1..06e6f3e5e7d 100644 --- a/test/strategy/test_tuning_sampler.py +++ 
b/test/strategy/test_tuning_sampler.py @@ -1,7 +1,13 @@ -from neural_compressor.strategy.utils.tuning_sampler import OpTypeWiseTuningSampler, ModelWiseTuningSampler -from neural_compressor.strategy.utils.tuning_sampler import OpWiseTuningSampler, FallbackTuningSampler +from neural_compressor.strategy.utils.tuning_sampler import ( + OpTypeWiseTuningSampler, + ModelWiseTuningSampler, + OpWiseTuningSampler, + FallbackTuningSampler, + BlockFallbackTuningSampler + ) from neural_compressor.strategy.utils.tuning_structs import OpTuningConfig from neural_compressor.strategy.utils.tuning_space import TuningSpace +from neural_compressor.utils import logger from collections import OrderedDict from copy import deepcopy import unittest @@ -151,11 +157,16 @@ } +block_info = [[('op_name2', 'op_type1'),('op_name4', 'op_type3')], + [('op_name4', 'op_type3'), ('op_name1', 'op_type1')]] + + + class TestTuningSampler(unittest.TestCase): def test_tuning_sampler(self): capability = { 'calib': {'calib_sampling_size': [1, 10, 50]}, - 'op': op_cap + 'op': deepcopy(op_cap) } conf = None tuning_space = TuningSpace(capability, conf) @@ -165,6 +176,7 @@ def test_tuning_sampler(self): if item.item_type == 'op': op_name, op_type = item.name initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) + logger.debug(initial_op_tuning_cfg[item.name]) quant_mode_wise_items = OrderedDict() from neural_compressor.strategy.utils.constant import auto_query_order as query_order pre_items = set() @@ -240,5 +252,52 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): fallback_cnt.append(cnt) self.assertListEqual(fallback_cnt, [2, 3, 4]) + def test_block_sampler(self): + capability = { + 'calib': {'calib_sampling_size': [1, 10, 50]}, + 'op': deepcopy(op_cap), + 'block_info': block_info + } + conf = None + tuning_space = TuningSpace(capability, conf) + initial_op_tuning_cfg = {} + for item in tuning_space.root_item.options: + if item.item_type == 'op': + op_name, op_type = item.name + initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) + logger.debug(initial_op_tuning_cfg[item.name]) + quant_mode_wise_items = OrderedDict() + from neural_compressor.strategy.utils.constant import auto_query_order as query_order + pre_items = set() + for quant_mode in query_order: + items = tuning_space.query_items_by_quant_mode(quant_mode) + filtered_items = [item for item in items if item not in pre_items] + pre_items = pre_items.union(set(items)) + quant_mode_wise_items[quant_mode] = filtered_items + + def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): + for item in items_lst: + op_item_dtype_dict[item.name] = target_quant_mode + + op_item_dtype_dict = OrderedDict() + for quant_mode, quant_mode_items in quant_mode_wise_items.items(): + initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) + + op_block_lst = capability.get('block_info', []) + if op_block_lst: + # Fallback block by block + target_type_lst = set(tuning_space.query_items_by_quant_mode('fp32')) + fallback_items_lst = [item for item in target_type_lst] + op_block_fallback_lst = [] + for op_block_index, op_block in enumerate(op_block_lst): + op_block_fallback_lst.append(op_block) + block_fallback_sampler = BlockFallbackTuningSampler(tuning_space=tuning_space, + tuning_order_lst=[], + initial_op_tuning_cfg=initial_op_tuning_cfg, + op_block_lst=op_block_fallback_lst, + accumulate=False, + target_dtype='fp32') + self.assertEqual(2, 
len(list(block_fallback_sampler))) + if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_tuning_sampler_1.x.py b/test/strategy/test_tuning_sampler_1.x.py index fc0dfdd341b..fd9f336fff9 100644 --- a/test/strategy/test_tuning_sampler_1.x.py +++ b/test/strategy/test_tuning_sampler_1.x.py @@ -1,11 +1,17 @@ -from neural_compressor.experimental.strategy.utils.tuning_sampler import OpTypeWiseTuningSampler, ModelWiseTuningSampler -from neural_compressor.experimental.strategy.utils.tuning_sampler import OpWiseTuningSampler, FallbackTuningSampler +from neural_compressor.experimental.strategy.utils.tuning_sampler import ( + OpTypeWiseTuningSampler, + ModelWiseTuningSampler, + OpWiseTuningSampler, + FallbackTuningSampler + ) from neural_compressor.experimental.strategy.utils.tuning_structs import OpTuningConfig from neural_compressor.experimental.strategy.utils.tuning_space import TuningSpace +from neural_compressor.utils import logger from collections import OrderedDict from copy import deepcopy import unittest + op_cap = { ('op_name1', 'op_type1'): [ { @@ -165,6 +171,7 @@ def test_tuning_sampler(self): if item.item_type == 'op': op_name, op_type = item.name initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) + logger.debug(initial_op_tuning_cfg[item.name]) quant_mode_wise_items = OrderedDict() from neural_compressor.experimental.strategy.utils.constant import auto_query_order as query_order pre_items = set() From d2d19e569402d4d22a06e588cd9a1459a62c9e54 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 20 Apr 2023 20:41:26 +0800 Subject: [PATCH 078/103] enhance ut Signed-off-by: yiliu30 --- .../experimental/contrib/strategy/tpe.py | 2 + .../experimental/strategy/strategy.py | 50 +++---------------- .../strategy/utils/tuning_space.py | 29 +---------- .../strategy/utils/tuning_structs.py | 2 +- neural_compressor/strategy/strategy.py | 35 ------------- .../strategy/utils/tuning_space.py | 29 +---------- .../strategy/utils/tuning_structs.py | 2 +- test/strategy/test_basic.py | 25 ++++++++++ test/strategy/test_tuning_sampler.py | 5 +- test/strategy/test_tuning_sampler_1.x.py | 3 +- 10 files changed, 42 insertions(+), 140 deletions(-) diff --git a/neural_compressor/experimental/contrib/strategy/tpe.py b/neural_compressor/experimental/contrib/strategy/tpe.py index 8b6fb33ac96..3f1735757bd 100644 --- a/neural_compressor/experimental/contrib/strategy/tpe.py +++ b/neural_compressor/experimental/contrib/strategy/tpe.py @@ -335,6 +335,8 @@ def add_loss_to_tuned_history_and_find_best(self, tuning_history_list): def object_evaluation(self, tune_cfg, model): """Check if config was alredy evaluated.""" + for k, v in self.op_configs.items(): + tune_cfg.update({k : tune_cfg.pop(k[0])}) op_cfgs = self._tune_cfg_converter(tune_cfg) self.last_qmodel = self.adaptor.quantize(op_cfgs, self.model, self.calib_dataloader) self.last_tune_cfg = copy.deepcopy(tune_cfg) diff --git a/neural_compressor/experimental/strategy/strategy.py b/neural_compressor/experimental/strategy/strategy.py index 3d8810e6490..6e8804cd9d5 100644 --- a/neural_compressor/experimental/strategy/strategy.py +++ b/neural_compressor/experimental/strategy/strategy.py @@ -231,14 +231,6 @@ def _initialize_recipe(self): logger.debug(self._not_tuning_recipes_values) logger.info(f"{len(self._tuning_recipes)} recipes require future tuning.") logger.debug(self._tuning_recipes) - - def _open_all_recipes(self): - """Open all tunable recipes.""" - opened_recipes = {} - for recipe_name, recipe_val_lst in 
self._tuning_recipes.items():
-            opened_recipes[recipe_name] = recipe_val_lst[-1]
-        logger.info("Opened all recipes.")
-        logger.info(opened_recipes)
 
     def _fallback_ops(self, tune_cfg, recipe_op_lst, tuning_space):
         """Fallback ops in recipe op list."""
@@ -274,13 +266,13 @@ def apply_recipe_one_by_one(self, tune_cfg):
             new_tune_cfg = self._fallback_ops(copy.deepcopy(tune_cfg), \
                 self.capability['recipes_ops'][recipe_name], self.tuning_space)
             yield new_tune_cfg
-        if recipe_name in all_registered_samplers:
-            recipe_sampler = all_registered_samplers[recipe_name](tuning_space=None,
-                tuning_order_lst=[],
-                initial_op_tuning_cfg=copy.deepcopy(tune_cfg),
-                kwargs={recipe_name: recipe_vals})
-            for new_tune_cfg in recipe_sampler:
-                yield new_tune_cfg
+        if recipe_name == "smooth_quant":
+            sq_args = {'smooth_quant': True}
+            if 'recipe_cfgs' not in new_tune_cfg:
+                new_tune_cfg['recipe_cfgs'] = sq_args
+            else:
+                new_tune_cfg['recipe_cfgs'].update(sq_args)
+            yield new_tune_cfg
 
     def set_param_for_pre_quantization_algos(self, algo_scheduler, tune_cfg, fp32_model) -> None:
         """Set the parameter for pre-quantization algos, such as smooth quantization.
@@ -930,33 +923,6 @@ def setup_yaml():
             yaml.dump(self.deploy_cfg, f)
         logger.info("Save deploy yaml to {}".format(self.deploy_path))
 
-    def _get_common_cfg(self, model_wise_cfg, op_wise_cfgs):
-        """Get the common parts from the model_wise_cfg.
-
-        This function is focused on composing the configuration that consists of
-        model-wise field and op-wise unique field data.
-
-        Args:
-            model_wise_cfg ([DotDict]): The model-wise configuration.
-            op_wise_cfgs ([List]): The list of each op's config in DotDict type.
-
-        Returns:
-            [DotDict]: The combined configuration with the op-wise unique field.
-        """
-        model_wise_keys = model_wise_cfg.keys()
-
-        result = op_wise_cfgs[0]
-        for each_op_wise_cfg in op_wise_cfgs:
-            tmp_cfg = {}
-            for k in model_wise_keys:
-                tmp_cfg[k] = each_op_wise_cfg[k]
-
-            if model_wise_cfg == tmp_cfg:
-                result = each_op_wise_cfg
-                break
-
-        return result
-
     @property
     def evaluation_result(self):
         """Evaluate the given model.
diff --git a/neural_compressor/experimental/strategy/utils/tuning_space.py b/neural_compressor/experimental/strategy/utils/tuning_space.py
index e6b83c452d0..ef9698dc53f 100644
--- a/neural_compressor/experimental/strategy/utils/tuning_space.py
+++ b/neural_compressor/experimental/strategy/utils/tuning_space.py
@@ -180,13 +180,6 @@ def _parse(cap, root, path, op_name_type):
             else:
                 self.quant_mode_wise_items[q_option.name].append(op_item)
 
-    def _create_tuning_item(self, tuning_items: Dict, attr_name: str, quant_mode_item: TuningItem):
-        for tuning_item_name, options in tuning_items.items():
-            if tuning_item_name not in ['dtype', 'quant_mode']:
-                name = (attr_name, tuning_item_name)
-                tuning_item = TuningItem(name=name, options=options, item_type=name)
-                quant_mode_item.append(tuning_item)
-
     def _merge_op_cfg(self, cur_op_cap, op_user_cfg, fw_op_cap):
         """Merge the op cfg with user cfg.
 
@@ -633,27 +626,7 @@ def get_op_default_path_by_pattern(self, op_name_type, pattern):
         att_lst = ['activation', 'weight'] if has_weight else ['activation']
         for att in att_lst:
             result[att] = self.get_default_full_path(op_name_type, full_path[att])
-        return result
-
-def get_op_mode_by_query_order(tuning_space: TuningSpace, query_order):
-    """Get the op mode according to the query order."""
-    quant_mode_wise_items = OrderedDict() # mode, op_item_lst
-    pre_items = set()
-    # Collect op items that support the specified mode.
- for quant_mode in query_order: - items = tuning_space.query_items_by_quant_mode(quant_mode) - filtered_items = list(filter(lambda item: item not in pre_items, items)) - pre_items = pre_items.union(set(items)) - quant_mode_wise_items[quant_mode] = filtered_items - - def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): - for item in items_lst: - op_item_dtype_dict[item.name] = target_quant_mode - op_item_dtype_dict = OrderedDict() - for quant_mode, quant_mode_items in quant_mode_wise_items.items(): - initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) - - return op_item_dtype_dict + return result def pattern_to_internal(pattern, default_dtype='int8'): """Convert pattern to internal format. diff --git a/neural_compressor/experimental/strategy/utils/tuning_structs.py b/neural_compressor/experimental/strategy/utils/tuning_structs.py index b13f27cf0cd..c4222a2f6c3 100644 --- a/neural_compressor/experimental/strategy/utils/tuning_structs.py +++ b/neural_compressor/experimental/strategy/utils/tuning_structs.py @@ -60,7 +60,7 @@ def _set_dtype(self): # f"with quant_mode {self.op_quant_mode}") - def __str__(self) -> str: + def __repr__(self) -> str: """Display the tuning config as string. Returns: diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 3b55533d673..516ee4c4dfc 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -496,14 +496,6 @@ def distributed_traverse(self): .format(self.met_flag or self.max_trial_flag or self.max_time_flag)) if self.met_flag or self.max_trial_flag or self.max_time_flag: break - - def _open_all_recipes(self): - """Open all tunable recipes.""" - opened_recipes = {} - for recipe_name, recipe_val_lst in self._tuning_recipes.items(): - opened_recipes[recipe_name] = recipe_val_lst[-1] - logger.info("Opened all recipes.") - logger.info(opened_recipes) def _fallback_ops(self, tune_cfg, recipe_op_lst, tuning_space): """Fallback ops in recipe op list.""" @@ -1177,33 +1169,6 @@ def setup_yaml(): yaml.dump(self.deploy_cfg, f) logger.info("Save deploy yaml to {}".format(self.deploy_path)) - def _get_common_cfg(self, model_wise_cfg, op_wise_cfgs): - """Get the common parts from the model_wise_cfg. - - This function is focused on composing the configuration that consists of - model-wise field and op-wise unique field data. - - Args: - model_wise_cfg ([DotDict]): The model-wise configuration. - op_wise_cfgs ([List]): The list of each op's config in DotDict type. - - Returns: - [DotDict]: The combined configration with the op-wise unique field. - """ - model_wise_keys = model_wise_cfg.keys() - - result = op_wise_cfgs[0] - for each_op_wise_cfg in op_wise_cfgs: - tmp_cfg = {} - for k in model_wise_keys: - tmp_cfg[k] = each_op_wise_cfg[k] - - if model_wise_cfg == tmp_cfg: - result = each_op_wise_cfg - break - - return result - @property def evaluation_result(self): """Evaluate the given model. 
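Note: the smooth-quant branch added to apply_recipe_one_by_one earlier in this patch boils
down to the following sketch, assuming tune_cfg behaves like a plain dict
(toggle_smooth_quant is a hypothetical name used only for illustration, not a function in
the patch):

    def toggle_smooth_quant(tune_cfg):
        # Equivalent to the if/else in the hunk above: attach the smooth_quant
        # flag under 'recipe_cfgs' while preserving any recipe settings that are
        # already present on the trial config.
        tune_cfg.setdefault('recipe_cfgs', {}).update({'smooth_quant': True})
        return tune_cfg

Each trial config yielded this way carries recipe_cfgs['smooth_quant'] = True, which
downstream consumers can read to decide whether to run the smoothing pass before
quantization.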
diff --git a/neural_compressor/strategy/utils/tuning_space.py b/neural_compressor/strategy/utils/tuning_space.py index f19cd410c6b..ea1d0df9978 100644 --- a/neural_compressor/strategy/utils/tuning_space.py +++ b/neural_compressor/strategy/utils/tuning_space.py @@ -188,13 +188,6 @@ def _parse(cap, root, path, op_name_type): else: self.quant_mode_wise_items[q_option.name].append(op_item) - def _create_tuning_item(self, tuning_items: Dict, attr_name: str, quant_mode_item: TuningItem): - for tuning_item_name, options in tuning_items.items(): - if tuning_item_name not in ['dtype', 'quant_mode']: - name = (attr_name, tuning_item_name) - tuning_item = TuningItem(name=name, options=options, item_type=name) - quant_mode_item.append(tuning_item) - def _merge_op_cfg(self, cur_op_cap, op_user_cfg, fw_op_cap): """Merge the op cfg with user cfg. @@ -641,27 +634,7 @@ def get_op_default_path_by_pattern(self, op_name_type, pattern): att_lst = ['activation', 'weight'] if has_weight else ['activation'] for att in att_lst: result[att] = self.get_default_full_path(op_name_type, full_path[att]) - return result - -def get_op_mode_by_query_order(tuning_space: TuningSpace, query_order): - """Get the op mode according to the query order.""" - quant_mode_wise_items = OrderedDict() # mode, op_item_lst - pre_items = set() - # Collect op items supported the specified mode. - for quant_mode in query_order: - items = tuning_space.query_items_by_quant_mode(quant_mode) - filtered_items = list(filter(lambda item: item not in pre_items, items)) - pre_items = pre_items.union(set(items)) - quant_mode_wise_items[quant_mode] = filtered_items - - def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): - for item in items_lst: - op_item_dtype_dict[item.name] = target_quant_mode - op_item_dtype_dict = OrderedDict() - for quant_mode, quant_mode_items in quant_mode_wise_items.items(): - initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) - - return op_item_dtype_dict + return result def pattern_to_internal(pattern, default_dtype='int8'): """Convert pattern to internal format. diff --git a/neural_compressor/strategy/utils/tuning_structs.py b/neural_compressor/strategy/utils/tuning_structs.py index ec0fcd28751..0e9fe5a30aa 100644 --- a/neural_compressor/strategy/utils/tuning_structs.py +++ b/neural_compressor/strategy/utils/tuning_structs.py @@ -60,7 +60,7 @@ def _set_dtype(self): # f"with quant_mode {self.op_quant_mode}") - def __str__(self) -> str: + def __repr__(self) -> str: """Display the tuning config as string. 
Returns: diff --git a/test/strategy/test_basic.py b/test/strategy/test_basic.py index 365a012be88..27160d18423 100644 --- a/test/strategy/test_basic.py +++ b/test/strategy/test_basic.py @@ -2,6 +2,7 @@ import numpy as np import unittest import shutil +import os def build_fake_model(): import tensorflow as tf @@ -45,10 +46,12 @@ class TestBasicTuningStrategy(unittest.TestCase): @classmethod def setUpClass(self): self.constant_graph = build_fake_model() + self.workspace = os.path.join(os.getcwd(), 'nc_workspace') @classmethod def tearDownClass(self): shutil.rmtree('saved', ignore_errors=True) + shutil.rmtree(self.workspace) def test_run_basic_one_trial_new_api(self): from neural_compressor.quantization import fit @@ -66,7 +69,29 @@ def fake_eval(model): conf = PostTrainingQuantConfig() q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader= dataloader, eval_func=fake_eval) self.assertIsNotNone(q_model) + + + def test_diagnosis(self): + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.data import Datasets, DATALOADERS + + # dataset and dataloader + dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) + dataloader = DATALOADERS["tensorflow"](dataset) + from neural_compressor.metric import METRICS + metrics = METRICS('tensorflow') + top1 = metrics['topk']() + # tuning and accuracy criterion + conf = PostTrainingQuantConfig(diagnosis=True) + q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader= dataloader,\ + eval_dataloader=dataloader, eval_metric=top1) + self.assertEqual(os.path.exists(os.path.join(os.getcwd(), './nc_workspace/inspect_saved/fp32/inspect_result.pkl')), True) + self.assertEqual(os.path.exists(os.path.join(os.getcwd(), './nc_workspace/inspect_saved/quan/inspect_result.pkl')), True) + + + def test_run_create_eval_from_metric_and_dataloader(self): from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig diff --git a/test/strategy/test_tuning_sampler.py b/test/strategy/test_tuning_sampler.py index 06e6f3e5e7d..80bc3e09cef 100644 --- a/test/strategy/test_tuning_sampler.py +++ b/test/strategy/test_tuning_sampler.py @@ -7,7 +7,6 @@ ) from neural_compressor.strategy.utils.tuning_structs import OpTuningConfig from neural_compressor.strategy.utils.tuning_space import TuningSpace -from neural_compressor.utils import logger from collections import OrderedDict from copy import deepcopy import unittest @@ -176,7 +175,7 @@ def test_tuning_sampler(self): if item.item_type == 'op': op_name, op_type = item.name initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) - logger.debug(initial_op_tuning_cfg[item.name]) + print(initial_op_tuning_cfg[item.name]) quant_mode_wise_items = OrderedDict() from neural_compressor.strategy.utils.constant import auto_query_order as query_order pre_items = set() @@ -265,7 +264,7 @@ def test_block_sampler(self): if item.item_type == 'op': op_name, op_type = item.name initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) - logger.debug(initial_op_tuning_cfg[item.name]) + print(initial_op_tuning_cfg[item.name]) quant_mode_wise_items = OrderedDict() from neural_compressor.strategy.utils.constant import auto_query_order as query_order pre_items = set() diff --git a/test/strategy/test_tuning_sampler_1.x.py b/test/strategy/test_tuning_sampler_1.x.py index fd9f336fff9..46adf411317 100644 --- 
a/test/strategy/test_tuning_sampler_1.x.py +++ b/test/strategy/test_tuning_sampler_1.x.py @@ -6,7 +6,6 @@ ) from neural_compressor.experimental.strategy.utils.tuning_structs import OpTuningConfig from neural_compressor.experimental.strategy.utils.tuning_space import TuningSpace -from neural_compressor.utils import logger from collections import OrderedDict from copy import deepcopy import unittest @@ -171,7 +170,7 @@ def test_tuning_sampler(self): if item.item_type == 'op': op_name, op_type = item.name initial_op_tuning_cfg[item.name] = OpTuningConfig(op_name, op_type, 'fp32', tuning_space) - logger.debug(initial_op_tuning_cfg[item.name]) + print(initial_op_tuning_cfg[item.name]) quant_mode_wise_items = OrderedDict() from neural_compressor.experimental.strategy.utils.constant import auto_query_order as query_order pre_items = set() From 8c0c5e74d507f38edf48cee6d4a2aa5950cbfdd9 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 20 Apr 2023 23:20:41 +0800 Subject: [PATCH 079/103] fixed ut Signed-off-by: yiliu30 --- test/strategy/test_basic.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/test/strategy/test_basic.py b/test/strategy/test_basic.py index 27160d18423..fa159ab793e 100644 --- a/test/strategy/test_basic.py +++ b/test/strategy/test_basic.py @@ -79,14 +79,11 @@ def test_diagnosis(self): # dataset and dataloader dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) dataloader = DATALOADERS["tensorflow"](dataset) - from neural_compressor.metric import METRICS - metrics = METRICS('tensorflow') - top1 = metrics['topk']() # tuning and accuracy criterion conf = PostTrainingQuantConfig(diagnosis=True) q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader= dataloader,\ - eval_dataloader=dataloader, eval_metric=top1) + eval_func=lambda model: 1) self.assertEqual(os.path.exists(os.path.join(os.getcwd(), './nc_workspace/inspect_saved/fp32/inspect_result.pkl')), True) self.assertEqual(os.path.exists(os.path.join(os.getcwd(), './nc_workspace/inspect_saved/quan/inspect_result.pkl')), True) From b2f639ab7c06dd1a8b91a86b6b6d7259da240c96 Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Fri, 21 Apr 2023 11:27:04 +0800 Subject: [PATCH 080/103] enhance ut coverage Signed-off-by: Cheng, Zixuan --- neural_compressor/compression/callbacks.py | 2 +- neural_compressor/mix_precision.py | 9 +++++---- neural_compressor/quantization.py | 6 +++--- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/neural_compressor/compression/callbacks.py b/neural_compressor/compression/callbacks.py index 32552cb7276..081ddbaa61a 100644 --- a/neural_compressor/compression/callbacks.py +++ b/neural_compressor/compression/callbacks.py @@ -280,7 +280,7 @@ def pre_process(self): if strategy == "mse_v2": if not (self.cfg.quantization.framework.startswith("tensorflow") \ - or self.cfg.quantization.framework == 'pytorch_fx'): + or self.cfg.quantization.framework == 'pytorch_fx'): # pragma: no cover strategy = "basic" logger.warning(f"MSE_v2 does not support \ {self.cfg.quantization.framework} now, use basic instead.") diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py index 94628748f08..2ccb1ee7cd2 100644 --- a/neural_compressor/mix_precision.py +++ b/neural_compressor/mix_precision.py @@ -166,6 +166,7 @@ def eval_dataloader(self, dataloader): framework information can be known.
In the future, we will support creating an iterable dataloader from neural_compressor.common.DataLoader """ + # pragma: no cover assert hasattr(dataloader, '__iter__') and \ hasattr(dataloader, 'batch_size'), \ 'dataloader must implement __iter__ method and batch_size attribute' @@ -198,7 +199,7 @@ def model(self, user_model): if cfg.quantization.backend == "ipex": assert cfg.quantization.framework == "pytorch_ipex",\ "Please wrap the model with correct Model class!" - if cfg.quantization.backend == "itex": + if cfg.quantization.backend == "itex": # pragma: no cover from .model.tensorflow_model import get_model_type if get_model_type(user_model.model) == 'keras': assert cfg.quantization.framework == "keras",\ @@ -226,7 +227,7 @@ def model(self, user_model): , device=cfg.quantization.device) else: self._model = Model(user_model, backend=cfg.quantization.framework) - else: + else: # pragma: no cover if cfg.quantization.framework == "pytorch_ipex": from neural_compressor.model.torch_model import IPEXModel assert type(user_model) == IPEXModel, \ @@ -379,7 +380,7 @@ def fit(model, converter.model = model if ('bf16' in precisions or 'fp16' in precisions) and \ - converter.conf.quantization.framework == "onnxruntime": + converter.conf.quantization.framework == "onnxruntime": # pragma: no cover if config.device == "cpu": logger.warning("Mix precision exits due to device isn't gpu for onnx models.") sys.exit(0) @@ -387,7 +388,7 @@ def fit(model, logger.warning("Mix precision exits due to backend isn't onnxrt_cuda_ep for onnx models.") sys.exit(0) elif 'bf16' in precisions and not CpuInfo().bf16 and \ - converter.conf.quantization.framework != "onnxruntime": + converter.conf.quantization.framework != "onnxruntime": # pragma: no cover if os.getenv('FORCE_BF16') == '1': logger.warning("Mix precision will generate bf16 graph although " \ "the hardware doesn't support bf16 instruction.") diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index 6fff9e3a8c2..5432b6d8538 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -83,7 +83,7 @@ def pre_proccess(self): if strategy == "mse_v2": if not (cfg.quantization.framework.startswith("tensorflow")\ - or cfg.quantization.framework == 'pytorch_fx'): + or cfg.quantization.framework == 'pytorch_fx'): # pragma: no cover strategy = "basic" logger.warning(f"MSE_v2 does not support {cfg.quantization.framework} now, use basic instead.") logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.") @@ -179,7 +179,7 @@ def model(self, user_model): """ cfg = self.conf if cfg.quantization.framework is None: - if isinstance(user_model, BaseModel): + if isinstance(user_model, BaseModel): # pragma: no cover cfg.quantization.framework = list(MODELS.keys())[list(MODELS.values()).index(type(user_model))] if cfg.quantization.backend == "ipex": assert cfg.quantization.framework == "pytorch_ipex",\ @@ -211,7 +211,7 @@ def model(self, user_model): self._model = Model(user_model, backend=cfg.quantization.framework, device=cfg.quantization.device) else: self._model = Model(user_model, backend=cfg.quantization.framework) - else: + else: # pragma: no cover if cfg.quantization.framework == "pytorch_ipex": from neural_compressor.model.torch_model import IPEXModel assert type(user_model) == IPEXModel, \ From 828a1ba7c05cfc1df40d2b779705b7082ee25399 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Fri, 21 Apr 2023 19:05:03 +0800 Subject: [PATCH 081/103] Change global options and _Config Signed-off-by: Cheng, 
Penghui --- neural_compressor/benchmark.py | 70 ++++++++--------- neural_compressor/compression/__init__.py | 2 +- neural_compressor/compression/callbacks.py | 77 +++++++++---------- neural_compressor/config.py | 11 +-- .../experimental/strategy/basic.py | 2 - neural_compressor/mix_precision.py | 12 +-- neural_compressor/quantization.py | 15 ++-- neural_compressor/strategy/strategy.py | 22 +++--- neural_compressor/training.py | 2 - test/config/test_config_2.x.py | 2 +- 10 files changed, 98 insertions(+), 117 deletions(-) diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py index 7a5b58beb58..7d4cec75b41 100644 --- a/neural_compressor/benchmark.py +++ b/neural_compressor/benchmark.py @@ -26,7 +26,7 @@ from threading import Thread from .adaptor import FRAMEWORKS from .objective import MultiObjective -from .config import Config, BenchmarkConfig +from .config import BenchmarkConfig, options from .utils import logger from .utils import OPTIONS from .utils.utility import GLOBAL_STATE, MODE @@ -54,17 +54,17 @@ def set_all_env_var(conf, overwrite_existing=False): Neural Compressor only uses physical cores """ cpu_counts = psutil.cpu_count(logical=False) - assert isinstance(conf, Config), \ + assert isinstance(conf, BenchmarkConfig), \ 'input has to be a Config object' - if conf.benchmark.cores_per_instance is not None: - assert conf.benchmark.cores_per_instance * conf.benchmark.num_of_instance <= cpu_counts, \ + if conf.cores_per_instance is not None: + assert conf.cores_per_instance * conf.num_of_instance <= cpu_counts, \ 'num_of_instance * cores_per_instance should <= cpu physical cores' else: - assert conf.benchmark.num_of_instance <= cpu_counts, \ + assert conf.num_of_instance <= cpu_counts, \ 'num_of_instance should <= cpu counts' - conf.benchmark.cores_per_instance = int(cpu_counts / conf.benchmark.num_of_instance) - for var, value in dict(conf.benchmark).items(): + conf.cores_per_instance = int(cpu_counts / conf.num_of_instance) + for var, value in dict(conf).items(): set_env_var(var.upper(), value, overwrite_existing) @@ -153,9 +153,9 @@ def __init__(self, conf): self._results = {} assert isinstance(conf, BenchmarkConfig), \ "The config object should be config.BenchmarkConfig, not {}".format(type(conf)) - self.conf = Config(quantization=None, benchmark=conf, pruning=None, distillation=None, nas=None) - if self.conf.benchmark.framework is not None: - self.framework = self.conf.benchmark.framework.lower() + self.conf = conf + if self.conf.framework is not None: + self.framework = self.conf.framework.lower() def __call__(self, raw_cmd=None): """Directly call a Benchmark object. @@ -167,7 +167,7 @@ def __call__(self, raw_cmd=None): assert sys.platform in ['linux', 'win32'], 'only support platform windows and linux...' 
# disable multi-instance for running benchmark on GPU device set_all_env_var(cfg) - if cfg.benchmark.device == 'gpu': + if cfg.device == 'gpu': set_env_var('NC_ENV_CONF', True, overwrite_existing=True) logger.info("Start to run Benchmark.") @@ -322,28 +322,28 @@ def run_instance(self): if self._b_func is None: cfg = self.conf GLOBAL_STATE.STATE = MODE.BENCHMARK - framework_specific_info = {'device': cfg.benchmark.device, \ + framework_specific_info = {'device': cfg.device, \ 'approach': None, \ - 'random_seed': cfg.options.random_seed, - 'backend': cfg.benchmark.backend \ - if cfg.benchmark.backend is not None else 'default', + 'random_seed': options.random_seed, + 'backend': cfg.backend \ + if cfg.backend is not None else 'default', 'format': None} - framework = cfg.benchmark.framework.lower() + framework = cfg.framework.lower() if 'tensorflow' in framework: - framework_specific_info.update({"inputs": cfg.benchmark.inputs, \ - "outputs": cfg.benchmark.outputs, \ + framework_specific_info.update({"inputs": cfg.inputs, \ + "outputs": cfg.outputs, \ "recipes": None, \ - 'workspace_path': cfg.options.workspace}) + 'workspace_path': options.workspace}) if framework == 'keras': - framework_specific_info.update({'workspace_path': cfg.options.workspace}) + framework_specific_info.update({'workspace_path': options.workspace}) if framework == 'mxnet': framework_specific_info.update({"b_dataloader": self._b_dataloader}) if 'onnx' in framework: framework_specific_info.update( - {'workspace_path': cfg.options.workspace, \ + {'workspace_path': options.workspace, \ 'graph_optimization': OPTIONS[framework].graph_optimization}) if framework == 'pytorch_ipex' or framework == 'pytorch' or framework == 'pytorch_fx': - framework_specific_info.update({"workspace_path": cfg.options.workspace, + framework_specific_info.update({"workspace_path": options.workspace, "q_dataloader": None}) assert isinstance(self._model, BaseModel), 'need set neural_compressor Model for quantization....' @@ -367,7 +367,7 @@ def run_instance(self): # also measurer have result list among steps acc, _ = val batch_size = self._b_dataloader.batch_size - warmup = cfg.benchmark.warmup + warmup = cfg.warmup if len(self.objectives.objectives[0].result_list()) < warmup: if len(self.objectives.objectives[0].result_list()) > 1 and warmup != 0: warmup = 1 @@ -472,42 +472,42 @@ def model(self, user_model): make sure the name is in the supported slim model list. """ cfg = self.conf - if cfg.benchmark.framework is None: + if cfg.framework is None: assert not isinstance(user_model, BaseModel), \ "Please pass an original framework model but not neural compressor model!" 
self.framework = get_model_fwk_name(user_model) if self.framework == "tensorflow": from .model.tensorflow_model import get_model_type - if get_model_type(user_model) == 'keras' and cfg.benchmark.backend == 'itex': + if get_model_type(user_model) == 'keras' and cfg.backend == 'itex': self.framework = 'keras' if self.framework == "pytorch": - if cfg.benchmark.backend == "default": + if cfg.backend == "default": self.framework = "pytorch_fx" - elif cfg.benchmark.backend == "ipex": + elif cfg.backend == "ipex": self.framework = "pytorch_ipex" import intel_extension_for_pytorch - cfg.benchmark.framework = self.framework + cfg.framework = self.framework if not isinstance(user_model, BaseModel): logger.warning("Force convert framework model to neural_compressor model.") if "tensorflow" in self.framework or self.framework == "keras": - self._model = NCModel(user_model, backend=self.framework, device=cfg.benchmark.device) + self._model = NCModel(user_model, backend=self.framework, device=cfg.device) else: self._model = NCModel(user_model, backend=self.framework) else: # It is config of neural_compressor version < 2.0, no need in 2.0 - if cfg.benchmark.framework == "pytorch_ipex": + if cfg.framework == "pytorch_ipex": from neural_compressor.model.torch_model import IPEXModel if not isinstance(user_model, IPEXModel): - self._model = NCModel(user_model.model, framework=cfg.benchmark.framework) + self._model = NCModel(user_model.model, framework=cfg.framework) return self._model = user_model if 'tensorflow' in self.framework: - self._model.name = cfg.benchmark.model_name - self._model.output_tensor_names = cfg.benchmark.outputs - self._model.input_tensor_names = cfg.benchmark.inputs - self._model.workspace_path = cfg.options.workspace + self._model.name = cfg.model_name + self._model.output_tensor_names = cfg.outputs + self._model.input_tensor_names = cfg.inputs + self._model.workspace_path = options.workspace def __repr__(self): """Get the object representation in string format.""" diff --git a/neural_compressor/compression/__init__.py b/neural_compressor/compression/__init__.py index 1f6f058697d..14f79b22d7c 100644 --- a/neural_compressor/compression/__init__.py +++ b/neural_compressor/compression/__init__.py @@ -18,4 +18,4 @@ from .callbacks import QuantizationAwareTrainingCallbacks, DistillationCallbacks, PruningCallbacks from ..experimental.compression import prepare_pruning from .. import WeightPruningConfig -from .callbacks import model_slim, parse_auto_slim_config +from .pruner.model_slim.auto_slim import model_slim, parse_auto_slim_config diff --git a/neural_compressor/compression/callbacks.py b/neural_compressor/compression/callbacks.py index 081ddbaa61a..cc3eecadc17 100644 --- a/neural_compressor/compression/callbacks.py +++ b/neural_compressor/compression/callbacks.py @@ -21,25 +21,23 @@ 'PruningCallbacks' and 'DistillationCallbacks'. 
""" -import copy import numpy as np import os import pickle import random from .distillation.criterions import Criterions from ..adaptor import FRAMEWORKS -from ..config import Config, QuantizationAwareTrainingConfig, DistillationConfig, WeightPruningConfig +from ..config import _Config, options from ..utils import logger from ..utils.utility import time_limit, LazyImport from ..model import BaseModel, Model from ..model.model import get_model_fwk_name from ..model.tensorflow_model import TensorflowQATModel from ..strategy import STRATEGIES -from .pruner.utils import process_config, parse_to_prune, \ - generate_pruner_config, get_sparsity_ratio +from .pruner.utils import process_config, parse_to_prune, get_sparsity_ratio from .pruner.pruners import get_pruner, PRUNERS # model auto slim related -from .pruner.model_slim.auto_slim import model_slim, parse_auto_slim_config + LazyImport('torch.nn') torch = LazyImport('torch') @@ -218,22 +216,22 @@ def model(self, user_model): user_model.model if isinstance(user_model, BaseModel) else user_model) if self.framework == "tensorflow": try: - if self.cfg.quantization.approach == "quant_aware_training": + if self.conf.quantization.approach == "quant_aware_training": self.framework = 'tensorflow_itex' else: from ..model.tensorflow_model import get_model_type - if get_model_type(user_model) == 'keras' and self.cfg.quantization.backend == 'itex': + if get_model_type(user_model) == 'keras' and self.conf.quantization.backend == 'itex': self.framework = 'keras' except Exception as e: pass if self.framework == "pytorch": try: - if self.cfg.quantization.backend == "default": + if self.conf.quantization.backend == "default": self.framework = "pytorch_fx" - elif self.cfg.quantization.backend == "ipex": + elif self.conf.quantization.backend == "ipex": self.framework = "pytorch_ipex" - self.cfg.quantization.framework = self.framework + self.conf.quantization.framework = self.framework except Exception as e: pass @@ -246,7 +244,7 @@ def model(self, user_model): self._model = TensorflowQATModel(user_model._model) elif "tensorflow" in self.framework or self.framework == "keras": try: - self._model = Model(user_model, backend=self.framework, device=self.cfg.quantization.device) + self._model = Model(user_model, backend=self.framework, device=self.conf.quantization.device) except Exception as e: self._model = Model(user_model, backend=self.framework, device=None) else: @@ -256,10 +254,10 @@ def model(self, user_model): if 'tensorflow' in self.framework: try: - self._model.name = self.cfg.quantization.model_name - self._model.output_tensor_names = self.cfg.quantization.outputs - self._model.input_tensor_names = self.cfg.quantization.inputs - self._model.workspace_path = self.cfg.options.workspace + self._model.name = self.conf.quantization.model_name + self._model.output_tensor_names = self.conf.quantization.outputs + self._model.input_tensor_names = self.conf.quantization.inputs + self._model.workspace_path = options.workspace except Exception as e: self._model.name = None self._model.output_tensor_names = None @@ -273,31 +271,31 @@ def pre_process(self): self.remove_hook("on_train_begin", self.adaptor._pre_hook_for_qat) self.remove_hook("on_train_end", self.adaptor._post_hook_for_qat) - strategy = self.cfg.quantization.tuning_criterion.strategy.lower() - if self.cfg.quantization.quant_level == 0: + strategy = self.conf.quantization.tuning_criterion.strategy.lower() + if self.conf.quantization.quant_level == 0: strategy = "conservative" logger.info(f"On the premise 
that the accuracy criterion is met, improve performance as much as possible.") if strategy == "mse_v2": - if not (self.cfg.quantization.framework.startswith("tensorflow") \ - or self.cfg.quantization.framework == 'pytorch_fx'): # pragma: no cover + if not (self.conf.quantization.framework.startswith("tensorflow") \ + or self.conf.quantization.framework == 'pytorch_fx'): # pragma: no cover strategy = "basic" logger.warning(f"MSE_v2 does not support \ - {self.cfg.quantization.framework} now, use basic instead.") + {self.conf.quantization.framework} now, use basic instead.") logger.warning("Only tensorflow, pytorch_fx is supported by MSE_v2 currently.") assert strategy in STRATEGIES, "Tuning strategy {} is NOT supported".format(strategy) _resume = None # check if interrupted tuning procedure exists. if yes, it will resume the # whole auto tune process. - self.resume_file = os.path.abspath(os.path.expanduser(self.cfg.options.resume_from)) \ - if self.cfg.options.workspace and self.cfg.options.resume_from else None + self.resume_file = os.path.abspath(os.path.expanduser(options.resume_from)) \ + if options.workspace and options.resume_from else None if self.resume_file: assert os.path.exists(self.resume_file), \ "The specified resume file {} doesn't exist!".format(self.resume_file) with open(self.resume_file, 'rb') as f: _resume = pickle.load(f).__dict__ - + self.strategy = STRATEGIES[strategy]( model = self.model, conf = self.conf, @@ -495,30 +493,28 @@ def __init__(self, conf=None, model=None): model: Model to be quantized in this object. """ super(QuantizationAwareTrainingCallbacks, self).__init__(conf=None) - self.conf = Config(quantization=conf, benchmark=None, \ - pruning=None, distillation=None, nas=None) - self.cfg = self.conf + self.conf = _Config(quantization=conf, benchmark=None, pruning=None, distillation=None, nas=None) self.model = model - seed = self.conf.options.random_seed + seed = options.random_seed random.seed(seed) np.random.seed(seed) - framework_specific_info = {'device': self.cfg.quantization.device, - 'random_seed': self.cfg.options.random_seed, - 'workspace_path': self.cfg.options.workspace, + framework_specific_info = {'device': self.conf.quantization.device, + 'random_seed': options.random_seed, + 'workspace_path': options.workspace, 'q_dataloader': None, - 'backend': self.cfg.quantization.backend if \ - self.cfg.quantization.backend is not None else 'default', - 'format': self.cfg.quantization.quant_format if \ - self.cfg.quantization.quant_format is not None else 'default'} - if self.cfg.quantization.approach is not None: - framework_specific_info['approach'] = self.cfg.quantization.approach + 'backend': self.conf.quantization.backend if \ + self.conf.quantization.backend is not None else 'default', + 'format': self.conf.quantization.quant_format if \ + self.conf.quantization.quant_format is not None else 'default'} + if self.conf.quantization.approach is not None: + framework_specific_info['approach'] = self.conf.quantization.approach if 'tensorflow' in self.framework: framework_specific_info.update( - {"inputs": self.cfg.quantization.inputs, \ - "outputs": self.cfg.quantization.outputs}) + {"inputs": self.conf.quantization.inputs, \ + "outputs": self.conf.quantization.outputs}) self.adaptor = FRAMEWORKS[self.framework](framework_specific_info) self.adaptor.model = self.model self.register_hook('on_train_begin', self.adaptor._pre_hook_for_qat) @@ -543,7 +539,7 @@ def __init__(self, conf=None, model=None): model: Model to be pruned in this object. 
""" super(PruningCallbacks, self).__init__(conf=None) - self.conf = Config(pruning=conf, quantization=None, benchmark=None + self.conf = _Config(pruning=conf, quantization=None, benchmark=None , distillation=None, nas=None) self.cfg = self.conf.pruning self.model = model @@ -607,8 +603,7 @@ class DistillationCallbacks(BaseCallbacks): def __init__(self, conf=None, model=None): """Initialize the attributes.""" super(DistillationCallbacks, self).__init__() - self.conf = Config(quantization=None, benchmark=None, pruning=None - , distillation=conf, nas=None) + self.conf = _Config(quantization=None, benchmark=None, pruning=None, distillation=conf, nas=None) self.cfg = self.conf.distillation self.model = model diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 47aec414f06..6f5a3bbe3ee 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -2051,12 +2051,11 @@ def __init__(self, precisions=None): mxnet_config = MXNet() -class Config: +class _Config: """Main config class.""" def __init__(self, quantization=quantization, benchmark=benchmark, - options=options, mixed_precision=mixed_precision, pruning=pruning, distillation=distillation, @@ -2072,7 +2071,6 @@ def __init__(self, """Init a config object.""" self._quantization = quantization self._benchmark = benchmark - self._options = options self._mixed_precision = mixed_precision self._onnxruntime = onnxruntime self._pruning = pruning @@ -2130,11 +2128,6 @@ def benchmark(self): """Get the benchmark object.""" return self._benchmark - @property - def options(self): - """Get the options object.""" - return self._options - @property def mixed_precision(self): """Get the mixed_precision object.""" @@ -2155,4 +2148,4 @@ def tuning(self): """Get the tuning object.""" return self._tuning -config = Config() +config = _Config() diff --git a/neural_compressor/experimental/strategy/basic.py b/neural_compressor/experimental/strategy/basic.py index 6841c197137..1e906b61027 100644 --- a/neural_compressor/experimental/strategy/basic.py +++ b/neural_compressor/experimental/strategy/basic.py @@ -164,5 +164,3 @@ def _initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg:OpTuningConfig) tuning_item = quant_mode_item.get_option_by_name(att_and_method_name) dynamic_state[att_and_method_name] = tuning_item.options[0] if tuning_item else None return OpTuningConfig(op_name, op_type, op_quant_mode, tuning_space, kwargs=dynamic_state) - - \ No newline at end of file diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py index 2ccb1ee7cd2..59fdfa2bdb8 100644 --- a/neural_compressor/mix_precision.py +++ b/neural_compressor/mix_precision.py @@ -22,7 +22,7 @@ import random from .utils.utility import time_limit, CpuInfo from .strategy import STRATEGIES -from .config import Config +from .config import _Config, options from .utils import logger from .model.model import BaseModel, get_model_fwk_name, Model, MODELS @@ -52,8 +52,8 @@ def __init__(self, conf=None): Args: conf (obj): The MixedPrecisionConfig class containing accuracy goal, tuning objective etc. """ - self.conf = Config(quantization=conf, benchmark=None, pruning=None, distillation=None, nas=None) - seed = self.conf.options.random_seed + self.conf = _Config(quantization=conf, benchmark=None, pruning=None, distillation=None, nas=None) + seed = options.random_seed random.seed(seed) np.random.seed(seed) @@ -69,8 +69,8 @@ def pre_process(self): _resume = None # check if interrupted tuning procedure exists. 
if yes, it will resume the # whole auto tune process. - self.resume_file = os.path.abspath(os.path.expanduser(cfg.options.resume_from)) \ - if cfg.options.workspace and cfg.options.resume_from else None + self.resume_file = os.path.abspath(os.path.expanduser(options.resume_from)) \ + if options.workspace and options.resume_from else None if self.resume_file: assert os.path.exists(self.resume_file), \ "The specified resume file {} doesn't exist!".format(self.resume_file) @@ -243,7 +243,7 @@ def model(self, user_model): self._model.name = cfg.quantization.model_name self._model.output_tensor_names = cfg.quantization.outputs self._model.input_tensor_names = cfg.quantization.inputs - self._model.workspace_path = cfg.options.workspace + self._model.workspace_path = options.workspace @property def metric(self): diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index 5432b6d8538..8f058a283c3 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -20,7 +20,7 @@ import pickle import random import numpy as np -from .config import Config +from .config import _Config, options from .model.model import BaseModel, get_model_fwk_name, get_model_type, Model, MODELS from .strategy import STRATEGIES from .utils import logger @@ -54,9 +54,8 @@ def __init__(self, conf, **kwargs): conf (PostTrainingQuantConfig): A instance of PostTrainingQuantConfig to specify the quantization behavior. """ - self.conf = Config(quantization=conf, benchmark=None - , pruning=None, distillation=None, nas=None) - seed = self.conf.options.random_seed + self.conf = _Config(quantization=conf, benchmark=None, pruning=None, distillation=None, nas=None) + seed = options.random_seed random.seed(seed) np.random.seed(seed) self._train_func = None @@ -93,8 +92,8 @@ def pre_proccess(self): _resume = None # check if interrupted tuning procedure exists. if yes, it will resume the # whole auto tune process. - self.resume_file = os.path.abspath(os.path.expanduser(cfg.options.resume_from)) \ - if cfg.options.workspace and cfg.options.resume_from else None + self.resume_file = os.path.abspath(os.path.expanduser(options.resume_from)) \ + if options.workspace and options.resume_from else None if self.resume_file: assert os.path.exists(self.resume_file), \ "The specified resume file {} doesn't exist!".format(self.resume_file) @@ -185,7 +184,6 @@ def model(self, user_model): assert cfg.quantization.framework == "pytorch_ipex",\ "Please wrap the model with correct Model class!" if cfg.quantization.backend == "itex": - from .model.tensorflow_model import get_model_type if get_model_type(user_model.model) == 'keras': assert cfg.quantization.framework == "keras",\ "Please wrap the model with KerasModel class!" 
@@ -195,7 +193,6 @@ def model(self, user_model): else: framework = get_model_fwk_name(user_model) if framework == "tensorflow": - from .model.tensorflow_model import get_model_type if get_model_type(user_model) == 'keras' and cfg.quantization.backend == 'itex': framework = 'keras' if framework == "pytorch": @@ -227,7 +224,7 @@ def model(self, user_model): self._model.name = cfg.quantization.model_name self._model.output_tensor_names = cfg.quantization.outputs self._model.input_tensor_names = cfg.quantization.inputs - self._model.workspace_path = cfg.options.workspace + self._model.workspace_path = options.workspace @property def eval_func(self): diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 516ee4c4dfc..0216e68cded 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -29,7 +29,7 @@ import numpy as np from neural_compressor.adaptor.tensorflow import TensorFlowAdaptor -from ..config import MixedPrecisionConfig +from ..config import MixedPrecisionConfig, options from ..objective import MultiObjective from ..adaptor import FRAMEWORKS from ..utils.utility import Statistics @@ -132,8 +132,8 @@ def __init__(self, """ self.model = model self.conf = conf - self.history_path = self._create_path(self.conf.options.workspace, './history.snapshot') - self.deploy_path = self._create_path(self.conf.options.workspace, 'deploy.yaml') + self.history_path = self._create_path(options.workspace, './history.snapshot') + self.deploy_path = self._create_path(options.workspace, 'deploy.yaml') self.calib_dataloader = q_dataloader self.eval_func = eval_func self.eval_dataloader = eval_dataloader @@ -680,7 +680,7 @@ def traverse(self): if best_result != self.best_tune_result: from neural_compressor.utils.utility import recover self.best_qmodel = recover(self.model.model, - os.path.join(self.conf.options.workspace, 'history.snapshot'), + os.path.join(options.workspace, 'history.snapshot'), best_trail) logger.debug(f"*** Update the best qmodel by recovering from history.") self.best_tune_result = best_result @@ -1002,7 +1002,7 @@ def _create_path(self, custom_path, filename): def _set_framework_info(self, q_dataloader, q_func=None): framework_specific_info = {'device': self.conf.quantization.device, 'approach': self.conf.quantization.approach, - 'random_seed': self.conf.options.random_seed, + 'random_seed': options.random_seed, 'performance_only': self._not_tuning} framework = self.conf.quantization.framework.lower() framework_specific_info.update({'backend': self.conf.quantization.backend}) @@ -1015,7 +1015,7 @@ def _set_framework_info(self, q_dataloader, q_func=None): framework_specific_info.update( {"inputs": self.conf.quantization.inputs, "outputs": self.conf.quantization.outputs, - 'workspace_path': self.conf.options.workspace, + 'workspace_path': options.workspace, 'recipes': self.conf.quantization.recipes, 'use_bf16': self.conf.quantization.use_bf16 if self.conf.quantization.use_bf16 is not None else False}) for item in ['scale_propagation_max_pooling', 'scale_propagation_concat']: @@ -1025,14 +1025,14 @@ def _set_framework_info(self, q_dataloader, q_func=None): framework = 'tensorflow_itex' if 'keras' in framework: framework_specific_info.update({ - 'workspace_path': self.conf.options.workspace, }) + 'workspace_path': options.workspace, }) if framework == 'mxnet': framework_specific_info.update({"q_dataloader": q_dataloader}) if 'onnx' in framework.lower(): if self.mixed_precision_mode: 
framework_specific_info.update({"approach": "post_training_dynamic_quant"}) framework_specific_info.update({"deploy_path": os.path.dirname(self.deploy_path)}) - framework_specific_info.update({'workspace_path': self.conf.options.workspace}) + framework_specific_info.update({'workspace_path': options.workspace}) framework_specific_info.update({'recipes': self.conf.quantization.recipes}) framework_specific_info.update({'reduce_range': self.conf.quantization.reduce_range}) framework_specific_info.update({'recipes': self.conf.quantization.recipes}) @@ -1188,14 +1188,14 @@ def _evaluate(self, model): Objective: The objective value evaluated. """ if self.eval_func: - if self.conf.options.tensorboard: + if options.tensorboard: # Pytorch can insert observer to model in this hook. # Tensorflow don't support this mode for now model = self.adaptor._pre_eval_hook(model) val = self.objectives.evaluate( self.eval_func, model if self.framework == "pytorch_ipex" else model.model ) - if self.conf.options.tensorboard: + if options.tensorboard: # post_eval_hook to deal the tensor self.adaptor._post_eval_hook(model, accuracy=val[0]) else: @@ -1210,7 +1210,7 @@ def _evaluate(self, model): metric_cfg, postprocess_cfg, iteration, - tensorboard = self.conf.options.tensorboard, + tensorboard = options.tensorboard, fp32_baseline = self.baseline == None) if getattr(self.eval_dataloader, 'distributed', False): diff --git a/neural_compressor/training.py b/neural_compressor/training.py index 7daa73b8b8f..c9902267f24 100644 --- a/neural_compressor/training.py +++ b/neural_compressor/training.py @@ -271,8 +271,6 @@ def prepare_compression(model: Callable, confs: Union[Callable, List], **kwargs) confs (Union[Callable, List]): The instance of QuantizationAwareTrainingConfig, PruningConfig and distillationConfig, or a list of config for orchestration optimization. - options (Options, optional): The configure for random_seed, workspace, - resume path and tensorboard flag. Returns: An object of CompressionManager. 
diff --git a/test/config/test_config_2.x.py b/test/config/test_config_2.x.py index 914e28d2a0f..a2330db043f 100644 --- a/test/config/test_config_2.x.py +++ b/test/config/test_config_2.x.py @@ -1,7 +1,7 @@ """Tests for 2.x config file""" import unittest import os -from neural_compressor.config import Config as conf +from neural_compressor.config import _Config as conf from neural_compressor.utils.constant import * from neural_compressor.config import PostTrainingQuantConfig, BenchmarkConfig, Options from neural_compressor.config import MixedPrecisionConfig, MXNet From f73760d37436fabe91c3fa58775ee4e60cad83d5 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Fri, 21 Apr 2023 23:20:15 +0800 Subject: [PATCH 082/103] Update Code Signed-off-by: Cheng, Penghui --- neural_compressor/__init__.py | 1 + neural_compressor/compression/callbacks.py | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/neural_compressor/__init__.py b/neural_compressor/__init__.py index ffd79a5bb06..bd6185da14a 100644 --- a/neural_compressor/__init__.py +++ b/neural_compressor/__init__.py @@ -24,3 +24,4 @@ WeightPruningConfig, QuantizationAwareTrainingConfig, \ MixedPrecisionConfig from .contrib import * +from .model import * diff --git a/neural_compressor/compression/callbacks.py b/neural_compressor/compression/callbacks.py index cc3eecadc17..cca4979219a 100644 --- a/neural_compressor/compression/callbacks.py +++ b/neural_compressor/compression/callbacks.py @@ -199,7 +199,7 @@ def model(self, user_model): user_model: users are supported to set the model from the original framework model format (eg, tensorflow frozen_pb or path to a saved model), but this is not recommended. Best practice is to set from an initialized - neural_compressor.model.Model. + neural_compressor.Model. If tensorflow model is used, model's inputs/outputs will be auto inferenced, but sometimes auto inferenced inputs/outputs will not meet your requests, @@ -734,8 +734,7 @@ def teacher_model(self, user_model): user_model: users are supported to set the model from the original framework model format (eg, tensorflow frozen_pb or path to a saved model), but this is not recommended. Best practice is to set from an initialized - neural_compressor.experimental.common.Model. - If tensorflow model is used, model's inputs/outputs will be + neural_compressor.Model. If tensorflow model is used, model's inputs/outputs will be auto inferenced, but sometimes auto inferenced inputs/outputs will not meet your requests, set them manually in config yaml file. 
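The docstring updates above settle on the top-level `neural_compressor.Model` wrapper as the recommended entry point, which the new `from .model import *` line is meant to make importable from the package root. A minimal sketch of that best practice, with a hypothetical frozen-graph path reused from the examples:

```python
# Wrapping a framework model before handing it to the callbacks, as the
# updated docstrings recommend; the .pb path is hypothetical. Model() detects
# the framework of the object it is given and returns the matching wrapper.
from neural_compressor.model import Model

nc_model = Model("./mobilenet_v1_1.0_224_frozen.pb")
```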
From 48da0a3718edd259ff46bfc81898aa03b7655b0c Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Sat, 22 Apr 2023 10:00:40 +0800 Subject: [PATCH 083/103] Fixed UT error Signed-off-by: Cheng, Penghui --- neural_compressor/__init__.py | 2 +- neural_compressor/strategy/bayesian.py | 33 ++++++++++++-------------- test/config/test_config_2.x.py | 31 ++++++++++++++++-------- test/strategy/test_bayesian.py | 12 +++++----- 4 files changed, 43 insertions(+), 35 deletions(-) diff --git a/neural_compressor/__init__.py b/neural_compressor/__init__.py index bd6185da14a..7e494942dd9 100644 --- a/neural_compressor/__init__.py +++ b/neural_compressor/__init__.py @@ -18,7 +18,7 @@ """Intel® Neural Compressor: An open-source Python library supporting popular model compression techniques.""" from .version import __version__ # we need to set a global 'NA' backend, or Model can't be used -from .utils.utility import set_random_seed, set_tensorboard, set_workspace +from .utils.utility import set_random_seed, set_tensorboard, set_workspace, set_resume_from from .utils import options from .config import DistillationConfig, PostTrainingQuantConfig, \ WeightPruningConfig, QuantizationAwareTrainingConfig, \ diff --git a/neural_compressor/strategy/bayesian.py b/neural_compressor/strategy/bayesian.py index 5f6232ce1cd..49b4d250381 100644 --- a/neural_compressor/strategy/bayesian.py +++ b/neural_compressor/strategy/bayesian.py @@ -17,26 +17,23 @@ """The Bayesian tuning strategy.""" -import copy import warnings import numpy as np from scipy.optimize import minimize from sklearn.gaussian_process.kernels import Matern from sklearn.gaussian_process import GaussianProcessRegressor -from collections import OrderedDict from copy import deepcopy - +from ..config import options from ..utils import logger from .strategy import strategy_registry, TuneStrategy from .utils.tuning_sampler import OpWiseTuningSampler -from .utils.tuning_structs import OpTuningConfig @strategy_registry class BayesianTuneStrategy(TuneStrategy): """The Bayesian tuning strategy.""" - + def __init__(self, model, conf, @@ -103,7 +100,7 @@ def _params_to_tune_configs(self, params): def next_tune_cfg(self): """Generate the next tuning config according to bayesian search algorithm. - + This strategy comes from the Bayesian optimization package and changed it to a discrete version. It uses Gaussian processes to define the prior/posterior distribution over the black-box function with the tuning history and then finds the tuning configuration that maximizes @@ -131,7 +128,7 @@ def next_tune_cfg(self): return if self.bayes_opt is None: self.bayes_opt = BayesianOptimization( - pbounds=pbounds, random_seed=self.conf.options.random_seed) + pbounds=pbounds, random_seed=options.random_seed) while True: params = self.bayes_opt.gen_next_params() logger.debug("Dump current bayesian params:") @@ -149,7 +146,7 @@ def next_tune_cfg(self): def acq_max(ac, gp, y_max, bounds, random_seed, n_warmup=10000, n_iter=10): """Find the maximum of the acquisition function parameters. - + Args: ac: The acquisition function object that return its point-wise value. gp: A gaussian process fitted to the relevant data. @@ -158,7 +155,7 @@ def acq_max(ac, gp, y_max, bounds, random_seed, n_warmup=10000, n_iter=10): random_seed: instance of np.RandomState random number generator n_warmup: number of times to randomly sample the acquisition function n_iter: number of times to run scipy.minimize - + Returns: x_max: The arg max of the acquisition function. 
""" @@ -182,7 +179,7 @@ def acq_max(ac, gp, y_max, bounds, random_seed, n_warmup=10000, n_iter=10): # See if success if not res.success: continue - + if isinstance(res.fun, float): res.fun = np.array([res.fun]) # Store it if better than previous minimum(maximum). @@ -202,13 +199,13 @@ def _hashable(x): # Target space part class TargetSpace(object): """Holds the param-space coordinates (X) and target values (Y). - + Allows for constant-time appends while ensuring no duplicates are added. """ def __init__(self, pbounds, random_seed=9527): """Construct a TargetSpace. - + Args: target_func (function): Function to be maximized. pbounds (dict): Dictionary with parameters names as keys and a tuple with minimum and maximum values. @@ -325,13 +322,13 @@ def _as_array(self, x): def register(self, params, target): """Append a point and its target value to the known data. - + Runs in amortized constant time. Args: params (ndarray): a single point, with len(params) == self.dim target (float): target function value - + Raises: KeyError: if the point is not unique """ @@ -347,7 +344,7 @@ def register(self, params, target): def get_target(self, params): """Get the target value of params. - + Args: params (ndarray): a single point, with len(params) == self.dim @@ -360,7 +357,7 @@ def get_target(self, params): def random_sample(self): """Create random points within the bounds of the space. - + Returns: data (ndarray): [num x dim] array points with dimensions corresponding to `self._keys` """ @@ -396,11 +393,11 @@ def res(self): # Tuning part class BayesianOptimization(): """The class for bayesian optimization. - + This class takes the parameters bounds in order to find which values for the parameters yield the maximum value using bayesian optimization. """ - + def __init__(self, pbounds, random_seed=9527, verbose=2): """Init bayesian optimization. 
diff --git a/test/config/test_config_2.x.py b/test/config/test_config_2.x.py index a2330db043f..e24b5ebc276 100644 --- a/test/config/test_config_2.x.py +++ b/test/config/test_config_2.x.py @@ -1,10 +1,9 @@ """Tests for 2.x config file""" import unittest -import os +from neural_compressor import set_workspace, set_random_seed, set_resume_from, set_tensorboard from neural_compressor.config import _Config as conf +from neural_compressor.config import PostTrainingQuantConfig, BenchmarkConfig, MixedPrecisionConfig, MXNet, options from neural_compressor.utils.constant import * -from neural_compressor.config import PostTrainingQuantConfig, BenchmarkConfig, Options -from neural_compressor.config import MixedPrecisionConfig, MXNet def helper(content): @@ -42,11 +41,11 @@ def test_config(self): } a = conf(quantization=cfg) self.assertEqual(a.quantization.op_type_dict['Conv']['weight']['dtype'], ['fp32']) - + cfg.tuning_criterion.strategy = 'mse' a = conf(quantization=cfg) self.assertEqual(a.tuning.strategy, 'mse') - + cfg = BenchmarkConfig() cfg.cores_per_instance = 4 cfg.iteration = 100 @@ -54,11 +53,6 @@ def test_config(self): a = conf(benchmark=cfg) self.assertEqual(a.benchmark.iteration, 100) - cfg = Options() - cfg.workspace = "workspace_path" - a = conf(options=cfg) - self.assertEqual(a.options.workspace, "workspace_path") - cfg = MixedPrecisionConfig() a = conf(mixed_precision=cfg) self.assertEqual(a.mixed_precision.precision, ["bf16"]) @@ -68,6 +62,23 @@ def test_config(self): a = conf(mxnet=cfg) self.assertEqual(a.mxnet.precisions, ["bf16"]) + set_workspace("workspace_path") + self.assertEqual(options.workspace, "workspace_path") + + set_random_seed(1) + self.assertEqual(options.random_seed, 1) + + tmp_resume_from = options.resume_from + set_resume_from("resume_from_path") + self.assertEqual(options.resume_from, "resume_from_path") + set_resume_from(tmp_resume_from) + + tmp_tensorboard = options.tensorboard + set_tensorboard(True) + self.assertEqual(options.tensorboard, True) + set_tensorboard(tmp_tensorboard) + + if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_bayesian.py b/test/strategy/test_bayesian.py index 46fbc9ee674..daf21b565ce 100644 --- a/test/strategy/test_bayesian.py +++ b/test/strategy/test_bayesian.py @@ -199,19 +199,19 @@ def test_run_bayesian_one_trial(self): from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion from neural_compressor.data import Datasets, DATALOADERS - + # dataset and dataloader dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) dataloader = DATALOADERS["tensorflow"](dataset) - + # tuning and accuracy criterion tune_cri = TuningCriterion(strategy='bayesian', max_trials=1) acc_cri = AccuracyCriterion(tolerable_loss=0.01) - + conf = PostTrainingQuantConfig(quant_level=1, tuning_criterion=tune_cri, accuracy_criterion=acc_cri) def fake_eval(model): return 1 - + q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader=dataloader, @@ -222,11 +222,11 @@ def test_run_bayesian_max_trials(self): from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion from neural_compressor.data import Datasets, DATALOADERS - + # dataset and dataloader dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) dataloader = DATALOADERS["tensorflow"](dataset) - + # tuning and accuracy criterion tune_cri = TuningCriterion(strategy='bayesian', 
max_trials=3) acc_cri = AccuracyCriterion(tolerable_loss=0.01) From 2b104d45e13ef25c6d31096785249734e8f78147 Mon Sep 17 00:00:00 2001 From: "Cheng, Penghui" Date: Sat, 22 Apr 2023 16:49:53 +0800 Subject: [PATCH 084/103] Support method of Obtaining the built-in metric by Metric class Signed-off-by: Cheng, Penghui --- examples/helloworld/tf_example1/README.md | 3 +- examples/helloworld/tf_example1/test.py | 4 +- examples/helloworld/tf_example3/README.md | 2 +- examples/helloworld/tf_example3/test.py | 4 +- examples/helloworld/tf_example5/README.md | 4 +- examples/helloworld/tf_example5/test.py | 4 +- examples/helloworld/tf_example6/README.md | 3 +- examples/helloworld/tf_example6/test.py | 8 +- .../quantization/ptq/main.py | 4 +- .../inception_v3/quantization/ptq/main.py | 4 +- .../mobilenet_v2/quantization/ptq/main.py | 4 +- .../resnet101/quantization/ptq/main.py | 4 +- .../resnet50/quantization/ptq/main.py | 4 +- .../resnet50_fashion/quantization/ptq/main.py | 4 +- .../resnetv2_101/quantization/ptq/main.py | 4 +- .../resnetv2_50/quantization/ptq/main.py | 4 +- .../vgg16/quantization/ptq/main.py | 4 +- .../vgg19/quantization/ptq/main.py | 4 +- .../xception/quantization/ptq/main.py | 4 +- .../quantization/ptq/main.py | 4 +- .../mobilenet_v1/quantization/ptq/main.py | 4 +- .../mobilenet_v2/quantization/ptq/main.py | 4 +- .../resnet_v2/quantization/qat/main.py | 4 +- .../densenet121/quantization/ptq/main.py | 8 +- .../densenet161/quantization/ptq/main.py | 8 +- .../densenet169/quantization/ptq/main.py | 8 +- .../efficientnet-b0/quantization/ptq/main.py | 8 +- .../quantization/ptq/main.py | 8 +- .../inception_v1/quantization/ptq/main.py | 8 +- .../inception_v2/quantization/ptq/main.py | 8 +- .../inception_v3/quantization/ptq/main.py | 8 +- .../inception_v4/quantization/ptq/main.py | 8 +- .../mobilenet_v1/quantization/ptq/main.py | 8 +- .../mobilenet_v2/export/main.py | 8 +- .../mobilenet_v2/quantization/ptq/main.py | 8 +- .../mobilenet_v3/quantization/ptq/main.py | 8 +- .../resnet101/quantization/ptq/main.py | 8 +- .../resnet50_v1/export/main.py | 8 +- .../resnet50_v1/quantization/ptq/main.py | 8 +- .../resnet50_v1_5/export/main.py | 8 +- .../resnet50_v1_5/quantization/ptq/main.py | 8 +- .../resnet_v2_101/quantization/ptq/main.py | 8 +- .../resnet_v2_152/quantization/ptq/main.py | 8 +- .../resnet_v2_50/quantization/ptq/main.py | 8 +- .../tensorflow_models/vgg16/export/main.py | 4 +- .../vgg16/quantization/ptq/main.py | 8 +- .../vgg19/quantization/ptq/main.py | 8 +- neural_compressor/__init__.py | 1 + .../experimental/metric/metric.py | 6 +- neural_compressor/metric/metric.py | 104 +++++++++--------- neural_compressor/quantization.py | 23 ++-- .../test_adaptor_pytorch_2.x.py | 40 +++++-- test/metric/test_metrics_2.x.py | 10 +- 53 files changed, 245 insertions(+), 219 deletions(-) diff --git a/examples/helloworld/tf_example1/README.md b/examples/helloworld/tf_example1/README.md index 1dcdc146d96..08c0b0d2996 100644 --- a/examples/helloworld/tf_example1/README.md +++ b/examples/helloworld/tf_example1/README.md @@ -30,7 +30,8 @@ python test.py --dataset_location=/path/to/imagenet/ ### 2. Introduction We only need to add the following lines for quantization to create an int8 model. 
```python - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) config = PostTrainingQuantConfig(calibration_sampling_size=[20]) q_model = fit( model="./mobilenet_v1_1.0_224_frozen.pb", diff --git a/examples/helloworld/tf_example1/test.py b/examples/helloworld/tf_example1/test.py index 1e602a34fea..8ba5462257b 100644 --- a/examples/helloworld/tf_example1/test.py +++ b/examples/helloworld/tf_example1/test.py @@ -6,7 +6,7 @@ from neural_compressor.data import DefaultDataLoader from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig -from neural_compressor.metric import TensorflowTopK +from neural_compressor import Metric flags = tf.compat.v1.flags FLAGS = flags.FLAGS @@ -22,7 +22,7 @@ eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=1) def main(): - top1 = TensorflowTopK(k=1) + top1 = Metric(name="topk", k=1) config = PostTrainingQuantConfig(calibration_sampling_size=[20]) q_model = fit( model="./mobilenet_v1_1.0_224_frozen.pb", diff --git a/examples/helloworld/tf_example3/README.md b/examples/helloworld/tf_example3/README.md index 2cc37bf41ab..905f8e7904a 100644 --- a/examples/helloworld/tf_example3/README.md +++ b/examples/helloworld/tf_example3/README.md @@ -29,7 +29,7 @@ We can get a BF16 model using the Mixed Precision API. ```python from neural_compressor.config import MixedPrecisionConfig from neural_compressor import mix_precision - from neural_compressor.metric import TensorflowTopK + from neural_compressor import Metric - top1 = TensorflowTopK(k=1) + top1 = Metric(name="topk", k=1) config = MixedPrecisionConfig() mix_precision_model = mix_precision.fit( diff --git a/examples/helloworld/tf_example3/test.py b/examples/helloworld/tf_example3/test.py index 28c47fe9bb5..d94f38cc9cc 100644 --- a/examples/helloworld/tf_example3/test.py +++ b/examples/helloworld/tf_example3/test.py @@ -16,8 +16,8 @@ def main(): from neural_compressor.config import MixedPrecisionConfig from neural_compressor import mix_precision - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) config = MixedPrecisionConfig() mix_precision_model = mix_precision.fit( model="./mobilenet_v1_1.0_224_frozen.pb", diff --git a/examples/helloworld/tf_example5/README.md b/examples/helloworld/tf_example5/README.md index 1ab0b765170..399ebd8bfb8 100644 --- a/examples/helloworld/tf_example5/README.md +++ b/examples/helloworld/tf_example5/README.md @@ -37,8 +37,8 @@ python test.py --benchmark --dataset_location=/path/to/imagenet/ * We only need to add the following lines for quantization to create an int8 model. ```python from neural_compressor.quantization import fit - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) config = PostTrainingQuantConfig(calibration_sampling_size=[20]) q_model = fit( model="./mobilenet_v1_1.0_224_frozen.pb", diff --git a/examples/helloworld/tf_example5/test.py b/examples/helloworld/tf_example5/test.py index e36aefcc3ef..e844ae5009c 100644 --- a/examples/helloworld/tf_example5/test.py +++ b/examples/helloworld/tf_example5/test.py @@ -24,8 +24,8 @@ def main(): if args.tune: from neural_compressor.quantization import fit - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) config = PostTrainingQuantConfig(calibration_sampling_size=[20]) q_model = fit( model="./mobilenet_v1_1.0_224_frozen.pb", diff --git a/examples/helloworld/tf_example6/README.md b/examples/helloworld/tf_example6/README.md index 50b7e64811a..4397136d55a 100644 --- a/examples/helloworld/tf_example6/README.md +++ b/examples/helloworld/tf_example6/README.md @@ -36,7 +36,8 @@ python test.py --benchmark --dataset_location=/path/to/imagenet/ ### 2. Introduction * We only need to add the following lines for quantization to create an int8 model. ```python - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) quantized_model = fit( model="./mobilenet_v1_1.0_224_frozen.pb", conf=config, diff --git a/examples/helloworld/tf_example6/test.py b/examples/helloworld/tf_example6/test.py index 91ee319469b..fba2f13a603 100644 --- a/examples/helloworld/tf_example6/test.py +++ b/examples/helloworld/tf_example6/test.py @@ -1,6 +1,6 @@ import tensorflow as tf from argparse import ArgumentParser -from neural_compressor.metric import TensorflowTopK +from neural_compressor import Metric from neural_compressor.data import LabelShift from neural_compressor.data import TensorflowImageRecord from neural_compressor.data import BilinearImagenetTransform @@ -41,7 +41,7 @@ def evaluate(model): if args.benchmark: iteration = 100 postprocess = LabelShift(label_shift=1) - metric = TensorflowTopK(k=1) + metric = Metric(name="topk", k=1) def eval_func(dataloader): latency_list = [] @@ -76,8 +76,8 @@ def main(): if args.tune: from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) config = PostTrainingQuantConfig(calibration_sampling_size=[20]) quantized_model = fit( model="./mobilenet_v1_1.0_224_frozen.pb", diff --git a/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py b/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py index e3c2810cb02..22b3f48a61c 100644 --- a/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py +++ b/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py @@ -51,7 +51,7 @@ flags.DEFINE_integer( 'iters', 100, 'maximum iteration when evaluating performance') -from neural_compressor.metric.metric import TensorflowTopK +from neural_compressor.metric import Metric from neural_compressor.data.transforms.transform import ComposeTransform from neural_compressor.data.datasets.dataset import TensorflowImageRecord from neural_compressor.data.transforms.imagenet_transform 
```python from neural_compressor.quantization import fit - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) config = PostTrainingQuantConfig(calibration_sampling_size=[20]) q_model = fit( model="./mobilenet_v1_1.0_224_frozen.pb", diff --git a/examples/helloworld/tf_example5/test.py b/examples/helloworld/tf_example5/test.py index e36aefcc3ef..e844ae5009c 100644 --- a/examples/helloworld/tf_example5/test.py +++ b/examples/helloworld/tf_example5/test.py @@ -24,8 +24,8 @@ def main(): if args.tune: from neural_compressor.quantization import fit - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) config = PostTrainingQuantConfig(calibration_sampling_size=[20]) q_model = fit( model="./mobilenet_v1_1.0_224_frozen.pb", diff --git a/examples/helloworld/tf_example6/README.md b/examples/helloworld/tf_example6/README.md index 50b7e64811a..4397136d55a 100644 --- a/examples/helloworld/tf_example6/README.md +++ b/examples/helloworld/tf_example6/README.md @@ -36,7 +36,8 @@ python test.py --benchmark --dataset_location=/path/to/imagenet/ ### 2. Introduction * We only need to add the following lines for quantization to create an int8 model. ```python - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) quantized_model = fit( model="./mobilenet_v1_1.0_224_frozen.pb", conf=config, diff --git a/examples/helloworld/tf_example6/test.py b/examples/helloworld/tf_example6/test.py index 91ee319469b..fba2f13a603 100644 --- a/examples/helloworld/tf_example6/test.py +++ b/examples/helloworld/tf_example6/test.py @@ -1,6 +1,6 @@ import tensorflow as tf from argparse import ArgumentParser -from neural_compressor.metric import TensorflowTopK +from neural_compressor import Metric from neural_compressor.data import LabelShift from neural_compressor.data import TensorflowImageRecord from neural_compressor.data import BilinearImagenetTransform @@ -41,7 +41,7 @@ def evaluate(model): if args.benchmark: iteration = 100 postprocess = LabelShift(label_shift=1) - metric = TensorflowTopK(k=1) + metric = Metric(name="topk", k=1) def eval_func(dataloader): latency_list = [] @@ -76,8 +76,8 @@ def main(): if args.tune: from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) config = PostTrainingQuantConfig(calibration_sampling_size=[20]) quantized_model = fit( model="./mobilenet_v1_1.0_224_frozen.pb", diff --git a/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py b/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py index e3c2810cb02..22b3f48a61c 100644 --- a/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py +++ b/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py @@ -51,7 +51,7 @@ flags.DEFINE_integer( 'iters', 100, 'maximum iteration when evaluating performance') -from neural_compressor.metric.metric import TensorflowTopK +from neural_compressor.metric import Metric from neural_compressor.data.transforms.transform import ComposeTransform from neural_compressor.data.datasets.dataset import TensorflowImageRecord from neural_compressor.data.transforms.imagenet_transform 
import LabelShift @@ -81,7 +81,7 @@ def evaluate(model): accuracy (float): evaluation result, the larger is better. """ postprocess = LabelShift(label_shift=1) - metric = TensorflowTopK(k=1) + metric = Metric(name="topk", k=1) latency_list = [] def eval_func(dataloader, metric): diff --git a/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py b/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py index d62bbb07288..8ab59d75b79 100644 --- a/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py +++ b/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py @@ -51,7 +51,7 @@ flags.DEFINE_integer( 'iters', 100, 'maximum iteration when evaluating performance') -from neural_compressor.metric.metric import TensorflowTopK +from neural_compressor import Metric from neural_compressor.data.transforms.transform import ComposeTransform from neural_compressor.data.datasets.dataset import TensorflowImageRecord from neural_compressor.data.transforms.imagenet_transform import LabelShift @@ -81,7 +81,7 @@ def evaluate(model): accuracy (float): evaluation result, the larger is better. """ postprocess = LabelShift(label_shift=1) - metric = TensorflowTopK(k=1) + metric = Metric(name="topk", k=1) latency_list = [] def eval_func(dataloader, metric): diff --git a/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py b/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py index 9f7baed9449..cc2c580d492 100644 --- a/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py +++ b/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py @@ -51,7 +51,7 @@ flags.DEFINE_integer( 'iters', 100, 'maximum iteration when evaluating performance') -from neural_compressor.metric.metric import TensorflowTopK +from neural_compressor import Metric from neural_compressor.data.transforms.transform import ComposeTransform from neural_compressor.data.datasets.dataset import TensorflowImageRecord from neural_compressor.data.transforms.imagenet_transform import LabelShift @@ -80,7 +80,7 @@ def evaluate(model): accuracy (float): evaluation result, the larger is better. 
""" postprocess = LabelShift(label_shift=1) - metric = TensorflowTopK(k=1) + metric = Metric(name="topk", k=1) latency_list = [] def eval_func(dataloader, metric): diff --git a/examples/keras/image_recognition/resnet101/quantization/ptq/main.py b/examples/keras/image_recognition/resnet101/quantization/ptq/main.py index 61a2f0129c6..fb5b7634f7f 100644 --- a/examples/keras/image_recognition/resnet101/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnet101/quantization/ptq/main.py @@ -51,7 +51,7 @@ flags.DEFINE_integer( 'iters', 100, 'maximum iteration when evaluating performance') -from neural_compressor.metric.metric import TensorflowTopK +from neural_compressor import Metric from neural_compressor.data.transforms.transform import ComposeTransform from neural_compressor.data.datasets.dataset import TensorflowImageRecord from neural_compressor.data.transforms.imagenet_transform import LabelShift @@ -88,7 +88,7 @@ def evaluate(model): # disable eager mode model.compile(run_eagerly=False) postprocess = LabelShift(label_shift=1) - metric = TensorflowTopK(k=1) + metric = Metric(name="topk", k=1) latency_list = [] def eval_func(dataloader, metric): warmup = 5 diff --git a/examples/keras/image_recognition/resnet50/quantization/ptq/main.py b/examples/keras/image_recognition/resnet50/quantization/ptq/main.py index 9dbf321c7e2..c2d0d466540 100644 --- a/examples/keras/image_recognition/resnet50/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnet50/quantization/ptq/main.py @@ -51,7 +51,7 @@ flags.DEFINE_integer( 'iters', 100, 'maximum iteration when evaluating performance') -from neural_compressor.metric import TensorflowTopK +from neural_compressor import Metric from neural_compressor.data import TensorflowImageRecord from neural_compressor.data.dataloaders.tensorflow_dataloader import TensorflowDataLoader from neural_compressor.data import ComposeTransform @@ -79,7 +79,7 @@ def evaluate(model): accuracy (float): evaluation result, the larger is better. """ postprocess = LabelShift(label_shift=1) - metric = TensorflowTopK(k=1) + metric = Metric(name="topk", k=1) def eval_func(dataloader, metric): warmup = 5 diff --git a/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py b/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py index 767536b6852..0795441082f 100644 --- a/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py @@ -51,7 +51,7 @@ flags.DEFINE_integer( 'iters', 100, 'maximum iteration when evaluating performance') -from neural_compressor.metric import TensorflowTopK +from neural_compressor import Metric def evaluate(model): """Custom evaluate function to inference the model for specified metric on validation dataset. @@ -62,7 +62,7 @@ def evaluate(model): Returns: accuracy (float): evaluation result, the larger is better. 
""" - metric = TensorflowTopK(k=1) + metric = Metric(name="topk", k=1) def eval_func(data_loader, metric): warmup = 5 diff --git a/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py b/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py index 9f7baed9449..cc2c580d492 100644 --- a/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py @@ -51,7 +51,7 @@ flags.DEFINE_integer( 'iters', 100, 'maximum iteration when evaluating performance') -from neural_compressor.metric.metric import TensorflowTopK +from neural_compressor import Metric from neural_compressor.data.transforms.transform import ComposeTransform from neural_compressor.data.datasets.dataset import TensorflowImageRecord from neural_compressor.data.transforms.imagenet_transform import LabelShift @@ -80,7 +80,7 @@ def evaluate(model): accuracy (float): evaluation result, the larger is better. """ postprocess = LabelShift(label_shift=1) - metric = TensorflowTopK(k=1) + metric = Metric(name="topk", k=1) latency_list = [] def eval_func(dataloader, metric): diff --git a/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py b/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py index 9f7baed9449..cc2c580d492 100644 --- a/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py @@ -51,7 +51,7 @@ flags.DEFINE_integer( 'iters', 100, 'maximum iteration when evaluating performance') -from neural_compressor.metric.metric import TensorflowTopK +from neural_compressor import Metric from neural_compressor.data.transforms.transform import ComposeTransform from neural_compressor.data.datasets.dataset import TensorflowImageRecord from neural_compressor.data.transforms.imagenet_transform import LabelShift @@ -80,7 +80,7 @@ def evaluate(model): accuracy (float): evaluation result, the larger is better. """ postprocess = LabelShift(label_shift=1) - metric = TensorflowTopK(k=1) + metric = Metric(name="topk", k=1) latency_list = [] def eval_func(dataloader, metric): diff --git a/examples/keras/image_recognition/vgg16/quantization/ptq/main.py b/examples/keras/image_recognition/vgg16/quantization/ptq/main.py index f708d0168d6..59f3a4f30f5 100644 --- a/examples/keras/image_recognition/vgg16/quantization/ptq/main.py +++ b/examples/keras/image_recognition/vgg16/quantization/ptq/main.py @@ -51,7 +51,7 @@ flags.DEFINE_integer( 'iters', 100, 'maximum iteration when evaluating performance') -from neural_compressor.metric import TensorflowTopK +from neural_compressor import Metric from neural_compressor.data import TensorflowImageRecord from neural_compressor.data.dataloaders.tensorflow_dataloader import TensorflowDataLoader from neural_compressor.data import ComposeTransform @@ -79,7 +79,7 @@ def evaluate(model): accuracy (float): evaluation result, the larger is better. 
""" postprocess = LabelShift(label_shift=1) - metric = TensorflowTopK(k=1) + metric = Metric(name="topk", k=1) def eval_func(dataloader, metric): warmup = 5 diff --git a/examples/keras/image_recognition/vgg19/quantization/ptq/main.py b/examples/keras/image_recognition/vgg19/quantization/ptq/main.py index 9dbf321c7e2..c2d0d466540 100644 --- a/examples/keras/image_recognition/vgg19/quantization/ptq/main.py +++ b/examples/keras/image_recognition/vgg19/quantization/ptq/main.py @@ -51,7 +51,7 @@ flags.DEFINE_integer( 'iters', 100, 'maximum iteration when evaluating performance') -from neural_compressor.metric import TensorflowTopK +from neural_compressor import Metric from neural_compressor.data import TensorflowImageRecord from neural_compressor.data.dataloaders.tensorflow_dataloader import TensorflowDataLoader from neural_compressor.data import ComposeTransform @@ -79,7 +79,7 @@ def evaluate(model): accuracy (float): evaluation result, the larger is better. """ postprocess = LabelShift(label_shift=1) - metric = TensorflowTopK(k=1) + metric = Metric(name="topk", k=1) def eval_func(dataloader, metric): warmup = 5 diff --git a/examples/keras/image_recognition/xception/quantization/ptq/main.py b/examples/keras/image_recognition/xception/quantization/ptq/main.py index 42928cb69b6..19543e10798 100644 --- a/examples/keras/image_recognition/xception/quantization/ptq/main.py +++ b/examples/keras/image_recognition/xception/quantization/ptq/main.py @@ -51,7 +51,7 @@ flags.DEFINE_integer( 'iters', 100, 'maximum iteration when evaluating performance') -from neural_compressor.metric.metric import TensorflowTopK +from neural_compressor import Metric from neural_compressor.data.transforms.transform import ComposeTransform from neural_compressor.data.datasets.dataset import TensorflowImageRecord from neural_compressor.data.transforms.imagenet_transform import LabelShift @@ -80,7 +80,7 @@ def evaluate(model): accuracy (float): evaluation result, the larger is better. 
""" postprocess = LabelShift(label_shift=1) - metric = TensorflowTopK(k=1) + metric = Metric(name="topk", k=1) latency_list = [] def eval_func(dataloader, metric): diff --git a/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py b/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py index 01ecc92f776..6a1b8d67a16 100644 --- a/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py @@ -53,8 +53,8 @@ def evaluate(model): infer = model.signatures["serving_default"] output_dict_keys = infer.structured_outputs.keys() output_name = list(output_dict_keys )[0] - from neural_compressor.metric import TensorflowTopK - metric = TensorflowTopK(k=1) + from neural_compressor import Metric + metric = Metric(name="topk", k=1) def eval_func(dataloader, metric): warmup = 5 diff --git a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py b/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py index af7ca47424e..4fffda1e017 100644 --- a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py @@ -51,8 +51,8 @@ def evaluate(model): infer = model.signatures["serving_default"] output_dict_keys = infer.structured_outputs.keys() output_name = list(output_dict_keys )[0] - from neural_compressor.metric import TensorflowTopK - metric = TensorflowTopK(k=1) + from neural_compressor import Metric + metric = Metric(name="topk", k=1) def eval_func(dataloader, metric): warmup = 5 diff --git a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py b/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py index af7ca47424e..4fffda1e017 100644 --- a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py @@ -51,8 +51,8 @@ def evaluate(model): infer = model.signatures["serving_default"] output_dict_keys = infer.structured_outputs.keys() output_name = list(output_dict_keys )[0] - from neural_compressor.metric import TensorflowTopK - metric = TensorflowTopK(k=1) + from neural_compressor import Metric + metric = Metric(name="topk", k=1) def eval_func(dataloader, metric): warmup = 5 diff --git a/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/main.py b/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/main.py index d39cae3e978..928d8e14044 100644 --- a/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/main.py +++ b/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/main.py @@ -340,7 +340,7 @@ def evaluate(model): accuracy (float): evaluation result, the larger is better. 
""" from neural_compressor.model import Model - from neural_compressor.metric import TensorflowTopK + from neural_compressor import Metric model = Model(model) input_tensor = model.input_tensor output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ @@ -348,7 +348,7 @@ def evaluate(model): iteration = -1 if FLAGS.benchmark and FLAGS.mode == 'performance': iteration = FLAGS.iters - metric = TensorflowTopK(k=1) + metric = Metric(name="topk", k=1) def eval_func(dataloader): latency_list = [] diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/main.py index 39a22b72215..8090854e7d7 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/main.py @@ -117,8 +117,8 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[5, 10, 50, 100]) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) from neural_compressor.data import LabelShift postprocess = LabelShift(label_shift=1) def eval(model): @@ -137,8 +137,8 @@ def eval(model): 'filter': None } dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) from neural_compressor.data import LabelShift postprocess = LabelShift(label_shift=1) def eval(model): diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/main.py index 9a5a259cf10..b6bc5a4beb4 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/main.py @@ -117,8 +117,8 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) from neural_compressor.data import LabelShift postprocess = LabelShift(label_shift=1) def eval(model): @@ -137,8 +137,8 @@ def eval(model): 'filter': None } dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) from neural_compressor.data import LabelShift postprocess = LabelShift(label_shift=1) def eval(model): diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/main.py index 9a5a259cf10..b6bc5a4beb4 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/main.py @@ -117,8 +117,8 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', 
eval_dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) from neural_compressor.data import LabelShift postprocess = LabelShift(label_shift=1) def eval(model): @@ -137,8 +137,8 @@ def eval(model): 'filter': None } dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) from neural_compressor.data import LabelShift postprocess = LabelShift(label_shift=1) def eval(model): diff --git a/examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/main.py index 0352df45c2e..54a10864ec1 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/main.py @@ -132,8 +132,8 @@ def run(self): conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100], inputs=['truediv'], outputs=['Squeeze'], op_name_dict=op_name_dict) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) @@ -152,8 +152,8 @@ def run(self): 'filter': None } dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) def eval(model): return evaluate(model, dataloader, top1) diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/main.py index 04aaab2a85d..f7344a2069e 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/main.py @@ -112,8 +112,8 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) @@ -127,8 +127,8 @@ def run(self): 'filter': None } dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) def eval(model): return evaluate(model, dataloader, top1) diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/main.py index da22e136f84..bfdb6828847 100644 --- 
a/examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/main.py @@ -112,8 +112,8 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=cali_dataloader, eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) @@ -127,8 +127,8 @@ def run(self): 'filter': None } dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) def eval(model): return evaluate(model, dataloader, top1) diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/main.py index aa713d0b0f1..9d067450d09 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/main.py @@ -112,8 +112,8 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) @@ -127,8 +127,8 @@ def run(self): 'filter': None } dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) def eval(model): return evaluate(model, dataloader, top1) diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/main.py index 6ca34ea6180..584ee7d5878 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/main.py @@ -114,8 +114,8 @@ def run(self): op_name_dict = {'v0/cg/conv0/conv2d/Conv2D': { 'activation': {'dtype': ['fp32']}}} conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100], op_name_dict=op_name_dict) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) @@ -129,8 +129,8 @@ def run(self): 'filter': None } dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", 
k=1) def eval(model): return evaluate(model, dataloader, top1) diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/main.py index de098c98a09..976f63e10d2 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/main.py @@ -112,8 +112,8 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) @@ -127,8 +127,8 @@ def run(self): 'filter': None } dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) def eval(model): return evaluate(model, dataloader, top1) diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/main.py index e8c04fa9713..b2a3f63b9c2 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/main.py @@ -112,8 +112,8 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[20, 50]) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) @@ -127,8 +127,8 @@ def run(self): 'filter': None } dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) def eval(model): return evaluate(model, dataloader, top1) diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/main.py b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/main.py index a0b3479721b..16227eeb321 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/export/main.py @@ -96,8 +96,8 @@ def run(self): } dataloader = create_dataloader('tensorflow', dataloader_args) conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100]) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) q_model = quantization.fit(self.args.input_graph, conf=conf, calib_dataloader=dataloader, eval_dataloader=dataloader, eval_metric=top1) q_model.save("./tf-quant.pb") @@ -125,8 +125,8 @@ def run(self): 
'filter': None } dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) def eval(model): if isinstance(model, str): return eval_func_tf(model, dataloader, top1) diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/main.py index ec7ea64fa4d..29295247a9f 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/main.py @@ -112,8 +112,8 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[20, 50]) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) @@ -127,8 +127,8 @@ def run(self): 'filter': None } dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) def eval(model): return evaluate(model, dataloader, top1) diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py index 94f39bb7654..d331f07eb9e 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py @@ -247,8 +247,8 @@ def run(self): } conf = PostTrainingQuantConfig(calibration_sampling_size=[20, 50], op_name_dict=op_name_dict) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) @@ -262,8 +262,8 @@ def run(self): 'filter': None } dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) def eval(model): return evaluate(model, dataloader, top1) diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/main.py index 4124f7a9ee8..34504fdd348 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/main.py @@ -115,8 +115,8 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import 
Metric + top1 = Metric(name="topk", k=1) from neural_compressor.data import LabelShift postprocess = LabelShift(label_shift=1) def eval(model): @@ -135,8 +135,8 @@ def eval(model): 'filter': None } dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) from neural_compressor.data import LabelShift postprocess = LabelShift(label_shift=1) def eval(model): diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/main.py index aaf5a55aa01..6a7f34aaa12 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/export/main.py @@ -106,8 +106,8 @@ def run(self): eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100], outputs=['softmax_tensor']) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) q_model = quantization.fit(self.args.input_graph, conf=conf, calib_dataloader=calib_dataloader, eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save("./tf-quant.pb") @@ -137,8 +137,8 @@ def run(self): } dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) def eval(model): if isinstance(model, str): diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/main.py index 96acbd2a71c..43a0389fbd3 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/main.py @@ -112,8 +112,8 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) @@ -127,8 +127,8 @@ def run(self): 'filter': None } dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) def eval(model): return evaluate(model, dataloader, top1) diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py index 6d4c9f455aa..d0db8763532 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py @@ -115,8 +115,8 @@ def run(self): eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) 
conf = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100], outputs=['softmax_tensor']) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) q_model = quantization.fit(self.args.input_graph, conf=conf, calib_dataloader=calib_dataloader, eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save("./tf-quant.pb") @@ -146,8 +146,8 @@ def run(self): } dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) def eval(model): if isinstance(model, str): diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py index 86cb6b2efd9..08e16e38d94 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py @@ -117,8 +117,8 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) conf = PostTrainingQuantConfig(outputs=['softmax_tensor'], calibration_sampling_size=[50, 100]) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, eval_dataloader=eval_dataloader, eval_metric=top1) @@ -134,8 +134,8 @@ def run(self): 'filter': None } dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) def eval(model): return evaluate(model, dataloader, top1) diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/main.py index aa713d0b0f1..9d067450d09 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/main.py @@ -112,8 +112,8 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) @@ -127,8 +127,8 @@ def run(self): 'filter': None } dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) def eval(model): return evaluate(model, dataloader, top1) diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/main.py index b63e0eb1160..8226dcb053b 100644 --- 
a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/main.py @@ -112,8 +112,8 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) @@ -127,8 +127,8 @@ def run(self): 'filter': None } dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) def eval(model): return evaluate(model, dataloader, top1) diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py index aa713d0b0f1..9d067450d09 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py @@ -112,8 +112,8 @@ def run(self): } eval_dataloader = create_dataloader('tensorflow', eval_dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) q_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=calib_dataloader, eval_dataloader=eval_dataloader, eval_metric=top1) q_model.save(args.output_graph) @@ -127,8 +127,8 @@ def run(self): 'filter': None } dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) def eval(model): return evaluate(model, dataloader, top1) diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/main.py b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/main.py index 792d3d4e0e2..5e9d4ad1e69 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/export/main.py @@ -23,7 +23,7 @@ import onnxruntime as ort from argparse import ArgumentParser from neural_compressor.data import LabelShift -from neural_compressor.metric import TensorflowTopK +from neural_compressor import Metric from neural_compressor.utils.create_obj_from_config import create_dataloader tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) @@ -113,7 +113,7 @@ def run(self): raise ValueError("Only support tensorflow export to ONNX for QDQ format, " "please make sure input the correct quant_format.") - top1 = TensorflowTopK(k=1) + top1 = Metric(name="topk", k=1) postprocess = LabelShift(label_shift=1) if args.export: diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/main.py index ca19710cae5..7e002f8bd50 100644 --- 
a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/main.py @@ -106,8 +106,8 @@ def run(self): } dataloader = create_dataloader('tensorflow', dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) from neural_compressor.data import LabelShift postprocess = LabelShift(label_shift=1) def eval(model): @@ -126,8 +126,8 @@ def eval(model): 'filter': None } dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) from neural_compressor.data import LabelShift postprocess = LabelShift(label_shift=1) def eval(model): diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/main.py b/examples/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/main.py index ca19710cae5..7e002f8bd50 100644 --- a/examples/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/main.py @@ -106,8 +106,8 @@ def run(self): } dataloader = create_dataloader('tensorflow', dataloader_args) conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) from neural_compressor.data import LabelShift postprocess = LabelShift(label_shift=1) def eval(model): @@ -126,8 +126,8 @@ def eval(model): 'filter': None } dataloader = create_dataloader('tensorflow', dataloader_args) - from neural_compressor.metric import TensorflowTopK - top1 = TensorflowTopK(k=1) + from neural_compressor import Metric + top1 = Metric(name="topk", k=1) from neural_compressor.data import LabelShift postprocess = LabelShift(label_shift=1) def eval(model): diff --git a/neural_compressor/__init__.py b/neural_compressor/__init__.py index 7e494942dd9..977406e75a1 100644 --- a/neural_compressor/__init__.py +++ b/neural_compressor/__init__.py @@ -25,3 +25,4 @@ MixedPrecisionConfig from .contrib import * from .model import * +from .metric import * diff --git a/neural_compressor/experimental/metric/metric.py b/neural_compressor/experimental/metric/metric.py index 5d613e3100d..6efce83038d 100644 --- a/neural_compressor/experimental/metric/metric.py +++ b/neural_compressor/experimental/metric/metric.py @@ -871,16 +871,16 @@ def result(self): @metric_registry('topk', 'tensorflow, tensorflow_itex') class TensorflowTopK(BaseMetric): """Compute Top-k Accuracy classification score for Tensorflow model. - + This metric computes the number of times where the correct label is among the top k labels predicted. - + Attributes: k (int): The number of most likely outcomes considered to find the correct label. num_correct: The number of predictions that were correct classified. num_sample: The total number of predictions. """ - + def __init__(self, k=1): """Initialize the k, number of samples and correct predictions. 
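All of the example hunks in this series perform the same substitution: the framework-specific `TensorflowTopK` import is replaced by the unified `neural_compressor.Metric` entry point, which looks the metric up by its registered name and forwards the remaining keyword arguments to it. A minimal, self-contained sketch of the call pattern the examples converge on is shown below; it borrows the resnet18/dummy-dataset combination from the test changes later in this series, so the model and data are illustrative placeholders rather than any one example's inputs.

```python
import torchvision
from neural_compressor import Metric, quantization
from neural_compressor.config import PostTrainingQuantConfig
from neural_compressor.data import Datasets, DATALOADERS

# Dummy data stands in for real calibration/evaluation dataloaders.
model = torchvision.models.resnet18()
dataset = Datasets("pytorch")["dummy"]((1, 3, 224, 224))
dataloader = DATALOADERS["pytorch"](dataset)

# name="topk" selects the registered top-k metric for the detected framework,
# replacing the per-framework TensorflowTopK(k=1) / GeneralTopK(k=1) calls.
top1 = Metric(name="topk", k=1)

q_model = quantization.fit(model,
                           PostTrainingQuantConfig(),
                           calib_dataloader=dataloader,
                           eval_dataloader=dataloader,
                           eval_metric=top1)
```

For a user-defined metric, the same wrapper takes the class instead, e.g. `Metric(metric_cls=MyTopK, name="my_topk", k=1)`, where `MyTopK` is a hypothetical subclass of `neural_compressor.metric.BaseMetric`.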
diff --git a/neural_compressor/metric/metric.py b/neural_compressor/metric/metric.py index cfc6d1b3b8a..94cde8f62ac 100644 --- a/neural_compressor/metric/metric.py +++ b/neural_compressor/metric/metric.py @@ -37,20 +37,20 @@ class Metric(object): The metric class should take the outputs of the model as the metric's inputs, neural_compressor built-in metric always take (predictions, labels) as inputs, it's recommended to design metric_cls to take (predictions, labels) as inputs. + + Args: + metric_cls (cls): Should be a instance of sub_class of neural_compressor.metric.BaseMetric or a customer's metric, + which takes (predictions, labels) as inputs + name (str, optional): Name for metric. Defaults to 'user_metric'. """ - - def __init__(self, metric_cls, name='user_metric', **kwargs): - """Initialize a Metric with needed information. - - Args: - metric_cls (cls): Should be a sub_class of neural_compressor.metric.BaseMetric, - which takes (predictions, labels) as inputs - name (str, optional): Name for metric. Defaults to 'user_metric'. - """ + + def __init__(self, name='user_metric', metric_cls=None, **kwargs): + """Initialize a Metric with needed information.""" self.metric_cls = metric_cls self.name = name self.kwargs = kwargs + @singleton class TensorflowMetrics(object): """Tensorflow metrics collection. @@ -58,7 +58,7 @@ class TensorflowMetrics(object): Attributes: metrics: A dict to maintain all metrics for Tensorflow model. """ - + def __init__(self) -> None: """Initialize the metrics collection.""" self.metrics = {} @@ -72,7 +72,7 @@ class PyTorchMetrics(object): Attributes: metrics: A dict to maintain all metrics for PyTorch model. """ - + def __init__(self) -> None: """Initialize the metrics collection.""" self.metrics = {} @@ -86,7 +86,7 @@ class MXNetMetrics(object): Attributes: metrics: A dict to maintain all metrics for MXNet model. """ - + def __init__(self) -> None: """Initialize the metrics collection.""" from neural_compressor.adaptor.mxnet_utils.util import check_mx_version @@ -110,7 +110,7 @@ class ONNXRTQLMetrics(object): Attributes: metrics: A dict to maintain all metrics for ONNXRT QLinear model. """ - + def __init__(self) -> None: """Initialize the metrics collection.""" self.metrics = {} @@ -124,7 +124,7 @@ class ONNXRTITMetrics(object): Attributes: metrics: A dict to maintain all metrics for ONNXRT Integer model. """ - + def __init__(self) -> None: """Initialize the metrics collection.""" self.metrics = {} @@ -180,7 +180,7 @@ def __init__(self, framework: str): framework: The framwork name. """ assert framework in ("tensorflow", "tensorflow_itex", "keras", - "pytorch", "pytorch_ipex", "pytorch_fx", "onnxrt_qdq", + "pytorch", "pytorch_ipex", "pytorch_fx", "onnxrt_qdq", "onnxrt_qlinearops", "onnxrt_integerops", "mxnet", "onnxruntime"), \ "framework support tensorflow pytorch mxnet onnxrt" @@ -212,18 +212,18 @@ def register(self, name, metric_cls) -> None: def metric_registry(metric_type: str, framework: str): """Decorate for registering all Metric subclasses. - + The cross-framework metric is supported by specifying the framework param as one of tensorflow, pytorch, mxnet, onnxrt. - + Args: metric_type: The metric type. framework: The framework name. - + Returns: decorator_metric: The function to register metric class. 
""" - + def decorator_metric(cls): for single_framework in [fwk.strip() for fwk in framework.split(',')]: assert single_framework in [ @@ -249,8 +249,8 @@ def decorator_metric(cls): class BaseMetric(object): """The base class of Metric.""" - - def __init__(self, metric, single_output = False, hvd = None): + + def __init__(self, metric, single_output=False, hvd=None): """Initialize the basic metric. Args: @@ -333,10 +333,10 @@ def hvd(self, hvd): class WrapPyTorchMetric(BaseMetric): """The wrapper of Metric class for PyTorch.""" - + def update(self, preds, labels=None, sample_weight=None): """Convert the prediction to torch. - + Args: preds: The prediction result. labels: The reference. Defaults to None. @@ -359,10 +359,10 @@ def result(self): class WrapMXNetMetric(BaseMetric): """The wrapper of Metric class for MXNet.""" - + def update(self, preds, labels=None, sample_weight=None): """Convert the prediction to MXNet array. - + Args: preds: The prediction result. labels: The reference. Defaults to None. @@ -378,7 +378,7 @@ def reset(self): def result(self): """Evaluate the difference between predictions and labels. - + Returns: acc: The evaluated result. """ @@ -387,10 +387,10 @@ def result(self): class WrapONNXRTMetric(BaseMetric): """The wrapper of Metric class for ONNXRT.""" - + def update(self, preds, labels=None, sample_weight=None): """Convert the prediction to NumPy array. - + Args: preds: The prediction result. labels: The reference. Defaults to None. @@ -406,7 +406,7 @@ def reset(self): def result(self): """Evaluate the difference between predictions and labels. - + Returns: acc: The evaluated result. """ @@ -486,16 +486,16 @@ def _shape_validate(preds, labels): @metric_registry('F1', 'tensorflow, tensorflow_itex, pytorch, mxnet, onnxrt_qlinearops, onnxrt_integerops') class F1(BaseMetric): """F1 score of a binary classification problem. - + The F1 score is the harmonic mean of the precision and recall. It can be computed with the equation: F1 = 2 * (precision * recall) / (precision + recall) """ - + def __init__(self): """Initialize the F1 score list.""" self._score_list = [] - + def update(self, preds, labels): """Add the predictions and labels. @@ -575,7 +575,7 @@ def _accuracy_type_check(preds, labels): @metric_registry('Accuracy', 'tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops') class Accuracy(BaseMetric): """The Accuracy for the classification tasks. - + The accuracy score is the proportion of the total number of predictions that were correct classified. @@ -584,7 +584,7 @@ class Accuracy(BaseMetric): label_list: List of labels to score. sample: The total number of samples. """ - + def __init__(self): """Initialize predictions, labels and sample.""" self.pred_list = [] @@ -642,10 +642,10 @@ def result(self): class PyTorchLoss(): """A dummy PyTorch Metric. - + A dummy metric that computes the average of predictions and prints it directly. """ - + def __init__(self): """Initialize the number of examples, sum of prediction. and device.""" self._num_examples = 0 @@ -686,14 +686,14 @@ def compute(self): @metric_registry('Loss', 'tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops') class Loss(BaseMetric): """A dummy Metric. - + A dummy metric that computes the average of predictions and prints it directly. - + Attributes: sample: The number of samples. sum: The sum of prediction. 
""" - + def __init__(self): """Initialize the number of samples, sum of prediction.""" self.sample = 0 @@ -718,7 +718,7 @@ def reset(self): def result(self): """Compute the average of predictions. - + Returns: The dummy loss. """ @@ -790,12 +790,12 @@ def result(self): @metric_registry('RMSE', 'tensorflow, tensorflow_itex, pytorch, mxnet, onnxrt_qlinearops, onnxrt_integerops') class RMSE(BaseMetric): """Computes Root Mean Squared Error (RMSE) loss. - + Attributes: mse: The instance of MSE Metric. """ - + def __init__(self, compare_label=True): """Initialize the mse. @@ -834,18 +834,18 @@ def result(self): @metric_registry('MSE', 'tensorflow, tensorflow_itex, pytorch, onnxrt_qlinearops, onnxrt_integerops') class MSE(BaseMetric): """Computes Mean Squared Error (MSE) loss. - + Mean Squared Error(MSE) represents the average of the squares of errors. For example, the average squared difference between the estimated values and the actual values. - + Attributes: pred_list: List of prediction to score. label_list: List of references corresponding to the prediction result. compare_label (bool): Whether to compare label. False if there are no labels and will use FP32 preds as labels. """ - + def __init__(self, compare_label=True): """Initialize the list of prediction and labels. @@ -893,16 +893,16 @@ def result(self): @metric_registry('topk', 'tensorflow, tensorflow_itex') class TensorflowTopK(BaseMetric): """Compute Top-k Accuracy classification score for Tensorflow model. - + This metric computes the number of times where the correct label is among the top k labels predicted. - + Attributes: k (int): The number of most likely outcomes considered to find the correct label. num_correct: The number of predictions that were correct classified. num_sample: The total number of predictions. """ - + def __init__(self, k=1): """Initialize the k, number of samples and correct predictions. @@ -960,16 +960,16 @@ def result(self): @metric_registry('topk', 'pytorch, mxnet, onnxrt_qlinearops, onnxrt_integerops') class GeneralTopK(BaseMetric): """Compute Top-k Accuracy classification score. - + This metric computes the number of times where the correct label is among the top k labels predicted. - + Attributes: k (int): The number of most likely outcomes considered to find the correct label. num_correct: The number of predictions that were correct classified. num_sample: The total number of predictions. """ - + def __init__(self, k=1): """Initialize the k, number of samples and correct predictions. @@ -1023,7 +1023,7 @@ def result(self): allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) return allgather_num_correct / allgather_num_sample return self.num_correct / self.num_sample - + @metric_registry('COCOmAPv2', 'tensorflow, tensorflow_itex, onnxrt_qlinearops, onnxrt_integerops') class COCOmAPv2(BaseMetric): diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py index 8f058a283c3..7de9d4d2722 100644 --- a/neural_compressor/quantization.py +++ b/neural_compressor/quantization.py @@ -73,10 +73,10 @@ def pre_proccess(self): cfg = self.conf strategy = cfg.quantization.tuning_criterion.strategy - + if cfg.quantization.quant_level == "auto": strategy = "auto" - + elif cfg.quantization.quant_level == 0: strategy = "conservative" @@ -279,7 +279,7 @@ def metric(self, user_metric): 1. neural_compressor have many built-in metrics, user can pass a metric configure dict to tell neural compressor what metric will be use. 
- You can set multi-metrics to evaluate the performance of a specific model. + You can also set multi-metrics to evaluate the performance of a specific model. Single metric: {topk: 1} Multi-metrics: @@ -291,7 +291,9 @@ def metric(self, user_metric): For the built-in metrics, please refer to below link: https://github.com/intel/neural-compressor/blob/master/docs/source/metric.md#supported-built-in-metric-matrix. - 2. User also can set specific metric through this api. The metric class should take the outputs of the model or + 2. Users can also get the built-in metrics via neural_compressor.Metric: + Metric(name="topk", k=1) + 3. Users can also set a specific metric through this API. The metric class should take the outputs of the model or postprocess(if have) as inputs, neural_compressor built-in metric always take(predictions, labels) as inputs for update, and user_metric.metric_cls should be sub_class of neural_compressor.metric.BaseMetric. @@ -306,9 +308,16 @@ def metric(self, user_metric): metric_cfg = user_metric else: if isinstance(user_metric, NCMetric): - name = user_metric.name - metric_cls = user_metric.metric_cls - metric_cfg = {name: {**user_metric.kwargs}} + if user_metric.metric_cls is None: + name = user_metric.name + metric_cls = METRICS(self.conf.quantization.framework).metrics[name] + metric_cfg = {name: {**user_metric.kwargs}} + self._metric = metric_cfg + return + else: + name = user_metric.name + metric_cls = user_metric.metric_cls + metric_cfg = {name: {**user_metric.kwargs}} else: for i in ['reset', 'update', 'result']: assert hasattr(user_metric, i), 'Please realise {} function' \ diff --git a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2.x.py b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2.x.py index f9853d46fbd..d08eb9cac7d 100644 --- a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2.x.py +++ b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2.x.py @@ -5,7 +5,7 @@ import torch import torch.nn as nn import unittest -from neural_compressor import PostTrainingQuantConfig, QuantizationAwareTrainingConfig, set_workspace +from neural_compressor import PostTrainingQuantConfig, QuantizationAwareTrainingConfig, set_workspace, Metric from neural_compressor.data import Datasets, DATALOADERS, DataLoader from neural_compressor import quantization from neural_compressor.training import prepare_compression, fit @@ -325,19 +325,13 @@ def test_fx_quant(self): q_model = quantization.fit(model_origin, conf, calib_dataloader=dataloader, - calib_func=eval_func) + eval_func=eval_func) q_model.save("./saved") # Load configure and weights with neural_compressor.utils model_fx = load("./saved", model_origin) self.assertTrue("quantize" in str(type(q_model.model.fc))) self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) - if approach != "qat": - # recover int8 model with only tune_cfg - history_file = "./saved/history.snapshot" - model_fx_recover = recover(model_origin, history_file, 0, - **{"dataloader": dataloader}) - self.assertEqual(model_fx.code, model_fx_recover.code) shutil.rmtree("./saved", ignore_errors=True) for approach in ["qat", "static"]: @@ -367,6 +361,29 @@ def test_fx_quant(self): self.assertTrue(isinstance(model_fx, torch.fx.graph_module.GraphModule)) shutil.rmtree("./saved", ignore_errors=True) + def test_quantize_with_metric(self): + model_origin = resnet18() + dataset = Datasets("pytorch")["dummy"]((1, 3, 224, 224)) + dataloader = DATALOADERS["pytorch"](dataset) + # run fx_quant in neural_compressor and save the quantized GraphModule +
conf = PostTrainingQuantConfig() + q_model = quantization.fit(model_origin, + conf, + calib_dataloader=dataloader, + eval_dataloader=dataloader, + eval_metric=Metric(name="topk", k=1)) + self.assertTrue("quantize" in str(type(q_model.model.fc))) + + def test_quantize_with_calib_func(self): + model_origin = resnet18() + # run fx_quant in neural_compressor and save the quantized GraphModule + conf = PostTrainingQuantConfig() + q_model = quantization.fit(model_origin, + conf, + calib_func=eval_func, + eval_func=eval_func) + self.assertTrue("quantize" in str(type(q_model.model.fc))) + @unittest.skipIf(PT_VERSION < Version("1.9.0").release, "Please use PyTroch 1.9 or higher version for dynamic quantization with pytorch_fx backend") def test_fx_dynamic_quant(self): @@ -498,7 +515,7 @@ def test_mix_precision(self): self.assertEqual(q_model._model.conv.module.module.weight.dtype, torch.bfloat16) self.assertEqual(q_model._model.conv.module.module.bias.dtype, torch.bfloat16) - + def test_hawq_metric(self): # Test for hawq metric import torchvision @@ -507,17 +524,16 @@ def test_hawq_metric(self): from neural_compressor.config import PostTrainingQuantConfig from neural_compressor.model.torch_model import PyTorchFXModel from neural_compressor.adaptor.torch_utils.hawq_metric import hawq_top - + ori_model = torchvision.models.resnet18() pt_model = PyTorchFXModel(ori_model) dataset = Datasets("pytorch")["dummy"](((16, 3, 224, 224))) dataloader = DATALOADERS["pytorch"](dataset) - q_model = fit(ori_model, conf = PostTrainingQuantConfig(), calib_dataloader=dataloader) + q_model = fit(ori_model, conf=PostTrainingQuantConfig(), calib_dataloader=dataloader) op_to_traces = hawq_top(fp32_model=pt_model, q_model=q_model, dataloader=dataloader, \ criterion=None, enable_act=True) self.assertIsNotNone(op_to_traces) - if __name__ == "__main__": unittest.main() diff --git a/test/metric/test_metrics_2.x.py b/test/metric/test_metrics_2.x.py index 5380f6f16c3..5515031860c 100644 --- a/test/metric/test_metrics_2.x.py +++ b/test/metric/test_metrics_2.x.py @@ -117,7 +117,6 @@ def test_squad_evaluate(self): f1_squad = evaluate_squad(dataset,predictions) self.assertEqual(f1_squad['f1'], 100.) self.assertEqual(f1_squad['exact_match'], 100.) 
-
     def test_pytorch_F1(self):
         metrics = METRICS('pytorch')
@@ -373,11 +372,11 @@ def test_tensorflow_mAP(self):
         ]

         mAP = metrics['mAP']()
-        
+
         self.assertEqual(mAP.result(), 0)

         mAP.update(detection, ground_truth)
-        
+
         mAP.update(detection, ground_truth)
         self.assertEqual(format(mAP.result(), '.5f'),
                             '0.18182')
@@ -436,7 +435,6 @@ def test_tensorflow_mAP(self):
             self.assertRaises(ValueError, mAP.update, detection_2, ground_truth_2)
             os.remove('anno.yaml')

-
     def test_tensorflow_VOCmAP(self):
         import os
         metrics = METRICS('tensorflow')
@@ -537,11 +535,11 @@ def test_tensorflow_VOCmAP(self):
             np.array([[64, 62, 62, 67, 82, 52, 79, 81, 55, 55, 55, 55, 62, 55]]),
             np.array([b'000000037777.jpg'])
         ]
-        
+
         self.assertEqual(mAP.result(), 0)

         mAP.update(detection, ground_truth)
-        
+
         mAP.update(detection, ground_truth)
         self.assertEqual(format(mAP.result(), '.5f'),
                             '0.18182')

From 5e0fe92f2f63a93e580ee330ee72d9875273d479 Mon Sep 17 00:00:00 2001
From: "Cheng, Penghui" 
Date: Sat, 22 Apr 2023 18:12:24 +0800
Subject: [PATCH 085/103] Fixed pylint error

Signed-off-by: Cheng, Penghui 

---
 neural_compressor/metric/metric.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/neural_compressor/metric/metric.py b/neural_compressor/metric/metric.py
index 94cde8f62ac..91a2328390a 100644
--- a/neural_compressor/metric/metric.py
+++ b/neural_compressor/metric/metric.py
@@ -39,8 +39,8 @@ class Metric(object):
     recommended to design metric_cls to take (predictions, labels) as inputs.

     Args:
-        metric_cls (cls): Should be a instance of sub_class of neural_compressor.metric.BaseMetric or a customer's metric,
-            which takes (predictions, labels) as inputs
+        metric_cls (cls): Should be an instance of a subclass of neural_compressor.metric.BaseMetric
+            or a custom metric, which takes (predictions, labels) as inputs.
         name (str, optional): Name for metric. Defaults to 'user_metric'.
     """

From a2d69337707c76fbb03df37b4c84b0f78f54747b Mon Sep 17 00:00:00 2001
From: "Cheng, Penghui" 
Date: Sun, 23 Apr 2023 09:31:53 +0800
Subject: [PATCH 086/103] Fixed typo

Signed-off-by: Cheng, Penghui 

---
 examples/helloworld/tf_example3/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/helloworld/tf_example3/README.md b/examples/helloworld/tf_example3/README.md
index 905f8e7904a..3f754d13ad1 100644
--- a/examples/helloworld/tf_example3/README.md
+++ b/examples/helloworld/tf_example3/README.md
@@ -30,7 +30,7 @@ We can get a BF16 model using the Mixed Precision API.
 from neural_compressor.config import MixedPrecisionConfig
 from neural_compressor import mix_precision
 from neural_compressor import Metric
-top1 = TensorflowTopK(k=1)
+top1 = Metric(name="topk", k=1)
 config = MixedPrecisionConfig()
 mix_precision_model = mix_precision.fit(
     model="./mobilenet_v1_1.0_224_frozen.pb",

From 5e298d6bb34faf8b3b67038a31dea7153dbe6943 Mon Sep 17 00:00:00 2001
From: "Cheng, Penghui" 
Date: Sun, 23 Apr 2023 09:52:12 +0800
Subject: [PATCH 087/103] add mixed_precision config

Signed-off-by: Cheng, Penghui 

---
 neural_compressor/config.py        | 152 ++++++++++++++++++++++-------
 neural_compressor/mix_precision.py |  75 +++++++-------
 2 files changed, 152 insertions(+), 75 deletions(-)

diff --git a/neural_compressor/config.py b/neural_compressor/config.py
index 6f5a3bbe3ee..aa4f329f10d 100644
--- a/neural_compressor/config.py
+++ b/neural_compressor/config.py
@@ -167,7 +167,7 @@ class Options:
         set_workspace("workspace_path")
         set_resume_from("workspace_path")
         set_tensorboard(True)
-    
+
     """
     def __init__(self, random_seed=1978, workspace=default_workspace,
                  resume_from=None, tensorboard=False):
@@ -275,7 +275,7 @@ def __init__(self,
         self.num_of_instance = num_of_instance
         self.inter_num_of_threads = inter_num_of_threads
         self.intra_num_of_threads = intra_num_of_threads
-        self._framework=None
+        self._framework = None

     def keys(self):
         """Returns keys of the dict."""
@@ -286,7 +286,7 @@ def keys(self):
     def __getitem__(self, item):
         """Get the dict."""
         return getattr(self, item)
-    
+
     @property
     def backend(self):
         """Get backend."""
@@ -362,7 +362,7 @@ def cores_per_instance(self):
     def cores_per_instance(self, cores_per_instance):
         """Set cores_per_instance."""
         if cores_per_instance is None or _check_value('cores_per_instance', cores_per_instance,
-            int):
+                                                      int):
             self._cores_per_instance = cores_per_instance

     @property
@@ -397,14 +397,14 @@ def intra_num_of_threads(self):
     def intra_num_of_threads(self, intra_num_of_threads):
         """Set intra_num_of_threads."""
         if intra_num_of_threads is None or _check_value('intra_num_of_threads',
-            intra_num_of_threads, int):
+                                                        intra_num_of_threads, int):
             self._intra_num_of_threads = intra_num_of_threads

     @property
     def model(self):
         """Get model."""
         return self._model
-    
+
     @model.setter
     def model(self, model):
         """Set model."""
@@ -420,12 +420,12 @@ def model_name(self, model_name):
         """Set model name."""
         if _check_value("model_name", model_name, str):
             self._model_name = model_name
-    
+
     @property
     def framework(self):
         """Get framework."""
         return self._framework
-    
+
     @framework.setter
     def framework(self, framework):
         """Set framework."""
@@ -536,7 +536,7 @@ def __getitem__(self, item):

 class TuningCriterion:
     """Class for Tuning Criterion.
-    
+
     Args:
         strategy: Strategy name used in tuning. Please refer to docs/source/tuning_strategies.md.
         strategy_kwargs: Parameters for strategy. Please refer to docs/source/tuning_strategies.md.
         objective: objective with accuracy constraint guaranteed, supports 'performance',
                    'modelsize', 'footprint'. Please refer to docs/source/objective.md.
         timeout: Tuning timeout (seconds). Default value is 0 which means early stop.
         max_trials: Max tune times. Default value is 100. Combine with timeout field to decide when to exit.
- + Example:: from neural_compressor.config import TuningCriterion - + tuning_criterion=TuningCriterion( timeout=0, max_trials=100, @@ -598,7 +598,7 @@ def objective(self, objective): ['performance', 'accuracy', 'modelsize', 'footprint']): self._objective = objective return - + if _check_value('objective', objective, dict): if 'weight' in objective.keys() and isinstance(objective['weight'], list): assert len(objective['objective']) == len(objective['weight']) @@ -735,8 +735,8 @@ def __init__(self, self.tuning_criterion = tuning_criterion self.calibration_sampling_size = calibration_sampling_size self.quant_level = quant_level - self.use_distributed_tuning=use_distributed_tuning - self._framework=None + self.use_distributed_tuning = use_distributed_tuning + self._framework = None self._example_inputs = example_inputs @property @@ -1019,15 +1019,15 @@ def inputs(self): def inputs(self, inputs): if _check_value('inputs', inputs, str): self._inputs = inputs - + @property def framework(self): return self._framework - + @framework.setter def framework(self, framework): self._framework = framework - + @property def example_inputs(self): """Get strategy_kwargs.""" @@ -1638,9 +1638,9 @@ def teacher_model(self, teacher_model): self._teacher_model = teacher_model -class MixedPrecisionConfig(_BaseQuantizationConfig): +class MixedPrecisionConfig(object): """Config Class for MixedPrecision. - + Args: device (str, optional): Device for execution. Support 'cpu' and 'gpu', default is 'cpu'. @@ -1669,25 +1669,26 @@ def __init__(self, backend="default", precision="bf16", model=None, + model_name="", inputs=[], outputs=[], tuning_criterion=tuning_criterion, accuracy_criterion=accuracy_criterion, excluded_precisions=[]): """Init a MixedPrecisionConfig object.""" - super().__init__(inputs=inputs, - outputs=outputs, - device=device, - backend=backend, - tuning_criterion=tuning_criterion, - accuracy_criterion=accuracy_criterion, - excluded_precisions=excluded_precisions, - ) + self.inputs = inputs + self.outputs = outputs + self.backend = backend + self.device = device + self.excluded_precisions = excluded_precisions + self.accuracy_criterion = accuracy_criterion + self.tuning_criterion = tuning_criterion self.precision = precision + self.use_bf16 = "bf16" in self.precision self.model = model - # For align with quant config - self._approach = None - + self.model_name = model_name + self._framework = None + @property def precision(self): """Get precision.""" @@ -1703,21 +1704,102 @@ def precision(self, precision): assert all([i in ["fp16", "bf16"] for i in precision]), "Only " \ "support 'fp16' and 'bf16' for mix precision." 
self._precision = precision - + @property def model(self): """Get model.""" return self._model - + @model.setter def model(self, model): """Set model.""" self._model = model @property - def approach(self): - """Get approach.""" - return self._approach + def model_name(self): + """Get model name.""" + return self._model_name + + @model_name.setter + def model_name(self, model_name): + """Set model name.""" + if _check_value("model_name", model_name, str): + self._model_name = model_name + + @property + def accuracy_criterion(self): + return self._accuracy_criterion + + @accuracy_criterion.setter + def accuracy_criterion(self, accuracy_criterion): + if _check_value("accuracy_criterion", accuracy_criterion, AccuracyCriterion): + self._accuracy_criterion = accuracy_criterion + + @property + def tuning_criterion(self): + """Get tuning_criterion.""" + return self._tuning_criterion + + @tuning_criterion.setter + def tuning_criterion(self, tuning_criterion): + """Set tuning_criterion.""" + if _check_value("tuning_criterion", tuning_criterion, TuningCriterion): + self._tuning_criterion = tuning_criterion + + @property + def device(self): + return self._device + + @device.setter + def device(self, device): + if _check_value('device', device, str, ['cpu', 'gpu']): + self._device = device + + @property + def backend(self): + return self._backend + + @backend.setter + def backend(self, backend): + if _check_value('backend', backend, str, [ + 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep']): + self._backend = backend + + @property + def outputs(self): + return self._outputs + + @outputs.setter + def outputs(self, outputs): + if _check_value('outputs', outputs, str): + self._outputs = outputs + + @property + def inputs(self): + return self._inputs + + @inputs.setter + def inputs(self, inputs): + if _check_value('inputs', inputs, str): + self._inputs = inputs + + @property + def framework(self): + return self._framework + + @framework.setter + def framework(self, framework): + self._framework = framework + + @property + def excluded_precisions(self): + return self._excluded_precisions + + @excluded_precisions.setter + def excluded_precisions(self, excluded_precisions): + if _check_value("excluded_precisions", excluded_precisions, str, ["bf16", "fp16"]): + self._excluded_precisions = excluded_precisions + self._use_bf16 = "bf16" not in excluded_precisions class ExportConfig: diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py index 59fdfa2bdb8..be5c4474c51 100644 --- a/neural_compressor/mix_precision.py +++ b/neural_compressor/mix_precision.py @@ -38,7 +38,7 @@ class _MixedPrecision: def eval_func(model): ... return accuracy - + conf = MixedPrecisionConfig() output_model = mix_precision.fit( model, @@ -52,7 +52,7 @@ def __init__(self, conf=None): Args: conf (obj): The MixedPrecisionConfig class containing accuracy goal, tuning objective etc. """ - self.conf = _Config(quantization=conf, benchmark=None, pruning=None, distillation=None, nas=None) + self.conf = _Config(mixed_precision=conf, quantization=None, benchmark=None, pruning=None, distillation=None, nas=None) seed = options.random_seed random.seed(seed) np.random.seed(seed) @@ -64,7 +64,6 @@ def __init__(self, conf=None): def pre_process(self): """Create strategy object for tuning.""" - cfg = self.conf strategy = 'automixedprecision' _resume = None # check if interrupted tuning procedure exists. 
if yes, it will resume the @@ -78,10 +77,8 @@ def pre_process(self): _resume = pickle.load(f).__dict__ self.strategy = STRATEGIES[strategy]( - model = self.model, - conf = self.conf, - q_dataloader=None, - q_func=None, + model=self.model, + conf=self.conf, eval_func=self._eval_func, eval_dataloader=self._eval_dataloader, eval_metric=self.metric, @@ -91,7 +88,7 @@ def pre_process(self): def execute(self): """Execute routinue based on strategy design.""" try: - with time_limit(self.conf.quantization.tuning_criterion.timeout): + with time_limit(self.conf.mixed_precision.tuning_criterion.timeout): self.strategy.traverse() except KeyboardInterrupt: pass @@ -162,7 +159,7 @@ def eval_dataloader(self, dataloader): batched data and only in this setter method a 'real' eval_dataloader will be created, the reason is we have to know the framework info - and only after the Quantization object created then + and only after the mixed_precision object created then framework infomation can be known. Future we will support creating iterable dataloader from neural_compressor.common.DataLoader """ @@ -193,56 +190,56 @@ def model(self, user_model): make sure the name is in supported slim model list. """ cfg = self.conf - if cfg.quantization.framework is None: + if cfg.mixed_precision.framework is None: if isinstance(user_model, BaseModel): - cfg.quantization.framework = list(MODELS.keys())[list(MODELS.values()).index(type(user_model))] - if cfg.quantization.backend == "ipex": - assert cfg.quantization.framework == "pytorch_ipex",\ + cfg.mixed_precision.framework = list(MODELS.keys())[list(MODELS.values()).index(type(user_model))] + if cfg.mixed_precision.backend == "ipex": + assert cfg.mixed_precision.framework == "pytorch_ipex",\ "Please wrap the model with correct Model class!" - if cfg.quantization.backend == "itex": # pragma: no cover + if cfg.mixed_precision.backend == "itex": # pragma: no cover from .model.tensorflow_model import get_model_type if get_model_type(user_model.model) == 'keras': - assert cfg.quantization.framework == "keras",\ + assert cfg.mixed_precision.framework == "keras",\ "Please wrap the model with KerasModel class!" else: - assert cfg.quantization.framework == "pytorch_itex", \ + assert cfg.mixed_precision.framework == "pytorch_itex", \ "Please wrap the model with TensorflowModel class!" 
else: framework = get_model_fwk_name(user_model) if framework == "tensorflow": from .model.tensorflow_model import get_model_type - if get_model_type(user_model) == 'keras' and cfg.quantization.backend == 'itex': + if get_model_type(user_model) == 'keras' and cfg.mixed_precision.backend == 'itex': framework = 'keras' if framework == "pytorch": - if cfg.quantization.backend == "default": + if cfg.mixed_precision.backend == "default": framework = "pytorch_fx" - elif cfg.quantization.backend == "ipex": + elif cfg.mixed_precision.backend == "ipex": framework = "pytorch_ipex" - cfg.quantization.framework = framework + cfg.mixed_precision.framework = framework if not isinstance(user_model, BaseModel): logger.warning("Force convert framework model to neural_compressor model.") - if "tensorflow" in cfg.quantization.framework or cfg.quantization.framework == "keras": - self._model = Model(user_model, backend=cfg.quantization.framework - , device=cfg.quantization.device) + if "tensorflow" in cfg.mixed_precision.framework or cfg.mixed_precision.framework == "keras": + self._model = Model(user_model, backend=cfg.mixed_precision.framework + , device=cfg.mixed_precision.device) else: - self._model = Model(user_model, backend=cfg.quantization.framework) + self._model = Model(user_model, backend=cfg.mixed_precision.framework) else: # pragma: no cover - if cfg.quantization.framework == "pytorch_ipex": + if cfg.mixed_precision.framework == "pytorch_ipex": from neural_compressor.model.torch_model import IPEXModel assert type(user_model) == IPEXModel, \ "The backend is ipex, please wrap the model with IPEXModel class!" - elif cfg.quantization.framework == "pytorch_fx": + elif cfg.mixed_precision.framework == "pytorch_fx": from neural_compressor.model.torch_model import PyTorchFXModel assert type(user_model) == PyTorchFXModel, \ "The backend is default, please wrap the model with PyTorchFXModel class!" self._model = user_model - if 'tensorflow' in cfg.quantization.framework: - self._model.name = cfg.quantization.model_name - self._model.output_tensor_names = cfg.quantization.outputs - self._model.input_tensor_names = cfg.quantization.inputs + if 'tensorflow' in cfg.mixed_precision.framework: + self._model.name = cfg.mixed_precision.model_name + self._model.output_tensor_names = cfg.mixed_precision.outputs + self._model.input_tensor_names = cfg.mixed_precision.inputs self._model.workspace_path = options.workspace @property @@ -290,7 +287,7 @@ def metric(self, user_metric): metric_cls = type(user_metric).__name__ name = 'user_' + metric_cls metric_cfg = {name: id(user_metric)} - metrics = METRICS(self.conf.quantization.framework) + metrics = METRICS(self.conf.mixed_precision.framework) metrics.register(name, metric_cls) self._metric = metric_cfg @@ -330,10 +327,8 @@ def fit(model, to .onnx file or onnx.onnx_ml_pb2.ModelProto. For MXNet model, it's mxnet.symbol.Symbol or gluon.HybirdBlock instance. - config (MixedPrecisionConfig): The path to the YAML configuration file or - QuantConf class containing accuracy goal, - tuning objective and preferred calibration & - quantization tuning space etc. + config (MixedPrecisionConfig): The MixedPrecisionConfig class containing accuracy goal, + tuning objective and mixed_precision tuning space etc. eval_func (function, optional): The evaluation function provided by user. This function takes model as parameter, and evaluation dataset and metrics should be @@ -354,7 +349,7 @@ def fit(model, and pre-defined metrics to run evaluation process. 
        eval_metric (obj, optional): An Accuracy object that measures metric for
-                                 quantization.
+                                 mixed_precision.

    Returns:
        A _MixedPrecision object that generates low precision model across various DL frameworks.

@@ -376,11 +371,11 @@ def fit(model,
                        "please modify precision or excluded_precisions to make it understandable.")
         sys.exit(0)
     precisions = list(set(config.precision) - set(config.excluded_precisions))
-    converter.conf.quantization.precisions = precisions
+    converter.conf.mixed_precision.precisions = precisions
     converter.model = model

     if ('bf16' in precisions or 'fp16' in precisions) and \
-        converter.conf.quantization.framework == "onnxruntime": # pragma: no cover
+        converter.conf.mixed_precision.framework == "onnxruntime": # pragma: no cover
         if config.device == "cpu":
             logger.warning("Mix precision exits due to device isn't gpu for onnx models.")
             sys.exit(0)
@@ -388,7 +383,7 @@ def fit(model,
             logger.warning("Mix precision exits due to backend isn't onnxrt_cuda_ep for onnx models.")
             sys.exit(0)
     elif 'bf16' in precisions and not CpuInfo().bf16 and \
-        converter.conf.quantization.framework != "onnxruntime": # pragma: no cover
+        converter.conf.mixed_precision.framework != "onnxruntime": # pragma: no cover
         if os.getenv('FORCE_BF16') == '1':
             logger.warning("Mix precision will generate bf16 graph although " \
                            "the hardware doesn't support bf16 instruction.")
@@ -396,7 +391,7 @@ def fit(model,
             logger.warning("Mix precision exits due to the hardware " \
                            "doesn't support bf16 instruction.")
             sys.exit(0)
-    elif 'fp16' in precisions and converter.conf.quantization.framework != "onnxruntime":
+    elif 'fp16' in precisions and converter.conf.mixed_precision.framework != "onnxruntime":
         logger.warning("Currently mix precision only supports fp16 for onnx models.")
         sys.exit(0)
     if eval_func is not None:

From bcf0fbc83098504c63e7be793558d80aecbd73b1 Mon Sep 17 00:00:00 2001
From: yiliu30 
Date: Sun, 23 Apr 2023 10:37:55 +0800
Subject: [PATCH 088/103] distinguish mix precision and quantization

Signed-off-by: yiliu30 

---
 neural_compressor/contrib/strategy/sigopt.py |  13 +-
 neural_compressor/contrib/strategy/tpe.py    |  15 ++-
 neural_compressor/strategy/auto.py           |   7 -
 .../strategy/auto_mixed_precision.py         |   6 +-
 neural_compressor/strategy/basic.py          |   4 +-
 neural_compressor/strategy/hawq_v2.py        |   2 +-
 neural_compressor/strategy/mse_v2.py         |   2 +-
 neural_compressor/strategy/strategy.py       | 120 +++++++++---------
 .../strategy/utils/tuning_space.py           |  14 +-
 test/strategy/test_tuning_space_1.x.py       |   2 +-
 test/strategy/test_tuning_space_v2_1.x.py    |   2 +-
 11 files changed, 91 insertions(+), 96 deletions(-)

diff --git a/neural_compressor/contrib/strategy/sigopt.py b/neural_compressor/contrib/strategy/sigopt.py
index 7e1039b90dd..4b6d6bb5661 100644
--- a/neural_compressor/contrib/strategy/sigopt.py
+++ b/neural_compressor/contrib/strategy/sigopt.py
@@ -108,7 +108,8 @@ def __init__(self,
                          resume=resume,
                          q_hooks=q_hooks)
         logger.info(f"*** Initialize SigOpt tuning")
-        strategy_name = conf.quantization.tuning_criterion.strategy
+        self.config = conf.quantization
+        strategy_name = self.config.tuning_criterion.strategy
         if strategy_name.lower() == "sigopt":
             try:
                 import sigopt
             except ImportError:
                 try:
                     import subprocess
                     import sys
                     subprocess.check_call([sys.executable, "-m", "pip", "install", "sigopt"])
                     import sigopt  # pylint: disable=import-error
-                except:
+                except Exception:
                     assert False, "Unable to import sigopt from the local environment."
            else:
                pass

         # SigOpt init
-        strategy_kwargs = conf.quantization.tuning_criterion.strategy_kwargs
+        strategy_kwargs = self.config.tuning_criterion.strategy_kwargs
         client_token = strategy_kwargs.get('sigopt_api_token', None)
         self.project_id = strategy_kwargs.get('sigopt_project_id', None)
         self.experiment_name = strategy_kwargs.get('sigopt_experiment_name', None)
@@ -182,7 +183,7 @@ def next_tune_cfg(self):

     def get_acc_target(self, base_acc):
         """Get the tuning target of the accuracy criterion."""
-        accuracy_criterion_conf = self.conf.quantization.accuracy_criterion
+        accuracy_criterion_conf = self.config.accuracy_criterion
         if accuracy_criterion_conf.criterion == 'relative':
             return base_acc * (1. - accuracy_criterion_conf.tolerable_loss)
         else:
@@ -206,7 +207,7 @@ def traverse(self):
             # add tune_cfg here as quantize use tune_cfg
             trials_count += 1
             tuning_history = self._find_tuning_history(tune_cfg)
-            if tuning_history and trials_count < self.conf.quantization.tuning_criterion.max_trials:
+            if tuning_history and trials_count < self.config.tuning_criterion.max_trials:
                 self.last_tune_result = tuning_history['last_tune_result']
                 self.best_tune_result = tuning_history['best_tune_result']
                 logger.warn("Find evaluated tuning config, skip.")
@@ -225,7 +226,7 @@ def traverse(self):
             self.last_tune_cfg = copy.deepcopy(tune_cfg)
             self.last_tune_result = self._evaluate(self.last_qmodel)

-            need_stop = self.stop(self.conf.quantization.tuning_criterion.timeout, trials_count)
+            need_stop = self.stop(self.config.tuning_criterion.timeout, trials_count)

             # record the tuning history
             saved_tune_cfg = copy.deepcopy(tune_cfg)
diff --git a/neural_compressor/contrib/strategy/tpe.py b/neural_compressor/contrib/strategy/tpe.py
index ff9c777006d..719174221cb 100644
--- a/neural_compressor/contrib/strategy/tpe.py
+++ b/neural_compressor/contrib/strategy/tpe.py
@@ -93,10 +93,11 @@ def __init__(self,
                  resume=None,
                  q_hooks=None):
         """Initialize the tpe tuning strategy if the user specified to use it."""
-        assert conf.quantization.approach == 'post_training_static_quant', \
+        self.config = conf.quantization
+        assert self.config.approach == 'post_training_static_quant', \
             "TPE strategy is only for post training static quantization!"
"""Initialize the tpe tuning strategy if the user specified to use it.""" - strategy_name = conf.quantization.tuning_criterion.strategy + strategy_name = self.config.tuning_criterion.strategy if strategy_name.lower() == "tpe": try: import hyperopt @@ -115,15 +116,15 @@ def __init__(self, self.cfg_evaluated = False self.hpopt_trials = hyperopt.Trials() self.max_trials = 200 - if conf.quantization.tuning_criterion.max_trials: - self.max_trials = conf.quantization.tuning_criterion.max_trials + if self.config.tuning_criterion.max_trials: + self.max_trials = self.config.tuning_criterion.max_trials self.loss_function_config = { 'acc_th': 0.01, 'acc_weight': 1.0, 'lat_weight': 1.0 } - accuracy_criterion = conf.quantization.accuracy_criterion + accuracy_criterion = self.config.accuracy_criterion if accuracy_criterion.criterion == 'relative': self.loss_function_config['acc_th'] = accuracy_criterion.tolerable_loss @@ -306,7 +307,7 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): self._save_trials(trials_file) self._update_best_result(best_result_file) self._save() - if self.stop(self.conf.quantization.tuning_criterion.timeout, trials_count): + if self.stop(self.config.tuning_criterion.timeout, trials_count): exit = True else: logger.warn("Can't create search space for input model.") @@ -518,7 +519,7 @@ def stop(self, timeout, trials_count): if timeout == 0 and self.best_tune_result: need_stop = True - elif trials_count >= self.conf.quantization.tuning_criterion.max_trials: + elif trials_count >= self.config.tuning_criterion.max_trials: need_stop = True else: need_stop = False diff --git a/neural_compressor/strategy/auto.py b/neural_compressor/strategy/auto.py index e779a55d945..01eb8c3148c 100644 --- a/neural_compressor/strategy/auto.py +++ b/neural_compressor/strategy/auto.py @@ -15,17 +15,10 @@ # See the License for the specific language governing permissions and # limitations under the License. """The auto tuning strategy.""" -import copy from copy import deepcopy -import numpy as np -from collections import OrderedDict from .strategy import strategy_registry, TuneStrategy, STRATEGIES from ..utils import logger -from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler, ModelWiseTuningSampler -from .utils.tuning_structs import OpTuningConfig -from .utils.constant import TUNING_ITEMS_LST - @strategy_registry class AutoTuneStrategy(TuneStrategy): """The auto tuning strategy. 
diff --git a/neural_compressor/strategy/auto_mixed_precision.py b/neural_compressor/strategy/auto_mixed_precision.py index bee43873601..0ca90c01a31 100644 --- a/neural_compressor/strategy/auto_mixed_precision.py +++ b/neural_compressor/strategy/auto_mixed_precision.py @@ -46,7 +46,7 @@ def next_tune_cfg(self): # filter quantization dtype # TODO align with the old mixed-precison - target_dtypes = self.conf.quantization.precisions + target_dtypes = self.config.precisions target_dtypes = list(set(target_dtypes) - set(['fp32'])) tuning_space = self.tuning_space initial_op_tuning_cfg = {} @@ -116,7 +116,7 @@ def traverse(self): tune_cfg = self._tune_cfg_converter(op_tuning_cfg) self.trials_count += 1 tuning_history = self._find_tuning_history(tune_cfg) - if tuning_history and self.trials_count < self.conf.quantization.tuning_criterion.max_trials: + if tuning_history and self.trials_count < self.config.tuning_criterion.max_trials: self.last_tune_result = tuning_history['last_tune_result'] self.best_tune_result = tuning_history['best_tune_result'] logger.warn("Find evaluated tuning config, skip.") @@ -137,7 +137,7 @@ def traverse(self): q_config = copy.deepcopy(self.last_qmodel.q_config) self.last_tune_result = self._evaluate(self.last_qmodel) self.cur_best_acc, self.cur_best_tuning_cfg = self.update_best_op_tuning_cfg(op_tuning_cfg) - need_stop = self.stop(self.conf.quantization.tuning_criterion.timeout, self.trials_count) + need_stop = self.stop(self.config.tuning_criterion.timeout, self.trials_count) # record the tuning history saved_tune_cfg = copy.deepcopy(tune_cfg) saved_last_tune_result = copy.deepcopy(self.last_tune_result) diff --git a/neural_compressor/strategy/basic.py b/neural_compressor/strategy/basic.py index 0e348d8ed05..b788fb868c9 100644 --- a/neural_compressor/strategy/basic.py +++ b/neural_compressor/strategy/basic.py @@ -81,7 +81,7 @@ def distributed_next_tune_cfg_lst(self, comm): # stage 2: yield new_op_tuning_cfg_lst (length of stage 1) # Fallback the ops supported both static and dynamic from static to dynamic # Tuning items: None - if self.conf.quantization.approach == 'post_training_auto_quant': + if self.config.approach == 'post_training_auto_quant': static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if item in tuning_space.query_items_by_quant_mode('dynamic')] if static_dynamic_items: @@ -256,7 +256,7 @@ def next_tune_cfg(self): # Fallback the ops supported both static and dynamic from static to dynamic # Tuning items: None - if self.conf.quantization.approach == 'post_training_auto_quant': + if self.config.approach == 'post_training_auto_quant': static_dynamic_items = [item for item in tuning_space.query_items_by_quant_mode('static') if item in tuning_space.query_items_by_quant_mode('dynamic')] if static_dynamic_items: diff --git a/neural_compressor/strategy/hawq_v2.py b/neural_compressor/strategy/hawq_v2.py index de005885bee..7bfb39b56be 100644 --- a/neural_compressor/strategy/hawq_v2.py +++ b/neural_compressor/strategy/hawq_v2.py @@ -65,7 +65,7 @@ def next_tune_cfg(self): logger.info(f"************** Start compute the hessian trace *****************") target_dtype = "fp32" hawq_v2_criterion = None - strategy_kwargs = self.conf.quantization.tuning_criterion.strategy_kwargs + strategy_kwargs = self.config.tuning_criterion.strategy_kwargs if strategy_kwargs: hawq_v2_criterion = strategy_kwargs.get('hawq_v2_loss', None) # assert hawq_v2_criterion is not None, "HAWQ-V2 strategy needs model loss function to compute the gradient, \ 
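The `hawq_v2_loss` entry that the HAWQ-V2 strategy pulls from `strategy_kwargs` above is supplied the same way. A hedged sketch, assuming a PyTorch model and a standard cross-entropy loss:

    import torch
    from neural_compressor import PostTrainingQuantConfig
    from neural_compressor.config import TuningCriterion

    tuning_criterion = TuningCriterion(
        strategy="hawq_v2",
        # loss used to compute the gradients behind the Hessian-trace scoring
        strategy_kwargs={"hawq_v2_loss": torch.nn.functional.cross_entropy},
    )
    conf = PostTrainingQuantConfig(tuning_criterion=tuning_criterion)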
diff --git a/neural_compressor/strategy/mse_v2.py b/neural_compressor/strategy/mse_v2.py index b3ad336d07d..5750bf4e607 100644 --- a/neural_compressor/strategy/mse_v2.py +++ b/neural_compressor/strategy/mse_v2.py @@ -107,7 +107,7 @@ def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): requantize_cfg = deepcopy(self._tune_cfg_converter(self.cur_best_tuning_cfg)) self.output_op_names = self.adaptor.get_output_op_names(self.last_qmodel) confidence_batches = 2 - strategy_kwargs = self.conf.quantization.tuning_criterion.strategy_kwargs + strategy_kwargs = self.config.tuning_criterion.strategy_kwargs if strategy_kwargs and strategy_kwargs.get('confidence_batches', None): confidence_batches = strategy_kwargs.get('confidence_batches', None) diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 0216e68cded..639c3e3ba1f 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -77,7 +77,7 @@ def strategy_registry(cls): @strategy_registry class TuneStrategy(object): """Basic class for tuning strategy.""" - + def _check_tuning_status(self): if self.eval_func: self._not_tuning = False @@ -103,7 +103,7 @@ def _check_tuning_status(self): logger.info("Quantize the model with default configuration without evaluating the model.\ To perform the tuning process, please either provide an eval_func or provide an\ eval_dataloader an eval_metric.") - + def __init__(self, model, conf, @@ -132,6 +132,8 @@ def __init__(self, """ self.model = model self.conf = conf + # TODO replace self.config with mixed_precision when self is an instance of AutoMixedPrecisionTuneStrategy + self.config = conf.quantization self.history_path = self._create_path(options.workspace, './history.snapshot') self.deploy_path = self._create_path(options.workspace, 'deploy.yaml') self.calib_dataloader = q_dataloader @@ -168,10 +170,10 @@ def __init__(self, self.trials_count = 0 self.capability = self.adaptor.query_fw_capability(model) logger.debug(self.capability) - self.set_tuning_space(conf) + self.set_tuning_space(self.config) #For algo scheduler - self.algo_scheduler = AlgorithmScheduler(self.conf.quantization.recipes) + self.algo_scheduler = AlgorithmScheduler(self.config.recipes) self.algo_scheduler.dataloader = self.calib_dataloader # reuse the calibration iteration self.algo_scheduler.origin_model = self.model self.algo_scheduler.adaptor = self.adaptor @@ -230,7 +232,7 @@ def _initialize_recipe(self): # not tuning list: the value is not equal to the default value logger.info(f"Adaptor has {len(adaptor_recipes)} recipes.") logger.debug(adaptor_recipes) - usr_recipes_cfg = self.conf.quantization.recipes if self.conf.quantization.recipes else {} + usr_recipes_cfg = self.config.recipes if self.config.recipes else {} for recipe_name, recipe_val in usr_recipes_cfg.items(): # for not tuning recipes, use the value specified by user. 
if recipe_name in adaptor_recipes and recipe_val != adaptor_recipes[recipe_name][0]: @@ -365,9 +367,9 @@ def master_worker_handle(self, comm): break # send the next cfg if not exceed max trials - if self.overall_trials > self.conf.quantization.tuning_criterion.max_trials: + if self.overall_trials > self.config.tuning_criterion.max_trials: self.max_trial_flag = True - # elif time.time() - self.overall_time_start > self.conf.quantization.tuning_criterion.timeout: + # elif time.time() - self.overall_time_start > self.config.tuning_criterion.timeout: # self.max_time_flag = True elif cur_cfg_id < len(self.tune_cfg_lst): logger.info("[Rank {}]master sends new tuning cfg {} to rank: {}".format(comm.Get_rank(), \ @@ -603,8 +605,8 @@ def traverse(self): The main traverse logic which could be override by some concrete strategy which needs more hooks. """ self._eval_baseline() - if self.conf.quantization.use_distributed_tuning: - logger.info("use distributed traverse: {}".format(self.conf.quantization.use_distributed_tuning)) + if self.config.use_distributed_tuning: + logger.info("use distributed traverse: {}".format(self.config.use_distributed_tuning)) return self.distributed_traverse() traverse_start_time = time() for op_tuning_cfg in self.next_tune_cfg(): @@ -612,7 +614,7 @@ def traverse(self): tune_cfg = self._tune_cfg_converter(op_tuning_cfg) self.trials_count += 1 tuning_history = self._find_tuning_history(tune_cfg) - if tuning_history and self.trials_count < self.conf.quantization.tuning_criterion.max_trials: + if tuning_history and self.trials_count < self.config.tuning_criterion.max_trials: self.last_tune_result = tuning_history['last_tune_result'] self.best_tune_result = tuning_history['best_tune_result'] logger.warn("Find evaluated tuning config, skip.") @@ -642,7 +644,7 @@ def traverse(self): return self.last_tune_result = self._evaluate(self.last_qmodel) self.cur_best_acc, self.cur_best_tuning_cfg = self.update_best_op_tuning_cfg(op_tuning_cfg) - need_stop = self.stop(self.conf.quantization.tuning_criterion.timeout, self.trials_count) + need_stop = self.stop(self.config.tuning_criterion.timeout, self.trials_count) # record the tuning history saved_tune_cfg = copy.deepcopy(tune_cfg) @@ -670,7 +672,7 @@ def traverse(self): continue # recover the best quantized model from tuning config self._recover_best_qmodel_from_tuning_cfg() - if self.conf.quantization.diagnosis: + if self.config.diagnosis: logger.debug(f'*** Start to do diagnosis (inspect tensor).') self._diagnosis() if self.use_multi_objective and len(self.tune_result_record) > 1 and \ @@ -827,13 +829,13 @@ def initial_tuning_cfg(self): """ from .utils.constant import auto_query_order, static_query_order, dynamic_query_order from .utils.tuning_space import initial_tuning_cfg_with_quant_mode - if self.conf.quantization.approach == 'post_training_auto_quant': + if self.config.approach == 'post_training_auto_quant': query_order = auto_query_order - elif self.conf.quantization.approach == 'post_training_dynamic_quant': + elif self.config.approach == 'post_training_dynamic_quant': query_order = dynamic_query_order - elif self.conf.quantization.approach == 'post_training_static_quant': + elif self.config.approach == 'post_training_static_quant': query_order = static_query_order - elif self.conf.quantization.approach == 'quant_aware_training': + elif self.config.approach == 'quant_aware_training': query_order = auto_query_order quant_mode_wise_items = OrderedDict() # mode, op_item_lst @@ -927,29 +929,29 @@ def _tune_cfg_converter(self, 
op_tuning_cfg): self.calib_dataloader.batch_size) else: tune_cfg['calib_iteration'] = 1 - tune_cfg['approach'] = self.conf.quantization.approach + tune_cfg['approach'] = self.config.approach # Add the recipe config tune_cfg['recipe_cfgs'] = tune_cfg.get('recipe_cfgs', {}) # For not tuning recipe, tune cfg use it directly tune_cfg['recipe_cfgs'].update(self._not_tuning_recipes_values) # WA for get the smooth quant args - if 'smooth_quant_args' in self.conf.quantization.recipes: - tune_cfg['recipe_cfgs']['smooth_quant_args'] = self.conf.quantization.recipes['smooth_quant_args'] + if 'smooth_quant_args' in self.config.recipes: + tune_cfg['recipe_cfgs']['smooth_quant_args'] = self.config.recipes['smooth_quant_args'] # For tuning recipe, use the default value if it not specified by recipe tuning sampler. for recipe_name, recipe_val in self._tuning_recipes_default_values.items(): if recipe_name not in tune_cfg['recipe_cfgs']: tune_cfg['recipe_cfgs'][recipe_name] = recipe_val return tune_cfg - def set_tuning_space(self, conf): + def set_tuning_space(self, config): """Create the tuning space. Create the tuning space based on the framework capability and user configuration. Args: - conf: The Conf class instance includes all user configurations. + config: The Conf class instance includes all user configurations. """ - calib_sampling_size_lst = self.conf.quantization.calibration_sampling_size + calib_sampling_size_lst = self.config.calibration_sampling_size calib_sampling_size_lst = [int(calib_sampling_size) for calib_sampling_size in calib_sampling_size_lst] if self.calib_dataloader: self.calib_iter = [math.ceil(int(x) / self.calib_dataloader.batch_size) \ @@ -961,7 +963,7 @@ def set_tuning_space(self, conf): 'calib': {'calib_sampling_size': calib_sampling_size_lst}, 'op': self.capability['opwise'] } - self.tuning_space = TuningSpace(adaptor_cap, conf=conf, framework=self.framework) + self.tuning_space = TuningSpace(adaptor_cap, conf=config, framework=self.framework) def setup_resume(self, resume): """Resume the best quantized model from tuning history. 
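As a worked example of the calibration bookkeeping above: the number of calibration iterations is derived from the requested sampling size and the dataloader batch size. A sketch, assuming a batch size of 8:

    import math

    calibration_sampling_size = [100]  # as passed through the quantization config
    batch_size = 8                     # assumed dataloader batch size
    calib_iter = [math.ceil(int(x) / batch_size) for x in calibration_sampling_size]
    # calib_iter == [13]: thirteen batches of 8 cover the requested 100 samples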
@@ -990,7 +992,7 @@ def setup_resume(self, resume):

     def set_q_func(self):
         """Set the training function for quantization aware training."""
-        if self.conf.quantization.approach == 'quant_aware_training':
+        if self.config.approach == 'quant_aware_training':
             assert self.q_func != None, "Please set train func for quantization aware training"

     def _create_path(self, custom_path, filename):
@@ -1000,28 +1002,28 @@ def _create_path(self, custom_path, filename):
         return new_path

     def _set_framework_info(self, q_dataloader, q_func=None):
-        framework_specific_info = {'device': self.conf.quantization.device,
-                                   'approach': self.conf.quantization.approach,
+        framework_specific_info = {'device': self.config.device,
+                                   'approach': self.config.approach,
                                    'random_seed': options.random_seed,
                                    'performance_only': self._not_tuning}
-        framework = self.conf.quantization.framework.lower()
-        framework_specific_info.update({'backend': self.conf.quantization.backend})
-        framework_specific_info.update({'format': self.conf.quantization.quant_format})
-        framework_specific_info.update({'domain': self.conf.quantization.quant_format})
+        framework = self.config.framework.lower()
+        framework_specific_info.update({'backend': self.config.backend})
+        framework_specific_info.update({'format': self.config.quant_format})
+        framework_specific_info.update({'domain': self.config.domain})

-        self.mixed_precision_mode = isinstance(self.conf.quantization, MixedPrecisionConfig)
+        self.mixed_precision_mode = isinstance(self.config, MixedPrecisionConfig)

         if 'tensorflow' in framework:
             framework_specific_info.update(
-                {"inputs": self.conf.quantization.inputs,
-                 "outputs": self.conf.quantization.outputs,
+                {"inputs": self.config.inputs,
+                 "outputs": self.config.outputs,
                  'workspace_path': options.workspace,
-                 'recipes': self.conf.quantization.recipes,
-                 'use_bf16': self.conf.quantization.use_bf16 if self.conf.quantization.use_bf16 is not None else False})
+                 'recipes': self.config.recipes,
+                 'use_bf16': self.config.use_bf16 if self.config.use_bf16 is not None else False})
             for item in ['scale_propagation_max_pooling', 'scale_propagation_concat']:
                 if item not in framework_specific_info['recipes']:
                     framework_specific_info['recipes'].update({item: True})
-            if self.conf.quantization.backend == 'itex':
+            if self.config.backend == 'itex':
                 framework = 'tensorflow_itex'
         if 'keras' in framework:
             framework_specific_info.update({
@@ -1033,40 +1035,40 @@ def _set_framework_info(self, q_dataloader, q_func=None):
             framework_specific_info.update({"approach": "post_training_dynamic_quant"})
             framework_specific_info.update({"deploy_path": os.path.dirname(self.deploy_path)})
             framework_specific_info.update({'workspace_path': options.workspace})
-            framework_specific_info.update({'recipes': self.conf.quantization.recipes})
-            framework_specific_info.update({'reduce_range': self.conf.quantization.reduce_range})
-            framework_specific_info.update({'recipes': self.conf.quantization.recipes})
+            framework_specific_info.update({'recipes': self.config.recipes})
+            framework_specific_info.update({'reduce_range': self.config.reduce_range})
             if framework.lower() == 'onnxrt_qdq' or \
                 framework_specific_info['backend'] == 'onnxrt_trt_ep':
                 framework_specific_info.update({'format': 'QDQ'})
                 framework = 'onnxrt_qdq'
         if framework == 'pytorch_ipex' or framework == 'pytorch' or framework == 'pytorch_fx':
-            if self.conf.quantization.backend == 'ipex':
+            if self.config.backend == 'ipex':
                 framework = 'pytorch_ipex'
-            elif
self.conf.quantization.backend == 'default': + elif self.config.backend == 'default': framework = 'pytorch_fx' if self.mixed_precision_mode: framework_specific_info.update({"approach": "post_training_dynamic_quant"}) - framework_specific_info.update({'recipes': self.conf.quantization.recipes}) + framework_specific_info.update({'recipes': self.config.recipes}) framework_specific_info.update({"q_dataloader": q_dataloader}) - framework_specific_info.update({"use_bf16": self.conf.quantization.use_bf16 \ - if self.conf.quantization.use_bf16 is not None else True}) + framework_specific_info.update({"use_bf16": self.config.use_bf16 \ + if self.config.use_bf16 is not None else True}) framework_specific_info.update( {"workspace_path": os.path.dirname(self.deploy_path)}) - if self.conf.quantization.op_name_dict is not None \ - and 'default_qconfig' in self.conf.quantization.op_name_dict: + if self.config.op_name_dict is not None \ + and 'default_qconfig' in self.config.op_name_dict: framework_specific_info.update( - {"default_qconfig": self.conf.quantization.op_name_dict['default_qconfig']}) + {"default_qconfig": self.config.op_name_dict['default_qconfig']}) framework_specific_info.update({"q_func": q_func}) - framework_specific_info.update({"example_inputs": self.conf.quantization.example_inputs}) + framework_specific_info.update({"example_inputs": self.config.example_inputs}) return framework, framework_specific_info def _set_objectives(self): # set objectives - self.higher_is_better = bool(self.conf.quantization.accuracy_criterion.higher_is_better) + self.higher_is_better = bool(self.config.accuracy_criterion.higher_is_better) obj_higher_is_better = None obj_weight = None - obj = self.conf.quantization.tuning_criterion.objective + obj = self.config.tuning_criterion.objective use_multi_objs = isinstance(obj, dict) self.use_multi_objective = False if use_multi_objs: @@ -1100,7 +1102,7 @@ def _set_objectives(self): self.metric_weight = self.eval_metric.get('weight', None) accuracy_criterion = {'relative': 0.01, 'higher_is_better': True} - accuracy_criterion_conf = self.conf.quantization.accuracy_criterion + accuracy_criterion_conf = self.config.accuracy_criterion accuracy_criterion[accuracy_criterion_conf.criterion] = accuracy_criterion_conf.tolerable_loss accuracy_criterion['higher_is_better'] = accuracy_criterion_conf.higher_is_better self.objectives = MultiObjective(objectives=objectives, @@ -1150,14 +1152,14 @@ def deploy_config(self): """Save the configuration locally for deployment.""" self.deploy_cfg = OrderedDict() model_cfg = dict() - model_cfg['inputs'] = self.conf.quantization.inputs - model_cfg['outputs'] = self.conf.quantization.outputs - model_cfg['backend'] = self.conf.quantization.backend - model_cfg['quant_format'] = self.conf.quantization.quant_format - model_cfg['domain'] = self.conf.quantization.domain - model_cfg['backend'] = self.conf.quantization.backend + model_cfg['inputs'] = self.config.inputs + model_cfg['outputs'] = self.config.outputs + model_cfg['backend'] = self.config.backend + model_cfg['quant_format'] = self.config.quant_format + model_cfg['domain'] = self.config.domain + model_cfg['backend'] = self.config.backend self.deploy_cfg['model'] = model_cfg - self.deploy_cfg['device'] = self.conf.quantization.device + self.deploy_cfg['device'] = self.config.device def setup_yaml(): represent_dict_order = lambda self, \ @@ -1388,7 +1390,7 @@ def stop(self, timeout, trials_count): need_stop = True elif timeout == 0 and self.best_tune_result: need_stop = True - elif 
self.trials_count >= self.conf.quantization.tuning_criterion.max_trials: + elif self.trials_count >= self.config.tuning_criterion.max_trials: need_stop = True else: need_stop = False diff --git a/neural_compressor/strategy/utils/tuning_space.py b/neural_compressor/strategy/utils/tuning_space.py index ea1d0df9978..a6b8d8ec4bd 100644 --- a/neural_compressor/strategy/utils/tuning_space.py +++ b/neural_compressor/strategy/utils/tuning_space.py @@ -18,7 +18,6 @@ """Tuning space.""" from collections import defaultdict, OrderedDict -import os import re from typing import Dict, Tuple from copy import deepcopy @@ -134,17 +133,16 @@ def __init__(self, capability, conf, framework=None): self.ops_attr = {'activation': set(), 'weight': set()} # {(op_name, op_type): {path1, path2, ...} self.ops_path_set = defaultdict(set) - self._create_tuning_space(capability, self._usr_cfg) - + def _init_usr_cfg(self): """Init user config.""" usr_cfg = {'quantization': {}} usr_cfg['quantization']['model_wise'] = None - usr_cfg['quantization']['optype_wise'] = self.conf.quantization.op_type_dict if self.conf else None - usr_cfg['quantization']['op_wise'] = self.conf.quantization.op_name_dict if self.conf else None + usr_cfg['quantization']['optype_wise'] = self.conf.op_type_dict if self.conf else None + usr_cfg['quantization']['op_wise'] = self.conf.op_name_dict if self.conf else None return usr_cfg - + def _parse_capability(self, capability: Dict) -> None: """Parse the capability and construct the tuning space(a tree). @@ -276,7 +274,7 @@ def _merge_op_wise_cfg(self, cap: Dict, op_wise_usr_cfg: Dict, fw_cap: Dict): cap['op'][op_name_type] = self._merge_op_cfg(cap['op'][op_name_type], op_user_cfg, fw_cap['op'][op_name_type]) - + def _merge_with_user_cfg(self, capability: Dict, user_cfg: Dict): """Merge the capability with user config. @@ -392,7 +390,7 @@ def _merge_with_user_cfg(self, capability: Dict, user_cfg: Dict): def _parse_cap_helper(self, cap): """Convert the cpa to internal format. 
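The `eval_func` contract referenced in these docstrings is a callable that takes the model and returns a single higher-is-better score. A minimal PyTorch-style sketch; `my_eval_dataloader` is a placeholder, not part of the library:

    def eval_func(model):
        # evaluate the model over a user-provided dataset and return a
        # higher-is-better accuracy scalar
        correct = total = 0
        for inputs, labels in my_eval_dataloader:  # placeholder dataloader
            outputs = model(inputs)
            correct += (outputs.argmax(dim=-1) == labels).sum().item()
            total += labels.numel()
        return correct / total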
- + Parsed result: (op_name, op_type): { diff --git a/test/strategy/test_tuning_space_1.x.py b/test/strategy/test_tuning_space_1.x.py index cad910441f6..28086048758 100644 --- a/test/strategy/test_tuning_space_1.x.py +++ b/test/strategy/test_tuning_space_1.x.py @@ -1,4 +1,4 @@ -from neural_compressor.experimental.strategy.utils.tuning_space import TuningItem, TuningSpace +from neural_compressor.experimental.strategy.utils.tuning_space import TuningSpace from neural_compressor.conf.dotdict import DotDict from neural_compressor.utils import logger from copy import deepcopy diff --git a/test/strategy/test_tuning_space_v2_1.x.py b/test/strategy/test_tuning_space_v2_1.x.py index b087daad3db..6bef6c4689a 100644 --- a/test/strategy/test_tuning_space_v2_1.x.py +++ b/test/strategy/test_tuning_space_v2_1.x.py @@ -1,4 +1,4 @@ -from neural_compressor.experimental.strategy.utils.tuning_space import TuningItem, TuningSpace +from neural_compressor.experimental.strategy.utils.tuning_space import TuningSpace from neural_compressor.conf.dotdict import DotDict from neural_compressor.utils import logger from copy import deepcopy From 03e10ad4c20dee6e83890ceeedf5bfa194973527 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Sun, 23 Apr 2023 10:43:56 +0800 Subject: [PATCH 089/103] trim trailing space Signed-off-by: yiliu30 --- neural_compressor/strategy/auto.py | 16 ++--- .../strategy/auto_mixed_precision.py | 8 +-- neural_compressor/strategy/basic.py | 19 +++--- neural_compressor/strategy/bayesian.py | 24 +++---- neural_compressor/strategy/conservative.py | 26 ++++---- neural_compressor/strategy/exhaustive.py | 6 +- neural_compressor/strategy/hawq_v2.py | 8 +-- neural_compressor/strategy/mse.py | 34 +++++----- neural_compressor/strategy/mse_v2.py | 38 +++++------ neural_compressor/strategy/random.py | 4 +- neural_compressor/strategy/strategy.py | 63 +++++++++---------- .../strategy/utils/tuning_sampler.py | 38 +++++------ .../strategy/utils/tuning_space.py | 60 +++++++++--------- .../strategy/utils/tuning_structs.py | 8 +-- neural_compressor/strategy/utils/utility.py | 2 +- 15 files changed, 176 insertions(+), 178 deletions(-) diff --git a/neural_compressor/strategy/auto.py b/neural_compressor/strategy/auto.py index 01eb8c3148c..b79c0a3cfc1 100644 --- a/neural_compressor/strategy/auto.py +++ b/neural_compressor/strategy/auto.py @@ -22,11 +22,11 @@ @strategy_registry class AutoTuneStrategy(TuneStrategy): """The auto tuning strategy. - + There are three stages executed by auto strategy sequentially, and the tuning process ends once the condition meets the exit policy. """ - + def __init__(self, model, conf, @@ -44,8 +44,8 @@ def __init__(self, conf: The Conf class instance includes all user configurations. q_dataloader: Data loader for calibration, mandatory for post-training quantization. Defaults to None. q_func: Training function for quantization aware training. Defaults to None. Defaults to None. - eval_func: The evaluation function provided by user. This function takes model as parameter, and - evaluation dataset and metrics should be encapsulated in this function implementation and + eval_func: The evaluation function provided by user. This function takes model as parameter, and + evaluation dataset and metrics should be encapsulated in this function implementation and outputs a higher-is-better accuracy scalar value. eval_dataloader: Data loader for evaluation. Defaults to None. eval_metric: Metric for evaluation. Defaults to None. 
@@ -64,7 +64,7 @@ def __init__(self, q_hooks=q_hooks) logger.info(f"*** Initialize auto tuning") self.strategies_sequence = ['conservative', 'basic'] - + def sequential_traverse(self): """Try different strategies sequentially.""" pre_strategy = self @@ -80,7 +80,7 @@ def sequential_traverse(self): eval_metric=self.eval_metric, resume=self._resume, q_hooks=self.q_hooks) - + if pre_strategy: #TODO add tuning history from the previous stage to current stage. strategy.baseline = deepcopy(pre_strategy.baseline) @@ -90,7 +90,7 @@ def sequential_traverse(self): strategy.traverse() self.best_qmodel = strategy.best_qmodel if self.best_qmodel: - return + return def next_tune_cfg(self): """Generate and yield the default tuning config. @@ -104,7 +104,7 @@ def next_tune_cfg(self): op_tuning_cfg['calib_sampling_size'] = calib_sampling_size_lst[0] logger.info(f"Quantize the model with default config.") yield op_tuning_cfg - + def traverse(self): """Traverse the tuning space.""" # Quantize model with default config diff --git a/neural_compressor/strategy/auto_mixed_precision.py b/neural_compressor/strategy/auto_mixed_precision.py index 0ca90c01a31..9c2281923cb 100644 --- a/neural_compressor/strategy/auto_mixed_precision.py +++ b/neural_compressor/strategy/auto_mixed_precision.py @@ -30,15 +30,15 @@ @strategy_registry class AutoMixedPrecisionTuneStrategy(TuneStrategy): """Tuning strategy for auto mixed precision.""" - + def next_tune_cfg(self): """Generate the next tuning config. - + Tuning configurations are generated according to the following rules: 1. First, it tries to convert all ops into target date type as many as possible. - 2. If the accuracy does not meets the requirements, it starts the stage of fallback + 2. If the accuracy does not meets the requirements, it starts the stage of fallback which converts ops into higher precision. - + Yields: tune_config (dict): A dict containing the tuning configuration. """ diff --git a/neural_compressor/strategy/basic.py b/neural_compressor/strategy/basic.py index b788fb868c9..1dc16af6890 100644 --- a/neural_compressor/strategy/basic.py +++ b/neural_compressor/strategy/basic.py @@ -29,14 +29,14 @@ @strategy_registry class BasicTuneStrategy(TuneStrategy): """The basic tuning strategy. - + There are three stages executed by Basic strategy sequentially, and the tuning process ends once the condition meets the exit policy. """ def distributed_next_tune_cfg_lst(self, comm): """Generate and yield the next tuning config list with below order. - + 1. OP Type Wise Tuning 2. Fallback OP One by One 3. 
Fallback Multiple OPs Accumulated @@ -49,7 +49,7 @@ def distributed_next_tune_cfg_lst(self, comm): calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options rank = comm.Get_rank() for calib_sampling_size in calib_sampling_size_lst: - # Initialize the tuning config for each op according to the quantization approach + # Initialize the tuning config for each op according to the quantization approach op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() # Optype-wise tuning tuning items: the algorithm/scheme/granularity of activation(weight) early_stop_tuning = False @@ -57,7 +57,7 @@ def distributed_next_tune_cfg_lst(self, comm): quant_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] quant_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] stage1_max = 1e9 # TODO set a more appropriate value - op_type_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], + op_type_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) # stage 1: yield op_tune_cfg_lst op_tuning_cfg_lst_stage_1 = [] @@ -208,7 +208,7 @@ def fallback_by_block(self, fallback_items_lst, best_op_tuning_cfg_stage1, targe def next_tune_cfg(self): """Generate and yield the next tuning config with below order. - + 1. OP Type Wise Tuning: tries to quantize the OPs as many as possible and traverse all OP type wise tuning configs 2. Fallback OP One by One: it performs high-precision OP (FP32, BF16 ...) @@ -247,13 +247,13 @@ def next_tune_cfg(self): logger.info("Early stopping the stage 1.") break yield op_tuning_cfg - + # Apply all recipes, if not got the qmodel that meet the requirements, discard it. if stage1_cnt == 1 and not self.applied_all_recipes_flag: logger.info("Apply all recipes.") self.applied_all_recipes_flag = True yield self.apply_all_tuning_recipes(deepcopy(self.cur_best_tuning_cfg)) - + # Fallback the ops supported both static and dynamic from static to dynamic # Tuning items: None if self.config.approach == 'post_training_auto_quant': @@ -319,7 +319,7 @@ def next_tune_cfg(self): for op_tuning_cfg in fallback_sampler: op_tuning_cfg['calib_sampling_size'] = calib_sampling_size yield op_tuning_cfg - + def _initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg:OpTuningConfig): op_state = op_static_cfg.get_state() op_name = op_static_cfg.op_name @@ -344,5 +344,4 @@ def _initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg:OpTuningConfig) tuning_item = quant_mode_item.get_option_by_name(att_and_method_name) dynamic_state[att_and_method_name] = tuning_item.options[0] if tuning_item else None return OpTuningConfig(op_name, op_type, op_quant_mode, tuning_space, kwargs=dynamic_state) - - \ No newline at end of file + diff --git a/neural_compressor/strategy/bayesian.py b/neural_compressor/strategy/bayesian.py index 49b4d250381..367031e1982 100644 --- a/neural_compressor/strategy/bayesian.py +++ b/neural_compressor/strategy/bayesian.py @@ -51,8 +51,8 @@ def __init__(self, conf: The Conf class instance includes all user configurations. q_dataloader: Data loader for calibration, mandatory for post-training quantization. Defaults to None. q_func: Training function for quantization aware training. Defaults to None. Defaults to None. - eval_func: The evaluation function provided by user. 
This function takes model as parameter, and - evaluation dataset and metrics should be encapsulated in this function implementation and + eval_func: The evaluation function provided by user. This function takes model as parameter, and + evaluation dataset and metrics should be encapsulated in this function implementation and outputs a higher-is-better accuracy scalar value. eval_dataloader: Data loader for evaluation. Defaults to None. eval_metric: Metric for evaluation. Defaults to None. @@ -102,19 +102,19 @@ def next_tune_cfg(self): """Generate the next tuning config according to bayesian search algorithm. This strategy comes from the Bayesian optimization package and changed it to a discrete version. - It uses Gaussian processes to define the prior/posterior distribution over the black-box - function with the tuning history and then finds the tuning configuration that maximizes + It uses Gaussian processes to define the prior/posterior distribution over the black-box + function with the tuning history and then finds the tuning configuration that maximizes the expected improvement. Returns: tune_config (dict): A dict containing the tuning configuration for quantization. """ params = None - pbounds = {} + pbounds = {} tuning_space = self.tuning_space calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() - op_wise_pool = OpWiseTuningSampler(tuning_space, [], [], + op_wise_pool = OpWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) self.op_configs = op_wise_pool.get_opwise_candidate() @@ -271,11 +271,11 @@ def params_to_array(self, params): """Generate an array from params. Args: - params (Dict): The dict contains keys in `self.keys`, and + params (Dict): The dict contains keys in `self.keys`, and corresponding param. Returns: - np.array: An array contains all params. + np.array: An array contains all params. """ try: assert set(params) == set(self.keys) @@ -324,7 +324,7 @@ def register(self, params, target): """Append a point and its target value to the known data. Runs in amortized constant time. - + Args: params (ndarray): a single point, with len(params) == self.dim target (float): target function value @@ -347,7 +347,7 @@ def get_target(self, params): Args: params (ndarray): a single point, with len(params) == self.dim - + Returns: target (float): target function value. """ @@ -394,7 +394,7 @@ def res(self): class BayesianOptimization(): """The class for bayesian optimization. - This class takes the parameters bounds in order to find which values for + This class takes the parameters bounds in order to find which values for the parameters yield the maximum value using bayesian optimization. """ @@ -404,7 +404,7 @@ def __init__(self, pbounds, random_seed=9527, verbose=2): Args: pbounds (dict): Dictionary with parameters names as keys and a tuple with minimum and maximum values. - random_seed (int, optional): The seed for random searching. Default to 9527. + random_seed (int, optional): The seed for random searching. Default to 9527. verbose (int, optional): The level of verbosity. Default to 2. 
""" self._random_seed = random_seed diff --git a/neural_compressor/strategy/conservative.py b/neural_compressor/strategy/conservative.py index 979e2b9f479..61fd856b063 100644 --- a/neural_compressor/strategy/conservative.py +++ b/neural_compressor/strategy/conservative.py @@ -33,12 +33,12 @@ @strategy_registry class ConservativeTuneStrategy(TuneStrategy): """Tuning strategy with accuracy first, performance second. - + The quantization level O0 is designed for user who want to keep the accuracy of the model after quantization. It starts with the original(fp32) model, and then quantize the OPs to lower precision OP type wisely and OP wisely. """ - + def __init__(self, model, conf, @@ -56,8 +56,8 @@ def __init__(self, conf: The Conf class instance includes all user configurations. q_dataloader: Data loader for calibration, mandatory for post-training quantization. Defaults to None. q_func: Training function for quantization aware training. Defaults to None. Defaults to None. - eval_func: The evaluation function provided by user. This function takes model as parameter, and - evaluation dataset and metrics should be encapsulated in this function implementation and + eval_func: The evaluation function provided by user. This function takes model as parameter, and + evaluation dataset and metrics should be encapsulated in this function implementation and outputs a higher-is-better accuracy scalar value. eval_dataloader: Data loader for evaluation. Defaults to None. eval_metric: Metric for evaluation. Defaults to None. @@ -129,11 +129,11 @@ def _get_op_type_priority(self): op_type_priority = list(optypewise_cap.keys()) return op_type_priority - def _sorted_item_by_op_type(self, - items_lst, + def _sorted_item_by_op_type(self, + items_lst, op_type_priority: List[str]) -> OrderedDict[str, List]: """Scoring the tuning items according to its op type. - + Args: items_lst: The tuning item list. # [(op_item, quant_mode), ... ] op_type_priority: The op type list with the order. # [optype_1, optype_2] @@ -142,7 +142,7 @@ def _sorted_item_by_op_type(self, The tuning items list that sorted according to its op type. OrderDict: # op_type: [(TuningItem, quant_mode), ...] - conv: [(TuningItem, static), (TuningItem, static)] + conv: [(TuningItem, static), (TuningItem, static)] linear: [(TuningItem, static), (TuningItem, static)] matmul: [(TuningItem, static), (TuningItem, static)] """ @@ -165,11 +165,11 @@ def initialize_tune_cfg(self): op_item_dtype_dict (OrderedDict): key is (op_name, op_type); value is quantization mode. quant_mode_wise_items (OrderedDict): key is quant_mode/precision; value is item list. initial_op_tuning_cfg (OrderedDict): key is (op_name, op_type); value is the initialized tuning config. - + """ from .utils.constant import auto_query_order_o0 as query_order from .utils.tuning_space import initial_tuning_cfg_with_quant_mode - + quant_mode_wise_items = OrderedDict() # mode, op_item_lst pre_items = set() # Collect op items supported the specified mode. @@ -197,17 +197,17 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): def _quant_items_pool(self, op_type_priority: List[str]) -> OrderedDict[ str, OrderedDict[str, List[Tuple[TuningItem, str]]]]: """Create the op queue to be quantized. - + Args: op_type_priority: The optype list with priority. - + Returns: The op item pool to convert into lower precision. 
quant_items_pool(OrderDict): int8: OrderDict: # (TuningItem, quant_mode) - conv2d: [(TuningItem, static), (TuningItem, static)] + conv2d: [(TuningItem, static), (TuningItem, static)] linear: [(TuningItem, static), (TuningItem, static)] """ quant_mode_wise_items = self.tuning_space.quant_mode_wise_items diff --git a/neural_compressor/strategy/exhaustive.py b/neural_compressor/strategy/exhaustive.py index f192481efc4..b9fad5d963e 100644 --- a/neural_compressor/strategy/exhaustive.py +++ b/neural_compressor/strategy/exhaustive.py @@ -25,12 +25,12 @@ class ExhaustiveTuneStrategy(TuneStrategy): def next_tune_cfg(self): """Generate and yield the next tuning config using exhaustive search in tuning space. - + It sequentially traverse all possible quantization tuning configurations in a tuning space. From the perspective of the impact on performance, we currently only traverse all possible quantization tuning configs. Same reason as Bayesian, fallback datatypes are not included for now. - + Returns: tune_config (dict): A dict containing the tuning configuration for quantization. """ @@ -38,7 +38,7 @@ def next_tune_cfg(self): calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options for calib_sampling_size in calib_sampling_size_lst: op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() - op_wise_tuning_sampler = OpWiseTuningSampler(tuning_space, [], [], + op_wise_tuning_sampler = OpWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) for op_tuning_cfg in op_wise_tuning_sampler: op_tuning_cfg['calib_sampling_size'] = calib_sampling_size diff --git a/neural_compressor/strategy/hawq_v2.py b/neural_compressor/strategy/hawq_v2.py index 7bfb39b56be..3748a5e23c7 100644 --- a/neural_compressor/strategy/hawq_v2.py +++ b/neural_compressor/strategy/hawq_v2.py @@ -28,16 +28,16 @@ @strategy_registry class HAWQ_V2TuneStrategy(TuneStrategy): """The HAWQ V2 tuning strategy. - + HAWQ_V2 implements the "Hawq-v2: Hessian aware trace-weighted quantization of neural networks". We made a small change to it by using the hessian trace to score the op impact and then fallback the OPs according to the scoring result. - + """ def next_tune_cfg(self): """Generate and yield the next tuning config using HAWQ v2 search in tuning space. - + Returns: tune_config (dict): A dict containing the tuning configuration for quantization. """ @@ -63,7 +63,7 @@ def next_tune_cfg(self): yield op_tuning_cfg # Start compute the hessian trace logger.info(f"************** Start compute the hessian trace *****************") - target_dtype = "fp32" + target_dtype = "fp32" hawq_v2_criterion = None strategy_kwargs = self.config.tuning_criterion.strategy_kwargs if strategy_kwargs: diff --git a/neural_compressor/strategy/mse.py b/neural_compressor/strategy/mse.py index 5a769ee4159..50892f2dcaa 100644 --- a/neural_compressor/strategy/mse.py +++ b/neural_compressor/strategy/mse.py @@ -21,7 +21,7 @@ from typing import Dict, Any, List from .strategy import strategy_registry, TuneStrategy from ..utils import logger -from time import time +from time import time from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler from .utils.tuning_structs import OpTuningConfig @@ -34,7 +34,7 @@ class MSETuneStrategy(TuneStrategy): the best model-wise tuning configuration. It then calculates the MSE (Mean Squared Error) for each OP, sorts those OPs according to the MSE value, and performs the op-wise fallback in this order. 
""" - + def __init__(self, model, conf, @@ -52,8 +52,8 @@ def __init__(self, conf: The Conf class instance includes all user configurations. q_dataloader: Data loader for calibration, mandatory for post-training quantization. Defaults to None. q_func: Training function for quantization aware training. Defaults to None. Defaults to None. - eval_func: The evaluation function provided by user. This function takes model as parameter, and - evaluation dataset and metrics should be encapsulated in this function implementation and + eval_func: The evaluation function provided by user. This function takes model as parameter, and + evaluation dataset and metrics should be encapsulated in this function implementation and outputs a higher-is-better accuracy scalar value. eval_dataloader: Data loader for evaluation. Defaults to None. eval_metric: Metric for evaluation. Defaults to None. @@ -111,25 +111,25 @@ def mse_impact_lst(self, op_list: List, fp32_model, best_qmodel): op_list (List[Tuple(str, str)]): List of ops in format of [(op_name, op_type), ...]. fp32_model (Model): The original FP32 model before quantization. current_best_model (Model): The currently best quantized model. - + Returns: ordered_op_name_types (List[Tuple(str, str)]): The sorted list of ops by its MSE - impaction, in the same format of 'op_list'. + impaction, in the same format of 'op_list'. """ op_name_lst = [element[0] for element in op_list ] op_mapping = {} for (op_name, op_type) in list(op_list): op_mapping[op_name] = (op_name, op_type) current_best_tune_cfg = self._tune_cfg_converter(self.cur_best_tuning_cfg) - fp32_dump_content = self.adaptor.inspect_tensor(fp32_model, - self.calib_dataloader, op_name_lst, [1], inspect_type='activation', - save_to_disk=True, save_path="./nc_workspace/", + fp32_dump_content = self.adaptor.inspect_tensor(fp32_model, + self.calib_dataloader, op_name_lst, [1], inspect_type='activation', + save_to_disk=True, save_path="./nc_workspace/", quantization_cfg=current_best_tune_cfg) fp32_tensor_dict = fp32_dump_content['activation'][0] best_qmodel = self.adaptor.quantize(current_best_tune_cfg, self.model, self.calib_dataloader, self.q_func) - quant_dump_content = self.adaptor.inspect_tensor(best_qmodel, + quant_dump_content = self.adaptor.inspect_tensor(best_qmodel, self.calib_dataloader, op_name_lst, [1], inspect_type='activation', - save_to_disk=True, save_path="./nc_workspace/", + save_to_disk=True, save_path="./nc_workspace/", quantization_cfg=current_best_tune_cfg) dequantize_tensor_dict = quant_dump_content['activation'][0] ops_mse = { @@ -137,14 +137,14 @@ def mse_impact_lst(self, op_list: List, fp32_model, best_qmodel): list(fp32_tensor_dict[op].values())[0], list(dequantize_tensor_dict[op].values())[0]) for op in fp32_tensor_dict} ordered_op_names = sorted(ops_mse.keys(), key=lambda key: ops_mse[key], reverse=self.higher_is_better) - + ordered_op_name_types = [op_mapping[name] for name in ordered_op_names] return ordered_op_name_types def next_tune_cfg(self): """Generate and yield the next tuning config. - + Returns: tune_config (dict): A dict containing the tuning configuration for quantization. 
""" @@ -152,13 +152,13 @@ def next_tune_cfg(self): calib_sampling_size_lst = tuning_space.root_item.get_option_by_name('calib_sampling_size').options for calib_sampling_size in calib_sampling_size_lst: op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() - # Optype-wise tuning + # Optype-wise tuning early_stop_tuning = True - stage1_cnt = 0 + stage1_cnt = 0 int8_ops = quant_mode_wise_items['static'] if 'static' in quant_mode_wise_items else [] int8_ops += quant_mode_wise_items['dynamic'] if 'dynamic' in quant_mode_wise_items else [] stage1_max = min(5, len(int8_ops)) # TODO set a more appropriate value - op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], + op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) for op_tuning_cfg in op_wise_tuning_sampler: stage1_cnt += 1 @@ -214,7 +214,7 @@ def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): # Do accumulated fallback according to the order in the previous stage if len(op_fallback_acc_impact) > 0: - ordered_ops = sorted(op_fallback_acc_impact.keys(), + ordered_ops = sorted(op_fallback_acc_impact.keys(), key=lambda key: op_fallback_acc_impact[key], reverse=self.higher_is_better) op_dtypes = OrderedDict(zip(ordered_ops, [target_dtype] * len(fallback_items_name_lst))) diff --git a/neural_compressor/strategy/mse_v2.py b/neural_compressor/strategy/mse_v2.py index 5750bf4e607..7a59c51effd 100644 --- a/neural_compressor/strategy/mse_v2.py +++ b/neural_compressor/strategy/mse_v2.py @@ -22,7 +22,7 @@ from typing import Dict, Any, List from .strategy import strategy_registry, TuneStrategy from ..utils import logger -from time import time +from time import time from .utils.tuning_sampler import OpTypeWiseTuningSampler from .utils.tuning_structs import OpTuningConfig @@ -30,12 +30,12 @@ @strategy_registry class MSE_V2TuneStrategy(TuneStrategy): """The `mse_v2` tuning strategy. - - MSE_v2 is a strategy with a two stages fallback and revert fallback. + + MSE_v2 is a strategy with a two stages fallback and revert fallback. Note that, only tensorflow framework and pytorch FX backend is currently supported for mse_v2 tuning strategy. """ - + def _tuning_record_msg(self, records): records_str_lst = [[str(e) for e in record] for record in records] record_msg = '\n'.join(','.join(record) for record in records_str_lst) @@ -43,7 +43,7 @@ def _tuning_record_msg(self, records): def next_tune_cfg(self): """Generate and yield the next tuning config with below order. - + 1. In the fallback stage, it uses multi-batch data to score the op impact and then fallback the op with the highest score util found the quantized model that meets accuracy criteria. @@ -51,7 +51,7 @@ def next_tune_cfg(self): the impact of fallback OPs in the previous stage and selects the op with the lowest score to revert the fallback until the quantized model that does not meets accuracy criteria. - + Returns: tune_config (dict): A dict containing the tuning configuration for quantization. 
""" @@ -63,11 +63,11 @@ def next_tune_cfg(self): op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() quant_ops = quant_mode_wise_items.get('static', []) quant_ops += quant_mode_wise_items.get('dynamic', []) - # Optype-wise tuning + # Optype-wise tuning early_stop_tuning = True stage1_cnt = 0 stage1_max = 2 # TODO set a more appropriate value - op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], + op_wise_tuning_sampler = OpTypeWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) for op_tuning_cfg in op_wise_tuning_sampler: stage1_cnt += 1 @@ -129,9 +129,9 @@ def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): while not self.objectives.compare(self.last_tune_result, self.baseline): # Record the time of calculating the sensitivity start = time() - ops_lst = self.adaptor.calculate_op_sensitivity(self.model, - self.calib_dataloader, - deepcopy(self._tune_cfg_converter(tune_cfg)), + ops_lst = self.adaptor.calculate_op_sensitivity(self.model, + self.calib_dataloader, + deepcopy(self._tune_cfg_converter(tune_cfg)), self.output_op_names, confidence_batches, fallback=True) @@ -145,10 +145,10 @@ def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): fallback it to {target_dtype}.") tune_cfg[select_op_info] = OpTuningConfig(select_op_info[0], select_op_info[1], - target_dtype, + target_dtype, self.tuning_space) # Record the fallback history - if not fallback_records: + if not fallback_records: fallback_records = [[select_op_info]] else: fallback_records.append(fallback_records[-1] + [select_op_info]) @@ -162,17 +162,17 @@ def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): break logger.info(f"*** Start to re-quant the fallback op in the previous stage.") # Track the current fallback ops - tmp_fallback_ops = fallback_records[-1] if fallback_records else [] + tmp_fallback_ops = fallback_records[-1] if fallback_records else [] start = time() - ops_lst = self.adaptor.calculate_op_sensitivity(self.model, - self.calib_dataloader, + ops_lst = self.adaptor.calculate_op_sensitivity(self.model, + self.calib_dataloader, deepcopy(self._tune_cfg_converter(tune_cfg)), - self.output_op_names, + self.output_op_names, confidence_batches, fallback=False, requantize_cfgs=requantize_cfg['op']) logger.debug(f"*** The op sensitivity analysis took {time() - start:.2f}s.") - if not ops_lst: + if not ops_lst: logger.warning("No op to be requantized") break for select_op_info in ops_lst: @@ -180,7 +180,7 @@ def dynamic_op_tuning_cfg_from_static(op_tuning_cfg: OpTuningConfig): if select_op_info not in tmp_fallback_ops: logger.debug(f"{select_op_info} not in fallback list.") continue - + new_fallback_ops = deepcopy(tmp_fallback_ops) new_fallback_ops.remove(select_op_info) if new_fallback_ops not in fallback_records: diff --git a/neural_compressor/strategy/random.py b/neural_compressor/strategy/random.py index b88152c9567..e876050cc67 100644 --- a/neural_compressor/strategy/random.py +++ b/neural_compressor/strategy/random.py @@ -29,7 +29,7 @@ class RandomTuneStrategy(TuneStrategy): def next_tune_cfg(self): """Generate and yield the next tuning config by random searching in tuning space. - + Random strategy is used to randomly choose quantization tuning configurations from the tuning space. As with the Exhaustive strategy, it also only considers quantization tuning configs to generate a better-performance quantized model. 
@@ -39,7 +39,7 @@ def next_tune_cfg(self): """ tuning_space = self.tuning_space op_item_dtype_dict, quant_mode_wise_items, initial_op_tuning_cfg = self.initial_tuning_cfg() - op_wise_tuning_sampler = OpWiseTuningSampler(tuning_space, [], [], + op_wise_tuning_sampler = OpWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) op_tuning_cfg_lst = list(op_wise_tuning_sampler) op_tuning_cfg_cnt = len(op_tuning_cfg_lst) diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 639c3e3ba1f..42673dc9e72 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -121,8 +121,8 @@ def __init__(self, conf: The Conf class instance includes all user configurations. q_dataloader: Data loader for calibration, mandatory for post-training quantization. Defaults to None. q_func: Training function for quantization aware training. Defaults to None. Defaults to None. - eval_func: The evaluation function provided by user. This function takes model as parameter, and - evaluation dataset and metrics should be encapsulated in this function implementation and + eval_func: The evaluation function provided by user. This function takes model as parameter, and + evaluation dataset and metrics should be encapsulated in this function implementation and outputs a higher-is-better accuracy scalar value. eval_dataloader: Data loader for evaluation. Defaults to None. eval_metric: Metric for evaluation. Defaults to None. @@ -156,12 +156,12 @@ def __init__(self, self.tune_result_record = [] self.tuning_history = [] self.tuning_result_data = [] - + self.baseline = None self.last_tune_result = None self.last_qmodel = None self.last_tune_cfg = None - self.best_qmodel = None + self.best_qmodel = None self.best_tune_result = None self.best_tuning_cfg = None # track the best tuning config correspondence to the best quantized model self.cur_best_acc = self.initial_best_acc() # track the current best accuracy @@ -171,7 +171,7 @@ def __init__(self, self.capability = self.adaptor.query_fw_capability(model) logger.debug(self.capability) self.set_tuning_space(self.config) - + #For algo scheduler self.algo_scheduler = AlgorithmScheduler(self.config.recipes) self.algo_scheduler.dataloader = self.calib_dataloader # reuse the calibration iteration @@ -184,7 +184,7 @@ def __init__(self, self.tuning_times = 0 self.fallback_start_point = 0 self.metric_met_point = 0 - + # for recipes # {recipe name: the list of supported value} self._tuning_recipes = OrderedDict() @@ -194,7 +194,7 @@ def __init__(self, self._not_tuning_recipes_values = {} self._initialize_recipe() self.applied_all_recipes_flag = False - + self._resume = resume if self._resume is not None: self.setup_resume(resume) @@ -212,7 +212,7 @@ def next_tune_cfg(self): tune_config (dict): It's a dict containing the tuning configuration to traverse. """ raise NotImplementedError - + def _initialize_recipe(self): """Divide the recipe into two categories tuning/not tuning.""" from .utils.utility import get_adaptor_name @@ -248,7 +248,6 @@ def _initialize_recipe(self): logger.debug(self._not_tuning_recipes_values) logger.info(f"{len(self._tuning_recipes)} recipes require future tuning.") logger.debug(self._tuning_recipes) - def distributed_next_tune_cfg_lst(self, comm): """Interface for generate the distributed next tuning config list. 
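The distributed path below follows a master/worker pattern: rank 0 dispatches tuning-config ids (the id doubles as the message tag, matching the comm.send call in master_worker_handle) and collects evaluation results. A minimal mpi4py-style sketch of the dispatch side; the real handshake also tracks accuracy acknowledgements, the minimum acknowledged id, and stop flags:

from mpi4py import MPI

comm = MPI.COMM_WORLD
if comm.Get_rank() == 0:
    # Master: hand one tuning-config id to each worker.
    for cfg_id, worker in enumerate(range(1, comm.Get_size())):
        comm.send(obj=cfg_id, dest=worker, tag=cfg_id)
else:
    # Worker: receive the id of the tuning config to quantize and evaluate.
    cfg_id = comm.recv(source=0)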
@@ -327,7 +326,7 @@ def master_worker_handle(self, comm): # record eval_results for context coordination of stage 3 self.last_tune_result = eval_res self.eval_results[tag] = eval_res - + self.overall_trials += 1 self.best_tune_cfg_id = None self.already_ack_id_lst.add(tag) @@ -337,7 +336,7 @@ def master_worker_handle(self, comm): logger.info("[Rank {}]master has one tuning cfg meet acc: {}".format(comm.Get_rank(), tag)) self.met_flag = True self.requirements_met_min_cfg_id = min(self.requirements_met_min_cfg_id, tag) - + # must ensure every id lower than current min_id has been acknowledged # because a tune cfg (not acked yet) with lower id can have better acc for i in range(self.requirements_met_min_cfg_id): @@ -347,7 +346,7 @@ def master_worker_handle(self, comm): # not completely collected yet! self.met_flag = False break - + if self.met_flag: # found the best tune cfg! logger.info("[Rank {}]master has one tuning cfg meet acc: {} and also collect all acks before"\ @@ -365,7 +364,7 @@ def master_worker_handle(self, comm): logger.info(self.best_tune_cfg_id) logger.info(self.tune_cfg_lst[self.best_tune_cfg_id]) break - + # send the next cfg if not exceed max trials if self.overall_trials > self.config.tuning_criterion.max_trials: self.max_trial_flag = True @@ -376,7 +375,7 @@ def master_worker_handle(self, comm): cur_cfg_id, sender_rank)) comm.send(obj=cur_cfg_id, dest=sender_rank, tag=cur_cfg_id) cur_cfg_id += 1 - else: + else: logger.info("[Rank {}]All tune configs are sent, no more sending, just collecting..."\ .format(comm.Get_rank())) @@ -498,14 +497,14 @@ def distributed_traverse(self): .format(self.met_flag or self.max_trial_flag or self.max_time_flag)) if self.met_flag or self.max_trial_flag or self.max_time_flag: break - + def _fallback_ops(self, tune_cfg, recipe_op_lst, tuning_space): """Fallback ops in recipe op list.""" for op_name_type in recipe_op_lst: tune_cfg.update({op_name_type: OpTuningConfig(op_name_type[0], \ op_name_type[1],'fp32', tuning_space)}) return tune_cfg - + def apply_all_tuning_recipes(self, tune_cfg): """Apply all tunable recipes with their value.""" tune_cfg['recipe_cfgs'] = tune_cfg.get('recipe_cfgs', {}) @@ -517,10 +516,10 @@ def apply_all_tuning_recipes(self, tune_cfg): tune_cfg = self._fallback_ops(tune_cfg, self.capability['recipes_ops'][recipe_name],\ self.tuning_space) return tune_cfg - + def apply_recipe_one_by_one(self, tune_cfg): """Apply the tunable recipes one by one. - + For recipes only have two options, apply the last one. For recipes with multiple values. such as alpha of smooth quant, apply it one by one. """ @@ -569,8 +568,8 @@ def set_param_for_pre_quantization_algos(self, algo_scheduler, tune_cfg, fp32_mo sq_algo.folding = smooth_quant_args['folding'] #logger.debug(f"Set smooth quant with alpha {smooth_quant_args['alpha']} as the pre-quantization algo.") algo_scheduler.append_algorithm('pre_quantization', sq_algo) - - + + def set_param_for_post_quantization_algos(self, algo_scheduler, tune_cfg, pre_optimized_model, q_model) -> None: """Set the parameter for post-quantization algos, such as bias correction, weight correction. @@ -583,7 +582,7 @@ def set_param_for_post_quantization_algos(self, algo_scheduler, tune_cfg, pre_op algo_scheduler.origin_model = pre_optimized_model # if no pre-process algos, return the fp32 model directly. 
algo_scheduler.q_model = q_model - + algo_scheduler.reset_exec_algorithms() recipe_cfgs = tune_cfg.get('recipe_cfgs', None) # for fast_bias_correction @@ -689,20 +688,20 @@ def traverse(self): self._dump_tuning_process_statistics() break self._recover_best_qmodel_from_tuning_cfg() - + def _remove_redundant_qmodel(self): """Remove the redundant quantized model to reduce memory use. - + During the tuning process, the strategy only keeps the best tuning config instead of the best quantized model to reduce memory use. """ self.last_qmodel = None self.best_qmodel = None - + def _eval_baseline(self): """Evaluate the fp32 model if needed.""" if self._not_tuning: - + logger.info("Do not evaluate the baseline and quantize the model with default configuration.") return else: @@ -819,7 +818,7 @@ def _optype_skipped(optype): def initial_tuning_cfg(self): """Init the tuning config. - + Initialize the tuning config according to the quantization approach. Returns: @@ -945,7 +944,7 @@ def _tune_cfg_converter(self, op_tuning_cfg): def set_tuning_space(self, config): """Create the tuning space. - + Create the tuning space based on the framework capability and user configuration. Args: @@ -1079,7 +1078,7 @@ def _set_objectives(self): self.use_multi_objective = True else: objectives = [obj.lower()] - + # set metric self.metric_name = ['Accuracy'] self.metric_criterion = [self.higher_is_better] @@ -1100,7 +1099,7 @@ def _set_objectives(self): self.metric_criterion = [True] * len(self.metric_name) # metric weight self.metric_weight = self.eval_metric.get('weight', None) - + accuracy_criterion = {'relative': 0.01, 'higher_is_better': True} accuracy_criterion_conf = self.config.accuracy_criterion accuracy_criterion[accuracy_criterion_conf.criterion] = accuracy_criterion_conf.tolerable_loss @@ -1111,7 +1110,7 @@ def _set_objectives(self): metric_weight=self.metric_weight, obj_criterion=obj_higher_is_better, obj_weight=obj_weight) - + def _same_conf(self, src_conf, dst_conf): """Check if the two configs are the same.""" from ..utils.utility import compare_objects @@ -1160,7 +1159,7 @@ def deploy_config(self): model_cfg['backend'] = self.config.backend self.deploy_cfg['model'] = model_cfg self.deploy_cfg['device'] = self.config.device - + def setup_yaml(): represent_dict_order = lambda self, \ data: self.represent_mapping('tag:yaml.org,2002:map', data.items()) @@ -1268,7 +1267,7 @@ def __setstate__(self, d): def stop(self, timeout, trials_count): """Check if need to stop traverse. - + Check if need to stop traversing the tuning space, either accuracy goal is met or timeout is reach. Returns: diff --git a/neural_compressor/strategy/utils/tuning_sampler.py b/neural_compressor/strategy/utils/tuning_sampler.py index b33a13e2fc1..f751e5b54f2 100644 --- a/neural_compressor/strategy/utils/tuning_sampler.py +++ b/neural_compressor/strategy/utils/tuning_sampler.py @@ -25,7 +25,7 @@ from .tuning_structs import OpTuningConfig from ...utils import logger -TUNING_ITEM_PRIORITY = [('activation','scheme'), ('activation','algorithm'),('activation','granularity'), +TUNING_ITEM_PRIORITY = [('activation','scheme'), ('activation','algorithm'),('activation','granularity'), ('activation','compute_dtype'), ('weight','scheme'), ('weight','algorithm'), \ ('weight','granularity')] @@ -33,9 +33,9 @@ class TuningSamplerRegistry: """Class decorator used to register all TuningSampler subclasses.""" - + sampler_dict = {} - + @classmethod def register(cls, name): """Register new tuning sampler. 
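The registration mechanism used by TuningSamplerRegistry above is a plain class-decorator registry. A minimal sketch under the same contract (names shortened; details of the real implementation may differ):

class SamplerRegistry:
    """Sketch of a class-decorator registry like TuningSamplerRegistry."""
    sampler_dict = {}

    @classmethod
    def register(cls, name):
        def decorator(sampler_cls):
            # Record the subclass by name; duplicate names are rejected.
            assert name not in cls.sampler_dict, \
                f"Sampler '{name}' is already registered."
            cls.sampler_dict[name] = sampler_cls
            return sampler_cls
        return decorator

@SamplerRegistry.register("fallback")   # hypothetical usage
class MyFallbackSampler:
    pass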
@@ -58,7 +58,7 @@ def __init__(self): class TuningSampler: """Not displayed in API Docs. - + Basic class of tuning sampler. """ @@ -85,7 +85,7 @@ def __init__(self, def __iter__(self, tune_cfg=None): """Interface for generate the next tuning config.""" pass - + def _set_dtype(self, op_name_type, config_args): has_weight = op_name_type in self.tuning_space.ops_attr['weight'] path = self.op_complete_path[op_name_type].get('activation', None) @@ -93,7 +93,7 @@ def _set_dtype(self, op_name_type, config_args): if has_weight: path = self.op_complete_path[op_name_type].get('weight', None) config_args['weight_dtype'] = self.tuning_space.ops_data_type[op_name_type][path] - + class ModelWiseTuningSampler(TuningSampler): """Not displayed in API Docs.""" @@ -108,16 +108,16 @@ def __init__(self, step1. create a default tuning config for each op step2. collect all tuning items and options, and build the model-wise traverse order - step3. yield the tuning item with option one by one, query the existence of tuning item + step3. yield the tuning item with option one by one, query the existence of tuning item and specific option for one op if exist, use the default tuning config if not exist - + Args: tuning_space: Tuning space. tuning_items_priority: The priority to traverse the tuning items. tuning_order_lst: The tuning orders. op_dtype_dict: The (op name, op type) and its target data type. initial_op_tuning_cfg: The initial tuning config. - + """ super().__init__(tuning_space, tuning_order_lst, initial_op_tuning_cfg) @@ -139,7 +139,7 @@ def __init__(self, for tuning_item in quant_mode_item.options: tuning_items[tuning_item.name] = tuning_items[tuning_item.name].union(tuning_item.options) self.tuning_items = tuning_items - + def __iter__(self): """Yield the next tuning config. @@ -173,7 +173,7 @@ def __iter__(self): self.tuning_space, kwargs=config_args) yield tune_cfg - + class OpTypeWiseTuningSampler(TuningSampler): """Not displayed in API Docs.""" @@ -197,13 +197,13 @@ def __init__(self, tuning_items_priority = TUNING_ITEM_PRIORITY # (op_type, quant_mode) : {tuning_item_name : [option1, option2]} # {('activation', 'scheme'): ['sym', 'sym'], ('activation', 'algorithm'): ['minmax', 'kl', 'minmax', 'kl']} - + self.optype_quant_mode_option = {} self.optype_quant_mode_items_name = defaultdict(list) self.op_type_quant_mode_wise_combination = {} self.op_dtype_dict = op_dtype_dict self.default_op_config = {} - + for op_name_type, quant_mode in op_dtype_dict.items(): full_path = self.tuning_space.get_op_default_path_by_pattern(op_name_type, quant_mode) self.op_complete_path[op_name_type] = copy.deepcopy(full_path) @@ -253,9 +253,9 @@ def __iter__(self): all_exist_flag = True for method_name, method_val in zip(op_tuning_items, op_tuning_item_vals): full_path = self.op_complete_path[op_name_type] - if not self.tuning_space.query_item_option(op_name_type, - full_path[method_name[0]], - method_name, + if not self.tuning_space.query_item_option(op_name_type, + full_path[method_name[0]], + method_name, method_val): all_exist_flag = False op_tuning_config = self.default_op_config[op_name_type] @@ -338,7 +338,7 @@ def __iter__(self): kwargs=config_args) new_tune_cfg.update({op_name_type: op_tuning_config}) yield new_tune_cfg - + def get_opwise_candidate(self): """Collect all op-wise setting. 
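get_opwise_candidate() returns, for each (op_name, op_type) key, the list of candidate OpTuningConfig objects; strategies such as bayesian and tpe size their search space from this pool. An illustrative sketch of the shape with hypothetical data:

# Hypothetical shape of the pool returned by get_opwise_candidate().
op_configs = {
    ('conv1', 'Conv2d'): ['cfg_minmax_sym', 'cfg_minmax_asym', 'cfg_kl_sym'],
    ('fc', 'Linear'): ['cfg_minmax_sym'],
}
# Each op with more than one candidate contributes one tunable dimension.
search_dims = {op: cfgs for op, cfgs in op_configs.items() if len(cfgs) > 1}
print(len(search_dims))  # -> 1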
@@ -369,7 +369,7 @@ def get_opwise_candidate(self): for op_tuning_item_vals in op_options: config_args = dict(zip(op_tuning_items, op_tuning_item_vals)) self._set_dtype( op_name_type, config_args) - op_tuning_config = OpTuningConfig(op_name_type[0], op_name_type[1], + op_tuning_config = OpTuningConfig(op_name_type[0], op_name_type[1], quant_mode, self.tuning_space, kwargs=config_args) op_wise_configs[op_name_type].append(op_tuning_config) @@ -447,7 +447,7 @@ def __init__(self, tuning_space (TuningSpace): Tuning space. tuning_order_lst (List[TuningOrder]): The tuning orders. initial_op_tuning_cfg (Dict[tuple, Any]): The initial tuning config. - op_block_lst (List[List[tuple]]): The block of op_list, + op_block_lst (List[List[tuple]]): The block of op_list, [[(op name, op type), (op name, op type), ...], op_list2, ...]. accumulate (bool): Fallback accumulated or not. target_dtype (str): Skip fallback the first op or not. Defaults to True. diff --git a/neural_compressor/strategy/utils/tuning_space.py b/neural_compressor/strategy/utils/tuning_space.py index a6b8d8ec4bd..84cf716b2a5 100644 --- a/neural_compressor/strategy/utils/tuning_space.py +++ b/neural_compressor/strategy/utils/tuning_space.py @@ -29,7 +29,7 @@ class TuningItem: """Not displayed in API Docs.""" - + def __init__(self, name, options=[], item_type=None): """Init the tuning item. @@ -50,7 +50,7 @@ def options(self): All options. """ return self._options - + def get_options_name(self): """Return the name list of the options.""" return [o.name for o in self.options] @@ -88,7 +88,7 @@ def get_option_by_name(self, option_name): def get_details(self, depth=0): """Get the tuning item and its options recursively. - + Args: depth: recursion depth. Defaults to 0. @@ -106,11 +106,11 @@ def get_details(self, depth=0): class TuningSpace: """Not displayed in API Docs. - + 1) capability -> internal format -> merge -> tuning space (tree) """ - + def __init__(self, capability, conf, framework=None): """Init the tuning space. @@ -188,7 +188,7 @@ def _parse(cap, root, path, op_name_type): def _merge_op_cfg(self, cur_op_cap, op_user_cfg, fw_op_cap): """Merge the op cfg with user cfg. - + op_user_cfg:{ 'activation':{ 'dtype': ['fp32'] @@ -197,19 +197,19 @@ def _merge_op_cfg(self, cur_op_cap, op_user_cfg, fw_op_cap): 'dtype': ['fp32'] } } - + Step1. merge dtype, get the intersection between fw_op_cap and op_user_cfg. Step2. merge method options. - + # if dtype and type intersection with precision set -> only keep the intersection precision # and remove the quantization. # else(no dtype, or no intersection) -> merge the method Args: cur_op_cap: current capability. - op_user_cfg: The user capability. + op_user_cfg: The user capability. fw_op_cap: The fwk capability(baseline). - + Returns: Return the merged capability. 
""" @@ -254,13 +254,13 @@ def _merge_optype_wise_cfg(self, cap: Dict, optype_wise_usr_cfg: Dict, fw_cap: D op_type_pattern = re.compile(op_type) op_lst = [op_name_type for op_name_type in cap['op'] if op_type_pattern.fullmatch(op_name_type[1])] for op_name_type in op_lst: - cap['op'][op_name_type] = self._merge_op_cfg(cap['op'][op_name_type], + cap['op'][op_name_type] = self._merge_op_cfg(cap['op'][op_name_type], op_user_cfg, fw_cap['op'][op_name_type]) def _merge_model_wise_cfg(self, cap: Dict, model_wise_usr_cfg: Dict, fw_cap: Dict): for op_name_type in cap['op'].keys(): - cap['op'][op_name_type] = self._merge_op_cfg(cap['op'][op_name_type], + cap['op'][op_name_type] = self._merge_op_cfg(cap['op'][op_name_type], model_wise_usr_cfg, fw_cap['op'][op_name_type]) @@ -271,18 +271,18 @@ def _merge_op_wise_cfg(self, cap: Dict, op_wise_usr_cfg: Dict, fw_cap: Dict): for op_name in op_name_types: if op_name_pattern.fullmatch(op_name): op_name_type = op_name_types[op_name] - cap['op'][op_name_type] = self._merge_op_cfg(cap['op'][op_name_type], + cap['op'][op_name_type] = self._merge_op_cfg(cap['op'][op_name_type], op_user_cfg, fw_cap['op'][op_name_type]) def _merge_with_user_cfg(self, capability: Dict, user_cfg: Dict): """Merge the capability with user config. - + Merge the capability queried from the adaptor with user config in the order of model-wise, optype-wise, and op-wise if needed. The optype-wise user config will override the model-wise user config for their intersection parts, the same as the op-wise and optype-wise. - + Here is an example: capability:{ ('op1','type1'): { @@ -302,7 +302,7 @@ def _merge_with_user_cfg(self, capability: Dict, user_cfg: Dict): 'item2': [item2_option1, item2_option2], } } - + user_config{ model-wise:{ 'item1': [item1_option1] @@ -387,7 +387,7 @@ def _merge_with_user_cfg(self, capability: Dict, user_cfg: Dict): self._merge_optype_wise_cfg(capability, user_cfg['optype_wise'], fw_capability) if user_cfg['op_wise'] is not None: self._merge_op_wise_cfg(capability, user_cfg['op_wise'], fw_capability) - + def _parse_cap_helper(self, cap): """Convert the cpa to internal format. @@ -469,7 +469,7 @@ def _parse_cap_helper(self, cap): parsed_op_cap[quant_mode][att][_data_type][signed_flag][item_name] = item_options else: # Parse the data info for item with unique value. - att_dtype = op_cap[att]['dtype'] + att_dtype = op_cap[att]['dtype'] if isinstance(att_dtype, list): att_dtype = att_dtype[0] parsed_op_cap['precision'][att][att_dtype] = {'dtype': att_dtype} @@ -477,10 +477,10 @@ def _parse_cap_helper(self, cap): parsed_cap[op_name_type] = parsed_op_cap return parsed_cap - + def _create_tuning_space(self, capability, usr_cfg): """Create tuning space. - + steo1. convert the capability into internal format. step2. merge the capability with usr_cfg step3. create the tuning space @@ -546,7 +546,7 @@ def get_default_config(self, op_name_type, quant_mode): self, kwargs=config_args) return op_tuning_config - + def get_item_by_path(self, path, default=None): """Get the item according to the path.""" item = self.root_item @@ -575,7 +575,7 @@ def get_default_full_path(self, op_name_type, path): if len(path) == 3: return path assert len(path) == 2, f"Got the path: {path}, please provide the path include activation or weight." 
att_item = self.get_item_by_path((op_name_type, *path)) - if not att_item or len(att_item.options) == 0: + if not att_item or len(att_item.options) == 0: logger.debug(f"Could not found item for {op_name_type} with path {path}") return None dtype = att_item.options[0].name @@ -602,7 +602,7 @@ def query_quant_mode_item_by_full_path(self, op_name_type, path) -> Tuple[Tuning new_path = (op_name_type, *path) item = self.get_item_by_path(new_path) return item - + def query_items_by_quant_mode(self, quant_mode): """Collect all op items that support the specified mode. @@ -620,9 +620,9 @@ def get_op_default_path_by_pattern(self, op_name_type, pattern): Args: op_name_type: (op_name, op_type) pattern: 'static', 'dynamic', ('static', 'int8'), ('precision', 'fp32') - + Returns: - result(Dict): The default full path of activation and weight if have. + result(Dict): The default full path of activation and weight if have. """ internal_pattern = pattern_to_internal(pattern) full_path = {'activation': None, 'weight': None} @@ -636,7 +636,7 @@ def get_op_default_path_by_pattern(self, op_name_type, pattern): def pattern_to_internal(pattern, default_dtype='int8'): """Convert pattern to internal format. - + 'static' -> ('static', (('int8'),('int8'))) 'dynamic' -> ('dynamic', (('int8'),('int8'))) 'fp32' -> ('precision', (('fp32'), ('fp32'))) @@ -673,15 +673,15 @@ def initial_tuning_cfg_with_quant_mode(op_name_type, quant_mode, tuning_space: T op_name_type: (op name, op type) quant_mode: dynamic/static/fp32/bf16/fp16 tuning_space: tuning space. - - step1, convert the quant_mode into internal format. + + step1, convert the quant_mode into internal format. step2, complete the path based. step3, get the mode item. step4, use the first option as value for method. step5, create the op tuning config. - + Returns: - The initial tuning config. + The initial tuning config. """ internal_pattern = pattern_to_internal(quant_mode) full_path = {'activation': None, 'weight': None} diff --git a/neural_compressor/strategy/utils/tuning_structs.py b/neural_compressor/strategy/utils/tuning_structs.py index 0e9fe5a30aa..778aa74fdd9 100644 --- a/neural_compressor/strategy/utils/tuning_structs.py +++ b/neural_compressor/strategy/utils/tuning_structs.py @@ -23,7 +23,7 @@ class OpTuningConfig: """Op tuning config.""" - + def __init__(self, op_name, op_type, op_quant_mode, tuning_space, kwargs={}): """Create the tuning config. @@ -43,7 +43,7 @@ def __init__(self, op_name, op_type, op_quant_mode, tuning_space, kwargs={}): self.weight_dtype = None self.has_weight = self.op_name_type in tuning_space.ops_attr['weight'] self._set_dtype() - + def _set_dtype(self): """Set the date type.""" if self.op_quant_mode in PRECISION_LIST: @@ -58,7 +58,7 @@ def _set_dtype(self): # assert self.weight_dtype, \ # (f"Didn't assign the weight data type for {self.op_name, self.op_type}", \ # f"with quant_mode {self.op_quant_mode}") - + def __repr__(self) -> str: """Display the tuning config as string. @@ -76,7 +76,7 @@ def __repr__(self) -> str: def get_state(self): """Return the op tuning configuration. - + Returns: Dict: The op tuning state. 
""" diff --git a/neural_compressor/strategy/utils/utility.py b/neural_compressor/strategy/utils/utility.py index 22b95176e59..be8b5f7853a 100644 --- a/neural_compressor/strategy/utils/utility.py +++ b/neural_compressor/strategy/utils/utility.py @@ -22,7 +22,7 @@ class OrderedDefaultDict(OrderedDict): """Ordered default dict.""" - + def __missing__(self, key): """Initialize value for the missing key.""" self[key] = value = OrderedDefaultDict() From e5667218039b124427075f79d6844b8e4236591c Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Sun, 23 Apr 2023 21:27:05 +0800 Subject: [PATCH 090/103] fixed the mixed precision config Signed-off-by: yiliu30 --- neural_compressor/contrib/strategy/sigopt.py | 10 ++++---- neural_compressor/contrib/strategy/tpe.py | 6 ++--- .../strategy/auto_mixed_precision.py | 22 ++++++++++++++++- neural_compressor/strategy/strategy.py | 24 +++++++++++++------ test/strategy/test_tuning_space.py | 24 +++++++------------ test/strategy/test_tuning_space_v2.py | 14 +++++------ 6 files changed, 61 insertions(+), 39 deletions(-) diff --git a/neural_compressor/contrib/strategy/sigopt.py b/neural_compressor/contrib/strategy/sigopt.py index 4b6d6bb5661..227b21edd9b 100644 --- a/neural_compressor/contrib/strategy/sigopt.py +++ b/neural_compressor/contrib/strategy/sigopt.py @@ -89,8 +89,8 @@ def __init__(self, conf: The Conf class instance includes all user configurations. q_dataloader: Data loader for calibration, mandatory for post-training quantization. Defaults to None. q_func: Training function for quantization aware training. Defaults to None. Defaults to None. - eval_func: The evaluation function provided by user. This function takes model as parameter, and - evaluation dataset and metrics should be encapsulated in this function implementation and + eval_func: The evaluation function provided by user. This function takes model as parameter, and + evaluation dataset and metrics should be encapsulated in this function implementation and outputs a higher-is-better accuracy scalar value. eval_dataloader: Data loader for evaluation. Defaults to None. eval_metric: Metric for evaluation. Defaults to None. @@ -108,7 +108,7 @@ def __init__(self, resume=resume, q_hooks=q_hooks) logger.info(f"*** Initialize SigOpt tuning") - self.config = conf.quantization + self.config = self._initialize_config(conf) strategy_name = self.config.tuning_criterion.strategy if strategy_name.lower() == "sigopt": try: @@ -119,7 +119,7 @@ def __init__(self, import sys subprocess.check_call([sys.executable, "-m", "pip", "install", "sigopt"]) import sigopt # pylint: disable=import-error - finally: + except: assert False, "Unable to import sigopt from the local environment." 
else: pass @@ -266,7 +266,7 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): for quant_mode, quant_mode_items in quant_mode_wise_items.items(): initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) - op_wise_pool = OpWiseTuningSampler(tuning_space, [], [], + op_wise_pool = OpWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) self.op_configs = op_wise_pool.get_opwise_candidate() for op, configs in self.op_configs.items(): diff --git a/neural_compressor/contrib/strategy/tpe.py b/neural_compressor/contrib/strategy/tpe.py index 719174221cb..2f80cb7751b 100644 --- a/neural_compressor/contrib/strategy/tpe.py +++ b/neural_compressor/contrib/strategy/tpe.py @@ -93,7 +93,7 @@ def __init__(self, resume=None, q_hooks=None): """Initialize the tpe tuning strategy if the user specified to use it.""" - self.config = conf.quantization + self.config = self._initialize_config(conf) assert self.config.approach == 'post_training_static_quant', \ "TPE strategy is only for post training static quantization!" """Initialize the tpe tuning strategy if the user specified to use it.""" @@ -118,7 +118,7 @@ def __init__(self, self.max_trials = 200 if self.config.tuning_criterion.max_trials: self.max_trials = self.config.tuning_criterion.max_trials - + self.loss_function_config = { 'acc_th': 0.01, 'acc_weight': 1.0, @@ -226,7 +226,7 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): op_item_dtype_dict = OrderedDict() for quant_mode, quant_mode_items in quant_mode_wise_items.items(): initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) - op_wise_pool = OpWiseTuningSampler(tuning_space, [], [], + op_wise_pool = OpWiseTuningSampler(tuning_space, [], [], op_item_dtype_dict, initial_op_tuning_cfg) self.op_configs = op_wise_pool.get_opwise_candidate() self.opwise_tune_cfgs = {} diff --git a/neural_compressor/strategy/auto_mixed_precision.py b/neural_compressor/strategy/auto_mixed_precision.py index 9c2281923cb..78e045e72ed 100644 --- a/neural_compressor/strategy/auto_mixed_precision.py +++ b/neural_compressor/strategy/auto_mixed_precision.py @@ -22,7 +22,6 @@ from collections import OrderedDict from .strategy import strategy_registry, TuneStrategy from ..utils import logger - from .utils.tuning_sampler import OpTypeWiseTuningSampler, FallbackTuningSampler from .utils.tuning_structs import OpTuningConfig @@ -31,6 +30,27 @@ class AutoMixedPrecisionTuneStrategy(TuneStrategy): """Tuning strategy for auto mixed precision.""" + def _initialize_config(self, conf): + """Init the tuning config based on user conf. + + Args: + conf: User config + + Returns: + Tuning config + """ + config = conf.mixed_precision + config.approach = getattr(config, 'approach', None) + config.recipes = getattr(config, 'recipes', {}) + config.calibration_sampling_size = getattr(config, 'calibration_sampling_size', [0]) + config.op_type_dict = getattr(config, 'op_type_dict', None) + config.op_name_dict = getattr(config, 'op_name_dict', None) + config.quant_format = getattr(config, 'quant_format', "") + config.domain = getattr(config, 'domain', None) + config.reduce_range = getattr(config, 'reduce_range', None) + config.example_inputs = getattr(config, 'example_inputs', None) + return config + def next_tune_cfg(self): """Generate the next tuning config. 
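With _initialize_config overridden per strategy, the auto-mixed-precision path only needs a MixedPrecisionConfig from the caller. A rough usage sketch, assuming the public fit() entry point; exact arguments may vary by release, and `model` stands for any user-supplied framework model:

from neural_compressor import mix_precision
from neural_compressor.config import MixedPrecisionConfig

conf = MixedPrecisionConfig()                          # defaults to bf16 conversion
converted_model = mix_precision.fit(model, conf=conf)  # `model`: user-supplied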
diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 42673dc9e72..507a8664289 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -132,8 +132,7 @@ def __init__(self, """ self.model = model self.conf = conf - # TODO replace self.config with mixed_precision when self is an instance of AutoMixedPrecisionTuneStrategy - self.config = conf.quantization + self.config = self._initialize_config(conf) self.history_path = self._create_path(options.workspace, './history.snapshot') self.deploy_path = self._create_path(options.workspace, 'deploy.yaml') self.calib_dataloader = q_dataloader @@ -198,6 +197,17 @@ def __init__(self, self._resume = resume if self._resume is not None: self.setup_resume(resume) + def _initialize_config(self, conf): + """Init the tuning config based on user conf. + + Args: + conf: User config + + Returns: + Tuning config + """ + config = conf.quantization + return config @abstractmethod def next_tune_cfg(self): @@ -1001,14 +1011,14 @@ def _create_path(self, custom_path, filename): return new_path def _set_framework_info(self, q_dataloader, q_func=None): - framework_specific_info = {'device': self.config.device, - 'approach': self.config.approach, + framework_specific_info = {'device': getattr(self.config, 'device', None), + 'approach': getattr(self.config, 'approach', None), 'random_seed': options.random_seed, 'performance_only': self._not_tuning} framework = self.config.framework.lower() framework_specific_info.update({'backend': self.config.backend}) - framework_specific_info.update({'format': self.config.quant_format}) - framework_specific_info.update({'domain': self.config.quant_format}) + framework_specific_info.update({'format': getattr(self.config, 'quant_format', None)}) + framework_specific_info.update({'domain': getattr(self.config, 'quant_format', None)}) self.mixed_precision_mode = isinstance(self.config, MixedPrecisionConfig) @@ -1020,7 +1030,7 @@ def _set_framework_info(self, q_dataloader, q_func=None): 'recipes': self.config.recipes, 'use_bf16': self.config.use_bf16 if self.config.use_bf16 is not None else False}) for item in ['scale_propagation_max_pooling', 'scale_propagation_concat']: - if item not in framework_specific_info['recipes']: + if framework_specific_info['recipes'] and item not in framework_specific_info['recipes']: framework_specific_info['recipes'].update({item: True}) if self.config.backend == 'itex': framework = 'tensorflow_itex' diff --git a/test/strategy/test_tuning_space.py b/test/strategy/test_tuning_space.py index 6da04ebaf29..a833313e924 100644 --- a/test/strategy/test_tuning_space.py +++ b/test/strategy/test_tuning_space.py @@ -210,18 +210,16 @@ def setUp(self) -> None: } } } - + self.capability2 = { 'calib': {'calib_sampling_size': [1, 10]}, 'op': deepcopy(op_cap2) } - + def test_tuning_space_merge_op_wise_not_exist(self): # op-wise conf = { - 'quantization': { - 'op_type_dict': deepcopy(self.op_wise_user_config2), - } + 'op_type_dict': deepcopy(self.op_wise_user_config2), } conf = DotDict(conf) tuning_space2 = TuningSpace(deepcopy(self.capability2), deepcopy(conf)) @@ -230,20 +228,20 @@ def test_tuning_space_merge_op_wise_not_exist(self): def test_tuning_space_creation(self): conf = None - # Test the creation of tuning space + # Test the creation of tuning space tuning_space = TuningSpace(self.capability, conf) logger.debug(tuning_space.root_item.get_details()) - # ops supported static + # ops supported static static_items = 
tuning_space.query_items_by_quant_mode('static') static_items_name = [item.name for item in static_items] self.assertEqual(set(static_items_name), set(op_cap.keys())) - # ops supported dynamic + # ops supported dynamic dynamic_items = tuning_space.query_items_by_quant_mode('dynamic') dynamic_items_name = [item.name for item in dynamic_items] all_items_name = list(op_cap.keys()) all_items_name.remove(('op_name3', 'op_type2')) self.assertEqual(set(dynamic_items_name), set(all_items_name)) - # ops supported fp32 + # ops supported fp32 fp32_items = tuning_space.query_items_by_quant_mode('fp32') fp32_items_name = [item.name for item in fp32_items] self.assertEqual(set(fp32_items_name), set(op_cap.keys())) @@ -253,9 +251,7 @@ def test_tuning_space_creation(self): def test_tuning_space_merge_optype_wise(self): # optype-wise conf = { - 'quantization': { - 'op_type_dict': self.optype_wise_user_config, - } + 'op_type_dict': self.optype_wise_user_config, } conf = DotDict(conf) tuning_space2 = TuningSpace(deepcopy(self.capability), deepcopy(conf)) @@ -278,9 +274,7 @@ def test_tuning_space_merge_optype_wise(self): def test_tuning_space_merge_op_wise(self): # op-wise conf = { - 'quantization': { - 'op_name_dict': self.op_wise_user_config, - } + 'op_name_dict': self.op_wise_user_config, } conf = DotDict(conf) diff --git a/test/strategy/test_tuning_space_v2.py b/test/strategy/test_tuning_space_v2.py index 8ef5cd05f08..192f4911c5f 100644 --- a/test/strategy/test_tuning_space_v2.py +++ b/test/strategy/test_tuning_space_v2.py @@ -174,7 +174,7 @@ def setUp(self) -> None: 'calib': {'calib_sampling_size': [1, 10, 50]}, 'op': deepcopy(op_cap) } - + self.op_wise_user_cfg_for_fallback = { 'op_name1': { 'activation': { @@ -185,8 +185,8 @@ def setUp(self) -> None: } }, } - - + + def test_tuning_sampler_int4(self): # op-wise conf = {} @@ -208,7 +208,7 @@ def test_tuning_sampler_int4(self): if dtype_item.name == 'int4': found_int4_weight = True self.assertTrue(found_int4_weight) - + def test_sampler_int4(self): # test sampler from collections import OrderedDict @@ -241,7 +241,7 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): op_item_dtype_dict = OrderedDict() for quant_mode, quant_mode_items in quant_mode_wise_items.items(): initial_op_quant_mode(quant_mode_items, quant_mode, op_item_dtype_dict) - + op_wise_tuning_sampler = OpWiseTuningSampler(deepcopy(tuning_space), [], [], op_item_dtype_dict, initial_op_tuning_cfg) op3 = ('op_name3', 'op_type3') @@ -250,14 +250,12 @@ def initial_op_quant_mode(items_lst, target_quant_mode, op_item_dtype_dict): act_dtype = op_cfg['activation']['dtype'] weight_dtype = op_cfg['weight']['dtype'] self.assertTrue(act_dtype == weight_dtype == 'int4') - + def test_tuning_space_merge_op_wise(self): # op-wise conf = { - 'quantization': { 'op_name_dict': self.op_wise_user_cfg_for_fallback, - } } conf = DotDict(conf) From c0eba52014dbb9a61def72b80565073b7905ab29 Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Mon, 24 Apr 2023 17:29:01 +0800 Subject: [PATCH 091/103] fix for code scan and enhance coverage Signed-off-by: Cheng, Zixuan --- neural_compressor/adaptor/pytorch.py | 30 +++++++++++----------- neural_compressor/compression/callbacks.py | 14 +++++----- neural_compressor/config.py | 14 ++++++++++ neural_compressor/mix_precision.py | 3 ++- neural_compressor/quantization.py | 4 +-- 5 files changed, 40 insertions(+), 25 deletions(-) diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index c5b07cf7358..86b1e463d61 100644 
--- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -138,7 +138,7 @@ def get_example_inputs(model, dataloader): for idx, (input, label) in enumerate(dataloader): output = pytorch_forward_wrapper(model, input) - if isinstance(input, dict) or isinstance(input, UserDict): + if isinstance(input, dict) or isinstance(input, UserDict): # pragma: no cover assert version.release >= Version("1.12.0").release, \ "INC support IPEX version >= 1.12.0" if "label" in input.keys(): @@ -151,11 +151,11 @@ def get_example_inputs(model, dataloader): if isinstance(input, torch.Tensor): return input break - except Exception as e: + except Exception as e: # pragma: no cover for idx, input in enumerate(dataloader): output = pytorch_forward_wrapper(model, input) - if isinstance(input, dict) or isinstance(input, UserDict): + if isinstance(input, dict) or isinstance(input, UserDict): # pragma: no cover assert version.release >= Version("1.12.0").release, \ "INC support IPEX version >= 1.12.0" if "label" in input.keys(): @@ -460,7 +460,7 @@ def _observer(algorithm, quant_min, quant_max = calculate_quant_min_max(unsigned, num_bits) logger.info((f"For {dtype}, replace it with {torch_dtype} and " + \ f"set quant_min: {quant_min}, quant_max: {quant_max}")) - else: + else: # pragma: no cover assert False, "Unsupport dtype with {}".format(dtype) if algorithm == 'placeholder' or torch_dtype == torch.float: # pragma: no cover @@ -520,7 +520,7 @@ def _fake_quantize(algorithm, scheme, granularity, dtype, compute_dtype='uint8') """ version = get_torch_version() if scheme == 'asym_float' \ - and version.release >= Version("1.7.0").release: + and version.release >= Version("1.7.0").release: # pragma: no cover return torch.quantization.default_float_qparams_observer if algorithm == 'placeholder' or dtype == 'fp32': # pragma: no cover return _observer(algorithm, scheme, granularity, dtype, compute_dtype=compute_dtype) @@ -799,18 +799,18 @@ def __init__(self, framework_specific_info): self.approach = framework_specific_info['approach'] if framework_specific_info['approach'] in ["post_training_static_quant", "post_training_auto_quant"]: - if self.version.release < Version("1.7.0").release: + if self.version.release < Version("1.7.0").release: # pragma: no cover self.q_mapping = tq.default_mappings.DEFAULT_MODULE_MAPPING - elif self.version.release < Version("1.8.0").release: + elif self.version.release < Version("1.8.0").release: # pragma: no cover self.q_mapping = \ tq.quantization_mappings.get_static_quant_module_mappings() else: self.q_mapping = \ tq.quantization_mappings.get_default_static_quant_module_mappings() elif framework_specific_info['approach'] == "quant_aware_training": - if self.version.release < Version("1.7.0").release: + if self.version.release < Version("1.7.0").release: # pragma: no cover self.q_mapping = tq.default_mappings.DEFAULT_QAT_MODULE_MAPPING - elif self.version.release < Version("1.8.0").release: + elif self.version.release < Version("1.8.0").release: # pragma: no cover self.q_mapping = \ tq.quantization_mappings.get_qat_module_mappings() else: @@ -927,7 +927,7 @@ def eval_func(self, model, dataloader, postprocess, metrics, measurer, iteration results.append(output) if idx + 1 == iteration: break - except Exception as e: + except Exception as e: # pragma: no cover logger.warning("The dataloader didn't include label, will try input without label!") for idx, input in enumerate(dataloader): if (isinstance(input, dict) or isinstance(input, UserDict)): @@ -1999,7 +1999,7 @@ 
def _prepare(model, inplace=True, op_list=[], white_list=None):
                                  white_list=white_list,
                                  qconfig_parent=model.qconfig)
     # sanity check common API misusage
-    if not any(hasattr(m, 'qconfig') and m.qconfig for m in model.modules()):
+    if not any(hasattr(m, 'qconfig') and m.qconfig for m in model.modules()): # pragma: no cover
         logger.warn("None of the submodule got qconfig applied. Make sure you "
                     "passed correct configuration through `qconfig_dict` or "
                     "by assigning the `.qconfig` attribute directly on submodules")
@@ -2645,7 +2645,7 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None):
                 self.calib_func(q_model, dataloader, tmp_iterations=2)
         else:
             if self.approach in ['post_training_static_quant', 'post_training_auto_quant']:
-                if self.version.release < Version("1.12.0").release:
+                if self.version.release < Version("1.12.0").release: # pragma: no cover
                     try:
                         self.tmp_model = copy.deepcopy(model)
                     except Exception as e: # pragma: no cover
@@ -2796,7 +2796,7 @@ def _cfg_to_qconfig(self, tune_cfg):
               }
            """
        assert self.cfgs is not None, "No configure for IPEX int8 model..."
-       if self.version.release < Version("1.12.0").release:
+       if self.version.release < Version("1.12.0").release: # pragma: no cover
           for key in tune_cfg['op']:
               try:
                   scheme = tune_cfg['op'][key]['activation']['scheme']
@@ -3011,7 +3011,7 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops):
         with open(self.ipex_config_path, 'r') as f:
             self.cfgs = json.load(f)
-            if self.version.release < Version("1.12.0").release:
+            if self.version.release < Version("1.12.0").release: # pragma: no cover
                 self.default_cfgs = copy.deepcopy(self.cfgs)
                 self.fuse_ops = self.get_fuse_ops(self.cfgs)
                 for op_cfg in self.cfgs:
@@ -4071,7 +4071,7 @@ def fuse_fx_model(self, model, is_qat):
         """
         try:
             tmp_model = copy.deepcopy(model._model)
-        except Exception as e:
+        except Exception as e: # pragma: no cover
             tmp_model = model._model
             logger.warning("Deepcopy failed: {}, inplace=True now!".format(repr(e)))

diff --git a/neural_compressor/compression/callbacks.py b/neural_compressor/compression/callbacks.py
index 5fd1dd68ce1..17ba3aa8b99 100644
--- a/neural_compressor/compression/callbacks.py
+++ b/neural_compressor/compression/callbacks.py
@@ -211,28 +211,28 @@ def model(self, user_model):
         if user_model is None:
             return
-        if self.framework == 'NA':
+        if self.framework is None:
             self.framework = get_model_fwk_name(
                 user_model.model if isinstance(user_model, BaseModel) else user_model)
         if self.framework == "tensorflow":
             from ..model.tensorflow_model import get_model_type
             if not isinstance(user_model, BaseModel) and get_model_type(user_model) == 'keras'\
-                and self.cfg.quantization.backend == 'itex':
+                and self.conf.quantization.backend == 'itex':
                 self.framework = 'keras'
         if self.framework == "pytorch":
-            if self.cfg.quantization.backend == "default":
+            if self.conf.quantization.backend == "default":
                 self.framework = "pytorch_fx"
             elif self.conf.quantization.backend == "ipex":
                 self.framework = "pytorch_ipex"
-            self.cfg.quantization.framework = self.framework
+            self.conf.quantization.framework = self.framework
         if not isinstance(user_model, BaseModel):
             logger.warning("Force convert framework model to neural_compressor model.")
             if "tensorflow" in self.framework or self.framework == "keras":
-                if self.cfg.quantization and self.cfg.quantization.approach == "quant_aware_training":
-                    self._model = Model(user_model, backend='tensorflow_qat', device=self.cfg.device)
+                if self.conf.quantization and self.conf.quantization.approach == "quant_aware_training":
+                    self._model = Model(user_model, backend='tensorflow_qat', device=self.conf.device)
                 else:
-                    self._model = Model(user_model, backend=self.framework, device=self.cfg.device)
+                    self._model = Model(user_model, backend=self.framework, device=self.conf.device)
             else:
                 self._model = Model(user_model, backend=self.framework)
         else:

diff --git a/neural_compressor/config.py b/neural_compressor/config.py
index aa4f329f10d..0bdb967d70f 100644
--- a/neural_compressor/config.py
+++ b/neural_compressor/config.py
@@ -1728,10 +1728,12 @@ def model_name(self, model_name):

     @property
     def accuracy_criterion(self):
+        """Get the accuracy criterion."""
         return self._accuracy_criterion

     @accuracy_criterion.setter
     def accuracy_criterion(self, accuracy_criterion):
+        """Set the accuracy criterion."""
         if _check_value("accuracy_criterion", accuracy_criterion, AccuracyCriterion):
             self._accuracy_criterion = accuracy_criterion

@@ -1748,55 +1750,67 @@ def tuning_criterion(self, tuning_criterion):

     @property
     def device(self):
+        """Get device."""
         return self._device

     @device.setter
     def device(self, device):
+        """Set device."""
         if _check_value('device', device, str, ['cpu', 'gpu']):
             self._device = device

     @property
     def backend(self):
+        """Get backend."""
         return self._backend

     @backend.setter
     def backend(self, backend):
+        """Set backend."""
         if _check_value('backend', backend, str, [
                 'default', 'itex', 'ipex', 'onnxrt_trt_ep', 'onnxrt_cuda_ep']):
             self._backend = backend

     @property
     def outputs(self):
+        """Get outputs."""
         return self._outputs

     @outputs.setter
     def outputs(self, outputs):
+        """Set outputs."""
         if _check_value('outputs', outputs, str):
             self._outputs = outputs

     @property
     def inputs(self):
+        """Get inputs."""
         return self._inputs

     @inputs.setter
     def inputs(self, inputs):
+        """Set inputs."""
         if _check_value('inputs', inputs, str):
             self._inputs = inputs

     @property
     def framework(self):
+        """Get framework."""
         return self._framework

     @framework.setter
     def framework(self, framework):
+        """Set framework."""
         self._framework = framework

     @property
     def excluded_precisions(self):
+        """Get excluded precisions."""
         return self._excluded_precisions

     @excluded_precisions.setter
     def excluded_precisions(self, excluded_precisions):
+        """Set excluded precisions."""
         if _check_value("excluded_precisions", excluded_precisions, str, ["bf16", "fp16"]):
             self._excluded_precisions = excluded_precisions
             self._use_bf16 = "bf16" not in excluded_precisions

diff --git a/neural_compressor/mix_precision.py b/neural_compressor/mix_precision.py
index be5c4474c51..1f5bc0445ba 100644
--- a/neural_compressor/mix_precision.py
+++ b/neural_compressor/mix_precision.py
@@ -52,7 +52,8 @@ def __init__(self, conf=None):
         Args:
            conf (obj): The MixedPrecisionConfig class containing accuracy goal, tuning objective etc.
        """
-        self.conf = _Config(mixed_precision=conf, quantization=None, benchmark=None, pruning=None, distillation=None, nas=None)
+        self.conf = _Config(mixed_precision=conf, quantization=None, benchmark=None,
+                            pruning=None, distillation=None, nas=None)
        seed = options.random_seed
        random.seed(seed)
        np.random.seed(seed)

diff --git a/neural_compressor/quantization.py b/neural_compressor/quantization.py
index 7de9d4d2722..747cfa4d38d 100644
--- a/neural_compressor/quantization.py
+++ b/neural_compressor/quantization.py
@@ -100,7 +100,7 @@ def pre_proccess(self):
             with open(self.resume_file, 'rb') as f:
                 _resume = pickle.load(f).__dict__

-        if self._eval_func is None and self._eval_dataloader is None:
+        if self._eval_func is None and self._eval_dataloader is None: # pragma: no cover
             logger.info("Quantize model without tuning!")

         self.strategy = STRATEGIES[strategy](
@@ -125,7 +125,7 @@ def execute(self):
             self.strategy.traverse()
         except KeyboardInterrupt:
             pass
-        except Exception as e:
+        except Exception as e: # pragma: no cover
             logger.error("Unexpected exception {} happened during tuning.".format(repr(e)))
             import traceback
             traceback.print_exc()

From 13305aa2cae2f2cf2d3a02c902ba7f5bb055f87f Mon Sep 17 00:00:00 2001
From: "Cheng, Zixuan"
Date: Mon, 24 Apr 2023 18:53:20 +0800
Subject: [PATCH 092/103] fix for ut

Signed-off-by: Cheng, Zixuan
---
 neural_compressor/compression/callbacks.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/neural_compressor/compression/callbacks.py b/neural_compressor/compression/callbacks.py
index 17ba3aa8b99..82544b9a500 100644
--- a/neural_compressor/compression/callbacks.py
+++ b/neural_compressor/compression/callbacks.py
@@ -217,14 +217,17 @@ def model(self, user_model):
         if self.framework == "tensorflow":
             from ..model.tensorflow_model import get_model_type
             if not isinstance(user_model, BaseModel) and get_model_type(user_model) == 'keras'\
-                and self.conf.quantization.backend == 'itex':
+                and self.conf.backend == 'itex':
                 self.framework = 'keras'
         if self.framework == "pytorch":
-            if self.conf.quantization.backend == "default":
-                self.framework = "pytorch_fx"
-            elif self.conf.quantization.backend == "ipex":
-                self.framework = "pytorch_ipex"
-            self.conf.quantization.framework = self.framework
+            try:
+                if self.conf.quantization.backend == "default":
+                    self.framework = "pytorch_fx"
+                elif self.conf.quantization.backend == "ipex":
+                    self.framework = "pytorch_ipex"
+                self.conf.quantization.framework = self.framework
+            except Exception:
+                pass
         if not isinstance(user_model, BaseModel):
             logger.warning("Force convert framework model to neural_compressor model.")

From 5202cf7700be09af6e94880406b2ebbce90ac6fb Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Tue, 25 Apr 2023 14:08:46 +0800
Subject: [PATCH 093/103] fixed format config

Signed-off-by: yiliu30
---
 neural_compressor/benchmark.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py
index 7d4cec75b41..7936049e986 100644
--- a/neural_compressor/benchmark.py
+++ b/neural_compressor/benchmark.py
@@ -327,7 +327,7 @@ def run_instance(self):
                                'random_seed': options.random_seed,
                                'backend': cfg.backend \
                                    if cfg.backend is not None else 'default',
-                               'format': None}
+                               'format': 'default'}
     framework = cfg.framework.lower()
     if 'tensorflow' in framework:
         framework_specific_info.update({"inputs": cfg.inputs, \

From c056c3af0adb7c872457b2aa2599a76bb7cc0e05 Mon Sep 17 00:00:00 2001
From: "Cheng, Zixuan"
Date: Tue, 25 Apr 2023 16:20:52
+0800 Subject: [PATCH 094/103] fix benchmark default value Signed-off-by: Cheng, Zixuan --- neural_compressor/benchmark.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py index 7d4cec75b41..c78ad91c24f 100644 --- a/neural_compressor/benchmark.py +++ b/neural_compressor/benchmark.py @@ -327,12 +327,12 @@ def run_instance(self): 'random_seed': options.random_seed, 'backend': cfg.backend \ if cfg.backend is not None else 'default', - 'format': None} + 'format': 'default'} framework = cfg.framework.lower() if 'tensorflow' in framework: framework_specific_info.update({"inputs": cfg.inputs, \ "outputs": cfg.outputs, \ - "recipes": None, \ + "recipes": {}, \ 'workspace_path': options.workspace}) if framework == 'keras': framework_specific_info.update({'workspace_path': options.workspace}) @@ -475,6 +475,7 @@ def model(self, user_model): if cfg.framework is None: assert not isinstance(user_model, BaseModel), \ "Please pass an original framework model but not neural compressor model!" + import pdb; pdb.set_trace() self.framework = get_model_fwk_name(user_model) if self.framework == "tensorflow": from .model.tensorflow_model import get_model_type From 971659e6a83341b423575ecfbc2ad39f20d8cb57 Mon Sep 17 00:00:00 2001 From: "Cheng, Zixuan" Date: Tue, 25 Apr 2023 16:26:22 +0800 Subject: [PATCH 095/103] minor fix Signed-off-by: Cheng, Zixuan --- neural_compressor/benchmark.py | 1 - 1 file changed, 1 deletion(-) diff --git a/neural_compressor/benchmark.py b/neural_compressor/benchmark.py index c78ad91c24f..0ce19ab6dd1 100644 --- a/neural_compressor/benchmark.py +++ b/neural_compressor/benchmark.py @@ -475,7 +475,6 @@ def model(self, user_model): if cfg.framework is None: assert not isinstance(user_model, BaseModel), \ "Please pass an original framework model but not neural compressor model!" 
- import pdb; pdb.set_trace() self.framework = get_model_fwk_name(user_model) if self.framework == "tensorflow": from .model.tensorflow_model import get_model_type From 3935732c2879b1c8ebd64a7b2d1a14afde79c3f9 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 25 Apr 2023 21:24:51 +0800 Subject: [PATCH 096/103] clean code Signed-off-by: yiliu30 --- neural_compressor/strategy/strategy.py | 289 +++++++++--------- .../strategy/utils/tuning_structs.py | 3 +- test/strategy/test_bayesian.py | 8 +- test/strategy/test_exhaustive.py | 16 +- test/strategy/test_mse_v2_2.x.py | 30 +- test/strategy/test_random.py | 16 +- test/strategy/test_tpe.py | 14 +- test/strategy/test_tuning_space.py | 2 +- 8 files changed, 190 insertions(+), 188 deletions(-) diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 507a8664289..0dfa474f211 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -17,39 +17,31 @@ """The base class for tuning strategy.""" -from abc import abstractmethod import os import math import copy -from copy import deepcopy import pickle +import yaml +import sys + +from abc import abstractmethod +from copy import deepcopy from collections import OrderedDict, defaultdict from pathlib import Path -import yaml +from time import time import numpy as np from neural_compressor.adaptor.tensorflow import TensorFlowAdaptor from ..config import MixedPrecisionConfig, options from ..objective import MultiObjective from ..adaptor import FRAMEWORKS -from ..utils.utility import Statistics -from ..utils.utility import fault_tolerant_file, GLOBAL_STATE, MODE +from ..utils.utility import Statistics, fault_tolerant_file, GLOBAL_STATE, MODE, LazyImport, DotDict from ..utils.create_obj_from_config import create_eval_func -from ..utils.utility import LazyImport from ..utils import logger from ..version import __version__ -from ..utils.utility import DotDict from ..algorithm import AlgorithmScheduler, ALGORITHMS -import copy -import numpy as np -from collections import OrderedDict -from time import time -from ..utils import logger -import sys - - -from .utils.tuning_space import TuningItem, TuningSpace +from .utils.tuning_space import TuningSpace from .utils.tuning_structs import OpTuningConfig from .utils.constant import FALLBACK_RECIPES_SET @@ -78,32 +70,6 @@ def strategy_registry(cls): class TuneStrategy(object): """Basic class for tuning strategy.""" - def _check_tuning_status(self): - if self.eval_func: - self._not_tuning = False - logger.info("Execute the tuning process due to detect the evaluation function.") - if self.eval_dataloader: - logger.warning("Ignore the evaluation dataloader due to evaluation function exist.") - if self.eval_metric: - logger.warning("Ignore the evaluation metric due to evaluation function exist.") - return - if self.eval_dataloader and self.eval_metric: - self._not_tuning = False - logger.info("Create evaluation function according to evaluation dataloader and metric\ - and Execute the tuning process.") - return - else: - if self.eval_dataloader: - assert self.eval_metric, "Detected evaluation dataloader but no evaluation metric, " \ - "Please provide both to perform tuning process or neither for the default quantization." - if self.eval_metric: - assert self.eval_dataloader, "Detected evaluation metric but no evaluation dataloader, "\ - "Please provide both to perform tuning process or neither for the default quantization." 
-        if self._not_tuning:
-            logger.info("Quantize the model with default configuration without evaluating the model.\
-                To perform the tuning process, please either provide an eval_func or provide an\
-                eval_dataloader an eval_metric.")
-
     def __init__(self,
                  model,
                  conf,
@@ -139,7 +105,7 @@ def __init__(self,
         self.eval_func = eval_func
         self.eval_dataloader = eval_dataloader
         self.eval_metric = eval_metric
-        # not tuning equals to performance only
+        # not tuning equals performance-only in 1.x
         self._not_tuning = True
         self._check_tuning_status()
         self.q_func = q_func
@@ -162,18 +128,24 @@ def __init__(self,
         self.last_tune_cfg = None
         self.best_qmodel = None
         self.best_tune_result = None
-        self.best_tuning_cfg = None # track the best tuning config correspondence to the best quantized model
-        self.cur_best_acc = self.initial_best_acc() # track the current best accuracy
-        self.cur_best_tuning_cfg = {} # track tuning cfg with the current best accuracy
+        # track the best tuning config correspondence to the best quantized model
+        self.best_tuning_cfg = None
+        # track the current best accuracy
+        self.cur_best_acc = self.initial_best_acc()
+        # track tuning cfg with the current best accuracy
+        self.cur_best_tuning_cfg = {}
         self.re_quant = False
         self.trials_count = 0
+
+        # query capability and build tuning space
         self.capability = self.adaptor.query_fw_capability(model)
         logger.debug(self.capability)
         self.set_tuning_space(self.config)
-        #For algo scheduler
+        # set algo scheduler
         self.algo_scheduler = AlgorithmScheduler(self.config.recipes)
-        self.algo_scheduler.dataloader = self.calib_dataloader # reuse the calibration iteration
+        # reuse the calibration iteration
+        self.algo_scheduler.dataloader = self.calib_dataloader
         self.algo_scheduler.origin_model = self.model
         self.algo_scheduler.adaptor = self.adaptor
@@ -184,12 +156,12 @@ def __init__(self,
         self.fallback_start_point = 0
         self.metric_met_point = 0

-        # for recipes
-        # {recipe name: the list of supported value}
+        # set recipes
+        # recipe name -> the list of supported values
         self._tuning_recipes = OrderedDict()
-        # {recipe name: the default value when not tuning}
+        # recipe name -> the default value when not tuning
         self._tuning_recipes_default_values = {}
-        # {recipe name: the value specified by user}
+        # recipe name -> the value specified by user
         self._not_tuning_recipes_values = {}
         self._initialize_recipe()
         self.applied_all_recipes_flag = False
@@ -197,6 +169,37 @@ def __init__(self,
         self._resume = resume
         if self._resume is not None:
             self.setup_resume(resume)
+    def _check_tuning_status(self):
+        # got eval func
+        if self.eval_func:
+            self._not_tuning = False
+            logger.info("Executing the tuning process because an evaluation function was detected.")
+            if self.eval_dataloader:
+                logger.warning("Ignoring the evaluation dataloader because an evaluation function exists.")
+            if self.eval_metric:
+                logger.warning("Ignoring the evaluation metric because an evaluation function exists.")
+            return
+        # got eval dataloader + eval metric => eval func
+        if self.eval_dataloader and self.eval_metric:
+            self._not_tuning = False
+            logger.info("Creating an evaluation function from the evaluation dataloader and metric,\
+                and executing the tuning process.")
+            return
+        else:
+            # got eval dataloader but not eval metric
+            if self.eval_dataloader:
+                assert self.eval_metric, "Detected evaluation dataloader but no evaluation metric, " \
+                    "Please provide both to perform tuning process or neither for the default quantization."
+            # got eval metric but not eval dataloader
+            if self.eval_metric:
+                assert self.eval_dataloader, "Detected evaluation metric but no evaluation dataloader, "\
+                    "Please provide both to perform tuning process or neither for the default quantization."
+            # not tuning
+            if self._not_tuning:
+                logger.info("Quantize the model with default configuration without evaluating the model.\
+                    To perform the tuning process, please either provide an eval_func or provide an\
+                    eval_dataloader and an eval_metric.")
+
     def _initialize_config(self, conf):
         """Init the tuning config based on user conf.
@@ -223,6 +226,97 @@ def next_tune_cfg(self):
         """
         raise NotImplementedError
+    def traverse(self):
+        """Traverse the tuning space.
+
+        The main traverse logic which could be override by some concrete strategy which needs more hooks.
+        """
+        self._eval_baseline()
+        if self.config.use_distributed_tuning:
+            logger.info("use distributed traverse: {}".format(self.config.use_distributed_tuning))
+            return self.distributed_traverse()
+        traverse_start_time = time()
+        for op_tuning_cfg in self.next_tune_cfg():
+            tuning_start_time = time()
+            tune_cfg = self._tune_cfg_converter(op_tuning_cfg)
+            self.trials_count += 1
+            tuning_history = self._find_tuning_history(tune_cfg)
+            if tuning_history and self.trials_count < self.config.tuning_criterion.max_trials:
+                self.last_tune_result = tuning_history['last_tune_result']
+                self.best_tune_result = tuning_history['best_tune_result']
+                logger.warn("Find evaluated tuning config, skip.")
+                continue
+            self._remove_redundant_qmodel()
+            logger.debug("Dump current tuning configuration:")
+            logger.debug(tune_cfg)
+            self.tuning_times += 1
+            # set the parameter for pre quantization algos and run
+            self.set_param_for_pre_quantization_algos(self.algo_scheduler, tune_cfg, self.model)
+            self.model = self.algo_scheduler('pre_quantization')
+            # quantize
+            q_model = self.adaptor.quantize(copy.deepcopy(tune_cfg), self.model, self.calib_dataloader, self.q_func)
+            assert self.adaptor.pre_optimized_model
+            # set the parameter for post quantization algos and run
+            self.set_param_for_post_quantization_algos(self.algo_scheduler, tune_cfg,\
+                                                       self.adaptor.pre_optimized_model, q_model)
+            self.last_qmodel = self.algo_scheduler('post_quantization')
+            self.last_tune_cfg = copy.deepcopy(tune_cfg)
+            # remove the reference to model
+            self.algo_scheduler.reset_exec_algorithms()
+            assert self.last_qmodel
+            # return the last quantized model as a result if not tuning.
+ if self._not_tuning: + self.best_qmodel = self.last_qmodel + self._add_tuning_history(copy.deepcopy(tune_cfg), (-1, [0]), q_config=self.last_qmodel.q_config) + return + self.last_tune_result = self._evaluate(self.last_qmodel) + self.cur_best_acc, self.cur_best_tuning_cfg = self.update_best_op_tuning_cfg(op_tuning_cfg) + need_stop = self.stop(self.config.tuning_criterion.timeout, self.trials_count) + + # record the tuning history + saved_tune_cfg = copy.deepcopy(tune_cfg) + saved_last_tune_result = copy.deepcopy(self.last_tune_result) + self._add_tuning_history(saved_tune_cfg, + saved_last_tune_result, + q_config=q_model.q_config) + self.tune_result_record.append(copy.deepcopy(self.last_tune_result)) + self.tune_cfg = tune_cfg + now_time = time() + acc_res_msg = "" + performance_res_msg = "" + if self.tuning_result_data: + acc_res_msg = "[ " + "| ".join(self.tuning_result_data[0]) + " ]" + performance_res_msg = "[ " + "| ".join(self.tuning_result_data[1]) + " ]" + logger.debug(f"*** The accuracy of last tuning is: {acc_res_msg}") + logger.debug(f"*** The performance of last tuning is: {performance_res_msg}") + logger.debug(f"*** The last tuning time: {(now_time - tuning_start_time):.2f} s") + logger.debug(f"*** The tuning process lasted time: {(now_time - traverse_start_time):.2f} s") + + self._dump_tuning_process_statistics() + if need_stop: + if self.re_quant: + logger.info("*** Do not stop the tuning process, re-quantize the ops.") + continue + # recover the best quantized model from tuning config + self._recover_best_qmodel_from_tuning_cfg() + if self.config.diagnosis: + logger.debug(f'*** Start to do diagnosis (inspect tensor).') + self._diagnosis() + if self.use_multi_objective and len(self.tune_result_record) > 1 and \ + self.best_tune_result is not None: + best_trail, best_result = self.objectives.best_result(self.tune_result_record, + copy.deepcopy(self.baseline)) + if best_result != self.best_tune_result: + from neural_compressor.utils.utility import recover + self.best_qmodel = recover(self.model.model, + os.path.join(options.workspace, 'history.snapshot'), + best_trail) + logger.debug(f"*** Update the best qmodel by recovering from history.") + self.best_tune_result = best_result + self._dump_tuning_process_statistics() + break + self._recover_best_qmodel_from_tuning_cfg() + def _initialize_recipe(self): """Divide the recipe into two categories tuning/not tuning.""" from .utils.utility import get_adaptor_name @@ -608,97 +702,6 @@ def set_param_for_post_quantization_algos(self, algo_scheduler, tune_cfg, pre_op algo_scheduler.append_algorithm('post_quantization', w_algo) logger.debug(f"Add weight correction as the post quantization algo.") - def traverse(self): - """Traverse the tuning space. - - The main traverse logic which could be override by some concrete strategy which needs more hooks. 
- """ - self._eval_baseline() - if self.config.use_distributed_tuning: - logger.info("use distributed traverse: {}".format(self.config.use_distributed_tuning)) - return self.distributed_traverse() - traverse_start_time = time() - for op_tuning_cfg in self.next_tune_cfg(): - tuning_start_time = time() - tune_cfg = self._tune_cfg_converter(op_tuning_cfg) - self.trials_count += 1 - tuning_history = self._find_tuning_history(tune_cfg) - if tuning_history and self.trials_count < self.config.tuning_criterion.max_trials: - self.last_tune_result = tuning_history['last_tune_result'] - self.best_tune_result = tuning_history['best_tune_result'] - logger.warn("Find evaluated tuning config, skip.") - continue - self._remove_redundant_qmodel() - logger.debug("Dump current tuning configuration:") - logger.debug(tune_cfg) - self.tuning_times += 1 - # set the parameter for pre quantization algos and run - self.set_param_for_pre_quantization_algos(self.algo_scheduler, tune_cfg, self.model) - self.model = self.algo_scheduler('pre_quantization') - # quantize - q_model = self.adaptor.quantize(copy.deepcopy(tune_cfg), self.model, self.calib_dataloader, self.q_func) - assert self.adaptor.pre_optimized_model - # set the parameter for post quantization algos and run - self.set_param_for_post_quantization_algos(self.algo_scheduler, tune_cfg, self.adaptor.pre_optimized_model, - q_model) - self.last_qmodel = self.algo_scheduler('post_quantization') - self.last_tune_cfg = copy.deepcopy(tune_cfg) - # Remove the reference to model - self.algo_scheduler.reset_exec_algorithms() - assert self.last_qmodel - # Return the last quantized model as a result. if performance only. - if self._not_tuning: - self.best_qmodel = self.last_qmodel - self._add_tuning_history(copy.deepcopy(tune_cfg), (-1, [0]), q_config=self.last_qmodel.q_config) - return - self.last_tune_result = self._evaluate(self.last_qmodel) - self.cur_best_acc, self.cur_best_tuning_cfg = self.update_best_op_tuning_cfg(op_tuning_cfg) - need_stop = self.stop(self.config.tuning_criterion.timeout, self.trials_count) - - # record the tuning history - saved_tune_cfg = copy.deepcopy(tune_cfg) - saved_last_tune_result = copy.deepcopy(self.last_tune_result) - self._add_tuning_history(saved_tune_cfg, - saved_last_tune_result, - q_config=q_model.q_config) - self.tune_result_record.append(copy.deepcopy(self.last_tune_result)) - self.tune_cfg = tune_cfg - now_time = time() - acc_res_msg = "" - performance_res_msg = "" - if self.tuning_result_data: - acc_res_msg = "[ " + "| ".join(self.tuning_result_data[0]) + " ]" - performance_res_msg = "[ " + "| ".join(self.tuning_result_data[1]) + " ]" - logger.debug(f"*** The accuracy of last tuning is: {acc_res_msg}") - logger.debug(f"*** The performance of last tuning is: {performance_res_msg}") - logger.debug(f"*** The last tuning time: {(now_time - tuning_start_time):.2f} s") - logger.debug(f"*** The tuning process lasted time: {(now_time - traverse_start_time):.2f} s") - - self._dump_tuning_process_statistics() - if need_stop: - if self.re_quant: - logger.info("*** Do not stop the tuning process, re-quantize the ops.") - continue - # recover the best quantized model from tuning config - self._recover_best_qmodel_from_tuning_cfg() - if self.config.diagnosis: - logger.debug(f'*** Start to do diagnosis (inspect tensor).') - self._diagnosis() - if self.use_multi_objective and len(self.tune_result_record) > 1 and \ - self.best_tune_result is not None: - best_trail, best_result = self.objectives.best_result(self.tune_result_record, - 
copy.deepcopy(self.baseline)) - if best_result != self.best_tune_result: - from neural_compressor.utils.utility import recover - self.best_qmodel = recover(self.model.model, - os.path.join(options.workspace, 'history.snapshot'), - best_trail) - logger.debug(f"*** Update the best qmodel by recovering from history.") - self.best_tune_result = best_result - self._dump_tuning_process_statistics() - break - self._recover_best_qmodel_from_tuning_cfg() - def _remove_redundant_qmodel(self): """Remove the redundant quantized model to reduce memory use. diff --git a/neural_compressor/strategy/utils/tuning_structs.py b/neural_compressor/strategy/utils/tuning_structs.py index 778aa74fdd9..f98dba9ab27 100644 --- a/neural_compressor/strategy/utils/tuning_structs.py +++ b/neural_compressor/strategy/utils/tuning_structs.py @@ -18,8 +18,7 @@ """Tuning structure.""" from typing import Dict -from .constant import QUANT_MODE_SET, TUNING_ITEMS_LST, PRECISION_LIST -from ...utils import logger +from .constant import TUNING_ITEMS_LST, PRECISION_LIST class OpTuningConfig: """Op tuning config.""" diff --git a/test/strategy/test_bayesian.py b/test/strategy/test_bayesian.py index daf21b565ce..cd2ff8ad2c0 100644 --- a/test/strategy/test_bayesian.py +++ b/test/strategy/test_bayesian.py @@ -184,7 +184,7 @@ def create_test_graph(): def objective_func(params): return params['x1']**2 + params['x2'] -class TestQuantization(unittest.TestCase): +class TestBayesianStrategy(unittest.TestCase): @classmethod def setUpClass(self): @@ -230,13 +230,13 @@ def test_run_bayesian_max_trials(self): # tuning and accuracy criterion tune_cri = TuningCriterion(strategy='bayesian', max_trials=3) acc_cri = AccuracyCriterion(tolerable_loss=0.01) - + op_name_dict = { "conv1": { "activation": {"dtype": ["fp32"]}, }, } - + acc = [0, 1, 0.9, 0.9, 1] def fake_eval(model): acc.pop(0) @@ -249,7 +249,7 @@ def fake_eval(model): calib_dataloader=dataloader, eval_func=fake_eval) self.assertNotEqual(q_model, None) - + def test_bayesian_opt_class(self): from neural_compressor.strategy.bayesian import BayesianOptimization diff --git a/test/strategy/test_exhaustive.py b/test/strategy/test_exhaustive.py index 1bb01b95c70..bf1fd5457c3 100644 --- a/test/strategy/test_exhaustive.py +++ b/test/strategy/test_exhaustive.py @@ -35,7 +35,7 @@ def build_fake_model(): tf.import_graph_def(graph_def, name='') return graph -class TestQuantization(unittest.TestCase): +class TestExhaustiveStrategy(unittest.TestCase): @classmethod def setUpClass(self): @@ -49,19 +49,19 @@ def test_ru_exhaustive_one_trial(self): from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion from neural_compressor.data import Datasets, DATALOADERS - + # dataset and dataloader dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) dataloader = DATALOADERS["tensorflow"](dataset) - + # tuning and accuracy criterion tune_cri = TuningCriterion(strategy='exhaustive', max_trials=1) acc_cri = AccuracyCriterion(tolerable_loss=0.01) - + conf = PostTrainingQuantConfig(quant_level=1, tuning_criterion=tune_cri, accuracy_criterion=acc_cri) def fake_eval(model): return 1 - + q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader=dataloader, @@ -72,15 +72,15 @@ def test_ru_exhaustive_max_trials(self): from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion from neural_compressor.data import Datasets, DATALOADERS - + 
# dataset and dataloader dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) dataloader = DATALOADERS["tensorflow"](dataset) - + # tuning and accuracy criterion tune_cri = TuningCriterion(strategy='exhaustive', max_trials=3) acc_cri = AccuracyCriterion(tolerable_loss=0.01) - + acc = [0, 1, 0.9, 0.9, 1] def fake_eval(model): acc.pop(0) diff --git a/test/strategy/test_mse_v2_2.x.py b/test/strategy/test_mse_v2_2.x.py index 30431bbd185..f7dba33a490 100644 --- a/test/strategy/test_mse_v2_2.x.py +++ b/test/strategy/test_mse_v2_2.x.py @@ -53,25 +53,25 @@ def tearDownClass(self): shutil.rmtree('./saved', ignore_errors=True) shutil.rmtree('nc_workspace', ignore_errors=True) - def test_quantization_saved_tf(self): + def test_mse_v2_tf(self): i = [0] # use a mutable type (list) to wrap the int object def fake_eval_func(_): # 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 eval_list = [0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1] i[0] += 1 return eval_list[i[0]] - + from neural_compressor.quantization import fit from neural_compressor.config import TuningCriterion, PostTrainingQuantConfig from neural_compressor.data import Datasets, DATALOADERS dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) dataloader = DATALOADERS['tensorflow'](dataset) - + conf = PostTrainingQuantConfig( approach="static", quant_level=1, tuning_criterion=TuningCriterion(strategy="mse_v2")) - + q_model = fit( model=self.tf_model, conf=conf, @@ -79,21 +79,21 @@ def fake_eval_func(_): eval_dataloader=dataloader, eval_func=fake_eval_func) self.assertIsNotNone(q_model) - - def test_quantization_saved_tf_with_confidence_batches(self): + + def test_mse_v2_tf_with_confidence_batches(self): i = [0] # use a mutable type (list) to wrap the int object def fake_eval_func(_): # 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 eval_list = [0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1] i[0] += 1 return eval_list[i[0]] - + from neural_compressor.quantization import fit from neural_compressor.config import TuningCriterion, PostTrainingQuantConfig from neural_compressor.data import Datasets, DATALOADERS dataset = Datasets("tensorflow")["dummy"](((100, 3, 3, 1))) dataloader = DATALOADERS['tensorflow'](dataset) - + conf = PostTrainingQuantConfig( approach="static", quant_level=1, @@ -102,7 +102,7 @@ def fake_eval_func(_): strategy_kwargs={ "confidence_batches": 5, })) - + q_model = fit( model=self.tf_model, conf=conf, @@ -110,25 +110,25 @@ def fake_eval_func(_): eval_dataloader=dataloader, eval_func=fake_eval_func) self.assertIsNotNone(q_model) - - def test_quantization_saved_torch(self): + + def test_mse_v2_saved_torch(self): i = [0] def fake_eval_func(model): acc_lst = [1, 1, 0, 0, 0, 0, 1, 1.1, 1.5, 1.1] i[0] += 1 return acc_lst[i[0]] - + from neural_compressor.quantization import fit from neural_compressor.config import TuningCriterion, PostTrainingQuantConfig from neural_compressor.data import Datasets, DATALOADERS dataset = Datasets("pytorch")["dummy"](((1, 3, 224, 224))) dataloader = DATALOADERS['pytorch'](dataset) - + conf = PostTrainingQuantConfig( approach="static", quant_level=1, tuning_criterion=TuningCriterion(strategy="mse_v2")) - + q_model = fit( model=self.torch_model, conf=conf, @@ -136,6 +136,6 @@ def fake_eval_func(model): eval_dataloader=dataloader, eval_func=fake_eval_func) self.assertIsNotNone(q_model) - + if __name__ == "__main__": unittest.main() diff --git a/test/strategy/test_random.py b/test/strategy/test_random.py index 8f4a35d93f6..d6fd125f926 100644 --- a/test/strategy/test_random.py +++ b/test/strategy/test_random.py @@ -41,7 +41,7 @@ def 
build_fake_model(): return graph -class TestQuantization(unittest.TestCase): +class TestRandomStrategy(unittest.TestCase): @classmethod def setUpClass(self): @@ -55,19 +55,19 @@ def test_ru_random_one_trial(self): from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion from neural_compressor.data import Datasets, DATALOADERS - + # dataset and dataloader dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) dataloader = DATALOADERS["tensorflow"](dataset) - + # tuning and accuracy criterion tune_cri = TuningCriterion(strategy='random', max_trials=1) acc_cri = AccuracyCriterion(tolerable_loss=0.01) - + conf = PostTrainingQuantConfig(quant_level=1, tuning_criterion=tune_cri, accuracy_criterion=acc_cri) def fake_eval(model): return 1 - + q_model = fit(model=self.constant_graph, conf=conf, calib_dataloader=dataloader, @@ -78,15 +78,15 @@ def test_ru_random_max_trials(self): from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion from neural_compressor.data import Datasets, DATALOADERS - + # dataset and dataloader dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) dataloader = DATALOADERS["tensorflow"](dataset) - + # tuning and accuracy criterion tune_cri = TuningCriterion(strategy='random', max_trials=3) acc_cri = AccuracyCriterion(tolerable_loss=0.01) - + acc = [0, 1, 0.9, 0.9, 1] def fake_eval(model): acc.pop(0) diff --git a/test/strategy/test_tpe.py b/test/strategy/test_tpe.py index d367bdd5c1e..9c8e4e6c1c0 100644 --- a/test/strategy/test_tpe.py +++ b/test/strategy/test_tpe.py @@ -38,7 +38,7 @@ def build_fake_model(): tf.import_graph_def(graph_def, name='') return graph -class TestQuantization(unittest.TestCase): +class TestTpeStrategy(unittest.TestCase): @classmethod def setUpClass(self): @@ -48,15 +48,15 @@ def setUpClass(self): def tearDownClass(self): shutil.rmtree("saved", ignore_errors=True) - def test_run_tpe_one_trial(self): + def test_run_tpe_one_trial(self): from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion from neural_compressor.data import Datasets, DATALOADERS - + # dataset and dataloader dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) dataloader = DATALOADERS["tensorflow"](dataset) - + # tuning and accuracy criterion tune_cri = TuningCriterion(strategy='tpe', max_trials=200) acc_cri = AccuracyCriterion(tolerable_loss=0.01) @@ -72,15 +72,15 @@ def test_run_tpe_max_trials(self): from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion, AccuracyCriterion from neural_compressor.data import Datasets, DATALOADERS - + # dataset and dataloader dataset = Datasets("tensorflow")["dummy"]((100, 3, 3, 1), label=True) dataloader = DATALOADERS["tensorflow"](dataset) - + # tuning and accuracy criterion tune_cri = TuningCriterion(strategy='tpe', max_trials=5) acc_cri = AccuracyCriterion(tolerable_loss=0.01) - + from neural_compressor.metric import METRICS metrics = METRICS('tensorflow') top1 = metrics['topk']() diff --git a/test/strategy/test_tuning_space.py b/test/strategy/test_tuning_space.py index a833313e924..310fb5f3988 100644 --- a/test/strategy/test_tuning_space.py +++ b/test/strategy/test_tuning_space.py @@ -179,7 +179,7 @@ } -class TestTuningSampler(unittest.TestCase): +class 
TestTuningSpace(unittest.TestCase): def setUp(self) -> None: self.capability = { 'calib': {'calib_sampling_size': [1, 10, 50]}, From 149a55792b58feab3e5bef636381ecff389e9ac4 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 26 Apr 2023 09:17:41 +0800 Subject: [PATCH 097/103] enhance auto strategy Signed-off-by: yiliu30 --- neural_compressor/contrib/strategy/sigopt.py | 2 +- neural_compressor/contrib/strategy/tpe.py | 1 + neural_compressor/strategy/auto.py | 5 + .../strategy/auto_mixed_precision.py | 3 +- neural_compressor/strategy/strategy.py | 196 +++++++++++++++--- test/strategy/test_new_datatype.py | 10 +- 6 files changed, 179 insertions(+), 38 deletions(-) diff --git a/neural_compressor/contrib/strategy/sigopt.py b/neural_compressor/contrib/strategy/sigopt.py index 227b21edd9b..336c9adf91c 100644 --- a/neural_compressor/contrib/strategy/sigopt.py +++ b/neural_compressor/contrib/strategy/sigopt.py @@ -194,7 +194,7 @@ def traverse(self): This is SigOpt version of traverse -- with additional constraints setting to HPO. """ - self._eval_baseline() + self._prepare_tuning() baseline_msg = '[Accuracy: {:.4f}'.format(self.baseline[0]) + \ ''.join([', {}: {:.4f}'.format(x,y) for x,y in zip( \ diff --git a/neural_compressor/contrib/strategy/tpe.py b/neural_compressor/contrib/strategy/tpe.py index 2f80cb7751b..18bf0a76105 100644 --- a/neural_compressor/contrib/strategy/tpe.py +++ b/neural_compressor/contrib/strategy/tpe.py @@ -191,6 +191,7 @@ def _configure_hpopt_search_space_and_params(self, search_space): def traverse(self): """Tpe traverse logic.""" logger.info("Start to run tpe strategy.") + self._prepare_tuning() # prepare log file trials_file = os.path.join(os.path.dirname(self.history_path), 'tpe_trials.csv') best_result_file = os.path.join(os.path.dirname(self.history_path), 'tpe_best_result.csv') diff --git a/neural_compressor/strategy/auto.py b/neural_compressor/strategy/auto.py index b79c0a3cfc1..bdd71dcff73 100644 --- a/neural_compressor/strategy/auto.py +++ b/neural_compressor/strategy/auto.py @@ -83,9 +83,14 @@ def sequential_traverse(self): if pre_strategy: #TODO add tuning history from the previous stage to current stage. 
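+                # hand over the components the previous stage already built
+                # (adaptor, capability, tuning space, algo scheduler) so the
+                # next strategy does not pay the initialization cost again.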
+ strategy.adaptor = pre_strategy.adaptor + strategy.framework = pre_strategy.framework strategy.baseline = deepcopy(pre_strategy.baseline) strategy.trials_count = pre_strategy.trials_count strategy.objectives.baseline = deepcopy(pre_strategy.baseline) + strategy.capability = pre_strategy.capability + strategy.tuning_space = pre_strategy.tuning_space + strategy.algo_scheduler = pre_strategy.algo_scheduler pre_strategy = strategy strategy.traverse() self.best_qmodel = strategy.best_qmodel diff --git a/neural_compressor/strategy/auto_mixed_precision.py b/neural_compressor/strategy/auto_mixed_precision.py index 78e045e72ed..fe3f5663cd8 100644 --- a/neural_compressor/strategy/auto_mixed_precision.py +++ b/neural_compressor/strategy/auto_mixed_precision.py @@ -128,8 +128,7 @@ def next_tune_cfg(self): def traverse(self): """Traverse the tuning space according to auto-mixed precision strategy.""" - # get fp32 model baseline - self._eval_baseline() + self._prepare_tuning() for op_tuning_cfg in self.next_tune_cfg(): # add tune_cfg here as quantize use tune_cfg diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 0dfa474f211..5d989b3c697 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -111,18 +111,19 @@ def __init__(self, self.q_func = q_func self.q_hooks = q_hooks GLOBAL_STATE.STATE = MODE.QUANTIZATION - framework, framework_specific_info = self._set_framework_info(q_dataloader, q_func) - self.adaptor = FRAMEWORKS[framework](framework_specific_info) - self.framework = framework - self.set_q_func() - self._set_objectives() + # following attributes may set by pre strategy: + # adaptor, framework, baseline, trials_count, capability, tuning_space, algo_scheduler + self._adaptor = None + self._framework = None + self.check_q_func() + self.objectives = self._set_objectives() self.tune_data = {} self.tune_result_record = [] self.tuning_history = [] self.tuning_result_data = [] - self.baseline = None + self._baseline = None self.last_tune_result = None self.last_qmodel = None self.last_tune_cfg = None @@ -131,23 +132,15 @@ def __init__(self, # track the best tuning config correspondence to the best quantized model self.best_tuning_cfg = None # track the current best accuracy - self.cur_best_acc = self.initial_best_acc() + self.cur_best_acc = None # track tuning cfg with the current best accuracy self.cur_best_tuning_cfg = {} self.re_quant = False - self.trials_count = 0 - # query capability and build tuning space - self.capability = self.adaptor.query_fw_capability(model) - logger.debug(self.capability) - self.set_tuning_space(self.config) - - # set algo scheduler - self.algo_scheduler = AlgorithmScheduler(self.config.recipes) - # reuse the calibration iteration - self.algo_scheduler.dataloader = self.calib_dataloader - self.algo_scheduler.origin_model = self.model - self.algo_scheduler.adaptor = self.adaptor + self._trials_count = 0 + self._capability = None + self._tuning_space = None + self._algo_scheduler = None self._optype_statistics = None self.fallback_stats_baseline = None @@ -169,6 +162,146 @@ def __init__(self, self._resume = resume if self._resume is not None: self.setup_resume(resume) + @property + def adaptor(self): + """ + Gets the adaptor. + """ + return self._adaptor + + @adaptor.setter + def adaptor(self, value): + """ + Sets the adaptor. + + Args: + value: The new value for the adaptor. + """ + self._adaptor = value + + @property + def framework(self): + """ + Gets the framework. 
+ """ + return self._framework + + @framework.setter + def framework(self, value): + """ + Sets the framework. + + Args: + value: The new value for the framework. + """ + self._framework = value + + @property + def baseline(self): + """ + Gets the baseline. + """ + return self._baseline + + @baseline.setter + def baseline(self, value): + """ + Sets the baseline. + + Args: + value (float): The new value for the baseline. + """ + self._baseline = value + + @property + def trials_count(self): + """ + Gets the trials_count. + """ + return self._trials_count + + @trials_count.setter + def trials_count(self, value): + """ + Sets the trials_count. + + Args: + value (int): The new value for the trials_count. + """ + self._trials_count = value + + @property + def capability(self): + """ + Gets the capability. + """ + return self._capability + + @capability.setter + def capability(self, value): + """ + Sets the capability. + + Args: + value: The new value for the capability. + """ + self._capability = value + + @property + def tuning_space(self): + """ + Gets the tuning_space. + """ + return self._tuning_space + + @tuning_space.setter + def tuning_space(self, value): + """ + Sets the tuning_space. + + Args: + value (list): The new value for the tuning_space. + """ + self._tuning_space = value + + @property + def algo_scheduler(self): + """ + Gets the algo_scheduler. + """ + return self._algo_scheduler + + @algo_scheduler.setter + def algo_scheduler(self, value): + """ + Sets the algo_scheduler. + + Args: + value: The new value for the algo_scheduler. + """ + self._algo_scheduler = value + + def _initialize_algo_scheduler(self): + algo_scheduler = AlgorithmScheduler(self.config.recipes) + # reuse the calibration iteration + algo_scheduler.dataloader = self.calib_dataloader + algo_scheduler.origin_model = self.model + algo_scheduler.adaptor = self.adaptor + return algo_scheduler + + def _prepare_tuning(self): + """Prepare to tune and avoid repeated initialization of the adaptor and tuning space.""" + framework, framework_specific_info = self._set_framework_info(self.calib_dataloader, self.q_func) + self.adaptor = self.adaptor or FRAMEWORKS[framework](framework_specific_info) + self.framework = self.framework or framework + self.cur_best_acc = self.cur_best_acc or self.initial_best_acc() + # query capability and build tuning space + self.capability = self.capability or self.adaptor.query_fw_capability(self.model) + logger.debug(self.capability) + self.tuning_space = self.tuning_space or self.build_tuning_space(self.config) + self.algo_scheduler = self.algo_scheduler or self._initialize_algo_scheduler() + self._eval_baseline() + def _check_tuning_status(self): # got eval func if self.eval_func: @@ -231,7 +364,7 @@ def traverse(self): The main traverse logic which could be override by some concrete strategy which needs more hooks. """ - self._eval_baseline() + self._prepare_tuning() if self.config.use_distributed_tuning: logger.info("use distributed traverse: {}".format(self.config.use_distributed_tuning)) return self.distributed_traverse() @@ -571,6 +704,7 @@ def distributed_traverse(self): The main traverse logic which could be override by some concrete strategy which needs more hooks. 
""" + self._prepare_tuning() MPI = LazyImport("mpi4py.MPI") comm = MPI.COMM_WORLD rank = comm.Get_rank() @@ -955,7 +1089,7 @@ def _tune_cfg_converter(self, op_tuning_cfg): tune_cfg['recipe_cfgs'][recipe_name] = recipe_val return tune_cfg - def set_tuning_space(self, config): + def build_tuning_space(self, config): """Create the tuning space. Create the tuning space based on the framework capability and user configuration. @@ -975,7 +1109,8 @@ def set_tuning_space(self, config): 'calib': {'calib_sampling_size': calib_sampling_size_lst}, 'op': self.capability['opwise'] } - self.tuning_space = TuningSpace(adaptor_cap, conf=config, framework=self.framework) + tuning_space = TuningSpace(adaptor_cap, conf=config, framework=self.framework) + return tuning_space def setup_resume(self, resume): """Resume the best quantized model from tuning history. @@ -1002,8 +1137,8 @@ def setup_resume(self, resume): break - def set_q_func(self): - """Set the training function for quantization aware training.""" + def check_q_func(self): + """Check the training function for quantization aware training.""" if self.config.approach == 'quant_aware_training': assert self.q_func != None, "Please set train func for quantization aware training" @@ -1117,12 +1252,13 @@ def _set_objectives(self): accuracy_criterion_conf = self.config.accuracy_criterion accuracy_criterion[accuracy_criterion_conf.criterion] = accuracy_criterion_conf.tolerable_loss accuracy_criterion['higher_is_better'] = accuracy_criterion_conf.higher_is_better - self.objectives = MultiObjective(objectives=objectives, - accuracy_criterion=accuracy_criterion, - metric_criterion=self.metric_criterion, - metric_weight=self.metric_weight, - obj_criterion=obj_higher_is_better, - obj_weight=obj_weight) + objectives = MultiObjective(objectives=objectives, + accuracy_criterion=accuracy_criterion, + metric_criterion=self.metric_criterion, + metric_weight=self.metric_weight, + obj_criterion=obj_higher_is_better, + obj_weight=obj_weight) + return objectives def _same_conf(self, src_conf, dst_conf): """Check if the two configs are the same.""" diff --git a/test/strategy/test_new_datatype.py b/test/strategy/test_new_datatype.py index 599dfa2a947..02e2d48d1f2 100644 --- a/test/strategy/test_new_datatype.py +++ b/test/strategy/test_new_datatype.py @@ -40,14 +40,14 @@ def add_cap(filename): }, } } - + with open(filename) as f: con = yaml.safe_load(f) con[0]['int4'] = int4_cap with open(filename, 'w') as out: yaml.dump(con, out) -class TestBasicTuningStrategy(unittest.TestCase): +class TestAddNewDataType(unittest.TestCase): @classmethod def setUpClass(self): @@ -56,7 +56,7 @@ def setUpClass(self): @classmethod def tearDownClass(self): shutil.rmtree('saved', ignore_errors=True) - + def test_add_int4(self): import shutil import importlib @@ -68,12 +68,12 @@ def test_add_int4(self): from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion from neural_compressor.data import Datasets, DATALOADERS - + # dataset and dataloader dataset = Datasets("pytorch")["dummy"](((100, 3, 224, 224))) dataloader = DATALOADERS["pytorch"](dataset) model = build_model() - + def fake_eval(model): return 1 From ffd1543419a61fb075aa91eb67bb8fb2688b8019 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 26 Apr 2023 11:20:15 +0800 Subject: [PATCH 098/103] fixed docstring format issue Signed-off-by: yiliu30 --- neural_compressor/strategy/strategy.py | 48 ++++++++------------------ 1 file changed, 14 insertions(+), 34 deletions(-) diff 
--git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index 5d989b3c697..b1642abc249 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -164,15 +164,12 @@ def __init__(self, @property def adaptor(self): - """ - Gets the adaptor. - """ + """Gets the adaptor.""" return self._adaptor @adaptor.setter def adaptor(self, value): - """ - Sets the adaptor. + """Sets the adaptor. Args: value: The new value for the adaptor. @@ -181,15 +178,12 @@ def adaptor(self, value): @property def framework(self): - """ - Gets the framework. - """ + """Gets the framework.""" return self._framework @framework.setter def framework(self, value): - """ - Sets the framework. + """Sets the framework. Args: value: The new value for the framework. @@ -198,15 +192,13 @@ def framework(self, value): @property def baseline(self): - """ - Gets the baseline. + """Gets the baseline. """ return self._baseline @baseline.setter def baseline(self, value): - """ - Sets the baseline. + """Sets the baseline. Args: value (float): The new value for the baseline. @@ -215,15 +207,12 @@ def baseline(self, value): @property def trials_count(self): - """ - Gets the trials_count. - """ + """Gets the trials_count.""" return self._trials_count @trials_count.setter def trials_count(self, value): - """ - Sets the trials_count. + """Sets the trials_count. Args: value (int): The new value for the trials_count. @@ -232,15 +221,12 @@ def trials_count(self, value): @property def capability(self): - """ - Gets the capability. - """ + """Gets the capability.""" return self._capability @capability.setter def capability(self, value): - """ - Sets the capability. + """Sets the capability. Args: value: The new value for the capability. @@ -249,15 +235,12 @@ def capability(self, value): @property def tuning_space(self): - """ - Gets the tuning_space. - """ + """Gets the tuning_space.""" return self._tuning_space @tuning_space.setter def tuning_space(self, value): - """ - Sets the tuning_space. + """Sets the tuning_space. Args: value (list): The new value for the tuning_space. @@ -266,15 +249,12 @@ def tuning_space(self, value): @property def algo_scheduler(self): - """ - Gets the algo_scheduler. - """ + """Gets the algo_scheduler.""" return self._algo_scheduler @algo_scheduler.setter def algo_scheduler(self, value): - """ - Sets the algo_scheduler. + """Sets the algo_scheduler. Args: value: The new value for the algo_scheduler. From d611d4edbbef634bb68c3f281f13646d41a840cf Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 26 Apr 2023 14:20:38 +0800 Subject: [PATCH 099/103] fixed docstring format Signed-off-by: yiliu30 --- neural_compressor/strategy/strategy.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index b1642abc249..e35aa79d95f 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -192,8 +192,7 @@ def framework(self, value): @property def baseline(self): - """Gets the baseline. 
- """ + """Gets the baseline.""" return self._baseline @baseline.setter From ad61374f5faa4e1f2b655c712c3b2482b9f1cfb1 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 26 Apr 2023 14:37:11 +0800 Subject: [PATCH 100/103] fixed coverage Signed-off-by: yiliu30 --- .../adaptor/torch_utils/hawq_metric.py | 18 +++++++-------- neural_compressor/strategy/strategy.py | 22 +++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/neural_compressor/adaptor/torch_utils/hawq_metric.py b/neural_compressor/adaptor/torch_utils/hawq_metric.py index 758651a8078..8295fc99cc2 100644 --- a/neural_compressor/adaptor/torch_utils/hawq_metric.py +++ b/neural_compressor/adaptor/torch_utils/hawq_metric.py @@ -52,9 +52,9 @@ def remove(self): class HessianTrace: """HessianTrace Class. - Please refer to Yao, Zhewei, et al. "Pyhessian: Neural networks through the lens of the hessian." + Please refer to Yao, Zhewei, et al. "Pyhessian: Neural networks through the lens of the hessian." 2020 IEEE international conference on big data (Big data). IEEE, 2020. - Dong, Zhen, et al. "Hawq-v2: Hessian aware trace-weighted quantization of neural networks." + Dong, Zhen, et al. "Hawq-v2: Hessian aware trace-weighted quantization of neural networks." Advances in neural information processing systems 33 (2020): 18518-18529. https://github.com/openvinotoolkit/nncf/blob/develop/nncf/torch/quantization/hessian_trace.py """ @@ -173,7 +173,7 @@ def act_grad_hook(model, grad_input, grad_output): def _get_enable_act_grad_hook(self, name): def enable_act_grad_hook(model, inputs, outputs): input = inputs[0] - if input.requires_grad is False: + if input.requires_grad is False: # input.requires_grad = True self.layer_acts[name] = input @@ -251,13 +251,13 @@ def _sample_rademacher(self, params): r.masked_fill_(r == 0, -1) samples.append(r) return samples - + def _sample_rademacher_like_params(self): def sample(parameter): r = torch.randint_like(parameter, high=2, device=self.device) return r.masked_fill_(r == 0, -1) return [sample(p) for p in self.params] - + def _sample_normal_like_params(self): return [torch.randn(p.size(), device=self.device) for p in self.params] @@ -391,7 +391,7 @@ def _insert_hook(self, model, target_module_list): for layer, module in model.named_modules(): for target_module in target_module_list: # print("layer:",layer) - # print("target_model:",target_module) + # print("target_model:",target_module) if layer == target_module: logging.debug("Collect: %s" % (module)) # print("Collect: %s" % (module)) @@ -408,7 +408,7 @@ def _insert_hook_quantize(self, model, target_module_list): # print("layer:",layer) length = len("_model.") new_key = layer[length:] - # print("target_model:",target_module) + # print("target_model:",target_module) if new_key == target_module: logging.debug("Collect: %s" % (module)) # print("Collect: %s" % (module)) @@ -521,7 +521,7 @@ def compare_weights( float_dict: Dict[str, Any], quantized_dict: Dict[str, Any] ) -> Dict[str, Dict[str, torch.Tensor]]: r"""Compare the weights of the float module with its corresponding quantized module. - + Returns a dict with key corresponding to module names and each entry being a dictionary with two keys 'float' and 'quantized', containing the float and quantized weights. 
This dict can be used to compare and compute the quantization @@ -608,7 +608,7 @@ def hawq_top(fp32_model, q_model, dataloader, criterion, enable_act): op_qnt_tensor = weight_quant_loss[key]['quantized'].dequantize() diff_l2 = (torch.norm(op_float_tensor - op_qnt_tensor, p=2) ** 2) pertur_lst[key] = diff_l2 - + if enable_act: act_to_traces = traces['activation'] for trace_i, pertur_i, act_i in zip(op_to_traces.keys(), pertur_lst.keys(), act_to_traces.keys()): diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py index e35aa79d95f..7ae4b22fc62 100644 --- a/neural_compressor/strategy/strategy.py +++ b/neural_compressor/strategy/strategy.py @@ -61,7 +61,7 @@ def strategy_registry(cls): assert cls.__name__.endswith( 'TuneStrategy' ), "The name of subclass of TuneStrategy should end with \'TuneStrategy\' substring." - if cls.__name__[:-len('TuneStrategy')].lower() in STRATEGIES: + if cls.__name__[:-len('TuneStrategy')].lower() in STRATEGIES: # pragma: no cover raise ValueError('Cannot have two strategies with the same name') STRATEGIES[cls.__name__[:-len('TuneStrategy')].lower()] = cls return cls @@ -299,11 +299,11 @@ def _check_tuning_status(self): return else: # got eval dataloader but not eval metric - if self.eval_dataloader: + if self.eval_dataloader: # pragma: no cover assert self.eval_metric, "Detected evaluation dataloader but no evaluation metric, " \ "Please provide both to perform tuning process or neither for the default quantization." # got eval metric but not eval dataloader - if self.eval_metric: + if self.eval_metric: # pragma: no cover assert self.eval_dataloader, "Detected evaluation metric but no evaluation dataloader, "\ "Please provide both to perform tuning process or neither for the default quantization." 
# not tuning @@ -353,7 +353,7 @@ def traverse(self): tune_cfg = self._tune_cfg_converter(op_tuning_cfg) self.trials_count += 1 tuning_history = self._find_tuning_history(tune_cfg) - if tuning_history and self.trials_count < self.config.tuning_criterion.max_trials: + if tuning_history and self.trials_count < self.config.tuning_criterion.max_trials: # pragma: no cover self.last_tune_result = tuning_history['last_tune_result'] self.best_tune_result = tuning_history['best_tune_result'] logger.warn("Find evaluated tuning config, skip.") @@ -415,7 +415,7 @@ def traverse(self): logger.debug(f'*** Start to do diagnosis (inspect tensor).') self._diagnosis() if self.use_multi_objective and len(self.tune_result_record) > 1 and \ - self.best_tune_result is not None: + self.best_tune_result is not None: # pragma: no cover best_trail, best_result = self.objectives.best_result(self.tune_result_record, copy.deepcopy(self.baseline)) if best_result != self.best_tune_result: @@ -747,7 +747,7 @@ def apply_recipe_one_by_one(self, tune_cfg): new_tune_cfg = self._fallback_ops(copy.deepcopy(tune_cfg), \ self.capability['recipes_ops'][recipe_name], self.tuning_space) yield new_tune_cfg - if recipe_name == "smooth_quant": + if recipe_name == "smooth_quant": # pragma: no cover sq_args = {'smooth_quant': True} if 'recipe_cfgs' not in new_tune_cfg: new_tune_cfg['recipe_cfgs'] = sq_args @@ -803,13 +803,13 @@ def set_param_for_post_quantization_algos(self, algo_scheduler, tune_cfg, pre_op algo_scheduler.reset_exec_algorithms() recipe_cfgs = tune_cfg.get('recipe_cfgs', None) # for fast_bias_correction - if recipe_cfgs and recipe_cfgs.get('fast_bias_correction', False): + if recipe_cfgs and recipe_cfgs.get('fast_bias_correction', False): # pragma: no cover fbc_algo = ALGORITHMS()['fast_bias_correction'] fbc_algo.quantization_cfg = deepcopy(tune_cfg) algo_scheduler.append_algorithm('post_quantization', fbc_algo) logger.debug(f"Add fast bias correction as the post quantization algo.") # for weight correction - if recipe_cfgs and recipe_cfgs.get('weight_correction', False): + if recipe_cfgs and recipe_cfgs.get('weight_correction', False): # pragma: no cover w_algo = ALGORITHMS()['weight_correction'] w_algo.quantization_cfg = deepcopy(tune_cfg) algo_scheduler.append_algorithm('post_quantization', w_algo) @@ -903,13 +903,13 @@ def _compare_optype_statistics(self, fields=None, optypes=None, adaptor_statistics = self.adaptor.optype_statistics def _field_skipped(field): - if fields != None: + if fields != None: # pragma: no cover return field not in fields elif skip_fields != None: return field in skip_fields def _optype_skipped(optype): - if optypes != None: + if optypes != None: # pragma: no cover return optype not in optypes elif skip_optypes != None: return optype in skip_optypes @@ -1099,7 +1099,7 @@ def setup_resume(self, resume): """ self.__dict__.update(resume) for history in self.tuning_history: - if self._same_conf(history['cfg'], self.conf): + if self._same_conf(history['cfg'], self.conf): # pragma: no cover self.__dict__.update({k: v for k, v in history.items() \ if k not in ['version', 'history']}) logger.info("Start to resume tuning process.") From e73aa34dc26ae14d596481fe8c1d57711af04705 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 4 May 2023 08:48:56 +0800 Subject: [PATCH 101/103] disable some pylint check Signed-off-by: yiliu30 --- neural_compressor/strategy/strategy.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/neural_compressor/strategy/strategy.py 
index 7ae4b22fc62..a039a050279 100644
--- a/neural_compressor/strategy/strategy.py
+++ b/neural_compressor/strategy/strategy.py
@@ -364,14 +364,14 @@ def traverse(self):
                 self.tuning_times += 1
                 # set the parameter for pre quantization algos and run
                 self.set_param_for_pre_quantization_algos(self.algo_scheduler, tune_cfg, self.model)
-                self.model = self.algo_scheduler('pre_quantization')
+                self.model = self.algo_scheduler('pre_quantization') # pylint: disable=E1102
                 # quantize
                 q_model = self.adaptor.quantize(copy.deepcopy(tune_cfg), self.model, self.calib_dataloader, self.q_func)
                 assert self.adaptor.pre_optimized_model
                 # set the parameter for post quantization algos and run
                 self.set_param_for_post_quantization_algos(self.algo_scheduler, tune_cfg,\
                                                            self.adaptor.pre_optimized_model, q_model)
-                self.last_qmodel = self.algo_scheduler('post_quantization')
+                self.last_qmodel = self.algo_scheduler('post_quantization') # pylint: disable=E1102
                 self.last_tune_cfg = copy.deepcopy(tune_cfg)
                 # remove the reference to model
                 self.algo_scheduler.reset_exec_algorithms()
@@ -655,14 +655,14 @@ def slave_worker_handle(self, comm):

             # set the parameter for pre quantization algos and run
             self.set_param_for_pre_quantization_algos(self.algo_scheduler, tune_cfg, self.model)
-            self.model = self.algo_scheduler('pre_quantization')
+            self.model = self.algo_scheduler('pre_quantization') # pylint: disable=E1102
             # quantize
             q_model = self.adaptor.quantize(copy.deepcopy(tune_cfg), self.model, self.calib_dataloader, self.q_func)
             assert self.adaptor.pre_optimized_model
             # set the parameter for post quantization algos and run
             self.set_param_for_post_quantization_algos(self.algo_scheduler, tune_cfg,
                                                        self.adaptor.pre_optimized_model, q_model)
-            self.last_qmodel = self.algo_scheduler('post_quantization')
+            self.last_qmodel = self.algo_scheduler('post_quantization') # pylint: disable=E1102
             self.last_tune_cfg = copy.deepcopy(tune_cfg)
             # Remove the reference to model
             self.algo_scheduler.reset_exec_algorithms()

From 849f6371cc2805146d721a1559455a1a63288dd9 Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Fri, 5 May 2023 15:01:48 +0800
Subject: [PATCH 102/103] enhance distributed log

Signed-off-by: yiliu30
---
 neural_compressor/strategy/strategy.py   |  3 +++
 test/strategy/test_distributed_tuning.py | 20 ++++++++++----------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py
index a039a050279..a668eb0ab10 100644
--- a/neural_compressor/strategy/strategy.py
+++ b/neural_compressor/strategy/strategy.py
@@ -541,6 +541,9 @@ def master_worker_handle(self, comm):

             # record eval_results for context coordination of stage 3
             self.last_tune_result = eval_res
+            self.objectives.val = eval_res
+            self.trials_count = self.overall_trials + 1
+            self.stop(self.config.tuning_criterion.timeout, None)
             self.eval_results[tag] = eval_res
             self.overall_trials += 1

diff --git a/test/strategy/test_distributed_tuning.py b/test/strategy/test_distributed_tuning.py
index 24340826f0f..0a00d2e616a 100644
--- a/test/strategy/test_distributed_tuning.py
+++ b/test/strategy/test_distributed_tuning.py
@@ -91,10 +91,10 @@ def test_pt_stage_1_met(self):
         # fake evaluation function
         num_baseline = num_processes # TODO, replace num_baseline with 1 when evaluating baseline only once.
         acc_lst = [2.0] * num_baseline + [1.0, 2.1, 2.2, 2.3, 2.0] #the tuning result (2.1)
-        perf_lst = [2.0] * num_baseline + [2.5, 2.0, 1.5, 1.1, 5.0] 
+        perf_lst = [2.0] * num_baseline + [2.5, 2.0, 1.5, 1.1, 5.0]

         # make sure this path can be accessed by all nodes
-        acc_perf_data_file_path = 'test_pt_stage_1_met.json' 
+        acc_perf_data_file_path = 'test_pt_stage_1_met.json'
         save_acc_perf_to_local(acc_lst, perf_lst, acc_perf_data_file_path)

         def _fake_eval(model):
@@ -108,7 +108,7 @@ def _fake_eval(model):
         dataloader = DATALOADERS["pytorch"](dataset)

         # tuning and accuracy criterion
-        conf = PostTrainingQuantConfig(use_distributed_tuning=True)
+        conf = PostTrainingQuantConfig(quant_level=1, use_distributed_tuning=True)
         # fit
         q_model = fit(model=resnet18,
                       conf=conf,
@@ -133,7 +133,7 @@ def test_pt_stage_3_fp32_met(self):
         perf_lst = [2.0] * num_baseline + [1.0] * 16 + [1.0, 1.0, 1.0]

         # make sure this path can be accessed by all nodes
-        acc_perf_data_file_path = 'test_pt_stage_3_fp32_met.json' 
+        acc_perf_data_file_path = 'test_pt_stage_3_fp32_met.json'
         save_acc_perf_to_local(acc_lst, perf_lst, acc_perf_data_file_path)

         def _fake_eval(model):
@@ -147,7 +147,7 @@ def _fake_eval(model):
         dataloader = DATALOADERS["pytorch"](dataset)

         # tuning and accuracy criterion
-        conf = PostTrainingQuantConfig(use_distributed_tuning=True)
+        conf = PostTrainingQuantConfig(quant_level=1, use_distributed_tuning=True)
         # fit
         q_model = fit(model=resnet18,
                       conf=conf,
@@ -172,7 +172,7 @@ def test_pt_stage_4_fp32_met(self):
         perf_lst = [2.0] * num_baseline + [1.0] * 37 + [1.0, 1.0, 1.0]

         # make sure this path can be accessed by all nodes
-        acc_perf_data_file_path = 'test_pt_stage_stage_4_fp32_met.json' 
+        acc_perf_data_file_path = 'test_pt_stage_stage_4_fp32_met.json'
         save_acc_perf_to_local(acc_lst, perf_lst, acc_perf_data_file_path)

         def _fake_eval(model):
@@ -186,7 +186,7 @@ def _fake_eval(model):
         dataloader = DATALOADERS["pytorch"](dataset)

         # tuning and accuracy criterion
-        conf = PostTrainingQuantConfig(use_distributed_tuning=True)
+        conf = PostTrainingQuantConfig(quant_level=1, use_distributed_tuning=True)
         # fit
         q_model = fit(model=resnet18,
                       conf=conf,
@@ -210,7 +210,7 @@ def test_pt_stage_not_met(self):
         perf_lst = [2.0] * num_baseline + [1.0] * 57

         # make sure this path can be accessed by all nodes
-        acc_perf_data_file_path = 'test_pt_stage_not_met.json' 
+        acc_perf_data_file_path = 'test_pt_stage_not_met.json'
         save_acc_perf_to_local(acc_lst, perf_lst, acc_perf_data_file_path)

         def _fake_eval(model):
@@ -224,7 +224,7 @@ def _fake_eval(model):
         dataloader = DATALOADERS["pytorch"](dataset)

         # tuning and accuracy criterion
-        conf = PostTrainingQuantConfig(use_distributed_tuning=True)
+        conf = PostTrainingQuantConfig(quant_level=1, use_distributed_tuning=True)
         # fit
         q_model = fit(model=resnet18,
                       conf=conf,
@@ -263,7 +263,7 @@ def _fake_eval(model):
         dataloader = DATALOADERS["pytorch"](dataset)

         # tuning and accuracy criterion
-        conf = PostTrainingQuantConfig(use_distributed_tuning=True)
+        conf = PostTrainingQuantConfig(quant_level=1, use_distributed_tuning=True)
         # fit
         q_model = fit(model=resnet18,
                       conf=conf,

From c6b8b6a22a74f236c65ad0f745048c857f624fef Mon Sep 17 00:00:00 2001
From: yiliu30
Date: Sat, 6 May 2023 12:02:10 +0800
Subject: [PATCH 103/103] update the initialization method

Signed-off-by: yiliu30
---
 neural_compressor/strategy/auto.py     | 14 +++-----------
 neural_compressor/strategy/strategy.py | 26 +++++++++++++++++++++++++-
 2 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/neural_compressor/strategy/auto.py b/neural_compressor/strategy/auto.py
index bdd71dcff73..9a77c7645d4 100644
--- a/neural_compressor/strategy/auto.py
+++ b/neural_compressor/strategy/auto.py
@@ -79,18 +79,10 @@ def sequential_traverse(self):
                                  eval_dataloader=self.eval_dataloader,
                                  eval_metric=self.eval_metric,
                                  resume=self._resume,
-                                 q_hooks=self.q_hooks)
+                                 q_hooks=self.q_hooks,
+                                 pre_strategy=pre_strategy
+                                 )
-            if pre_strategy:
-                #TODO add tuning history from the previous stage to current stage.
-                strategy.adaptor = pre_strategy.adaptor
-                strategy.framework = pre_strategy.framework
-                strategy.baseline = deepcopy(pre_strategy.baseline)
-                strategy.trials_count = pre_strategy.trials_count
-                strategy.objectives.baseline = deepcopy(pre_strategy.baseline)
-                strategy.capability = pre_strategy.capability
-                strategy.tuning_space = pre_strategy.tuning_space
-                strategy.algo_scheduler = pre_strategy.algo_scheduler

             pre_strategy = strategy
             strategy.traverse()
         self.best_qmodel = strategy.best_qmodel
diff --git a/neural_compressor/strategy/strategy.py b/neural_compressor/strategy/strategy.py
index a668eb0ab10..130dae40b73 100644
--- a/neural_compressor/strategy/strategy.py
+++ b/neural_compressor/strategy/strategy.py
@@ -66,8 +66,32 @@ def strategy_registry(cls):
     STRATEGIES[cls.__name__[:-len('TuneStrategy')].lower()] = cls
     return cls

+class TuneStrategyMeta(type):
+    """Tuning strategy metaclass."""
+
+    def __call__(cls, *args, pre_strategy=None, **kwargs):
+        """Create a new strategy instance, carrying over state from the previous strategy if one is provided.
+
+        Args:
+            pre_strategy: The previous strategy instance. Defaults to None.
+
+        Returns:
+            The newly created strategy instance.
+        """
+        new_strategy = super().__call__(*args, **kwargs)
+        if pre_strategy:
+            new_strategy.adaptor = pre_strategy.adaptor
+            new_strategy.framework = pre_strategy.framework
+            new_strategy.baseline = deepcopy(pre_strategy.baseline)
+            new_strategy.trials_count = pre_strategy.trials_count
+            new_strategy.objectives.baseline = deepcopy(pre_strategy.baseline)
+            new_strategy.capability = pre_strategy.capability
+            new_strategy.tuning_space = pre_strategy.tuning_space
+            new_strategy.algo_scheduler = pre_strategy.algo_scheduler
+        return new_strategy
+
 @strategy_registry
-class TuneStrategy(object):
+class TuneStrategy(metaclass=TuneStrategyMeta):
     """Basic class for tuning strategy."""

     def __init__(self,