From c0c34c2cf78037021766e09f497866807eec1923 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Tue, 14 May 2024 14:17:28 +0800 Subject: [PATCH 01/13] Support Autotune FP16 Mix-precision on torch 3.0 new API Signed-off-by: zehao-intel --- neural_compressor/common/utils/constants.py | 1 + .../strategy/auto_mixed_precision.py | 2 +- neural_compressor/torch/amp/fp16/__init__.py | 13 +++ .../torch/amp/fp16/fp16_convert.py | 108 ++++++++++++++++++ .../torch/quantization/__init__.py | 3 + .../torch/quantization/algorithm_entry.py | 28 ++++- .../torch/quantization/config.py | 74 ++++++++++++ test/3x/torch/test_autotune.py | 9 ++ 8 files changed, 236 insertions(+), 2 deletions(-) create mode 100644 neural_compressor/torch/amp/fp16/__init__.py create mode 100644 neural_compressor/torch/amp/fp16/fp16_convert.py diff --git a/neural_compressor/common/utils/constants.py b/neural_compressor/common/utils/constants.py index 097bae60381..ef437f3231b 100644 --- a/neural_compressor/common/utils/constants.py +++ b/neural_compressor/common/utils/constants.py @@ -36,6 +36,7 @@ TEQ = "teq" # pragma: no cover AUTOROUND = "autoround" FP8_QUANT = "fp8_quant" +MIX_PRECISION = "mix_precision" # options import datetime diff --git a/neural_compressor/strategy/auto_mixed_precision.py b/neural_compressor/strategy/auto_mixed_precision.py index 491baf5fa2d..427b61e982c 100644 --- a/neural_compressor/strategy/auto_mixed_precision.py +++ b/neural_compressor/strategy/auto_mixed_precision.py @@ -187,7 +187,7 @@ def fallback_in_op_wise(self, tuning_space, fallback_items_name_lst, initial_op_ ) op_fallback_acc_impact = OrderedDict() for op_index, op_tuning_cfg in enumerate(fallback_sampler): - op_tuning_cfg["calib_sampling_size"] = -1 + op_tuning_cfg["calib_sampling_size"] = -1le_list yield op_tuning_cfg acc, _ = self.last_tune_result op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc diff --git a/neural_compressor/torch/amp/fp16/__init__.py b/neural_compressor/torch/amp/fp16/__init__.py new file mode 100644 index 00000000000..28f108cb636 --- /dev/null +++ b/neural_compressor/torch/amp/fp16/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/neural_compressor/torch/amp/fp16/fp16_convert.py b/neural_compressor/torch/amp/fp16/fp16_convert.py new file mode 100644 index 00000000000..4f7ca159f63 --- /dev/null +++ b/neural_compressor/torch/amp/fp16/fp16_convert.py @@ -0,0 +1,108 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""FP16 Convert for Torch Utils.""" + +import torch + +from torch.fx import symbolic_trace +from typing import Dict, Tuple + +from neural_compressor.common import logger +from neural_compressor.quantization import MixPrecisionConfig + +class FP16ModuleWrapper(torch.nn.Module): + """FP16Module Wrapper Class.""" + + def __init__(self, module, device="cpu"): + """Init a FP16ModuleWrapper object.""" + super(FP16ModuleWrapper, self).__init__() + self.add_module("module", module) + self.train(module.training) + self.device = device + # WA for TransformerEncoder to access its Linear's weights and bias + if isinstance(module, torch.nn.Linear): + self.weight = self.module.weight if hasattr(self.module, "weight") else None + self.bias = self.module.bias if hasattr(self.module, "bias") else None + + def forward(self, X): + """Convert dtype.""" + with torch.autocast(device_type=self.device, dtype=torch.float16): + X = self.module(X) + return X.float() + + +class FP16Converter(): + """FP16 Converter Class.""" + + def __init__(self, configs_mapping: Dict[Tuple[str], MixPrecisionConfig], *args, **kwargs): + """Initialize the FP16 Converter with config. + + Args: + config (MixPrecisionConfig): config class for mix-precision. + """ + self.configs_mapping = configs_mapping + # self.fp16_ops_list = config.tune_cfg["fp16_ops_list"] + + # def Convert(self, model: torch.nn.Module): + # """Convert to FP16 model. + + # Args: + # model (torch.nn.Module): the input model. + # tune_cfg (dict): dictionary of quantization configuration. + + # Returns: + # mixed_precision_model (object): model with mixed precision. + # """ + # if len(self.fp16_ops_list) > 0: + # logger.info("Convert operators to float16") + # mixed_precision_model = self._fp16_wrapper_model(model, self.fp16_ops_list) + # # if fx_sub_module_list is not None and len(fx_sub_module_list) > 0: + # # mixed_precision_model = FP16Converter.fp16_symbolic_trace(mixed_precision_model, fx_sub_module_list) + # return mixed_precision_model + + # def _fp16_wrapper_model(self, model: torch.nn.Module, prefix=""): + # for name, child in model.named_children(): + # op_name = prefix + "." + name if prefix != "" else name + # for fp16_op_name in self.fp16_ops_list: + # if op_name == fp16_op_name[0]: + # child = FP16ModuleWrapper(child, device) + # else: + # self._fp16_wrapper_model(child, op_name) + # setattr(model, name, child) + # return model + + # @staticmethod + # def fp16_symbolic_trace(model, fx_sub_module_list, prefix=""): + # """Symbolic trace for fp16 models. + + # Args: + # model (object): the input model. + # fx_sub_module_list (list): _description_ + # prefix (str): prefix of op name. + + # Returns: + # model (object) + # """ + # for name, child in model.named_children(): + # op_name = prefix + "." 
+ name if prefix != "" else name + # for fx_sub_module_name in fx_sub_module_list: + # if op_name == fx_sub_module_name: + # child = symbolic_trace(child) + # else: + # FP16Converter.fp16_symbolic_trace(child, fx_sub_module_list, op_name) + # setattr(model, name, child) + # return model diff --git a/neural_compressor/torch/quantization/__init__.py b/neural_compressor/torch/quantization/__init__.py index 87a931610fa..a71ca565b9d 100644 --- a/neural_compressor/torch/quantization/__init__.py +++ b/neural_compressor/torch/quantization/__init__.py @@ -34,6 +34,9 @@ FP8Config, get_default_fp8_config, get_default_fp8_config_set, + MixPrecisionConfig, + get_default_mix_precision_config, + get_default_mix_precision_config_set, ) from neural_compressor.torch.quantization.autotune import ( diff --git a/neural_compressor/torch/quantization/algorithm_entry.py b/neural_compressor/torch/quantization/algorithm_entry.py index 41d2593c224..a9e7fa94e55 100644 --- a/neural_compressor/torch/quantization/algorithm_entry.py +++ b/neural_compressor/torch/quantization/algorithm_entry.py @@ -18,7 +18,18 @@ import torch -from neural_compressor.common.utils import AUTOROUND, AWQ, FP8_QUANT, GPTQ, HQQ, RTN, SMOOTH_QUANT, STATIC_QUANT, TEQ +from neural_compressor.common.utils import ( + AUTOROUND, + AWQ, + FP8_QUANT, + GPTQ, + HQQ, + RTN, + SMOOTH_QUANT, + STATIC_QUANT, + TEQ, + MIX_PRECISION +) from neural_compressor.torch.quantization import ( AutoRoundConfig, AWQConfig, @@ -29,6 +40,7 @@ SmoothQuantConfig, StaticQuantConfig, TEQConfig, + MixPrecisionConfig, ) from neural_compressor.torch.utils import Mode, logger, register_algo @@ -489,3 +501,17 @@ def fp8_quant_entry( model.qconfig = configs_mapping model.save = MethodType(save, model) return model + + +###################### Habana FP8 Algo Entry ################################## +@register_algo(MIX_PRECISION) +def mix_precision_entry( + model: torch.nn.Module, configs_mapping: Dict[Tuple[str], MixPrecisionConfig], *args, **kwargs +) -> torch.nn.Module: + # only support fp16 now, more types might be added later + from neural_compressor.torch.amp.fp16.fp16_convert import FP16Converter + fp16_converter = FP16Converter(configs_mapping, *args, **kwargs) + # model = quantize(model, configs_mapping, *args, **kwargs) + # model.qconfig = configs_mapping + # model.save = MethodType(save, model) + return model diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py index a052c923c81..4c4b3580ac9 100644 --- a/neural_compressor/torch/quantization/config.py +++ b/neural_compressor/torch/quantization/config.py @@ -34,6 +34,7 @@ FP8_QUANT, GPTQ, HQQ, + MIX_PRECISION, OP_NAME_OR_MODULE_TYPE, RTN, SMOOTH_QUANT, @@ -1117,6 +1118,79 @@ def get_default_fp8_config_set() -> FP8Config: return FP8Config.get_config_set_for_tuning() +######################## MixPrecision Config ############################### +@register_config(framework_name=FRAMEWORK_NAME, algo_name=MIX_PRECISION) +class MixPrecisionConfig(BaseConfig): + """Config class for mix-precision.""" + + name = MIX_PRECISION + supported_configs: List[OperatorConfig] = [] + params_list = [ + "fp16_ops" + "device", + ] + + def __init__( + self, + fp16_ops: List = [torch.nn.Linear], + device: Union[str, List[str]] = "cpu", + white_list: Optional[List[OP_NAME_OR_MODULE_TYPE]] = DEFAULT_WHITE_LIST, + ): + """Init MixPrecision config. 
+ + Args: + """ + super().__init__(white_list=white_list) + self.fp16_ops = fp16_ops + self.device = device + self._post_init() + + @classmethod + def register_supported_configs(cls) -> List[OperatorConfig]: + supported_configs = [] + mix_precision_config = MixPrecisionConfig( + fp16_ops=[torch.nn.Linear], + device=["cpu", "cuda"], + ) + operators = [torch.nn.Linear] + supported_configs.append(OperatorConfig(config=mix_precision_config, operators=operators)) + cls.supported_configs = supported_configs + + @staticmethod + def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: + white_list = (torch.nn.Linear,) + filter_result = [] + for op_name, module in model.named_modules(): + if isinstance(module, white_list): + pair = (op_name, type(module).__name__) + filter_result.append(pair) + logger.debug(f"Get model info: {filter_result}") + return filter_result + + @classmethod + def get_config_set_for_tuning(cls) -> Union[None, "MixPrecisionConfig", List["MixPrecisionConfig"]]: + # TODO fwk owner needs to update it. + return MixPrecisionConfig(fp16_ops=[torch.nn.Linear]) + + +def get_default_mix_precision_config() -> MixPrecisionConfig: + """Generate the defaul mix-precision config. + + Returns: + the default mix-precision config. + """ + return MixPrecisionConfig() + + +def get_default_mix_precision_config_set() -> MixPrecisionConfig: + """Generate the default mix-precision config set. + + Returns: + the default mix-precision config. + """ + return MixPrecisionConfig.get_config_set_for_tuning() + + ##################### Algo Configs End ################################### diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index 0c82a5af051..661671db4f5 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -308,6 +308,15 @@ def eval_acc_fn(model) -> float: best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn) self.assertIsNone(best_model) + @reset_tuning_target + def test_autotune_mix_precision(self): + def eval_acc_fn(model) -> float: + return 1.0 + + custom_tune_config = TuningConfig(config_set=[MixPrecisionConfig(fp16_ops=[torch.nn.Linear])], max_trials=2) + best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn) + self.assertIsNotNone(best_model) + if __name__ == "__main__": unittest.main() From 085ca64021a13da101ac91bfb8a8df0c91795fd3 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Wed, 15 May 2024 14:29:13 +0800 Subject: [PATCH 02/13] fix bugs and add ut Signed-off-by: zehao-intel --- .../strategy/auto_mixed_precision.py | 2 +- .../mix_precision}/__init__.py | 8 +- .../algorithms/mix_precision/fp16_convert.py | 70 ++++++++++++ .../mix_precision/module_wrappers.py | 39 +++++++ .../torch/amp/fp16/fp16_convert.py | 108 ------------------ .../torch/quantization/algorithm_entry.py | 10 +- .../torch/quantization/config.py | 22 ++-- test/3x/torch/test_autotune.py | 42 ++++++- 8 files changed, 174 insertions(+), 127 deletions(-) rename neural_compressor/torch/{amp/fp16 => algorithms/mix_precision}/__init__.py (65%) create mode 100644 neural_compressor/torch/algorithms/mix_precision/fp16_convert.py create mode 100644 neural_compressor/torch/algorithms/mix_precision/module_wrappers.py delete mode 100644 neural_compressor/torch/amp/fp16/fp16_convert.py diff --git a/neural_compressor/strategy/auto_mixed_precision.py b/neural_compressor/strategy/auto_mixed_precision.py index 427b61e982c..491baf5fa2d 100644 --- 
a/neural_compressor/strategy/auto_mixed_precision.py +++ b/neural_compressor/strategy/auto_mixed_precision.py @@ -187,7 +187,7 @@ def fallback_in_op_wise(self, tuning_space, fallback_items_name_lst, initial_op_ ) op_fallback_acc_impact = OrderedDict() for op_index, op_tuning_cfg in enumerate(fallback_sampler): - op_tuning_cfg["calib_sampling_size"] = -1le_list + op_tuning_cfg["calib_sampling_size"] = -1 yield op_tuning_cfg acc, _ = self.last_tune_result op_fallback_acc_impact[fallback_items_name_lst[op_index]] = acc diff --git a/neural_compressor/torch/amp/fp16/__init__.py b/neural_compressor/torch/algorithms/mix_precision/__init__.py similarity index 65% rename from neural_compressor/torch/amp/fp16/__init__.py rename to neural_compressor/torch/algorithms/mix_precision/__init__.py index 28f108cb636..681a0747684 100644 --- a/neural_compressor/torch/amp/fp16/__init__.py +++ b/neural_compressor/torch/algorithms/mix_precision/__init__.py @@ -1,13 +1,19 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# # Copyright (c) 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +from neural_compressor.torch.algorithms.mix_precision.fp16_convert import FP16Converter +from neural_compressor.torch.algorithms.mix_precision.module_wrappers import FP16ModuleWrapper \ No newline at end of file diff --git a/neural_compressor/torch/algorithms/mix_precision/fp16_convert.py b/neural_compressor/torch/algorithms/mix_precision/fp16_convert.py new file mode 100644 index 00000000000..9eccc175017 --- /dev/null +++ b/neural_compressor/torch/algorithms/mix_precision/fp16_convert.py @@ -0,0 +1,70 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""FP16 Convert for Torch Modules.""" + +import torch + +from typing import Dict, Tuple + +from neural_compressor.common import logger +from neural_compressor.torch.algorithms.mix_precision.module_wrappers import FP16ModuleWrapper +from neural_compressor.torch.quantization import MixPrecisionConfig +from neural_compressor.torch.utils import get_device + + +class FP16Converter(): + """FP16 Converter Class.""" + + def __init__(self, configs_mapping: Dict[Tuple[str], MixPrecisionConfig], *args, **kwargs): + """Initialize the FP16 Converter with config. + + Args: + config (MixPrecisionConfig): config class for mix-precision. 
+ """ + self.configs_mapping = configs_mapping + self.device = "auto" + for _, config in configs_mapping.items(): + self.device = config.device + break + if self.device == "auto": + self.device = get_device() + + def convert(self, model: torch.nn.Module): + """Convert to FP16 model. + + Args: + model (torch.nn.Module): the input model. + tune_cfg (dict): dictionary of quantization configuration. + + Returns: + mixed_precision_model (object): model with mixed precision. + """ + if len(self.configs_mapping) > 0: + logger.info("Convert operators to float16") + mixed_precision_model = self._fp16_wrapper_model(model) + return mixed_precision_model + + def _fp16_wrapper_model(self, model: torch.nn.Module, prefix=""): + for name, child in model.named_children(): + op_name = prefix + "." + name if prefix != "" else name + for op_info, config in self.configs_mapping.items(): + if op_name == op_info[0] and config.dtype == "fp16": + child = FP16ModuleWrapper(child, self.device) + else: + self._fp16_wrapper_model(child, op_name) + setattr(model, name, child) + return model diff --git a/neural_compressor/torch/algorithms/mix_precision/module_wrappers.py b/neural_compressor/torch/algorithms/mix_precision/module_wrappers.py new file mode 100644 index 00000000000..1200bc0a7d8 --- /dev/null +++ b/neural_compressor/torch/algorithms/mix_precision/module_wrappers.py @@ -0,0 +1,39 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""FP16 Wrapper for Torch Modules.""" + +import torch + +class FP16ModuleWrapper(torch.nn.Module): + """FP16Module Wrapper Class.""" + + def __init__(self, module, device): + """Init a FP16ModuleWrapper object.""" + super(FP16ModuleWrapper, self).__init__() + self.add_module("module", module) + self.train(module.training) + self.device = device + # WA for TransformerEncoder to access its Linear's weights and bias + if isinstance(module, torch.nn.Linear): + self.weight = self.module.weight if hasattr(self.module, "weight") else None + self.bias = self.module.bias if hasattr(self.module, "bias") else None + + def forward(self, X): + """Convert dtype.""" + with torch.autocast(device_type=self.device, dtype=torch.float16): + X = self.module(X) + return X.float() diff --git a/neural_compressor/torch/amp/fp16/fp16_convert.py b/neural_compressor/torch/amp/fp16/fp16_convert.py deleted file mode 100644 index 4f7ca159f63..00000000000 --- a/neural_compressor/torch/amp/fp16/fp16_convert.py +++ /dev/null @@ -1,108 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""FP16 Convert for Torch Utils.""" - -import torch - -from torch.fx import symbolic_trace -from typing import Dict, Tuple - -from neural_compressor.common import logger -from neural_compressor.quantization import MixPrecisionConfig - -class FP16ModuleWrapper(torch.nn.Module): - """FP16Module Wrapper Class.""" - - def __init__(self, module, device="cpu"): - """Init a FP16ModuleWrapper object.""" - super(FP16ModuleWrapper, self).__init__() - self.add_module("module", module) - self.train(module.training) - self.device = device - # WA for TransformerEncoder to access its Linear's weights and bias - if isinstance(module, torch.nn.Linear): - self.weight = self.module.weight if hasattr(self.module, "weight") else None - self.bias = self.module.bias if hasattr(self.module, "bias") else None - - def forward(self, X): - """Convert dtype.""" - with torch.autocast(device_type=self.device, dtype=torch.float16): - X = self.module(X) - return X.float() - - -class FP16Converter(): - """FP16 Converter Class.""" - - def __init__(self, configs_mapping: Dict[Tuple[str], MixPrecisionConfig], *args, **kwargs): - """Initialize the FP16 Converter with config. - - Args: - config (MixPrecisionConfig): config class for mix-precision. - """ - self.configs_mapping = configs_mapping - # self.fp16_ops_list = config.tune_cfg["fp16_ops_list"] - - # def Convert(self, model: torch.nn.Module): - # """Convert to FP16 model. - - # Args: - # model (torch.nn.Module): the input model. - # tune_cfg (dict): dictionary of quantization configuration. - - # Returns: - # mixed_precision_model (object): model with mixed precision. - # """ - # if len(self.fp16_ops_list) > 0: - # logger.info("Convert operators to float16") - # mixed_precision_model = self._fp16_wrapper_model(model, self.fp16_ops_list) - # # if fx_sub_module_list is not None and len(fx_sub_module_list) > 0: - # # mixed_precision_model = FP16Converter.fp16_symbolic_trace(mixed_precision_model, fx_sub_module_list) - # return mixed_precision_model - - # def _fp16_wrapper_model(self, model: torch.nn.Module, prefix=""): - # for name, child in model.named_children(): - # op_name = prefix + "." + name if prefix != "" else name - # for fp16_op_name in self.fp16_ops_list: - # if op_name == fp16_op_name[0]: - # child = FP16ModuleWrapper(child, device) - # else: - # self._fp16_wrapper_model(child, op_name) - # setattr(model, name, child) - # return model - - # @staticmethod - # def fp16_symbolic_trace(model, fx_sub_module_list, prefix=""): - # """Symbolic trace for fp16 models. - - # Args: - # model (object): the input model. - # fx_sub_module_list (list): _description_ - # prefix (str): prefix of op name. - - # Returns: - # model (object) - # """ - # for name, child in model.named_children(): - # op_name = prefix + "." 
+ name if prefix != "" else name - # for fx_sub_module_name in fx_sub_module_list: - # if op_name == fx_sub_module_name: - # child = symbolic_trace(child) - # else: - # FP16Converter.fp16_symbolic_trace(child, fx_sub_module_list, op_name) - # setattr(model, name, child) - # return model diff --git a/neural_compressor/torch/quantization/algorithm_entry.py b/neural_compressor/torch/quantization/algorithm_entry.py index a9e7fa94e55..7f4179dd89a 100644 --- a/neural_compressor/torch/quantization/algorithm_entry.py +++ b/neural_compressor/torch/quantization/algorithm_entry.py @@ -503,15 +503,13 @@ def fp8_quant_entry( return model -###################### Habana FP8 Algo Entry ################################## +###################### FP16 Algo Entry ################################## @register_algo(MIX_PRECISION) def mix_precision_entry( model: torch.nn.Module, configs_mapping: Dict[Tuple[str], MixPrecisionConfig], *args, **kwargs ) -> torch.nn.Module: # only support fp16 now, more types might be added later - from neural_compressor.torch.amp.fp16.fp16_convert import FP16Converter + from neural_compressor.torch.algorithms.mix_precision import FP16Converter fp16_converter = FP16Converter(configs_mapping, *args, **kwargs) - # model = quantize(model, configs_mapping, *args, **kwargs) - # model.qconfig = configs_mapping - # model.save = MethodType(save, model) - return model + + return fp16_converter.convert(model) diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py index 4c4b3580ac9..8306f224bff 100644 --- a/neural_compressor/torch/quantization/config.py +++ b/neural_compressor/torch/quantization/config.py @@ -1126,14 +1126,20 @@ class MixPrecisionConfig(BaseConfig): name = MIX_PRECISION supported_configs: List[OperatorConfig] = [] params_list = [ - "fp16_ops" + "dtype", "device", ] + supported_fp16_ops = ( + torch.nn.Linear, + torch.nn.Conv1d, + torch.nn.Conv2d, + torch.nn.Conv3d, + ) def __init__( self, - fp16_ops: List = [torch.nn.Linear], - device: Union[str, List[str]] = "cpu", + dtype: Union[str, List[str]] = "fp16", + device: Union[str, List[str]] = "auto", white_list: Optional[List[OP_NAME_OR_MODULE_TYPE]] = DEFAULT_WHITE_LIST, ): """Init MixPrecision config. @@ -1141,7 +1147,7 @@ def __init__( Args: """ super().__init__(white_list=white_list) - self.fp16_ops = fp16_ops + self.dtype = dtype self.device = device self._post_init() @@ -1149,16 +1155,16 @@ def __init__( def register_supported_configs(cls) -> List[OperatorConfig]: supported_configs = [] mix_precision_config = MixPrecisionConfig( - fp16_ops=[torch.nn.Linear], + dtype=["fp16", "fp32"], device=["cpu", "cuda"], ) - operators = [torch.nn.Linear] + operators = cls.supported_fp16_ops supported_configs.append(OperatorConfig(config=mix_precision_config, operators=operators)) cls.supported_configs = supported_configs @staticmethod def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: - white_list = (torch.nn.Linear,) + white_list = tuple(MixPrecisionConfig.supported_fp16_ops) filter_result = [] for op_name, module in model.named_modules(): if isinstance(module, white_list): @@ -1170,7 +1176,7 @@ def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: @classmethod def get_config_set_for_tuning(cls) -> Union[None, "MixPrecisionConfig", List["MixPrecisionConfig"]]: # TODO fwk owner needs to update it. 
- return MixPrecisionConfig(fp16_ops=[torch.nn.Linear]) + return MixPrecisionConfig(dtype=["fp16", "fp32"]) def get_default_mix_precision_config() -> MixPrecisionConfig: diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index 661671db4f5..9e1a55bd192 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -7,7 +7,7 @@ import transformers from neural_compressor.common import logger -from neural_compressor.torch.quantization import RTNConfig, TuningConfig, autotune, get_all_config_set +from neural_compressor.torch.quantization import RTNConfig, TuningConfig, MixPrecisionConfig, autotune, get_all_config_set from neural_compressor.torch.utils import constants FAKE_DOUBLE_QUANT_CONFIGS = { @@ -309,13 +309,49 @@ def eval_acc_fn(model) -> float: self.assertIsNone(best_model) @reset_tuning_target - def test_autotune_mix_precision(self): + def test_autotune_mix_precision_default(self): + from neural_compressor.torch.algorithms.mix_precision import FP16ModuleWrapper + + def eval_acc_fn(model) -> float: + return 1.0 + + custom_tune_config = TuningConfig(config_set=[MixPrecisionConfig()], max_trials=2) + best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn) + + self.assertIsNotNone(best_model) + self.assertTrue(isinstance(best_model.fc1, FP16ModuleWrapper)) + self.assertTrue(isinstance(best_model.fc2, FP16ModuleWrapper)) + self.assertTrue(isinstance(best_model.fc3, FP16ModuleWrapper)) + + @reset_tuning_target + def test_autotune_mix_precision_set_op_name(self): + from neural_compressor.common.base_config import ComposableConfig, config_registry + from neural_compressor.torch.algorithms.mix_precision import FP16ModuleWrapper + def eval_acc_fn(model) -> float: return 1.0 - custom_tune_config = TuningConfig(config_set=[MixPrecisionConfig(fp16_ops=[torch.nn.Linear])], max_trials=2) + config = { + "mix_precision": { + "global": { + "dtype": "fp16", + }, + "local": { + "fc1": { + "dtype": "fp32", + } + }, + } + } + registered_configs = config_registry.get_cls_configs() + config = ComposableConfig.from_dict(config, config_registry=registered_configs["torch"]) + custom_tune_config = TuningConfig(config_set=[config], max_trials=2) best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn) + self.assertIsNotNone(best_model) + self.assertTrue(isinstance(best_model.fc1, torch.nn.Linear)) + self.assertTrue(isinstance(best_model.fc2, FP16ModuleWrapper)) + self.assertTrue(isinstance(best_model.fc3, FP16ModuleWrapper)) if __name__ == "__main__": From e4b705ec3fe79a392ee4b738c7cf54b5ba12fef3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 15 May 2024 06:38:07 +0000 Subject: [PATCH 03/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../algorithms/mix_precision/__init__.py | 2 +- .../algorithms/mix_precision/fp16_convert.py | 8 ++++---- .../mix_precision/module_wrappers.py | 1 + .../torch/quantization/algorithm_entry.py | 19 +++++-------------- .../torch/quantization/config.py | 10 +++++----- test/3x/torch/test_autotune.py | 8 +++++++- 6 files changed, 23 insertions(+), 25 deletions(-) diff --git a/neural_compressor/torch/algorithms/mix_precision/__init__.py b/neural_compressor/torch/algorithms/mix_precision/__init__.py index 681a0747684..42a4eae2f53 100644 --- a/neural_compressor/torch/algorithms/mix_precision/__init__.py +++ 
b/neural_compressor/torch/algorithms/mix_precision/__init__.py @@ -16,4 +16,4 @@ # limitations under the License. from neural_compressor.torch.algorithms.mix_precision.fp16_convert import FP16Converter -from neural_compressor.torch.algorithms.mix_precision.module_wrappers import FP16ModuleWrapper \ No newline at end of file +from neural_compressor.torch.algorithms.mix_precision.module_wrappers import FP16ModuleWrapper diff --git a/neural_compressor/torch/algorithms/mix_precision/fp16_convert.py b/neural_compressor/torch/algorithms/mix_precision/fp16_convert.py index 9eccc175017..8ce33a2381a 100644 --- a/neural_compressor/torch/algorithms/mix_precision/fp16_convert.py +++ b/neural_compressor/torch/algorithms/mix_precision/fp16_convert.py @@ -16,22 +16,22 @@ # limitations under the License. """FP16 Convert for Torch Modules.""" -import torch - from typing import Dict, Tuple +import torch + from neural_compressor.common import logger from neural_compressor.torch.algorithms.mix_precision.module_wrappers import FP16ModuleWrapper from neural_compressor.torch.quantization import MixPrecisionConfig from neural_compressor.torch.utils import get_device -class FP16Converter(): +class FP16Converter: """FP16 Converter Class.""" def __init__(self, configs_mapping: Dict[Tuple[str], MixPrecisionConfig], *args, **kwargs): """Initialize the FP16 Converter with config. - + Args: config (MixPrecisionConfig): config class for mix-precision. """ diff --git a/neural_compressor/torch/algorithms/mix_precision/module_wrappers.py b/neural_compressor/torch/algorithms/mix_precision/module_wrappers.py index 1200bc0a7d8..ca5e1c3beb1 100644 --- a/neural_compressor/torch/algorithms/mix_precision/module_wrappers.py +++ b/neural_compressor/torch/algorithms/mix_precision/module_wrappers.py @@ -18,6 +18,7 @@ import torch + class FP16ModuleWrapper(torch.nn.Module): """FP16Module Wrapper Class.""" diff --git a/neural_compressor/torch/quantization/algorithm_entry.py b/neural_compressor/torch/quantization/algorithm_entry.py index de3bedb5ac1..8dc2d0b28f7 100644 --- a/neural_compressor/torch/quantization/algorithm_entry.py +++ b/neural_compressor/torch/quantization/algorithm_entry.py @@ -19,27 +19,18 @@ import torch from neural_compressor.common.utils import ( - ( AUTOROUND, - AWQ, - FP8_QUANT, - GPTQ, - HQQ, - + MIX_PRECISION, RTN, - SMOOTH_QUANT, - STATIC_QUANT, - TEQ, Mode, -), - MIX_PRECISION +) ) from neural_compressor.torch.quantization import ( AutoRoundConfig, @@ -47,11 +38,11 @@ FP8Config, GPTQConfig, HQQConfig, + MixPrecisionConfig, RTNConfig, SmoothQuantConfig, StaticQuantConfig, TEQConfig, - MixPrecisionConfig, ) from neural_compressor.torch.utils import get_quantizer, is_ipex_imported, logger, postprocess_model, register_algo from neural_compressor.torch.utils.constants import PT2E_STATIC_QUANT @@ -518,7 +509,7 @@ def mix_precision_entry( model: torch.nn.Module, configs_mapping: Dict[Tuple[str], MixPrecisionConfig], *args, **kwargs ) -> torch.nn.Module: # only support fp16 now, more types might be added later - from neural_compressor.torch.algorithms.mix_precision import FP16Converter + from neural_compressor.torch.algorithms.mix_precision import FP16Converter fp16_converter = FP16Converter(configs_mapping, *args, **kwargs) - + return fp16_converter.convert(model) diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py index a02cc8bcd48..6ce726942bd 100644 --- a/neural_compressor/torch/quantization/config.py +++ b/neural_compressor/torch/quantization/config.py @@ 
-1147,10 +1147,10 @@ class MixPrecisionConfig(BaseConfig): "device", ] supported_fp16_ops = ( - torch.nn.Linear, - torch.nn.Conv1d, - torch.nn.Conv2d, - torch.nn.Conv3d, + torch.nn.Linear, + torch.nn.Conv1d, + torch.nn.Conv2d, + torch.nn.Conv3d, ) def __init__( @@ -1197,7 +1197,7 @@ def get_config_set_for_tuning(cls) -> Union[None, "MixPrecisionConfig", List["Mi def get_default_mix_precision_config() -> MixPrecisionConfig: - """Generate the defaul mix-precision config. + """Generate the default mix-precision config. Returns: the default mix-precision config. diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index df930fc1320..a4f81758c88 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -7,7 +7,13 @@ import transformers from neural_compressor.common import logger -from neural_compressor.torch.quantization import RTNConfig, TuningConfig, MixPrecisionConfig, autotune, get_all_config_set +from neural_compressor.torch.quantization import ( + MixPrecisionConfig, + RTNConfig, + TuningConfig, + autotune, + get_all_config_set, +) from neural_compressor.torch.utils import constants FAKE_DOUBLE_QUANT_CONFIGS = { From aa5ec4bdfe039f31711a5f411334b1e90d6185b8 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Wed, 15 May 2024 14:39:56 +0800 Subject: [PATCH 04/13] fix issue caused by merging Signed-off-by: zehao-intel --- .../torch/quantization/algorithm_entry.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/neural_compressor/torch/quantization/algorithm_entry.py b/neural_compressor/torch/quantization/algorithm_entry.py index de3bedb5ac1..da4d58c6b65 100644 --- a/neural_compressor/torch/quantization/algorithm_entry.py +++ b/neural_compressor/torch/quantization/algorithm_entry.py @@ -19,27 +19,17 @@ import torch from neural_compressor.common.utils import ( - ( AUTOROUND, - AWQ, - FP8_QUANT, - GPTQ, - HQQ, - RTN, - SMOOTH_QUANT, - STATIC_QUANT, - TEQ, Mode, -), - MIX_PRECISION + MIX_PRECISION, ) from neural_compressor.torch.quantization import ( AutoRoundConfig, From 264a4840e236d6c979c030a51a57edec4f316ef1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 15 May 2024 06:44:21 +0000 Subject: [PATCH 05/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- neural_compressor/torch/quantization/algorithm_entry.py | 1 + 1 file changed, 1 insertion(+) diff --git a/neural_compressor/torch/quantization/algorithm_entry.py b/neural_compressor/torch/quantization/algorithm_entry.py index 5f39179ffa1..8a26bac46dd 100644 --- a/neural_compressor/torch/quantization/algorithm_entry.py +++ b/neural_compressor/torch/quantization/algorithm_entry.py @@ -509,6 +509,7 @@ def mix_precision_entry( ) -> torch.nn.Module: # only support fp16 now, more types might be added later from neural_compressor.torch.algorithms.mix_precision import FP16Converter + fp16_converter = FP16Converter(configs_mapping, *args, **kwargs) return fp16_converter.convert(model) From aa54c5f268b51e6a2a3316102ca34b897a352b5d Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Wed, 15 May 2024 16:00:33 +0800 Subject: [PATCH 06/13] refine details Signed-off-by: zehao-intel --- neural_compressor/torch/quantization/algorithm_entry.py | 2 +- neural_compressor/torch/quantization/config.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/neural_compressor/torch/quantization/algorithm_entry.py 
b/neural_compressor/torch/quantization/algorithm_entry.py index 8a26bac46dd..e25b2730c44 100644 --- a/neural_compressor/torch/quantization/algorithm_entry.py +++ b/neural_compressor/torch/quantization/algorithm_entry.py @@ -502,7 +502,7 @@ def fp8_quant_entry( return model -###################### FP16 Algo Entry ################################## +###################### Mixed Precision Algo Entry ################################## @register_algo(MIX_PRECISION) def mix_precision_entry( model: torch.nn.Module, configs_mapping: Dict[Tuple[str], MixPrecisionConfig], *args, **kwargs diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py index 6ce726942bd..3bc1dcf18bb 100644 --- a/neural_compressor/torch/quantization/config.py +++ b/neural_compressor/torch/quantization/config.py @@ -1173,7 +1173,7 @@ def register_supported_configs(cls) -> List[OperatorConfig]: supported_configs = [] mix_precision_config = MixPrecisionConfig( dtype=["fp16", "fp32"], - device=["cpu", "cuda"], + device=["auto", "cpu", "cuda"], ) operators = cls.supported_fp16_ops supported_configs.append(OperatorConfig(config=mix_precision_config, operators=operators)) From 45624315622ffa3cc30aba6b92af63cc9b92755a Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Thu, 16 May 2024 13:21:56 +0800 Subject: [PATCH 07/13] solve comments Signed-off-by: zehao-intel --- .../algorithms/mix_precision/__init__.py | 4 +- .../algorithms/mix_precision/fp16_convert.py | 70 --------------- .../mix_precision/half_precision_convert.py | 89 +++++++++++++++++++ .../mix_precision/module_wrappers.py | 20 ++--- .../torch/quantization/algorithm_entry.py | 9 +- .../torch/quantization/config.py | 14 ++- test/3x/torch/test_autotune.py | 14 +-- 7 files changed, 117 insertions(+), 103 deletions(-) delete mode 100644 neural_compressor/torch/algorithms/mix_precision/fp16_convert.py create mode 100644 neural_compressor/torch/algorithms/mix_precision/half_precision_convert.py diff --git a/neural_compressor/torch/algorithms/mix_precision/__init__.py b/neural_compressor/torch/algorithms/mix_precision/__init__.py index 42a4eae2f53..084e1c44e0f 100644 --- a/neural_compressor/torch/algorithms/mix_precision/__init__.py +++ b/neural_compressor/torch/algorithms/mix_precision/__init__.py @@ -15,5 +15,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -from neural_compressor.torch.algorithms.mix_precision.fp16_convert import FP16Converter -from neural_compressor.torch.algorithms.mix_precision.module_wrappers import FP16ModuleWrapper +from neural_compressor.torch.algorithms.mix_precision.half_precision_convert import HalfPrecisionConverter +from neural_compressor.torch.algorithms.mix_precision.module_wrappers import HalfPrecisionModuleWrapper diff --git a/neural_compressor/torch/algorithms/mix_precision/fp16_convert.py b/neural_compressor/torch/algorithms/mix_precision/fp16_convert.py deleted file mode 100644 index 8ce33a2381a..00000000000 --- a/neural_compressor/torch/algorithms/mix_precision/fp16_convert.py +++ /dev/null @@ -1,70 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""FP16 Convert for Torch Modules.""" - -from typing import Dict, Tuple - -import torch - -from neural_compressor.common import logger -from neural_compressor.torch.algorithms.mix_precision.module_wrappers import FP16ModuleWrapper -from neural_compressor.torch.quantization import MixPrecisionConfig -from neural_compressor.torch.utils import get_device - - -class FP16Converter: - """FP16 Converter Class.""" - - def __init__(self, configs_mapping: Dict[Tuple[str], MixPrecisionConfig], *args, **kwargs): - """Initialize the FP16 Converter with config. - - Args: - config (MixPrecisionConfig): config class for mix-precision. - """ - self.configs_mapping = configs_mapping - self.device = "auto" - for _, config in configs_mapping.items(): - self.device = config.device - break - if self.device == "auto": - self.device = get_device() - - def convert(self, model: torch.nn.Module): - """Convert to FP16 model. - - Args: - model (torch.nn.Module): the input model. - tune_cfg (dict): dictionary of quantization configuration. - - Returns: - mixed_precision_model (object): model with mixed precision. - """ - if len(self.configs_mapping) > 0: - logger.info("Convert operators to float16") - mixed_precision_model = self._fp16_wrapper_model(model) - return mixed_precision_model - - def _fp16_wrapper_model(self, model: torch.nn.Module, prefix=""): - for name, child in model.named_children(): - op_name = prefix + "." + name if prefix != "" else name - for op_info, config in self.configs_mapping.items(): - if op_name == op_info[0] and config.dtype == "fp16": - child = FP16ModuleWrapper(child, self.device) - else: - self._fp16_wrapper_model(child, op_name) - setattr(model, name, child) - return model diff --git a/neural_compressor/torch/algorithms/mix_precision/half_precision_convert.py b/neural_compressor/torch/algorithms/mix_precision/half_precision_convert.py new file mode 100644 index 00000000000..662ede003a8 --- /dev/null +++ b/neural_compressor/torch/algorithms/mix_precision/half_precision_convert.py @@ -0,0 +1,89 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""FP16 Convert for Torch Modules.""" + +from typing import Dict, Tuple + +import torch + +from neural_compressor.common import logger +from neural_compressor.torch.algorithms.mix_precision.module_wrappers import HalfPrecisionModuleWrapper +from neural_compressor.torch.utils import get_device + + +class HalfPrecisionConverter: + """Converter Class for FP16 and BF16.""" + + dtype_mapping = { + "fp16": torch.float16, + "bf16": torch.bfloat16, + } + + def __init__(self, configs_mapping: Dict[Tuple[str], object], *args, **kwargs): + """Initialize the Half-precision Converter with config. + + Args: + configs_mapping (Dict): config class for mix-precision. + """ + self.configs_mapping = configs_mapping + self.device = get_device() + + def convert(self, model: torch.nn.Module): + """Convert to FP16 or BF16 model. + + Args: + model (torch.nn.Module): the input model. + + Returns: + mix_precision_model (torch.nn.Module): model with mix-precision. + """ + if len(self.configs_mapping) > 0: + logger.info("Convert operators to half-precision") + + if next(model.parameters()).is_cuda: + self.device = "cuda" + elif next(model.parameters()).is_cpu: + self.device = "cpu" + + mix_precision_model = self._wrap_half_precision_model(model) + mix_precision_model.to(self.device) + + return mix_precision_model + + def _wrap_half_precision_model(self, model: torch.nn.Module, prefix=""): + """wrap and replace half-precision target modules. + + Args: + model (torch.nn.Module): the input module. + prefix (str): the name prefix for named children. + + Returns: + model (torch.nn.Module): the model whose target modules have been wrapped. + """ + for name, child in model.named_children(): + op_name = prefix + "." + name if prefix != "" else name + for op_info, config in self.configs_mapping.items(): + if op_name == op_info[0] and config.dtype in ("fp16", "bf16"): + child = HalfPrecisionModuleWrapper(module=child, + device=self.device, + dtype=self.dtype_mapping[config.dtype] + ) + else: + self._wrap_half_precision_model(child, op_name) + setattr(model, name, child) + + return model diff --git a/neural_compressor/torch/algorithms/mix_precision/module_wrappers.py b/neural_compressor/torch/algorithms/mix_precision/module_wrappers.py index ca5e1c3beb1..e159228d9d7 100644 --- a/neural_compressor/torch/algorithms/mix_precision/module_wrappers.py +++ b/neural_compressor/torch/algorithms/mix_precision/module_wrappers.py @@ -19,22 +19,20 @@ import torch -class FP16ModuleWrapper(torch.nn.Module): - """FP16Module Wrapper Class.""" +class HalfPrecisionModuleWrapper(torch.nn.Module): + """FP16 or BF16 Module Wrapper Class.""" - def __init__(self, module, device): - """Init a FP16ModuleWrapper object.""" - super(FP16ModuleWrapper, self).__init__() + def __init__(self, module, device="cpu", dtype=torch.float16): + """Init a HalfPrecisionModuleWrapper object.""" + super(HalfPrecisionModuleWrapper, self).__init__() self.add_module("module", module) - self.train(module.training) self.device = device - # WA for TransformerEncoder to access its Linear's weights and bias - if isinstance(module, torch.nn.Linear): - self.weight = self.module.weight if hasattr(self.module, "weight") else None - self.bias = self.module.bias if hasattr(self.module, "bias") else None + self.dtype = dtype + self.weight = self.module.weight if hasattr(self.module, "weight") else None + self.bias = self.module.bias if hasattr(self.module, "bias") else None def forward(self, X): """Convert dtype.""" - with torch.autocast(device_type=self.device, dtype=torch.float16): 
+ with torch.autocast(device_type=self.device, dtype=self.dtype): X = self.module(X) return X.float() diff --git a/neural_compressor/torch/quantization/algorithm_entry.py b/neural_compressor/torch/quantization/algorithm_entry.py index e25b2730c44..9b64fbc1855 100644 --- a/neural_compressor/torch/quantization/algorithm_entry.py +++ b/neural_compressor/torch/quantization/algorithm_entry.py @@ -507,9 +507,10 @@ def fp8_quant_entry( def mix_precision_entry( model: torch.nn.Module, configs_mapping: Dict[Tuple[str], MixPrecisionConfig], *args, **kwargs ) -> torch.nn.Module: - # only support fp16 now, more types might be added later - from neural_compressor.torch.algorithms.mix_precision import FP16Converter + # only support fp16 and bf16 now, more types might be added later + from neural_compressor.torch.algorithms.mix_precision import HalfPrecisionConverter - fp16_converter = FP16Converter(configs_mapping, *args, **kwargs) + half_precision_converter = HalfPrecisionConverter(configs_mapping, *args, **kwargs) + mix_precision_model = half_precision_converter.convert(model) - return fp16_converter.convert(model) + return mix_precision_model \ No newline at end of file diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py index 3bc1dcf18bb..8813fa5b508 100644 --- a/neural_compressor/torch/quantization/config.py +++ b/neural_compressor/torch/quantization/config.py @@ -1144,9 +1144,8 @@ class MixPrecisionConfig(BaseConfig): supported_configs: List[OperatorConfig] = [] params_list = [ "dtype", - "device", ] - supported_fp16_ops = ( + supported_half_precision_ops = ( torch.nn.Linear, torch.nn.Conv1d, torch.nn.Conv2d, @@ -1156,7 +1155,6 @@ class MixPrecisionConfig(BaseConfig): def __init__( self, dtype: Union[str, List[str]] = "fp16", - device: Union[str, List[str]] = "auto", white_list: Optional[List[OP_NAME_OR_MODULE_TYPE]] = DEFAULT_WHITE_LIST, ): """Init MixPrecision config. @@ -1165,23 +1163,21 @@ def __init__( """ super().__init__(white_list=white_list) self.dtype = dtype - self.device = device self._post_init() @classmethod def register_supported_configs(cls) -> List[OperatorConfig]: supported_configs = [] mix_precision_config = MixPrecisionConfig( - dtype=["fp16", "fp32"], - device=["auto", "cpu", "cuda"], + dtype=["fp16", "bf16", "fp32"], ) - operators = cls.supported_fp16_ops + operators = cls.supported_half_precision_ops supported_configs.append(OperatorConfig(config=mix_precision_config, operators=operators)) cls.supported_configs = supported_configs @staticmethod def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: - white_list = tuple(MixPrecisionConfig.supported_fp16_ops) + white_list = tuple(MixPrecisionConfig.supported_half_precision_ops) filter_result = [] for op_name, module in model.named_modules(): if isinstance(module, white_list): @@ -1193,7 +1189,7 @@ def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: @classmethod def get_config_set_for_tuning(cls) -> Union[None, "MixPrecisionConfig", List["MixPrecisionConfig"]]: # TODO fwk owner needs to update it. 
- return MixPrecisionConfig(dtype=["fp16", "fp32"]) + return MixPrecisionConfig(dtype=["fp16", "bf16", "fp32"]) def get_default_mix_precision_config() -> MixPrecisionConfig: diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index a4f81758c88..96044f42bd2 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -340,7 +340,7 @@ def eval_acc_fn(model): @reset_tuning_target def test_autotune_mix_precision_default(self): - from neural_compressor.torch.algorithms.mix_precision import FP16ModuleWrapper + from neural_compressor.torch.algorithms.mix_precision import HalfPrecisionModuleWrapper def eval_acc_fn(model) -> float: return 1.0 @@ -349,14 +349,14 @@ def eval_acc_fn(model) -> float: best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn) self.assertIsNotNone(best_model) - self.assertTrue(isinstance(best_model.fc1, FP16ModuleWrapper)) - self.assertTrue(isinstance(best_model.fc2, FP16ModuleWrapper)) - self.assertTrue(isinstance(best_model.fc3, FP16ModuleWrapper)) + self.assertTrue(isinstance(best_model.fc1, HalfPrecisionModuleWrapper)) + self.assertTrue(isinstance(best_model.fc2, HalfPrecisionModuleWrapper)) + self.assertTrue(isinstance(best_model.fc3, HalfPrecisionModuleWrapper)) @reset_tuning_target def test_autotune_mix_precision_set_op_name(self): from neural_compressor.common.base_config import ComposableConfig, config_registry - from neural_compressor.torch.algorithms.mix_precision import FP16ModuleWrapper + from neural_compressor.torch.algorithms.mix_precision import HalfPrecisionModuleWrapper def eval_acc_fn(model) -> float: return 1.0 @@ -380,8 +380,8 @@ def eval_acc_fn(model) -> float: self.assertIsNotNone(best_model) self.assertTrue(isinstance(best_model.fc1, torch.nn.Linear)) - self.assertTrue(isinstance(best_model.fc2, FP16ModuleWrapper)) - self.assertTrue(isinstance(best_model.fc3, FP16ModuleWrapper)) + self.assertTrue(isinstance(best_model.fc2, HalfPrecisionModuleWrapper)) + self.assertTrue(isinstance(best_model.fc3, HalfPrecisionModuleWrapper)) if __name__ == "__main__": From 77e06dc8414db4b337096a90719019513920a60f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 16 May 2024 05:23:23 +0000 Subject: [PATCH 08/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../mix_precision/half_precision_convert.py | 13 ++++++------- .../torch/quantization/algorithm_entry.py | 2 +- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/neural_compressor/torch/algorithms/mix_precision/half_precision_convert.py b/neural_compressor/torch/algorithms/mix_precision/half_precision_convert.py index 662ede003a8..dbde9c7731f 100644 --- a/neural_compressor/torch/algorithms/mix_precision/half_precision_convert.py +++ b/neural_compressor/torch/algorithms/mix_precision/half_precision_convert.py @@ -55,17 +55,17 @@ def convert(self, model: torch.nn.Module): logger.info("Convert operators to half-precision") if next(model.parameters()).is_cuda: - self.device = "cuda" + self.device = "cuda" elif next(model.parameters()).is_cpu: self.device = "cpu" - + mix_precision_model = self._wrap_half_precision_model(model) mix_precision_model.to(self.device) return mix_precision_model def _wrap_half_precision_model(self, model: torch.nn.Module, prefix=""): - """wrap and replace half-precision target modules. + """Wrap and replace half-precision target modules. 
Args: model (torch.nn.Module): the input module. @@ -78,10 +78,9 @@ def _wrap_half_precision_model(self, model: torch.nn.Module, prefix=""): op_name = prefix + "." + name if prefix != "" else name for op_info, config in self.configs_mapping.items(): if op_name == op_info[0] and config.dtype in ("fp16", "bf16"): - child = HalfPrecisionModuleWrapper(module=child, - device=self.device, - dtype=self.dtype_mapping[config.dtype] - ) + child = HalfPrecisionModuleWrapper( + module=child, device=self.device, dtype=self.dtype_mapping[config.dtype] + ) else: self._wrap_half_precision_model(child, op_name) setattr(model, name, child) diff --git a/neural_compressor/torch/quantization/algorithm_entry.py b/neural_compressor/torch/quantization/algorithm_entry.py index 9b64fbc1855..06dead63572 100644 --- a/neural_compressor/torch/quantization/algorithm_entry.py +++ b/neural_compressor/torch/quantization/algorithm_entry.py @@ -513,4 +513,4 @@ def mix_precision_entry( half_precision_converter = HalfPrecisionConverter(configs_mapping, *args, **kwargs) mix_precision_model = half_precision_converter.convert(model) - return mix_precision_model \ No newline at end of file + return mix_precision_model From 61c799bd1d6e3671d560ec27139e11e45ffb7090 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Thu, 16 May 2024 14:56:06 +0800 Subject: [PATCH 09/13] fix annotation Signed-off-by: zehao-intel --- .../torch/algorithms/mix_precision/half_precision_convert.py | 2 +- .../torch/algorithms/mix_precision/module_wrappers.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/neural_compressor/torch/algorithms/mix_precision/half_precision_convert.py b/neural_compressor/torch/algorithms/mix_precision/half_precision_convert.py index dbde9c7731f..83fb197b6e8 100644 --- a/neural_compressor/torch/algorithms/mix_precision/half_precision_convert.py +++ b/neural_compressor/torch/algorithms/mix_precision/half_precision_convert.py @@ -14,7 +14,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""FP16 Convert for Torch Modules.""" +"""Half-precision Convert for Torch Modules.""" from typing import Dict, Tuple diff --git a/neural_compressor/torch/algorithms/mix_precision/module_wrappers.py b/neural_compressor/torch/algorithms/mix_precision/module_wrappers.py index e159228d9d7..7e8f0758515 100644 --- a/neural_compressor/torch/algorithms/mix_precision/module_wrappers.py +++ b/neural_compressor/torch/algorithms/mix_precision/module_wrappers.py @@ -14,7 +14,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""FP16 Wrapper for Torch Modules.""" +"""Half-precision Wrapper for Torch Modules.""" import torch From 12413125158de747568f320a0c2c7586eba49ecb Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Fri, 17 May 2024 11:49:12 +0800 Subject: [PATCH 10/13] update ut Signed-off-by: zehao-intel --- test/3x/torch/test_autotune.py | 40 +++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index 96044f42bd2..ea52d604c67 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -342,10 +342,14 @@ def eval_acc_fn(model): def test_autotune_mix_precision_default(self): from neural_compressor.torch.algorithms.mix_precision import HalfPrecisionModuleWrapper - def eval_acc_fn(model) -> float: - return 1.0 + baseline = [1] + acc_res_lst = baseline + [0.9, 0.99, 1.1] + + def eval_acc_fn(model): + res = acc_res_lst.pop(0) + return res - custom_tune_config = TuningConfig(config_set=[MixPrecisionConfig()], max_trials=2) + custom_tune_config = TuningConfig(config_set=[MixPrecisionConfig(dtype=["fp16", "bf16", "fp32"])], tolerable_loss=-1) best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn) self.assertIsNotNone(best_model) @@ -358,10 +362,14 @@ def test_autotune_mix_precision_set_op_name(self): from neural_compressor.common.base_config import ComposableConfig, config_registry from neural_compressor.torch.algorithms.mix_precision import HalfPrecisionModuleWrapper - def eval_acc_fn(model) -> float: - return 1.0 + baseline = [1] + acc_res_lst = baseline + [0.9, 1.1] + + def eval_acc_fn(model): + res = acc_res_lst.pop(0) + return res - config = { + config1 = { "mix_precision": { "global": { "dtype": "fp16", @@ -373,9 +381,25 @@ def eval_acc_fn(model) -> float: }, } } + + config2 = { + "mix_precision": { + "global": { + "dtype": "bf16", + }, + "local": { + "fc2": { + "dtype": "fp32", + } + }, + } + } + registered_configs = config_registry.get_cls_configs() - config = ComposableConfig.from_dict(config, config_registry=registered_configs["torch"]) - custom_tune_config = TuningConfig(config_set=[config], max_trials=2) + config1 = ComposableConfig.from_dict(config1, config_registry=registered_configs["torch"]) + config2 = ComposableConfig.from_dict(config2, config_registry=registered_configs["torch"]) + + custom_tune_config = TuningConfig(config_set=[config1, config2], max_trials=2) best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn) self.assertIsNotNone(best_model) From ee6096b829c21852ff97105ea244436451a162f2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 17 May 2024 03:50:43 +0000 Subject: [PATCH 11/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- test/3x/torch/test_autotune.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index ea52d604c67..0e776897055 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -349,7 +349,9 @@ def eval_acc_fn(model): res = acc_res_lst.pop(0) return res - custom_tune_config = TuningConfig(config_set=[MixPrecisionConfig(dtype=["fp16", "bf16", "fp32"])], tolerable_loss=-1) + custom_tune_config = TuningConfig( + config_set=[MixPrecisionConfig(dtype=["fp16", "bf16", "fp32"])], tolerable_loss=-1 + ) best_model 
= autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn) self.assertIsNotNone(best_model) From e55d2bce53300355e3b2ef12a410aae6402ec034 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Fri, 17 May 2024 13:24:59 +0800 Subject: [PATCH 12/13] fix ut Signed-off-by: zehao-intel --- test/3x/torch/test_autotune.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index ea52d604c67..3eeedc6ec8b 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -343,13 +343,13 @@ def test_autotune_mix_precision_default(self): from neural_compressor.torch.algorithms.mix_precision import HalfPrecisionModuleWrapper baseline = [1] - acc_res_lst = baseline + [0.9, 0.99, 1.1] + acc_res_lst = baseline + [0.9, 0.99, 1] def eval_acc_fn(model): res = acc_res_lst.pop(0) return res - custom_tune_config = TuningConfig(config_set=[MixPrecisionConfig(dtype=["fp16", "bf16", "fp32"])], tolerable_loss=-1) + custom_tune_config = TuningConfig(config_set=[MixPrecisionConfig(dtype=["fp16", "bf16", "fp32"])], max_trials=3) best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn) self.assertIsNotNone(best_model) @@ -372,29 +372,28 @@ def eval_acc_fn(model): config1 = { "mix_precision": { "global": { - "dtype": "fp16", + "dtype": "bf16", }, "local": { - "fc1": { + "fc2": { "dtype": "fp32", } }, } } - config2 = { "mix_precision": { "global": { - "dtype": "bf16", + "dtype": "fp16", }, "local": { - "fc2": { + "fc1": { "dtype": "fp32", } }, } - } - + } + registered_configs = config_registry.get_cls_configs() config1 = ComposableConfig.from_dict(config1, config_registry=registered_configs["torch"]) config2 = ComposableConfig.from_dict(config2, config_registry=registered_configs["torch"]) From de536394020944adbfd7c9bcf5ae51c86dbdac26 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 17 May 2024 05:29:26 +0000 Subject: [PATCH 13/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- test/3x/torch/test_autotune.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index 3eeedc6ec8b..268dc6b44e7 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -392,8 +392,8 @@ def eval_acc_fn(model): } }, } - } - + } + registered_configs = config_registry.get_cls_configs() config1 = ComposableConfig.from_dict(config1, config_registry=registered_configs["torch"]) config2 = ComposableConfig.from_dict(config2, config_registry=registered_configs["torch"])
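
Usage sketch (for reference): the flow below mirrors the unit tests added in this series. The three-Linear toy model and the constant-accuracy eval function are placeholders standing in for build_simple_torch_model and a real metric; they are not part of the library API, which here is limited to MixPrecisionConfig, TuningConfig, and autotune as imported in test_autotune.py.

import torch

from neural_compressor.torch.quantization import MixPrecisionConfig, TuningConfig, autotune


class ToyModel(torch.nn.Module):
    """Placeholder model with three Linear layers, analogous to build_simple_torch_model in the tests."""

    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(30, 50)
        self.fc2 = torch.nn.Linear(50, 30)
        self.fc3 = torch.nn.Linear(30, 5)

    def forward(self, x):
        return self.fc3(self.fc2(self.fc1(x)))


def eval_acc_fn(model) -> float:
    # placeholder metric; return the model's real validation accuracy here
    return 1.0


# Try fp16 and bf16 wrappers on the supported modules (Linear/Conv1d/Conv2d/Conv3d),
# keeping fp32 as the fallback candidate during tuning.
custom_tune_config = TuningConfig(
    config_set=[MixPrecisionConfig(dtype=["fp16", "bf16", "fp32"])],
    max_trials=3,
)
best_model = autotune(model=ToyModel(), tune_config=custom_tune_config, eval_fn=eval_acc_fn)

Per-op dtype overrides follow the dict form exercised in test_autotune_mix_precision_set_op_name: a global {"dtype": "fp16"} combined with a local {"fc1": {"dtype": "fp32"}} entry, loaded through ComposableConfig.from_dict before being placed in the TuningConfig config_set.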