From 542a74fd57aa69bc551366ba68d9d39964372bff Mon Sep 17 00:00:00 2001
From: "Cheng, Zixuan"
Date: Wed, 24 Jan 2024 11:05:39 +0800
Subject: [PATCH 1/4] pt 3.x config for static quant and smooth quant

Signed-off-by: Cheng, Zixuan
---
 neural_compressor/common/utils/constants.py |   1 +
 neural_compressor/torch/__init__.py         |   4 +
 .../torch/quantization/__init__.py          |   4 +
 .../torch/quantization/config.py            | 195 +++++++++++++++++-
 4 files changed, 203 insertions(+), 1 deletion(-)

diff --git a/neural_compressor/common/utils/constants.py b/neural_compressor/common/utils/constants.py
index 615c5f26ffd..c92f916e6f7 100644
--- a/neural_compressor/common/utils/constants.py
+++ b/neural_compressor/common/utils/constants.py
@@ -29,6 +29,7 @@
 COMPOSABLE_CONFIG = "composable_config"
 RTN = "rtn"
 STATIC_QUANT = "static_quant"
+SMOOTH_QUANT = "smooth_quant"
 GPTQ = "gptq"
 FP8_QUANT = "fp8_quant"

diff --git a/neural_compressor/torch/__init__.py b/neural_compressor/torch/__init__.py
index 81f131ca114..5fe8d73cc8c 100644
--- a/neural_compressor/torch/__init__.py
+++ b/neural_compressor/torch/__init__.py
@@ -21,6 +21,10 @@
     get_default_rtn_config,
     GPTQConfig,
     get_default_gptq_config,
+    StaticQuantConfig,
+    get_default_static_config,
+    SmoothQuantConfig,
+    get_default_sq_config,
 )

 from neural_compressor.common.base_tuning import TuningConfig

diff --git a/neural_compressor/torch/quantization/__init__.py b/neural_compressor/torch/quantization/__init__.py
index c78a7b0552e..b287c5ae2d6 100644
--- a/neural_compressor/torch/quantization/__init__.py
+++ b/neural_compressor/torch/quantization/__init__.py
@@ -18,4 +18,8 @@
     get_default_rtn_config,
     GPTQConfig,
     get_default_gptq_config,
+    StaticQuantConfig,
+    get_default_static_config,
+    SmoothQuantConfig,
+    get_default_sq_config,
 )

diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py
index 10799a30bc4..0a3584c232f 100644
--- a/neural_compressor/torch/quantization/config.py
+++ b/neural_compressor/torch/quantization/config.py
@@ -24,7 +24,15 @@
 import torch

 from neural_compressor.common.base_config import BaseConfig, config_registry, register_config
-from neural_compressor.common.utils import DEFAULT_WHITE_LIST, FP8_QUANT, GPTQ, OP_NAME_OR_MODULE_TYPE, RTN
+from neural_compressor.common.utils import (
+    DEFAULT_WHITE_LIST,
+    FP8_QUANT,
+    GPTQ,
+    OP_NAME_OR_MODULE_TYPE,
+    RTN,
+    SMOOTH_QUANT,
+    STATIC_QUANT,
+)
 from neural_compressor.torch.utils.constants import PRIORITY_GPTQ, PRIORITY_RTN
 from neural_compressor.torch.utils.utility import is_hpex_avaliable, logger

@@ -311,6 +319,191 @@ def get_default_gptq_config() -> GPTQConfig:
     return GPTQConfig()


+######################## Static Quant Config ###############################
+@register_config(framework_name=FRAMEWORK_NAME, algo_name=STATIC_QUANT)
+class StaticQuantConfig(BaseConfig):
+    """Config class for static quantization."""
+
+    name = STATIC_QUANT
+    params_list = [
+        "w_dtype",
+        "w_sym",
+        "w_granularity",
+        "w_algo",
+        "act_dtype",
+        "act_sym",
+        "act_granularity",
+        "act_algo",
+    ]
+    supported_configs: List[OperatorConfig] = []
+
+    def __init__(
+        self,
+        w_dtype: str = "int8",
+        w_sym: bool = True,
+        w_granularity: str = "per_channel",
+        w_algo: str = "minmax",
+        act_dtype: str = "uint8",
+        act_sym: bool = False,
+        act_granularity: str = "per_tensor",
+        act_algo: str = "minmax",
+        white_list: Optional[List[OP_NAME_OR_MODULE_TYPE]] = DEFAULT_WHITE_LIST,
+    ):
+        """Init Static Quant Configs."""
+        super().__init__(white_list=white_list)
+        self.w_dtype = w_dtype
+        self.w_sym = w_sym
+        self.w_granularity = w_granularity
+        self.w_algo = w_algo
+        self.act_dtype = act_dtype
+        self.act_sym = act_sym
+        self.act_granularity = act_granularity
+        self.act_algo = act_algo
+        self._post_init()
+
+    @classmethod
+    def register_supported_configs(cls) -> List[OperatorConfig]:
+        supported_configs = []
+        # TODO(Yi)
+        linear_static_config = StaticQuantConfig()
+        operators = [torch.nn.Linear, torch.nn.functional.linear]
+        supported_configs.append(OperatorConfig(config=linear_static_config, operators=operators))
+        cls.supported_configs = supported_configs
+
+    @staticmethod
+    def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]:
+        white_list = (torch.nn.Linear,)
+        filter_result = []
+        for op_name, module in model.named_modules():
+            if isinstance(module, white_list):
+                pair = (op_name, type(module).__name__)
+                filter_result.append(pair)
+        logger.debug(f"Get model info: {filter_result}")
+        return filter_result
+
+
+# TODO(Yi) run `register_supported_configs` for all registered config.
+StaticQuantConfig.register_supported_configs()
+
+
+def get_default_static_config() -> StaticQuantConfig:
+    """Generate the default static quant config.
+
+    Returns:
+        the default static quant config.
+    """
+    return StaticQuantConfig()
+
+
+######################## Smooth Quant Config ###############################
+@register_config(framework_name=FRAMEWORK_NAME, algo_name=SMOOTH_QUANT)
+class SmoothQuantConfig(BaseConfig):
+    """Config class for smooth quantization."""
+
+    name = SMOOTH_QUANT
+    params_list = [
+        "w_dtype",
+        "w_sym",
+        "w_granularity",
+        "w_algo",
+        "act_dtype",
+        "act_sym",
+        "act_granularity",
+        "act_algo",
+        "alpha",
+        "folding",
+        "scale_sharing",
+        "auto_alpha_args",
+    ]
+    supported_configs: List[OperatorConfig] = []
+
+    def __init__(
+        self,
+        w_dtype: str = "int8",
+        w_sym: bool = True,
+        w_granularity: str = "per_channel",
+        w_algo: str = "minmax",
+        act_dtype: str = "uint8",
+        act_sym: bool = False,
+        act_granularity: str = "per_tensor",
+        act_algo: str = "minmax",
+        alpha: float = 0.5,
+        folding: bool = False,
+        # below for autotune
+        scale_sharing: bool = False,
+        init_alpha: float = 0.5,
+        alpha_min: float = 0.0,
+        alpha_max: float = 1.0,
+        alpha_step: float = 0.1,
+        shared_criterion: str = "max",
+        enable_blockwise_loss: bool = False,
+        auto_alpha_args: dict = None,
+        white_list: Optional[List[OP_NAME_OR_MODULE_TYPE]] = DEFAULT_WHITE_LIST,
+    ):
+        """Init SmoothQuant Configs."""
+        super().__init__(white_list=white_list)
+        self.w_dtype = w_dtype
+        self.w_sym = w_sym
+        self.w_granularity = w_granularity
+        self.w_algo = w_algo
+        self.act_dtype = act_dtype
+        self.act_sym = act_sym
+        self.act_granularity = act_granularity
+        self.act_algo = act_algo
+        self.alpha = alpha
+        self.folding = folding
+        # below for autotune
+        self.scale_sharing = scale_sharing
+        self.init_alpha = init_alpha
+        self.alpha_min = alpha_min
+        self.alpha_max = alpha_max
+        self.alpha_step = alpha_step
+        self.shared_criterion = shared_criterion
+        self.enable_blockwise_loss = enable_blockwise_loss
+        self.auto_alpha_args = {
+            "init_alpha": self.init_alpha,
+            "alpha_min": self.alpha_min,
+            "alpha_max": self.alpha_max,
+            "alpha_step": self.alpha_step,
+            "shared_criterion": self.shared_criterion,
+            "enable_blockwise_loss": self.enable_blockwise_loss,
+        }
+        self._post_init()
+
+    @classmethod
+    def register_supported_configs(cls) -> List[OperatorConfig]:
+        supported_configs = []
+        # TODO(Yi)
+        linear_sq_config = SmoothQuantConfig()
+        operators = [torch.nn.Linear]
+        supported_configs.append(OperatorConfig(config=linear_sq_config, operators=operators))
+        cls.supported_configs = supported_configs
+
+    @staticmethod
+    def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]:
+        white_list = (torch.nn.Linear,)
+        filter_result = []
+        for op_name, module in model.named_modules():
+            if isinstance(module, white_list):
+                pair = (op_name, type(module).__name__)
+                filter_result.append(pair)
+        logger.debug(f"Get model info: {filter_result}")
+        return filter_result
+
+
+# TODO(Yi) run `register_supported_configs` for all registered config.
+SmoothQuantConfig.register_supported_configs()
+
+
+def get_default_sq_config() -> SmoothQuantConfig:
+    """Generate the default smoothquant config.

+    Returns:
+        the default smoothquant config.
+    """
+    return SmoothQuantConfig()
+
+
 ######################## FP8 Config ###############################
 if is_hpex_avaliable():

From ad48539446aa491f8eb9087d95ba4deaef750fa7 Mon Sep 17 00:00:00 2001
From: "Cheng, Zixuan"
Date: Wed, 24 Jan 2024 14:04:17 +0800
Subject: [PATCH 2/4] minor fix

Signed-off-by: Cheng, Zixuan
---
 neural_compressor/torch/quantization/config.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py
index 0a3584c232f..f75529ad901 100644
--- a/neural_compressor/torch/quantization/config.py
+++ b/neural_compressor/torch/quantization/config.py
@@ -346,7 +346,7 @@ def __init__(
         act_dtype: str = "uint8",
         act_sym: bool = False,
         act_granularity: str = "per_tensor",
-        act_algo: str = "minmax",
+        act_algo: str = "kl",
         white_list: Optional[List[OP_NAME_OR_MODULE_TYPE]] = DEFAULT_WHITE_LIST,
     ):
         """Init Static Quant Configs."""
@@ -366,7 +366,7 @@ def register_supported_configs(cls) -> List[OperatorConfig]:
         supported_configs = []
         # TODO(Yi)
         linear_static_config = StaticQuantConfig()
-        operators = [torch.nn.Linear, torch.nn.functional.linear]
+        operators = [torch.nn.Linear]
         supported_configs.append(OperatorConfig(config=linear_static_config, operators=operators))
         cls.supported_configs = supported_configs

@@ -426,7 +426,7 @@ def __init__(
         act_dtype: str = "uint8",
         act_sym: bool = False,
         act_granularity: str = "per_tensor",
-        act_algo: str = "minmax",
+        act_algo: str = "kl",
         alpha: float = 0.5,
         folding: bool = False,
         # below for autotune
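
The two patches above define the public surface of the new configs. As a quick illustration of the behavior they specify, here is a minimal sketch of how SmoothQuantConfig folds its autotune knobs into the single auto_alpha_args attribute; it relies only on the constructor shown in PATCH 1/4 and 2/4, and the printed dict is derived by hand from that constructor, not taken from a real run:

    from neural_compressor.torch.quantization import SmoothQuantConfig

    # The per-alpha search knobs are packed into one dict attribute.
    sq_config = SmoothQuantConfig(alpha_min=0.3, alpha_max=0.7, alpha_step=0.05)
    print(sq_config.auto_alpha_args)
    # {'init_alpha': 0.5, 'alpha_min': 0.3, 'alpha_max': 0.7, 'alpha_step': 0.05,
    #  'shared_criterion': 'max', 'enable_blockwise_loss': False}

Note that the auto_alpha_args constructor argument is effectively ignored as written: __init__ always rebuilds the dict from the individual keyword arguments, so callers should pass init_alpha, alpha_min, and friends directly.
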
From 68c2d2484704e7b5a9f9abdbbccdfc7403d42d5f Mon Sep 17 00:00:00 2001
From: "Cheng, Zixuan"
Date: Thu, 25 Jan 2024 14:09:43 +0800
Subject: [PATCH 3/4] add ut for configs

Signed-off-by: Cheng, Zixuan
---
 test/3x/torch/test_config.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/test/3x/torch/test_config.py b/test/3x/torch/test_config.py
index 0e74fa56300..666fabaac42 100644
--- a/test/3x/torch/test_config.py
+++ b/test/3x/torch/test_config.py
@@ -321,6 +321,27 @@ def test_gptq_config(self):
         gptq_config2 = GPTQConfig.from_dict(quant_config_dict["gptq"])
         self.assertEqual(gptq_config1.to_dict(), gptq_config2.to_dict())

+    def test_static_quant_config(self):
+        from neural_compressor.torch.quantization import StaticQuantConfig
+        static_config1 = StaticQuantConfig(w_dtype="int8", act_sym=True, act_algo="minmax")
+        quant_config_dict = {
+            "static": {
+                "w_dtype": "int8",
+                "act_sym": True,
+                "act_algo": "minmax"
+            }
+        }
+        static_config2 = StaticQuantConfig.from_dict(quant_config_dict["static"])
+        self.assertEqual(static_config1.to_dict(), static_config2.to_dict())
+
+    def test_smooth_quant_config(self):
+        from neural_compressor.torch.quantization import SmoothQuantConfig
+        sq_config1 = SmoothQuantConfig(alpha=0.8, folding=True)
+        quant_config_dict = {
+            "sq": {"alpha": 0.8, "folding": True}
+        }
+        sq_config2 = SmoothQuantConfig.from_dict(quant_config_dict["sq"])
+        self.assertEqual(sq_config1.to_dict(), sq_config2.to_dict())

 class TestQuantConfigForAutotune(unittest.TestCase):
     def test_expand_config(self):

From 2ec7dffbf61d0282c1eb91615c1153c364ab7e67 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 25 Jan 2024 06:11:05 +0000
Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 test/3x/torch/test_config.py | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/test/3x/torch/test_config.py b/test/3x/torch/test_config.py
index 666fabaac42..ff1012dc1ed 100644
--- a/test/3x/torch/test_config.py
+++ b/test/3x/torch/test_config.py
@@ -323,26 +323,21 @@ def test_gptq_config(self):

     def test_static_quant_config(self):
         from neural_compressor.torch.quantization import StaticQuantConfig
+
         static_config1 = StaticQuantConfig(w_dtype="int8", act_sym=True, act_algo="minmax")
-        quant_config_dict = {
-            "static": {
-                "w_dtype": "int8",
-                "act_sym": True,
-                "act_algo": "minmax"
-            }
-        }
+        quant_config_dict = {"static": {"w_dtype": "int8", "act_sym": True, "act_algo": "minmax"}}
         static_config2 = StaticQuantConfig.from_dict(quant_config_dict["static"])
         self.assertEqual(static_config1.to_dict(), static_config2.to_dict())

     def test_smooth_quant_config(self):
         from neural_compressor.torch.quantization import SmoothQuantConfig
+
         sq_config1 = SmoothQuantConfig(alpha=0.8, folding=True)
-        quant_config_dict = {
-            "sq": {"alpha": 0.8, "folding": True}
-        }
+        quant_config_dict = {"sq": {"alpha": 0.8, "folding": True}}
         sq_config2 = SmoothQuantConfig.from_dict(quant_config_dict["sq"])
         self.assertEqual(sq_config1.to_dict(), sq_config2.to_dict())

+
 class TestQuantConfigForAutotune(unittest.TestCase):
     def test_expand_config(self):
         # test the expand functionalities, the user is not aware it
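
For context, a rough end-to-end sketch of how the new defaults are meant to be consumed. The quantize entry point and its run_fn calibration hook are assumptions based on the surrounding 3.x PyTorch API, not part of this patch series, and the toy model and calibration loop are purely illustrative:

    import torch

    from neural_compressor.torch import get_default_static_config, quantize

    # Any torch.nn.Linear in the model is what get_model_info() picks up.
    model = torch.nn.Sequential(torch.nn.Linear(8, 16), torch.nn.ReLU(), torch.nn.Linear(16, 4))

    def run_fn(model):
        # Hypothetical calibration loop feeding a few random batches.
        for _ in range(4):
            model(torch.randn(2, 8))

    quant_config = get_default_static_config()  # added in PATCH 1/4
    q_model = quantize(model, quant_config=quant_config, run_fn=run_fn)

A SmoothQuantConfig obtained from get_default_sq_config() would slot into the same call, with the calibration pass supplying the activation statistics that the alpha-based scale migration relies on.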