From 5bd9858e26c42226a20eae970e93d6015a6483b1 Mon Sep 17 00:00:00 2001 From: Kaihui-intel Date: Wed, 6 Mar 2024 16:31:09 +0800 Subject: [PATCH 1/3] rename autoround args Signed-off-by: Kaihui-intel --- neural_compressor/adaptor/pytorch.py | 9 +++++---- neural_compressor/adaptor/torch_utils/weight_only.py | 10 +++++----- neural_compressor/model/torch_model.py | 4 ++-- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index c937cda3d70..9307011d728 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -4913,11 +4913,12 @@ def autoround_quantize(self, model, tune_cfg, dataloader): weight_config[op_name]["data_type"] = config["weight"]["dtype"] weight_config[op_name]["bits"] = config["weight"]["bits"] weight_config[op_name]["group_size"] = config["weight"]["group_size"] - weight_config[op_name]["scheme"] = config["weight"]["scheme"] + weight_config[op_name]["sym"] = config["weight"]["scheme"] == "sym" + # auto round recipes enable_full_range = self.recipes["autoround_args"].get("enable_full_range", False) - bs = self.recipes["autoround_args"].get("bs", 8) + batch_size = self.recipes["autoround_args"].get("batch_size", 8) lr_scheduler = self.recipes["autoround_args"].get("lr_scheduler", None) dataset_name = self.recipes["autoround_args"].get("dataset_name", "NeelNanda/pile-10k") dataset_split = self.recipes["autoround_args"].get("dataset_split", "train") @@ -4943,10 +4944,10 @@ def autoround_quantize(self, model, tune_cfg, dataloader): tokenizer=None, bits=4, group_size=128, - scheme="asym", + sym=False, weight_config=weight_config, enable_full_range=enable_full_range, - bs=bs, + batch_size=batch_size, lr_scheduler=lr_scheduler, dataloader=dataloader, dataset_name=dataset_name, diff --git a/neural_compressor/adaptor/torch_utils/weight_only.py b/neural_compressor/adaptor/torch_utils/weight_only.py index d40cc2f5fab..0e17b357e99 100644 --- a/neural_compressor/adaptor/torch_utils/weight_only.py +++ b/neural_compressor/adaptor/torch_utils/weight_only.py @@ -677,10 +677,10 @@ def autoround_quantize( tokenizer, bits: int = 4, group_size: int = 128, - scheme: str = "asym", + sym: bool = False, weight_config: dict = {}, enable_full_range: bool = False, ##for symmetric, TODO support later - bs: int = 8, + batch_size: int = 8, amp: bool = True, device=None, lr_scheduler=None, @@ -711,7 +711,7 @@ def autoround_quantize( tokenizer: Tokenizer for processing input data. Temporarily set as a mandatory parameter. bits (int): Number of bits for quantization (default is 4). group_size (int): Size of the quantization group (default is 128). - scheme (str): The quantization scheme to be used (default is "asym"). + sym (bool): Whether the symmetric quantization is to be used. weight_config (dict): Configuration for weight quantization (default is an empty dictionary). weight_config={ 'layer1':##layer_name @@ -758,10 +758,10 @@ def autoround_quantize( tokenizer=tokenizer, bits=bits, group_size=group_size, - scheme=scheme, + sym=sym, weight_config=weight_config, enable_full_range=enable_full_range, ##for symmetric, TODO support later - bs=bs, + batch_size=batch_size, amp=amp, device=device, lr_scheduler=lr_scheduler, diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py index 7aea57770c5..228707b063b 100644 --- a/neural_compressor/model/torch_model.py +++ b/neural_compressor/model/torch_model.py @@ -559,9 +559,9 @@ def export_compressed_model( new_module.pack(int_weight, gptq_scale, gptq_zp, m.bias, gptq_perm) set_module(self.model, k, new_module) elif autoround_config: - from auto_round.export.export_to_itrex.export import _pack_model # pylint: disable=E0401 + from auto_round.export.export_to_itrex.export import pack_model # pylint: disable=E0401 - self.model = _pack_model( + self.model = pack_model( self.model, weight_config=autoround_config, enable_full_range=enable_full_range, From bbadbf5c15c832fcf5f3c22d8661a37140084e19 Mon Sep 17 00:00:00 2001 From: Kaihui-intel Date: Wed, 6 Mar 2024 16:37:30 +0800 Subject: [PATCH 2/3] update autoround version Signed-off-by: Kaihui-intel --- .azure-pipelines/scripts/ut/env_setup.sh | 2 +- test/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.azure-pipelines/scripts/ut/env_setup.sh b/.azure-pipelines/scripts/ut/env_setup.sh index 70a77354f58..0e5ac642c15 100644 --- a/.azure-pipelines/scripts/ut/env_setup.sh +++ b/.azure-pipelines/scripts/ut/env_setup.sh @@ -99,7 +99,7 @@ elif [[ $(echo "${test_case}" | grep -c "tf pruning") != 0 ]]; then fi if [[ $(echo "${test_case}" | grep -c "api") != 0 ]] || [[ $(echo "${test_case}" | grep -c "adaptor") != 0 ]]; then - pip install git+https://github.com/intel/auto-round.git@6815f8b66be456ecbef2d0beb33dbc4efeefdc04 + pip install git+https://github.com/intel/auto-round.git@d02f94d4b085523df3b313863fb07f83b2989cce fi # test deps diff --git a/test/requirements.txt b/test/requirements.txt index ca603fd7afc..b51a59d0ba7 100644 --- a/test/requirements.txt +++ b/test/requirements.txt @@ -1,7 +1,7 @@ --find-links https://download.pytorch.org/whl/torch_stable.html accelerate==0.21.0 dynast==1.6.0rc1 -git+https://github.com/intel/auto-round.git@6815f8b66be456ecbef2d0beb33dbc4efeefdc04 +git+https://github.com/intel/auto-round.git@d02f94d4b085523df3b313863fb07f83b2989cce horovod intel-extension-for-pytorch intel-tensorflow>=2.12.0 From c00cbd004c5aa3cf6af9b8f820415a8860fee6ab Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 6 Mar 2024 09:28:00 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- neural_compressor/adaptor/pytorch.py | 1 - 1 file changed, 1 deletion(-) diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index 9307011d728..fcdf22bcaee 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -4915,7 +4915,6 @@ def autoround_quantize(self, model, tune_cfg, dataloader): weight_config[op_name]["group_size"] = config["weight"]["group_size"] weight_config[op_name]["sym"] = config["weight"]["scheme"] == "sym" - # auto round recipes enable_full_range = self.recipes["autoround_args"].get("enable_full_range", False) batch_size = self.recipes["autoround_args"].get("batch_size", 8)