diff --git a/.azure-pipelines/scripts/ut/env_setup.sh b/.azure-pipelines/scripts/ut/env_setup.sh index 70a77354f58..0e5ac642c15 100644 --- a/.azure-pipelines/scripts/ut/env_setup.sh +++ b/.azure-pipelines/scripts/ut/env_setup.sh @@ -99,7 +99,7 @@ elif [[ $(echo "${test_case}" | grep -c "tf pruning") != 0 ]]; then fi if [[ $(echo "${test_case}" | grep -c "api") != 0 ]] || [[ $(echo "${test_case}" | grep -c "adaptor") != 0 ]]; then - pip install git+https://github.com/intel/auto-round.git@6815f8b66be456ecbef2d0beb33dbc4efeefdc04 + pip install git+https://github.com/intel/auto-round.git@d02f94d4b085523df3b313863fb07f83b2989cce fi # test deps diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py index c937cda3d70..fcdf22bcaee 100644 --- a/neural_compressor/adaptor/pytorch.py +++ b/neural_compressor/adaptor/pytorch.py @@ -4913,11 +4913,11 @@ def autoround_quantize(self, model, tune_cfg, dataloader): weight_config[op_name]["data_type"] = config["weight"]["dtype"] weight_config[op_name]["bits"] = config["weight"]["bits"] weight_config[op_name]["group_size"] = config["weight"]["group_size"] - weight_config[op_name]["scheme"] = config["weight"]["scheme"] + weight_config[op_name]["sym"] = config["weight"]["scheme"] == "sym" # auto round recipes enable_full_range = self.recipes["autoround_args"].get("enable_full_range", False) - bs = self.recipes["autoround_args"].get("bs", 8) + batch_size = self.recipes["autoround_args"].get("batch_size", 8) lr_scheduler = self.recipes["autoround_args"].get("lr_scheduler", None) dataset_name = self.recipes["autoround_args"].get("dataset_name", "NeelNanda/pile-10k") dataset_split = self.recipes["autoround_args"].get("dataset_split", "train") @@ -4943,10 +4943,10 @@ def autoround_quantize(self, model, tune_cfg, dataloader): tokenizer=None, bits=4, group_size=128, - scheme="asym", + sym=False, weight_config=weight_config, enable_full_range=enable_full_range, - bs=bs, + batch_size=batch_size, lr_scheduler=lr_scheduler, dataloader=dataloader, dataset_name=dataset_name, diff --git a/neural_compressor/adaptor/torch_utils/weight_only.py b/neural_compressor/adaptor/torch_utils/weight_only.py index d40cc2f5fab..0e17b357e99 100644 --- a/neural_compressor/adaptor/torch_utils/weight_only.py +++ b/neural_compressor/adaptor/torch_utils/weight_only.py @@ -677,10 +677,10 @@ def autoround_quantize( tokenizer, bits: int = 4, group_size: int = 128, - scheme: str = "asym", + sym: bool = False, weight_config: dict = {}, enable_full_range: bool = False, ##for symmetric, TODO support later - bs: int = 8, + batch_size: int = 8, amp: bool = True, device=None, lr_scheduler=None, @@ -711,7 +711,7 @@ def autoround_quantize( tokenizer: Tokenizer for processing input data. Temporarily set as a mandatory parameter. bits (int): Number of bits for quantization (default is 4). group_size (int): Size of the quantization group (default is 128). - scheme (str): The quantization scheme to be used (default is "asym"). + sym (bool): Whether the symmetric quantization is to be used. weight_config (dict): Configuration for weight quantization (default is an empty dictionary). weight_config={ 'layer1':##layer_name @@ -758,10 +758,10 @@ def autoround_quantize( tokenizer=tokenizer, bits=bits, group_size=group_size, - scheme=scheme, + sym=sym, weight_config=weight_config, enable_full_range=enable_full_range, ##for symmetric, TODO support later - bs=bs, + batch_size=batch_size, amp=amp, device=device, lr_scheduler=lr_scheduler, diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py index 7aea57770c5..228707b063b 100644 --- a/neural_compressor/model/torch_model.py +++ b/neural_compressor/model/torch_model.py @@ -559,9 +559,9 @@ def export_compressed_model( new_module.pack(int_weight, gptq_scale, gptq_zp, m.bias, gptq_perm) set_module(self.model, k, new_module) elif autoround_config: - from auto_round.export.export_to_itrex.export import _pack_model # pylint: disable=E0401 + from auto_round.export.export_to_itrex.export import pack_model # pylint: disable=E0401 - self.model = _pack_model( + self.model = pack_model( self.model, weight_config=autoround_config, enable_full_range=enable_full_range, diff --git a/test/requirements.txt b/test/requirements.txt index ca603fd7afc..b51a59d0ba7 100644 --- a/test/requirements.txt +++ b/test/requirements.txt @@ -1,7 +1,7 @@ --find-links https://download.pytorch.org/whl/torch_stable.html accelerate==0.21.0 dynast==1.6.0rc1 -git+https://github.com/intel/auto-round.git@6815f8b66be456ecbef2d0beb33dbc4efeefdc04 +git+https://github.com/intel/auto-round.git@d02f94d4b085523df3b313863fb07f83b2989cce horovod intel-extension-for-pytorch intel-tensorflow>=2.12.0