From 8a4b54b3e4f9e060df65585fb4e84641628b1a01 Mon Sep 17 00:00:00 2001 From: Kaihui-intel Date: Tue, 5 Mar 2024 09:51:26 +0800 Subject: [PATCH 1/4] add export compressed model Signed-off-by: Kaihui-intel --- neural_compressor/model/torch_model.py | 14 ++++++++++++++ .../test_weight_only_adaptor_pytorch.py | 10 ++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py index 395b9c007fe..895722f3292 100644 --- a/neural_compressor/model/torch_model.py +++ b/neural_compressor/model/torch_model.py @@ -496,6 +496,9 @@ def export_compressed_model( gptq_config = json.load(f) else: gptq_config = self.gptq_config if hasattr(self, "gptq_config") else {} + + autoround_config = self.autoround_config if hasattr(self, "autoround_config") else {} + if gptq_config: for k, v in weight_config.items(): logger.debug(f"Compressing {k} on device {device}") @@ -555,6 +558,17 @@ def export_compressed_model( ) new_module.pack(int_weight, gptq_scale, gptq_zp, m.bias, gptq_perm) set_module(self.model, k, new_module) + elif autoround_config: + from auto_round.export.export_to_itrex import compress_model + self.model = compress_model( + self.model, + weight_config=autoround_config, + enable_full_range=enable_full_range, + compression_dtype=compression_dtype, + compression_dim=compression_dim, + device=device, + use_optimum_format=use_optimum_format, + inplace=True) else: for k, v in weight_config.items(): logger.debug(f"Compressing {k} on device {device}") diff --git a/test/adaptor/pytorch_adaptor/test_weight_only_adaptor_pytorch.py b/test/adaptor/pytorch_adaptor/test_weight_only_adaptor_pytorch.py index ecfa34e56ff..8ca5c73d50c 100644 --- a/test/adaptor/pytorch_adaptor/test_weight_only_adaptor_pytorch.py +++ b/test/adaptor/pytorch_adaptor/test_weight_only_adaptor_pytorch.py @@ -801,6 +801,14 @@ def test_AutoRound_quant(self): self.assertTrue("scale" in q_model.autoround_config["transformer.h.0.attn.k_proj"].keys()) self.assertTrue(torch.float32 == q_model.autoround_config["transformer.h.0.attn.k_proj"]["scale_dtype"]) + export_model = q_model.export_compressed_model() + export_out = export_model(input) + self.assertTrue(torch.allclose(out2[0], export_out[0])) + from auto_round.export.export_to_itrex.model_wrapper import WeightOnlyLinear + + self.assertTrue(isinstance(q_model.model.transformer.h[0].attn.k_proj, WeightOnlyLinear)) + self.assertTrue(isinstance(export_model.transformer.h[0].attn.k_proj, WeightOnlyLinear)) + fp32_model = copy.deepcopy(self.gptj) conf = PostTrainingQuantConfig( @@ -852,8 +860,6 @@ def test_AutoRound_quant(self): ) out2 = export_model.model(input) self.assertTrue(torch.allclose(out1[0], out2[0], atol=1e-01)) - from auto_round.export.export_to_itrex.model_wrapper import WeightOnlyLinear - self.assertTrue(isinstance(export_model.model.transformer.h[0].attn.k_proj, WeightOnlyLinear)) From 2315af69b0f98d2347e360fe6d535928b6e9b3fb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 5 Mar 2024 02:48:30 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- neural_compressor/model/torch_model.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py index 895722f3292..e3a0ef4443d 100644 --- a/neural_compressor/model/torch_model.py +++ b/neural_compressor/model/torch_model.py @@ -560,6 +560,7 @@ def export_compressed_model( set_module(self.model, k, new_module) elif autoround_config: from auto_round.export.export_to_itrex import compress_model + self.model = compress_model( self.model, weight_config=autoround_config, @@ -568,7 +569,8 @@ def export_compressed_model( compression_dim=compression_dim, device=device, use_optimum_format=use_optimum_format, - inplace=True) + inplace=True, + ) else: for k, v in weight_config.items(): logger.debug(f"Compressing {k} on device {device}") From 2598cfa5b78e155e1d37e246dc71dc25dcb787d1 Mon Sep 17 00:00:00 2001 From: Kaihui-intel Date: Tue, 5 Mar 2024 10:15:40 +0800 Subject: [PATCH 3/4] disable pylint Signed-off-by: Kaihui-intel --- neural_compressor/model/torch_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py index e3a0ef4443d..7659fa58c56 100644 --- a/neural_compressor/model/torch_model.py +++ b/neural_compressor/model/torch_model.py @@ -559,7 +559,7 @@ def export_compressed_model( new_module.pack(int_weight, gptq_scale, gptq_zp, m.bias, gptq_perm) set_module(self.model, k, new_module) elif autoround_config: - from auto_round.export.export_to_itrex import compress_model + from auto_round.export.export_to_itrex import compress_model # pylint: disable=E0401 self.model = compress_model( self.model, From 549b2734fdb4b165a772069a7c1d609086c8990d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 5 Mar 2024 03:08:03 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- neural_compressor/model/torch_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py index 7659fa58c56..3685c5c208d 100644 --- a/neural_compressor/model/torch_model.py +++ b/neural_compressor/model/torch_model.py @@ -559,7 +559,7 @@ def export_compressed_model( new_module.pack(int_weight, gptq_scale, gptq_zp, m.bias, gptq_perm) set_module(self.model, k, new_module) elif autoround_config: - from auto_round.export.export_to_itrex import compress_model # pylint: disable=E0401 + from auto_round.export.export_to_itrex import compress_model # pylint: disable=E0401 self.model = compress_model( self.model,