From 8a4b54b3e4f9e060df65585fb4e84641628b1a01 Mon Sep 17 00:00:00 2001
From: Kaihui-intel <kaihui.tang@intel.com>
Date: Tue, 5 Mar 2024 09:51:26 +0800
Subject: [PATCH 1/4] Add AutoRound support to export_compressed_model

Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
---
 neural_compressor/model/torch_model.py             | 14 ++++++++++++++
 .../test_weight_only_adaptor_pytorch.py            | 10 ++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py
index 395b9c007fe..895722f3292 100644
--- a/neural_compressor/model/torch_model.py
+++ b/neural_compressor/model/torch_model.py
@@ -496,6 +496,9 @@ def export_compressed_model(
                 gptq_config = json.load(f)
         else:
             gptq_config = self.gptq_config if hasattr(self, "gptq_config") else {}
+
+        autoround_config = self.autoround_config if hasattr(self, "autoround_config") else {}
+
         if gptq_config:
             for k, v in weight_config.items():
                 logger.debug(f"Compressing {k} on device {device}")
@@ -555,6 +558,17 @@ def export_compressed_model(
                 )
                 new_module.pack(int_weight, gptq_scale, gptq_zp, m.bias, gptq_perm)
                 set_module(self.model, k, new_module)
+        elif autoround_config:
+            from auto_round.export.export_to_itrex import compress_model
+            self.model = compress_model(
+                self.model,
+                weight_config=autoround_config,
+                enable_full_range=enable_full_range,
+                compression_dtype=compression_dtype,
+                compression_dim=compression_dim,
+                device=device,
+                use_optimum_format=use_optimum_format,
+                inplace=True)
         else:
             for k, v in weight_config.items():
                 logger.debug(f"Compressing {k} on device {device}")
diff --git a/test/adaptor/pytorch_adaptor/test_weight_only_adaptor_pytorch.py b/test/adaptor/pytorch_adaptor/test_weight_only_adaptor_pytorch.py
index ecfa34e56ff..8ca5c73d50c 100644
--- a/test/adaptor/pytorch_adaptor/test_weight_only_adaptor_pytorch.py
+++ b/test/adaptor/pytorch_adaptor/test_weight_only_adaptor_pytorch.py
@@ -801,6 +801,14 @@ def test_AutoRound_quant(self):
         self.assertTrue("scale" in q_model.autoround_config["transformer.h.0.attn.k_proj"].keys())
         self.assertTrue(torch.float32 == q_model.autoround_config["transformer.h.0.attn.k_proj"]["scale_dtype"])
 
+        export_model = q_model.export_compressed_model()
+        export_out = export_model(input)
+        self.assertTrue(torch.allclose(out2[0], export_out[0]))
+        from auto_round.export.export_to_itrex.model_wrapper import WeightOnlyLinear
+
+        self.assertTrue(isinstance(q_model.model.transformer.h[0].attn.k_proj, WeightOnlyLinear))
+        self.assertTrue(isinstance(export_model.transformer.h[0].attn.k_proj, WeightOnlyLinear))
+
         fp32_model = copy.deepcopy(self.gptj)
 
         conf = PostTrainingQuantConfig(
@@ -852,8 +860,6 @@ def test_AutoRound_quant(self):
         )
         out2 = export_model.model(input)
         self.assertTrue(torch.allclose(out1[0], out2[0], atol=1e-01))
-        from auto_round.export.export_to_itrex.model_wrapper import WeightOnlyLinear
-
         self.assertTrue(isinstance(export_model.model.transformer.h[0].attn.k_proj, WeightOnlyLinear))
 
 

From 2315af69b0f98d2347e360fe6d535928b6e9b3fb Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 5 Mar 2024 02:48:30 +0000
Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 neural_compressor/model/torch_model.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py
index 895722f3292..e3a0ef4443d 100644
--- a/neural_compressor/model/torch_model.py
+++ b/neural_compressor/model/torch_model.py
@@ -560,6 +560,7 @@ def export_compressed_model(
                 set_module(self.model, k, new_module)
         elif autoround_config:
             from auto_round.export.export_to_itrex import compress_model
+
             self.model = compress_model(
                 self.model,
                 weight_config=autoround_config,
@@ -568,7 +569,8 @@ def export_compressed_model(
                 compression_dim=compression_dim,
                 device=device,
                 use_optimum_format=use_optimum_format,
-                inplace=True)
+                inplace=True,
+            )
         else:
             for k, v in weight_config.items():
                 logger.debug(f"Compressing {k} on device {device}")

From 2598cfa5b78e155e1d37e246dc71dc25dcb787d1 Mon Sep 17 00:00:00 2001
From: Kaihui-intel <kaihui.tang@intel.com>
Date: Tue, 5 Mar 2024 10:15:40 +0800
Subject: [PATCH 3/4] Suppress pylint E0401 on the auto_round import

Signed-off-by: Kaihui-intel <kaihui.tang@intel.com>
---
 neural_compressor/model/torch_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py
index e3a0ef4443d..7659fa58c56 100644
--- a/neural_compressor/model/torch_model.py
+++ b/neural_compressor/model/torch_model.py
@@ -559,7 +559,7 @@ def export_compressed_model(
                 new_module.pack(int_weight, gptq_scale, gptq_zp, m.bias, gptq_perm)
                 set_module(self.model, k, new_module)
         elif autoround_config:
-            from auto_round.export.export_to_itrex import compress_model
+            from auto_round.export.export_to_itrex import compress_model # pylint: disable=E0401
 
             self.model = compress_model(
                 self.model,

From 549b2734fdb4b165a772069a7c1d609086c8990d Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 5 Mar 2024 03:08:03 +0000
Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 neural_compressor/model/torch_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py
index 7659fa58c56..3685c5c208d 100644
--- a/neural_compressor/model/torch_model.py
+++ b/neural_compressor/model/torch_model.py
@@ -559,7 +559,7 @@ def export_compressed_model(
                 new_module.pack(int_weight, gptq_scale, gptq_zp, m.bias, gptq_perm)
                 set_module(self.model, k, new_module)
         elif autoround_config:
-            from auto_round.export.export_to_itrex import compress_model # pylint: disable=E0401
+            from auto_round.export.export_to_itrex import compress_model  # pylint: disable=E0401
 
             self.model = compress_model(
                 self.model,