From baa9b5cc210fb8dbeda843947b88ac0d4e6a94a5 Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Tue, 20 Aug 2024 14:18:34 +0800
Subject: [PATCH 1/3] support xpu lw forward

Signed-off-by: Kaihui-intel
---
 neural_compressor/torch/algorithms/weight_only/rtn.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/neural_compressor/torch/algorithms/weight_only/rtn.py b/neural_compressor/torch/algorithms/weight_only/rtn.py
index 6ce9b49fac8..5e1dcdace4c 100644
--- a/neural_compressor/torch/algorithms/weight_only/rtn.py
+++ b/neural_compressor/torch/algorithms/weight_only/rtn.py
@@ -142,6 +142,8 @@ def convert(
 
         for name, m in model.named_modules():
             if not isinstance(m, supported_layers):
+                if use_layer_wise and device == "xpu":
+                    load_module(model, name, model_path, device=device)
                 continue
             if name in weight_config:  # pragma: no cover
                 # initialize op configuration

From 3f3b239a31a4fdd48bbeec75045f05e64ccb639e Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Tue, 3 Sep 2024 14:38:04 +0800
Subject: [PATCH 2/3] update gptq config

Signed-off-by: Kaihui-intel
---
 neural_compressor/torch/algorithms/weight_only/gptq.py | 10 ++--------
 .../torch/algorithms/weight_only/save_load.py          |  5 -----
 2 files changed, 2 insertions(+), 13 deletions(-)

diff --git a/neural_compressor/torch/algorithms/weight_only/gptq.py b/neural_compressor/torch/algorithms/weight_only/gptq.py
index 1dbd7511663..d0e133b1758 100644
--- a/neural_compressor/torch/algorithms/weight_only/gptq.py
+++ b/neural_compressor/torch/algorithms/weight_only/gptq.py
@@ -930,12 +930,7 @@ def tmp(_, inp, out):
 
         logger.info("Quantization done")
         # self.model.config.use_cache = self.use_cache
-
-        # obtain model (all weight only quantization API function should return)
-        for k, v in gptq_config.items():
-            for m, n in v.items():
-                gptq_config[k][m] = n.tolist()
-        return self.model, gptq_config
+        return self.model
 
 
 class GPTQ:
@@ -1379,9 +1374,8 @@ def convert(self, model, *args, **kwargs):
 
         self.gptq_quantizer.model = model
         self.gptq_quantizer.remove_prepare_for_calibration()
-        q_model, gptq_config = self.gptq_quantizer.execute_quantization()
+        q_model = self.gptq_quantizer.execute_quantization()
         if not self.gptq_quantizer.use_layer_wise:
             q_model = q_model.to(self.model_device)
-        q_model.gptq_config = gptq_config
         logger.info("GPTQ quantizing done.")
         return q_model

diff --git a/neural_compressor/torch/algorithms/weight_only/save_load.py b/neural_compressor/torch/algorithms/weight_only/save_load.py
index d515d91f9a7..feb4b907b7e 100644
--- a/neural_compressor/torch/algorithms/weight_only/save_load.py
+++ b/neural_compressor/torch/algorithms/weight_only/save_load.py
@@ -53,11 +53,6 @@ def save(model, output_dir="./saved_results"):
     # saving process
     save_config_mapping(model.qconfig, qconfig_file_path)
 
-    if hasattr(model, "gptq_config") and model.gptq_config:
-        gptq_config_path = os.path.join(os.path.abspath(os.path.expanduser(output_dir)), "gptq_config.json")
-        with open(gptq_config_path, "w") as f:
-            json.dump(model.gptq_config, f, indent=4)
-
     # MethodType 'save' not in state_dict
     del model.save
     torch.save(model.state_dict(), qmodel_weight_file_path)
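For context on patch 2/3: execute_quantization previously returned (model, gptq_config), with each per-layer tensor in gptq_config flattened via .tolist() so that save() could dump it to gptq_config.json; after this patch only the model is returned, nothing is attached as q_model.gptq_config, and save() no longer writes the JSON file. A minimal self-contained sketch of the new calling convention follows; the _ToyQuantizer class is illustrative only, not the library's GPTQuantizer.

import torch

class _ToyQuantizer:
    """Stand-in for GPTQuantizer; models only the return-shape change."""

    use_layer_wise = False

    def __init__(self, model):
        self.model = model

    def execute_quantization(self):
        # After this patch only the model is returned; the old
        # (model, gptq_config) tuple -- with per-layer tensors
        # serialized via .tolist() -- is gone.
        return self.model

model = torch.nn.Linear(4, 4)
q_model = _ToyQuantizer(model).execute_quantization()
if not _ToyQuantizer.use_layer_wise:
    q_model = q_model.to("cpu")  # stands in for self.model_device
assert not hasattr(q_model, "gptq_config")  # attribute no longer attached

With this shape, callers can no longer recover quantization parameters from gptq_config.json; they travel with the quantized modules' state_dict instead.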
From d969d8c1e2d2f4963b2645218fca77f4f7a40e6c Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Tue, 3 Sep 2024 14:40:49 +0800
Subject: [PATCH 3/3] revert code

Signed-off-by: Kaihui-intel
---
 neural_compressor/torch/algorithms/weight_only/rtn.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/neural_compressor/torch/algorithms/weight_only/rtn.py b/neural_compressor/torch/algorithms/weight_only/rtn.py
index 5e1dcdace4c..6ce9b49fac8 100644
--- a/neural_compressor/torch/algorithms/weight_only/rtn.py
+++ b/neural_compressor/torch/algorithms/weight_only/rtn.py
@@ -142,8 +142,6 @@ def convert(
 
         for name, m in model.named_modules():
             if not isinstance(m, supported_layers):
-                if use_layer_wise and device == "xpu":
-                    load_module(model, name, model_path, device=device)
                 continue
             if name in weight_config:  # pragma: no cover
                 # initialize op configuration
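For context on patches 1/3 and 3/3: the reverted change made layer-wise RTN on xpu call load_module for every module that is not an instance of supported_layers, materializing its weights from the checkpoint before the loop skips it, so a hollow layer-wise model could still run forward on xpu after conversion. A minimal self-contained sketch of that control flow; only load_module's call signature is taken from the diff, its body here is a hypothetical stand-in, not the real neural_compressor loader.

import torch

def load_module(model, name, model_path, device="cpu"):
    # Hypothetical stand-in for the layer-wise loader: restore the named
    # submodule's weights from disk and place them on `device`.
    module = model.get_submodule(name)
    state = torch.load(f"{model_path}/{name}.pt", map_location=device)
    module.load_state_dict(state)

supported_layers = (torch.nn.Linear,)

def convert_sketch(model, model_path, use_layer_wise=True, device="xpu"):
    for name, m in model.named_modules():
        if not name:  # named_modules() yields the root module first; skip it
            continue
        if not isinstance(m, supported_layers):
            # The (reverted) patch: on xpu, load skipped modules from disk
            # so the hollow layer-wise model gains real weights.
            if use_layer_wise and device == "xpu":
                load_module(model, name, model_path, device=device)
            continue
        # ... quantize supported layers as before ...
    return model

Patch 3/3 reverts this, restoring the original skip-only behavior for unsupported modules.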