intel · XuehaoSun · Sep 3, 2024 · Aug 20, 2024 · Sep 3, 2024 · Sep 3, 2024
diff --git a/neural_compressor/torch/algorithms/weight_only/gptq.py b/neural_compressor/torch/algorithms/weight_only/gptq.py
@@ -930,12 +930,7 @@ def tmp(_, inp, out):
 
         logger.info("Quantization done")
         # self.model.config.use_cache = self.use_cache
-
-        # obtain model (all weight only quantization API function should return)
-        for k, v in gptq_config.items():
-            for m, n in v.items():
-                gptq_config[k][m] = n.tolist()
-        return self.model, gptq_config
+        return self.model
 
 
 class GPTQ:
@@ -1379,9 +1374,8 @@ def convert(self, model, *args, **kwargs):
         self.gptq_quantizer.model = model
         self.gptq_quantizer.remove_prepare_for_calibration()
 
-        q_model, gptq_config = self.gptq_quantizer.execute_quantization()
+        q_model = self.gptq_quantizer.execute_quantization()
         if not self.gptq_quantizer.use_layer_wise:
             q_model = q_model.to(self.model_device)
-        q_model.gptq_config = gptq_config
         logger.info("GPTQ quantizing done.")
         return q_model
diff --git a/neural_compressor/torch/algorithms/weight_only/save_load.py b/neural_compressor/torch/algorithms/weight_only/save_load.py
@@ -53,11 +53,6 @@ def save(model, output_dir="./saved_results"):
     # saving process
     save_config_mapping(model.qconfig, qconfig_file_path)
 
-    if hasattr(model, "gptq_config") and model.gptq_config:
-        gptq_config_path = os.path.join(os.path.abspath(os.path.expanduser(output_dir)), "gptq_config.json")
-        with open(gptq_config_path, "w") as f:
-            json.dump(model.gptq_config, f, indent=4)
-
     # MethodType 'save' not in state_dict
     del model.save
     torch.save(model.state_dict(), qmodel_weight_file_path)