From baa9b5cc210fb8dbeda843947b88ac0d4e6a94a5 Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Tue, 20 Aug 2024 14:18:34 +0800
Subject: [PATCH 1/3] support xpu lw forward

Signed-off-by: Kaihui-intel
---
 neural_compressor/torch/algorithms/weight_only/rtn.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/neural_compressor/torch/algorithms/weight_only/rtn.py b/neural_compressor/torch/algorithms/weight_only/rtn.py
index 6ce9b49fac8..5e1dcdace4c 100644
--- a/neural_compressor/torch/algorithms/weight_only/rtn.py
+++ b/neural_compressor/torch/algorithms/weight_only/rtn.py
@@ -142,6 +142,8 @@ def convert(
 
         for name, m in model.named_modules():
             if not isinstance(m, supported_layers):
+                if use_layer_wise and device == "xpu":
+                    load_module(model, name, model_path, device=device)
                 continue
             if name in weight_config:  # pragma: no cover
                 # initialize op configuration

From 3f3b239a31a4fdd48bbeec75045f05e64ccb639e Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Tue, 3 Sep 2024 14:38:04 +0800
Subject: [PATCH 2/3] update gptq config

Signed-off-by: Kaihui-intel
---
 neural_compressor/torch/algorithms/weight_only/gptq.py | 10 ++--------
 .../torch/algorithms/weight_only/save_load.py          |  5 -----
 2 files changed, 2 insertions(+), 13 deletions(-)

diff --git a/neural_compressor/torch/algorithms/weight_only/gptq.py b/neural_compressor/torch/algorithms/weight_only/gptq.py
index 1dbd7511663..d0e133b1758 100644
--- a/neural_compressor/torch/algorithms/weight_only/gptq.py
+++ b/neural_compressor/torch/algorithms/weight_only/gptq.py
@@ -930,12 +930,7 @@ def tmp(_, inp, out):
 
         logger.info("Quantization done")
         # self.model.config.use_cache = self.use_cache
-
-        # obtain model (all weight only quantization API function should return)
-        for k, v in gptq_config.items():
-            for m, n in v.items():
-                gptq_config[k][m] = n.tolist()
-        return self.model, gptq_config
+        return self.model
 
 
 class GPTQ:
@@ -1379,9 +1374,8 @@ def convert(self, model, *args, **kwargs):
 
         self.gptq_quantizer.model = model
         self.gptq_quantizer.remove_prepare_for_calibration()
-        q_model, gptq_config = self.gptq_quantizer.execute_quantization()
+        q_model = self.gptq_quantizer.execute_quantization()
         if not self.gptq_quantizer.use_layer_wise:
             q_model = q_model.to(self.model_device)
-        q_model.gptq_config = gptq_config
         logger.info("GPTQ quantizing done.")
         return q_model

diff --git a/neural_compressor/torch/algorithms/weight_only/save_load.py b/neural_compressor/torch/algorithms/weight_only/save_load.py
index d515d91f9a7..feb4b907b7e 100644
--- a/neural_compressor/torch/algorithms/weight_only/save_load.py
+++ b/neural_compressor/torch/algorithms/weight_only/save_load.py
@@ -53,11 +53,6 @@ def save(model, output_dir="./saved_results"):
     # saving process
     save_config_mapping(model.qconfig, qconfig_file_path)
 
-    if hasattr(model, "gptq_config") and model.gptq_config:
-        gptq_config_path = os.path.join(os.path.abspath(os.path.expanduser(output_dir)), "gptq_config.json")
-        with open(gptq_config_path, "w") as f:
-            json.dump(model.gptq_config, f, indent=4)
-
     # MethodType 'save' not in state_dict
     del model.save
     torch.save(model.state_dict(), qmodel_weight_file_path)
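For context on patch 2/3: execute_quantization previously returned (model, gptq_config), with each per-layer tensor in gptq_config flattened via .tolist() so that save() could dump it to gptq_config.json; after this patch only the model is returned, nothing is attached as q_model.gptq_config, and save() no longer writes the JSON file. A minimal self-contained sketch of the new calling convention follows; the _ToyQuantizer class is illustrative only, not the library's GPTQuantizer.

import torch

class _ToyQuantizer:
    """Stand-in for GPTQuantizer; models only the return-shape change."""

    use_layer_wise = False

    def __init__(self, model):
        self.model = model

    def execute_quantization(self):
        # After this patch only the model is returned; the old
        # (model, gptq_config) tuple -- with per-layer tensors
        # serialized via .tolist() -- is gone.
        return self.model

model = torch.nn.Linear(4, 4)
q_model = _ToyQuantizer(model).execute_quantization()
if not _ToyQuantizer.use_layer_wise:
    q_model = q_model.to("cpu")  # stands in for self.model_device
assert not hasattr(q_model, "gptq_config")  # attribute no longer attached

With this shape, callers can no longer recover quantization parameters from gptq_config.json; they travel with the quantized modules' state_dict instead.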
From d969d8c1e2d2f4963b2645218fca77f4f7a40e6c Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Tue, 3 Sep 2024 14:40:49 +0800
Subject: [PATCH 3/3] revert code

Signed-off-by: Kaihui-intel
---
 neural_compressor/torch/algorithms/weight_only/rtn.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/neural_compressor/torch/algorithms/weight_only/rtn.py b/neural_compressor/torch/algorithms/weight_only/rtn.py
index 5e1dcdace4c..6ce9b49fac8 100644
--- a/neural_compressor/torch/algorithms/weight_only/rtn.py
+++ b/neural_compressor/torch/algorithms/weight_only/rtn.py
@@ -142,8 +142,6 @@ def convert(
 
         for name, m in model.named_modules():
             if not isinstance(m, supported_layers):
-                if use_layer_wise and device == "xpu":
-                    load_module(model, name, model_path, device=device)
                 continue
             if name in weight_config:  # pragma: no cover
                 # initialize op configuration
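For context on patches 1/3 and 3/3: the reverted change made layer-wise RTN on xpu call load_module for every module that is not an instance of supported_layers, materializing its weights from the checkpoint before the loop skips it, so a hollow layer-wise model could still run forward on xpu after conversion. A minimal self-contained sketch of that control flow; only load_module's call signature is taken from the diff, its body here is a hypothetical stand-in, not the real neural_compressor loader.

import torch

def load_module(model, name, model_path, device="cpu"):
    # Hypothetical stand-in for the layer-wise loader: restore the named
    # submodule's weights from disk and place them on `device`.
    module = model.get_submodule(name)
    state = torch.load(f"{model_path}/{name}.pt", map_location=device)
    module.load_state_dict(state)

supported_layers = (torch.nn.Linear,)

def convert_sketch(model, model_path, use_layer_wise=True, device="xpu"):
    for name, m in model.named_modules():
        if not name:  # named_modules() yields the root module first; skip it
            continue
        if not isinstance(m, supported_layers):
            # The (reverted) patch: on xpu, load skipped modules from disk
            # so the hollow layer-wise model gains real weights.
            if use_layer_wise and device == "xpu":
                load_module(model, name, model_path, device=device)
            continue
        # ... quantize supported layers as before ...
    return model

Patch 3/3 reverts this, restoring the original skip-only behavior for unsupported modules.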