From 4c45d10adbd9f64c7590f00bcdfc5e1649a7aa2c Mon Sep 17 00:00:00 2001
From: n1ck-guo
Date: Mon, 24 Feb 2025 08:41:10 -0500
Subject: [PATCH 1/4] fix bug

Signed-off-by: n1ck-guo
---
 neural_compressor/adaptor/torch_utils/gptq.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/neural_compressor/adaptor/torch_utils/gptq.py b/neural_compressor/adaptor/torch_utils/gptq.py
index 2f1c6cc0582..7bcefc154c3 100644
--- a/neural_compressor/adaptor/torch_utils/gptq.py
+++ b/neural_compressor/adaptor/torch_utils/gptq.py
@@ -745,6 +745,7 @@ def tmp(_, inp, out):
             for j in range(len(self.dataloader)):
                 cache_keyword_batch = self.gather_single_batch_from_dict(self.cache_key_arguments, j)
                 cache_positional_batch = self.gather_single_batch_from_list(self.cache_positional_arguments, j)
+                transformer_block.to(cache_positional_batch[0].dtype)
                 out = transformer_block(*cache_positional_batch, **cache_keyword_batch)
                 out = self.track_hidden_states(out)
                 outs.append(out)
@@ -967,7 +968,6 @@ def fasterquant(self, W, blocksize=128, percdamp=0.01, groupsize=-1, act_order=F
                     if not static_groups:
                         if (i1 + i) % groupsize == 0:
                             self.quantizer.find_params(W[:, (i1 + i) : (i1 + i + groupsize)], weight=True)
-                            scale.append(self.quantizer.scale)
                             zero.append(self.quantizer.zero)
                     else:
                         idx = i1 + i

From 63bea6bf2e596307623bf319760d0980e7a880ed Mon Sep 17 00:00:00 2001
From: n1ck-guo
Date: Mon, 24 Feb 2025 08:52:07 -0500
Subject: [PATCH 2/4] update

Signed-off-by: n1ck-guo
---
 neural_compressor/adaptor/torch_utils/gptq.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/neural_compressor/adaptor/torch_utils/gptq.py b/neural_compressor/adaptor/torch_utils/gptq.py
index 7bcefc154c3..bd40d3907fa 100644
--- a/neural_compressor/adaptor/torch_utils/gptq.py
+++ b/neural_compressor/adaptor/torch_utils/gptq.py
@@ -745,7 +745,7 @@ def tmp(_, inp, out):
             for j in range(len(self.dataloader)):
                 cache_keyword_batch = self.gather_single_batch_from_dict(self.cache_key_arguments, j)
                 cache_positional_batch = self.gather_single_batch_from_list(self.cache_positional_arguments, j)
-                transformer_block.to(cache_positional_batch[0].dtype)
+                transformer_block = transformer_block.to(cache_positional_batch[0].dtype)
                 out = transformer_block(*cache_positional_batch, **cache_keyword_batch)
                 out = self.track_hidden_states(out)
                 outs.append(out)

From 15c39f19b55bd1efa22aeb16c7ca5f4a90bd1f6e Mon Sep 17 00:00:00 2001
From: n1ck-guo
Date: Mon, 24 Feb 2025 20:48:50 -0500
Subject: [PATCH 3/4] fix

Signed-off-by: n1ck-guo
---
 neural_compressor/adaptor/torch_utils/gptq.py     | 8 +++++---
 .../adaptor/torch_utils/layer_wise_quant/utils.py | 4 ++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/neural_compressor/adaptor/torch_utils/gptq.py b/neural_compressor/adaptor/torch_utils/gptq.py
index bd40d3907fa..ad597620fef 100644
--- a/neural_compressor/adaptor/torch_utils/gptq.py
+++ b/neural_compressor/adaptor/torch_utils/gptq.py
@@ -718,10 +718,10 @@ def tmp(_, inp, out):
                 for n, p in sub_layer.named_parameters():
                     param_name = full_layer_name + "." + n
                     if n == "weight":
-                        set_module_tensor_to_device(self.model, param_name, self.device, Q)
+                        set_module_tensor_to_device(self.model, param_name, self.device, Q, dtype=Q.dtype)
                     else:
                         value = load_value(self.model, param_name, model_path)
-                        set_module_tensor_to_device(self.model, param_name, self.device, value)
+                        set_module_tensor_to_device(self.model, param_name, self.device, value, dtype=value.dtype)
                     # sub_layer.weight.data = Q
                     torch.save(sub_layer.state_dict(), LWQ_WORKSPACE + f"/{full_layer_name}.pt")
                     clean_module_weight(sub_layer)
@@ -745,7 +745,8 @@ def tmp(_, inp, out):
             for j in range(len(self.dataloader)):
                 cache_keyword_batch = self.gather_single_batch_from_dict(self.cache_key_arguments, j)
                 cache_positional_batch = self.gather_single_batch_from_list(self.cache_positional_arguments, j)
-                transformer_block = transformer_block.to(cache_positional_batch[0].dtype)
+                # breakpoint()
+                # transformer_block = transformer_block.to(getattr(torch, self.model.config.torch_dtype))
                 out = transformer_block(*cache_positional_batch, **cache_keyword_batch)
                 out = self.track_hidden_states(out)
                 outs.append(out)
@@ -968,6 +969,7 @@ def fasterquant(self, W, blocksize=128, percdamp=0.01, groupsize=-1, act_order=F
                     if not static_groups:
                         if (i1 + i) % groupsize == 0:
                             self.quantizer.find_params(W[:, (i1 + i) : (i1 + i + groupsize)], weight=True)
+                            scale.append(self.quantizer.scale)
                             zero.append(self.quantizer.zero)
                     else:
                         idx = i1 + i
diff --git a/neural_compressor/adaptor/torch_utils/layer_wise_quant/utils.py b/neural_compressor/adaptor/torch_utils/layer_wise_quant/utils.py
index 8bd3d32d320..211cfebbad1 100644
--- a/neural_compressor/adaptor/torch_utils/layer_wise_quant/utils.py
+++ b/neural_compressor/adaptor/torch_utils/layer_wise_quant/utils.py
@@ -221,7 +221,7 @@ def load_module(model, module_name, path, device="cpu"):
     for n, p in module.named_parameters():
         param_name = module_name + "." + n
         value = load_value(model, param_name, path)
-        set_module_tensor_to_device(model, param_name, device, value)
+        set_module_tensor_to_device(model, param_name, device, value, dtype=value.dtype)


 def register_weight_hooks(model, path, device="cpu", clean_weight=True, saved_path=None):
@@ -239,7 +239,7 @@ def hook(module, input):
                     value = state_dict[n]
                 else:
                     value = load_value(model, param_name, path)
-                set_module_tensor_to_device(model, param_name, device, value)
+                set_module_tensor_to_device(model, param_name, device, value, dtype=value.dtype)

         return hook


From d6b8524fe963efff4a3146087611d746ca84c73b Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 25 Feb 2025 01:50:03 +0000
Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 neural_compressor/adaptor/torch_utils/gptq.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/neural_compressor/adaptor/torch_utils/gptq.py b/neural_compressor/adaptor/torch_utils/gptq.py
index ad597620fef..a077e932b33 100644
--- a/neural_compressor/adaptor/torch_utils/gptq.py
+++ b/neural_compressor/adaptor/torch_utils/gptq.py
@@ -721,7 +721,9 @@ def tmp(_, inp, out):
                         set_module_tensor_to_device(self.model, param_name, self.device, Q, dtype=Q.dtype)
                     else:
                         value = load_value(self.model, param_name, model_path)
-                        set_module_tensor_to_device(self.model, param_name, self.device, value, dtype=value.dtype)
+                        set_module_tensor_to_device(
+                            self.model, param_name, self.device, value, dtype=value.dtype
+                        )
                     # sub_layer.weight.data = Q
                     torch.save(sub_layer.state_dict(), LWQ_WORKSPACE + f"/{full_layer_name}.pt")
                     clean_module_weight(sub_layer)
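Note on the recurring dtype= change in patches 3 and 4: when accelerate's
set_module_tensor_to_device is called without an explicit dtype, it casts the
incoming value to the dtype of the parameter already present in the module
(matching load_state_dict semantics), which can silently upcast fp16/bf16
weights restored during layer-wise quantization and cause a dtype mismatch
with the cached activations. Passing dtype=value.dtype pins the on-disk dtype
instead. Below is a minimal sketch of the difference, not part of the patches;
the Linear module and tensors are illustrative only, and it assumes only torch
and accelerate are installed:

import torch
from accelerate.utils import set_module_tensor_to_device

lin = torch.nn.Linear(4, 4)  # fp32 placeholder parameter, as after module init
value = torch.randn(4, 4, dtype=torch.float16)  # fp16 weight loaded from disk

# Default behavior: value is cast to the existing parameter's dtype (fp32 here),
# so the restored weight silently loses its fp16 dtype.
set_module_tensor_to_device(lin, "weight", "cpu", value)
print(lin.weight.dtype)  # torch.float32

# With dtype=value.dtype, the restored weight keeps its on-disk fp16 dtype,
# staying consistent with the inputs cached for the transformer block forward.
set_module_tensor_to_device(lin, "weight", "cpu", value, dtype=value.dtype)
print(lin.weight.dtype)  # torch.float16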