From 5f61b06894e2fa217a39d92b66cc54bc4763dc46 Mon Sep 17 00:00:00 2001
From: "He, Xin3"
Date: Mon, 15 Apr 2024 22:24:16 -0400
Subject: [PATCH 1/6] use cuda if exists for XPU format export

Signed-off-by: He, Xin3
---
 neural_compressor/model/torch_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py
index eccd8a9218e..a234ded38cd 100644
--- a/neural_compressor/model/torch_model.py
+++ b/neural_compressor/model/torch_model.py
@@ -587,7 +587,7 @@ def export_compressed_model(
                         compression_dtype=compression_dtype,
                         compression_dim=compression_dim,
                         scale_dtype=scale_dtype,
-                        device=device,
+                        device="cuda" if torch.cuda.is_available() else device,
                         use_optimum_format=use_optimum_format,
                     )
                     new_module.pack(int_weight, autoround_scale, autoround_zp, m.bias, None)

From ee2578f54598ff943b1073d77168e17148a33408 Mon Sep 17 00:00:00 2001
From: y
Date: Thu, 18 Apr 2024 03:00:46 -0700
Subject: [PATCH 2/6] support cpu export

Signed-off-by: y
---
 neural_compressor/model/torch_model.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py
index a234ded38cd..61ab0d50b8e 100644
--- a/neural_compressor/model/torch_model.py
+++ b/neural_compressor/model/torch_model.py
@@ -575,6 +575,12 @@ def export_compressed_model(
                     autoround_zp = None if scheme == "sym" else torch.tensor(autoround_conf["zero"], dtype=torch.int32)
                     int_weight = quant_weight_w_scale(fp32_weight, autoround_scale, autoround_zp, group_size)
                     int_weight = int_weight.type(torch.int32)
+                    if torch.cuda.is_available():
+                        device = "cuda"
+                    elif hasattr(torch, 'xpu') and torch.xpu.is_available():
+                        device = "xpu"
+                    else:
+                        device = "cpu"
                     new_module = WeightOnlyLinear(
                         m.in_features,
                         m.out_features,

From 757e3c8f94466a4946aaf8a0d0a093ac583d6f90 Mon Sep 17 00:00:00 2001
From: "He, Xin3"
Date: Fri, 19 Apr 2024 03:36:15 -0400
Subject: [PATCH 3/6] auto select available device when export

Signed-off-by: He, Xin3
---
 neural_compressor/model/torch_model.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py
index 61ab0d50b8e..bfd7e0fe42d 100644
--- a/neural_compressor/model/torch_model.py
+++ b/neural_compressor/model/torch_model.py
@@ -498,6 +498,17 @@ def export_compressed_model(
         gptq_config = self.gptq_config if hasattr(self, "gptq_config") else {}
         autoround_config = self.autoround_config if hasattr(self, "autoround_config") else {}
 
+        # check availiable device, priority: ["xpu", "cuda", "cpu"]
+        availiable_device = []
+        if hasattr(torch, 'xpu') and torch.xpu.is_available():
+            availiable_device.append("xpu")
+        if torch.cuda.is_available():
+            availiable_device.append("cuda")
+        orig_device = device
+        for i in availiable_device:
+            if i in device: # cuda in cuda:0
+                device == i
+                break
         if gptq_config:
             for k, v in weight_config.items():
                 logger.debug(f"Compressing {k} on device {device}")
@@ -558,7 +569,7 @@ def export_compressed_model(
                 new_module.pack(int_weight, gptq_scale, gptq_zp, m.bias, gptq_perm)
                 set_module(self.model, k, new_module)
         elif autoround_config:
-            if device == "xpu":
+            if orig_device == "xpu":
                 for k, v in weight_config.items():
                     logger.debug(f"Compressing {k} on device {device}")
                     if v["dtype"] == "fp32":
@@ -575,12 +586,6 @@ def export_compressed_model(
                     autoround_zp = None if scheme == "sym" else torch.tensor(autoround_conf["zero"], dtype=torch.int32)
                     int_weight = quant_weight_w_scale(fp32_weight, autoround_scale, autoround_zp, group_size)
                     int_weight = int_weight.type(torch.int32)
-                    if torch.cuda.is_available():
-                        device = "cuda"
-                    elif hasattr(torch, 'xpu') and torch.xpu.is_available():
-                        device = "xpu"
-                    else:
-                        device = "cpu"
                     new_module = WeightOnlyLinear(
                         m.in_features,
                         m.out_features,
@@ -593,7 +598,7 @@ def export_compressed_model(
                         compression_dtype=compression_dtype,
                         compression_dim=compression_dim,
                         scale_dtype=scale_dtype,
-                        device="cuda" if torch.cuda.is_available() else device,
+                        device=device,
                         use_optimum_format=use_optimum_format,
                     )
                     new_module.pack(int_weight, autoround_scale, autoround_zp, m.bias, None)

From 929ecf187e2ca2c3d6c719951bf5b4e889d65af8 Mon Sep 17 00:00:00 2001
From: "He, Xin3"
Date: Fri, 19 Apr 2024 04:02:41 -0400
Subject: [PATCH 4/6] add log

Signed-off-by: He, Xin3
---
 neural_compressor/model/torch_model.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py
index bfd7e0fe42d..f914efb8d06 100644
--- a/neural_compressor/model/torch_model.py
+++ b/neural_compressor/model/torch_model.py
@@ -505,10 +505,13 @@ def export_compressed_model(
         if torch.cuda.is_available():
             availiable_device.append("cuda")
         orig_device = device
-        for i in availiable_device:
-            if i in device: # cuda in cuda:0
-                device == i
-                break
+        if device not in availiable_device and "cuda" not in device: # cuda in cuda:0
+            for dev in availiable_device:
+                if dev in device:
+                    logger.info(f"{device} is not detected in current environment, please check.")
+                    device == dev
+                    logger.info(f"The compression device has been changed to {device}.")
+                    break
         if gptq_config:
             for k, v in weight_config.items():
                 logger.debug(f"Compressing {k} on device {device}")

From 2306c694b056bfd5b633e81c32bde3af161ffce6 Mon Sep 17 00:00:00 2001
From: "He, Xin3"
Date: Fri, 19 Apr 2024 04:32:32 -0400
Subject: [PATCH 5/6] fix bug

Signed-off-by: He, Xin3
---
 neural_compressor/model/torch_model.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py
index f914efb8d06..7bdee3355ec 100644
--- a/neural_compressor/model/torch_model.py
+++ b/neural_compressor/model/torch_model.py
@@ -504,14 +504,12 @@ def export_compressed_model(
             availiable_device.append("xpu")
         if torch.cuda.is_available():
             availiable_device.append("cuda")
+        availiable_device.append("cpu")
         orig_device = device
         if device not in availiable_device and "cuda" not in device: # cuda in cuda:0
-            for dev in availiable_device:
-                if dev in device:
-                    logger.info(f"{device} is not detected in current environment, please check.")
-                    device == dev
-                    logger.info(f"The compression device has been changed to {device}.")
-                    break
+            logger.info(f"{device} is not detected in current environment, please check.")
+            device = availiable_device[0]
+            logger.info(f"The compression device has been changed to {device}.")
         if gptq_config:
             for k, v in weight_config.items():
                 logger.debug(f"Compressing {k} on device {device}")

From e567ab231af80c9b85dc82b80c869b642e1c8a62 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 19 Apr 2024 08:47:47 +0000
Subject: [PATCH 6/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 neural_compressor/model/torch_model.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py
index 7bdee3355ec..31502dbae6b 100644
--- a/neural_compressor/model/torch_model.py
+++ b/neural_compressor/model/torch_model.py
@@ -498,15 +498,15 @@ def export_compressed_model(
         gptq_config = self.gptq_config if hasattr(self, "gptq_config") else {}
         autoround_config = self.autoround_config if hasattr(self, "autoround_config") else {}
 
-        # check availiable device, priority: ["xpu", "cuda", "cpu"]
+        # check available device, priority: ["xpu", "cuda", "cpu"]
         availiable_device = []
-        if hasattr(torch, 'xpu') and torch.xpu.is_available():
+        if hasattr(torch, "xpu") and torch.xpu.is_available():
             availiable_device.append("xpu")
         if torch.cuda.is_available():
             availiable_device.append("cuda")
         availiable_device.append("cpu")
         orig_device = device
-        if device not in availiable_device and "cuda" not in device: # cuda in cuda:0
+        if device not in availiable_device and "cuda" not in device:  # cuda in cuda:0
             logger.info(f"{device} is not detected in current environment, please check.")
             device = availiable_device[0]
             logger.info(f"The compression device has been changed to {device}.")
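
Note: as a quick way to exercise the device-selection behavior this series converges
on, the snippet below is a minimal standalone sketch of the logic as it stands after
PATCH 6/6, runnable outside export_compressed_model. It assumes only `torch`;
`pick_device` is a hypothetical helper name, `print` stands in for the `logger.info`
calls, and the spelling is tidied to `available_device`:

    import torch

    def pick_device(device: str) -> str:
        """Return `device` if usable here, else fall back by priority xpu > cuda > cpu."""
        # Collect detected devices, highest priority first; cpu is always usable.
        available_device = []
        if hasattr(torch, "xpu") and torch.xpu.is_available():
            available_device.append("xpu")
        if torch.cuda.is_available():
            available_device.append("cuda")
        available_device.append("cpu")
        # Keep the requested device when it names a detected backend; "cuda" is
        # matched as a substring so indexed forms like "cuda:0" pass through.
        if device not in available_device and "cuda" not in device:
            print(f"{device} is not detected in current environment, please check.")
            device = available_device[0]
            print(f"The compression device has been changed to {device}.")
        return device

    print(pick_device("xpu"))  # falls back to cuda or cpu when no XPU is detected

One consequence of the substring check, inherited from PATCH 5/6: a request such as
"cuda:0" is trusted even when torch.cuda.is_available() is False, so only non-cuda
device strings ever trigger the fallback.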