From 5f61b06894e2fa217a39d92b66cc54bc4763dc46 Mon Sep 17 00:00:00 2001
From: "He, Xin3"
Date: Mon, 15 Apr 2024 22:24:16 -0400
Subject: [PATCH 1/6] use cuda if exists for XPU format export

Signed-off-by: He, Xin3
---
 neural_compressor/model/torch_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py
index eccd8a9218e..a234ded38cd 100644
--- a/neural_compressor/model/torch_model.py
+++ b/neural_compressor/model/torch_model.py
@@ -587,7 +587,7 @@ def export_compressed_model(
                         compression_dtype=compression_dtype,
                         compression_dim=compression_dim,
                         scale_dtype=scale_dtype,
-                        device=device,
+                        device="cuda" if torch.cuda.is_available() else device,
                         use_optimum_format=use_optimum_format,
                     )
                     new_module.pack(int_weight, autoround_scale, autoround_zp, m.bias, None)

From ee2578f54598ff943b1073d77168e17148a33408 Mon Sep 17 00:00:00 2001
From: y
Date: Thu, 18 Apr 2024 03:00:46 -0700
Subject: [PATCH 2/6] support cpu export

Signed-off-by: y
---
 neural_compressor/model/torch_model.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py
index a234ded38cd..61ab0d50b8e 100644
--- a/neural_compressor/model/torch_model.py
+++ b/neural_compressor/model/torch_model.py
@@ -575,6 +575,12 @@ def export_compressed_model(
                     autoround_zp = None if scheme == "sym" else torch.tensor(autoround_conf["zero"], dtype=torch.int32)
                     int_weight = quant_weight_w_scale(fp32_weight, autoround_scale, autoround_zp, group_size)
                     int_weight = int_weight.type(torch.int32)
+                    if torch.cuda.is_available():
+                        device = "cuda"
+                    elif hasattr(torch, 'xpu') and torch.xpu.is_available():
+                        device = "xpu"
+                    else:
+                        device = "cpu"
                     new_module = WeightOnlyLinear(
                         m.in_features,
                         m.out_features,

From 757e3c8f94466a4946aaf8a0d0a093ac583d6f90 Mon Sep 17 00:00:00 2001
From: "He, Xin3"
Date: Fri, 19 Apr 2024 03:36:15 -0400
Subject: [PATCH 3/6] auto select available device when export

Signed-off-by: He, Xin3
---
 neural_compressor/model/torch_model.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py
index 61ab0d50b8e..bfd7e0fe42d 100644
--- a/neural_compressor/model/torch_model.py
+++ b/neural_compressor/model/torch_model.py
@@ -498,6 +498,17 @@ def export_compressed_model(
         gptq_config = self.gptq_config if hasattr(self, "gptq_config") else {}
         autoround_config = self.autoround_config if hasattr(self, "autoround_config") else {}
 
+        # check availiable device, priority: ["xpu", "cuda", "cpu"]
+        availiable_device = []
+        if hasattr(torch, 'xpu') and torch.xpu.is_available():
+            availiable_device.append("xpu")
+        if torch.cuda.is_available():
+            availiable_device.append("cuda")
+        orig_device = device
+        for i in availiable_device:
+            if i in device: # cuda in cuda:0
+                device == i
+                break
         if gptq_config:
             for k, v in weight_config.items():
                 logger.debug(f"Compressing {k} on device {device}")
@@ -558,7 +569,7 @@ def export_compressed_model(
                 new_module.pack(int_weight, gptq_scale, gptq_zp, m.bias, gptq_perm)
                 set_module(self.model, k, new_module)
         elif autoround_config:
-            if device == "xpu":
+            if orig_device == "xpu":
                 for k, v in weight_config.items():
                     logger.debug(f"Compressing {k} on device {device}")
                     if v["dtype"] == "fp32":
@@ -575,12 +586,6 @@ def export_compressed_model(
                     autoround_zp = None if scheme == "sym" else torch.tensor(autoround_conf["zero"], dtype=torch.int32)
                     int_weight = quant_weight_w_scale(fp32_weight, autoround_scale, autoround_zp, group_size)
                     int_weight = int_weight.type(torch.int32)
-                    if torch.cuda.is_available():
-                        device = "cuda"
-                    elif hasattr(torch, 'xpu') and torch.xpu.is_available():
-                        device = "xpu"
-                    else:
-                        device = "cpu"
                     new_module = WeightOnlyLinear(
                         m.in_features,
                         m.out_features,
@@ -593,7 +598,7 @@ def export_compressed_model(
                         compression_dtype=compression_dtype,
                         compression_dim=compression_dim,
                         scale_dtype=scale_dtype,
-                        device="cuda" if torch.cuda.is_available() else device,
+                        device=device,
                         use_optimum_format=use_optimum_format,
                     )
                     new_module.pack(int_weight, autoround_scale, autoround_zp, m.bias, None)

From 929ecf187e2ca2c3d6c719951bf5b4e889d65af8 Mon Sep 17 00:00:00 2001
From: "He, Xin3"
Date: Fri, 19 Apr 2024 04:02:41 -0400
Subject: [PATCH 4/6] add log

Signed-off-by: He, Xin3
---
 neural_compressor/model/torch_model.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py
index bfd7e0fe42d..f914efb8d06 100644
--- a/neural_compressor/model/torch_model.py
+++ b/neural_compressor/model/torch_model.py
@@ -505,10 +505,13 @@ def export_compressed_model(
         if torch.cuda.is_available():
             availiable_device.append("cuda")
         orig_device = device
-        for i in availiable_device:
-            if i in device: # cuda in cuda:0
-                device == i
-                break
+        if device not in availiable_device and "cuda" not in device: # cuda in cuda:0
+            for dev in availiable_device:
+                if dev in device:
+                    logger.info(f"{device} is not detected in current environment, please check.")
+                    device == dev
+                    logger.info(f"The compression device has been changed to {device}.")
+                    break
         if gptq_config:
             for k, v in weight_config.items():
                 logger.debug(f"Compressing {k} on device {device}")

From 2306c694b056bfd5b633e81c32bde3af161ffce6 Mon Sep 17 00:00:00 2001
From: "He, Xin3"
Date: Fri, 19 Apr 2024 04:32:32 -0400
Subject: [PATCH 5/6] fix bug

Signed-off-by: He, Xin3
---
 neural_compressor/model/torch_model.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py
index f914efb8d06..7bdee3355ec 100644
--- a/neural_compressor/model/torch_model.py
+++ b/neural_compressor/model/torch_model.py
@@ -504,14 +504,12 @@ def export_compressed_model(
             availiable_device.append("xpu")
         if torch.cuda.is_available():
             availiable_device.append("cuda")
+        availiable_device.append("cpu")
         orig_device = device
         if device not in availiable_device and "cuda" not in device: # cuda in cuda:0
-            for dev in availiable_device:
-                if dev in device:
-                    logger.info(f"{device} is not detected in current environment, please check.")
-                    device == dev
-                    logger.info(f"The compression device has been changed to {device}.")
-                    break
+            logger.info(f"{device} is not detected in current environment, please check.")
+            device = availiable_device[0]
+            logger.info(f"The compression device has been changed to {device}.")
         if gptq_config:
             for k, v in weight_config.items():
                 logger.debug(f"Compressing {k} on device {device}")

From e567ab231af80c9b85dc82b80c869b642e1c8a62 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 19 Apr 2024 08:47:47 +0000
Subject: [PATCH 6/6] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 neural_compressor/model/torch_model.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py
index 7bdee3355ec..31502dbae6b 100644
--- a/neural_compressor/model/torch_model.py
+++ b/neural_compressor/model/torch_model.py
@@ -498,15 +498,15 @@ def export_compressed_model(
         gptq_config = self.gptq_config if hasattr(self, "gptq_config") else {}
         autoround_config = self.autoround_config if hasattr(self, "autoround_config") else {}
 
-        # check availiable device, priority: ["xpu", "cuda", "cpu"]
+        # check available device, priority: ["xpu", "cuda", "cpu"]
         availiable_device = []
-        if hasattr(torch, 'xpu') and torch.xpu.is_available():
+        if hasattr(torch, "xpu") and torch.xpu.is_available():
             availiable_device.append("xpu")
         if torch.cuda.is_available():
             availiable_device.append("cuda")
         availiable_device.append("cpu")
         orig_device = device
-        if device not in availiable_device and "cuda" not in device: # cuda in cuda:0
+        if device not in availiable_device and "cuda" not in device:  # cuda in cuda:0
             logger.info(f"{device} is not detected in current environment, please check.")
             device = availiable_device[0]
             logger.info(f"The compression device has been changed to {device}.")
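
Note: as a quick way to exercise the device-selection behavior this series converges
on, the snippet below is a minimal standalone sketch of the logic as it stands after
PATCH 6/6, runnable outside export_compressed_model. It assumes only `torch`;
`pick_device` is a hypothetical helper name, `print` stands in for the `logger.info`
calls, and the spelling is tidied to `available_device`:

    import torch

    def pick_device(device: str) -> str:
        """Return `device` if usable here, else fall back by priority xpu > cuda > cpu."""
        # Collect detected devices, highest priority first; cpu is always usable.
        available_device = []
        if hasattr(torch, "xpu") and torch.xpu.is_available():
            available_device.append("xpu")
        if torch.cuda.is_available():
            available_device.append("cuda")
        available_device.append("cpu")
        # Keep the requested device when it names a detected backend; "cuda" is
        # matched as a substring so indexed forms like "cuda:0" pass through.
        if device not in available_device and "cuda" not in device:
            print(f"{device} is not detected in current environment, please check.")
            device = available_device[0]
            print(f"The compression device has been changed to {device}.")
        return device

    print(pick_device("xpu"))  # falls back to cuda or cpu when no XPU is detected

One consequence of the substring check, inherited from PATCH 5/6: a request such as
"cuda:0" is trusted even when torch.cuda.is_available() is False, so only non-cuda
device strings ever trigger the fallback.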