From 0c3c1ec022d24b08368c0d0344d7540cd2f31a1b Mon Sep 17 00:00:00 2001
From: zehao-intel <zehao.huang@intel.com>
Date: Wed, 13 Dec 2023 16:50:46 +0800
Subject: [PATCH 1/2] Neural Coder: support running LLMs on Intel GPU
 via IPEX

Signed-off-by: zehao-intel <zehao.huang@intel.com>
---
 .../pytorch_inc_static_quant_ipex_xpu.yaml    | 34 +++++++++++++++++++
 neural_coder/docs/SupportMatrix.md            |  2 +-
 neural_coder/interface.py                     | 12 +++++++
 neural_coder/launcher.py                      |  2 ++
 4 files changed, 49 insertions(+), 1 deletion(-)
 create mode 100644 neural_coder/backends/pytorch_inc_static_quant_ipex_xpu.yaml

diff --git a/neural_coder/backends/pytorch_inc_static_quant_ipex_xpu.yaml b/neural_coder/backends/pytorch_inc_static_quant_ipex_xpu.yaml
new file mode 100644
index 00000000000..7c847ecda50
--- /dev/null
+++ b/neural_coder/backends/pytorch_inc_static_quant_ipex_xpu.yaml
@@ -0,0 +1,34 @@
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+transformation:
+  location:
+    - ["insert_below_dataloader_definition_line", "insert_below_model_definition_line"]
+  content:
+    - |-
+      [+] from neural_compressor.config import PostTrainingQuantConfig
+      [+] from neural_compressor.quantization import fit
+      [+] MODEL_NAME = MODEL_NAME.to("xpu")
+      [+] conf = PostTrainingQuantConfig(backend='ipex', quant_level=1, device="xpu")
+      [+] MODEL_NAME = fit(model=MODEL_NAME, conf=conf, calib_dataloader=DATALOADER_NAME)
+      [+] MODEL_NAME.save("./quantized_model")
+      [+] MODEL_NAME.eval()
+  order:
+    - below:
+      above:
+        - pytorch_jit_script
+        - pytorch_jit_script_ofi
+        - pytorch_jit_trace
+        - pytorch_jit_trace_ofi
+        - pytorch_channels_last
\ No newline at end of file
diff --git a/neural_coder/docs/SupportMatrix.md b/neural_coder/docs/SupportMatrix.md
index 89a2d6e0e1f..be2a7fea308 100644
--- a/neural_coder/docs/SupportMatrix.md
+++ b/neural_coder/docs/SupportMatrix.md
@@ -8,7 +8,7 @@ Supported Optimization Features
 | PyTorch | [JIT (Just-In-Time) Script/Trace](https://pytorch.org/docs/stable/jit.html) & [optimize_for_inference](https://pytorch.org/docs/stable/generated/torch.jit.optimize_for_inference.html) | `pytorch_jit_script`, `pytorch_jit_trace`, `pytorch_jit_script_ofi`, `pytorch_jit_trace_ofi` |
 | PyTorch | JIT with [TorchDynamo](https://github.com/pytorch/torchdynamo) | `pytorch_torchdynamo_jit_script`, `pytorch_torchdynamo_jit_trace`, `pytorch_torchdynamo_jit_script_ofi`, `pytorch_torchdynamo_jit_trace_ofi` |
 | PyTorch | [Intel Neural Compressor (INC) Mixed Precision](https://github.com/intel/neural-compressor/blob/master/docs/source/mixed_precision.md) | `pytorch_inc_bf16` | 
-| PyTorch | [INC INT8 Static Quantization (FX/IPEX)](https://github.com/intel/neural-compressor/blob/master/docs/source/quantization.md#supported-feature-matrix) | `pytorch_inc_static_quant_fx`, `pytorch_inc_static_quant_ipex` |
+| PyTorch | [INC INT8 Static Quantization (FX/IPEX)](https://github.com/intel/neural-compressor/blob/master/docs/source/quantization.md#supported-feature-matrix) | `pytorch_inc_static_quant_fx`, `pytorch_inc_static_quant_ipex`, `pytorch_inc_static_quant_ipex_xpu` |
 | PyTorch | [INC INT8 Dynamic Quantization](https://github.com/intel/neural-compressor/blob/master/docs/source/quantization.md#supported-feature-matrix) | `pytorch_inc_dynamic_quant` |
 | PyTorch | [Intel Extension for PyTorch (FP32, BF16, INT8 Static/Dynamic Quantization)](https://github.com/intel/intel-extension-for-pytorch) | `pytorch_ipex_fp32`, `pytorch_ipex_bf16`, `pytorch_ipex_int8_static_quant`, `pytorch_ipex_int8_dynamic_quant` |
 | PyTorch | [Alibaba Blade-DISC](https://github.com/alibaba/BladeDISC) | `pytorch_aliblade` |
diff --git a/neural_coder/interface.py b/neural_coder/interface.py
index 98206ee3e06..b923cde5a25 100644
--- a/neural_coder/interface.py
+++ b/neural_coder/interface.py
@@ -118,6 +118,7 @@ def enable(
         "pytorch_inc_dynamic_quant",
         "pytorch_inc_static_quant_fx",
         "pytorch_inc_static_quant_ipex",
+        "pytorch_inc_static_quant_ipex_xpu",
         "pytorch_inc_bf16",
         "pytorch_inc_huggingface_optimum_static",
         "pytorch_inc_huggingface_optimum_dynamic",
@@ -210,6 +211,7 @@ def enable(
         or "pytorch_jit_trace_ofi" in features
         or "pytorch_inc_static_quant_fx" in features
         or "pytorch_inc_static_quant_ipex" in features
+        or "pytorch_inc_static_quant_ipex_xpu" in features
     ):
         features = ["pytorch_reclaim_inputs"] + features
 
@@ -312,6 +314,7 @@ def enable(
                 "pytorch_inc_dynamic_quant",
                 "pytorch_inc_static_quant_fx",
                 "pytorch_inc_static_quant_ipex",
+                "pytorch_inc_static_quant_ipex_xpu",
                 "pytorch_inc_huggingface_optimum_static",
                 "pytorch_inc_huggingface_optimum_dynamic",
                 "onnx_inc_static_quant_qlinear",
@@ -839,6 +842,7 @@ def superbench(
                 ["pytorch_inc_dynamic_quant"],
                 ["pytorch_inc_static_quant_fx"],
                 ["pytorch_inc_static_quant_ipex"],
+                ["pytorch_inc_static_quant_ipex_xpu"],
                 ["pytorch_inc_bf16"],
             ]
             standalones_pool = []
@@ -857,12 +861,14 @@ def superbench(
                 "pytorch_ipex_bf16",
                 "pytorch_inc_static_quant_fx",
                 "pytorch_inc_static_quant_ipex",
+                "pytorch_inc_static_quant_ipex_xpu",
                 "pytorch_inc_dynamic_quant",
                 "pytorch_ipex_int8_static_quant",
                 "pytorch_ipex_int8_dynamic_quant",
             ]
             # features that can be standalone (either use alone or use with "backend"):
             standalones_pool = [
+                "pytorch_ipex_xpu",
                 "pytorch_mixed_precision_cpu",
                 "pytorch_channels_last",
             ]
@@ -906,6 +912,8 @@ def superbench(
                     continue
                 if "pytorch_inc_static_quant_ipex" in features and "pytorch_mixed_precision_cpu" in features:
                     continue
+                if "pytorch_inc_static_quant_ipex_xpu" in features and "pytorch_mixed_precision_cpu" in features:
+                    continue
                 if "pytorch_inc_dynamic_quant" in features and "pytorch_mixed_precision_cpu" in features:
                     continue
 
@@ -960,6 +968,8 @@ def remove_if_have(list, element):
                         features_display = "Intel INT8 (Static)"
                     elif features == ["pytorch_inc_static_quant_ipex"]:
                         features_display = "Intel INT8 (IPEX)"
+                    elif features == ["pytorch_inc_static_quant_ipex_xpu"]:
+                        features_display = "Intel INT8 (IPEX XPU)"
                     elif features == ["pytorch_inc_bf16"]:
                         features_display = "Intel BF16"
                     elif features == []:
@@ -1047,6 +1057,8 @@ def remove_if_have(list, element):
                 best_optimization_display = "Intel INT8 (Static)"
             elif list_optimization_set_top3[0] == ["pytorch_inc_static_quant_ipex"]:
                 best_optimization_display = "Intel INT8 (IPEX)"
+            elif list_optimization_set_top3[0] == ["pytorch_inc_static_quant_ipex_xpu"]:
+                best_optimization_display = "Intel INT8 (IPEX XPU)"
             elif list_optimization_set_top3[0] == ["pytorch_inc_bf16"]:
                 best_optimization_display = "Intel BF16"
             elif list_optimization_set_top3[0] == []:
diff --git a/neural_coder/launcher.py b/neural_coder/launcher.py
index 43446712e35..a24b265994c 100644
--- a/neural_coder/launcher.py
+++ b/neural_coder/launcher.py
@@ -57,6 +57,8 @@ def execute(
                     args.opt = "pytorch_inc_static_quant_fx"
                 if args.approach == "static_ipex":
                     args.opt = "pytorch_inc_static_quant_ipex"
+                if args.approach == "static_ipex_xpu":
+                    args.opt = "pytorch_inc_static_quant_ipex_xpu"
                 if args.approach == "dynamic":
                     args.opt = "pytorch_inc_dynamic_quant"
                 if args.approach == "auto":

From 62cd87c0d5d70ed671208a2f9b42c30528ac835d Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 13 Dec 2023 08:57:17 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 neural_coder/backends/pytorch_inc_static_quant_ipex_xpu.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/neural_coder/backends/pytorch_inc_static_quant_ipex_xpu.yaml b/neural_coder/backends/pytorch_inc_static_quant_ipex_xpu.yaml
index 7c847ecda50..f4835516cdf 100644
--- a/neural_coder/backends/pytorch_inc_static_quant_ipex_xpu.yaml
+++ b/neural_coder/backends/pytorch_inc_static_quant_ipex_xpu.yaml
@@ -31,4 +31,4 @@ transformation:
         - pytorch_jit_script_ofi
         - pytorch_jit_trace
         - pytorch_jit_trace_ofi
-        - pytorch_channels_last
\ No newline at end of file
+        - pytorch_channels_last