From 0c3c1ec022d24b08368c0d0344d7540cd2f31a1b Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Wed, 13 Dec 2023 16:50:46 +0800 Subject: [PATCH 1/2] Neural Coder support LLM to enable running on Intel GPU by IPEX Signed-off-by: zehao-intel --- .../pytorch_inc_static_quant_ipex_xpu.yaml | 34 +++++++++++++++++++ neural_coder/docs/SupportMatrix.md | 2 +- neural_coder/interface.py | 12 +++++++ neural_coder/launcher.py | 2 ++ 4 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 neural_coder/backends/pytorch_inc_static_quant_ipex_xpu.yaml diff --git a/neural_coder/backends/pytorch_inc_static_quant_ipex_xpu.yaml b/neural_coder/backends/pytorch_inc_static_quant_ipex_xpu.yaml new file mode 100644 index 00000000000..7c847ecda50 --- /dev/null +++ b/neural_coder/backends/pytorch_inc_static_quant_ipex_xpu.yaml @@ -0,0 +1,34 @@ +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +transformation: + location: + - ["insert_below_dataloader_definition_line", "insert_below_model_definition_line"] + content: + - |- + [+] from neural_compressor.config import PostTrainingQuantConfig + [+] from neural_compressor.quantization import fit + [+] MODEL_NAME = MODEL_NAME.to("xpu") + [+] conf = PostTrainingQuantConfig(backend='ipex', quant_level=1, device="xpu") + [+] MODEL_NAME = fit(model=MODEL_NAME, conf=conf, calib_dataloader=DATALOADER_NAME) + [+] MODEL_NAME.save("./quantized_model") + [+] MODEL_NAME.eval() + order: + - below: + above: + - pytorch_jit_script + - pytorch_jit_script_ofi + - pytorch_jit_trace + - pytorch_jit_trace_ofi + - pytorch_channels_last \ No newline at end of file diff --git a/neural_coder/docs/SupportMatrix.md b/neural_coder/docs/SupportMatrix.md index 89a2d6e0e1f..be2a7fea308 100644 --- a/neural_coder/docs/SupportMatrix.md +++ b/neural_coder/docs/SupportMatrix.md @@ -8,7 +8,7 @@ Supported Optimization Features | PyTorch | [JIT (Just-In-Time) Script/Trace](https://pytorch.org/docs/stable/jit.html) & [optimize_for_inference](https://pytorch.org/docs/stable/generated/torch.jit.optimize_for_inference.html) | `pytorch_jit_script`, `pytorch_jit_trace`, `pytorch_jit_script_ofi`, `pytorch_jit_trace_ofi` | | PyTorch | JIT with [TorchDynamo](https://github.com/pytorch/torchdynamo) | `pytorch_torchdynamo_jit_script`, `pytorch_torchdynamo_jit_trace`, `pytorch_torchdynamo_jit_script_ofi`, `pytorch_torchdynamo_jit_trace_ofi` | | PyTorch | [Intel Neural Compressor (INC) Mixed Precision](https://github.com/intel/neural-compressor/blob/master/docs/source/mixed_precision.md) | `pytorch_inc_bf16` | -| PyTorch | [INC INT8 Static Quantization (FX/IPEX)](https://github.com/intel/neural-compressor/blob/master/docs/source/quantization.md#supported-feature-matrix) | `pytorch_inc_static_quant_fx`, `pytorch_inc_static_quant_ipex` | +| PyTorch | [INC INT8 Static Quantization (FX/IPEX)](https://github.com/intel/neural-compressor/blob/master/docs/source/quantization.md#supported-feature-matrix) | `pytorch_inc_static_quant_fx`, `pytorch_inc_static_quant_ipex`, `pytorch_inc_static_quant_ipex_xpu` | | PyTorch | [INC INT8 Dynamic Quantization](https://github.com/intel/neural-compressor/blob/master/docs/source/quantization.md#supported-feature-matrix) | `pytorch_inc_dynamic_quant` | | PyTorch | [Intel Extension for PyTorch (FP32, BF16, INT8 Static/Dynamic Quantization)](https://github.com/intel/intel-extension-for-pytorch) | `pytorch_ipex_fp32`, `pytorch_ipex_bf16`, `pytorch_ipex_int8_static_quant`, `pytorch_ipex_int8_dynamic_quant` | | PyTorch | [Alibaba Blade-DISC](https://github.com/alibaba/BladeDISC) | `pytorch_aliblade` | diff --git a/neural_coder/interface.py b/neural_coder/interface.py index 98206ee3e06..b923cde5a25 100644 --- a/neural_coder/interface.py +++ b/neural_coder/interface.py @@ -118,6 +118,7 @@ def enable( "pytorch_inc_dynamic_quant", "pytorch_inc_static_quant_fx", "pytorch_inc_static_quant_ipex", + "pytorch_inc_static_quant_ipex_xpu", "pytorch_inc_bf16", "pytorch_inc_huggingface_optimum_static", "pytorch_inc_huggingface_optimum_dynamic", @@ -210,6 +211,7 @@ def enable( or "pytorch_jit_trace_ofi" in features or "pytorch_inc_static_quant_fx" in features or "pytorch_inc_static_quant_ipex" in features + or "pytorch_inc_static_quant_ipex_xpu" in features ): features = ["pytorch_reclaim_inputs"] + features @@ -312,6 +314,7 @@ def enable( "pytorch_inc_dynamic_quant", "pytorch_inc_static_quant_fx", "pytorch_inc_static_quant_ipex", + "pytorch_inc_static_quant_ipex_xpu", "pytorch_inc_huggingface_optimum_static", "pytorch_inc_huggingface_optimum_dynamic", "onnx_inc_static_quant_qlinear", @@ -839,6 +842,7 @@ def superbench( ["pytorch_inc_dynamic_quant"], ["pytorch_inc_static_quant_fx"], ["pytorch_inc_static_quant_ipex"], + ["pytorch_inc_static_quant_ipex_xpu"], ["pytorch_inc_bf16"], ] standalones_pool = [] @@ -857,12 +861,14 @@ def superbench( "pytorch_ipex_bf16", "pytorch_inc_static_quant_fx", "pytorch_inc_static_quant_ipex", + "pytorch_inc_static_quant_ipex_xpu", "pytorch_inc_dynamic_quant", "pytorch_ipex_int8_static_quant", "pytorch_ipex_int8_dynamic_quant", ] # features that can be standalone (either use alone or use with "backend"): standalones_pool = [ + "pytorch_ipex_xpu", "pytorch_mixed_precision_cpu", "pytorch_channels_last", ] @@ -906,6 +912,8 @@ def superbench( continue if "pytorch_inc_static_quant_ipex" in features and "pytorch_mixed_precision_cpu" in features: continue + if "pytorch_inc_static_quant_ipex_xpu" in features and "pytorch_mixed_precision_cpu" in features: + continue if "pytorch_inc_dynamic_quant" in features and "pytorch_mixed_precision_cpu" in features: continue @@ -960,6 +968,8 @@ def remove_if_have(list, element): features_display = "Intel INT8 (Static)" elif features == ["pytorch_inc_static_quant_ipex"]: features_display = "Intel INT8 (IPEX)" + elif features == ["pytorch_inc_static_quant_ipex_xpu"]: + features_display = "Intel INT8 (IPEX XPU)" elif features == ["pytorch_inc_bf16"]: features_display = "Intel BF16" elif features == []: @@ -1047,6 +1057,8 @@ def remove_if_have(list, element): best_optimization_display = "Intel INT8 (Static)" elif list_optimization_set_top3[0] == ["pytorch_inc_static_quant_ipex"]: best_optimization_display = "Intel INT8 (IPEX)" + elif list_optimization_set_top3[0] == ["pytorch_inc_static_quant_ipex_xpu"]: + best_optimization_display = "Intel INT8 (IPEX XPU)" elif list_optimization_set_top3[0] == ["pytorch_inc_bf16"]: best_optimization_display = "Intel BF16" elif list_optimization_set_top3[0] == []: diff --git a/neural_coder/launcher.py b/neural_coder/launcher.py index 43446712e35..a24b265994c 100644 --- a/neural_coder/launcher.py +++ b/neural_coder/launcher.py @@ -57,6 +57,8 @@ def execute( args.opt = "pytorch_inc_static_quant_fx" if args.approach == "static_ipex": args.opt = "pytorch_inc_static_quant_ipex" + if args.approach == "static_ipex_xpu": + args.opt = "pytorch_inc_static_quant_ipex_xpu" if args.approach == "dynamic": args.opt = "pytorch_inc_dynamic_quant" if args.approach == "auto": From 62cd87c0d5d70ed671208a2f9b42c30528ac835d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 13 Dec 2023 08:57:17 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- neural_coder/backends/pytorch_inc_static_quant_ipex_xpu.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_coder/backends/pytorch_inc_static_quant_ipex_xpu.yaml b/neural_coder/backends/pytorch_inc_static_quant_ipex_xpu.yaml index 7c847ecda50..f4835516cdf 100644 --- a/neural_coder/backends/pytorch_inc_static_quant_ipex_xpu.yaml +++ b/neural_coder/backends/pytorch_inc_static_quant_ipex_xpu.yaml @@ -31,4 +31,4 @@ transformation: - pytorch_jit_script_ofi - pytorch_jit_trace - pytorch_jit_trace_ofi - - pytorch_channels_last \ No newline at end of file + - pytorch_channels_last