From e7a63c65dda4eed17b363cc734f24c5f490f014a Mon Sep 17 00:00:00 2001 From: kaiyaointel Date: Mon, 5 Dec 2022 11:03:23 +0800 Subject: [PATCH 01/14] Create intel_extension_for_transformers.yaml --- .../intel_extension_for_transformers.yaml | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 neural_coder/backends/intel_extension_for_transformers.yaml diff --git a/neural_coder/backends/intel_extension_for_transformers.yaml b/neural_coder/backends/intel_extension_for_transformers.yaml new file mode 100644 index 00000000000..a1accbbfb4b --- /dev/null +++ b/neural_coder/backends/intel_extension_for_transformers.yaml @@ -0,0 +1,35 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Note: For intel_extension_for_transformers support, +# we apply "PostTrainingDynamic" and "eval_f1" by default; +# support for customization is pending further evaluation. + +transformation: + location: + - ["insert_below_dataloader_definition_line", "insert_below_model_definition_line"] + content: + - |- + [+] metric = metrics.Metric(name="eval_f1", is_relative=True, criterion=0.01) + [+] objective = objectives.performance + [+] q_config = QuantizationConfig(approach="PostTrainingDynamic", metrics=[metric], objectives=[objective]) + [+] MODEL_NAME = trainer.quantize(quant_config=q_config) + order: + - below: + above: + - pytorch_jit_script + - pytorch_jit_script_ofi + - pytorch_jit_trace + - pytorch_jit_trace_ofi + - pytorch_channels_last From 94ed66b592b4181654823b745aebe2491520c9cb Mon Sep 17 00:00:00 2001 From: kaiyaointel Date: Mon, 5 Dec 2022 11:04:32 +0800 Subject: [PATCH 02/14] change default strategy to dynamic according to huggingface sync --- neural_coder/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_coder/__main__.py b/neural_coder/__main__.py index 8d9da0472c8..dee99c84a12 100644 --- a/neural_coder/__main__.py +++ b/neural_coder/__main__.py @@ -28,7 +28,7 @@ def parse_args(): parser.add_argument("--opt", type=str, default="", help="optimization feature to enable") - parser.add_argument("--approach", type=str, default="static", + parser.add_argument("--approach", type=str, default="dynamic", help="quantization approach (strategy)") parser.add_argument('--config', type=str, default="", From 9aac91b1b0563bbce3f0f1e405d75b8f6f16f8fc Mon Sep 17 00:00:00 2001 From: kaiyaointel Date: Mon, 5 Dec 2022 11:05:50 +0800 Subject: [PATCH 03/14] change default strategy to dynamic according to HF sync --- neural_coder/docs/PythonLauncher.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/neural_coder/docs/PythonLauncher.md b/neural_coder/docs/PythonLauncher.md index d73257a3a97..f08fc1d2085 100644 --- 
a/neural_coder/docs/PythonLauncher.md +++ b/neural_coder/docs/PythonLauncher.md @@ -10,7 +10,7 @@ Example: Let's say you are running an NLP model using ```run_glue.py``` from Hug python run_glue.py --model_name_or_path bert-base-cased --task_name mrpc --do_eval --output_dir result ``` -With Neural Coder's **Launcher**, users can easily enjoy Deep Learning optimizations (default: INT8 static quantization by Intel® Neural Compressor) by simply adding an inline prefix +With Neural Coder's **Launcher**, users can easily enjoy Deep Learning optimizations (default: INT8 dynamic quantization by Intel® Neural Compressor) by simply adding an inline prefix ```bash -m neural_coder ``` @@ -27,7 +27,7 @@ Note: Any modification on the optimized code ```run_glue_optimized.py``` will be Users can specify which Deep Learning optimization they want to conduct using ```--opt``` argument. The list of supported Deep Learning optimization features can be found [here](SupportMatrix.md). -Note that if specifically optimizing with INT8 quantization by Intel® Neural Compressor, to choose a quantization approach (strategy), ```--approach``` argument can be specified with either ```static```, ```static_ipex``` or ```dynamic```. For example, to run INT8 dynamic quantization by Intel® Neural Compressor instead of the default static quantization: +Note that if specifically optimizing with INT8 quantization by Intel® Neural Compressor, to choose a quantization approach (strategy), ```--approach``` argument can be specified with either ```static```, ```static_ipex``` or ```dynamic```. 
For example, to run INT8 static quantization by Intel® Neural Compressor instead of the default dynamic quantization: ```bash -python -m neural_coder --approach dynamic run_glue.py --model_name_or_path bert-base-cased --task_name mrpc --do_eval --output_dir result +python -m neural_coder --approach static run_glue.py --model_name_or_path bert-base-cased --task_name mrpc --do_eval --output_dir result ``` From a57c127fb4ea0769734946ad9121c9c3d772a49c Mon Sep 17 00:00:00 2001 From: kaiyaointel Date: Mon, 5 Dec 2022 11:14:19 +0800 Subject: [PATCH 04/14] enable intel extension for transformers --- neural_coder/interface.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/neural_coder/interface.py b/neural_coder/interface.py index ac062b681bf..6c97a7bdc13 100644 --- a/neural_coder/interface.py +++ b/neural_coder/interface.py @@ -198,6 +198,10 @@ def enable( "pytorch_inc_static_quant_ipex" in features: features = ["pytorch_reclaim_inputs"] + features + # intel_extension_for_transformers + if "intel_extension_for_transformers" in features: + features = ["change_trainer_to_nlptrainer"] + features + transformed_list_code_path = [] ## Determine Code Domain @@ -276,7 +280,10 @@ def enable( "pytorch_inc_static_quant_ipex", "pytorch_inc_huggingface_optimum_static", "pytorch_inc_huggingface_optimum_dynamic", - "onnx_inc_static_quant_qlinear" + "onnx_inc_static_quant_qlinear", + "onnx_inc_static_quant_qdq", + "onnx_inc_dynamic_quant", + "intel_extension_for_transformers", ]: # determine domain @@ -332,6 +339,10 @@ def enable( if "tensorflow_mixed_precision" in features: from .coders.tensorflow.amp import TensorFlowKerasAMP list_transformed_code[i] = TensorFlowKerasAMP(list_transformed_code[i]).transform() + # Change Trainer to NLPTrainer (only for intel_extension_for_transformers) + if "change_trainer_to_nlptrainer" in features: + from .coders.pytorch.change_trainer_to_nlptrainer import TrainerToNLPTrainer + list_transformed_code[i] = 
TrainerToNLPTrainer(list_transformed_code[i]).transform() logger.info(f"Code transformation for feature: [{feature}] finished.") From f72cdf866585ba9a5ef41aad4683fe83f9d05504 Mon Sep 17 00:00:00 2001 From: kaiyaointel Date: Mon, 5 Dec 2022 11:18:55 +0800 Subject: [PATCH 05/14] Create change_trainer_to_nlptrainer.py --- .../pytorch/change_trainer_to_nlptrainer.py | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 neural_coder/coders/pytorch/change_trainer_to_nlptrainer.py diff --git a/neural_coder/coders/pytorch/change_trainer_to_nlptrainer.py b/neural_coder/coders/pytorch/change_trainer_to_nlptrainer.py new file mode 100644 index 00000000000..512310c46e9 --- /dev/null +++ b/neural_coder/coders/pytorch/change_trainer_to_nlptrainer.py @@ -0,0 +1,46 @@ +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from ...utils.line_operation import get_line_indent_level + +class TrainerToNLPTrainer(object): + def __init__(self, file) -> None: + self.file = file + self.result = [] + + def transform(self): + lines = self.file.split('\n') + + for line in lines: + if self.is_modify(line): + new_line = self.modify(line) + self.result.append(new_line) + else: + self.result.append(line) + for index, line in enumerate(self.result): + if index != len(self.result)-1: + self.result[index] += '\n' + return ''.join(self.result) + + def is_modify(self, s): + if 'trainer = Trainer(' in s: + return True + else: + return False + + def modify(self, s): + old = 'Trainer' + s = s.replace(old, 'NLPTrainer') + return s From 5307f68e9f5261a1486ae7deb15432ee26a7f7d5 Mon Sep 17 00:00:00 2001 From: kaiyaointel Date: Mon, 5 Dec 2022 11:48:42 +0800 Subject: [PATCH 06/14] add use_inc for not using default optimum for HF code --- neural_coder/interface.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/neural_coder/interface.py b/neural_coder/interface.py index 6c97a7bdc13..18bdca2874e 100644 --- a/neural_coder/interface.py +++ b/neural_coder/interface.py @@ -65,6 +65,7 @@ def enable( test_code_line=False, # print code line info for debug use cache_load_transformers=True, optimum_quant_config="", # only for HF optimum optimizations, yaml or hub path + use_inc=False, ): """enable a feature or a couple of features for the code @@ -291,7 +292,8 @@ def enable( globals.code_domain = determine_domain(globals.list_code_path[0]) # for transformers code, enable optimum-intel api by default - if "transformers" in globals.code_domain: + # if specify use_inc, then still use INC API + if "transformers" in globals.code_domain and not use_inc: if "static_quant" in feature: feature = "pytorch_inc_huggingface_optimum_static" elif "dynamic_quant" in feature: @@ -711,6 +713,7 @@ def superbench( ncore_per_instance=-1, # only for "self_defined" mode ninstances=-1, # only for "self_defined" 
mode bench_batch_size=-1, # only for "self_defined" mode + use_inc=False, auto_quant=False, ): @@ -877,6 +880,7 @@ def superbench( ncore_per_instance=ncore_per_instance, ninstances=ninstances, bench_batch_size=bench_batch_size, + use_inc=use_inc, ) if dry_run: @@ -1072,6 +1076,7 @@ def remove_if_have(list, element): ncore_per_instance=ncore_per_instance, ninstances=ninstances, bench_batch_size=bench_batch_size, + use_inc=use_inc, ) if dry_run: @@ -1236,6 +1241,7 @@ def auto_quant( ncore_per_instance=-1, # only for "self_defined" mode ninstances=-1, # only for "self_defined" mode bench_batch_size=-1, # only for "self_defined" mode + use_inc=False, ): return superbench( code, @@ -1251,5 +1257,6 @@ def auto_quant( ncore_per_instance=ncore_per_instance, # only for "self_defined" mode ninstances=ninstances, # only for "self_defined" mode bench_batch_size=bench_batch_size, # only for "self_defined" mode + use_inc=use_inc, auto_quant=True, ) From f5643aa349307309e7c5cc30a59bfd9d223dac71 Mon Sep 17 00:00:00 2001 From: kaiyaointel Date: Mon, 5 Dec 2022 11:49:53 +0800 Subject: [PATCH 07/14] add use_inc --- neural_coder/interface.py | 1 + 1 file changed, 1 insertion(+) diff --git a/neural_coder/interface.py b/neural_coder/interface.py index 18bdca2874e..0ebaa948870 100644 --- a/neural_coder/interface.py +++ b/neural_coder/interface.py @@ -1017,6 +1017,7 @@ def remove_if_have(list, element): code=code, features=features_to_generate, save_patch_path="intel_optimization", + use_inc=use_inc, ) logger.info('The optimization patch was saved to "intel_optimziation.diff"') From dc132d041887010ea2b53862e7d0f86225395c30 Mon Sep 17 00:00:00 2001 From: kaiyaointel Date: Mon, 5 Dec 2022 11:59:35 +0800 Subject: [PATCH 08/14] update optimum quant static dynamic separation --- neural_coder/interface.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/neural_coder/interface.py b/neural_coder/interface.py index 0ebaa948870..1f7cc865294 100644 --- a/neural_coder/interface.py +++ 
b/neural_coder/interface.py @@ -299,6 +299,12 @@ def enable( elif "dynamic_quant" in feature: feature = "pytorch_inc_huggingface_optimum_dynamic" + # optimum-intel quantization config for static and dynamic + if feature = "pytorch_inc_huggingface_optimum_static": + globals.optimum_quant_config = "quantization/quant_config_static" + elif feautre = "pytorch_inc_huggingface_optimum_dynamic": + globals.optimum_quant_config = "quantization/quant_config_dynamic" + from .coders.autoinc.autoinc_harness import AutoInc_Harness from .coders.autoinc.calib_dataloader import Calib_Dataloader from .coders.autoinc.eval_func import Eval_Func From 3eeddf0259b6a363d90106f0a9d27ce82a2c6918 Mon Sep 17 00:00:00 2001 From: kaiyaointel Date: Mon, 5 Dec 2022 12:00:29 +0800 Subject: [PATCH 09/14] Update interface.py --- neural_coder/interface.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/neural_coder/interface.py b/neural_coder/interface.py index 1f7cc865294..1bb2c057989 100644 --- a/neural_coder/interface.py +++ b/neural_coder/interface.py @@ -302,8 +302,10 @@ def enable( # optimum-intel quantization config for static and dynamic if feature = "pytorch_inc_huggingface_optimum_static": globals.optimum_quant_config = "quantization/quant_config_static" - elif feautre = "pytorch_inc_huggingface_optimum_dynamic": + elif feature = "pytorch_inc_huggingface_optimum_dynamic": globals.optimum_quant_config = "quantization/quant_config_dynamic" + else: + pass from .coders.autoinc.autoinc_harness import AutoInc_Harness from .coders.autoinc.calib_dataloader import Calib_Dataloader From be11c4cbc1ac47441b312635037098a8674d6321 Mon Sep 17 00:00:00 2001 From: kaiyaointel Date: Mon, 5 Dec 2022 12:01:03 +0800 Subject: [PATCH 10/14] Update interface.py --- neural_coder/interface.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/neural_coder/interface.py b/neural_coder/interface.py index 1bb2c057989..cad5481c393 100644 --- a/neural_coder/interface.py +++ 
b/neural_coder/interface.py @@ -300,9 +300,9 @@ def enable( feature = "pytorch_inc_huggingface_optimum_dynamic" # optimum-intel quantization config for static and dynamic - if feature = "pytorch_inc_huggingface_optimum_static": + if feature == "pytorch_inc_huggingface_optimum_static": globals.optimum_quant_config = "quantization/quant_config_static" - elif feature = "pytorch_inc_huggingface_optimum_dynamic": + elif feature == "pytorch_inc_huggingface_optimum_dynamic": globals.optimum_quant_config = "quantization/quant_config_dynamic" else: pass From 65f0f03ebf39d3ef366e10bafec946e64daa2a3c Mon Sep 17 00:00:00 2001 From: kaiyaointel Date: Mon, 5 Dec 2022 12:07:37 +0800 Subject: [PATCH 11/14] Update autoinc_harness.py --- neural_coder/coders/autoinc/autoinc_harness.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/neural_coder/coders/autoinc/autoinc_harness.py b/neural_coder/coders/autoinc/autoinc_harness.py index 0430534f768..6918ac489ad 100644 --- a/neural_coder/coders/autoinc/autoinc_harness.py +++ b/neural_coder/coders/autoinc/autoinc_harness.py @@ -271,8 +271,6 @@ def register_transformation(self): lines_to_insert = lines_to_insert \ .replace("DATALOADER_NAME", dataloader_name) - if globals.optimum_quant_config == "": - globals.optimum_quant_config = "quantization/quant_config" optimum_quant_config_line = \ 'IncQuantizationConfig.from_pretrained("' + globals.optimum_quant_config + '")' From 712d41e7a9232e07c263e08635f72e47408a6fc4 Mon Sep 17 00:00:00 2001 From: kaiyaointel Date: Mon, 5 Dec 2022 12:17:20 +0800 Subject: [PATCH 12/14] Update README.md --- neural_coder/README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/neural_coder/README.md b/neural_coder/README.md index 9c07f9f6503..241ee4bf524 100644 --- a/neural_coder/README.md +++ b/neural_coder/README.md @@ -35,11 +35,14 @@ simultaneously on below PyTorch evaluation code, we generate the optimized code ## Getting Started! 
-There are currently 2 ways to use Neural Coder for automatic quantization enabling and benchmark. +There are currently 3 ways to use Neural Coder for automatic quantization enabling and benchmark. ### Jupyter Lab Extension We offer Neural Coder as an extension plugin in Jupyter Lab. This enables users to utilize Neural Coder while writing their Deep Learning models in Jupyter Lab coding platform. Users can simply search for ```jupyter-lab-neural-compressor``` in the Extension Manager in JupyterLab and install Neural Coder with one click. For more details, please refer to this [guide](extensions/neural_compressor_ext_lab/README.md) +### Python Launcher +Neural Coder can be used as a Python Launcher. Users can run the Python Deep Learning model code as it is with automatic enabling of optimizations by simply adding an inline prefix ```-m neural_coder``` to the Python command line. For more details, please refer to this [guide](docs/PythonLauncher.md) + ### Python API There are 3 user-facing APIs for Neural Coder: enable, bench and superbench. For more details, please refer to this [guide](docs/PythonAPI.md). We have provided a [list](docs/SupportMatrix.md) of supported Deep Learning optimization features. Specifically for quantization, we provide an auto-quantization API that helps automatically enable quantization on Deep Learning models and automatically evaluates for the best performance on the model with no manual coding needed. Supported features include Post-Training Static Quantization, Post-Training Dynamic Quantization, and Mixed Precision. For more details, please refer to this [guide](docs/Quantization.md). 
From f0d08cec1f34182b83c45b665407125b576b1611 Mon Sep 17 00:00:00 2001 From: kaiyaointel Date: Mon, 5 Dec 2022 12:31:43 +0800 Subject: [PATCH 13/14] add change_trainer_to_nlptrainer to outside_harness --- neural_coder/interface.py | 1 + 1 file changed, 1 insertion(+) diff --git a/neural_coder/interface.py b/neural_coder/interface.py index cad5481c393..3a28ebcd767 100644 --- a/neural_coder/interface.py +++ b/neural_coder/interface.py @@ -185,6 +185,7 @@ def enable( "pytorch_cuda_to_cpu", "pytorch_lightning_bf16_cpu", "tensorflow_mixed_precision", + "change_trainer_to_nlptrainer", ] # # features that need creating dummy dataloader (when needed) first From 29717a926a11cb9c6f7e42c5bb6cd4c07cb0da4d Mon Sep 17 00:00:00 2001 From: "Yue, Wenjiao" Date: Mon, 5 Dec 2022 15:22:39 +0800 Subject: [PATCH 14/14] add PythonLauncher to pass spelling check CI Signed-off-by: Yue, Wenjiao --- .azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt b/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt index 4601c3ab69e..64361fcbe80 100644 --- a/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt +++ b/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt @@ -2385,3 +2385,4 @@ Nsh UmK fe vmware +PythonLauncher \ No newline at end of file