From e507ea2fc5899228d8e98645ecb905bcb3ea2f1b Mon Sep 17 00:00:00 2001 From: Xin He Date: Wed, 3 May 2023 12:07:32 +0800 Subject: [PATCH 1/5] fix example readme and capability Signed-off-by: Xin He --- .../quantization/ptq/cpu/fx/README.md | 43 +++++++++++++++++++ .../quantization/ptq/cpu/fx/main.py | 6 +-- .../quantization/ptq/cpu/ipex/README.md | 12 +++--- 3 files changed, 52 insertions(+), 9 deletions(-) diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/README.md b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/README.md index a949679b82d..935c298fc1e 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/README.md +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/README.md @@ -33,36 +33,67 @@ train val ```shell python main.py -t -a resnet50 --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=resnet50 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=resnet50 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false +``` ### 2. ResNet18 ```shell python main.py -t -a resnet18 --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=resnet18 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=resnet18 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false +``` ### 3. ResNeXt101_32x8d ```shell python main.py -t -a resnext101_32x8d --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=resnext101_32x8d --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=resnext101_32x8d --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false +``` ### 4. 
InceptionV3 ```shell python main.py -t -a inception_v3 --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=inception_v3 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=inception_v3 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false +``` ### 5. Mobilenet_v2 ```shell python main.py -t -a mobilenet_v2 --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=mobilenet_v2 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=mobilenet_v2 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false +``` ### 6. Efficientnet_b0 ```shell python main.py -t -a efficientnet_b0 --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=efficientnet_b0 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=efficientnet_b0 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false +``` + > **Note** > > To reduce tuning time and get the result faster, the `efficientnet_b0` model uses @@ -74,6 +105,12 @@ python main.py -t -a efficientnet_b0 --pretrained /path/to/imagenet ```shell python main.py -t -a efficientnet_b3 --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=efficientnet_b3 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=efficientnet_b3 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false +``` + > **Note** > > To reduce tuning time and get the result faster, the `efficientnet_b3` model uses @@ -83,6 +120,12 @@ python main.py -t -a efficientnet_b3 --pretrained /path/to/imagenet ```shell python main.py -t -a efficientnet_b7 --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=efficientnet_b7 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=efficientnet_b7 --dataset_location=/path/to/imagenet 
--mode=performance/accuracy --int8=true/false +``` + > **Note** > > To reduce tuning time and get the result faster, the `efficientnet_b7` model uses diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py index 59b9027d8af..4c315c8acb9 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py @@ -96,10 +96,10 @@ def main(): args = parser.parse_args() - if 'efficient' in args.arch: - import torchvision.models as models - else: + if 'mobilenet' in args.arch: import torchvision.models.quantization as models + else: + import torchvision.models as models if args.seed is not None: random.seed(args.seed) diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md index 34bdeb37fdf..663fd94962b 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md @@ -71,8 +71,8 @@ python main.py -t -a resnet18 --ipex --pretrained /path/to/imagenet ``` or ```shell -bash run_tuning.sh --topology=resnet18 --dataset_location=/path/to/imagenet -bash run_benchmark.sh --topology=resnet18 --dataset_location=/path/to/imagenet --mode=benchmark/accuracy --int8=true/false +bash run_tuning.sh --input_model=resnet18 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=resnet18 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false ``` ### 2. 
ResNet50 With Intel PyTorch Extension @@ -82,8 +82,8 @@ python main.py -t -a resnet50 --ipex --pretrained /path/to/imagenet ``` or ```shell -bash run_tuning.sh --topology=resnet50 --dataset_location=/path/to/imagenet -bash run_benchmark.sh --topology=resnet50 --dataset_location=/path/to/imagenet --mode=benchmark/accuracy --int8=true/false +bash run_tuning.sh --input_model=resnet50 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=resnet50 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false ``` ### 3. ResNext101_32x16d With Intel PyTorch Extension @@ -93,8 +93,8 @@ python main.py -t -a resnext101_32x16d_wsl --hub --ipex --pretrained /path/to/im ``` or ```shell -bash run_tuning.sh --topology=resnext101_32x16d_wsl --dataset_location=/path/to/imagenet -bash run_benchmark.sh --topology=resnext101_32x16d_wsl --dataset_location=/path/to/imagenet --mode=benchmark/accuracy --int8=true/false +bash run_tuning.sh --input_model=resnext101_32x16d_wsl --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=resnext101_32x16d_wsl --dataset_location=/path/to/imagenet --mode=benchmark/accuracy --int8=true/false ``` # Saving and Loading Model From 50c0f8cbdd3b465718b8f7c6f6fbd8fe33e98211 Mon Sep 17 00:00:00 2001 From: Xin He Date: Wed, 3 May 2023 12:11:27 +0800 Subject: [PATCH 2/5] fix bug Signed-off-by: Xin He --- .../torchvision_models/quantization/ptq/cpu/ipex/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md index 663fd94962b..f5d5b4d4c73 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md @@ -94,7 +94,7 @@ python main.py -t -a resnext101_32x16d_wsl --hub 
--ipex --pretrained /path/to/im or ```shell bash run_tuning.sh --input_model=resnext101_32x16d_wsl --dataset_location=/path/to/imagenet -bash run_benchmark.sh --input_model=resnext101_32x16d_wsl --dataset_location=/path/to/imagenet --mode=benchmark/accuracy --int8=true/false +bash run_benchmark.sh --input_model=resnext101_32x16d_wsl --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false ``` # Saving and Loading Model From cc4c2f575f47a7f68de70f5407123e9a2c1f6b3d Mon Sep 17 00:00:00 2001 From: Xin He Date: Thu, 4 May 2023 15:52:52 +0800 Subject: [PATCH 3/5] fix document Signed-off-by: Xin He --- docs/source/smooth_quant.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/source/smooth_quant.md b/docs/source/smooth_quant.md index 9b9b18aaf2a..28cc836c5cc 100644 --- a/docs/source/smooth_quant.md +++ b/docs/source/smooth_quant.md @@ -309,13 +309,13 @@ conv2d/linear->conv2d/linear/layernorm/batchnorm/instancenorm/t5norm/llamanorm/g ``` ## Validated Models -neural_compressor: 2.1 +Neural Compressor: 2.1 IPEX: 2.0 Dataset: lambada -task: text-generation +Task: text-generation alpha [0.4, 0.6] is sweet spot region in SmoothQuant paper @@ -351,9 +351,9 @@ smooth_quant_args description: "alpha": "auto" or a float value. Default is 0.5. "auto" means automatic tuning. -"folding": -- False: Allow inserting mul to update the input distribution and not absorbing. IPEX can fuse inserted mul automatically and folding=False is recommended. And for PyTorch FBGEMM backend, folding=False setting will only convert model to QDQ model. -- True: Only allow inserting mul with the input scale that can be absorbed into the last layer. +"folding": whether to fold torch.mul into the last layer. +- False: Allow inserting mul to update the input distribution and no folding. IPEX (version>=2.1) can fuse inserted mul automatically and folding=False is recommended. 
For Stock PyTorch, setting folding=False will only convert the model to a QDQ model. +- True: Only allow inserting mul with the input scale that can be folded in the last layer. - If folding not set in config, the default value is IPEX: False (True if version<2.1), Stock PyTorch: True. @@ -365,4 +365,4 @@ smooth_quant_args description: [^3]: Wei, Xiuying, et al. "Outlier suppression: Pushing the limit of low-bit transformer language models." arXiv preprint arXiv:2209.13325 (2022). -[^4]: Xiao, Guangxuan, et al. "Smoothquant: Accurate and efficient post-training quantization for large language models." arXiv preprint arXiv:2211.10438 (2022).. +[^4]: Xiao, Guangxuan, et al. "Smoothquant: Accurate and efficient post-training quantization for large language models." arXiv preprint arXiv:2211.10438 (2022). From 1b4fd5b47de8245809ce72a8a45b460b19cd1d3d Mon Sep 17 00:00:00 2001 From: Xin He Date: Thu, 4 May 2023 20:14:58 +0800 Subject: [PATCH 4/5] refine docs with comments Signed-off-by: Xin He --- docs/source/smooth_quant.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/smooth_quant.md b/docs/source/smooth_quant.md index 28cc836c5cc..51f87f4a588 100644 --- a/docs/source/smooth_quant.md +++ b/docs/source/smooth_quant.md @@ -351,10 +351,10 @@ smooth_quant_args description: "alpha": "auto" or a float value. Default is 0.5. "auto" means automatic tuning. -"folding": whether to fold torch.mul into the last layer. -- False: Allow inserting mul to update the input distribution and no folding. IPEX (version>=2.1) can fuse inserted mul automatically and folding=False is recommended. For Stock PyTorch, setting folding=False will only convert the model to a QDQ model. -- True: Only allow inserting mul with the input scale that can be folded in the last layer. -- If folding not set in config, the default value is IPEX: False (True if version<2.1), Stock PyTorch: True. 
+"folding": whether to fold mul into the previous layer, where mul is required to update the input distribution during smoothing. +- False: Allow inserting mul to update the input distribution and no folding. IPEX (version>=2.1) can fuse inserted mul automatically. For Stock PyTorch, setting folding=False will only convert the model to a QDQ model. +- True: Only allow inserting mul that can be folded into the previous layer. +- If folding is not set in config, the default value is: IPEX: False (True if version<2.1), Stock PyTorch: True. ## Reference From c95940c9ed22f07ccc0dd1e6b861c813b92d4eca Mon Sep 17 00:00:00 2001 From: Xin He Date: Mon, 8 May 2023 17:49:58 +0800 Subject: [PATCH 5/5] refine docs Signed-off-by: Xin He --- docs/source/smooth_quant.md | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/docs/source/smooth_quant.md b/docs/source/smooth_quant.md index 51f87f4a588..52555e014a4 100644 --- a/docs/source/smooth_quant.md +++ b/docs/source/smooth_quant.md @@ -311,7 +311,7 @@ conv2d/linear->conv2d/linear/layernorm/batchnorm/instancenorm/t5norm/llamanorm/g ## Validated Models Neural Compressor: 2.1 -IPEX: 2.0 +IPEX (Intel Extension for PyTorch): 2.0 Dataset: lambada @@ -352,14 +352,20 @@ smooth_quant_args description: "alpha": "auto" or a float value. Default is 0.5. "auto" means automatic tuning. "folding": whether to fold mul into the previous layer, where mul is required to update the input distribution during smoothing. -- False: Allow inserting mul to update the input distribution and no folding. IPEX (version>=2.1) can fuse inserted mul automatically. For Stock PyTorch, setting folding=False will only convert the model to a QDQ model. -- True: Only allow inserting mul that can be folded into the previous layer. -- If folding is not set in config, the default value is: IPEX: False (True if version<2.1), Stock PyTorch: True. +- True: Fold inserted mul into the previous layer. 
IPEX will only insert mul for layers that can do folding.
+- False: Allow inserting mul to update the input distribution without folding. IPEX (version>=2.1) can fuse inserted mul automatically. For Stock PyTorch, setting folding=False will convert the model to a QDQ model.
 
+## Supported Framework Matrix
+
+| Framework | Alpha | Folding |
+|:---------:|--------------|------------|
+| PyTorch | [0-1] / 'auto' | False |
+| IPEX | [0-1] / 'auto' | True / False (version>=2.1) |
+| ONNX | [0-1] | True |
 
 ## Reference
 
-[^1]: Jason, Wei, et al. "Emergent Abilities of Large Language Models". Published in Transactions on Machine Learning Research (2022)
+[^1]: Wei, Jason, et al. "Emergent Abilities of Large Language Models". Published in Transactions on Machine Learning Research (2022).
 
 [^2]: Yvinec, Edouard, et al. "SPIQ: Data-Free Per-Channel Static Input Quantization." Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision. 2023.