From e507ea2fc5899228d8e98645ecb905bcb3ea2f1b Mon Sep 17 00:00:00 2001 From: Xin He Date: Wed, 3 May 2023 12:07:32 +0800 Subject: [PATCH 1/5] fix example readme and capability Signed-off-by: Xin He --- .../quantization/ptq/cpu/fx/README.md | 43 +++++++++++++++++++ .../quantization/ptq/cpu/fx/main.py | 6 +-- .../quantization/ptq/cpu/ipex/README.md | 12 +++--- 3 files changed, 52 insertions(+), 9 deletions(-) diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/README.md b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/README.md index a949679b82d..935c298fc1e 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/README.md +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/README.md @@ -33,36 +33,67 @@ train val ```shell python main.py -t -a resnet50 --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=resnet50 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=resnet50 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false +``` ### 2. ResNet18 ```shell python main.py -t -a resnet18 --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=resnet18 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=resnet18 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false +``` ### 3. ResNeXt101_32x8d ```shell python main.py -t -a resnext101_32x8d --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=resnext101_32x8d --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=resnext101_32x8d --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false +``` ### 4. 
InceptionV3 ```shell python main.py -t -a inception_v3 --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=inception_v3 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=inception_v3 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false +``` ### 5. Mobilenet_v2 ```shell python main.py -t -a mobilenet_v2 --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=mobilenet_v2 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=mobilenet_v2 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false +``` ### 6. Efficientnet_b0 ```shell python main.py -t -a efficientnet_b0 --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=efficientnet_b0 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=efficientnet_b0 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false +``` + > **Note** > > To reduce tuning time and get the result faster, the `efficientnet_b0` model uses @@ -74,6 +105,12 @@ python main.py -t -a efficientnet_b0 --pretrained /path/to/imagenet ```shell python main.py -t -a efficientnet_b3 --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=efficientnet_b3 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=efficientnet_b3 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false +``` + > **Note** > > To reduce tuning time and get the result faster, the `efficientnet_b3` model uses @@ -83,6 +120,12 @@ python main.py -t -a efficientnet_b3 --pretrained /path/to/imagenet ```shell python main.py -t -a efficientnet_b7 --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=efficientnet_b7 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=efficientnet_b7 --dataset_location=/path/to/imagenet 
--mode=performance/accuracy --int8=true/false +``` + > **Note** > > To reduce tuning time and get the result faster, the `efficientnet_b7` model uses diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py index 59b9027d8af..4c315c8acb9 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py @@ -96,10 +96,10 @@ def main(): args = parser.parse_args() - if 'efficient' in args.arch: - import torchvision.models as models - else: + if 'mobilenet' in args.arch: import torchvision.models.quantization as models + else: + import torchvision.models as models if args.seed is not None: random.seed(args.seed) diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md index 34bdeb37fdf..663fd94962b 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md @@ -71,8 +71,8 @@ python main.py -t -a resnet18 --ipex --pretrained /path/to/imagenet ``` or ```shell -bash run_tuning.sh --topology=resnet18 --dataset_location=/path/to/imagenet -bash run_benchmark.sh --topology=resnet18 --dataset_location=/path/to/imagenet --mode=benchmark/accuracy --int8=true/false +bash run_tuning.sh --input_model=resnet18 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=resnet18 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false ``` ### 2. 
ResNet50 With Intel PyTorch Extension @@ -82,8 +82,8 @@ python main.py -t -a resnet50 --ipex --pretrained /path/to/imagenet ``` or ```shell -bash run_tuning.sh --topology=resnet50 --dataset_location=/path/to/imagenet -bash run_benchmark.sh --topology=resnet50 --dataset_location=/path/to/imagenet --mode=benchmark/accuracy --int8=true/false +bash run_tuning.sh --input_model=resnet50 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=resnet50 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false ``` ### 3. ResNext101_32x16d With Intel PyTorch Extension @@ -93,8 +93,8 @@ python main.py -t -a resnext101_32x16d_wsl --hub --ipex --pretrained /path/to/im ``` or ```shell -bash run_tuning.sh --topology=resnext101_32x16d_wsl --dataset_location=/path/to/imagenet -bash run_benchmark.sh --topology=resnext101_32x16d_wsl --dataset_location=/path/to/imagenet --mode=benchmark/accuracy --int8=true/false +bash run_tuning.sh --input_model=resnext101_32x16d_wsl --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=resnext101_32x16d_wsl --dataset_location=/path/to/imagenet --mode=benchmark/accuracy --int8=true/false ``` # Saving and Loading Model From 50c0f8cbdd3b465718b8f7c6f6fbd8fe33e98211 Mon Sep 17 00:00:00 2001 From: Xin He Date: Wed, 3 May 2023 12:11:27 +0800 Subject: [PATCH 2/5] fix bug Signed-off-by: Xin He --- .../torchvision_models/quantization/ptq/cpu/ipex/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md index 663fd94962b..f5d5b4d4c73 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md @@ -94,7 +94,7 @@ python main.py -t -a resnext101_32x16d_wsl --hub 
--ipex --pretrained /path/to/im or ```shell bash run_tuning.sh --input_model=resnext101_32x16d_wsl --dataset_location=/path/to/imagenet -bash run_benchmark.sh --input_model=resnext101_32x16d_wsl --dataset_location=/path/to/imagenet --mode=benchmark/accuracy --int8=true/false +bash run_benchmark.sh --input_model=resnext101_32x16d_wsl --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false ``` # Saving and Loading Model From cc4c2f575f47a7f68de70f5407123e9a2c1f6b3d Mon Sep 17 00:00:00 2001 From: Xin He Date: Thu, 4 May 2023 15:52:52 +0800 Subject: [PATCH 3/5] fix document Signed-off-by: Xin He --- docs/source/smooth_quant.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/source/smooth_quant.md b/docs/source/smooth_quant.md index 9b9b18aaf2a..28cc836c5cc 100644 --- a/docs/source/smooth_quant.md +++ b/docs/source/smooth_quant.md @@ -309,13 +309,13 @@ conv2d/linear->conv2d/linear/layernorm/batchnorm/instancenorm/t5norm/llamanorm/g ``` ## Validated Models -neural_compressor: 2.1 +Neural Compressor: 2.1 IPEX: 2.0 Dataset: lambada -task: text-generation +Task: text-generation alpha [0.4, 0.6] is sweet spot region in SmoothQuant paper @@ -351,9 +351,9 @@ smooth_quant_args description: "alpha": "auto" or a float value. Default is 0.5. "auto" means automatic tuning. -"folding": -- False: Allow inserting mul to update the input distribution and not absorbing. IPEX can fuse inserted mul automatically and folding=False is recommended. And for PyTorch FBGEMM backend, folding=False setting will only convert model to QDQ model. -- True: Only allow inserting mul with the input scale that can be absorbed into the last layer. +"folding": whether to fold torch.mul into the last layer. +- False: Allow inserting mul to update the input distribution and no folding. IPEX (version>=2.1) can fuse inserted mul automatically and folding=False is recommended. 
For Stock PyTorch, setting folding=False will only convert the model to a QDQ model. +- True: Only allow inserting mul with the input scale that can be folded in the last layer. - If folding not set in config, the default value is IPEX: False (True if version<2.1), Stock PyTorch: True. @@ -365,4 +365,4 @@ smooth_quant_args description: [^3]: Wei, Xiuying, et al. "Outlier suppression: Pushing the limit of low-bit transformer language models." arXiv preprint arXiv:2209.13325 (2022). -[^4]: Xiao, Guangxuan, et al. "Smoothquant: Accurate and efficient post-training quantization for large language models." arXiv preprint arXiv:2211.10438 (2022).. +[^4]: Xiao, Guangxuan, et al. "Smoothquant: Accurate and efficient post-training quantization for large language models." arXiv preprint arXiv:2211.10438 (2022). From 1b4fd5b47de8245809ce72a8a45b460b19cd1d3d Mon Sep 17 00:00:00 2001 From: Xin He Date: Thu, 4 May 2023 20:14:58 +0800 Subject: [PATCH 4/5] refine docs with comments Signed-off-by: Xin He --- docs/source/smooth_quant.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/smooth_quant.md b/docs/source/smooth_quant.md index 28cc836c5cc..51f87f4a588 100644 --- a/docs/source/smooth_quant.md +++ b/docs/source/smooth_quant.md @@ -351,10 +351,10 @@ smooth_quant_args description: "alpha": "auto" or a float value. Default is 0.5. "auto" means automatic tuning. -"folding": whether to fold torch.mul into the last layer. -- False: Allow inserting mul to update the input distribution and no folding. IPEX (version>=2.1) can fuse inserted mul automatically and folding=False is recommended. For Stock PyTorch, setting folding=False will only convert the model to a QDQ model. -- True: Only allow inserting mul with the input scale that can be folded in the last layer. -- If folding not set in config, the default value is IPEX: False (True if version<2.1), Stock PyTorch: True. 
+"folding": whether to fold mul into the previous layer, where mul is required to update the input distribution during smoothing. +- False: Allow inserting mul to update the input distribution and no folding. IPEX (version>=2.1) can fuse inserted mul automatically. For Stock PyTorch, setting folding=False will only convert the model to a QDQ model. +- True: Only allow inserting mul that can be folded into the previous layer. +- If folding is not set in config, the default value is: IPEX: False (True if version<2.1), Stock PyTorch: True. ## Reference From c95940c9ed22f07ccc0dd1e6b861c813b92d4eca Mon Sep 17 00:00:00 2001 From: Xin He Date: Mon, 8 May 2023 17:49:58 +0800 Subject: [PATCH 5/5] refine docs Signed-off-by: Xin He --- docs/source/smooth_quant.md | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/docs/source/smooth_quant.md b/docs/source/smooth_quant.md index 51f87f4a588..52555e014a4 100644 --- a/docs/source/smooth_quant.md +++ b/docs/source/smooth_quant.md @@ -311,7 +311,7 @@ conv2d/linear->conv2d/linear/layernorm/batchnorm/instancenorm/t5norm/llamanorm/g ## Validated Models Neural Compressor: 2.1 -IPEX: 2.0 +IPEX (Intel Extension for PyTorch): 2.0 Dataset: lambada @@ -352,14 +352,20 @@ smooth_quant_args description: "alpha": "auto" or a float value. Default is 0.5. "auto" means automatic tuning. "folding": whether to fold mul into the previous layer, where mul is required to update the input distribution during smoothing. -- False: Allow inserting mul to update the input distribution and no folding. IPEX (version>=2.1) can fuse inserted mul automatically. For Stock PyTorch, setting folding=False will only convert the model to a QDQ model. -- True: Only allow inserting mul that can be folded into the previous layer. -- If folding is not set in config, the default value is: IPEX: False (True if version<2.1), Stock PyTorch: True. +- True: Fold inserted mul into the previous layer. 
IPEX will only insert mul for layers that can do folding.
+- False: Allow inserting mul to update the input distribution without folding. IPEX (version>=2.1) can fuse inserted mul automatically. For Stock PyTorch, setting folding=False will convert the model to a QDQ model.
 
+## Supported Framework Matrix
+
+| Framework | Alpha | Folding |
+|:---------:|--------------|------------|
+| PyTorch | [0-1] / 'auto' | False |
+| IPEX | [0-1] / 'auto' | True / False (version>=2.1) |
+| ONNX | [0-1] | True |
 
 ## Reference
 
-[^1]: Jason, Wei, et al. "Emergent Abilities of Large Language Models". Published in Transactions on Machine Learning Research (2022)
+[^1]: Wei, Jason, et al. "Emergent Abilities of Large Language Models". Published in Transactions on Machine Learning Research (2022).
 
 [^2]: Yvinec, Edouard, et al. "SPIQ: Data-Free Per-Channel Static Input Quantization." Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision. 2023.