diff --git a/docs/source/smooth_quant.md b/docs/source/smooth_quant.md index 9b9b18aaf2a..52555e014a4 100644 --- a/docs/source/smooth_quant.md +++ b/docs/source/smooth_quant.md @@ -309,13 +309,13 @@ conv2d/linear->conv2d/linear/layernorm/batchnorm/instancenorm/t5norm/llamanorm/g ``` ## Validated Models -neural_compressor: 2.1 +Neural Compressor: 2.1 -IPEX: 2.0 +IPEX (Intel Extension for PyTorch): 2.0 Dataset: lambada -task: text-generation +Task: text-generation alpha [0.4, 0.6] is sweet spot region in SmoothQuant paper @@ -351,18 +351,24 @@ smooth_quant_args description: "alpha": "auto" or a float value. Default is 0.5. "auto" means automatic tuning. -"folding": -- False: Allow inserting mul to update the input distribution and not absorbing. IPEX can fuse inserted mul automatically and folding=False is recommended. And for PyTorch FBGEMM backend, folding=False setting will only convert model to QDQ model. -- True: Only allow inserting mul with the input scale that can be absorbed into the last layer. -- If folding not set in config, the default value is IPEX: False (True if version<2.1), Stock PyTorch: True. +"folding": whether to fold mul into the previous layer, where mul is required to update the input distribution during smoothing. +- True: Fold inserted mul into the previous layer. IPEX will only insert mul for layers that can do folding. +- False: Allow inserting mul to update the input distribution and no folding. IPEX (version>=2.1) can fuse inserted mul automatically. For Stock PyTorch, setting folding=False will convert the model to a QDQ model. +## Supported Framework Matrix + +| Framework | Alpha | Folding | +|:---------:|--------------|------------| +| PyTorch | [0-1] / 'auto' | False | +| IPEX | [0-1] / 'auto' | True / False(Version>=2.1) | +| ONNX | [0-1] | True | ## Reference -[^1]: Jason, Wei, et al. "Emergent Abilities of Large Language Models". Published in Transactions on Machine Learning Research (2022) +[^1]: Wei, Jason, et al. 
"Emergent Abilities of Large Language Models". Published in Transactions on Machine Learning Research (2022). [^2]: Yvinec, Edouard, et al. "SPIQ: Data-Free Per-Channel Static Input Quantization." Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision. 2023. [^3]: Wei, Xiuying, et al. "Outlier suppression: Pushing the limit of low-bit transformer language models." arXiv preprint arXiv:2209.13325 (2022). -[^4]: Xiao, Guangxuan, et al. "Smoothquant: Accurate and efficient post-training quantization for large language models." arXiv preprint arXiv:2211.10438 (2022).. +[^4]: Xiao, Guangxuan, et al. "Smoothquant: Accurate and efficient post-training quantization for large language models." arXiv preprint arXiv:2211.10438 (2022). diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/README.md b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/README.md index a949679b82d..935c298fc1e 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/README.md +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/README.md @@ -33,36 +33,67 @@ train val ```shell python main.py -t -a resnet50 --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=resnet50 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=resnet50 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false +``` ### 2. ResNet18 ```shell python main.py -t -a resnet18 --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=resnet18 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=resnet18 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false +``` ### 3. 
ResNeXt101_32x8d ```shell python main.py -t -a resnext101_32x8d --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=resnext101_32x8d --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=resnext101_32x8d --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false +``` ### 4. InceptionV3 ```shell python main.py -t -a inception_v3 --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=inception_v3 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=inception_v3 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false +``` ### 5. Mobilenet_v2 ```shell python main.py -t -a mobilenet_v2 --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=mobilenet_v2 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=mobilenet_v2 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false +``` ### 6. 
Efficientnet_b0 ```shell python main.py -t -a efficientnet_b0 --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=efficientnet_b0 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=efficientnet_b0 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false +``` + > **Note** > > To reduce tuning time and get the result faster, the `efficientnet_b0` model uses @@ -74,6 +105,12 @@ python main.py -t -a efficientnet_b0 --pretrained /path/to/imagenet ```shell python main.py -t -a efficientnet_b3 --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=efficientnet_b3 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=efficientnet_b3 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false +``` + > **Note** > > To reduce tuning time and get the result faster, the `efficientnet_b3` model uses @@ -83,6 +120,12 @@ python main.py -t -a efficientnet_b3 --pretrained /path/to/imagenet ```shell python main.py -t -a efficientnet_b7 --pretrained /path/to/imagenet ``` +or +```shell +bash run_tuning.sh --input_model=efficientnet_b7 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=efficientnet_b7 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false +``` + > **Note** > > To reduce tuning time and get the result faster, the `efficientnet_b7` model uses diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py index 59b9027d8af..4c315c8acb9 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py @@ -96,10 +96,10 @@ def main(): args = parser.parse_args() - if 'efficient' in args.arch: - import torchvision.models as models - 
else: + if 'mobilenet' in args.arch: import torchvision.models.quantization as models + else: + import torchvision.models as models if args.seed is not None: random.seed(args.seed) diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md index 34bdeb37fdf..f5d5b4d4c73 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md +++ b/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md @@ -71,8 +71,8 @@ python main.py -t -a resnet18 --ipex --pretrained /path/to/imagenet ``` or ```shell -bash run_tuning.sh --topology=resnet18 --dataset_location=/path/to/imagenet -bash run_benchmark.sh --topology=resnet18 --dataset_location=/path/to/imagenet --mode=benchmark/accuracy --int8=true/false +bash run_tuning.sh --input_model=resnet18 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=resnet18 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false ``` ### 2. ResNet50 With Intel PyTorch Extension @@ -82,8 +82,8 @@ python main.py -t -a resnet50 --ipex --pretrained /path/to/imagenet ``` or ```shell -bash run_tuning.sh --topology=resnet50 --dataset_location=/path/to/imagenet -bash run_benchmark.sh --topology=resnet50 --dataset_location=/path/to/imagenet --mode=benchmark/accuracy --int8=true/false +bash run_tuning.sh --input_model=resnet50 --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=resnet50 --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false ``` ### 3. 
ResNext101_32x16d With Intel PyTorch Extension @@ -93,8 +93,8 @@ python main.py -t -a resnext101_32x16d_wsl --hub --ipex --pretrained /path/to/im ``` or ```shell -bash run_tuning.sh --topology=resnext101_32x16d_wsl --dataset_location=/path/to/imagenet -bash run_benchmark.sh --topology=resnext101_32x16d_wsl --dataset_location=/path/to/imagenet --mode=benchmark/accuracy --int8=true/false +bash run_tuning.sh --input_model=resnext101_32x16d_wsl --dataset_location=/path/to/imagenet +bash run_benchmark.sh --input_model=resnext101_32x16d_wsl --dataset_location=/path/to/imagenet --mode=performance/accuracy --int8=true/false ``` # Saving and Loading Model