From fb579ab6bd866fa72fb05367dad50a0fa32a5ea6 Mon Sep 17 00:00:00 2001
From: zehao-intel
Date: Mon, 22 Jul 2024 14:46:27 +0800
Subject: [PATCH 01/10] Enable yolov5 Example for TF 3x API

Signed-off-by: zehao-intel
---
 .../.config/model_params_tensorflow_3x.json   |   7 +
 .../yolo_v5/quantization/ptq/README.md        | 102 +++++++++
 .../yolo_v5/quantization/ptq/main.py          | 173 ++++++++++++++++++
 .../quantization/ptq/prepare_dataset.sh       |  90 +++++++++
 .../yolo_v5/quantization/ptq/prepare_model.sh |   3 +
 .../yolo_v5/quantization/ptq/requirements.txt |   1 +
 .../yolo_v5/quantization/ptq/run_benchmark.sh |  49 +++++
 .../yolo_v5/quantization/ptq/run_quant.sh     |  40 ++++
 8 files changed, 465 insertions(+)
 create mode 100644 examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/README.md
 create mode 100644 examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py
 create mode 100644 examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_dataset.sh
 create mode 100644 examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_model.sh
 create mode 100644 examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/requirements.txt
 create mode 100644 examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_benchmark.sh
 create mode 100644 examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_quant.sh

diff --git a/examples/.config/model_params_tensorflow_3x.json b/examples/.config/model_params_tensorflow_3x.json
index 70e1497e508..c6040726ace 100644
--- a/examples/.config/model_params_tensorflow_3x.json
+++ b/examples/.config/model_params_tensorflow_3x.json
@@ -42,6 +42,13 @@
       "input_model": "/tf_dataset/tensorflow/graphsage/graphsage_frozen_model.pb",
       "main_script": "main.py",
       "batch_size": 1000
+    },
+    "yolo_v5": {
+      "model_src_dir": "object_detection/yolo_v5/quantization/ptq",
+      "dataset_location": "/tf_dataset2/datasets/coco2017/coco/val2017",
+      "input_model": "/tf_dataset2/models/tensorflow/yolo_v5/yolov5s.pb",
+      "main_script": "main.py",
+      "batch_size": 1
     }
   }
 }
diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/README.md b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/README.md
new file mode 100644
index 00000000000..92c4628e748
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/README.md
@@ -0,0 +1,102 @@
This document provides step-by-step instructions to reproduce the Yolo-v5 tuning result with Neural Compressor. This example can run on Intel CPUs and GPUs.

# Prerequisite


## 1. Environment
Python 3.10 or a higher version is recommended.

### Install Intel® Neural Compressor
```shell
pip install neural-compressor
```

### Install TensorFlow
```shell
pip install tensorflow
```
> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).

### Install Dependency Packages
```shell
cd examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq
pip install -r requirements.txt
```

### Install Intel Extension for Tensorflow

#### Quantizing the model on Intel GPU (mandatory to install ITEX)
Installing Intel Extension for Tensorflow is mandatory for quantizing the model on Intel GPUs.

```shell
pip install --upgrade intel-extension-for-tensorflow[xpu]
```
For more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers).

#### Quantizing the model on Intel CPU (optional to install ITEX)
Intel Extension for Tensorflow for Intel CPUs is currently experimental. Installing it is not mandatory for quantizing the model on Intel CPUs.

```shell
pip install --upgrade intel-extension-for-tensorflow[cpu]
```

> **Note**:
> The version compatibility of stock TensorFlow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible versions of TensorFlow and ITEX.

## 2. Prepare model

Users can choose to automatically or manually download the model.
### Automatic download

Run the `prepare_model.sh` script.
```shell
. prepare_model.sh
```

This script exports the yolov5 model to `./yolov5/yolov5s.pb`.

### Manual download

To get a TensorFlow pretrained model, you need to export it from a pretrained PyTorch model. Clone the [Ultralytics yolov5 repository](https://github.com/ultralytics/yolov5.git), generate the pretrained PyTorch model, and then export it to a TensorFlow-supported format with the following commands:
```shell
python yolov5/models/tf.py --weights yolov5/yolov5s.pt
python yolov5/export.py --weights yolov5/yolov5s.pt --include pb
```

The yolov5 model will be exported to `./yolov5/yolov5s.pb`.

## 3. Prepare Dataset

Users can choose to automatically or manually download the dataset.
### Automatic download

Run the `prepare_dataset.sh` script.
```shell
. prepare_dataset.sh
```

This script will download the *train*, *validation* and *test* COCO datasets.

### Manual download

Download CoCo Dataset from [Official Website](https://cocodataset.org/#download).


# Run

## 1. Quantization
```shell
bash run_quant.sh --input_model=./yolov5/yolov5s.pb --output_model=yolov5s_int8.pb --dataset_location=/path/to/dataset
```

## 2. Benchmark
```shell
# run performance benchmark
bash run_benchmark.sh --input_model=yolov5s_int8.pb --dataset_location=/path/to/dataset --mode=performance

# run accuracy benchmark
bash run_benchmark.sh --input_model=yolov5s_int8.pb --dataset_location=/path/to/dataset --mode=accuracy
```

Finally, the program will generate the quantized Yolo-v5 model with at most 1% relative accuracy loss.
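
Under the hood, both scripts call `main.py`, and the quantization path reduces to a few calls of the Neural Compressor 3.x TensorFlow API. The following minimal sketch is distilled from `main.py` in this patch; the `calib_func` body here is an assumption standing in for the real `evaluate` function:

```python
# A minimal sketch of the 3.x static PTQ flow that run_quant.sh drives.
from neural_compressor.tensorflow import StaticQuantConfig, quantize_model

def calib_func(model):
    # Assumption: any function that runs a few COCO validation batches
    # through `model` works here; main.py passes its evaluate() function.
    ...

quant_config = StaticQuantConfig(weight_granularity="per_channel")
q_model = quantize_model("./yolov5/yolov5s.pb", quant_config, calib_func=calib_func)
q_model.save("yolov5s_int8.pb")
```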
diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py new file mode 100644 index 00000000000..d2404e79485 --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py @@ -0,0 +1,173 @@ +# YOLOv5 🚀 by Ultralytics, GPL-3.0 license +""" +Validate a trained YOLOv5 classification model on a classification dataset + +Usage: + $ bash data/scripts/get_imagenet.sh --val # download ImageNet val split (6.3G, 50000 images) + $ python classify/val.py --weights yolov5m-cls.pt --data ../datasets/imagenet --img 224 # validate ImageNet + +Usage - formats: + $ python classify/val.py --weights yolov5s-cls.pt # PyTorch + yolov5s-cls.torchscript # TorchScript + yolov5s-cls.onnx # ONNX Runtime or OpenCV DNN with --dnn + yolov5s-cls_openvino_model # OpenVINO + yolov5s-cls.engine # TensorRT + yolov5s-cls.mlmodel # CoreML (macOS-only) + yolov5s-cls_saved_model # TensorFlow SavedModel + yolov5s-cls.pb # TensorFlow GraphDef + yolov5s-cls.tflite # TensorFlow Lite + yolov5s-cls_edgetpu.tflite # TensorFlow Edge TPU + yolov5s-cls_paddle_model # PaddlePaddle +""" + +import argparse +import os +import sys +from pathlib import Path + +import torch +from tqdm import tqdm + +from yolov5.models.common import DetectMultiBackend +from yolov5.utils.dataloaders import create_classification_dataloader +from yolov5.utils.general import (LOGGER, TQDM_BAR_FORMAT, Profile, check_img_size, check_requirements, colorstr, + increment_path, print_args) +from yolov5.utils.torch_utils import select_device, smart_inference_mode + + +parser = argparse.ArgumentParser() +parser.add_argument('--data', type=str, default='/datasets/mnist', help='dataset path') +parser.add_argument('--input_model', nargs='+', type=str, default='yolov5s.pb', help='input model path(s)') +parser.add_argument('--output_model', type=str, default='yolov5s_int8.pb', help='output model path(s)') +parser.add_argument('--batch-size', type=int, default=128, help='batch size') +parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=224, help='inference size (pixels)') +parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') +parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') +parser.add_argument('--verbose', nargs='?', const=True, default=False, help='verbose output') +parser.add_argument('--project', default='evaluate/val-cls', help='save to project/name') +parser.add_argument('--name', default='exp', help='save to project/name') +parser.add_argument('--tune', action="store_true", help='whether to apply quantization') +parser.add_argument('--benchmark', action="store_true", help='whether to run benchmark') +parser.add_argument('--mode', type=str, default='performance', help='run performance or accuracy benchmark') +args = parser.parse_args() + + +@smart_inference_mode() +def evaluate( + model=None, # model.pt path(s) + data=args.data, # dataset dir + batch_size=args.batch-size, # batch size + imgsz=args.imgsz, # inference size (pixels) + device=args.device, # cuda device, i.e. 
0 or 0,1,2,3 or cpu + workers=args.workers, # max dataloader workers (per RANK in DDP mode) + verbose=args.verbose, # verbose output + project=args.project, # save to project/name + name=args.name, # save to project/name + exist_ok=False, # existing project/name ok, do not increment + half=False, # use FP16 half-precision inference + dnn=False, # use OpenCV DNN for ONNX inference + model=None, + dataloader=None, + criterion=None, + pbar=None, +): + # Initialize/load model and set device + training = model is not None + if training: # called by train.py + device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model + half &= device.type != 'cpu' # half precision only supported on CUDA + model.half() if half else model.float() + else: # called directly + device = select_device(device, batch_size=batch_size) + + # Directories + save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run + save_dir.mkdir(parents=True, exist_ok=True) # make dir + + # Load model + model = DetectMultiBackend(model, device=device, dnn=dnn, fp16=half) + stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine + imgsz = check_img_size(imgsz, s=stride) # check image size + half = model.fp16 # FP16 supported on limited backends with CUDA + if engine: + batch_size = model.batch_size + else: + device = model.device + if not (pt or jit): + batch_size = 1 # export.py models default to batch-size 1 + LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models') + + # Dataloader + data = Path(data) + test_dir = data / 'test' if (data / 'test').exists() else data / 'val' # data/test or data/val + dataloader = create_classification_dataloader(path=test_dir, + imgsz=imgsz, + batch_size=batch_size, + augment=False, + rank=-1, + workers=workers) + + model.eval() + pred, targets, loss, dt = [], [], 0, (Profile(), Profile(), Profile()) + n = len(dataloader) # number of batches + action = 'validating' if dataloader.dataset.root.stem == 'val' else 'testing' + desc = f"{pbar.desc[:-36]}{action:>36}" if pbar else f"{action}" + bar = tqdm(dataloader, desc, n, not training, bar_format=TQDM_BAR_FORMAT, position=0) + with torch.cuda.amp.autocast(enabled=device.type != 'cpu'): + for images, labels in bar: + with dt[0]: + images, labels = images.to(device, non_blocking=True), labels.to(device) + + with dt[1]: + y = model(images) + + with dt[2]: + pred.append(y.argsort(1, descending=True)[:, :5]) + targets.append(labels) + if criterion: + loss += criterion(y, labels) + + loss /= n + pred, targets = torch.cat(pred), torch.cat(targets) + correct = (targets[:, None] == pred).float() + acc = torch.stack((correct[:, 0], correct.max(1).values), dim=1) # (top1, top5) accuracy + top1, top5 = acc.mean(0).tolist() + + if pbar: + pbar.desc = f"{pbar.desc[:-36]}{loss:>12.3g}{top1:>12.3g}{top5:>12.3g}" + if verbose: # all classes + LOGGER.info(f"{'Class':>24}{'Images':>12}{'top1_acc':>12}{'top5_acc':>12}") + LOGGER.info(f"{'all':>24}{targets.shape[0]:>12}{top1:>12.3g}{top5:>12.3g}") + for i, c in model.names.items(): + aci = acc[targets == i] + top1i, top5i = aci.mean(0).tolist() + LOGGER.info(f"{c:>24}{aci.shape[0]:>12}{top1i:>12.3g}{top5i:>12.3g}") + + # Print results + t = tuple(x.t / len(dataloader.dataset.samples) * 1E3 for x in dt) # speeds per image + shape = (1, 3, imgsz, imgsz) + LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms post-process per image at shape {shape}' % t) + LOGGER.info(f"Results 
saved to {colorstr('bold', save_dir)}") + + return top1, top5, loss + + +def main(): + if args.tune: + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + + quant_config = StaticQuantConfig(weight_granularity="per_channel") + q_model = quantize_model(args.input_model, quant_config, calib_func=evaluate) + q_model.save(args.output_model) + + if args.benchmark: + if args.mode == 'performance': + evaluate(args.input_graph) + elif args.mode == 'accuracy': + top1, top5, loss = eval(args.input_graph) + print("Batch size = %d" % args.batch_size) + print("Accuracy: %.5f" % top1) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_dataset.sh b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_dataset.sh new file mode 100644 index 00000000000..41e1ebb28eb --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_dataset.sh @@ -0,0 +1,90 @@ +#!/bin/bash +# set -x + +DATA_DIR="${PWD}/data" +DATA_NAME="val2017" +DATA_URL_LIST='http://images.cocodataset.org/zips/val2017.zip http://images.cocodataset.org/annotations/annotations_trainval2017.zip' +PACKAGES_LIST='val2017.zip annotations_trainval2017.zip' +VAL_IMAGE_DIR=$DATA_DIR/val2017 +TRAIN_ANNOTATIONS_FILE=$DATA_DIR/annotations/empty.json +VAL_ANNOTATIONS_FILE=$DATA_DIR/annotations/instances_val2017.json +TESTDEV_ANNOTATIONS_FILE=$DATA_DIR/annotations/empty.json +OUTPUT_DIR=$DATA_DIR + +help() +{ + cat <<- EOF + + Desc: Prepare dataset for Tensorflow COCO object detection. + + -h --help help info + + --dataset_location set dataset location, default is ./data + +EOF + exit 0 +} + +function main { + init_params "$@" + download_dataset +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --dataset_location=*) + DATA_DIR=$(echo "$var" |cut -f2 -d=) + ;; + -h|--help) help + ;; + *) + echo "Error: No such parameter: ${var}" + exit 1 + ;; + esac + done + +} + +# removes files that will not be used anymore +function remove_zipped_packages { + for package in $PACKAGES_LIST; do + rm "$package" + done +} + +# download_dataset +function download_dataset { + if [ ! -d "${DATA_DIR}" ]; then + mkdir "${DATA_DIR}" + fi + + cd "${DATA_DIR}" || exit + if [ ! -f "${VAL_IMAGE_DIR}" ]; then + + for dataset_dowload_link in $DATA_URL_LIST; do + wget "$dataset_dowload_link" + done + for package in $PACKAGES_LIST; do + unzip -o "$package" + done + remove_zipped_packages + if [ ! -d empty_dir ]; then + mkdir empty_dir + fi + + cd annotations || exit + echo "{ \"images\": {}, \"categories\": {}}" > empty.json + cd .. + else + echo "Dataset ${DATA_NAME} is exist!" 
+ fi + + cd ../ +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_model.sh b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_model.sh new file mode 100644 index 00000000000..3446739939d --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_model.sh @@ -0,0 +1,3 @@ +INSTALLATION_PATH=$(python3 -c "import sys; import yolov5; p=sys.modules['yolov5'].__file__; print(p.replace('/__init__.py', ''))") +python $INSTALLATION_PATH/models/tf.py --weights yolov5/yolov5s.pt +python $INSTALLATION_PATH/export.py --weights yolov5/yolov5s.pt --include pb \ No newline at end of file diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/requirements.txt new file mode 100644 index 00000000000..2c40b972bcd --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/requirements.txt @@ -0,0 +1 @@ +yolov5 diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..c81e3d38442 --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_benchmark.sh @@ -0,0 +1,49 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + batch_size=32 + iters=100 + for var in "$@" + do + case $var in + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + + +# run_tuning +function run_benchmark { + python main.py \ + --input_graph ${input_model} \ + --dataset_location ${dataset_location} \ + --mode ${mode} \ + --batch_size ${batch_size} \ + --benchmark \ +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..1e9599d6f8a --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_quant.sh @@ -0,0 +1,40 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + + +# run tuning +function run_benchmark { + python main.py \ + --input_graph ${input_model} \ + --output_graph ${output_model} \ + --dataset_location ${dataset_location} \ + --tune +} + +main "$@" From 7ce78eb6e728dd9b57100f7daa248c11c4300534 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Tue, 23 Jul 2024 09:58:44 +0800 Subject: [PATCH 02/10] fix script Signed-off-by: zehao-intel --- .../.config/model_params_tensorflow_3x.json | 2 +- .../yolo_v5/quantization/ptq/README.md | 8 +- .../yolo_v5/quantization/ptq/main.py | 364 ++++++++++++------ .../quantization/ptq/prepare_dataset.sh | 97 +---- .../yolo_v5/quantization/ptq/run_benchmark.sh | 
27 +- .../yolo_v5/quantization/ptq/run_quant.sh | 4 +- 6 files changed, 275 insertions(+), 227 deletions(-) diff --git a/examples/.config/model_params_tensorflow_3x.json b/examples/.config/model_params_tensorflow_3x.json index c6040726ace..5c6a9ccecbe 100644 --- a/examples/.config/model_params_tensorflow_3x.json +++ b/examples/.config/model_params_tensorflow_3x.json @@ -45,7 +45,7 @@ }, "yolo_v5": { "model_src_dir": "object_detection/yolo_v5/quantization/ptq", - "dataset_location": "/tf_dataset2/datasets/coco2017/coco/val2017", + "dataset_location": "/tf_dataset2/datasets/coco_yolov5/coco", "input_model": "/tf_dataset2/models/tensorflow/yolo_v5/yolov5s.pb", "main_script": "main.py", "batch_size": 1 diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/README.md b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/README.md index 92c4628e748..845e383cd59 100644 --- a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/README.md +++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/README.md @@ -75,13 +75,7 @@ Run the `prepare_dataset.sh` script. ```shell . prepare_dataset.sh ``` - -This script will download the *train*, *validation* and *test* COCO datasets. - -### Manual download - -Download CoCo Dataset from [Official Website](https://cocodataset.org/#download). - +The validation set of coco2017 will be downloaded into a `./coco` folder. # Run diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py index d2404e79485..7282a7827ec 100644 --- a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py @@ -1,45 +1,63 @@ -# YOLOv5 🚀 by Ultralytics, GPL-3.0 license -""" -Validate a trained YOLOv5 classification model on a classification dataset - -Usage: - $ bash data/scripts/get_imagenet.sh --val # download ImageNet val split (6.3G, 50000 images) - $ python classify/val.py --weights yolov5m-cls.pt --data ../datasets/imagenet --img 224 # validate ImageNet - -Usage - formats: - $ python classify/val.py --weights yolov5s-cls.pt # PyTorch - yolov5s-cls.torchscript # TorchScript - yolov5s-cls.onnx # ONNX Runtime or OpenCV DNN with --dnn - yolov5s-cls_openvino_model # OpenVINO - yolov5s-cls.engine # TensorRT - yolov5s-cls.mlmodel # CoreML (macOS-only) - yolov5s-cls_saved_model # TensorFlow SavedModel - yolov5s-cls.pb # TensorFlow GraphDef - yolov5s-cls.tflite # TensorFlow Lite - yolov5s-cls_edgetpu.tflite # TensorFlow Edge TPU - yolov5s-cls_paddle_model # PaddlePaddle -""" +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# import argparse import os import sys -from pathlib import Path - import torch +import numpy as np + +from pathlib import Path from tqdm import tqdm + from yolov5.models.common import DetectMultiBackend -from yolov5.utils.dataloaders import create_classification_dataloader -from yolov5.utils.general import (LOGGER, TQDM_BAR_FORMAT, Profile, check_img_size, check_requirements, colorstr, - increment_path, print_args) +from yolov5.utils.callbacks import Callbacks +from yolov5.utils.dataloaders import create_dataloader +from yolov5.utils.general import ( + LOGGER, + TQDM_BAR_FORMAT, + Profile, + check_dataset, + check_img_size, + check_requirements, + check_yaml, + coco80_to_coco91_class, + colorstr, + increment_path, + non_max_suppression, + print_args, + scale_boxes, + xywh2xyxy, +) +from yolov5.utils.metrics import ap_per_class, box_iou +from yolov5.utils.plots import output_to_target, plot_images, plot_val_study from yolov5.utils.torch_utils import select_device, smart_inference_mode +from neural_compressor.tensorflow.utils import BaseModel + parser = argparse.ArgumentParser() -parser.add_argument('--data', type=str, default='/datasets/mnist', help='dataset path') -parser.add_argument('--input_model', nargs='+', type=str, default='yolov5s.pb', help='input model path(s)') +parser.add_argument('--dataset_location', type=str, default='/datasets/mnist', help='dataset path') +parser.add_argument('--input_model', type=str, default='yolov5s.pb', help='input model path(s)') parser.add_argument('--output_model', type=str, default='yolov5s_int8.pb', help='output model path(s)') -parser.add_argument('--batch-size', type=int, default=128, help='batch size') +parser.add_argument('--batch_size', type=int, default=128, help='batch size') parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=224, help='inference size (pixels)') parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)') @@ -49,107 +67,207 @@ parser.add_argument('--tune', action="store_true", help='whether to apply quantization') parser.add_argument('--benchmark', action="store_true", help='whether to run benchmark') parser.add_argument('--mode', type=str, default='performance', help='run performance or accuracy benchmark') +parser.add_argument('--iteration', type=int, default=100, help='iteration for calibration or evaluation') args = parser.parse_args() +def process_batch(detections, labels, iouv): + """ + Return correct prediction matrix. 
+ + Arguments: + detections (array[N, 6]), x1, y1, x2, y2, conf, class + labels (array[M, 5]), class, x1, y1, x2, y2 + Returns: + correct (array[N, 10]), for 10 IoU levels + """ + correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool) + iou = box_iou(labels[:, 1:], detections[:, :4]) + correct_class = labels[:, 0:1] == detections[:, 5] + for i in range(len(iouv)): + x = torch.where((iou >= iouv[i]) & correct_class) # IoU > threshold and classes match + if x[0].shape[0]: + matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() # [label, detect, iou] + if x[0].shape[0] > 1: + matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 1], return_index=True)[1]] + # matches = matches[matches[:, 2].argsort()[::-1]] + matches = matches[np.unique(matches[:, 0], return_index=True)[1]] + correct[matches[:, 1].astype(int), i] = True + return torch.tensor(correct, dtype=torch.bool, device=iouv.device) @smart_inference_mode() def evaluate( - model=None, # model.pt path(s) - data=args.data, # dataset dir - batch_size=args.batch-size, # batch size - imgsz=args.imgsz, # inference size (pixels) - device=args.device, # cuda device, i.e. 0 or 0,1,2,3 or cpu - workers=args.workers, # max dataloader workers (per RANK in DDP mode) - verbose=args.verbose, # verbose output + model, # model.pt path(s) + source=args.dataset_location, + imgsz=640, # inference size (pixels) + conf_thres=0.001, # confidence threshold + iou_thres=0.6, # NMS IoU threshold + max_det=300, # maximum detections per image + task="val", # train, val, test, speed or study + device="", # cuda device, i.e. 0 or 0,1,2,3 or cpu + workers=8, # max dataloader workers (per RANK in DDP mode) + single_cls=False, # treat as single-class dataset + verbose=False, # verbose output project=args.project, # save to project/name - name=args.name, # save to project/name + name="exp", # save to project/name exist_ok=False, # existing project/name ok, do not increment - half=False, # use FP16 half-precision inference - dnn=False, # use OpenCV DNN for ONNX inference - model=None, - dataloader=None, - criterion=None, - pbar=None, + save_dir=Path(""), + callbacks=Callbacks(), + compute_loss=None, ): - # Initialize/load model and set device - training = model is not None - if training: # called by train.py - device, pt, jit, engine = next(model.parameters()).device, True, False, False # get model device, PyTorch model - half &= device.type != 'cpu' # half precision only supported on CUDA - model.half() if half else model.float() - else: # called directly - device = select_device(device, batch_size=batch_size) - - # Directories - save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run - save_dir.mkdir(parents=True, exist_ok=True) # make dir - - # Load model - model = DetectMultiBackend(model, device=device, dnn=dnn, fp16=half) - stride, pt, jit, engine = model.stride, model.pt, model.jit, model.engine - imgsz = check_img_size(imgsz, s=stride) # check image size - half = model.fp16 # FP16 supported on limited backends with CUDA - if engine: - batch_size = model.batch_size - else: - device = model.device - if not (pt or jit): - batch_size = 1 # export.py models default to batch-size 1 - LOGGER.info(f'Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models') - - # Dataloader - data = Path(data) - test_dir = data / 'test' if (data / 'test').exists() else data / 'val' # data/test or data/val - dataloader = 
create_classification_dataloader(path=test_dir, - imgsz=imgsz, - batch_size=batch_size, - augment=False, - rank=-1, - workers=workers) + if isinstance(model, BaseModel): + model.save("./yolov5s_eval.pb") + model = "./yolov5s_eval.pb" + device = select_device(device) + + save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run + (save_dir / "labels").mkdir(parents=True, exist_ok=True) # make dir + # Load model + model = DetectMultiBackend(model, device=device) + stride, pt = model.stride, model.pt + imgsz = check_img_size(imgsz, s=stride) # check image size + device = model.device + batch_size = 1 # export.py models default to batch-size 1 + LOGGER.info(f"Forcing --batch-size 1 square inference (1,3,{imgsz},{imgsz}) for non-PyTorch models") + + # Data + #data = check_dataset(yaml_path) # check + + # Configure model.eval() - pred, targets, loss, dt = [], [], 0, (Profile(), Profile(), Profile()) - n = len(dataloader) # number of batches - action = 'validating' if dataloader.dataset.root.stem == 'val' else 'testing' - desc = f"{pbar.desc[:-36]}{action:>36}" if pbar else f"{action}" - bar = tqdm(dataloader, desc, n, not training, bar_format=TQDM_BAR_FORMAT, position=0) - with torch.cuda.amp.autocast(enabled=device.type != 'cpu'): - for images, labels in bar: - with dt[0]: - images, labels = images.to(device, non_blocking=True), labels.to(device) - - with dt[1]: - y = model(images) - - with dt[2]: - pred.append(y.argsort(1, descending=True)[:, :5]) - targets.append(labels) - if criterion: - loss += criterion(y, labels) - - loss /= n - pred, targets = torch.cat(pred), torch.cat(targets) - correct = (targets[:, None] == pred).float() - acc = torch.stack((correct[:, 0], correct.max(1).values), dim=1) # (top1, top5) accuracy - top1, top5 = acc.mean(0).tolist() - - if pbar: - pbar.desc = f"{pbar.desc[:-36]}{loss:>12.3g}{top1:>12.3g}{top5:>12.3g}" - if verbose: # all classes - LOGGER.info(f"{'Class':>24}{'Images':>12}{'top1_acc':>12}{'top5_acc':>12}") - LOGGER.info(f"{'all':>24}{targets.shape[0]:>12}{top1:>12.3g}{top5:>12.3g}") - for i, c in model.names.items(): - aci = acc[targets == i] - top1i, top5i = aci.mean(0).tolist() - LOGGER.info(f"{c:>24}{aci.shape[0]:>12}{top1i:>12.3g}{top5i:>12.3g}") - - # Print results - t = tuple(x.t / len(dataloader.dataset.samples) * 1E3 for x in dt) # speeds per image - shape = (1, 3, imgsz, imgsz) - LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms post-process per image at shape {shape}' % t) - LOGGER.info(f"Results saved to {colorstr('bold', save_dir)}") - - return top1, top5, loss + nc = 1 if single_cls else 80 # number of classes + iouv = torch.linspace(0.5, 0.95, 10, device=device) # iou vector for mAP@0.5:0.95 + niou = iouv.numel() + + # Dataloader + model.warmup(imgsz=(1 if pt else batch_size, 3, imgsz, imgsz)) # warmup + pad, rect = (0.5, pt) # square inference for benchmarks + + dataloader = create_dataloader( + source, + imgsz, + batch_size, + stride, + single_cls, + pad=pad, + rect=rect, + workers=workers, + prefix=colorstr(f"{task}: "), + )[0] + + seen = 0 + names = model.names if hasattr(model, "names") else model.module.names # get class names + if isinstance(names, (list, tuple)): # old format + names = dict(enumerate(names)) + s = ("%22s" + "%11s" * 6) % ("Class", "Images", "Instances", "P", "R", "mAP50", "mAP50-95") + p, r, mp, mr, map50, ap50, map = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 + dt = Profile(), Profile(), Profile() # profiling times + loss = torch.zeros(3, device=device) + stats, ap, ap_class = [], [], 
[] + callbacks.run("on_val_start") + pbar = tqdm(dataloader, desc=s, bar_format=TQDM_BAR_FORMAT) # progress bar + iters = -1 if args.mode == "accuracy" else args.iteration + for batch_i, (im, targets, paths, shapes) in enumerate(pbar): + if batch_i == iters: + break + + callbacks.run("on_val_batch_start") + with dt[0]: + im = im.float() # uint8 to fp16/32 + im /= 255 # 0 - 255 to 0.0 - 1.0 + _, _, height, width = im.shape # batch size, channels, height, width + + # Inference + with dt[1]: + preds, train_out = model(im) if compute_loss else (model(im, augment=False), None) + + # Batch size 1 inference drops the batch dim + if isinstance(preds, list): + preds = preds[0] + + if preds.dim() == 2: + preds=preds.unsqueeze(0) + + # Loss + if compute_loss: + loss += compute_loss(train_out, targets)[1] # box, obj, cls + + # NMS + targets[:, 2:] *= torch.tensor((width, height, width, height), device=device) # to pixels + lb = [] # for autolabelling + with dt[2]: + preds = non_max_suppression( + preds, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls, max_det=max_det + ) + + if args.benchmark: + # Metrics + for si, pred in enumerate(preds): + labels = targets[targets[:, 0] == si, 1:] + nl, npr = labels.shape[0], pred.shape[0] # number of labels, predictions + path, shape = Path(paths[si]), shapes[si][0] + correct = torch.zeros(npr, niou, dtype=torch.bool, device=device) # init + seen += 1 + + if npr == 0: + if nl: + stats.append((correct, *torch.zeros((2, 0), device=device), labels[:, 0])) + continue + + # Predictions + if single_cls: + pred[:, 5] = 0 + predn = pred.clone() + scale_boxes(im[si].shape[1:], predn[:, :4], shape, shapes[si][1]) # native-space pred + + # Evaluate + if nl: + tbox = xywh2xyxy(labels[:, 1:5]) # target boxes + scale_boxes(im[si].shape[1:], tbox, shape, shapes[si][1]) # native-space labels + labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels + correct = process_batch(predn, labelsn, iouv) + stats.append((correct, pred[:, 4], pred[:, 5], labels[:, 0])) # (correct, conf, pcls, tcls) + + callbacks.run("on_val_image_end", pred, predn, path, names, im[si]) + + + callbacks.run("on_val_batch_end", batch_i, im, targets, paths, shapes, preds) + + if args.tune: + return 1 + + # Compute metrics + stats = [torch.cat(x, 0).cpu().numpy() for x in zip(*stats)] # to numpy + if len(stats) and stats[0].any(): + _, _, p, r, _, ap, ap_class = ap_per_class(*stats, plot=False, save_dir=save_dir, names=names) + ap50, ap = ap[:, 0], ap.mean(1) # AP@0.5, AP@0.5:0.95 + mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() + nt = np.bincount(stats[3].astype(int), minlength=nc) # number of targets per class + if nt.sum() == 0: + LOGGER.warning(f"WARNING ⚠️ no labels found in {task} set, can not compute metrics without labels") + + pf = "%22s" + "%11i" * 2 + "%11.4g" * 4 # print format + + # Print results per class + if (verbose or (nc < 50)) and nc > 1 and len(stats): + for i, c in enumerate(ap_class): + LOGGER.info(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i])) + + # Print speeds + t = tuple(x.t / seen * 1e3 for x in dt) # images per second + latency = t[2] + if args.benchmark and args.mode == "performance": + print("Batch size = {}".format(args.batch_size)) + print("Latency: {:.3f} ms".format(latency)) + print("Throughput: {:.3f} images/sec".format(1000/latency)) + + # Return results + model.float() # for training + maps = np.zeros(nc) + map + for i, c in enumerate(ap_class): + maps[c] = ap[i] + return map50 def main(): @@ -162,11 +280,11 @@ 
def main(): if args.benchmark: if args.mode == 'performance': - evaluate(args.input_graph) + evaluate(args.input_model) elif args.mode == 'accuracy': - top1, top5, loss = eval(args.input_graph) + map50 = evaluate(args.input_model) print("Batch size = %d" % args.batch_size) - print("Accuracy: %.5f" % top1) + LOGGER.info("Accuracy (map50): %.4g" % map50) if __name__ == "__main__": diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_dataset.sh b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_dataset.sh index 41e1ebb28eb..acdf8a2b5e8 100644 --- a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_dataset.sh +++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_dataset.sh @@ -1,90 +1,13 @@ #!/bin/bash # set -x -DATA_DIR="${PWD}/data" -DATA_NAME="val2017" -DATA_URL_LIST='http://images.cocodataset.org/zips/val2017.zip http://images.cocodataset.org/annotations/annotations_trainval2017.zip' -PACKAGES_LIST='val2017.zip annotations_trainval2017.zip' -VAL_IMAGE_DIR=$DATA_DIR/val2017 -TRAIN_ANNOTATIONS_FILE=$DATA_DIR/annotations/empty.json -VAL_ANNOTATIONS_FILE=$DATA_DIR/annotations/instances_val2017.json -TESTDEV_ANNOTATIONS_FILE=$DATA_DIR/annotations/empty.json -OUTPUT_DIR=$DATA_DIR - -help() -{ - cat <<- EOF - - Desc: Prepare dataset for Tensorflow COCO object detection. - - -h --help help info - - --dataset_location set dataset location, default is ./data - -EOF - exit 0 -} - -function main { - init_params "$@" - download_dataset -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --dataset_location=*) - DATA_DIR=$(echo "$var" |cut -f2 -d=) - ;; - -h|--help) help - ;; - *) - echo "Error: No such parameter: ${var}" - exit 1 - ;; - esac - done - -} - -# removes files that will not be used anymore -function remove_zipped_packages { - for package in $PACKAGES_LIST; do - rm "$package" - done -} - -# download_dataset -function download_dataset { - if [ ! -d "${DATA_DIR}" ]; then - mkdir "${DATA_DIR}" - fi - - cd "${DATA_DIR}" || exit - if [ ! -f "${VAL_IMAGE_DIR}" ]; then - - for dataset_dowload_link in $DATA_URL_LIST; do - wget "$dataset_dowload_link" - done - for package in $PACKAGES_LIST; do - unzip -o "$package" - done - remove_zipped_packages - if [ ! -d empty_dir ]; then - mkdir empty_dir - fi - - cd annotations || exit - echo "{ \"images\": {}, \"categories\": {}}" > empty.json - cd .. - else - echo "Dataset ${DATA_NAME} is exist!" 
- fi - - cd ../ -} - -main "$@" +wget https://github.com/ultralytics/assets/releases/download/v0.0.0/coco2017labels.zip +unzip -o coco2017labels.zip +rm coco2017labels.zip + +cd coco +mkdir images +cd images +wget http://images.cocodataset.org/zips/val2017.zip +unzip -o val2017.zip +rm val2017.zip diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_benchmark.sh index c81e3d38442..0f11c657eb1 100644 --- a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_benchmark.sh +++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_benchmark.sh @@ -10,7 +10,7 @@ function main { # init params function init_params { - batch_size=32 + batch_size=128 iters=100 for var in "$@" do @@ -38,12 +38,25 @@ function init_params { # run_tuning function run_benchmark { - python main.py \ - --input_graph ${input_model} \ - --dataset_location ${dataset_location} \ - --mode ${mode} \ - --batch_size ${batch_size} \ - --benchmark \ + if [[ ${mode} == "accuracy" ]]; then + python main.py \ + --input_model ${input_model} \ + --dataset_location ${dataset_location} \ + --mode ${mode} \ + --batch_size ${batch_size} \ + --benchmark + elif [[ ${mode} == "performance" ]]; then + incbench --num_c 4 python main.py \ + --input_model ${input_model} \ + --dataset_location ${dataset_location} \ + --mode ${mode} \ + --batch_size ${batch_size} \ + --iteration ${iters} + --benchmark + else + echo "Error: No such mode: ${mode}" + exit 1 + fi } main "$@" diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_quant.sh index 1e9599d6f8a..8d1f6807138 100644 --- a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_quant.sh +++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_quant.sh @@ -31,8 +31,8 @@ function init_params { # run tuning function run_benchmark { python main.py \ - --input_graph ${input_model} \ - --output_graph ${output_model} \ + --input_model ${input_model} \ + --output_model ${output_model} \ --dataset_location ${dataset_location} \ --tune } From fdecd9b8e4de1e1a63ae7157e85fa64f9795a9f9 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Wed, 24 Jul 2024 10:05:58 +0800 Subject: [PATCH 03/10] set bf16 op Signed-off-by: zehao-intel --- .../object_detection/yolo_v5/quantization/ptq/main.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py index 7282a7827ec..dd5a7e77d14 100644 --- a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py @@ -275,6 +275,13 @@ def main(): from neural_compressor.tensorflow import StaticQuantConfig, quantize_model quant_config = StaticQuantConfig(weight_granularity="per_channel") + bf16_config = StaticQuantConfig(weight_dtype="bf16", act_dtype="bf16") + quant_config.set_local("functional_16_1/tf_conv_1/sequential_1/conv2d_1/convolution", bf16_config) + quant_config.set_local("functional_16_1/tf_conv_1_2/sequential_1_1/conv2d_1_1/convolution", bf16_config) + quant_config.set_local("functional_16_1/tfc3_1/tf_conv_2_1/conv2d_2_1/convolution", bf16_config) + 
quant_config.set_local("functional_16_1/tfc3_1/sequential_2_1/tf_bottleneck_1/tf_conv_5_1/conv2d_5_1/convolution", bf16_config) + quant_config.set_local("functional_16_1/tfc3_1/tf_conv_3_1/conv2d_3_1/convolution", bf16_config) + quant_config.set_local("functional_16_1/tfc3_1/tf_conv_4_1/conv2d_4_1/convolution", bf16_config) q_model = quantize_model(args.input_model, quant_config, calib_func=evaluate) q_model.save(args.output_model) From 736b67c6897062aae62c72ce12d75fdbb6657e38 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Wed, 24 Jul 2024 13:52:52 +0800 Subject: [PATCH 04/10] select dtype Signed-off-by: zehao-intel --- .../yolo_v5/quantization/ptq/main.py | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py index dd5a7e77d14..9b8f33e6f2e 100644 --- a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py @@ -50,7 +50,7 @@ from yolov5.utils.plots import output_to_target, plot_images, plot_val_study from yolov5.utils.torch_utils import select_device, smart_inference_mode -from neural_compressor.tensorflow.utils import BaseModel +from neural_compressor.tensorflow.utils import BaseModelm, CpuInfo parser = argparse.ArgumentParser() @@ -274,14 +274,20 @@ def main(): if args.tune: from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + excluded_conv_names = [ + "functional_16_1/tf_conv_1/sequential_1/conv2d_1/convolution", + "functional_16_1/tf_conv_1_2/sequential_1_1/conv2d_1_1/convolution", + "functional_16_1/tfc3_1/tf_conv_2_1/conv2d_2_1/convolution", + "functional_16_1/tfc3_1/sequential_2_1/tf_bottleneck_1/tf_conv_5_1/conv2d_5_1/convolution", + "functional_16_1/tfc3_1/tf_conv_3_1/conv2d_3_1/convolution", + "functional_16_1/tfc3_1/tf_conv_4_1/conv2d_4_1/convolution" + ] quant_config = StaticQuantConfig(weight_granularity="per_channel") - bf16_config = StaticQuantConfig(weight_dtype="bf16", act_dtype="bf16") - quant_config.set_local("functional_16_1/tf_conv_1/sequential_1/conv2d_1/convolution", bf16_config) - quant_config.set_local("functional_16_1/tf_conv_1_2/sequential_1_1/conv2d_1_1/convolution", bf16_config) - quant_config.set_local("functional_16_1/tfc3_1/tf_conv_2_1/conv2d_2_1/convolution", bf16_config) - quant_config.set_local("functional_16_1/tfc3_1/sequential_2_1/tf_bottleneck_1/tf_conv_5_1/conv2d_5_1/convolution", bf16_config) - quant_config.set_local("functional_16_1/tfc3_1/tf_conv_3_1/conv2d_3_1/convolution", bf16_config) - quant_config.set_local("functional_16_1/tfc3_1/tf_conv_4_1/conv2d_4_1/convolution", bf16_config) + local_dtype = "bf16" if CpuInfo().bf16 or os.getenv("FORCE_BF16") == "1" else "fp32" + local_config = StaticQuantConfig(weight_dtype=local_dtype, act_dtype=local_dtype) \ + for conv_name in excluded_conv_names: + quant_config.set_local(conv_name, local_config) + q_model = quantize_model(args.input_model, quant_config, calib_func=evaluate) q_model.save(args.output_model) From 6d0b9b5707f10dad8d67c22b1afda3fa311cf3b1 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Wed, 24 Jul 2024 14:35:36 +0800 Subject: [PATCH 05/10] fix pre commit Signed-off-by: zehao-intel --- .../object_detection/yolo_v5/quantization/ptq/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py
index 9b8f33e6f2e..d737093d324 100644
--- a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py
+++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py
@@ -284,7 +284,7 @@ def main():
         ]
         quant_config = StaticQuantConfig(weight_granularity="per_channel")
         local_dtype = "bf16" if CpuInfo().bf16 or os.getenv("FORCE_BF16") == "1" else "fp32"
-        local_config = StaticQuantConfig(weight_dtype=local_dtype, act_dtype=local_dtype) \
+        local_config = StaticQuantConfig(weight_dtype=local_dtype, act_dtype=local_dtype)
         for conv_name in excluded_conv_names:
             quant_config.set_local(conv_name, local_config)
 

From f7b1921504c432d7137ef27acc22fabf340ce65a Mon Sep 17 00:00:00 2001
From: zehao-intel
Date: Wed, 24 Jul 2024 15:05:33 +0800
Subject: [PATCH 06/10] fix import

Signed-off-by: zehao-intel
---
 .../object_detection/yolo_v5/quantization/ptq/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py
index d737093d324..9f1ef532063 100644
--- a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py
+++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py
@@ -50,7 +50,7 @@
 from yolov5.utils.plots import output_to_target, plot_images, plot_val_study
 from yolov5.utils.torch_utils import select_device, smart_inference_mode
 
-from neural_compressor.tensorflow.utils import BaseModelm, CpuInfo
+from neural_compressor.tensorflow.utils import BaseModel, CpuInfo

From aa9d1864f2af258d2796eca1cd242bd692866251 Mon Sep 17 00:00:00 2001
From: zehao-intel
Date: Thu, 25 Jul 2024 09:54:37 +0800
Subject: [PATCH 07/10] fix benchmark

Signed-off-by: zehao-intel
---
 .../object_detection/yolo_v5/quantization/ptq/run_benchmark.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_benchmark.sh
index 0f11c657eb1..12755095b13 100644
--- a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_benchmark.sh
+++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_benchmark.sh
@@ -51,7 +51,7 @@ function run_benchmark {
             --dataset_location ${dataset_location} \
             --mode ${mode} \
             --batch_size ${batch_size} \
-            --iteration ${iters}
+            --iteration ${iters} \
             --benchmark
     else
         echo "Error: No such mode: ${mode}"

From 8e8c1e32aa93a4c232e8b45c631c545b7e94755a Mon Sep 17 00:00:00 2001
From: zehao-intel
Date: Thu, 25 Jul 2024 13:04:54 +0800
Subject: [PATCH 08/10] fix accuracy log

Signed-off-by: zehao-intel
---
 .../object_detection/yolo_v5/quantization/ptq/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py
index 9f1ef532063..50d750344bc 100644
--- a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py
+++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py
@@ -297,7 +297,7 @@ def main():
         elif args.mode == 'accuracy':
             map50 = evaluate(args.input_model)
             print("Batch size = %d" % args.batch_size)
-            LOGGER.info("Accuracy (map50): %.4g" % map50)
+            LOGGER.info("Accuracy: %.4g" % map50)

From eddad2897882c4bc42f34edb52a2f4d2cb4b6854 Mon Sep 17 00:00:00 2001
From: zehao-intel
Date: Thu, 25 Jul 2024 16:51:55 +0800
Subject: [PATCH 09/10] fix incbench

Signed-off-by: zehao-intel
---
 .../object_detection/yolo_v5/quantization/ptq/run_benchmark.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_benchmark.sh
index 12755095b13..df8009e115b 100644
--- a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_benchmark.sh
+++ b/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_benchmark.sh
@@ -46,7 +46,7 @@ function run_benchmark {
             --batch_size ${batch_size} \
             --benchmark
     elif [[ ${mode} == "performance" ]]; then
-        incbench --num_c 4 python main.py \
+        incbench --num_c 4 main.py \
            --input_model ${input_model} \
            --dataset_location ${dataset_location} \
            --mode ${mode} \

From 454148c284aabcb173ca5709c096fae0fbab34b0 Mon Sep 17 00:00:00 2001
From: zehao-intel
Date: Fri, 26 Jul 2024 10:46:50 +0800
Subject: [PATCH 10/10] fix conflict

Signed-off-by: zehao-intel
---
 examples/.config/model_params_tensorflow_3x.json | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/examples/.config/model_params_tensorflow_3x.json b/examples/.config/model_params_tensorflow_3x.json
index 48ff3dae0bd..74b40ea4f5d 100644
--- a/examples/.config/model_params_tensorflow_3x.json
+++ b/examples/.config/model_params_tensorflow_3x.json
@@ -97,7 +97,9 @@
       "model_src_dir": "object_detection/yolo_v5/quantization/ptq",
       "dataset_location": "/tf_dataset2/datasets/coco_yolov5/coco",
       "input_model": "/tf_dataset2/models/tensorflow/yolo_v5/yolov5s.pb",
-      "main_script": "main.py",
+      "main_script": "main.py",
+      "batch_size": 1
+    },
     "faster_rcnn_resnet50": {
       "model_src_dir": "object_detection/faster_rcnn_resnet50/quantization/ptq",
       "dataset_location": "/tf_dataset/tensorflow/coco_val.record",
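
Taken together, patches 03-06 arrive at a quantize-then-fall-back recipe: quantize all ops to INT8 with per-channel weights, then pin a handful of accuracy-sensitive convolutions to bf16 (or fp32 on CPUs without bf16 support) via `set_local`. A condensed sketch of that pattern follows, using the same API calls as the patches above; the single node name is illustrative, standing in for the six yolov5 convolutions listed in patch 04:

```python
import os

from neural_compressor.tensorflow import StaticQuantConfig
from neural_compressor.tensorflow.utils import CpuInfo

# Global INT8 config with per-channel weight quantization.
quant_config = StaticQuantConfig(weight_granularity="per_channel")

# Fall back to bf16 when the CPU supports it (or FORCE_BF16=1), else fp32.
local_dtype = "bf16" if CpuInfo().bf16 or os.getenv("FORCE_BF16") == "1" else "fp32"
local_config = StaticQuantConfig(weight_dtype=local_dtype, act_dtype=local_dtype)

# Illustrative name; patch 04 lists the six real convolution nodes.
excluded_conv_names = ["functional_16_1/tf_conv_1/sequential_1/conv2d_1/convolution"]
for conv_name in excluded_conv_names:
    quant_config.set_local(conv_name, local_config)
```

The overrides trade a little throughput to keep those specific convolutions in higher precision, which is what lets the final model meet the accuracy target stated in the README.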