From f23de0df53ac075d6b4ef430af7bc2bed89fb3c6 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Thu, 4 Jul 2024 17:30:23 +0800 Subject: [PATCH 01/14] Update Examples for TF 3x API Signed-off-by: zehao-intel --- .../.config/model_params_tensorflow_3x.json | 109 ++- .../cv/densenet121/quantization/ptq/README.md | 78 ++ .../quantization/ptq/data_process.py | 581 +++++++++++++++ .../cv/densenet121/quantization/ptq/main.py | 147 ++++ .../quantization/ptq/requirements.txt | 2 + .../quantization/ptq/run_benchmark.sh | 51 ++ .../quantization/ptq/run_quant.sh | 0 .../inception_v3/quantization/ptq/README.md | 75 ++ .../quantization/ptq/data_process.py | 511 +++++++++++++ .../cv/inception_v3/quantization/ptq/main.py | 144 ++++ .../quantization/ptq/requirements.txt | 2 + .../quantization/ptq/run_benchmark.sh | 51 ++ .../quantization/ptq/run_quant.sh | 39 + .../mobilenet_v2/quantization/ptq/README.md | 108 +++ .../quantization/ptq/data_process.py | 511 +++++++++++++ .../cv/mobilenet_v2/quantization/ptq/main.py | 143 ++++ .../quantization/ptq/requirements.txt | 2 + .../quantization/ptq/run_benchmark.sh | 51 ++ .../quantization/ptq/run_quant.sh | 39 + .../prepare_dataset.sh | 0 .../resnet_v2_50/quantization/ptq/README.md | 107 +++ .../quantization/ptq/data_process.py | 511 +++++++++++++ .../cv/resnet_v2_50/quantization/ptq/main.py | 144 ++++ .../quantization/ptq/requirements.txt | 2 + .../quantization/ptq/run_benchmark.sh | 51 ++ .../quantization/ptq/run_quant.sh | 39 + .../cv/vgg16/quantization/ptq/README.md | 108 +++ .../cv/vgg16/quantization/ptq/data_process.py | 581 +++++++++++++++ .../cv/vgg16/quantization/ptq/main.py | 146 ++++ .../vgg16/quantization/ptq/requirements.txt | 2 + .../vgg16/quantization/ptq/run_benchmark.sh | 51 ++ .../cv/vgg16/quantization/ptq/run_quant.sh | 39 + .../quantization/ptq/README.md | 8 +- .../quantization/ptq/__init__.py | 0 .../quantization/ptq/data_process.py | 0 .../quantization/ptq/main.py | 1 - .../quantization/ptq/requirements.txt | 0 .../quantization/ptq/run_benchmark.sh | 0 .../quantization/ptq/run_quant.sh | 39 + .../quantization/ptq/README.md | 8 +- .../quantization/ptq/main.py | 3 - .../quantization/ptq/README.md | 187 +++++ .../quantization/ptq/download_dataset.py | 38 + .../distilbert_base/quantization/ptq/main.py | 324 ++++++++ .../quantization/ptq/requirements.txt | 7 + .../quantization/ptq/run_benchmark.sh | 88 +++ .../quantization/ptq/run_quant.sh | 83 +++ .../transformer_lt/quantization/ptq/README.md | 132 ++++ .../transformer_lt/quantization/ptq/main.py | 261 +++++++ .../quantization/ptq/prepare_dataset_model.sh | 62 ++ .../quantization/ptq/run_benchmark.sh | 64 ++ .../quantization/ptq/run_quant.sh | 43 ++ .../quantization/ptq/utils/__init__.py | 0 .../quantization/ptq/utils/metrics.py | 490 +++++++++++++ .../quantization/ptq/utils/tokenizer.py | 620 ++++++++++++++++ .../quantization/ptq/utils/tokenizer_test.py | 182 +++++ .../quantization/ptq/README.md | 130 ++++ .../quantization/ptq/coco_tools.py | 694 ++++++++++++++++++ .../quantization/ptq/data_process.py | 655 +++++++++++++++++ .../quantization/ptq/main.py | 128 ++++ .../quantization/ptq/run_benchmark.sh | 51 ++ .../quantization/ptq/run_quant.sh | 41 ++ .../quantization/ptq/README.md | 140 ++++ .../quantization/ptq/coco_tools.py | 694 ++++++++++++++++++ .../quantization/ptq/data_process.py | 655 +++++++++++++++++ .../quantization/ptq/main.py | 131 ++++ .../quantization/ptq/run_benchmark.sh | 51 ++ .../quantization/ptq/run_quant.sh | 41 ++ .../object_detection/prepare_dataset.sh | 136 ++++ 
.../object_detection/requirements.txt | 8 + .../quantization/ptq/README.md | 158 ++++ .../quantization/ptq/coco_tools.py | 694 ++++++++++++++++++ .../quantization/ptq/data_process.py | 655 +++++++++++++++++ .../ssd_mobilenet_v1/quantization/ptq/main.py | 129 ++++ .../quantization/ptq/prepare_model.py | 99 +++ .../quantization/ptq/run_benchmark.sh | 52 ++ .../quantization/ptq/run_quant.sh | 41 ++ .../quantization/ptq/README.md | 98 +++ .../quantization/ptq/main.py | 348 +++++++++ .../ptq/preprocess_csv_tfrecords.py | 155 ++++ .../quantization/ptq/requirements.txt | 9 + .../quantization/ptq/run_benchmark.sh | 57 ++ .../quantization/ptq/run_quant.sh | 48 ++ .../3dunet-mlperf/quantization/ptq/README.md | 73 ++ .../quantization/ptq/__init__.py | 19 + .../ptq/brats_cal_images_list.txt | 40 + .../3dunet-mlperf/quantization/ptq/main.py | 219 ++++++ .../ptq/nnUNet/Task043_BraTS_2019.py | 125 ++++ .../quantization/ptq/nnUNet/__init__.py | 19 + .../ptq/nnUNet/folds/fold0_validation.txt | 67 ++ .../ptq/nnUNet/folds/fold1_validation.txt | 67 ++ .../ptq/nnUNet/folds/fold2_validation.txt | 67 ++ .../ptq/nnUNet/folds/fold3_validation.txt | 67 ++ .../ptq/nnUNet/folds/fold4_validation.txt | 67 ++ .../quantization/ptq/nnUNet/postprocess.py | 72 ++ .../quantization/ptq/nnUNet/preprocess.py | 109 +++ .../quantization/ptq/nnUNet/setup.py | 81 ++ .../quantization/ptq/requirements.txt | 1 + .../quantization/ptq/run_benchmark.sh | 61 ++ .../quantization/ptq/run_quant.sh | 48 ++ .../quantization/ptq/README.md | 131 ++++ .../ptq/content_images/colva_beach_sq.jpg | Bin 0 -> 14235 bytes .../ptq/content_images/golden_gate_sq.jpg | Bin 0 -> 12423 bytes .../quantization/ptq/data_process.py | 489 ++++++++++++ .../quantization/ptq/main.py | 208 ++++++ .../quantization/ptq/prepare_model.py | 33 + .../quantization/ptq/requirements.txt | 2 + .../quantization/ptq/run_benchmark.sh | 61 ++ .../quantization/ptq/run_quant.sh | 50 ++ .../ptq/style_images/kanagawa_great_wave.jpg | Bin 0 -> 28352 bytes .../ptq/style_images/zigzag_colorful.jpg | Bin 0 -> 19632 bytes 111 files changed, 15326 insertions(+), 13 deletions(-) create mode 100644 examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/README.md create mode 100644 examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/data_process.py create mode 100644 examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/main.py create mode 100644 examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/requirements.txt create mode 100644 examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/run_benchmark.sh rename examples/3.x_api/tensorflow/{image_recognition/vision_transformer => cv/densenet121}/quantization/ptq/run_quant.sh (100%) create mode 100644 examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/README.md create mode 100644 examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/data_process.py create mode 100644 examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/main.py create mode 100644 examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/requirements.txt create mode 100644 examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/run_benchmark.sh create mode 100644 examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/run_quant.sh create mode 100644 examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/README.md create mode 100644 examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/data_process.py create mode 100644 examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/main.py create mode 
100644 examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/requirements.txt create mode 100644 examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/run_benchmark.sh create mode 100644 examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/run_quant.sh rename examples/3.x_api/tensorflow/{image_recognition => cv}/prepare_dataset.sh (100%) create mode 100644 examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/README.md create mode 100644 examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/data_process.py create mode 100644 examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/main.py create mode 100644 examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/requirements.txt create mode 100644 examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/run_benchmark.sh create mode 100644 examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/run_quant.sh create mode 100644 examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/README.md create mode 100644 examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/data_process.py create mode 100644 examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/main.py create mode 100644 examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/requirements.txt create mode 100644 examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/run_benchmark.sh create mode 100644 examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/run_quant.sh rename examples/3.x_api/tensorflow/{image_recognition => cv}/vision_transformer/quantization/ptq/README.md (77%) rename examples/3.x_api/tensorflow/{image_recognition => cv}/vision_transformer/quantization/ptq/__init__.py (100%) rename examples/3.x_api/tensorflow/{image_recognition => cv}/vision_transformer/quantization/ptq/data_process.py (100%) rename examples/3.x_api/tensorflow/{image_recognition => cv}/vision_transformer/quantization/ptq/main.py (99%) rename examples/3.x_api/tensorflow/{image_recognition => cv}/vision_transformer/quantization/ptq/requirements.txt (100%) rename examples/3.x_api/tensorflow/{image_recognition => cv}/vision_transformer/quantization/ptq/run_benchmark.sh (100%) create mode 100644 examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/run_quant.sh create mode 100644 examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/README.md create mode 100644 examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/download_dataset.py create mode 100644 examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/main.py create mode 100644 examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/requirements.txt create mode 100644 examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/run_benchmark.sh create mode 100644 examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/run_quant.sh create mode 100644 examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/README.md create mode 100644 examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/main.py create mode 100644 examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/prepare_dataset_model.sh create mode 100644 examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_benchmark.sh create mode 100644 examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_quant.sh create mode 100644 examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/__init__.py create mode 100644 examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/metrics.py create mode 100644 
examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer.py create mode 100644 examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer_test.py create mode 100644 examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/README.md create mode 100644 examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/coco_tools.py create mode 100644 examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/data_process.py create mode 100644 examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/main.py create mode 100644 examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_benchmark.sh create mode 100644 examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_quant.sh create mode 100644 examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md create mode 100644 examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/coco_tools.py create mode 100644 examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/data_process.py create mode 100644 examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py create mode 100644 examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_benchmark.sh create mode 100644 examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_quant.sh create mode 100644 examples/3.x_api/tensorflow/object_detection/prepare_dataset.sh create mode 100644 examples/3.x_api/tensorflow/object_detection/requirements.txt create mode 100644 examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/README.md create mode 100644 examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/coco_tools.py create mode 100644 examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/data_process.py create mode 100644 examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/main.py create mode 100644 examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/prepare_model.py create mode 100644 examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_benchmark.sh create mode 100644 examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_quant.sh create mode 100644 examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md create mode 100644 examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/main.py create mode 100644 examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/preprocess_csv_tfrecords.py create mode 100644 examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/requirements.txt create mode 100644 examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_benchmark.sh create mode 100644 examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_quant.sh create mode 100644 examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md create mode 100644 examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/__init__.py create mode 100644 
examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/brats_cal_images_list.txt create mode 100644 examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/main.py create mode 100644 examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/Task043_BraTS_2019.py create mode 100644 examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/__init__.py create mode 100644 examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold0_validation.txt create mode 100644 examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold1_validation.txt create mode 100644 examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold2_validation.txt create mode 100644 examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold3_validation.txt create mode 100644 examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold4_validation.txt create mode 100644 examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/postprocess.py create mode 100644 examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/preprocess.py create mode 100644 examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/setup.py create mode 100644 examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt create mode 100644 examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh create mode 100644 examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_quant.sh create mode 100644 examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md create mode 100644 examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/colva_beach_sq.jpg create mode 100644 examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/golden_gate_sq.jpg create mode 100644 examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/data_process.py create mode 100644 examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/main.py create mode 100644 examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/prepare_model.py create mode 100644 examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/requirements.txt create mode 100644 examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_benchmark.sh create mode 100644 examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_quant.sh create mode 100644 examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/kanagawa_great_wave.jpg create mode 100644 examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/zigzag_colorful.jpg diff --git a/examples/.config/model_params_tensorflow_3x.json b/examples/.config/model_params_tensorflow_3x.json index 70e1497e508..f81b8abfeb3 100644 --- a/examples/.config/model_params_tensorflow_3x.json +++ 
b/examples/.config/model_params_tensorflow_3x.json @@ -8,6 +8,20 @@ "batch_size": 64, "fp32_model_url": "https://storage.googleapis.com/intel-optimized-tensorflow/models/v2_7_0/fp32_bert_squad.pb" }, + "distilbert_base": { + "model_src_dir": "nlp/distilbert_base/quantization/ptq", + "dataset_location": "/tf_dataset2/datasets/sst2_validation_dataset", + "input_model": "/tf_dataset2/models/tensorflow/distilbert_base/fp32/distilbert_base_fp32.pb", + "main_script": "run_inference.py", + "batch_size": 128 + }, + "distilbert_base_sq": { + "model_src_dir": "nlp/distilbert_base/quantization/ptq", + "dataset_location": "/tf_dataset2/datasets/sst2_validation_dataset", + "input_model": "/tf_dataset2/models/tensorflow/distilbert_base/fp32/distilbert_base_fp32.pb", + "main_script": "run_inference.py", + "batch_size": 128 + }, "opt_125m_sq": { "model_src_dir": "nlp/large_language_models/quantization/ptq/smoothquant", "dataset_location": "", @@ -29,8 +43,44 @@ "main_script": "main.py", "batch_size": 1 }, + "transformer_lt": { + "model_src_dir": "nlp/transformer_lt/quantization/ptq", + "dataset_location": "/tf_dataset/tensorflow/transformer-lt-official-fp32-inference/transformer_lt_official_fp32_pretrained_model/data", + "input_model": "/tf_dataset/tensorflow/transformer-lt-official-fp32-inference/transformer_lt_official_fp32_pretrained_model/graph/fp32_graphdef.pb", + "main_script": "main.py", + "batch_size": 64 + }, + "densenet121": { + "model_src_dir": "cv/densenet121/quantization/ptq", + "dataset_location": "/tf_dataset/dataset/imagenet", + "input_model": "/tf_dataset/tensorflow/densenet/densenet-121.pb", + "main_script": "main.py", + "batch_size": 32 + }, + "inception_v3": { + "model_src_dir": "cv/inception_v3/quantization/ptq", + "dataset_location": "/tf_dataset/dataset/imagenet", + "input_model": "/tf_dataset/pre-trained-models/inceptionv3/fp32/freezed_inceptionv3.pb", + "main_script": "main.py", + "batch_size": 32, + "fp32_model_url": "https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/inceptionv3_fp32_pretrained_model.pb" + }, + "mobilenetv2": { + "model_src_dir": "cv/mobilenet_v2/quantization/ptq", + "dataset_location": "/tf_dataset/dataset/imagenet", + "input_model": "/tf_dataset/pre-train-model-slim/pbfile/frozen_pb/frozen_mobilenet_v2.pb", + "main_script": "main.py", + "batch_size": 32 + }, + "vgg16": { + "model_src_dir": "cv/vgg16/quantization/ptq", + "dataset_location": "/tf_dataset/dataset/imagenet", + "input_model": "/tf_dataset/pre-train-model-slim/pbfile/frozen_pb/frozen_vgg16.pb", + "main_script": "main.py", + "batch_size": 32 + }, "ViT": { - "model_src_dir": "image_recognition/vision_transformer/quantization/ptq", + "model_src_dir": "cv/vision_transformer/quantization/ptq", "dataset_location": "/tf_dataset/dataset/imagenet", "input_model": "/tf_dataset/tensorflow/vit/HF-ViT-Base16-Img224-frozen.pb", "main_script": "main.py", @@ -42,6 +92,63 @@ "input_model": "/tf_dataset/tensorflow/graphsage/graphsage_frozen_model.pb", "main_script": "main.py", "batch_size": 1000 + }, + "faster_rcnn_resnet50": { + "model_src_dir": "object_detection/faster_rcnn_resnet50/quantization/ptq", + "dataset_location": "/tf_dataset/tensorflow/coco_val.record", + "input_model": "/tf_dataset/pre-train-model-oob/object_detection/faster_rcnn_resnet50/frozen_inference_graph.pb", + "main_script": "main.py", + "batch_size": 10 + }, + "mask_rcnn_inception_v2": { + "model_src_dir": "object_detection/mask_rcnn_inception_v2/quantization/ptq", + "dataset_location": 
"/tf_dataset/tensorflow/coco_val.record", + "input_model": "/tf_dataset/pre-train-model-oob/object_detection/mask_rcnn_inception_v2/frozen_inference_graph.pb", + "main_script": "main.py", + "batch_size": 10 + }, + "mask_rcnn_inception_v2_ckpt": { + "model_src_dir": "object_detection/mask_rcnn_inception_v2/quantization/ptq", + "dataset_location": "/tf_dataset/tensorflow/coco_val.record", + "input_model": "/tf_dataset/pre-train-model-oob/object_detection/mask_rcnn_inception_v2", + "main_script": "main.py", + "batch_size": 10 + }, + "ssd_mobilenet_v1": { + "model_src_dir": "object_detection/ssd_mobilenet_v1/quantization/ptq", + "dataset_location": "/tf_dataset/tensorflow/coco_val.record", + "input_model": "/tf_dataset/pre-train-model-oob/object_detection/ssd_mobilenet_v1/frozen_inference_graph.pb", + "main_script": "main.py", + "batch_size": 10 + }, + "ssd_mobilenet_v1_ckpt": { + "model_src_dir": "object_detection/ssd_mobilenet_v1/quantization/ptq", + "dataset_location": "/tf_dataset/tensorflow/coco_val.record", + "input_model": "/tf_dataset/pre-train-model-oob/object_detection/ssd_mobilenet_v1", + "main_script": "main.py", + "batch_size": 10 + }, + "wide_deep_large_ds": { + "model_src_dir": "recommendation/wide_deep_large_ds/quantization/ptq", + "dataset_location": "/tf_dataset/tensorflow/wide_deep_large_ds/dataset", + "input_model": "/tf_dataset/tensorflow/wide_deep_large_ds/fp32_optimized_graph.pb", + "main_script": "inference.py", + "batch_size": 256, + "fp32_model_url": "https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/wide_deep_fp32_pretrained_model.pb" + }, + "3dunet-mlperf": { + "model_src_dir": "semantic_image_segmentation/3dunet-mlperf/quantization/ptq", + "dataset_location": "/tf_dataset2/models/tensorflow/3dunet/build", + "input_model": "/tf_dataset2/models/tensorflow/3dunet/3dunet_dynamic_ndhwc.pb", + "main_script": "run_accuracy.py", + "batch_size": 100 + }, + "style_transfer": { + "model_src_dir": "style_transfer/arbitrary_style_transfer/quantization/ptq", + "dataset_location": "style_images,content_images", + "input_model": "/tf_dataset/tensorflow/style_transfer/arbitrary_style_transfer/model.ckpt", + "main_script": "style_tune.py", + "batch_size": 1 } } } diff --git a/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/README.md b/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/README.md new file mode 100644 index 00000000000..7dbe04de7ca --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/README.md @@ -0,0 +1,78 @@ +Step-by-Step +============ + +This document list steps of reproducing densenet121 model tuning and benchmark results via Neural Compressor. +This example can run on Intel CPUs and GPUs. + +> **Note**: +> The models is supported in Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). +# Prerequisite + +## 1. Environment + +### Installation +Recommend python 3.9 or higher version. +```shell +pip install -r requirements.txt +``` + +### Install Intel Extension for Tensorflow +#### Quantizing the model on Intel GPU(Mandatory to install ITEX) +Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. 
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+For more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers).
+
+#### Quantizing the model on Intel CPU (Optional to install ITEX)
+Intel Extension for TensorFlow for Intel CPUs is currently experimental. It is not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+> **Note**:
+> The version compatibility of stock TensorFlow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible TensorFlow and ITEX.
+
+## 2. Prepare pre-trained model
+The densenet-series models come from [tensorflow-densenet](https://github.com/pudae/tensorflow-densenet). Please follow the steps in that repository to export the pb files, or use the OpenVINO downloader tools:
+ ```shell
+ git clone https://github.com/openvinotoolkit/open_model_zoo.git
+ cd open_model_zoo/tools/downloader
+ git checkout tags/2021.2
+ pip install -r requirements.in
+ python downloader.py --name densenet-{121|161|169}-tf -o /PATH/TO/MODEL
+ ```
+
+## 3. Prepare Dataset
+
+ The TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format.
+ We also prepared related scripts in the `examples/3.x_api/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directories based on the label (synset) of each image, you can use the command below to convert it to the TF records format.
+
+ ```shell
+ cd examples/3.x_api/tensorflow/cv
+ # convert validation subset
+ bash prepare_dataset.sh --output_dir=./densenet121/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation
+ # convert train subset
+ bash prepare_dataset.sh --output_dir=./densenet121/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train
+ ```
+> **Note**:
+> The raw ImageNet dataset, which resides in JPEG files, should be organized in the following directory structure. Taking the validation set as an example:
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000293.JPEG
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000543.JPEG
+> where 'n01440764' is the unique synset label associated with these images. + +# Run + +## 1 Quantization + + ```shell + bash run_quant.sh --input_model=/PATH/TO/densenet-121.pb \ + --output_model=./nc_densenet121.pb --dataset_location=/path/to/ImageNet/ + ``` + +## 2. Benchmark + ```shell + bash run_benchmark.sh --input_model=./nc_densenet121.pb --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 + bash run_benchmark.sh --input_model=./nc_densenet121.pb --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 + ``` diff --git a/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/data_process.py new file mode 100644 index 00000000000..17b4d9cec5e --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/data_process.py @@ -0,0 +1,581 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import collections + +import numpy as np +import tensorflow as tf + +from abc import abstractmethod +from neural_compressor.common import logger +from neural_compressor.tensorflow.utils.data import default_collate + +class ParseDecodeImagenet: + """Parse features in Example proto. + + Returns: + tuple of parsed image and label + """ + + def __call__(self, sample): + """Parse features in example.""" + # Dense features in Example proto. + feature_map = { + "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1), + } + + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) + + features = tf.io.parse_single_example(serialized=sample, features=feature_map) + label = tf.cast(features["image/class/label"], dtype=tf.int32) + image = features["image/encoded"] + image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST") + return (image, label) + + +class ResizeCropImagenet(object): + """Combination of a series of transforms which is applicable to images in Imagenet. 
+ + Args: + height (int): Height of the result + width (int): Width of the result + random_crop (bool, default=False): whether to random crop + resize_side (int, default=256):desired shape after resize operation + random_flip_left_right (bool, default=False): whether to random flip left and right + mean_value (list, default=[0.0,0.0,0.0]):means for each channel + scale (float, default=1.0):std value + + Returns: + tuple of processed image and label + """ + + def __init__( + self, + height, + width, + random_crop=False, + resize_side=256, + resize_method="bilinear", + random_flip_left_right=False, + mean_value=[0.0, 0.0, 0.0], + scale=1.0, + data_format="channels_last", + subpixels="RGB", + ): + """Initialize `TensorflowResizeCropImagenetTransform` class.""" + self.height = height + self.width = width + self.mean_value = mean_value + self.scale = scale + self.random_crop = random_crop + self.random_flip_left_right = random_flip_left_right + self.resize_side = resize_side + self.resize_method = resize_method + self.data_format = data_format + self.subpixels = subpixels + + # sample is (images, labels) + def __call__(self, sample): + """Convert `TensorflowResizeCropImagenetTransform` feature.""" + image, label = sample + shape = tf.shape(input=image) + + height = ( + tf.cast(shape[0], dtype=tf.float32) + if self.data_format == "channels_last" + else tf.cast(shape[1], dtype=tf.float32) + ) + width = ( + tf.cast(shape[1], dtype=tf.float32) + if self.data_format == "channels_last" + else tf.cast(shape[2], dtype=tf.float32) + ) + scale = tf.cond( + pred=tf.greater(height, width), + true_fn=lambda: self.resize_side / width, + false_fn=lambda: self.resize_side / height, + ) + + scale = tf.cast(scale, dtype=tf.float32) + new_height = tf.cast(tf.math.rint(height * scale), dtype=tf.int32) + new_width = tf.cast(tf.math.rint(width * scale), dtype=tf.int32) + + if self.subpixels == "BGR" and self.data_format == "channels_first": + # 'RGB'->'BGR' + image = tf.cond( + tf.equal(tf.rank(image), 3), + lambda: tf.experimental.numpy.moveaxis(image[::-1, ...], 0, -1), + lambda: tf.experimental.numpy.moveaxis(image[:, ::-1, ...], 1, -1), + ) + elif self.subpixels == "BGR": + # 'RGB'->'BGR' + image = image[..., ::-1] + image = tf.expand_dims(image, 0) + image = tf.image.resize(image, [new_height, new_width], method=self.resize_method) + image = tf.squeeze(image) + shape = tf.shape(input=image) + if self.random_crop: + y0 = tf.random.uniform(shape=[], minval=0, maxval=(shape[0] - self.height + 1), dtype=tf.dtypes.int32) + x0 = tf.random.uniform(shape=[], minval=0, maxval=(shape[1] - self.width + 1), dtype=tf.dtypes.int32) + else: + y0 = (shape[0] - self.height) // 2 + x0 = (shape[1] - self.width) // 2 + + image = tf.image.crop_to_bounding_box(image, y0, x0, self.height, self.width) + image.set_shape([self.height, self.width, 3]) + if self.random_flip_left_right: + image = tf.image.random_flip_left_right(image) + means = tf.broadcast_to(self.mean_value, tf.shape(input=image)) + image = (image - means) * self.scale + return (image, label) + + +class ComposeTransform(object): + """Composes several transforms together. 
+ + Args: + transform_list (list of Transform objects): list of transforms to compose + + Returns: + sample (tuple): tuple of processed image and label + """ + + def __init__(self, transform_list): + """Initialize `ComposeTransform` class.""" + self.transform_list = transform_list + + def __call__(self, sample): + """Call transforms in transform_list.""" + for transform in self.transform_list: + sample = transform(sample) + return sample + + +class LabelShift(object): + """Convert label to label - label_shift. + + Args: + label_shift(int, default=0): number of label shift + + Returns: + tuple of processed image and label + """ + + def __init__(self, label_shift=0): + """Initialize `LabelShift` class.""" + self.label_shift = label_shift + + def __call__(self, sample): + """Convert label to label_shift.""" + images, labels = sample + if isinstance(labels, np.ndarray): + labels = labels - self.label_shift + elif isinstance(labels, list): + if isinstance(labels[0], tuple): + labels = [tuple(np.array(label) - self.label_shift) for label in labels] + elif isinstance(labels[0], np.ndarray): + labels = [label - self.label_shift for label in labels] + else: + labels = np.array(labels) - self.label_shift + labels = labels.tolist() + else: + labels = np.array(labels) - self.label_shift + return images, labels + + +class ImageRecordDataset(object): + """Tensorflow imageNet database in tf record format. + + Please arrange data in this way: + root/validation-000-of-100 + root/validation-001-of-100 + ... + root/validation-099-of-100 + The file name needs to follow this pattern: '* - * -of- *' + + Args: root (str): Root directory of dataset. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. + """ + + """Configuration for Imagenet dataset.""" + + def __new__(cls, root, transform=None, filter=None): + """Build a new object of TensorflowImageRecord class.""" + from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module + + glob_pattern = os.path.join(root, "*-*-of-*") + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError("Found no files in --root matching: {}".format(glob_pattern)) + + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) + ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names))) + + if transform is not None: + transform.transform_list.insert(0, ParseDecodeImagenet()) + else: + transform = ParseDecodeImagenet() + ds = ds.map(transform, num_parallel_calls=None) + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned + return ds + + +class BaseMetric(object): + """The base class of Metric.""" + + def __init__(self, metric, single_output=False, hvd=None): + """Initialize the basic metric. + + Args: + metric: The metric class. + single_output: Whether the output is single or not, defaults to False. + hvd: The Horovod class for distributed training, defaults to None. + """ + self._metric_cls = metric + self._single_output = single_output + self._hvd = hvd + + def __call__(self, *args, **kwargs): + """Evaluate the model predictions, and the reference. + + Returns: + The class itself. 
+ """ + self._metric = self._metric_cls(*args, **kwargs) + return self + + @abstractmethod + def update(self, preds, labels=None, sample_weight=None): + """Update the state that need to be evaluated. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Clear the predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def result(self): + """Evaluate the difference between predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @property + def metric(self): + """Return its metric class. + + Returns: + The metric class. + """ + return self._metric_cls + + @property + def hvd(self): + """Return its hvd class. + + Returns: + The hvd class. + """ + return self._hvd + + @hvd.setter + def hvd(self, hvd): + """Set its hvd. + + Args: + hvd: The Horovod class for distributed training. + """ + self._hvd = hvd + + +class TopKMetric(BaseMetric): + """Compute Top-k Accuracy classification score for Tensorflow model. + + This metric computes the number of times where the correct label is among + the top k labels predicted. + + Attributes: + k (int): The number of most likely outcomes considered to find the correct label. + num_correct: The number of predictions that were correct classified. + num_sample: The total number of predictions. + """ + + def __init__(self, k=1): + """Initialize the k, number of samples and correct predictions. + + Args: + k: The number of most likely outcomes considered to find the correct label. + """ + self.k = k + self.num_correct = 0 + self.num_sample = 0 + + def update(self, preds, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. + """ + preds, labels = TopKMetric._topk_shape_validate(preds, labels) + + labels = labels.reshape([len(labels)]) + with tf.Graph().as_default() as acc_graph: + topk = tf.nn.in_top_k( + predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k + ) + fp32_topk = tf.cast(topk, tf.float32) + correct_tensor = tf.reduce_sum(input_tensor=fp32_topk) + + with tf.compat.v1.Session() as acc_sess: + correct = acc_sess.run(correct_tensor) + + self.num_sample += len(labels) + self.num_correct += correct + + def reset(self): + """Reset the number of samples and correct predictions.""" + self.num_correct = 0 + self.num_sample = 0 + + def result(self): + """Compute the top-k score. + + Returns: + The top-k score. 
+ """ + if self.num_sample == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + elif getattr(self, "_hvd", None) is not None: # pragma: no cover + allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) + allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) + return allgather_num_correct / allgather_num_sample + return self.num_correct / self.num_sample + + @staticmethod + def _topk_shape_validate(preds, labels): + # preds shape can be Nxclass_num or class_num(N=1 by default) + # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax + if isinstance(preds, int): + preds = [preds] + preds = np.array(preds) + elif isinstance(preds, np.ndarray): + preds = np.array(preds) + elif isinstance(preds, list): + preds = np.array(preds) + preds = preds.reshape((-1, preds.shape[-1])) + + # consider labels just int value 1x1 + if isinstance(labels, int): + labels = [labels] + labels = np.array(labels) + elif isinstance(labels, tuple): + labels = np.array([labels]) + labels = labels.reshape((labels.shape[-1], -1)) + elif isinstance(labels, list): + if isinstance(labels[0], int): + labels = np.array(labels) + labels = labels.reshape((labels.shape[0], 1)) + elif isinstance(labels[0], tuple): + labels = np.array(labels) + labels = labels.reshape((labels.shape[-1], -1)) + else: + labels = np.array(labels) + # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot) + # only support 2 dimension one-shot labels + # or 1 dimension one-hot class_num will confuse with N + + if len(preds.shape) == 1: + N = 1 + class_num = preds.shape[0] + preds = preds.reshape([-1, class_num]) + elif len(preds.shape) >= 2: + N = preds.shape[0] + preds = preds.reshape([N, -1]) + class_num = preds.shape[1] + + label_N = labels.shape[0] + assert label_N == N, "labels batch size should same with preds" + labels = labels.reshape([N, -1]) + # one-hot labels will have 2 dimension not equal 1 + if labels.shape[1] != 1: + labels = labels.argsort()[..., -1:] + return preds, labels + + +class TFDataLoader(object): # pragma: no cover + """Tensorflow dataloader class. + + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict + method to do session run, this dataloader is designed to satisfy the usage of feed dict + in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. + + Args: + dataset: obj. wrapper of needed data. + batch_size: int. 
batch size + """ + + def __init__(self, dataset, batch_size=1, last_batch="rollover"): + """Initialize `TFDataDataLoader` class.""" + self.dataset = dataset + self.last_batch = last_batch + self.batch_size = batch_size + dataset = dataset.batch(batch_size) + + def batch(self, batch_size, last_batch="rollover"): + """Dataset return data per batch.""" + drop_last = False if last_batch == "rollover" else True + self.batch_size = batch_size + self.dataset = self.dataset.batch(batch_size, drop_last) + + def __iter__(self): + """Iterate dataloader.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + ) + + def _generate_dataloader( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=None, + pin_memory=None, + distributed=False, + ): + """Yield data.""" + drop_last = False if last_batch == "rollover" else True + + def check_dynamic_shape(element_spec): + if isinstance(element_spec, collections.abc.Sequence): + return any([check_dynamic_shape(ele) for ele in element_spec]) + elif isinstance(element_spec, tf.TensorSpec): + return True if element_spec.shape.num_elements() is None else False + else: + raise ValueError("unrecognized element spec...") + + def squeeze_output(output): + if isinstance(output, collections.abc.Sequence): + return [squeeze_output(ele) for ele in output] + elif isinstance(output, np.ndarray): + return np.squeeze(output, axis=0) + else: + raise ValueError("not supported output format....") + + if tf.executing_eagerly(): + index = 0 + outputs = [] + for iter_tensors in dataset: + samples = [] + iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] + if isinstance(iter_inputs, tf.Tensor): + samples.append(iter_inputs.numpy()) + else: + samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) + if isinstance(iter_labels, tf.Tensor): + samples.append(iter_labels.numpy()) + else: + samples.append([np.array(l) for l in iter_labels]) + index += 1 + outputs.append(samples) + if index == batch_size: + outputs = default_collate(outputs) + yield outputs + outputs = [] + index = 0 + if len(outputs) > 0: + outputs = default_collate(outputs) + yield outputs + else: + try_single_batch = check_dynamic_shape(dataset.element_spec) + dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + iter_tensors = ds_iterator.get_next() + data_config = tf.compat.v1.ConfigProto() + data_config.use_per_session_threads = 1 + data_config.intra_op_parallelism_threads = 1 + data_config.inter_op_parallelism_threads = 16 + data_sess = tf.compat.v1.Session(config=data_config) + # pylint: disable=no-name-in-module + from tensorflow.python.framework.errors_impl import OutOfRangeError + + while True: + if not try_single_batch: + try: + outputs = data_sess.run(iter_tensors) + yield outputs + except OutOfRangeError: + data_sess.close() + return + else: + try: + outputs = [] + for i in range(0, batch_size): + outputs.append(squeeze_output(data_sess.run(iter_tensors))) + outputs = default_collate(outputs) + yield outputs + except OutOfRangeError: + if len(outputs) == 0: + data_sess.close() + return + else: + outputs = default_collate(outputs) + yield outputs + data_sess.close() + return diff --git a/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/main.py b/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/main.py new file mode 100644 index 
00000000000..21ca3bcb3d8 --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/main.py @@ -0,0 +1,147 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time + +import numpy as np +import tensorflow as tf + +from argparse import ArgumentParser +from data_process import ( + ImageRecordDataset, + ComposeTransform, + ResizeCropImagenet, + LabelShift, + TFDataLoader, + TopKMetric +) + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +arg_parser = ArgumentParser(description='Parse args') +arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') +arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') +arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') +arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') +arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') +arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') +arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') +arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='interations') +args = arg_parser.parse_args() + +def evaluate(model, eval_dataloader, metric, postprocess=None): + """Custom evaluate function to estimate the accuracy of the model. + + Args: + model (tf.Graph_def): The input model graph + + Returns: + accuracy (float): evaluation result, the larger is better. + """ + from neural_compressor.tensorflow import Model + model = Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + iteration = -1 + if args.benchmark and args.mode == 'performance': + iteration = args.iters + + def eval_func(dataloader): + latency_list = [] + for idx, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + + start = time.time() + predictions = model.sess.run(output_tensor, feed_dict) + end = time.time() + + if postprocess: + predictions, labels = postprocess((predictions, labels)) + + metric.update(predictions, labels) + latency_list.append(end-start) + if idx + 1 == iteration: + break + latency = np.array(latency_list).mean() / args.batch_size + return latency + + latency = eval_func(eval_dataloader) + if args.benchmark and args.mode == 'performance': + print("Batch size = {}".format(args.batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. 
/ latency)) + acc = metric.result() + return acc + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def run(self): + """This is neural_compressor function include tuning, export and benchmark option.""" + from neural_compressor.common import set_random_seed + set_random_seed(9527) + + if args.tune: + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + + dataset = ImageRecordDataset( + root=args.dataset_location, + transform=ComposeTransform(transform_list= [ + ResizeCropImagenet(height=224, width=224, scale=0.017, mean_value=[123.68, 116.78, 103.94]), + ] + ) + ) + calib_dataloader = TFDataLoader(dataset=dataset, batch_size=10) + + quant_config = StaticQuantConfig() + q_model = quantize_model(args.input_graph, quant_config, calib_dataloader) + q_model.save(args.output_graph) + + if args.benchmark: + dataset = ImageRecordDataset( + root=args.dataset_location, + transform=ComposeTransform(transform_list= [ + ResizeCropImagenet(height=224, width=224, scale=0.017, mean_value=[123.68, 116.78, 103.94]), + ] + ) + ) + dataloader = TFDataLoader(dataset=dataset, batch_size=args.batch_size) + + def eval(model): + top1 = TopKMetric(k=1) + postprocess = LabelShift(label_shift=1) + return evaluate(model, dataloader, top1, postprocess) + + if args.mode == 'performance': + eval(args.input_graph) + elif args.mode == 'accuracy': + acc_result = eval(args.input_graph) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/requirements.txt new file mode 100644 index 00000000000..2755e1a41ac --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/requirements.txt @@ -0,0 +1,2 @@ +tensorflow +neural-compressor diff --git a/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..8ecac837cf7 --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/run_benchmark.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + batch_size=32 + iters=100 + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_benchmark { + + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark \ + --iters ${iters} +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/run_quant.sh similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/run_quant.sh rename to examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/run_quant.sh diff --git 
a/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/README.md b/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/README.md
new file mode 100644
index 00000000000..34eb64fcf74
--- /dev/null
+++ b/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/README.md
@@ -0,0 +1,75 @@
+Step-by-Step
+============
+
+This document lists the steps to reproduce inception_v3 model tuning and benchmark results via Neural Compressor.
+This example can run on Intel CPUs and GPUs.
+
+> **Note**:
+> The model is supported in the validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).
+# Prerequisite
+
+## 1. Environment
+
+### Installation
+Python 3.9 or a higher version is recommended.
+```shell
+pip install -r requirements.txt
+```
+
+### Install Intel Extension for TensorFlow
+#### Quantizing the model on Intel GPU (Mandatory to install ITEX)
+Intel Extension for TensorFlow must be installed to quantize the model on Intel GPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+For more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers).
+
+#### Quantizing the model on Intel CPU (Optional to install ITEX)
+Intel Extension for TensorFlow for Intel CPUs is currently experimental. It is not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+> **Note**:
+> The version compatibility of stock TensorFlow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible TensorFlow and ITEX.
+
+## 2. Prepare pre-trained model
+
+ Download the pre-trained PB file:
+ ```shell
+ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/inceptionv3_fp32_pretrained_model.pb
+ ```
+
+## 3. Prepare Dataset
+
+ The TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format.
+ We also prepared related scripts in the `examples/3.x_api/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directories based on the label (synset) of each image, you can use the command below to convert it to the TF records format.
+
+ ```shell
+ cd examples/3.x_api/tensorflow/cv
+ # convert validation subset
+ bash prepare_dataset.sh --output_dir=./inception_v3/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation
+ # convert train subset
+ bash prepare_dataset.sh --output_dir=./inception_v3/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train
+ ```
+> **Note**:
+> The raw ImageNet dataset, which resides in JPEG files, should be organized in the following directory structure. Taking the validation set as an example:
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000293.JPEG
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000543.JPEG
+> where 'n01440764' is the unique synset label associated with these images. + +# Run + +## 1 Quantization + + ```shell + bash run_quant.sh --input_model=/PATH/TO/inceptionv3_fp32_pretrained_model.pb \ + --output_model=./nc_inception_v3.pb --dataset_location=/path/to/ImageNet/ + ``` + +## 2. Benchmark + ```shell + bash run_benchmark.sh --input_model=./nc_inception_v3.pb --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 + bash run_benchmark.sh --input_model=./nc_inception_v3.pb --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 + ``` diff --git a/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/data_process.py new file mode 100644 index 00000000000..ecfca2348cd --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/data_process.py @@ -0,0 +1,511 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import collections + +import numpy as np +import tensorflow as tf + +from abc import abstractmethod +from neural_compressor.common import logger +from neural_compressor.tensorflow.utils.data import default_collate + +class ParseDecodeImagenet: + """Parse features in Example proto. + + Returns: + tuple of parsed image and label + """ + + def __call__(self, sample): + """Parse features in example.""" + # Dense features in Example proto. + feature_map = { + "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1), + } + + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) + + features = tf.io.parse_single_example(serialized=sample, features=feature_map) + label = tf.cast(features["image/class/label"], dtype=tf.int32) + image = features["image/encoded"] + image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST") + return (image, label) + + +class BilinearImagenetTransform(object): + """Combination of a series of transforms which is applicable to images in Imagenet. 
+ + Args: + height: Height of the result + width:Width of the result + central_fraction(float, default=0.875):fraction of size to crop + mean_value(list, default=[0.0,0.0,0.0]):means for each channel + scale(float, default=1.0):std value + + Returns: + tuple of processed image and label + """ + + def __init__(self, height, width, central_fraction=0.875, mean_value=[0.0, 0.0, 0.0], scale=1.0): + """Initialize `BilinearImagenetTransform` class.""" + self.height = height + self.width = width + self.mean_value = mean_value + self.scale = scale + self.central_fraction = central_fraction + + # sample is (images, labels) + def __call__(self, sample): + """Convert `BilinearImagenetTransform` feature.""" + image, label = sample + if image.dtype is not tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image containing 87.5% area of the original image. + if self.central_fraction: + image = tf.image.central_crop(image, central_fraction=self.central_fraction) + + if self.height and self.width: + # Resize the image to the specified height and width. + image = tf.expand_dims(image, 0) + image = tf.image.resize(image, [self.height, self.width], method=tf.image.ResizeMethod.BILINEAR) + image = tf.squeeze(image, [0]) + + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + means = tf.broadcast_to(self.mean_value, tf.shape(input=image)) + image = (image - means) * self.scale + return (image, label) + + +class ComposeTransform(object): + """Composes several transforms together. + + Args: + transform_list (list of Transform objects): list of transforms to compose + + Returns: + sample (tuple): tuple of processed image and label + """ + + def __init__(self, transform_list): + """Initialize `ComposeTransform` class.""" + self.transform_list = transform_list + + def __call__(self, sample): + """Call transforms in transform_list.""" + for transform in self.transform_list: + sample = transform(sample) + return sample + + +class ShiftRescale(object): + """Label shift by 1 and rescale. + + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + image, label = sample + label -= 1 + image = (image - 127.5) / 127.5 + return (image, label) + + +class ImageRecordDataset(object): + """Tensorflow imageNet database in tf record format. + + Please arrange data in this way: + root/validation-000-of-100 + root/validation-001-of-100 + ... + root/validation-099-of-100 + The file name needs to follow this pattern: '* - * -of- *' + + Args: root (str): Root directory of dataset. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
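+
+    A minimal usage sketch (illustrative only; the TF record path is a placeholder,
+    and the 299x299 size mirrors how main.py builds the calibration dataloader for
+    inception_v3):
+
+        dataset = ImageRecordDataset(
+            root="/path/to/imagenet-tfrecords",
+            transform=ComposeTransform(transform_list=[
+                BilinearImagenetTransform(height=299, width=299),
+            ]),
+        )
+        calib_dataloader = TFDataLoader(dataset=dataset)  # batch_size defaults to 1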
+ """ + + """Configuration for Imagenet dataset.""" + + def __new__(cls, root, transform=None, filter=None): + """Build a new object of TensorflowImageRecord class.""" + from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module + + glob_pattern = os.path.join(root, "*-*-of-*") + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError("Found no files in --root matching: {}".format(glob_pattern)) + + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) + ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names))) + + if transform is not None: + transform.transform_list.insert(0, ParseDecodeImagenet()) + else: + transform = ParseDecodeImagenet() + ds = ds.map(transform, num_parallel_calls=None) + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned + return ds + + +class BaseMetric(object): + """The base class of Metric.""" + + def __init__(self, metric, single_output=False, hvd=None): + """Initialize the basic metric. + + Args: + metric: The metric class. + single_output: Whether the output is single or not, defaults to False. + hvd: The Horovod class for distributed training, defaults to None. + """ + self._metric_cls = metric + self._single_output = single_output + self._hvd = hvd + + def __call__(self, *args, **kwargs): + """Evaluate the model predictions, and the reference. + + Returns: + The class itself. + """ + self._metric = self._metric_cls(*args, **kwargs) + return self + + @abstractmethod + def update(self, preds, labels=None, sample_weight=None): + """Update the state that need to be evaluated. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Clear the predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def result(self): + """Evaluate the difference between predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @property + def metric(self): + """Return its metric class. + + Returns: + The metric class. + """ + return self._metric_cls + + @property + def hvd(self): + """Return its hvd class. + + Returns: + The hvd class. + """ + return self._hvd + + @hvd.setter + def hvd(self, hvd): + """Set its hvd. + + Args: + hvd: The Horovod class for distributed training. + """ + self._hvd = hvd + + +class TopKMetric(BaseMetric): + """Compute Top-k Accuracy classification score for Tensorflow model. + + This metric computes the number of times where the correct label is among + the top k labels predicted. + + Attributes: + k (int): The number of most likely outcomes considered to find the correct label. + num_correct: The number of predictions that were correct classified. + num_sample: The total number of predictions. + """ + + def __init__(self, k=1): + """Initialize the k, number of samples and correct predictions. + + Args: + k: The number of most likely outcomes considered to find the correct label. 
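+
+        Example (an illustrative sketch with toy data, not part of any example script):
+            metric = TopKMetric(k=1)
+            metric.update(preds=[[0.1, 0.9]], labels=[1])  # top-1 prediction is class 1
+            print(metric.result())  # 1.0 for this toy batch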
+ """ + self.k = k + self.num_correct = 0 + self.num_sample = 0 + + def update(self, preds, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. + """ + preds, labels = TopKMetric._topk_shape_validate(preds, labels) + + labels = labels.reshape([len(labels)]) + with tf.Graph().as_default() as acc_graph: + topk = tf.nn.in_top_k( + predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k + ) + fp32_topk = tf.cast(topk, tf.float32) + correct_tensor = tf.reduce_sum(input_tensor=fp32_topk) + + with tf.compat.v1.Session() as acc_sess: + correct = acc_sess.run(correct_tensor) + + self.num_sample += len(labels) + self.num_correct += correct + + def reset(self): + """Reset the number of samples and correct predictions.""" + self.num_correct = 0 + self.num_sample = 0 + + def result(self): + """Compute the top-k score. + + Returns: + The top-k score. + """ + if self.num_sample == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + elif getattr(self, "_hvd", None) is not None: # pragma: no cover + allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) + allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) + return allgather_num_correct / allgather_num_sample + return self.num_correct / self.num_sample + + @staticmethod + def _topk_shape_validate(preds, labels): + # preds shape can be Nxclass_num or class_num(N=1 by default) + # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax + if isinstance(preds, int): + preds = [preds] + preds = np.array(preds) + elif isinstance(preds, np.ndarray): + preds = np.array(preds) + elif isinstance(preds, list): + preds = np.array(preds) + preds = preds.reshape((-1, preds.shape[-1])) + + # consider labels just int value 1x1 + if isinstance(labels, int): + labels = [labels] + labels = np.array(labels) + elif isinstance(labels, tuple): + labels = np.array([labels]) + labels = labels.reshape((labels.shape[-1], -1)) + elif isinstance(labels, list): + if isinstance(labels[0], int): + labels = np.array(labels) + labels = labels.reshape((labels.shape[0], 1)) + elif isinstance(labels[0], tuple): + labels = np.array(labels) + labels = labels.reshape((labels.shape[-1], -1)) + else: + labels = np.array(labels) + # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot) + # only support 2 dimension one-shot labels + # or 1 dimension one-hot class_num will confuse with N + + if len(preds.shape) == 1: + N = 1 + class_num = preds.shape[0] + preds = preds.reshape([-1, class_num]) + elif len(preds.shape) >= 2: + N = preds.shape[0] + preds = preds.reshape([N, -1]) + class_num = preds.shape[1] + + label_N = labels.shape[0] + assert label_N == N, "labels batch size should same with preds" + labels = labels.reshape([N, -1]) + # one-hot labels will have 2 dimension not equal 1 + if labels.shape[1] != 1: + labels = labels.argsort()[..., -1:] + return preds, labels + + +class TFDataLoader(object): # pragma: no cover + """Tensorflow dataloader class. + + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict + method to do session run, this dataloader is designed to satisfy the usage of feed dict + in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. + + Args: + dataset: obj. wrapper of needed data. + batch_size: int. 
batch size + """ + + def __init__(self, dataset, batch_size=1, last_batch="rollover"): + """Initialize `TFDataDataLoader` class.""" + self.dataset = dataset + self.last_batch = last_batch + self.batch_size = batch_size + dataset = dataset.batch(batch_size) + + def batch(self, batch_size, last_batch="rollover"): + """Dataset return data per batch.""" + drop_last = False if last_batch == "rollover" else True + self.batch_size = batch_size + self.dataset = self.dataset.batch(batch_size, drop_last) + + def __iter__(self): + """Iterate dataloader.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + ) + + def _generate_dataloader( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=None, + pin_memory=None, + distributed=False, + ): + """Yield data.""" + drop_last = False if last_batch == "rollover" else True + + def check_dynamic_shape(element_spec): + if isinstance(element_spec, collections.abc.Sequence): + return any([check_dynamic_shape(ele) for ele in element_spec]) + elif isinstance(element_spec, tf.TensorSpec): + return True if element_spec.shape.num_elements() is None else False + else: + raise ValueError("unrecognized element spec...") + + def squeeze_output(output): + if isinstance(output, collections.abc.Sequence): + return [squeeze_output(ele) for ele in output] + elif isinstance(output, np.ndarray): + return np.squeeze(output, axis=0) + else: + raise ValueError("not supported output format....") + + if tf.executing_eagerly(): + index = 0 + outputs = [] + for iter_tensors in dataset: + samples = [] + iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] + if isinstance(iter_inputs, tf.Tensor): + samples.append(iter_inputs.numpy()) + else: + samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) + if isinstance(iter_labels, tf.Tensor): + samples.append(iter_labels.numpy()) + else: + samples.append([np.array(l) for l in iter_labels]) + index += 1 + outputs.append(samples) + if index == batch_size: + outputs = default_collate(outputs) + yield outputs + outputs = [] + index = 0 + if len(outputs) > 0: + outputs = default_collate(outputs) + yield outputs + else: + try_single_batch = check_dynamic_shape(dataset.element_spec) + dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + iter_tensors = ds_iterator.get_next() + data_config = tf.compat.v1.ConfigProto() + data_config.use_per_session_threads = 1 + data_config.intra_op_parallelism_threads = 1 + data_config.inter_op_parallelism_threads = 16 + data_sess = tf.compat.v1.Session(config=data_config) + # pylint: disable=no-name-in-module + from tensorflow.python.framework.errors_impl import OutOfRangeError + + while True: + if not try_single_batch: + try: + outputs = data_sess.run(iter_tensors) + yield outputs + except OutOfRangeError: + data_sess.close() + return + else: + try: + outputs = [] + for i in range(0, batch_size): + outputs.append(squeeze_output(data_sess.run(iter_tensors))) + outputs = default_collate(outputs) + yield outputs + except OutOfRangeError: + if len(outputs) == 0: + data_sess.close() + return + else: + outputs = default_collate(outputs) + yield outputs + data_sess.close() + return diff --git a/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/main.py b/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/main.py new file mode 100644 index 
00000000000..9b0f737b619 --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/main.py @@ -0,0 +1,144 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time + +import tensorflow as tf +import numpy as np + +from argparse import ArgumentParser +from data_process import ( + ImageRecordDataset, + ComposeTransform, + BilinearImagenetTransform, + TFDataLoader, + TopKMetric, +) + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +arg_parser = ArgumentParser(description='Parse args') +arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') +arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') +arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') +arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') +arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') +arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') +arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') +arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='interations') +args = arg_parser.parse_args() + +def evaluate(model, eval_dataloader, metric, postprocess=None): + """Custom evaluate function to estimate the accuracy of the model. + + Args: + model (tf.Graph_def): The input model graph + + Returns: + accuracy (float): evaluation result, the larger is better. + """ + from neural_compressor.tensorflow import Model + model = Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + iteration = -1 + if args.benchmark and args.mode == 'performance': + iteration = args.iters + + def eval_func(dataloader): + latency_list = [] + for idx, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + + start = time.time() + predictions = model.sess.run(output_tensor, feed_dict) + end = time.time() + + metric.update(predictions, labels) + latency_list.append(end-start) + if idx + 1 == iteration: + break + latency = np.array(latency_list).mean() / args.batch_size + return latency + + latency = eval_func(eval_dataloader) + if args.benchmark and args.mode == 'performance': + print("Batch size = {}".format(args.batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. 
/ latency)) + acc = metric.result() + return acc + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def run(self): + """This is neural_compressor function include tuning, export and benchmark option.""" + from neural_compressor import set_random_seed + set_random_seed(9527) + + if args.tune: + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + + dataset = ImageRecordDataset( + root=args.dataset_location, + transform=ComposeTransform(transform_list= [ + BilinearImagenetTransform(height=299, width=299), + ] + ) + ) + calib_dataloader = TFDataLoader(dataset=dataset) + + # maybe we need to exclud bf16 + quant_config = StaticQuantConfig() + conv_config = StaticQuantConfig(weight_dtype="fp32", act_dtype="fp32") + quant_config.set_local("v0/cg/conv0/conv2d/Conv2D", conv_config) + q_model = quantize_model(args.input_graph, quant_config, calib_dataloader) + q_model.save(args.output_graph) + + if args.benchmark: + dataset = ImageRecordDataset( + root=args.dataset_location, + transform=ComposeTransform(transform_list= [ + BilinearImagenetTransform(height=299, width=299), + ] + ) + ) + dataloader = TFDataLoader(dataset=dataset, batch_size=args.batch_size) + def eval(model): + top1 = TopKMetric(k=1) + return evaluate(model, dataloader, top1) + + if args.mode == 'performance': + eval(args.input_graph) + elif args.mode == 'accuracy': + acc_result = eval(args.input_graph) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/requirements.txt new file mode 100644 index 00000000000..2755e1a41ac --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/requirements.txt @@ -0,0 +1,2 @@ +tensorflow +neural-compressor diff --git a/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..8ecac837cf7 --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/run_benchmark.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + batch_size=32 + iters=100 + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_benchmark { + + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark \ + --iters ${iters} +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..6a9e1b859c9 --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/run_quant.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params 
+function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dataset_location ${dataset_location} \ + --tune +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/README.md b/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/README.md new file mode 100644 index 00000000000..25755074a06 --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/README.md @@ -0,0 +1,108 @@ +Step-by-Step +============ + +This document list steps of reproducing mobilenet_v2 model tuning and benchmark results via Neural Compressor. +This example can run on Intel CPUs and GPUs. + +> **Note**: +> The model is supported in validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). +# Prerequisite + +## 1. Environment + +### Installation +Recommend python 3.9 or higher version. +```shell +pip install -r requirements.txt +``` + +### Install Intel Extension for Tensorflow +#### Quantizing the model on Intel GPU(Mandatory to install ITEX) +Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[xpu] +``` +For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers) + +#### Quantizing the model on Intel CPU(Optional to install ITEX) +Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` +> **Note**: +> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX. + +## 2. Prepare pre-trained model + +The mobilenet_v2 checkpoint file comes from [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models). +We can get the pb file by convert the checkpoint file. + + 1. Download the checkpoint file from [here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models) + ```shell + wget https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.4_224.tgz + tar -xvf mobilenet_v2_1.4_224.tgz + ``` + + 2. Exporting the Inference Graph + ```shell + git clone https://github.com/tensorflow/models + cd models/research/slim + python export_inference_graph.py \ + --alsologtostderr \ + --model_name=mobilenet_v2 \ + --output_file=/tmp/mobilenet_v2_inf_graph.pb + ``` + Make sure to use intel-tensorflow v1.15, and pip install tf_slim. 
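+    The export step above relies on the legacy TF 1.15 stack, so it can help to keep it in its own virtual environment, separate from the TF 2.x environment used for quantization. One possible setup (a sketch only; the wheel URL is the Python 3.7 link listed just below):
+    ```shell
+    python3.7 -m venv tf115-export
+    source tf115-export/bin/activate
+    pip install https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp37-cp37m-manylinux2010_x86_64.whl
+    pip install tf_slim
+    ```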
+ #### Install Intel Tensorflow 1.15 up2 + Check your python version and use pip install 1.15.0 up2 from links below: + https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp36-cp36m-manylinux2010_x86_64.whl + https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp37-cp37m-manylinux2010_x86_64.whl + https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl + > Please note: The ImageNet dataset has 1001, the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need add the `--labels_offset=1` flag in the inference graph exporting command. + + 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `MobilenetV2/Predictions/Reshape_1` + + 4. Freezing the exported Graph, please use the tool `freeze_graph.py` in [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo + ```shell + python freeze_graph.py \ + --input_graph=/tmp/mobilenet_v2_inf_graph.pb \ + --input_checkpoint=./mobilenet_v2.ckpt \ + --input_binary=true \ + --output_graph=./frozen_mobilenet_v2.pb \ + --output_node_names=MobilenetV2/Predictions/Reshape_1 + ``` + +## 3. Prepare Dataset + + TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. + We also prepared related scripts in ` examples/3.x_api/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. + + ```shell + cd examples/3.x_api/tensorflow/cv + # convert validation subset + bash prepare_dataset.sh --output_dir=./mobilenet_v2/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + # convert train subset + bash prepare_dataset.sh --output_dir=./mobilenet_v2/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train + ``` +> **Note**: +> The raw ImageNet dataset resides in JPEG files should be in the following directory structure. Taking validation set as an example:
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000293.JPEG
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000543.JPEG
+> where 'n01440764' is the unique synset label associated with these images. + +# Run + +## 1 Quantization + + ```shell + bash run_quant.sh --input_model=/PATH/TO/frozen_mobilenet_v2.pb \ + --output_model=./nc_mobilenet_v2.pb --dataset_location=/path/to/ImageNet/ + ``` + +## 2. Benchmark + ```shell + bash run_benchmark.sh --input_model=./nc_mobilenet_v2.pb --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 + bash run_benchmark.sh --input_model=./nc_mobilenet_v2.pb --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 + ``` diff --git a/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/data_process.py new file mode 100644 index 00000000000..ecfca2348cd --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/data_process.py @@ -0,0 +1,511 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import collections + +import numpy as np +import tensorflow as tf + +from abc import abstractmethod +from neural_compressor.common import logger +from neural_compressor.tensorflow.utils.data import default_collate + +class ParseDecodeImagenet: + """Parse features in Example proto. + + Returns: + tuple of parsed image and label + """ + + def __call__(self, sample): + """Parse features in example.""" + # Dense features in Example proto. + feature_map = { + "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1), + } + + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) + + features = tf.io.parse_single_example(serialized=sample, features=feature_map) + label = tf.cast(features["image/class/label"], dtype=tf.int32) + image = features["image/encoded"] + image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST") + return (image, label) + + +class BilinearImagenetTransform(object): + """Combination of a series of transforms which is applicable to images in Imagenet. 
+ + Args: + height: Height of the result + width:Width of the result + central_fraction(float, default=0.875):fraction of size to crop + mean_value(list, default=[0.0,0.0,0.0]):means for each channel + scale(float, default=1.0):std value + + Returns: + tuple of processed image and label + """ + + def __init__(self, height, width, central_fraction=0.875, mean_value=[0.0, 0.0, 0.0], scale=1.0): + """Initialize `BilinearImagenetTransform` class.""" + self.height = height + self.width = width + self.mean_value = mean_value + self.scale = scale + self.central_fraction = central_fraction + + # sample is (images, labels) + def __call__(self, sample): + """Convert `BilinearImagenetTransform` feature.""" + image, label = sample + if image.dtype is not tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image containing 87.5% area of the original image. + if self.central_fraction: + image = tf.image.central_crop(image, central_fraction=self.central_fraction) + + if self.height and self.width: + # Resize the image to the specified height and width. + image = tf.expand_dims(image, 0) + image = tf.image.resize(image, [self.height, self.width], method=tf.image.ResizeMethod.BILINEAR) + image = tf.squeeze(image, [0]) + + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + means = tf.broadcast_to(self.mean_value, tf.shape(input=image)) + image = (image - means) * self.scale + return (image, label) + + +class ComposeTransform(object): + """Composes several transforms together. + + Args: + transform_list (list of Transform objects): list of transforms to compose + + Returns: + sample (tuple): tuple of processed image and label + """ + + def __init__(self, transform_list): + """Initialize `ComposeTransform` class.""" + self.transform_list = transform_list + + def __call__(self, sample): + """Call transforms in transform_list.""" + for transform in self.transform_list: + sample = transform(sample) + return sample + + +class ShiftRescale(object): + """Label shift by 1 and rescale. + + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + image, label = sample + label -= 1 + image = (image - 127.5) / 127.5 + return (image, label) + + +class ImageRecordDataset(object): + """Tensorflow imageNet database in tf record format. + + Please arrange data in this way: + root/validation-000-of-100 + root/validation-001-of-100 + ... + root/validation-099-of-100 + The file name needs to follow this pattern: '* - * -of- *' + + Args: root (str): Root directory of dataset. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
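+
+    A minimal usage sketch (illustrative only; the TF record path is a placeholder,
+    and 224x224 with batch size 10 mirrors the calibration setup in this example's
+    main.py):
+
+        dataset = ImageRecordDataset(
+            root="/path/to/imagenet-tfrecords",
+            transform=ComposeTransform(transform_list=[
+                BilinearImagenetTransform(height=224, width=224),
+            ]),
+        )
+        calib_dataloader = TFDataLoader(dataset=dataset, batch_size=10)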
+ """ + + """Configuration for Imagenet dataset.""" + + def __new__(cls, root, transform=None, filter=None): + """Build a new object of TensorflowImageRecord class.""" + from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module + + glob_pattern = os.path.join(root, "*-*-of-*") + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError("Found no files in --root matching: {}".format(glob_pattern)) + + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) + ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names))) + + if transform is not None: + transform.transform_list.insert(0, ParseDecodeImagenet()) + else: + transform = ParseDecodeImagenet() + ds = ds.map(transform, num_parallel_calls=None) + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned + return ds + + +class BaseMetric(object): + """The base class of Metric.""" + + def __init__(self, metric, single_output=False, hvd=None): + """Initialize the basic metric. + + Args: + metric: The metric class. + single_output: Whether the output is single or not, defaults to False. + hvd: The Horovod class for distributed training, defaults to None. + """ + self._metric_cls = metric + self._single_output = single_output + self._hvd = hvd + + def __call__(self, *args, **kwargs): + """Evaluate the model predictions, and the reference. + + Returns: + The class itself. + """ + self._metric = self._metric_cls(*args, **kwargs) + return self + + @abstractmethod + def update(self, preds, labels=None, sample_weight=None): + """Update the state that need to be evaluated. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Clear the predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def result(self): + """Evaluate the difference between predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @property + def metric(self): + """Return its metric class. + + Returns: + The metric class. + """ + return self._metric_cls + + @property + def hvd(self): + """Return its hvd class. + + Returns: + The hvd class. + """ + return self._hvd + + @hvd.setter + def hvd(self, hvd): + """Set its hvd. + + Args: + hvd: The Horovod class for distributed training. + """ + self._hvd = hvd + + +class TopKMetric(BaseMetric): + """Compute Top-k Accuracy classification score for Tensorflow model. + + This metric computes the number of times where the correct label is among + the top k labels predicted. + + Attributes: + k (int): The number of most likely outcomes considered to find the correct label. + num_correct: The number of predictions that were correct classified. + num_sample: The total number of predictions. + """ + + def __init__(self, k=1): + """Initialize the k, number of samples and correct predictions. + + Args: + k: The number of most likely outcomes considered to find the correct label. 
+ """ + self.k = k + self.num_correct = 0 + self.num_sample = 0 + + def update(self, preds, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. + """ + preds, labels = TopKMetric._topk_shape_validate(preds, labels) + + labels = labels.reshape([len(labels)]) + with tf.Graph().as_default() as acc_graph: + topk = tf.nn.in_top_k( + predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k + ) + fp32_topk = tf.cast(topk, tf.float32) + correct_tensor = tf.reduce_sum(input_tensor=fp32_topk) + + with tf.compat.v1.Session() as acc_sess: + correct = acc_sess.run(correct_tensor) + + self.num_sample += len(labels) + self.num_correct += correct + + def reset(self): + """Reset the number of samples and correct predictions.""" + self.num_correct = 0 + self.num_sample = 0 + + def result(self): + """Compute the top-k score. + + Returns: + The top-k score. + """ + if self.num_sample == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + elif getattr(self, "_hvd", None) is not None: # pragma: no cover + allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) + allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) + return allgather_num_correct / allgather_num_sample + return self.num_correct / self.num_sample + + @staticmethod + def _topk_shape_validate(preds, labels): + # preds shape can be Nxclass_num or class_num(N=1 by default) + # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax + if isinstance(preds, int): + preds = [preds] + preds = np.array(preds) + elif isinstance(preds, np.ndarray): + preds = np.array(preds) + elif isinstance(preds, list): + preds = np.array(preds) + preds = preds.reshape((-1, preds.shape[-1])) + + # consider labels just int value 1x1 + if isinstance(labels, int): + labels = [labels] + labels = np.array(labels) + elif isinstance(labels, tuple): + labels = np.array([labels]) + labels = labels.reshape((labels.shape[-1], -1)) + elif isinstance(labels, list): + if isinstance(labels[0], int): + labels = np.array(labels) + labels = labels.reshape((labels.shape[0], 1)) + elif isinstance(labels[0], tuple): + labels = np.array(labels) + labels = labels.reshape((labels.shape[-1], -1)) + else: + labels = np.array(labels) + # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot) + # only support 2 dimension one-shot labels + # or 1 dimension one-hot class_num will confuse with N + + if len(preds.shape) == 1: + N = 1 + class_num = preds.shape[0] + preds = preds.reshape([-1, class_num]) + elif len(preds.shape) >= 2: + N = preds.shape[0] + preds = preds.reshape([N, -1]) + class_num = preds.shape[1] + + label_N = labels.shape[0] + assert label_N == N, "labels batch size should same with preds" + labels = labels.reshape([N, -1]) + # one-hot labels will have 2 dimension not equal 1 + if labels.shape[1] != 1: + labels = labels.argsort()[..., -1:] + return preds, labels + + +class TFDataLoader(object): # pragma: no cover + """Tensorflow dataloader class. + + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict + method to do session run, this dataloader is designed to satisfy the usage of feed dict + in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. + + Args: + dataset: obj. wrapper of needed data. + batch_size: int. 
batch size + """ + + def __init__(self, dataset, batch_size=1, last_batch="rollover"): + """Initialize `TFDataDataLoader` class.""" + self.dataset = dataset + self.last_batch = last_batch + self.batch_size = batch_size + dataset = dataset.batch(batch_size) + + def batch(self, batch_size, last_batch="rollover"): + """Dataset return data per batch.""" + drop_last = False if last_batch == "rollover" else True + self.batch_size = batch_size + self.dataset = self.dataset.batch(batch_size, drop_last) + + def __iter__(self): + """Iterate dataloader.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + ) + + def _generate_dataloader( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=None, + pin_memory=None, + distributed=False, + ): + """Yield data.""" + drop_last = False if last_batch == "rollover" else True + + def check_dynamic_shape(element_spec): + if isinstance(element_spec, collections.abc.Sequence): + return any([check_dynamic_shape(ele) for ele in element_spec]) + elif isinstance(element_spec, tf.TensorSpec): + return True if element_spec.shape.num_elements() is None else False + else: + raise ValueError("unrecognized element spec...") + + def squeeze_output(output): + if isinstance(output, collections.abc.Sequence): + return [squeeze_output(ele) for ele in output] + elif isinstance(output, np.ndarray): + return np.squeeze(output, axis=0) + else: + raise ValueError("not supported output format....") + + if tf.executing_eagerly(): + index = 0 + outputs = [] + for iter_tensors in dataset: + samples = [] + iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] + if isinstance(iter_inputs, tf.Tensor): + samples.append(iter_inputs.numpy()) + else: + samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) + if isinstance(iter_labels, tf.Tensor): + samples.append(iter_labels.numpy()) + else: + samples.append([np.array(l) for l in iter_labels]) + index += 1 + outputs.append(samples) + if index == batch_size: + outputs = default_collate(outputs) + yield outputs + outputs = [] + index = 0 + if len(outputs) > 0: + outputs = default_collate(outputs) + yield outputs + else: + try_single_batch = check_dynamic_shape(dataset.element_spec) + dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + iter_tensors = ds_iterator.get_next() + data_config = tf.compat.v1.ConfigProto() + data_config.use_per_session_threads = 1 + data_config.intra_op_parallelism_threads = 1 + data_config.inter_op_parallelism_threads = 16 + data_sess = tf.compat.v1.Session(config=data_config) + # pylint: disable=no-name-in-module + from tensorflow.python.framework.errors_impl import OutOfRangeError + + while True: + if not try_single_batch: + try: + outputs = data_sess.run(iter_tensors) + yield outputs + except OutOfRangeError: + data_sess.close() + return + else: + try: + outputs = [] + for i in range(0, batch_size): + outputs.append(squeeze_output(data_sess.run(iter_tensors))) + outputs = default_collate(outputs) + yield outputs + except OutOfRangeError: + if len(outputs) == 0: + data_sess.close() + return + else: + outputs = default_collate(outputs) + yield outputs + data_sess.close() + return diff --git a/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/main.py b/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/main.py new file mode 100644 index 
00000000000..024225431f4 --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/main.py @@ -0,0 +1,143 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time + +import tensorflow as tf +import numpy as np + +from argparse import ArgumentParser +from data_process import ( + ImageRecordDataset, + ComposeTransform, + BilinearImagenetTransform, + TFDataLoader, + TopKMetric, +) + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +arg_parser = ArgumentParser(description='Parse args') +arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') +arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') +arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') +arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') +arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') +arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') +arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') +arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='interations') +args = arg_parser.parse_args() + +def evaluate(model, eval_dataloader, metric, postprocess=None): + """Custom evaluate function to estimate the accuracy of the model. + + Args: + model (tf.Graph_def): The input model graph + + Returns: + accuracy (float): evaluation result, the larger is better. + """ + from neural_compressor.tensorflow import Model + model = Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + iteration = -1 + if args.benchmark and args.mode == 'performance': + iteration = args.iters + + def eval_func(dataloader): + latency_list = [] + for idx, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + + start = time.time() + predictions = model.sess.run(output_tensor, feed_dict) + end = time.time() + + metric.update(predictions, labels) + latency_list.append(end-start) + if idx + 1 == iteration: + break + latency = np.array(latency_list).mean() / args.batch_size + return latency + + latency = eval_func(eval_dataloader) + if args.benchmark and args.mode == 'performance': + print("Batch size = {}".format(args.batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. 
/ latency)) + acc = metric.result() + return acc + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def run(self): + """This is neural_compressor function include tuning, export and benchmark option.""" + from neural_compressor.common import set_random_seed + set_random_seed(9527) + + if args.tune: + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + + dataset = ImageRecordDataset( + root=args.dataset_location, + transform=ComposeTransform(transform_list= [ + BilinearImagenetTransform(height=224, width=224), + ] + ) + ) + calib_dataloader = TFDataLoader(dataset=dataset, batch_size=10) + + quant_config = StaticQuantConfig() + q_model = quantize_model(args.input_graph, quant_config, calib_dataloader) + q_model.save(args.output_graph) + + if args.benchmark: + dataloader = create_dataloader('tensorflow', dataloader_args) + dataset = ImageRecordDataset( + root=args.dataset_location, + transform=ComposeTransform(transform_list= [ + BilinearImagenetTransform(height=224, width=224), + ] + ) + ) + dataloader = TFDataLoader(dataset=dataset, batch_size=args.batch_size) + + def eval(model): + top1 = TopKMetric(k=1) + return evaluate(model, dataloader, top1) + + if args.mode == 'performance': + eval(args.input_graph) + elif args.mode == 'accuracy': + acc_result = eval(args.input_graph) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/requirements.txt new file mode 100644 index 00000000000..2755e1a41ac --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/requirements.txt @@ -0,0 +1,2 @@ +tensorflow +neural-compressor diff --git a/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..8ecac837cf7 --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/run_benchmark.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + batch_size=32 + iters=100 + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_benchmark { + + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark \ + --iters ${iters} +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..6a9e1b859c9 --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/run_quant.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + 
input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dataset_location ${dataset_location} \ + --tune +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/image_recognition/prepare_dataset.sh b/examples/3.x_api/tensorflow/cv/prepare_dataset.sh similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/prepare_dataset.sh rename to examples/3.x_api/tensorflow/cv/prepare_dataset.sh diff --git a/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/README.md b/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/README.md new file mode 100644 index 00000000000..bc07e651f96 --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/README.md @@ -0,0 +1,107 @@ +Step-by-Step +============ + +This document list steps of reproducing resnet_v2_50 model tuning and benchmark results via Neural Compressor. +This example can run on Intel CPUs and GPUs. + +> **Note**: +> The models is supported in validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). +# Prerequisite + +## 1. Environment + +### Installation +Recommend python 3.9 or higher version. +```shell +pip install -r requirements.txt +``` + +### Install Intel Extension for Tensorflow +#### Quantizing the model on Intel GPU(Mandatory to install ITEX) +Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[xpu] +``` +For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers) + +#### Quantizing the model on Intel CPU(Optional to install ITEX) +Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` +> **Note**: +> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX. + +## 2. Prepare pre-trained model +The resnet_v2_50 checkpoint file comes from [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models). +We can get the pb file by convert the checkpoint file. + + 1. Download the checkpoint file from [here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models) + ```shell + wget http://download.tensorflow.org/models/resnet_v2_50_2017_04_14.tar.gz + tar -xvf resnet_v2_50_2017_04_14.tar.gz + ``` + + 2. Exporting the Inference Graph + ```shell + git clone https://github.com/tensorflow/models + cd models/research/slim + python export_inference_graph.py \ + --alsologtostderr \ + --model_name=resnet_v2_50 \ + --output_file=/tmp/resnet_v2_50_inf_graph.pb + ``` + Make sure to use intel-tensorflow v1.15, and pip install tf_slim. 
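+    As with the other slim-based examples, this export relies on the legacy TF 1.15 stack, so an isolated virtual environment avoids clashing with the TF 2.x packages used for quantization. A sketch (pick the intel_tensorflow-1.15.0up2 wheel matching your Python version from the links below):
+    ```shell
+    python3 -m venv tf115-export && source tf115-export/bin/activate
+    pip install <intel_tensorflow-1.15.0up2 wheel URL for your Python version>
+    pip install tf_slim
+    ```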
+ #### Install Intel Tensorflow 1.15 up2 + Check your python version and use pip install 1.15.0 up2 from links below: + https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp36-cp36m-manylinux2010_x86_64.whl + https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp37-cp37m-manylinux2010_x86_64.whl + https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl + > Please note: The ImageNet dataset has 1001, the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need add the `--labels_offset=1` flag in the inference graph exporting command. + + 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer name of inference graph pb, for vgg_16 the output layer name is `resnet_v2_50/predictions/Reshape_1` + + 4. Freezing the exported Graph, please use the tool `freeze_graph.py` in [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo + ```shell + python freeze_graph.py \ + --input_graph=/tmp/resnet_v2_50_inf_graph.pb \ + --input_checkpoint=./resnet_v2_50.ckpt \ + --input_binary=true \ + --output_graph=./frozen_resnet_v2_50.pb \ + --output_node_names=resnet_v2_50/predictions/Reshape_1 + ``` + +## 3. Prepare Dataset + + TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. + We also prepared related scripts in ` examples/3.x_api/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. + + ```shell + cd examples/3.x_api/tensorflow/cv + # convert validation subset + bash prepare_dataset.sh --output_dir=./resnet_v2_50/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + # convert train subset + bash prepare_dataset.sh --output_dir=./resnet_v2_50/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train + ``` +> **Note**: +> The raw ImageNet dataset resides in JPEG files should be in the following directory structure. Taking validation set as an example:
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000293.JPEG
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000543.JPEG
+> where 'n01440764' is the unique synset label associated with these images. + +# Run + +## 1 Quantization + + ```shell + bash run_quant.sh --input_model=/PATH/TO/frozen_resnet_v2_50.pb \ + --output_model=./nc_resnet_v2_50.pb --dataset_location=/path/to/ImageNet/ + ``` + +## 2. Benchmark + ```shell + bash run_benchmark.sh --input_model=./nc_resnet_v2_50.pb --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 + bash run_benchmark.sh --input_model=./nc_resnet_v2_50.pb --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 + ``` diff --git a/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/data_process.py new file mode 100644 index 00000000000..ecfca2348cd --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/data_process.py @@ -0,0 +1,511 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import collections + +import numpy as np +import tensorflow as tf + +from abc import abstractmethod +from neural_compressor.common import logger +from neural_compressor.tensorflow.utils.data import default_collate + +class ParseDecodeImagenet: + """Parse features in Example proto. + + Returns: + tuple of parsed image and label + """ + + def __call__(self, sample): + """Parse features in example.""" + # Dense features in Example proto. + feature_map = { + "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1), + } + + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) + + features = tf.io.parse_single_example(serialized=sample, features=feature_map) + label = tf.cast(features["image/class/label"], dtype=tf.int32) + image = features["image/encoded"] + image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST") + return (image, label) + + +class BilinearImagenetTransform(object): + """Combination of a series of transforms which is applicable to images in Imagenet. 
+ + Args: + height: Height of the result + width:Width of the result + central_fraction(float, default=0.875):fraction of size to crop + mean_value(list, default=[0.0,0.0,0.0]):means for each channel + scale(float, default=1.0):std value + + Returns: + tuple of processed image and label + """ + + def __init__(self, height, width, central_fraction=0.875, mean_value=[0.0, 0.0, 0.0], scale=1.0): + """Initialize `BilinearImagenetTransform` class.""" + self.height = height + self.width = width + self.mean_value = mean_value + self.scale = scale + self.central_fraction = central_fraction + + # sample is (images, labels) + def __call__(self, sample): + """Convert `BilinearImagenetTransform` feature.""" + image, label = sample + if image.dtype is not tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image containing 87.5% area of the original image. + if self.central_fraction: + image = tf.image.central_crop(image, central_fraction=self.central_fraction) + + if self.height and self.width: + # Resize the image to the specified height and width. + image = tf.expand_dims(image, 0) + image = tf.image.resize(image, [self.height, self.width], method=tf.image.ResizeMethod.BILINEAR) + image = tf.squeeze(image, [0]) + + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + means = tf.broadcast_to(self.mean_value, tf.shape(input=image)) + image = (image - means) * self.scale + return (image, label) + + +class ComposeTransform(object): + """Composes several transforms together. + + Args: + transform_list (list of Transform objects): list of transforms to compose + + Returns: + sample (tuple): tuple of processed image and label + """ + + def __init__(self, transform_list): + """Initialize `ComposeTransform` class.""" + self.transform_list = transform_list + + def __call__(self, sample): + """Call transforms in transform_list.""" + for transform in self.transform_list: + sample = transform(sample) + return sample + + +class ShiftRescale(object): + """Label shift by 1 and rescale. + + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + image, label = sample + label -= 1 + image = (image - 127.5) / 127.5 + return (image, label) + + +class ImageRecordDataset(object): + """Tensorflow imageNet database in tf record format. + + Please arrange data in this way: + root/validation-000-of-100 + root/validation-001-of-100 + ... + root/validation-099-of-100 + The file name needs to follow this pattern: '* - * -of- *' + + Args: root (str): Root directory of dataset. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
+ """ + + """Configuration for Imagenet dataset.""" + + def __new__(cls, root, transform=None, filter=None): + """Build a new object of TensorflowImageRecord class.""" + from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module + + glob_pattern = os.path.join(root, "*-*-of-*") + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError("Found no files in --root matching: {}".format(glob_pattern)) + + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) + ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names))) + + if transform is not None: + transform.transform_list.insert(0, ParseDecodeImagenet()) + else: + transform = ParseDecodeImagenet() + ds = ds.map(transform, num_parallel_calls=None) + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned + return ds + + +class BaseMetric(object): + """The base class of Metric.""" + + def __init__(self, metric, single_output=False, hvd=None): + """Initialize the basic metric. + + Args: + metric: The metric class. + single_output: Whether the output is single or not, defaults to False. + hvd: The Horovod class for distributed training, defaults to None. + """ + self._metric_cls = metric + self._single_output = single_output + self._hvd = hvd + + def __call__(self, *args, **kwargs): + """Evaluate the model predictions, and the reference. + + Returns: + The class itself. + """ + self._metric = self._metric_cls(*args, **kwargs) + return self + + @abstractmethod + def update(self, preds, labels=None, sample_weight=None): + """Update the state that need to be evaluated. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Clear the predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def result(self): + """Evaluate the difference between predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @property + def metric(self): + """Return its metric class. + + Returns: + The metric class. + """ + return self._metric_cls + + @property + def hvd(self): + """Return its hvd class. + + Returns: + The hvd class. + """ + return self._hvd + + @hvd.setter + def hvd(self, hvd): + """Set its hvd. + + Args: + hvd: The Horovod class for distributed training. + """ + self._hvd = hvd + + +class TopKMetric(BaseMetric): + """Compute Top-k Accuracy classification score for Tensorflow model. + + This metric computes the number of times where the correct label is among + the top k labels predicted. + + Attributes: + k (int): The number of most likely outcomes considered to find the correct label. + num_correct: The number of predictions that were correct classified. + num_sample: The total number of predictions. + """ + + def __init__(self, k=1): + """Initialize the k, number of samples and correct predictions. + + Args: + k: The number of most likely outcomes considered to find the correct label. 
+ """ + self.k = k + self.num_correct = 0 + self.num_sample = 0 + + def update(self, preds, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. + """ + preds, labels = TopKMetric._topk_shape_validate(preds, labels) + + labels = labels.reshape([len(labels)]) + with tf.Graph().as_default() as acc_graph: + topk = tf.nn.in_top_k( + predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k + ) + fp32_topk = tf.cast(topk, tf.float32) + correct_tensor = tf.reduce_sum(input_tensor=fp32_topk) + + with tf.compat.v1.Session() as acc_sess: + correct = acc_sess.run(correct_tensor) + + self.num_sample += len(labels) + self.num_correct += correct + + def reset(self): + """Reset the number of samples and correct predictions.""" + self.num_correct = 0 + self.num_sample = 0 + + def result(self): + """Compute the top-k score. + + Returns: + The top-k score. + """ + if self.num_sample == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + elif getattr(self, "_hvd", None) is not None: # pragma: no cover + allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) + allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) + return allgather_num_correct / allgather_num_sample + return self.num_correct / self.num_sample + + @staticmethod + def _topk_shape_validate(preds, labels): + # preds shape can be Nxclass_num or class_num(N=1 by default) + # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax + if isinstance(preds, int): + preds = [preds] + preds = np.array(preds) + elif isinstance(preds, np.ndarray): + preds = np.array(preds) + elif isinstance(preds, list): + preds = np.array(preds) + preds = preds.reshape((-1, preds.shape[-1])) + + # consider labels just int value 1x1 + if isinstance(labels, int): + labels = [labels] + labels = np.array(labels) + elif isinstance(labels, tuple): + labels = np.array([labels]) + labels = labels.reshape((labels.shape[-1], -1)) + elif isinstance(labels, list): + if isinstance(labels[0], int): + labels = np.array(labels) + labels = labels.reshape((labels.shape[0], 1)) + elif isinstance(labels[0], tuple): + labels = np.array(labels) + labels = labels.reshape((labels.shape[-1], -1)) + else: + labels = np.array(labels) + # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot) + # only support 2 dimension one-shot labels + # or 1 dimension one-hot class_num will confuse with N + + if len(preds.shape) == 1: + N = 1 + class_num = preds.shape[0] + preds = preds.reshape([-1, class_num]) + elif len(preds.shape) >= 2: + N = preds.shape[0] + preds = preds.reshape([N, -1]) + class_num = preds.shape[1] + + label_N = labels.shape[0] + assert label_N == N, "labels batch size should same with preds" + labels = labels.reshape([N, -1]) + # one-hot labels will have 2 dimension not equal 1 + if labels.shape[1] != 1: + labels = labels.argsort()[..., -1:] + return preds, labels + + +class TFDataLoader(object): # pragma: no cover + """Tensorflow dataloader class. + + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict + method to do session run, this dataloader is designed to satisfy the usage of feed dict + in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. + + Args: + dataset: obj. wrapper of needed data. + batch_size: int. 
batch size + """ + + def __init__(self, dataset, batch_size=1, last_batch="rollover"): + """Initialize `TFDataDataLoader` class.""" + self.dataset = dataset + self.last_batch = last_batch + self.batch_size = batch_size + dataset = dataset.batch(batch_size) + + def batch(self, batch_size, last_batch="rollover"): + """Dataset return data per batch.""" + drop_last = False if last_batch == "rollover" else True + self.batch_size = batch_size + self.dataset = self.dataset.batch(batch_size, drop_last) + + def __iter__(self): + """Iterate dataloader.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + ) + + def _generate_dataloader( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=None, + pin_memory=None, + distributed=False, + ): + """Yield data.""" + drop_last = False if last_batch == "rollover" else True + + def check_dynamic_shape(element_spec): + if isinstance(element_spec, collections.abc.Sequence): + return any([check_dynamic_shape(ele) for ele in element_spec]) + elif isinstance(element_spec, tf.TensorSpec): + return True if element_spec.shape.num_elements() is None else False + else: + raise ValueError("unrecognized element spec...") + + def squeeze_output(output): + if isinstance(output, collections.abc.Sequence): + return [squeeze_output(ele) for ele in output] + elif isinstance(output, np.ndarray): + return np.squeeze(output, axis=0) + else: + raise ValueError("not supported output format....") + + if tf.executing_eagerly(): + index = 0 + outputs = [] + for iter_tensors in dataset: + samples = [] + iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] + if isinstance(iter_inputs, tf.Tensor): + samples.append(iter_inputs.numpy()) + else: + samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) + if isinstance(iter_labels, tf.Tensor): + samples.append(iter_labels.numpy()) + else: + samples.append([np.array(l) for l in iter_labels]) + index += 1 + outputs.append(samples) + if index == batch_size: + outputs = default_collate(outputs) + yield outputs + outputs = [] + index = 0 + if len(outputs) > 0: + outputs = default_collate(outputs) + yield outputs + else: + try_single_batch = check_dynamic_shape(dataset.element_spec) + dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + iter_tensors = ds_iterator.get_next() + data_config = tf.compat.v1.ConfigProto() + data_config.use_per_session_threads = 1 + data_config.intra_op_parallelism_threads = 1 + data_config.inter_op_parallelism_threads = 16 + data_sess = tf.compat.v1.Session(config=data_config) + # pylint: disable=no-name-in-module + from tensorflow.python.framework.errors_impl import OutOfRangeError + + while True: + if not try_single_batch: + try: + outputs = data_sess.run(iter_tensors) + yield outputs + except OutOfRangeError: + data_sess.close() + return + else: + try: + outputs = [] + for i in range(0, batch_size): + outputs.append(squeeze_output(data_sess.run(iter_tensors))) + outputs = default_collate(outputs) + yield outputs + except OutOfRangeError: + if len(outputs) == 0: + data_sess.close() + return + else: + outputs = default_collate(outputs) + yield outputs + data_sess.close() + return diff --git a/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/main.py b/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/main.py new file mode 100644 index 
00000000000..3b9595476e6 --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/main.py @@ -0,0 +1,144 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time + +import tensorflow as tf +import numpy as np + +from argparse import ArgumentParser +from data_process import ( + ImageRecordDataset, + ComposeTransform, + BilinearImagenetTransform, + TFDataLoader, + TopKMetric, +) + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +arg_parser = ArgumentParser(description='Parse args') +arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') +arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') +arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') +arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') +arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') +arg_parser.add_argument('--diagnose', dest='diagnose', action='store_true', help='use Neural Insights to diagnose tuning and benchmark.') +arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') +arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') +arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='interations') +args = arg_parser.parse_args() + +def evaluate(model, eval_dataloader, metric, postprocess=None): + """Custom evaluate function to estimate the accuracy of the model. + + Args: + model (tf.Graph_def): The input model graph + + Returns: + accuracy (float): evaluation result, the larger is better. + """ + from neural_compressor.tensorflow import Model + model = Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + iteration = -1 + if args.benchmark and args.mode == 'performance': + iteration = args.iters + + def eval_func(dataloader): + latency_list = [] + for idx, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + + start = time.time() + predictions = model.sess.run(output_tensor, feed_dict) + end = time.time() + + metric.update(predictions, labels) + latency_list.append(end-start) + if idx + 1 == iteration: + break + latency = np.array(latency_list).mean() / args.batch_size + return latency + + latency = eval_func(eval_dataloader) + if args.benchmark and args.mode == 'performance': + print("Batch size = {}".format(args.batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. 
/ latency))
+    acc = metric.result()
+    return acc
+
+class eval_classifier_optimized_graph:
+    """Evaluate image classifier with optimized TensorFlow graph."""
+
+    def run(self):
+        """Run neural_compressor tuning (quantization) or benchmark according to the parsed arguments."""
+        from neural_compressor.common import set_random_seed
+        set_random_seed(9527)
+
+        if args.tune:
+            from neural_compressor.tensorflow import StaticQuantConfig, quantize_model
+
+            dataset = ImageRecordDataset(
+                root=args.dataset_location,
+                transform=ComposeTransform(transform_list= [
+                    BilinearImagenetTransform(height=224, width=224),
+                    ]
+                )
+            )
+            calib_dataloader = TFDataLoader(dataset=dataset, batch_size=10)
+
+            quant_config = StaticQuantConfig()
+            q_model = quantize_model(args.input_graph, quant_config, calib_dataloader)
+            q_model.save(args.output_graph)
+
+        if args.benchmark:
+            dataset = ImageRecordDataset(
+                root=args.dataset_location,
+                transform=ComposeTransform(transform_list= [
+                    BilinearImagenetTransform(height=224, width=224),
+                    ]
+                )
+            )
+            dataloader = TFDataLoader(dataset=dataset, batch_size=args.batch_size)
+
+            def eval(model):
+                top1 = TopKMetric(k=1)
+                return evaluate(model, dataloader, top1)
+
+            if args.mode == 'performance':
+                eval(args.input_graph)
+            elif args.mode == 'accuracy':
+                acc_result = eval(args.input_graph)
+                print("Batch size = %d" % dataloader.batch_size)
+                print("Accuracy: %.5f" % acc_result)
+
+if __name__ == "__main__":
+    evaluate_opt_graph = eval_classifier_optimized_graph()
+    evaluate_opt_graph.run()
diff --git a/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/requirements.txt
new file mode 100644
index 00000000000..2755e1a41ac
--- /dev/null
+++ b/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/requirements.txt
@@ -0,0 +1,2 @@
+tensorflow
+neural-compressor
diff --git a/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/run_benchmark.sh
new file mode 100644
index 00000000000..8ecac837cf7
--- /dev/null
+++ b/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/run_benchmark.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+set -x
+
+function main {
+
+  init_params "$@"
+  run_benchmark
+
+}
+
+# init params
+function init_params {
+  batch_size=32
+  iters=100
+
+  for var in "$@"
+  do
+    case $var in
+      --input_model=*)
+          input_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --mode=*)
+          mode=$(echo $var |cut -f2 -d=)
+      ;;
+      --dataset_location=*)
+          dataset_location=$(echo $var |cut -f2 -d=)
+      ;;
+      --batch_size=*)
+          batch_size=$(echo $var |cut -f2 -d=)
+      ;;
+      --iters=*)
+          iters=$(echo $var |cut -f2 -d=)
+      ;;
+    esac
+  done
+
+}
+
+# run_benchmark
+function run_benchmark {
+
+    python main.py \
+        --input-graph ${input_model} \
+        --mode ${mode} \
+        --dataset_location ${dataset_location} \
+        --batch_size ${batch_size} \
+        --benchmark \
+        --iters ${iters}
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/run_quant.sh
new file mode 100644
index 00000000000..6a9e1b859c9
--- /dev/null
+++ b/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/run_quant.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+set -x
+
+function main {
+  init_params "$@"
+  run_tuning
+
+}
+
+# init params
+function init_params {
+
+  for var in "$@"
+  do
+    case $var in
+      --input_model=*)
+          input_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --output_model=*)
+          output_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --dataset_location=*)
+          dataset_location=$(echo $var |cut -f2 -d=)
+      ;;
+    esac
+  done
+
+}
+
+# run_tuning
+function run_tuning {
+    python main.py \
+        --input-graph ${input_model} \
+        --output-graph ${output_model} \
+        --dataset_location ${dataset_location} \
+        --tune
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/README.md b/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/README.md
new file mode 100644
index 00000000000..00e00c7846d
--- /dev/null
+++ b/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/README.md
@@ -0,0 +1,108 @@
+Step-by-Step
+============
+
+This document lists the steps to reproduce vgg16 model tuning and benchmark results via Neural Compressor.
+This example can run on Intel CPUs and GPUs.
+
+> **Note**:
+> The model is supported in validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).
+# Prerequisite
+
+## 1. Environment
+
+### Installation
+Python 3.9 or a higher version is recommended.
+```shell
+pip install -r requirements.txt
+```
+
+### Install Intel Extension for Tensorflow
+#### Quantizing the model on Intel GPU(Mandatory to install ITEX)
+Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers)
+
+#### Quantizing the model on Intel CPU(Optional to install ITEX)
+Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+> **Note**:
+> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX.

+## 2. Prepare pre-trained model
+
+The vgg16 checkpoint file comes from [models](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models).
+We can get the pb file by converting the checkpoint file.
+
+ 1. Download the checkpoint file from [here](https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models)
+  ```shell
+  wget http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz
+  tar -xvf vgg_16_2016_08_28.tar.gz
+  ```
+
+ 2. Exporting the Inference Graph
+  ```shell
+  git clone https://github.com/tensorflow/models
+  cd models/research/slim
+  python export_inference_graph.py \
+      --alsologtostderr \
+      --model_name=vgg_16 \
+      --output_file=/tmp/vgg_16_inf_graph.pb
+  ```
+  Make sure to use intel-tensorflow v1.15, and pip install tf_slim.
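+  As the note below explains, the **VGG** final layer has only 1000 outputs while the ImageNet TF records use 1001 labels, so the exported graph needs the `--labels_offset=1` flag. A minimal sketch of the adjusted export command (same script and paths as in the step above, shown only for illustration):
+  ```shell
+  # export the vgg_16 inference graph with the label offset applied
+  python export_inference_graph.py \
+      --alsologtostderr \
+      --model_name=vgg_16 \
+      --labels_offset=1 \
+      --output_file=/tmp/vgg_16_inf_graph.pb
+  ```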
+    #### Install Intel Tensorflow 1.15 up2
+    Check your Python version and use pip to install the matching 1.15.0 up2 wheel from the links below:
+    https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp36-cp36m-manylinux2010_x86_64.whl
+    https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp37-cp37m-manylinux2010_x86_64.whl
+    https://storage.googleapis.com/intel-optimized-tensorflow/intel_tensorflow-1.15.0up2-cp35-cp35m-manylinux2010_x86_64.whl
+    > Please note: The ImageNet dataset has 1001 label classes, while the **VGG** and **ResNet V1** final layers have only 1000 outputs rather than 1001. So we need to add the `--labels_offset=1` flag to the inference graph exporting command.
+
+ 3. Use [Netron](https://lutzroeder.github.io/netron/) to get the input/output layer names of the inference graph pb. For vgg_16 the output layer name is `vgg_16/fc8/squeezed`.
+
+ 4. Freeze the exported graph with the tool `freeze_graph.py` from the [tensorflow v1.15.2](https://github.com/tensorflow/tensorflow/blob/v1.15.2/tensorflow/python/tools/freeze_graph.py) repo
+  ```shell
+  python freeze_graph.py \
+      --input_graph=/tmp/vgg_16_inf_graph.pb \
+      --input_checkpoint=./vgg_16.ckpt \
+      --input_binary=true \
+      --output_graph=./frozen_vgg16.pb \
+      --output_node_names=vgg_16/fc8/squeezed
+  ```
+
+## 3. Prepare Dataset
+
+  TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format.
+  We also prepared related scripts in the `examples/3.x_api/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. Once you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of each image, you can use the command below to convert it to the TF records format.
+
+  ```shell
+  cd examples/3.x_api/tensorflow/cv
+  # convert validation subset
+  bash prepare_dataset.sh --output_dir=./vgg16/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation
+  # convert train subset
+  bash prepare_dataset.sh --output_dir=./vgg16/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train
+  ```
+> **Note**:
+> The raw ImageNet dataset resides in JPEG files and should be organized in the following directory structure. Taking the validation set as an example:
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000293.JPEG
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000543.JPEG
+> where 'n01440764' is the unique synset label associated with these images. + +# Run + +## 1 Quantization + + ```shell + bash run_quant.sh --input_model=/PATH/TO/frozen_vgg16.pb \ + --output_model=./nc_vgg16.pb --dataset_location=/path/to/ImageNet/ + ``` + +## 2. Benchmark + ```shell + bash run_benchmark.sh --input_model=./nc_vgg16.pb --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 + bash run_benchmark.sh --input_model=./nc_vgg16.pb --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 + ``` diff --git a/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/data_process.py new file mode 100644 index 00000000000..17b4d9cec5e --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/data_process.py @@ -0,0 +1,581 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import collections + +import numpy as np +import tensorflow as tf + +from abc import abstractmethod +from neural_compressor.common import logger +from neural_compressor.tensorflow.utils.data import default_collate + +class ParseDecodeImagenet: + """Parse features in Example proto. + + Returns: + tuple of parsed image and label + """ + + def __call__(self, sample): + """Parse features in example.""" + # Dense features in Example proto. + feature_map = { + "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1), + } + + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) + + features = tf.io.parse_single_example(serialized=sample, features=feature_map) + label = tf.cast(features["image/class/label"], dtype=tf.int32) + image = features["image/encoded"] + image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST") + return (image, label) + + +class ResizeCropImagenet(object): + """Combination of a series of transforms which is applicable to images in Imagenet. 
+ + Args: + height (int): Height of the result + width (int): Width of the result + random_crop (bool, default=False): whether to random crop + resize_side (int, default=256):desired shape after resize operation + random_flip_left_right (bool, default=False): whether to random flip left and right + mean_value (list, default=[0.0,0.0,0.0]):means for each channel + scale (float, default=1.0):std value + + Returns: + tuple of processed image and label + """ + + def __init__( + self, + height, + width, + random_crop=False, + resize_side=256, + resize_method="bilinear", + random_flip_left_right=False, + mean_value=[0.0, 0.0, 0.0], + scale=1.0, + data_format="channels_last", + subpixels="RGB", + ): + """Initialize `TensorflowResizeCropImagenetTransform` class.""" + self.height = height + self.width = width + self.mean_value = mean_value + self.scale = scale + self.random_crop = random_crop + self.random_flip_left_right = random_flip_left_right + self.resize_side = resize_side + self.resize_method = resize_method + self.data_format = data_format + self.subpixels = subpixels + + # sample is (images, labels) + def __call__(self, sample): + """Convert `TensorflowResizeCropImagenetTransform` feature.""" + image, label = sample + shape = tf.shape(input=image) + + height = ( + tf.cast(shape[0], dtype=tf.float32) + if self.data_format == "channels_last" + else tf.cast(shape[1], dtype=tf.float32) + ) + width = ( + tf.cast(shape[1], dtype=tf.float32) + if self.data_format == "channels_last" + else tf.cast(shape[2], dtype=tf.float32) + ) + scale = tf.cond( + pred=tf.greater(height, width), + true_fn=lambda: self.resize_side / width, + false_fn=lambda: self.resize_side / height, + ) + + scale = tf.cast(scale, dtype=tf.float32) + new_height = tf.cast(tf.math.rint(height * scale), dtype=tf.int32) + new_width = tf.cast(tf.math.rint(width * scale), dtype=tf.int32) + + if self.subpixels == "BGR" and self.data_format == "channels_first": + # 'RGB'->'BGR' + image = tf.cond( + tf.equal(tf.rank(image), 3), + lambda: tf.experimental.numpy.moveaxis(image[::-1, ...], 0, -1), + lambda: tf.experimental.numpy.moveaxis(image[:, ::-1, ...], 1, -1), + ) + elif self.subpixels == "BGR": + # 'RGB'->'BGR' + image = image[..., ::-1] + image = tf.expand_dims(image, 0) + image = tf.image.resize(image, [new_height, new_width], method=self.resize_method) + image = tf.squeeze(image) + shape = tf.shape(input=image) + if self.random_crop: + y0 = tf.random.uniform(shape=[], minval=0, maxval=(shape[0] - self.height + 1), dtype=tf.dtypes.int32) + x0 = tf.random.uniform(shape=[], minval=0, maxval=(shape[1] - self.width + 1), dtype=tf.dtypes.int32) + else: + y0 = (shape[0] - self.height) // 2 + x0 = (shape[1] - self.width) // 2 + + image = tf.image.crop_to_bounding_box(image, y0, x0, self.height, self.width) + image.set_shape([self.height, self.width, 3]) + if self.random_flip_left_right: + image = tf.image.random_flip_left_right(image) + means = tf.broadcast_to(self.mean_value, tf.shape(input=image)) + image = (image - means) * self.scale + return (image, label) + + +class ComposeTransform(object): + """Composes several transforms together. 
+ + Args: + transform_list (list of Transform objects): list of transforms to compose + + Returns: + sample (tuple): tuple of processed image and label + """ + + def __init__(self, transform_list): + """Initialize `ComposeTransform` class.""" + self.transform_list = transform_list + + def __call__(self, sample): + """Call transforms in transform_list.""" + for transform in self.transform_list: + sample = transform(sample) + return sample + + +class LabelShift(object): + """Convert label to label - label_shift. + + Args: + label_shift(int, default=0): number of label shift + + Returns: + tuple of processed image and label + """ + + def __init__(self, label_shift=0): + """Initialize `LabelShift` class.""" + self.label_shift = label_shift + + def __call__(self, sample): + """Convert label to label_shift.""" + images, labels = sample + if isinstance(labels, np.ndarray): + labels = labels - self.label_shift + elif isinstance(labels, list): + if isinstance(labels[0], tuple): + labels = [tuple(np.array(label) - self.label_shift) for label in labels] + elif isinstance(labels[0], np.ndarray): + labels = [label - self.label_shift for label in labels] + else: + labels = np.array(labels) - self.label_shift + labels = labels.tolist() + else: + labels = np.array(labels) - self.label_shift + return images, labels + + +class ImageRecordDataset(object): + """Tensorflow imageNet database in tf record format. + + Please arrange data in this way: + root/validation-000-of-100 + root/validation-001-of-100 + ... + root/validation-099-of-100 + The file name needs to follow this pattern: '* - * -of- *' + + Args: root (str): Root directory of dataset. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. + """ + + """Configuration for Imagenet dataset.""" + + def __new__(cls, root, transform=None, filter=None): + """Build a new object of TensorflowImageRecord class.""" + from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module + + glob_pattern = os.path.join(root, "*-*-of-*") + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError("Found no files in --root matching: {}".format(glob_pattern)) + + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) + ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names))) + + if transform is not None: + transform.transform_list.insert(0, ParseDecodeImagenet()) + else: + transform = ParseDecodeImagenet() + ds = ds.map(transform, num_parallel_calls=None) + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned + return ds + + +class BaseMetric(object): + """The base class of Metric.""" + + def __init__(self, metric, single_output=False, hvd=None): + """Initialize the basic metric. + + Args: + metric: The metric class. + single_output: Whether the output is single or not, defaults to False. + hvd: The Horovod class for distributed training, defaults to None. + """ + self._metric_cls = metric + self._single_output = single_output + self._hvd = hvd + + def __call__(self, *args, **kwargs): + """Evaluate the model predictions, and the reference. + + Returns: + The class itself. 
+ """ + self._metric = self._metric_cls(*args, **kwargs) + return self + + @abstractmethod + def update(self, preds, labels=None, sample_weight=None): + """Update the state that need to be evaluated. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Clear the predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def result(self): + """Evaluate the difference between predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @property + def metric(self): + """Return its metric class. + + Returns: + The metric class. + """ + return self._metric_cls + + @property + def hvd(self): + """Return its hvd class. + + Returns: + The hvd class. + """ + return self._hvd + + @hvd.setter + def hvd(self, hvd): + """Set its hvd. + + Args: + hvd: The Horovod class for distributed training. + """ + self._hvd = hvd + + +class TopKMetric(BaseMetric): + """Compute Top-k Accuracy classification score for Tensorflow model. + + This metric computes the number of times where the correct label is among + the top k labels predicted. + + Attributes: + k (int): The number of most likely outcomes considered to find the correct label. + num_correct: The number of predictions that were correct classified. + num_sample: The total number of predictions. + """ + + def __init__(self, k=1): + """Initialize the k, number of samples and correct predictions. + + Args: + k: The number of most likely outcomes considered to find the correct label. + """ + self.k = k + self.num_correct = 0 + self.num_sample = 0 + + def update(self, preds, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. + """ + preds, labels = TopKMetric._topk_shape_validate(preds, labels) + + labels = labels.reshape([len(labels)]) + with tf.Graph().as_default() as acc_graph: + topk = tf.nn.in_top_k( + predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k + ) + fp32_topk = tf.cast(topk, tf.float32) + correct_tensor = tf.reduce_sum(input_tensor=fp32_topk) + + with tf.compat.v1.Session() as acc_sess: + correct = acc_sess.run(correct_tensor) + + self.num_sample += len(labels) + self.num_correct += correct + + def reset(self): + """Reset the number of samples and correct predictions.""" + self.num_correct = 0 + self.num_sample = 0 + + def result(self): + """Compute the top-k score. + + Returns: + The top-k score. 
+ """ + if self.num_sample == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + elif getattr(self, "_hvd", None) is not None: # pragma: no cover + allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) + allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) + return allgather_num_correct / allgather_num_sample + return self.num_correct / self.num_sample + + @staticmethod + def _topk_shape_validate(preds, labels): + # preds shape can be Nxclass_num or class_num(N=1 by default) + # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax + if isinstance(preds, int): + preds = [preds] + preds = np.array(preds) + elif isinstance(preds, np.ndarray): + preds = np.array(preds) + elif isinstance(preds, list): + preds = np.array(preds) + preds = preds.reshape((-1, preds.shape[-1])) + + # consider labels just int value 1x1 + if isinstance(labels, int): + labels = [labels] + labels = np.array(labels) + elif isinstance(labels, tuple): + labels = np.array([labels]) + labels = labels.reshape((labels.shape[-1], -1)) + elif isinstance(labels, list): + if isinstance(labels[0], int): + labels = np.array(labels) + labels = labels.reshape((labels.shape[0], 1)) + elif isinstance(labels[0], tuple): + labels = np.array(labels) + labels = labels.reshape((labels.shape[-1], -1)) + else: + labels = np.array(labels) + # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot) + # only support 2 dimension one-shot labels + # or 1 dimension one-hot class_num will confuse with N + + if len(preds.shape) == 1: + N = 1 + class_num = preds.shape[0] + preds = preds.reshape([-1, class_num]) + elif len(preds.shape) >= 2: + N = preds.shape[0] + preds = preds.reshape([N, -1]) + class_num = preds.shape[1] + + label_N = labels.shape[0] + assert label_N == N, "labels batch size should same with preds" + labels = labels.reshape([N, -1]) + # one-hot labels will have 2 dimension not equal 1 + if labels.shape[1] != 1: + labels = labels.argsort()[..., -1:] + return preds, labels + + +class TFDataLoader(object): # pragma: no cover + """Tensorflow dataloader class. + + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict + method to do session run, this dataloader is designed to satisfy the usage of feed dict + in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. + + Args: + dataset: obj. wrapper of needed data. + batch_size: int. 
batch size + """ + + def __init__(self, dataset, batch_size=1, last_batch="rollover"): + """Initialize `TFDataDataLoader` class.""" + self.dataset = dataset + self.last_batch = last_batch + self.batch_size = batch_size + dataset = dataset.batch(batch_size) + + def batch(self, batch_size, last_batch="rollover"): + """Dataset return data per batch.""" + drop_last = False if last_batch == "rollover" else True + self.batch_size = batch_size + self.dataset = self.dataset.batch(batch_size, drop_last) + + def __iter__(self): + """Iterate dataloader.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + ) + + def _generate_dataloader( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=None, + pin_memory=None, + distributed=False, + ): + """Yield data.""" + drop_last = False if last_batch == "rollover" else True + + def check_dynamic_shape(element_spec): + if isinstance(element_spec, collections.abc.Sequence): + return any([check_dynamic_shape(ele) for ele in element_spec]) + elif isinstance(element_spec, tf.TensorSpec): + return True if element_spec.shape.num_elements() is None else False + else: + raise ValueError("unrecognized element spec...") + + def squeeze_output(output): + if isinstance(output, collections.abc.Sequence): + return [squeeze_output(ele) for ele in output] + elif isinstance(output, np.ndarray): + return np.squeeze(output, axis=0) + else: + raise ValueError("not supported output format....") + + if tf.executing_eagerly(): + index = 0 + outputs = [] + for iter_tensors in dataset: + samples = [] + iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] + if isinstance(iter_inputs, tf.Tensor): + samples.append(iter_inputs.numpy()) + else: + samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) + if isinstance(iter_labels, tf.Tensor): + samples.append(iter_labels.numpy()) + else: + samples.append([np.array(l) for l in iter_labels]) + index += 1 + outputs.append(samples) + if index == batch_size: + outputs = default_collate(outputs) + yield outputs + outputs = [] + index = 0 + if len(outputs) > 0: + outputs = default_collate(outputs) + yield outputs + else: + try_single_batch = check_dynamic_shape(dataset.element_spec) + dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + iter_tensors = ds_iterator.get_next() + data_config = tf.compat.v1.ConfigProto() + data_config.use_per_session_threads = 1 + data_config.intra_op_parallelism_threads = 1 + data_config.inter_op_parallelism_threads = 16 + data_sess = tf.compat.v1.Session(config=data_config) + # pylint: disable=no-name-in-module + from tensorflow.python.framework.errors_impl import OutOfRangeError + + while True: + if not try_single_batch: + try: + outputs = data_sess.run(iter_tensors) + yield outputs + except OutOfRangeError: + data_sess.close() + return + else: + try: + outputs = [] + for i in range(0, batch_size): + outputs.append(squeeze_output(data_sess.run(iter_tensors))) + outputs = default_collate(outputs) + yield outputs + except OutOfRangeError: + if len(outputs) == 0: + data_sess.close() + return + else: + outputs = default_collate(outputs) + yield outputs + data_sess.close() + return diff --git a/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/main.py b/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/main.py new file mode 100644 index 00000000000..ffe960e1b1e 
--- /dev/null +++ b/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/main.py @@ -0,0 +1,146 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time + +import tensorflow as tf +import numpy as np + +from argparse import ArgumentParser +from data_process import ( + ImageRecordDataset, + ComposeTransform, + ResizeCropImagenet, + LabelShift, + TFDataLoader, + TopKMetric +) + + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +arg_parser = ArgumentParser(description='Parse args') +arg_parser.add_argument('-g', "--input-graph", + help='Specify the input graph for the transform tool', + dest='input_graph') +arg_parser.add_argument("--output-graph", + help='Specify tune result model save dir', + dest='output_graph') +arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') +arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') +arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') +arg_parser.add_argument('--dataset_location', dest='dataset_location', + help='location of calibration dataset and evaluate dataset') +arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') +arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='interations') +args = arg_parser.parse_args() + +def evaluate(model, eval_dataloader, metric, postprocess=None): + """Custom evaluate function to estimate the accuracy of the model. + + Args: + model (tf.Graph_def): The input model graph + + Returns: + accuracy (float): evaluation result, the larger is better. + """ + from neural_compressor.tensorflow import Model + model = Model(model) + input_tensor = model.input_tensor + output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ + model.output_tensor[0] + iteration = -1 + if args.benchmark and args.mode == 'performance': + iteration = args.iters + + def eval_func(dataloader): + latency_list = [] + for idx, (inputs, labels) in enumerate(dataloader): + # dataloader should keep the order and len of inputs same with input_tensor + inputs = np.array([inputs]) + feed_dict = dict(zip(input_tensor, inputs)) + + start = time.time() + predictions = model.sess.run(output_tensor, feed_dict) + end = time.time() + if postprocess: + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + latency_list.append(end-start) + if idx + 1 == iteration: + break + latency = np.array(latency_list).mean() / args.batch_size + return latency + + latency = eval_func(eval_dataloader) + if args.benchmark and args.mode == 'performance': + print("Batch size = {}".format(args.batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. 
/ latency)) + acc = metric.result() + return acc + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph.""" + + def run(self): + """This is neural_compressor function include tuning, export and benchmark option.""" + from neural_compressor.common import set_random_seed + set_random_seed(9527) + + if args.tune: + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + + dataset = ImageRecordDataset( + root=args.dataset_location, + transform=ComposeTransform(transform_list= [ + ResizeCropImagenet(height=224, width=224, mean_value=[123.68, 116.78, 103.94]), + ] + ) + ) + calib_dataloader = TFDataLoader(dataset=dataset, batch_size=10) + + quant_config = StaticQuantConfig() + q_model = quantize_model(args.input_graph, quant_config, calib_dataloader) + q_model.save(args.output_graph) + + if args.benchmark: + dataset = ImageRecordDataset( + root=args.dataset_location, + transform=ComposeTransform(transform_list= [ + ResizeCropImagenet(height=224, width=224, mean_value=[123.68, 116.78, 103.94]), + ] + ) + ) + dataloader = TFDataLoader(dataset=dataset, batch_size=args.batch_size) + + def eval(model): + top1 = TopKMetric(k=1) + postprocess = LabelShift(label_shift=1) + return evaluate(model, dataloader, top1, postprocess) + + if args.mode == 'performance': + eval(args.input_graph) + elif args.mode == 'accuracy': + acc_result = eval(args.input_graph) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/requirements.txt new file mode 100644 index 00000000000..2755e1a41ac --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/requirements.txt @@ -0,0 +1,2 @@ +tensorflow +neural-compressor diff --git a/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..8ecac837cf7 --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/run_benchmark.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + batch_size=32 + iters=100 + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_benchmark { + + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark \ + --iters ${iters} +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..6a9e1b859c9 --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/run_quant.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + 
input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dataset_location ${dataset_location} \ + --tune +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/README.md b/examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/README.md similarity index 77% rename from examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/README.md rename to examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/README.md index 7dcf3e7a363..0d4fa041690 100644 --- a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/README.md +++ b/examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/README.md @@ -40,10 +40,10 @@ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/2_11_0/HF- ## 3. Prepare Dataset TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. - We also prepared related scripts in ` examples/tensorflow/image_recognition/tensorflow_models/imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. + We also prepared related scripts in `examples/3.x_api/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. ```shell - cd examples/3.x_api/tensorflow/image_recognition/tensorflow_models/ + cd examples/3.x_api/tensorflow/cv # convert validation subset bash prepare_dataset.sh --output_dir=./vision_transformer/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation # convert train subset @@ -60,7 +60,7 @@ wget https://storage.googleapis.com/intel-optimized-tensorflow/models/2_11_0/HF- ## 1. 
Quantization ```shell -bash run_quant.sh --input_model= --output_model=./output --dataset_location= +bash run_quant.sh --input_model=./HF-ViT-Base16-Img224-frozen.pb --output_model=./output --dataset_location= ``` @@ -69,7 +69,7 @@ bash run_quant.sh --input_model= --outpu ### Benchmark the fp32 model ```shell -bash run_benchmark.sh --input_model= --mode=accuracy --dataset_location= --batch_size=32 +bash run_benchmark.sh --input_model=./HF-ViT-Base16-Img224-frozen.pb --mode=accuracy --dataset_location= --batch_size=32 ``` ### Benchmark the int8 model diff --git a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/__init__.py b/examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/__init__.py similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/__init__.py rename to examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/__init__.py diff --git a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/data_process.py similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/data_process.py rename to examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/data_process.py diff --git a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/main.py b/examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/main.py similarity index 99% rename from examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/main.py rename to examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/main.py index 49b4771c61a..92b2ea0fb2a 100644 --- a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/main.py @@ -146,7 +146,6 @@ def run(self): with tf.io.gfile.GFile(args.input_graph, "rb") as f: sm.ParseFromString(f.read()) graph_def = sm.meta_graphs[0].graph_def - postprocess = ShiftRescale() q_model = quantize_model(graph_def, quant_config, calib_dataloader) q_model.save(args.output_graph) diff --git a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/requirements.txt similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/requirements.txt rename to examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/requirements.txt diff --git a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/run_benchmark.sh rename to examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/run_benchmark.sh diff --git a/examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..6a9e1b859c9 --- /dev/null +++ b/examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/run_quant.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init 
params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input-graph ${input_model} \ + --output-graph ${output_model} \ + --dataset_location ${dataset_location} \ + --tune +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md index f82b696bd07..41a673fc834 100644 --- a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md +++ b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md @@ -74,7 +74,7 @@ bash prepare_dataset.sh --output_dir=./data ### Convert the dataset to TF Record format After the dataset is downloaded by either of ways above, the dataset should be converted to files of TF Record format. ```shell -python create_tf_record.py --vocab_file=data/vocab.txt --predict_file=data/dev-v1.1.json --output_file=./data/eval.tf_record +python create_tf_record.py --vocab_file=data/vocab.txt --predict_file=data/dev-v1.1.json --output_file=data/eval.tf_record ``` # Run Command @@ -82,11 +82,11 @@ python create_tf_record.py --vocab_file=data/vocab.txt --predict_file=data/dev-v ## Quantization ```shell - bash run_quant.sh --input_model=./fp32_bert_squad.pb --output_model=./bert_squad_int8.pb --dataset_location=/path/to/evaluation/dataset + bash run_quant.sh --input_model=./fp32_bert_squad.pb --output_model=./bert_squad_int8.pb --dataset_location=data ``` ## Benchmark ```shell - bash run_benchmark.sh --input_model=./bert_squad_int8.pb --mode=accuracy --dataset_location=/path/to/evaluation/dataset --batch_size=64 - bash run_benchmark.sh --input_model=./bert_squad_int8.pb --mode=performance --dataset_location=/path/to/evaluation/dataset --batch_size=64 + bash run_benchmark.sh --input_model=./bert_squad_int8.pb --mode=accuracy --dataset_location=data --batch_size=64 + bash run_benchmark.sh --input_model=./bert_squad_int8.pb --mode=performance --dataset_location=data --batch_size=64 ``` \ No newline at end of file diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/main.py b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/main.py index b5403618f40..7f99b3507fc 100644 --- a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/main.py @@ -134,8 +134,5 @@ def eval(model): q_model = quantize_model(model, quant_config, dataloader) q_model.save(FLAGS.output_model) - dataset = ModelZooBertDataset(root=data_path, label_file=label_path) - dataloader = ModelZooBertDataLoader(dataset=dataset, batch_size=FLAGS.batch_size) - if __name__ == "__main__": tf.compat.v1.app.run() diff --git a/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/README.md b/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/README.md new file mode 100644 index 00000000000..83d17508bb0 --- /dev/null +++ b/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/README.md @@ -0,0 +1,187 @@ +Step-by-Step +============ + +This document is used to list steps of reproducing TensorFlow Intel® Neural Compressor tuning zoo result of 
DistilBERT base. This example can be run on Intel CPUs and GPUs. + +## Model Details +This DistilBERT base model is based on the paper [*DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter*](https://arxiv.org/abs/1910.01108). \ +The [pretrained-model](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english?text=I+like+you.+I+love+you) thus used, was taken from [Hugging face model repository](https://huggingface.co/models). \ +The frozen model pb can be found at [Model Zoo for Intel® Architecture](https://github.com/IntelAI/models/tree/master/models/language_modeling/tensorflow/distilbert_base/inference). + +## Dataset Details +We use a part of Stanford Sentiment Treebank corpus for our task. Specifically, the validation split present in the SST2 dataset in the hugging face [repository](https://huggingface.co/datasets/sst2). It contains 872 labeled English sentences. The details for downloading the dataset are given below. + +## Prerequisite + +### 1. Install Intel® Neural Compressor +```shell +pip install neural-compressor +``` + +### 2. Install TensorFlow 2.11.dev202242 +Build a TensorFlow pip package from [intel-tensorflow spr_ww42 branch](https://github.com/Intel-tensorflow/tensorflow/tree/spr_ww42) and install it. How to build a TensorFlow pip package from source please refer to this [tutorial](https://www.tensorflow.org/install/source). + +### 3. Install Requirements +```shell +pip install -r requirements.txt +``` + +### 4. Install Intel® Extension for TensorFlow + +#### Quantizing the model on Intel GPU(Mandatory to install ITEX) +Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[xpu] +``` +Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. +For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers). + +#### Quantizing the model on Intel CPU(Optional to install ITEX) +Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +> **Note**: +> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX. + +### 5. Download Dataset +```shell +python download_dataset.py --path_to_save_dataset +``` + +### 6. 
Download Model +Download Frozen graph: +```shell +wget https://storage.googleapis.com/intel-optimized-tensorflow/models/2_10_0/distilbert_frozen_graph_fp32_final.pb +``` + +## Run Command +### Run Tuning: +```shell +bash run_quant.sh \ + --input_model=$INPUT_MODEL \ + --dataset_location=$DATASET_DIR \ + --output_model=$OUTPUT_MODEL \ + --batch_size=$BATCH_SIZE \ + --max_seq_length=$MAX_SEQ \ + --warmup_steps=$WARMUPS \ + --num_inter=$INTER_THREADS \ + --num_intra=$INTRA_THREADS +``` +### Run Benchmark: +```shell +# performance mode: get performance +bash run_benchmark.sh \ + --input_model=$INPUT_MODEL \ + --dataset_location=$DATASET_DIR \ + --mode=performance \ + --batch_size=$BATCH_SIZE \ + --max_seq_length=$MAX_SEQ \ + --iters=$ITERS \ + --warmup_steps=$WARMUPS \ + --num_inter=$INTER_THREADS \ + --num_intra=$INTRA_THREADS +``` + +```shell +# accuracy mode: get accuracy +bash run_benchmark.sh \ + --input_model=$INPUT_MODEL \ + --dataset_location=$DATASET_DIR \ + --mode=accuracy \ + --batch_size=$BATCH_SIZE \ + --max_seq_length=$MAX_SEQ \ + --warmup_steps=$WARMUPS \ + --num_inter=$INTER_THREADS \ + --num_intra=$INTRA_THREADS +``` + +Where (Default values are shown in the square brackets): + * $INPUT_MODEL ["./distilbert_base_fp32.pb"]-- The path to input FP32 frozen model .pb file to load + * $DATASET_DIR ["./sst2_validation_dataset"]-- The path to input dataset directory + * $OUTPUT_MODEL ["./output_distilbert_base_int8.pb"]-- The user-specified export path to the output INT8 quantized model + * $BATCH_SIZE [128]-- The batch size for model inference + * $MAX_SEQ [128]-- The maximum total sequence length after tokenization + * $ITERS [872]-- The number of iterations to run in benchmark mode, maximum value is 872 + * $WARMUPS [10]-- The number of warmup steps before benchmarking the model, maximum value is 22 + * $INTER_THREADS [2]-- The number of inter op parallelism thread to use, which can be set to the number of sockets + * $INTRA_THREADS [28]-- The number of intra op parallelism thread to use, which can be set to the number of physical core per socket + + +### Run Smooth Quant to improve int8 accuracy + +#### Tuning +```shell +bash run_quant.sh \ + --input_model=$INPUT_MODEL \ + --dataset_location=$DATASET_DIR \ + --output_model=$OUTPUT_MODEL \ + --batch_size=$BATCH_SIZE \ + --max_seq_length=$MAX_SEQ \ + --warmup_steps=$WARMUPS \ + --num_inter=$INTER_THREADS \ + --num_intra=$INTRA_THREADS \ + --sq=True +``` + + +Details of enabling Intel® Neural Compressor on DistilBERT base for TensorFlow +========================= + +This is a tutorial of how to enable DistilBERT base model with Intel® Neural Compressor. +## User Code Analysis +1. User specifies fp32 *model*, calibration dataloader *q_dataloader*, evaluation dataloader *eval_dataloader* and metric. + +2. User specifies fp32 *model*, calibration dataloader *q_dataloader* and a custom *eval_func* which encapsulates the evaluation dataloader and metric by itself. + +For DistilBERT base, we applied the latter one. The task is to implement the *q_dataloader* and *eval_func*. + + +### q_dataloader Part Adaption +Below dataloader class uses generator function to provide the model with input. 
+ +```python +class Dataloader(object): + def __init__(self, data_location, batch_size, steps): + self.batch_size = batch_size + self.data_location = data_location + self.num_batch = math.ceil(steps / batch_size) + + def __iter__(self): + return self.generate_dataloader(self.data_location).__iter__() + + def __len__(self): + return self.num_batch + + def generate_dataloader(self, data_location): + dataset = load_dataset(data_location) + for batch_id in range(self.num_batch): + feed_dict, labels = create_feed_dict_and_labels(dataset, batch_id, self.num_batch) + yield feed_dict, labels +``` + +### Code Update +After prepare step is done, we add the code for quantization tuning to generate quantized model. + +#### Tune +```python + from neural_compressor.tensorflow import StaticQuantConfig, SmoothQuantConfig, quantize_model + + quant_config = SmoothQuantConfig(alpha=0.6) if ARGS.sq else StaticQuantConfig() + q_model = quantize_model(graph, quant_config, self.dataloader) + try: + q_model.save(ARGS.output_graph) + except Exception as e: + tf.compat.v1.logging.error("Failed to save model due to {}".format(str(e))) +``` +#### Benchmark +```python + if ARGS.mode == 'performance': + self.eval_func(graph) + elif ARGS.mode == 'accuracy': + accuracy = self.eval_func(graph) + logger.info("Accuracy: {:.4f}".format(accuracy)) +``` diff --git a/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/download_dataset.py b/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/download_dataset.py new file mode 100644 index 00000000000..b92cf39874f --- /dev/null +++ b/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/download_dataset.py @@ -0,0 +1,38 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +''' +Script to download and save dataset +''' +from datasets import load_dataset +from argparse import ArgumentParser +import os + +def main(): + arg_parser = ArgumentParser(description="Download and save dataset") + arg_parser.add_argument("-p", "--path_to_save_dataset", type=str, + help="path to save the dataset", + default="./") + args = arg_parser.parse_args() + dataset = load_dataset("glue", "sst2", split= "validation") + path = os.path.join(args.path_to_save_dataset, "sst2_validation_dataset") + dataset.save_to_disk(path) + print("Dataset saved in location: {}".format(path)) + +if __name__ == "__main__": + main() diff --git a/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/main.py b/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/main.py new file mode 100644 index 00000000000..e0fce11736e --- /dev/null +++ b/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/main.py @@ -0,0 +1,324 @@ + +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +'''DistilBERT base inference, implementation adapted from Hugging Face Library https://huggingface.co/''' +import time +import os +import math + +import tensorflow as tf +import numpy as np + +from argparse import ArgumentParser +from transformers import AutoTokenizer +from datasets import load_from_disk +from tensorflow.core.protobuf import saved_model_pb2 +from tensorflow.python.client import timeline + +from neural_compressor.common import logger +from neural_compressor.tensorflow.utils import dump_elapsed_time + + + +def boolean_string(s): + if s not in {'False', 'True'}: + raise ValueError('Not a valid boolean string') + return s == 'True' + +arg_parser = ArgumentParser(description="Distilbert inference") +arg_parser.add_argument("--task-name", type=str, + help="Name of the task to run benchmark.", + dest="task_name", + default="sst2" + ) +arg_parser.add_argument("-c", "--config", type=str, + help="Quantization configuration file to load.", + dest="config", + default="distilbert_base.yaml" + ) +arg_parser.add_argument("-g", "--in-graph", type=str, + help="Full path to the input graph.", + dest="input_graph", + default=None + ) +arg_parser.add_argument("--data-location", type=str, + help="Path to the dataset.", + dest="data_location" + ) +arg_parser.add_argument("-o", "--output-graph", type=str, + help="The output path of quantized graph.", + dest="output_graph", + default="output_distilbert_base_int8.pb" + ) +arg_parser.add_argument("-m", "--mode", type=str, + choices=['performance', 'accuracy'], + help="One of two options: 'performance'/'accuracy'.", + dest="mode", + default="performance" + ) +arg_parser.add_argument("--tune", type=boolean_string, + help="whether to apply quantization", + dest="tune", + default=False + ) +arg_parser.add_argument('--sq', type=boolean_string, dest='sq', help='smooth quantization', default=False) +arg_parser.add_argument("--benchmark", type=boolean_string, + help="whether to do benchmark", + dest="benchmark", + default=False + ) +arg_parser.add_argument('-e', "--num-inter-threads", type=int, + help="The number of inter-thread.", + dest="num_inter_threads", + default=2 + ) +arg_parser.add_argument('-a', "--num-intra-threads", type=int, + help="The number of intra-thread.", + dest="num_intra_threads", + default=28 + ) +arg_parser.add_argument("--pad-to-max-length", type=boolean_string, + help="Padding option.", + dest="pad_to_max_length", + default=True + ) +arg_parser.add_argument("--warmup-steps", type=int, + help="Number of warmup steps.", + dest="warmup_steps", + default=10 + ) +arg_parser.add_argument("--max-seq-length", type=int, + help="Maximum total sequence length after tokenization.", + dest="max_seq_length", + default=128 + ) +arg_parser.add_argument("--steps", type=int, + help="Number of steps.", + dest="steps", + default=872 + ) +arg_parser.add_argument("--batch-size", type=int, + help="Inference batch-size.", + dest="batch_size", + default=128 + ) +arg_parser.add_argument("--profile", dest='profile', + type=boolean_string, help="profile", + default=False) + +ARGS = arg_parser.parse_args() +MAX_STEPS = 872 +MAX_WARMUP_STEPS = 22 
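+# Note (added for clarity): the SST-2 validation split used by this example
+# contains 872 labeled sentences, so MAX_STEPS caps inference at one full pass
+# over the dataset. MAX_WARMUP_STEPS bounds the warmup iterations, which are
+# drawn from the last 22 samples of the split (see eval_func below).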
+ +def create_feed_dict_and_labels(dataset, batch_id= None, num_batch= None, idx= None): + """Return the input dictionary for the given batch.""" + if idx is None: + start_idx = batch_id * ARGS.batch_size + if batch_id == num_batch - 1: + end_idx = ARGS.steps + else: + end_idx = start_idx + ARGS.batch_size + input_ids = np.array(dataset["input_ids"])[start_idx:end_idx, :] + attention_mask = np.array(dataset["attention_mask"])[start_idx:end_idx, :] + feed_dict = {"input_ids:0": input_ids, + "attention_mask:0": attention_mask, + } + labels = np.array(dataset["label"])[start_idx: end_idx] + else: + input_ids = np.array(dataset["input_ids"])[idx, :].reshape(1, -1) + attention_mask = np.array(dataset["attention_mask"])[idx, :].reshape(1, -1) + feed_dict = {"input_ids:0": input_ids, + "attention_mask:0": attention_mask, + } + labels = np.array(dataset["label"])[idx] + return feed_dict, labels + +def load_dataset(data_location): + def preprocess_function(examples): + """Tokenize the texts.""" + sentence1_key, sentence2_key = "sentence", None + args = ( + (examples[sentence1_key],) if sentence2_key is None + else (examples[sentence1_key], examples[sentence2_key]) + ) + result = tokenizer(*args, padding="max_length", + max_length=ARGS.max_seq_length, + truncation=True + ) + return result + + # Load dataset (only validation split for inference) + dataset = load_from_disk(data_location) + # Load tokenizer + tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english") + # Set max sequence length + if ARGS.max_seq_length > tokenizer.model_max_length: + logger.info(f"The max sequence length passed ({ARGS.max_seq_length}) \ + is larger than the max supported by model \ + ({tokenizer.model_max_length}).Using max_seq_length = \ + {tokenizer.model_max_length}") + ARGS.max_seq_length = min(ARGS.max_seq_length, tokenizer.model_max_length) + # Tokenize the dataset + dataset = dataset.map(preprocess_function, batched=True) + return dataset + +class Dataloader(object): + def __init__(self, data_location, batch_size, steps): + self.batch_size = batch_size + self.data_location = data_location + self.num_batch = math.ceil(steps / batch_size) + + def __iter__(self): + return self.generate_dataloader(self.data_location).__iter__() + + def __len__(self): + return self.num_batch + + def generate_dataloader(self, data_location): + dataset = load_dataset(data_location) + for batch_id in range(self.num_batch): + feed_dict, labels = create_feed_dict_and_labels(dataset, batch_id, self.num_batch) + yield feed_dict, labels + +class Distilbert_base(object): + def __init__(self): + self.validate_args() + self.dataset = load_dataset(ARGS.data_location) + self.dataloader = Dataloader(ARGS.data_location, ARGS.batch_size, ARGS.steps) + + def validate_args(self): + if ARGS.warmup_steps > MAX_WARMUP_STEPS: + logger.warning("Warmup steps greater than max possible value of 22." 
+ \ + " Setting to max value of ", MAX_WARMUP_STEPS) + ARGS.warmup_steps = MAX_WARMUP_STEPS + if ARGS.tune or ARGS.sq or (ARGS.benchmark and ARGS.mode == "accuracy"): + ARGS.steps = MAX_STEPS + elif ARGS.benchmark: + if ARGS.steps > (MAX_STEPS - MAX_WARMUP_STEPS): + logger.warning("Steps greater than max possible value of {}.".format(MAX_STEPS - MAX_WARMUP_STEPS)) + logger.warning("Setting to max value of {}".format(MAX_STEPS - MAX_WARMUP_STEPS)) + ARGS.steps = MAX_STEPS - MAX_WARMUP_STEPS + if not ARGS.data_location: + raise SystemExit("Missing dataset path.") + + def load_graph(self): + """Load the frozen model.""" + graph_def = tf.compat.v1.GraphDef() + sm = saved_model_pb2.SavedModel() + with tf.io.gfile.GFile(ARGS.input_graph, "rb") as f: + try: + content = f.read() + graph_def.ParseFromString(content) + except Exception: + sm.ParseFromString(content) + graph_def = sm.meta_graphs[0].graph_def + with tf.Graph().as_default() as graph: + tf.import_graph_def(graph_def, name="") + logger.info("Loaded graph from: " + ARGS.input_graph) + return graph + + def get_correct_predictions(self, preds, label_ids): + """Evaluate the predictions. + + return the total number of correct predictions. + """ + preds = np.argmax(preds, axis=1) + correct_preds = 0 + for pred, label in zip(preds, label_ids): + if pred == label: + correct_preds += 1 + return correct_preds + + @dump_elapsed_time(customized_msg="Customized eval_func") + def eval_func(self, graph): + # Set the config for running + config = tf.compat.v1.ConfigProto() + config.intra_op_parallelism_threads=ARGS.num_intra_threads + config.inter_op_parallelism_threads=ARGS.num_inter_threads + run_options = tf.compat.v1.RunOptions(trace_level=tf.compat.v1.RunOptions.FULL_TRACE) + run_metadata = tf.compat.v1.RunMetadata() + + output = graph.get_tensor_by_name('Identity:0') + total_time = 0 + accuracy = 0 + logger.info("Started warmup for {} steps...".format(ARGS.warmup_steps)) + start_step_idx = MAX_STEPS - MAX_WARMUP_STEPS + with tf.compat.v1.Session(graph=graph, config=config) as sess: + # Warm up + for step in range(start_step_idx, start_step_idx + ARGS.warmup_steps): + feed_dict, _ = create_feed_dict_and_labels(self.dataset, idx=step) + _ = sess.run(output, feed_dict= feed_dict) + logger.info("Warmup completed.") + # Inference + logger.info("Starting inference for {} steps...".format(ARGS.steps)) + total_correct_predictions = 0 + iter = 0 + for feed_dict, labels in self.dataloader: + iter += 1 + start_time = time.time() + if ARGS.profile: + pred = sess.run(output, feed_dict=feed_dict, options=run_options, run_metadata=run_metadata) + else: + pred = sess.run(output, feed_dict=feed_dict) + run_time = time.time() - start_time + if ARGS.tune or ARGS.sq or (ARGS.benchmark and ARGS.mode == "accuracy"): + total_correct_predictions += self.get_correct_predictions(pred, labels) + total_time += run_time + # save profiling file + if ARGS.profile and iter == int(self.dataloader.num_batch / 2): + trace = timeline.Timeline(step_stats=run_metadata.step_stats) + model_dir = str(os.path.dirname(os.path.realpath(__file__))) + '/timeline' + if not os.path.exists(model_dir): + try: + os.makedirs(model_dir) + except: + pass + profiling_file = model_dir + '/timeline-' + str(iter + 1) + '-' + str(os.getpid()) + '.json' + with open(profiling_file, 'w') as trace_file: + trace_file.write(trace.generate_chrome_trace_format(show_memory=False)) + time_per_batch = total_time / float(ARGS.steps / ARGS.batch_size) + accuracy = total_correct_predictions / ARGS.steps + if 
ARGS.benchmark and ARGS.mode == 'performance': + logger.info("Latency: {:.4f} ms".format(time_per_batch * 1000)) + logger.info("Throughput: {:.4f} sentences/sec".format(self.dataloader.batch_size / time_per_batch)) + return accuracy + + def run(self): + graph = self.load_graph() + if ARGS.tune or ARGS.sq: + from neural_compressor.tensorflow import StaticQuantConfig, SmoothQuantConfig, quantize_model + + quant_config = SmoothQuantConfig(alpha=0.6) if ARGS.sq else StaticQuantConfig() + q_model = quantize_model(graph, quant_config, self.dataloader) + try: + q_model.save(ARGS.output_graph) + except Exception as e: + tf.compat.v1.logging.error("Failed to save model due to {}".format(str(e))) + elif ARGS.benchmark: + assert ARGS.mode == 'performance' or ARGS.mode == 'accuracy', \ + "Benchmark only supports performance or accuracy mode." + if ARGS.mode == 'performance': + self.eval_func(graph) + elif ARGS.mode == 'accuracy': + accuracy = self.eval_func(graph) + logger.info("Accuracy: {:.4f}".format(accuracy)) + +if __name__ == "__main__": + distilbert_ob = Distilbert_base() + distilbert_ob.run() diff --git a/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/requirements.txt new file mode 100644 index 00000000000..e60cdb323dd --- /dev/null +++ b/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/requirements.txt @@ -0,0 +1,7 @@ +datasets>=2.6.1 +scikit-learn>=1.1.2 +scipy>=1.9.3 +sklearn==0.0 +tokenizers==0.13.1 +transformers>=4.31.0 +intel-tensorflow>=2.12.0 diff --git a/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..fed5b300182 --- /dev/null +++ b/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/run_benchmark.sh @@ -0,0 +1,88 @@ +#!/bin/bash +# set -x + +function main { + + init_params "$@" + + run_benchmark + +} + +# init params +function init_params { + # set default value + input_model="./distilbert_base_fp32.pb" + dataset_location="./sst2_validation_dataset" + mode="performance" + batch_size=128 + max_seq_length=128 + iters=872 + warmup_steps=10 + num_inter=2 + num_intra=28 + benchmark=True + profile=False + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo "$var" |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo "$var" |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --max_seq_length=*) + max_seq_length=$(echo ${var} |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo ${var} |cut -f2 -d=) + ;; + --warmup_steps=*) + warmup_steps=$(echo ${var} |cut -f2 -d=) + ;; + --num_inter=*) + num_inter=$(echo ${var} |cut -f2 -d=) + ;; + --num_intra=*) + num_intra=$(echo ${var} |cut -f2 -d=) + ;; + --benchmark=*) + benchmark=$(echo ${var} |cut -f2 -d=) + ;; + --profile=*) + profile=$(echo ${var} |cut -f2 -d=) + ;; + esac + done + +} + +# run_benchmark +function run_benchmark { + cmd=" + python main.py \ + --in-graph=${input_model} \ + --data-location=${dataset_location} \ + --benchmark=${benchmark} \ + --profile=${profile} \ + --mode=${mode} \ + --steps=${iters} \ + --warmup-steps=${warmup_steps} \ + --batch-size=${batch_size} \ + --max-seq-length=${max_seq_length} \ + --num-inter-threads=${num_inter} \ + --num-intra-threads=${num_intra} + " + echo $cmd + eval $cmd +} + +main "$@" diff --git 
a/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..31571c5ff4c --- /dev/null +++ b/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/run_quant.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# set -x + +function main { + + init_params "$@" + + run_tuning + +} + +# init params +function init_params { + # set default value + input_model="./distilbert_base_fp32.pb" + dataset_location="./sst2_validation_dataset" + output_model="./output_distilbert_base_int8.pb" + batch_size=128 + max_seq_length=128 + warmup_steps=10 + num_inter=2 + num_intra=28 + tune=True + sq=False + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo ${var} |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo ${var} |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo ${var} |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo ${var} |cut -f2 -d=) + ;; + --max_seq_length=*) + max_seq_length=$(echo ${var} |cut -f2 -d=) + ;; + --warmup_steps=*) + warmup_steps=$(echo ${var} |cut -f2 -d=) + ;; + --num_inter=*) + num_inter=$(echo ${var} |cut -f2 -d=) + ;; + --num_intra=*) + num_intra=$(echo ${var} |cut -f2 -d=) + ;; + --tune=*) + tune=$(echo ${var} |cut -f2 -d=) + ;; + --sq=*) + sq=$(echo ${var} |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + cmd=" + python main.py \ + --in-graph=${input_model} \ + --data-location=${dataset_location} \ + --output-graph=${output_model} \ + --tune=${tune} \ + --sq=${sq} \ + --warmup-steps=${warmup_steps} \ + --batch-size=${batch_size} \ + --max-seq-length=${max_seq_length} \ + --num-inter-threads=${num_inter} \ + --num-intra-threads=${num_intra} + " + echo $cmd + eval $cmd +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/README.md b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/README.md new file mode 100644 index 00000000000..9aad1dda2a9 --- /dev/null +++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/README.md @@ -0,0 +1,132 @@ +Step-by-Step +============ + +This document is used to list steps of reproducing TensorFlow Intel® Neural Compressor tuning zoo result of Transformer-LT. This example can run on Intel CPUs and GPUs. + +## Prerequisite + +### 1. Installation +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` + +### 2. Install Tensorflow +```shell +pip install tensorflow +``` +> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). + +### 3. Install Intel Extension for Tensorflow + +#### Quantizing the model on Intel GPU(Mandatory to install ITEX) +Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[xpu] +``` +Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. +For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers). + +#### Quantizing the model on Intel CPU(Optional to install ITEX) +Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. 
+ +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +> **Note**: +> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX. + +### 4. Prepare Dataset & Pretrained model + +```shell +wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v2_2_0/transformer-lt-official-fp32-inference.tar.gz +tar -zxvf transformer-lt-official-fp32-inference.tar.gz +cd transformer-lt-official-fp32-inference +tar -zxvf transformer_lt_official_fp32_pretrained_model.tar.gz +``` + +Dataset is in data folder, pretrained model is in graph folder. + +#### Automatic dataset & model download +Run the `prepare_dataset_model.sh` script located in `examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq`. + +```shell +cd examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq +bash prepare_dataset_model.sh +``` + +## Run Command + +```shell +python main.py --input_graph=/path/to/fp32_graphdef.pb --inputs_file=/path/to/newstest2014.en --reference_file=/path/to/newstest2014.de --vocab_file=/path/to/vocab.txt --tune +``` + +Details of enabling Intel® Neural Compressor on transformer-lt for Tensorflow. +========================= + +This is a tutorial of how to enable transformer-lt model with Intel® Neural Compressor. +## User Code Analysis +1. User specifies fp32 *model*, calibration dataset *q_dataloader*, evaluation dataset *eval_dataloader* and metric in tuning.metric field of model-specific yaml config file. + +2. User specifies fp32 *model*, calibration dataset *q_dataloader* and a custom *eval_func* which encapsulates the evaluation dataset and metric by itself. + +For transformer-lt, we applied the latter one because we don't have dataset and metric for transformer-lt. The task is to implement the *q_dataloader* and *eval_func*. + + +### q_dataloader Part Adaption +Below dataset class uses getitem to provide the model with input. + +```python +class Dataset(object): + def __init__(self, *args): + # initialize dataset related info here + ... + + def __getitem__(self, index): + data = self.batch[index] + label = self.ref_lines[index] + return data[0], label + + def __len__(self): + return len(self.batch) +``` + +### Evaluation Part Adaption +We evaluate the model with BLEU score, its source: https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/bleu_hook.py + +Here we set the input tensor and output tensors name into *inputs* and *outputs* args. +In this case we calibrate and quantize the model, and use our calibration dataloader initialized from a 'Dataset' object. + +### Code update +After prepare step is done, we add tune code to generate quantized model. 
+ +#### Tune +```python + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model + + dataset = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file) + calib_dataloader = BaseDataLoader(dataset=dataset, batch_size=FLAGS.batch_size, collate_fn=collate_fn) + + quant_config = StaticQuantConfig() + model = Model(graph) + model.input_tensor_names = ['input_tensor'] + model.output_tensor_names = ['model/Transformer/strided_slice_19'] + q_model = quantize_model(model, quant_config, calib_dataloader) + try: + q_model.save(FLAGS.output_model) + except Exception as e: + print("Failed to save model due to {}".format(str(e))) +``` +#### Benchmark +```python + if FLAGS.benchmark: + assert FLAGS.mode == 'performance' or FLAGS.mode == 'accuracy', \ + "Benchmark only supports performance or accuracy mode." + eval_func(graph) + elif FLAGS.mode == 'accuracy': + acc = eval_func(graph) + print('Accuracy is {:.3f}'.format(acc)) +``` +The Intel® Neural Compressor quantization.fit() function will return a best quantized model under time constraint. diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/main.py b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/main.py new file mode 100644 index 00000000000..d4d1ff54055 --- /dev/null +++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/main.py @@ -0,0 +1,261 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# +import re +import six +import sys +import time +import numpy as np +import unicodedata +import pandas as pd +from absl import app +import tensorflow as tf +from argparse import ArgumentParser + +from utils import metrics +from utils import tokenizer +from utils.tokenizer import Subtokenizer +from neural_compressor.tensorflow.utils import BaseDataLoader + +flags = tf.compat.v1.flags +FLAGS = flags.FLAGS + +flags.DEFINE_integer("batch_size", 64, + "run batch size") + +flags.DEFINE_string("input_graph", None, + "The path of input model file.") + +flags.DEFINE_string("inputs_file", None, + "File saved to an output file.") + +flags.DEFINE_string("reference_file", None, + "File containing reference translation.") + +flags.DEFINE_string("vocab_file", None, + "Path to subtoken vocabulary file.") + +flags.DEFINE_string("output_model", None, + "The output model of the quantized model.") + +flags.DEFINE_bool('tune', False, + 'whether to tune the model') + +flags.DEFINE_bool('benchmark', False, + 'whether to benchmark the model') + +flags.DEFINE_string("mode", 'performance', + "One of three options: 'performance'/'accuracy'.") + +flags.DEFINE_integer("iters", 100, + "The iteration used for benchmark.") + +class UnicodeRegex(object): + def __init__(self): + punctuation = self.property_chars("P") + self.nondigit_punct_re = re.compile(r"([^\d])([" + punctuation + r"])") + self.punct_nondigit_re = re.compile(r"([" + punctuation + r"])([^\d])") + self.symbol_re = re.compile("([" + self.property_chars("S") + "])") + + def property_chars(self, prefix): + return "".join(six.unichr(x) for x in range(sys.maxunicode) + if unicodedata.category(six.unichr(x)).startswith(prefix)) + +uregex = UnicodeRegex() + +def bleu_tokenize(string): + string = uregex.nondigit_punct_re.sub(r"\1 \2 ", string) + string = uregex.punct_nondigit_re.sub(r" \1 \2", string) + string = uregex.symbol_re.sub(r" \1 ", string) + return string.split() + +class bleu(object): + def __init__(self): + self.translations = [] + self.labels = [] + + def reset(self): + self.translations = [] + self.labels = [] + + def update(self, pred, label): + if len(label) != len(pred): + raise ValueError("Reference and translation files have different number " + "of lines. 
If training only a few steps (100-200), the " + "translation may be empty.") + label = [x.lower() for x in label] + pred = [x.lower() for x in pred] + label = [bleu_tokenize(x) for x in label] + pred = [bleu_tokenize(x) for x in pred] + self.labels.extend(label) + self.translations.extend(pred) + + def result(self): + return metrics.compute_bleu(self.labels, self.translations) * 100 + +def collate_fn(batch): + """Puts each data field into a pd frame with outer dimension batch size""" + elem = batch[0] + if isinstance(elem, tuple): + batch = zip(*batch) + return [collate_fn(samples) for samples in batch] + elif isinstance(elem, np.ndarray): + return [list(elem) for elem in batch] + elif isinstance(elem, str): + return batch + else: + return pd.DataFrame(batch).fillna(0).values.astype(np.int32) + +def load_graph(file_name): + tf.compat.v1.logging.info('Loading graph from: ' + file_name) + with tf.io.gfile.GFile(file_name, "rb") as f: + graph_def = tf.compat.v1.GraphDef() + graph_def.ParseFromString(f.read()) + with tf.Graph().as_default() as graph: + tf.import_graph_def(graph_def, name='') + return graph + +def eval_func(infer_graph, iteration=-1): + if isinstance(infer_graph, tf.compat.v1.GraphDef): + graph = tf.Graph() + with graph.as_default(): + tf.import_graph_def(infer_graph, name='') + infer_graph = graph + + subtokenizer = Subtokenizer(FLAGS.vocab_file) + input_tensor = infer_graph.get_tensor_by_name('input_tensor:0') + output_tensor = infer_graph.get_tensor_by_name(\ + 'model/Transformer/strided_slice_19:0') + + ds = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file) + dataloader = DataLoader(framework='tensorflow', dataset=ds, + batch_size=FLAGS.batch_size, collate_fn=collate_fn) + + config = tf.compat.v1.ConfigProto() + config.use_per_session_threads = 1 + config.inter_op_parallelism_threads = 1 + sess = tf.compat.v1.Session(graph=infer_graph, config=config) + iteration=-1 + time_list = [] + bleu_eval = bleu() + predictions = [] + labels = [] + warmup = 10 + if FLAGS.benchmark and FLAGS.mode == 'performance': + iteration = FLAGS.iters + assert iteration >= warmup, 'iteration must be larger than warmup' + + for idx, (input_data, label) in enumerate(dataloader): + if idx < iteration or iteration == -1: + time_start = time.time() + out = sess.run([output_tensor], {input_tensor: input_data}) + duration = time.time() - time_start + time_list.append(duration) + predictions.append(out) + labels.extend(label) + else: + break + + latency = np.array(time_list[warmup: ]).mean() / FLAGS.batch_size + if FLAGS.benchmark and FLAGS.mode == 'performance': + print('Batch size = {}'.format(FLAGS.batch_size)) + print('Latency: {:.3f} ms'.format(latency * 1000)) + print('Throughput: {:.3f} items/sec'.format(1./ latency)) + + # only calculate accuracy when running out all predictions + if iteration == -1: + decode = [] + for i,tr in enumerate(predictions): + for j,itr in enumerate(tr): + for k, otr in enumerate(itr): + try: + index = list(otr).index(tokenizer.EOS_ID) + decode.append(subtokenizer.decode(otr[:index])) + except: + decode.append(subtokenizer.decode(otr)) + bleu_eval.update(decode, labels) + return bleu_eval.result() + +class Dataset(object): + def __init__(self, inputs_file, reference_file, vocab_file): + with tf.io.gfile.GFile(inputs_file) as f: + records = f.read().split("\n") + inputs = [record.strip() for record in records] + if not inputs[-1]: + inputs.pop() + + self.ref_lines = tokenizer.native_to_unicode( + 
tf.io.gfile.GFile(reference_file).read()).strip().splitlines() + + subtokenizer = Subtokenizer(vocab_file) + self.batch = [] + token_lens=[] + for i, line in enumerate(inputs): + enc = subtokenizer.encode(line, add_eos=True) + token_lens.append((i, len(enc))) + + sorted_by_token_input_lens = sorted(token_lens, key=lambda x: x[1], reverse=True) + + sorted_inputs = [None] * len(sorted_by_token_input_lens) + sorted_keys = [0] * len(sorted_by_token_input_lens) + + lines = [] + for i, (index, _) in enumerate(sorted_by_token_input_lens): + sorted_inputs[i] = inputs[index] + sorted_keys[index] = i + enc=subtokenizer.encode(sorted_inputs[i], add_eos=True) + lines.append([enc]) + for i in sorted_keys: + self.batch.append(lines[i]) + + def __getitem__(self, index): + data = self.batch[index] + label = self.ref_lines[index] + return data[0], label + + def __len__(self): + return len(self.batch) + +def main(_): + graph = load_graph(FLAGS.input_graph) + if FLAGS.tune: + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model + + dataset = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file) + calib_dataloader = BaseDataLoader(dataset=dataset, batch_size=FLAGS.batch_size, collate_fn=collate_fn) + + quant_config = StaticQuantConfig() + model = Model(graph) + model.input_tensor_names = ['input_tensor'] + model.output_tensor_names = ['model/Transformer/strided_slice_19'] + q_model = quantize_model(model, quant_config, calib_dataloader) + try: + q_model.save(FLAGS.output_model) + except Exception as e: + print("Failed to save model due to {}".format(str(e))) + + if FLAGS.benchmark: + assert FLAGS.mode == 'performance' or FLAGS.mode == 'accuracy', \ + "Benchmark only supports performance or accuracy mode." + eval_func(graph) + elif FLAGS.mode == 'accuracy': + acc = eval_func(graph) + print('Accuracy is {:.3f}'.format(acc)) + +if __name__ == "__main__": + tf.compat.v1.app.run() diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/prepare_dataset_model.sh b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/prepare_dataset_model.sh new file mode 100644 index 00000000000..3d47dbad80c --- /dev/null +++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/prepare_dataset_model.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# set -x + +DATA_DIR="../data" +MODEL_DIR="../model" + +help() +{ + cat <<- EOF + Desc: Prepare bert dataset + -h --help help info + --data_dir Output data directory + default: './data' + --model_dir Output model directory + default: './model' +EOF + exit 0 +} + +function main { + init_params "$@" + prepare +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --data_dir=*) + DATA_DIR=$(echo $var |cut -f2 -d=) + ;; + --model_dir=*) + MODEL_DIR=$(echo $var |cut -f2 -d=) + ;; + -h|--help) help + ;; + *) + echo "Error: No such parameter: ${var}" + exit 1 + ;; + esac + done +} + +# prepare data and model +function prepare { + if [ ! -d ${DATA_DIR} ]; then + echo '${DATA_DIR} already exists, please check...' + fi + if [ ! -d ${MODEL_DIR} ]; then + echo '${MODEL_DIR} already exists, please check...' 
+ fi + wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v2_2_0/transformer-lt-official-fp32-inference.tar.gz + tar -zxvf transformer-lt-official-fp32-inference.tar.gz + cd transformer-lt-official-fp32-inference + tar -zxvf transformer_lt_official_fp32_pretrained_model.tar.gz + mv transformer_lt_official_fp32_pretrained_model/data ${DATA_DIR} + mv transformer_lt_official_fp32_pretrained_model/graph ${MODEL_DIR} +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..87bc4c7d5c1 --- /dev/null +++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_benchmark.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# set -x + +function main { + + init_params "$@" + + run_benchmark + +} + +# init params +function init_params { + iters=100 + for var in "$@" + do + case $var in + --dataset_location=*) + dataset_location=$(echo "$var" |cut -f2 -d=) + ;; + --input_model=*) + input_model=$(echo "$var" |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo ${var} |cut -f2 -d=) + ;; + esac + done + +} + +function define_mode { + + if [[ ${mode} == "accuracy" ]]; then + mode="accuracy" + elif [[ ${mode} == "performance" ]]; then + mode="performance" + else + echo "Error: No such mode: ${mode}" + exit 1 + fi +} + +# run_benchmark +function run_benchmark { + python main.py \ + --input_graph=${input_model} \ + --inputs_file=${dataset_location}/newstest2014.en \ + --reference_file=${dataset_location}/newstest2014.de \ + --vocab_file=${dataset_location}/vocab.txt \ + --benchmark \ + --mode=${mode} \ + --iters=${iters} \ + --batch_size=${batch_size} +} + +main "$@" + diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..2f2075cf346 --- /dev/null +++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_quant.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# set -x + +function main { + + init_params "$@" + + run_tuning + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --dataset_location=*) + dataset_location=$(echo "$var" |cut -f2 -d=) + ;; + --input_model=*) + input_model=$(echo "$var" |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo "$var" |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input_graph=${input_model} \ + --inputs_file=${dataset_location}/newstest2014.en \ + --reference_file=${dataset_location}/newstest2014.de \ + --vocab_file=${dataset_location}/vocab.txt \ + --output_model=${output_model} \ + --tune +} + +main "$@" + diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/__init__.py b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/metrics.py b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/metrics.py new file mode 100644 index 00000000000..3e41f985c63 --- /dev/null +++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/metrics.py @@ -0,0 +1,490 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Functions for calculating loss, accuracy, and other model metrics. + +Metrics: + - Padded loss, accuracy, and negative log perplexity. Source: + https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/metrics.py + - BLEU approximation. Source: + https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/bleu_hook.py + - ROUGE score. Source: + https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/rouge.py +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import math + +import numpy as np +import six +from six.moves import xrange # pylint: disable=redefined-builtin +import tensorflow as tf + + +def _pad_tensors_to_same_length(x, y): + """Pad x and y so that the results have the same length (second dimension).""" + with tf.name_scope("pad_to_same_length"): + x_length = tf.shape(x)[1] + y_length = tf.shape(y)[1] + + max_length = tf.maximum(x_length, y_length) + + x = tf.pad(x, [[0, 0], [0, max_length - x_length], [0, 0]]) + y = tf.pad(y, [[0, 0], [0, max_length - y_length]]) + return x, y + + +def padded_cross_entropy_loss(logits, labels, smoothing, vocab_size): + """Calculate cross entropy loss while ignoring padding. + + Args: + logits: Tensor of size [batch_size, length_logits, vocab_size] + labels: Tensor of size [batch_size, length_labels] + smoothing: Label smoothing constant, used to determine the on and off values + vocab_size: int size of the vocabulary + Returns: + Returns the cross entropy loss and weight tensors: float32 tensors with + shape [batch_size, max(length_logits, length_labels)] + """ + with tf.name_scope("loss", values=[logits, labels]): + logits, labels = _pad_tensors_to_same_length(logits, labels) + + # Calculate smoothing cross entropy + with tf.name_scope("smoothing_cross_entropy", values=[logits, labels]): + confidence = 1.0 - smoothing + low_confidence = (1.0 - confidence) / tf.to_float(vocab_size - 1) + soft_targets = tf.one_hot( + tf.cast(labels, tf.int32), + depth=vocab_size, + on_value=confidence, + off_value=low_confidence) + xentropy = tf.nn.softmax_cross_entropy_with_logits_v2( + logits=logits, labels=soft_targets) + + # Calculate the best (lowest) possible value of cross entropy, and + # subtract from the cross entropy loss. + normalizing_constant = -( + confidence * tf.log(confidence) + tf.to_float(vocab_size - 1) * + low_confidence * tf.log(low_confidence + 1e-20)) + xentropy -= normalizing_constant + + weights = tf.to_float(tf.not_equal(labels, 0)) + return xentropy * weights, weights + + +def _convert_to_eval_metric(metric_fn): + """Wrap a metric fn that returns scores and weights as an eval metric fn. + + The input metric_fn returns values for the current batch. The wrapper + aggregates the return values collected over all of the batches evaluated. 
+ + Args: + metric_fn: function that returns scores and weights for the current batch's + logits and predicted labels. + + Returns: + function that aggregates the scores and weights from metric_fn. + """ + def problem_metric_fn(*args): + """Returns an aggregation of the metric_fn's returned values.""" + (scores, weights) = metric_fn(*args) + + # The tf.metrics.mean function assures correct aggregation. + return tf.metrics.mean(scores, weights) + return problem_metric_fn + + +def get_eval_metrics(logits, labels, params): + """Return dictionary of model evaluation metrics.""" + metrics = { + "accuracy": _convert_to_eval_metric(padded_accuracy)(logits, labels), + "accuracy_top5": _convert_to_eval_metric(padded_accuracy_top5)( + logits, labels), + "accuracy_per_sequence": _convert_to_eval_metric( + padded_sequence_accuracy)(logits, labels), + "neg_log_perplexity": _convert_to_eval_metric(padded_neg_log_perplexity)( + logits, labels, params["vocab_size"]), + } + + if not params["use_tpu"]: + # TPU does not support tf.py_func + metrics.update({ + "approx_bleu_score": _convert_to_eval_metric( + bleu_score)(logits, labels), + "rouge_2_fscore": _convert_to_eval_metric( + rouge_2_fscore)(logits, labels), + "rouge_L_fscore": _convert_to_eval_metric( + rouge_l_fscore)(logits, labels), + }) + + # Prefix each of the metric names with "metrics/". This allows the metric + # graphs to display under the "metrics" category in TensorBoard. + metrics = {"metrics/%s" % k: v for k, v in six.iteritems(metrics)} + return metrics + + +def padded_accuracy(logits, labels): + """Percentage of times that predictions matches labels on non-0s.""" + with tf.variable_scope("padded_accuracy", values=[logits, labels]): + logits, labels = _pad_tensors_to_same_length(logits, labels) + weights = tf.to_float(tf.not_equal(labels, 0)) + outputs = tf.to_int32(tf.argmax(logits, axis=-1)) + padded_labels = tf.to_int32(labels) + return tf.to_float(tf.equal(outputs, padded_labels)), weights + + +def padded_accuracy_topk(logits, labels, k): + """Percentage of times that top-k predictions matches labels on non-0s.""" + with tf.variable_scope("padded_accuracy_topk", values=[logits, labels]): + logits, labels = _pad_tensors_to_same_length(logits, labels) + weights = tf.to_float(tf.not_equal(labels, 0)) + effective_k = tf.minimum(k, tf.shape(logits)[-1]) + _, outputs = tf.nn.top_k(logits, k=effective_k) + outputs = tf.to_int32(outputs) + padded_labels = tf.to_int32(labels) + padded_labels = tf.expand_dims(padded_labels, axis=-1) + padded_labels += tf.zeros_like(outputs) # Pad to same shape. 
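+    # A prediction counts as correct if the label matches any of the
+    # top-k outputs; summing the per-slot matches over the last axis
+    # yields 1.0 for a hit and 0.0 otherwise.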
+ same = tf.to_float(tf.equal(outputs, padded_labels)) + same_topk = tf.reduce_sum(same, axis=-1) + return same_topk, weights + + +def padded_accuracy_top5(logits, labels): + return padded_accuracy_topk(logits, labels, 5) + + +def padded_sequence_accuracy(logits, labels): + """Percentage of times that predictions matches labels everywhere (non-0).""" + with tf.variable_scope("padded_sequence_accuracy", values=[logits, labels]): + logits, labels = _pad_tensors_to_same_length(logits, labels) + weights = tf.to_float(tf.not_equal(labels, 0)) + outputs = tf.to_int32(tf.argmax(logits, axis=-1)) + padded_labels = tf.to_int32(labels) + not_correct = tf.to_float(tf.not_equal(outputs, padded_labels)) * weights + axis = list(range(1, len(outputs.get_shape()))) + correct_seq = 1.0 - tf.minimum(1.0, tf.reduce_sum(not_correct, axis=axis)) + return correct_seq, tf.constant(1.0) + + +def padded_neg_log_perplexity(logits, labels, vocab_size): + """Average log-perplexity excluding padding 0s. No smoothing.""" + num, den = padded_cross_entropy_loss(logits, labels, 0, vocab_size) + return -num, den + + +def bleu_score(logits, labels): + """Approximate BLEU score computation between labels and predictions. + + An approximate BLEU scoring method since we do not glue word pieces or + decode the ids and tokenize the output. By default, we use ngram order of 4 + and use brevity penalty. Also, this does not have beam search. + + Args: + logits: Tensor of size [batch_size, length_logits, vocab_size] + labels: Tensor of size [batch-size, length_labels] + + Returns: + bleu: int, approx bleu score + """ + predictions = tf.to_int32(tf.argmax(logits, axis=-1)) + # TODO: Look into removing use of py_func + bleu = tf.py_func(compute_bleu, (labels, predictions), tf.float32) + return bleu, tf.constant(1.0) + + +def _get_ngrams_with_counter(segment, max_order): + """Extracts all n-grams up to a given maximum order from an input segment. + + Args: + segment: text segment from which n-grams will be extracted. + max_order: maximum length in tokens of the n-grams returned by this + methods. + + Returns: + The Counter containing all n-grams upto max_order in segment + with a count of how many times each n-gram occurred. + """ + ngram_counts = collections.Counter() + for order in xrange(1, max_order + 1): + for i in xrange(0, len(segment) - order + 1): + ngram = tuple(segment[i:i + order]) + ngram_counts[ngram] += 1 + return ngram_counts + + +def compute_bleu(reference_corpus, translation_corpus, max_order=4, + use_bp=True): + """Computes BLEU score of translated segments against one or more references. + + Args: + reference_corpus: list of references for each translation. Each + reference should be tokenized into a list of tokens. + translation_corpus: list of translations to score. Each translation + should be tokenized into a list of tokens. + max_order: Maximum n-gram order to use when computing BLEU score. + use_bp: boolean, whether to apply brevity penalty. + + Returns: + BLEU score. 
+ """ + reference_length = 0 + translation_length = 0 + bp = 1.0 + geo_mean = 0 + + matches_by_order = [0] * max_order + possible_matches_by_order = [0] * max_order + precisions = [] + + for (references, translations) in zip(reference_corpus, translation_corpus): + reference_length += len(references) + translation_length += len(translations) + ref_ngram_counts = _get_ngrams_with_counter(references, max_order) + translation_ngram_counts = _get_ngrams_with_counter(translations, max_order) + + overlap = dict((ngram, + min(count, translation_ngram_counts[ngram])) + for ngram, count in ref_ngram_counts.items()) + + for ngram in overlap: + matches_by_order[len(ngram) - 1] += overlap[ngram] + for ngram in translation_ngram_counts: + possible_matches_by_order[len(ngram) - 1] += translation_ngram_counts[ + ngram] + + precisions = [0] * max_order + smooth = 1.0 + + for i in xrange(0, max_order): + if possible_matches_by_order[i] > 0: + precisions[i] = float(matches_by_order[i]) / possible_matches_by_order[i] + if matches_by_order[i] > 0: + precisions[i] = float(matches_by_order[i]) / possible_matches_by_order[ + i] + else: + smooth *= 2 + precisions[i] = 1.0 / (smooth * possible_matches_by_order[i]) + else: + precisions[i] = 0.0 + + if max(precisions) > 0: + p_log_sum = sum(math.log(p) for p in precisions if p) + geo_mean = math.exp(p_log_sum / max_order) + + if use_bp: + ratio = translation_length / reference_length + bp = math.exp(1 - 1. / ratio) if ratio < 1.0 else 1.0 + bleu = geo_mean * bp + return np.float32(bleu) + + +def rouge_2_fscore(logits, labels): + """ROUGE-2 F1 score computation between labels and predictions. + + This is an approximate ROUGE scoring method since we do not glue word pieces + or decode the ids and tokenize the output. + + Args: + logits: tensor, model predictions + labels: tensor, gold output. + + Returns: + rouge2_fscore: approx rouge-2 f1 score. + """ + predictions = tf.to_int32(tf.argmax(logits, axis=-1)) + # TODO: Look into removing use of py_func + rouge_2_f_score = tf.py_func(rouge_n, (predictions, labels), tf.float32) + return rouge_2_f_score, tf.constant(1.0) + + +def _get_ngrams(n, text): + """Calculates n-grams. + + Args: + n: which n-grams to calculate + text: An array of tokens + + Returns: + A set of n-grams + """ + ngram_set = set() + text_length = len(text) + max_index_ngram_start = text_length - n + for i in range(max_index_ngram_start + 1): + ngram_set.add(tuple(text[i:i + n])) + return ngram_set + + +def rouge_n(eval_sentences, ref_sentences, n=2): + """Computes ROUGE-N f1 score of two text collections of sentences. + + Source: https://www.microsoft.com/en-us/research/publication/ + rouge-a-package-for-automatic-evaluation-of-summaries/ + + Args: + eval_sentences: Predicted sentences. + ref_sentences: Sentences from the reference set + n: Size of ngram. Defaults to 2. + + Returns: + f1 score for ROUGE-N + """ + f1_scores = [] + for eval_sentence, ref_sentence in zip(eval_sentences, ref_sentences): + eval_ngrams = _get_ngrams(n, eval_sentence) + ref_ngrams = _get_ngrams(n, ref_sentence) + ref_count = len(ref_ngrams) + eval_count = len(eval_ngrams) + + # Count the overlapping ngrams between evaluated and reference + overlapping_ngrams = eval_ngrams.intersection(ref_ngrams) + overlapping_count = len(overlapping_ngrams) + + # Handle edge case. 
This isn't mathematically correct, but it's good enough + if eval_count == 0: + precision = 0.0 + else: + precision = float(overlapping_count) / eval_count + if ref_count == 0: + recall = 0.0 + else: + recall = float(overlapping_count) / ref_count + f1_scores.append(2.0 * ((precision * recall) / (precision + recall + 1e-8))) + + # return overlapping_count / reference_count + return np.mean(f1_scores, dtype=np.float32) + + +def rouge_l_fscore(predictions, labels): + """ROUGE scores computation between labels and predictions. + + This is an approximate ROUGE scoring method since we do not glue word pieces + or decode the ids and tokenize the output. + + Args: + predictions: tensor, model predictions + labels: tensor, gold output. + + Returns: + rouge_l_fscore: approx rouge-l f1 score. + """ + outputs = tf.to_int32(tf.argmax(predictions, axis=-1)) + rouge_l_f_score = tf.py_func(rouge_l_sentence_level, (outputs, labels), + tf.float32) + return rouge_l_f_score, tf.constant(1.0) + + +def rouge_l_sentence_level(eval_sentences, ref_sentences): + """Computes ROUGE-L (sentence level) of two collections of sentences. + + Source: https://www.microsoft.com/en-us/research/publication/ + rouge-a-package-for-automatic-evaluation-of-summaries/ + + Calculated according to: + R_lcs = LCS(X,Y)/m + P_lcs = LCS(X,Y)/n + F_lcs = ((1 + beta^2)*R_lcs*P_lcs) / (R_lcs + (beta^2) * P_lcs) + + where: + X = reference summary + Y = Candidate summary + m = length of reference summary + n = length of candidate summary + + Args: + eval_sentences: The sentences that have been picked by the summarizer + ref_sentences: The sentences from the reference set + + Returns: + A float: F_lcs + """ + + f1_scores = [] + for eval_sentence, ref_sentence in zip(eval_sentences, ref_sentences): + m = float(len(ref_sentence)) + n = float(len(eval_sentence)) + lcs = _len_lcs(eval_sentence, ref_sentence) + f1_scores.append(_f_lcs(lcs, m, n)) + return np.mean(f1_scores, dtype=np.float32) + + +def _len_lcs(x, y): + """Returns the length of the Longest Common Subsequence between two seqs. + + Source: http://www.algorithmist.com/index.php/Longest_Common_Subsequence + + Args: + x: sequence of words + y: sequence of words + + Returns + integer: Length of LCS between x and y + """ + table = _lcs(x, y) + n, m = len(x), len(y) + return table[n, m] + + +def _lcs(x, y): + """Computes the length of the LCS between two seqs. + + The implementation below uses a DP programming algorithm and runs + in O(nm) time where n = len(x) and m = len(y). + Source: http://www.algorithmist.com/index.php/Longest_Common_Subsequence + + Args: + x: collection of words + y: collection of words + + Returns: + Table of dictionary of coord and len lcs + """ + n, m = len(x), len(y) + table = dict() + for i in range(n + 1): + for j in range(m + 1): + if i == 0 or j == 0: + table[i, j] = 0 + elif x[i - 1] == y[j - 1]: + table[i, j] = table[i - 1, j - 1] + 1 + else: + table[i, j] = max(table[i - 1, j], table[i, j - 1]) + return table + + +def _f_lcs(llcs, m, n): + """Computes the LCS-based F-measure score. + + Source: http://research.microsoft.com/en-us/um/people/cyl/download/papers/ + rouge-working-note-v1.3.1.pdf + + Args: + llcs: Length of LCS + m: number of words in reference summary + n: number of words in candidate summary + + Returns: + Float. 
LCS-based F-measure score + """ + r_lcs = llcs / m + p_lcs = llcs / n + beta = p_lcs / (r_lcs + 1e-12) + num = (1 + (beta ** 2)) * r_lcs * p_lcs + denom = r_lcs + ((beta ** 2) * p_lcs) + f_lcs = num / (denom + 1e-12) + return f_lcs diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer.py b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer.py new file mode 100644 index 00000000000..33f144b23fd --- /dev/null +++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer.py @@ -0,0 +1,620 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Defines Subtokenizer class to encode and decode strings.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import re +import sys +import unicodedata + +import numpy as np +import six +from six.moves import xrange # pylint: disable=redefined-builtin +import tensorflow as tf + +PAD = "" +PAD_ID = 0 +EOS = "" +EOS_ID = 1 +RESERVED_TOKENS = [PAD, EOS] + +# Set of characters that will be used in the function _escape_token() (see func +# docstring for more details). +# This set is added to the alphabet list to ensure that all escaped tokens can +# be encoded. +_ESCAPE_CHARS = set(u"\\_u;0123456789") +# Regex for the function _unescape_token(), the inverse of _escape_token(). +# This is used to find "\u", "\\", and "\###;" substrings in the token. +_UNESCAPE_REGEX = re.compile(r"\\u|\\\\|\\([0-9]+);") + +_UNDEFINED_UNICODE = u"\u3013" + +# Set contains all letter and number characters. +_ALPHANUMERIC_CHAR_SET = set( + six.unichr(i) for i in xrange(sys.maxunicode) + if (unicodedata.category(six.unichr(i)).startswith("L") or + unicodedata.category(six.unichr(i)).startswith("N"))) + +# min_count is the minimum number of times a subtoken must appear in the data +# before before it is added to the vocabulary. The value is found using binary +# search to obtain the target vocabulary size. +_MIN_MIN_COUNT = 1 # min value to use when binary searching for min_count +_MAX_MIN_COUNT = 1000 # max value to use when binary searching for min_count + + +class Subtokenizer(object): + """Encodes and decodes strings to/from integer IDs.""" + + def __init__(self, vocab_file, reserved_tokens=None): + """Initializes class, creating a vocab file if data_files is provided.""" + tf.compat.v1.logging.info("Initializing Subtokenizer from file %s." 
% + vocab_file) + + if reserved_tokens is None: + reserved_tokens = RESERVED_TOKENS + + self.subtoken_list = _load_vocab_file(vocab_file, reserved_tokens) + self.alphabet = _generate_alphabet_dict(self.subtoken_list) + self.subtoken_to_id_dict = _list_to_index_dict(self.subtoken_list) + + self.max_subtoken_length = 0 + for subtoken in self.subtoken_list: + self.max_subtoken_length = max(self.max_subtoken_length, len(subtoken)) + + # Create cache to speed up subtokenization + self._cache_size = 2 ** 20 + self._cache = [(None, None)] * self._cache_size + + @staticmethod + def init_from_files( + vocab_file, files, target_vocab_size, threshold, min_count=None, + file_byte_limit=1e6, reserved_tokens=None, correct_strip=True): + """Create subtoken vocabulary based on files, and save vocab to file. + + Args: + vocab_file: String name of vocab file to store subtoken vocabulary. + files: List of file paths that will be used to generate vocabulary. + target_vocab_size: target vocabulary size to generate. + threshold: int threshold of vocabulary size to accept. + min_count: int minimum count to use for generating the vocabulary. The min + count is the minimum number of times a subtoken should appear in the + files before it is added to the vocabulary. If set to none, this value + is found using binary search. + file_byte_limit: (Default 1e6) Maximum number of bytes of sample text that + will be drawn from the files. + reserved_tokens: List of string tokens that are guaranteed to be at the + beginning of the subtoken vocabulary list. + correct_strip: Whether to convert text to unicode before strip. + + Returns: + Subtokenizer object + """ + if reserved_tokens is None: + reserved_tokens = RESERVED_TOKENS + + if tf.io.gfile.exists(vocab_file): + tf.compat.v1.logging.info("Vocab file already exists (%s)" % vocab_file) + else: + tf.compat.v1.logging.info("Begin steps to create subtoken vocabulary...") + token_counts = _count_tokens(files, file_byte_limit, correct_strip) + alphabet = _generate_alphabet_dict(token_counts) + subtoken_list = _generate_subtokens_with_target_vocab_size( + token_counts, alphabet, target_vocab_size, threshold, min_count, + reserved_tokens) + tf.compat.v1.logging.info("Generated vocabulary with %d subtokens." % + len(subtoken_list)) + _save_vocab_file(vocab_file, subtoken_list) + return Subtokenizer(vocab_file) + + def encode(self, raw_string, add_eos=False): + """Encodes a string into a list of int subtoken ids.""" + ret = [] + tokens = _split_string_to_tokens(native_to_unicode(raw_string)) + for token in tokens: + ret.extend(self._token_to_subtoken_ids(token)) + if add_eos: + ret.append(EOS_ID) + return ret + + def _token_to_subtoken_ids(self, token): + """Encode a single token into a list of subtoken ids.""" + cache_location = hash(token) % self._cache_size + cache_key, cache_value = self._cache[cache_location] + if cache_key == token: + return cache_value + + ret = _split_token_to_subtokens( + _escape_token(token, self.alphabet), self.subtoken_to_id_dict, + self.max_subtoken_length) + ret = [self.subtoken_to_id_dict[subtoken_id] for subtoken_id in ret] + + self._cache[cache_location] = (token, ret) + return ret + + def decode(self, subtokens): + """Converts list of int subtokens ids into a string.""" + if isinstance(subtokens, np.ndarray): + # Note that list(subtokens) converts subtokens to a python list, but the + # items remain as np.int32. This converts both the array and its items. 
+ subtokens = subtokens.tolist() + + if not subtokens: + return "" + + assert isinstance(subtokens, list) and isinstance(subtokens[0], int), ( + "Subtokens argument passed into decode() must be a list of integers.") + + return _unicode_to_native( + _join_tokens_to_string(self._subtoken_ids_to_tokens(subtokens))) + + def _subtoken_ids_to_tokens(self, subtokens): + """Convert list of int subtoken ids to a list of string tokens.""" + escaped_tokens = "".join([ + self.subtoken_list[s] for s in subtokens + if s < len(self.subtoken_list)]) + escaped_tokens = escaped_tokens.split("_") + + # All tokens in the vocabulary list have been escaped (see _escape_token()) + # so each token must be unescaped when decoding. + ret = [] + for token in escaped_tokens: + if token: + ret.append(_unescape_token(token)) + return ret + + +def _save_vocab_file(vocab_file, subtoken_list): + """Save subtokens to file.""" + with tf.io.gfile.GFile(vocab_file, mode="w") as f: + for subtoken in subtoken_list: + f.write("'%s'\n" % _unicode_to_native(subtoken)) + + +def _load_vocab_file(vocab_file, reserved_tokens=None): + """Load vocabulary while ensuring reserved tokens are at the top.""" + if reserved_tokens is None: + reserved_tokens = RESERVED_TOKENS + + subtoken_list = [] + with tf.io.gfile.GFile(vocab_file, mode="r") as f: + for line in f: + subtoken = native_to_unicode(line.strip()) + subtoken = subtoken[1:-1] # Remove surrounding single-quotes + if subtoken in reserved_tokens: + continue + subtoken_list.append(native_to_unicode(subtoken)) + return reserved_tokens + subtoken_list + + +def native_to_unicode(s): + """Convert string to unicode (required in Python 2).""" + try: # Python 2 + return s if isinstance(s, unicode) else s.decode("utf-8") + except NameError: # Python 3 + return s + + +def _unicode_to_native(s): + """Convert string from unicode to native format (required in Python 2).""" + try: # Python 2 + return s.encode("utf-8") if isinstance(s, unicode) else s + except NameError: # Python 3 + return s + + +def _split_string_to_tokens(text): + """Splits text to a list of string tokens.""" + if not text: + return [] + ret = [] + token_start = 0 + # Classify each character in the input string + is_alnum = [c in _ALPHANUMERIC_CHAR_SET for c in text] + for pos in xrange(1, len(text)): + if is_alnum[pos] != is_alnum[pos - 1]: + token = text[token_start:pos] + if token != u" " or token_start == 0: + ret.append(token) + token_start = pos + final_token = text[token_start:] + ret.append(final_token) + return ret + + +def _join_tokens_to_string(tokens): + """Join a list of string tokens into a single string.""" + token_is_alnum = [t[0] in _ALPHANUMERIC_CHAR_SET for t in tokens] + ret = [] + for i, token in enumerate(tokens): + if i > 0 and token_is_alnum[i - 1] and token_is_alnum[i]: + ret.append(u" ") + ret.append(token) + return "".join(ret) + + +def _escape_token(token, alphabet): + r"""Replace characters that aren't in the alphabet and append "_" to token. + + Apply three transformations to the token: + 1. Replace underline character "_" with "\u", and backslash "\" with "\\". + 2. Replace characters outside of the alphabet with "\###;", where ### is the + character's Unicode code point. + 3. Appends "_" to mark the end of a token. 
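+
+ For example (the case exercised in tokenizer_test.py), escaping the token
+ "abc_\4" against the alphabet {"a", "b", "c", "_", "\", "u", ";"} yields
+ "abc\u\\\52;_", since "4" is not in the alphabet and ord("4") == 52.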
+ + Args: + token: unicode string to be escaped + alphabet: list of all known characters + + Returns: + escaped string + """ + token = token.replace(u"\\", u"\\\\").replace(u"_", u"\\u") + ret = [c if c in alphabet and c != u"\n" else r"\%d;" % ord(c) for c in token] + return u"".join(ret) + "_" + + +def _unescape_token(token): + r"""Replaces escaped characters in the token with their unescaped versions. + + Applies inverse transformations as _escape_token(): + 1. Replace "\u" with "_", and "\\" with "\". + 2. Replace "\###;" with the unicode character the ### refers to. + + Args: + token: escaped string + + Returns: + unescaped string + """ + + def match(m): + r"""Returns replacement string for matched object. + + Matched objects contain one of the strings that matches the regex pattern: + r"\\u|\\\\|\\([0-9]+);" + The strings can be '\u', '\\', or '\###;' (### is any digit number). + + m.group(0) refers to the entire matched string ('\u', '\\', or '\###;'). + m.group(1) refers to the first parenthesized subgroup ('###'). + + m.group(0) exists for all match objects, while m.group(1) exists only for + the string '\###;'. + + This function looks to see if m.group(1) exists. If it doesn't, then the + matched string must be '\u' or '\\' . In this case, the corresponding + replacement ('_' and '\') are returned. Note that in python, a single + backslash is written as '\\', and double backslash as '\\\\'. + + If m.group(1) exists, then use the integer in m.group(1) to return a + unicode character. + + Args: + m: match object + + Returns: + String to replace matched object with. + """ + # Check if the matched strings are '\u' or '\\'. + if m.group(1) is None: + return u"_" if m.group(0) == u"\\u" else u"\\" + + # If m.group(1) exists, try and return unicode character. + try: + return six.unichr(int(m.group(1))) + except (ValueError, OverflowError) as _: + return _UNDEFINED_UNICODE + + # Use match function to replace escaped substrings in the token. + return _UNESCAPE_REGEX.sub(match, token) + + +def _count_tokens(files, file_byte_limit=1e6, correct_strip=True): + """Return token counts of words in the files. + + Samples file_byte_limit bytes from each file, and counts the words that appear + in the samples. The samples are semi-evenly distributed across the file. + + Args: + files: List of filepaths + file_byte_limit: Max number of bytes that will be read from each file. + correct_strip: Whether to convert text to unicode before strip. This affects + vocabulary generation for PY2. Sets correct_strip to False in PY2 to + reproduce previous common public result. Sets correct_strip to True will + let PY2 and PY3 get a consistent vocabulary. + + Returns: + Dictionary mapping tokens to the number of times they appear in the sampled + lines from the files. 
+ """ + token_counts = collections.defaultdict(int) + + for filepath in files: + with tf.io.gfile.GFile(filepath, mode="r") as reader: + file_byte_budget = file_byte_limit + counter = 0 + lines_to_skip = int(reader.size() / (file_byte_budget * 2)) + for line in reader: + if counter < lines_to_skip: + counter += 1 + else: + if file_byte_budget < 0: + break + if correct_strip: + line = native_to_unicode(line) + line = line.strip() + file_byte_budget -= len(line) + counter = 0 + + # Add words to token counts + for token in _split_string_to_tokens(native_to_unicode(line)): + token_counts[token] += 1 + return token_counts + + +def _list_to_index_dict(lst): + """Create dictionary mapping list items to their indices in the list.""" + return {item: n for n, item in enumerate(lst)} + + +def _split_token_to_subtokens(token, subtoken_dict, max_subtoken_length): + """Splits a token into subtokens defined in the subtoken dict.""" + ret = [] + start = 0 + token_len = len(token) + while start < token_len: + # Find the longest subtoken, so iterate backwards. + for end in xrange(min(token_len, start + max_subtoken_length), start, -1): + subtoken = token[start:end] + if subtoken in subtoken_dict: + ret.append(subtoken) + start = end + break + else: # Did not break + # If there is no possible encoding of the escaped token then one of the + # characters in the token is not in the alphabet. This should be + # impossible and would be indicative of a bug. + raise ValueError("Was unable to split token \"%s\" into subtokens." % + token) + return ret + + +def _generate_subtokens_with_target_vocab_size( + token_counts, alphabet, target_size, threshold, min_count=None, + reserved_tokens=None): + """Generate subtoken vocabulary close to the target size.""" + if reserved_tokens is None: + reserved_tokens = RESERVED_TOKENS + + if min_count is not None: + tf.compat.v1.logging.info( + "Using min_count=%d to generate vocab with target size %d" % + (min_count, target_size)) + return _generate_subtokens( + token_counts, alphabet, min_count, reserved_tokens=reserved_tokens) + + def bisect(min_val, max_val): + """Recursive function to binary search for subtoken vocabulary.""" + cur_count = (min_val + max_val) // 2 + tf.compat.v1.logging.info("Binary search: trying min_count=%d (%d %d)" % + (cur_count, min_val, max_val)) + subtoken_list = _generate_subtokens( + token_counts, alphabet, cur_count, reserved_tokens=reserved_tokens) + + val = len(subtoken_list) + tf.compat.v1.logging.info( + "Binary search: min_count=%d resulted in %d tokens" % (cur_count, val)) + + within_threshold = abs(val - target_size) < threshold + if within_threshold or min_val >= max_val or cur_count < 2: + return subtoken_list + if val > target_size: + other_subtoken_list = bisect(cur_count + 1, max_val) + else: + other_subtoken_list = bisect(min_val, cur_count - 1) + + # Return vocabulary dictionary with the closest number of tokens. 
+ other_val = len(other_subtoken_list) + if abs(other_val - target_size) < abs(val - target_size): + return other_subtoken_list + return subtoken_list + + tf.compat.v1.logging.info("Finding best min_count to get target size of %d" % + target_size) + return bisect(_MIN_MIN_COUNT, _MAX_MIN_COUNT) + + +def _generate_alphabet_dict(iterable, reserved_tokens=None): + """Create set of characters that appear in any element in the iterable.""" + if reserved_tokens is None: + reserved_tokens = RESERVED_TOKENS + alphabet = {c for token in iterable for c in token} + alphabet |= {c for token in reserved_tokens for c in token} + alphabet |= _ESCAPE_CHARS # Add escape characters to alphabet set. + return alphabet + + +def _count_and_gen_subtokens( + token_counts, alphabet, subtoken_dict, max_subtoken_length): + """Count number of times subtokens appear, and generate new subtokens. + + Args: + token_counts: dict mapping tokens to the number of times they appear in the + original files. + alphabet: list of allowed characters. Used to escape the tokens, which + guarantees that all tokens can be split into subtokens. + subtoken_dict: dict mapping subtokens to ids. + max_subtoken_length: maximum length of subtoken in subtoken_dict. + + Returns: + A defaultdict mapping subtokens to the number of times they appear in the + tokens. The dict may contain new subtokens. + """ + subtoken_counts = collections.defaultdict(int) + for token, count in six.iteritems(token_counts): + token = _escape_token(token, alphabet) + subtokens = _split_token_to_subtokens( + token, subtoken_dict, max_subtoken_length) + + # Generate new subtokens by taking substrings from token. + start = 0 + for subtoken in subtokens: + for end in xrange(start + 1, len(token) + 1): + new_subtoken = token[start:end] + subtoken_counts[new_subtoken] += count + start += len(subtoken) + + return subtoken_counts + + +def _filter_and_bucket_subtokens(subtoken_counts, min_count): + """Return a bucketed list of subtokens that are filtered by count. + + Args: + subtoken_counts: defaultdict mapping subtokens to their counts + min_count: int count used to filter subtokens + + Returns: + List of subtoken sets, where subtokens in set i have the same length=i. + """ + # Create list of buckets, where subtokens in bucket i have length i. + subtoken_buckets = [] + for subtoken, count in six.iteritems(subtoken_counts): + if count < min_count: # Filter out subtokens that don't appear enough + continue + while len(subtoken_buckets) <= len(subtoken): + subtoken_buckets.append(set()) + subtoken_buckets[len(subtoken)].add(subtoken) + return subtoken_buckets + + +def _gen_new_subtoken_list( + subtoken_counts, min_count, alphabet, reserved_tokens=None): + """Generate candidate subtokens ordered by count, and new max subtoken length. + + Add subtokens to the candidate list in order of length (longest subtokens + first). When a subtoken is added, the counts of each of its prefixes are + decreased. Prefixes that don't appear much outside the subtoken are not added + to the candidate list. + + For example: + subtoken being added to candidate list: 'translate' + subtoken_counts: {'translate':10, 't':40, 'tr':16, 'tra':12, ...} + min_count: 5 + + When 'translate' is added, subtoken_counts is updated to: + {'translate':0, 't':30, 'tr':6, 'tra': 2, ...} + + The subtoken 'tra' will not be added to the candidate list, because it appears + twice (less than min_count) outside of 'translate'. 
+ + Args: + subtoken_counts: defaultdict mapping str subtokens to int counts + min_count: int minimum count requirement for subtokens + alphabet: set of characters. Each character is added to the subtoken list to + guarantee that all tokens can be encoded. + reserved_tokens: list of tokens that will be added to the beginning of the + returned subtoken list. + + Returns: + List of candidate subtokens in decreasing count order, and maximum subtoken + length + """ + if reserved_tokens is None: + reserved_tokens = RESERVED_TOKENS + + # Create a list of (count, subtoken) for each candidate subtoken. + subtoken_candidates = [] + + # Use bucketted list to iterate through subtokens in order of length. + # subtoken_buckets[i] = set(subtokens), where each subtoken has length i. + subtoken_buckets = _filter_and_bucket_subtokens(subtoken_counts, min_count) + max_subtoken_length = len(subtoken_buckets) - 1 + + # Go through the list in reverse order to consider longer subtokens first. + for subtoken_len in xrange(max_subtoken_length, 0, -1): + for subtoken in subtoken_buckets[subtoken_len]: + count = subtoken_counts[subtoken] + + # Possible if this subtoken is a prefix of another token. + if count < min_count: + continue + + # Ignore alphabet/reserved tokens, which will be added manually later. + if subtoken not in alphabet and subtoken not in reserved_tokens: + subtoken_candidates.append((count, subtoken)) + + # Decrement count of the subtoken's prefixes (if a longer subtoken is + # added, its prefixes lose priority to be added). + for end in xrange(1, subtoken_len): + subtoken_counts[subtoken[:end]] -= count + + # Add alphabet subtokens (guarantees that all strings are encodable). + subtoken_candidates.extend((subtoken_counts.get(a, 0), a) for a in alphabet) + + # Order subtoken candidates by decreasing count. + subtoken_list = [t for _, t in sorted(subtoken_candidates, reverse=True)] + + # Add reserved tokens to beginning of the list. + subtoken_list = reserved_tokens + subtoken_list + return subtoken_list, max_subtoken_length + + +def _generate_subtokens( + token_counts, alphabet, min_count, num_iterations=4, + reserved_tokens=None): + """Create a list of subtokens in decreasing order of frequency. + + Args: + token_counts: dict mapping str tokens -> int count + alphabet: set of characters + min_count: int minimum number of times a subtoken must appear before it is + added to the vocabulary. + num_iterations: int number of iterations to generate new tokens. + reserved_tokens: list of tokens that will be added to the beginning to the + returned subtoken list. + + Returns: + Sorted list of subtokens (most frequent first) + """ + if reserved_tokens is None: + reserved_tokens = RESERVED_TOKENS + + # Use alphabet set to create initial list of subtokens + subtoken_list = reserved_tokens + list(alphabet) + max_subtoken_length = 1 + + # On each iteration, segment all words using the subtokens defined in + # subtoken_dict, count how often the resulting subtokens appear, and update + # the dictionary with subtokens w/ high enough counts. + for i in xrange(num_iterations): + tf.compat.v1.logging.info("\tGenerating subtokens: iteration %d" % i) + # Generate new subtoken->id dictionary using the new subtoken list. + subtoken_dict = _list_to_index_dict(subtoken_list) + + # Create dict mapping subtoken->count, with additional subtokens created + # from substrings taken from the tokens. 
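+ # Each token is segmented with the current subtoken_dict, and every substring
+ # starting at a segmentation boundary is credited with the token's count;
+ # this is how new candidate subtokens get proposed.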
+ subtoken_counts = _count_and_gen_subtokens( + token_counts, alphabet, subtoken_dict, max_subtoken_length) + + # Generate new list of subtokens sorted by subtoken count. + subtoken_list, max_subtoken_length = _gen_new_subtoken_list( + subtoken_counts, min_count, alphabet, reserved_tokens) + + tf.compat.v1.logging.info("\tVocab size: %d" % len(subtoken_list)) + return subtoken_list diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer_test.py b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer_test.py new file mode 100644 index 00000000000..f757389f30d --- /dev/null +++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer_test.py @@ -0,0 +1,182 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Test Subtokenizer and string helper methods.""" + +import collections +import tempfile + +import tensorflow as tf # pylint: disable=g-bad-import-order + +from official.transformer.utils import tokenizer + + +class SubtokenizerTest(tf.test.TestCase): + + def _init_subtokenizer(self, vocab_list): + temp_file = tempfile.NamedTemporaryFile(delete=False) + with tf.io.gfile.GFile(temp_file.name, "w") as w: + for subtoken in vocab_list: + w.write("'%s'" % subtoken) + w.write("\n") + return tokenizer.Subtokenizer(temp_file.name, reserved_tokens=[]) + + def test_encode(self): + vocab_list = ["123_", "test", "ing_"] + subtokenizer = self._init_subtokenizer(vocab_list) + s = "testing 123" + encoded_list = subtokenizer.encode(s) + self.assertEqual([1, 2, 0], encoded_list) + + def test_decode(self): + vocab_list = ["123_", "test", "ing_"] + subtokenizer = self._init_subtokenizer(vocab_list) + encoded_list = [1, 2, 0] # testing 123 + decoded_str = subtokenizer.decode(encoded_list) + self.assertEqual("testing 123", decoded_str) + + def test_subtoken_ids_to_tokens(self): + vocab_list = ["123_", "test", "ing_"] + subtokenizer = self._init_subtokenizer(vocab_list) + encoded_list = [1, 2, 0] # testing 123 + token_list = subtokenizer._subtoken_ids_to_tokens(encoded_list) + self.assertEqual([u"testing", u"123"], token_list) + + +class StringHelperTest(tf.test.TestCase): + + def test_split_string_to_tokens(self): + text = "test? testing 123." + + tokens = tokenizer._split_string_to_tokens(text) + self.assertEqual(["test", "? ", "testing", "123", "."], tokens) + + def test_join_tokens_to_string(self): + tokens = ["test", "? ", "testing", "123", "."] + + s = tokenizer._join_tokens_to_string(tokens) + self.assertEqual("test? 
testing 123.", s) + + def test_escape_token(self): + token = u"abc_\\4" + alphabet = set("abc_\\u;") + + escaped_token = tokenizer._escape_token(token, alphabet) + self.assertEqual("abc\\u\\\\\\52;_", escaped_token) + + def test_unescape_token(self): + escaped_token = u"Underline: \\u, Backslash: \\\\, Unicode: \\52;" + + unescaped_token = tokenizer._unescape_token(escaped_token) + self.assertEqual( + "Underline: _, Backslash: \\, Unicode: 4", unescaped_token) + + def test_list_to_index_dict(self): + lst = ["test", "strings"] + + d = tokenizer._list_to_index_dict(lst) + self.assertDictEqual({"test": 0, "strings": 1}, d) + + def test_split_token_to_subtokens(self): + token = "abc" + subtoken_dict = {"a": 0, "b": 1, "c": 2, "ab": 3} + max_subtoken_length = 2 + + subtokens = tokenizer._split_token_to_subtokens( + token, subtoken_dict, max_subtoken_length) + self.assertEqual(["ab", "c"], subtokens) + + def test_generate_alphabet_dict(self): + s = ["testing", "123"] + reserved_tokens = ["???"] + + alphabet = tokenizer._generate_alphabet_dict(s, reserved_tokens) + self.assertIn("?", alphabet) + self.assertIn("t", alphabet) + self.assertIn("e", alphabet) + self.assertIn("s", alphabet) + self.assertIn("i", alphabet) + self.assertIn("n", alphabet) + self.assertIn("g", alphabet) + self.assertIn("1", alphabet) + self.assertIn("2", alphabet) + self.assertIn("3", alphabet) + + def test_count_and_gen_subtokens(self): + token_counts = {"abc": 5} + alphabet = set("abc_") + subtoken_dict = {"a": 0, "b": 1, "c": 2, "_": 3} + max_subtoken_length = 2 + + subtoken_counts = tokenizer._count_and_gen_subtokens( + token_counts, alphabet, subtoken_dict, max_subtoken_length) + + self.assertIsInstance(subtoken_counts, collections.defaultdict) + self.assertDictEqual( + {"a": 5, "b": 5, "c": 5, "_": 5, "ab": 5, "bc": 5, "c_": 5, + "abc": 5, "bc_": 5, "abc_": 5}, subtoken_counts) + + def test_filter_and_bucket_subtokens(self): + subtoken_counts = collections.defaultdict( + int, {"a": 2, "b": 4, "c": 1, "ab": 6, "ac": 3, "abbc": 5}) + min_count = 3 + + subtoken_buckets = tokenizer._filter_and_bucket_subtokens( + subtoken_counts, min_count) + + self.assertEqual(len(subtoken_buckets[0]), 0) + self.assertEqual(set("b"), subtoken_buckets[1]) + self.assertEqual(set(["ab", "ac"]), subtoken_buckets[2]) + self.assertEqual(len(subtoken_buckets[3]), 0) + self.assertEqual(set(["abbc"]), subtoken_buckets[4]) + + def test_gen_new_subtoken_list(self): + subtoken_counts = collections.defaultdict( + int, {"translate": 10, "t": 40, "tr": 16, "tra": 12}) + min_count = 5 + alphabet = set("translate") + reserved_tokens = ["reserved", "tokens"] + + subtoken_list, max_token_length = tokenizer._gen_new_subtoken_list( + subtoken_counts, min_count, alphabet, reserved_tokens) + + # Check that "tra" isn"t in the list (its count should be decremented to 2, + # so it should not be added to the canddiate list). 
+ self.assertNotIn("tra", subtoken_list) + + self.assertIn("tr", subtoken_list) + self.assertIn("t", subtoken_list) + + self.assertEqual(len("translate"), max_token_length) + + def test_generate_subtokens(self): + token_counts = {"ab": 1, "bc": 3, "abc": 5} + alphabet = set("abc_") + min_count = 100 + num_iterations = 1 + reserved_tokens = ["reserved", "tokens"] + + vocab_list = tokenizer._generate_subtokens( + token_counts, alphabet, min_count, num_iterations, reserved_tokens) + + # Check that reserved tokens are at the front of the list + self.assertEqual(vocab_list[:2], reserved_tokens) + + # Check that each character in alphabet is in the vocab list + for c in alphabet: + self.assertIn(c, vocab_list) + + +if __name__ == "__main__": + tf.test.main() diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/README.md b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/README.md new file mode 100644 index 00000000000..a2c4fbfbbb8 --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/README.md @@ -0,0 +1,130 @@ +Step-by-Step +============ + +This document is used to list steps of reproducing TensorFlow Object Detection models tuning results. This example can run on Intel CPUs and GPUs. + +# Prerequisite + + +## 1. Environment +Recommend python 3.9 or higher version. + +### Install Intel® Neural Compressor +```shell +pip install neural-compressor +``` + +### Install Intel Tensorflow +```shell +pip install tensorflow +``` +> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). + +### Installation Dependency packages +```shell +cd examples/3.x_api/tensorflow/object_detection +pip install -r requirements.txt +cd faster_rcnn_resnet50/quantization/ptq +``` + +### Install Protocol Buffer Compiler + +`Protocol Buffer Compiler` in version higher than 3.0.0 is necessary ingredient for automatic COCO dataset preparation. To install please follow +[Protobuf installation instructions](https://grpc.io/docs/protoc-installation/#install-using-a-package-manager). + +### Install Intel Extension for Tensorflow + +#### Quantizing the model on Intel GPU(Mandatory to install ITEX) +Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[xpu] +``` +For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers) + +#### Quantizing the model on Intel CPU(Optional to install ITEX) +Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +> **Note**: +> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX. + +## 2. Prepare Model + +```shell +wget https://storage.googleapis.com/intel-optimized-tensorflow/models/faster_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz +tar -xvf faster_rcnn_resnet50_fp32_coco_pretrained_model.tar.gz +``` + +## 3. 
Prepare Dataset
+
+### Automatic dataset download
+
+> **_Note: the `prepare_dataset.sh` script works with TF version 1.x._**
+
+Run the `prepare_dataset.sh` script located in `examples/3.x_api/tensorflow/object_detection`.
+
+Usage:
+```shell
+cd examples/3.x_api/tensorflow/object_detection
+. prepare_dataset.sh
+cd faster_rcnn_resnet50/quantization/ptq
+```
+
+This script downloads the *train*, *validation* and *test* COCO datasets and converts them to
+TensorFlow records using the dedicated script from `https://github.com/tensorflow/models.git`.
+
+### Manual dataset download
+Download the COCO dataset from the [official website](https://cocodataset.org/#download).
+
+
+# Run
+
+## 1. Quantization
+
+  ```shell
+  # The command for running faster_rcnn_resnet50
+  bash run_quant.sh --input_model=./faster_rcnn_resnet50_fp32_coco_pretrained_model/frozen_inference_graph.pb --output_model=./tensorflow-faster_rcnn_resnet50-tune.pb --dataset_location=/path/to/dataset/coco_val.record
+  ```
+
+## 2. Benchmark
+  ```shell
+  bash run_benchmark.sh --input_model=./tensorflow-faster_rcnn_resnet50-tune.pb --dataset_location=/path/to/dataset/coco_val.record --mode=performance
+  ```
+
+Details of enabling Intel® Neural Compressor on faster_rcnn_resnet50 for TensorFlow
+=========================
+
+This is a tutorial on how to enable the faster_rcnn_resnet50 model with Intel® Neural Compressor.
+## User Code Analysis
+The user provides the FP32 *model*, a calibration dataloader *calib_dataloader*, and a custom *eval_func* that encapsulates the evaluation dataset and metric.
+
+For faster_rcnn_resnet50 we take this approach because our philosophy is to enable the model with minimal changes. Hence only two changes to the original code are needed: implementing the calibration dataloader and making the necessary changes to *eval_func*.
+
+### Code update
+
+After the preparation steps are done, we only need to update main.py as below.
+```python
+    if args.tune:
+        from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model
+
+        quant_config = StaticQuantConfig()
+        model = Model(args.input_graph)
+        model.input_tensor_names = ['image_tensor']
+        model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"]
+        q_model = quantize_model(model, quant_config, calib_dataloader)
+        q_model.save(args.output_model)
+
+    if args.benchmark:
+        if args.mode == 'performance':
+            evaluate(args.input_graph)
+        else:
+            accuracy = evaluate(args.input_graph)
+            print('Batch size = %d' % args.batch_size)
+            print("Accuracy: %.5f" % accuracy)
+```
+
+The `quantize_model` API returns the quantized model, which is then saved to the path given by `--output_model`.
diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/coco_tools.py b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/coco_tools.py
new file mode 100644
index 00000000000..2f9369798df
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/coco_tools.py
@@ -0,0 +1,694 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Wrappers for third party pycocotools to be used within object_detection. + +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. + +TODO(jonathanhuang): wrap as a slim metric in metrics.py + + +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() +""" + +import copy +import time +from collections import OrderedDict +from typing import Any, Dict, List, Set, Union + +import numpy as np +from pycocotools import coco, cocoeval, mask + +from neural_compressor.utils import logger + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = "bbox"): + """Construct a COCOWrapper. + + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + + Raises: + ValueError: if detection_type is unsupported. + """ + supported_detection_types = ["bbox", "segmentation"] + if detection_type not in supported_detection_types: + raise ValueError( + "Unsupported detection type: {}. " + "Supported values are: {}".format(detection_type, supported_detection_types) + ) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. 
+ + Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + + Returns: + a coco.COCO datastructure holding object detection annotations results + + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. + """ + results = coco.COCO() + results.dataset["images"] = [img for img in self.dataset["images"]] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError("annotations is not a list of objects") + annotation_img_ids = [ann["image_id"] for ann in annotations] + if set(annotation_img_ids) != (set(annotation_img_ids) & set(self.getImgIds())): + raise ValueError("Results do not correspond to current coco set") + results.dataset["categories"] = copy.deepcopy(self.dataset["categories"]) + if self._detection_type == "bbox": + for idx, ann in enumerate(annotations): + bb = ann["bbox"] + ann["area"] = bb[2] * bb[3] + ann["id"] = idx + 1 + ann["iscrowd"] = 0 + elif self._detection_type == "segmentation": + for idx, ann in enumerate(annotations): + ann["area"] = mask.area(ann["segmentation"]) + ann["bbox"] = mask.toBbox(ann["segmentation"]) + ann["id"] = idx + 1 + ann["iscrowd"] = 0 + logger.info("DONE (t=%0.2fs)", (time.time() - tic)) + + results.dataset["annotations"] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__( + self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode=False, + iou_type: str = "bbox", + iou_thrs: Union[str, float] = None, + map_points=None, + ): + """Construct a COCOEvalWrapper. + + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. + iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. 
+ """ + cocoeval.COCOeval.__init__(self, groundtruth, detections, iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == "0.5:0.05:0.95": + self.params.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.1)) + 1, endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. + + Args: + category_id: integer id + + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. + + Args: + p: input params for evaluation + """ + print("Accumulating evaluation results...") + tic = time.time() + if not self.evalImgs: + print("Please run evaluate() first") + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T, R, K, A, M)) # -1 for the precision of absent categories + recall = -np.ones((T, K, A, M)) + scores = -np.ones((T, R, K, A, M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print("-pe", _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0 * A0 * I0 + for a, a0 in enumerate(a_list): + Na = a0 * I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if e is not None] + if len(E) == 0: + continue + dtScores = np.concatenate([e["dtScores"][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
+ inds = np.argsort(-dtScores, kind="mergesort") + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e["dtMatches"][:, 0:maxDet] for e in E], axis=1)[:, inds] + dtIg = np.concatenate([e["dtIgnore"][:, 0:maxDet] for e in E], axis=1)[:, inds] + gtIg = np.concatenate([e["gtIgnore"] for e in E]) + npig = np.count_nonzero(gtIg == 0) + if npig == 0: + continue + tps = np.logical_and(dtm, np.logical_not(dtIg)) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg)) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp + tp + np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.0], rc, [1.0])) + pr = np.concatenate(([0.0], pr, [0.0])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) * pr[change_point + 1]) + precision[t, :, k, a, m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist() + q = q.tolist() + + for i in range(nd - 1, 0, -1): + if pr[i] > pr[i - 1]: + pr[i - 1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side="left") + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t, :, k, a, m] = np.array(q) + + # calculate recall + if nd: + recall[t, k, a, m] = rc[-1] + else: + recall[t, k, a, m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side="left") + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t, :, k, a, m] = np.array(ss) + # exit(0) + self.eval = { + "params": p, + "counts": [T, R, K, A, M], + "precision": precision, + "recall": recall, + "scores": scores, + } + toc = time.time() + print("DONE (t={:0.2f}s).".format(toc - tic)) + + def ComputeMetrics( + self, include_metrics_per_category: bool = False, all_metrics_per_category: bool = False + ): # pragma: no cover + """Compute detection metrics. + + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. 
+ + Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict( + [ + ("Precision/mAP", self.stats[0]), + ("Precision/mAP@.50IOU", self.stats[1]), + ("Precision/mAP@.75IOU", self.stats[2]), + ("Precision/mAP (small)", self.stats[3]), + ("Precision/mAP (medium)", self.stats[4]), + ("Precision/mAP (large)", self.stats[5]), + ("Recall/AR@1", self.stats[6]), + ("Recall/AR@10", self.stats[7]), + ("Recall/AR@100", self.stats[8]), + ("Recall/AR@100 (small)", self.stats[9]), + ("Recall/AR@100 (medium)", self.stats[10]), + ("Recall/AR@100 (large)", self.stats[11]), + ] + ) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, "category_stats"): + raise ValueError("Category stats do not exist") + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)["name"] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap["PerformanceByCategory/mAP/{}".format(category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap["Precision mAP ByCategory/{}".format(category)] = self.category_stats[0][category_index] + per_category_ap["Precision mAP@.50IOU ByCategory/{}".format(category)] = self.category_stats[1][ + category_index + ] + per_category_ap["Precision mAP@.75IOU ByCategory/{}".format(category)] = self.category_stats[2][ + category_index + ] + per_category_ap["Precision mAP (small) ByCategory/{}".format(category)] = self.category_stats[3][ + category_index + ] + per_category_ap["Precision mAP (medium) ByCategory/{}".format(category)] = self.category_stats[4][ + category_index + ] + per_category_ap["Precision mAP (large) ByCategory/{}".format(category)] = self.category_stats[5][ + category_index + ] + per_category_ap["Recall AR@1 ByCategory/{}".format(category)] = 
self.category_stats[6][category_index] + per_category_ap["Recall AR@10 ByCategory/{}".format(category)] = self.category_stats[7][category_index] + per_category_ap["Recall AR@100 ByCategory/{}".format(category)] = self.category_stats[8][category_index] + per_category_ap["Recall AR@100 (small) ByCategory/{}".format(category)] = self.category_stats[9][ + category_index + ] + per_category_ap["Recall AR@100 (medium) ByCategory/{}".format(category)] = self.category_stats[10][ + category_index + ] + per_category_ap["Recall AR@100 (large) ByCategory/{}".format(category)] = self.category_stats[11][ + category_index + ] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [float(box[1]), float(box[0]), float(box[3] - box[1]), float(box[2] - box[0])] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco( + image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None, +) -> list: + """Export groundtruth of a single image to COCO format. + + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
+ + Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError("groundtruth_classes is " "expected to be of rank 1.") + if len(groundtruth_boxes.shape) != 2: + raise ValueError("groundtruth_boxes is expected to be of " "rank 2.") + if groundtruth_boxes.shape[1] != 4: + raise ValueError("groundtruth_boxes should have " "shape[1] == 4.") + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + "Corresponding entries in groundtruth_classes, " + "and groundtruth_boxes should have " + "compatible shapes (i.e., agree on the 0th dimension)." + "Classes shape: %d. Boxes shape: %d. Image ID: %s" + % (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], image_id) + ) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError("groundtruth_is_crowd is expected to be of rank 1.") + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + "id": next_annotation_id + i, + "image_id": image_id, + "category_id": int(groundtruth_classes[i]), + "bbox": list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + "area": float( + (groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) + * (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1]) + ), + "iscrowd": iscrowd, + } + if groundtruth_masks is not None: + export_dict["segmentation"] = _RleCompress(groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco( + image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array, +) -> list: + """Export detections of a single image to COCO format. + + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + + Returns: + A list of detection annotations for a single image in the COCO format. + + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
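+
+    Example (an illustrative sketch only; the array values below are hypothetical):
+
+        import numpy as np
+
+        # two detections for one image, boxes in [ymin, xmin, ymax, xmax] pixel coordinates
+        boxes = np.array([[10.0, 10.0, 50.0, 80.0], [20.0, 30.0, 60.0, 90.0]], dtype=np.float32)
+        scores = np.array([0.9, 0.75], dtype=np.float32)
+        classes = np.array([1, 18], dtype=np.int32)
+        anns = ExportSingleImageDetectionBoxesToCoco(
+            image_id="42", category_id_set={1, 18},
+            detection_boxes=boxes, detection_scores=scores, detection_classes=classes)
+        # each returned entry holds image_id, category_id, a COCO-format
+        # [xmin, ymin, width, height] bbox and a score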
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError("All entries in detection_classes and detection_scores" "expected to be of rank 1.") + if len(detection_boxes.shape) != 2: + raise ValueError("All entries in detection_boxes expected to be of " "rank 2.") + if detection_boxes.shape[1] != 4: + raise ValueError("All entries in detection_boxes should have " "shape[1] == 4.") + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + "Corresponding entries in detection_classes, " + "detection_scores and detection_boxes should have " + "compatible shapes (i.e., agree on the 0th dimension). " + "Classes shape: %d. Boxes shape: %d. " + "Scores shape: %d" % (detection_classes.shape[0], detection_boxes.shape[0], detection_scores.shape[0]) + ) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append( + { + "image_id": image_id, + "category_id": int(detection_classes[i]), + "bbox": list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + "score": float(detection_scores[i]), + } + ) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco( + image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array, +) -> list: + """Export detection masks of a single image to COCO format. + + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + + Returns: + A list of detection mask annotations for a single image in the COCO format. + + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError("All entries in detection_classes and detection_scores" "expected to be of rank 1.") + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError( + "Corresponding entries in detection_classes, " + "detection_scores and detection_masks should have " + "compatible lengths and shapes " + "Classes length: %d. Masks length: %d. 
" + "Scores length: %d" % (detection_classes.shape[0], len(detection_masks), detection_scores.shape[0]) + ) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append( + { + "image_id": image_id, + "category_id": int(detection_classes[i]), + "segmentation": _RleCompress(detection_masks[i]), + "score": float(detection_scores[i]), + } + ) + return detections_list diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/data_process.py new file mode 100644 index 00000000000..32e55adb3fd --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/data_process.py @@ -0,0 +1,655 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import cv2 +import collections + +import numpy as np +import tensorflow as tf + +from abc import abstractmethod +from neural_compressor.common import logger +from neural_compressor.tensorflow.utils.data import default_collate + +interpolation_map = { + "nearest": cv2.INTER_NEAREST, + "bilinear": cv2.INTER_LINEAR, + "bicubic": cv2.INTER_CUBIC, +} + +category_map = { + 1: "person", + 2: "bicycle", + 3: "car", + 4: "motorcycle", + 5: "airplane", + 6: "bus", + 7: "train", + 8: "truck", + 9: "boat", + 10: "traffic light", + 11: "fire hydrant", + 13: "stop sign", + 14: "parking meter", + 15: "bench", + 16: "bird", + 17: "cat", + 18: "dog", + 19: "horse", + 20: "sheep", + 21: "cow", + 22: "elephant", + 23: "bear", + 24: "zebra", + 25: "giraffe", + 27: "backpack", + 28: "umbrella", + 31: "handbag", + 32: "tie", + 33: "suitcase", + 34: "frisbee", + 35: "skis", + 36: "snowboard", + 37: "sports ball", + 38: "kite", + 39: "baseball bat", + 40: "baseball glove", + 41: "skateboard", + 42: "surfboard", + 43: "tennis racket", + 44: "bottle", + 46: "wine glass", + 47: "cup", + 48: "fork", + 49: "knife", + 50: "spoon", + 51: "bowl", + 52: "banana", + 53: "apple", + 54: "sandwich", + 55: "orange", + 56: "broccoli", + 57: "carrot", + 58: "hot dog", + 59: "pizza", + 60: "donut", + 61: "cake", + 62: "chair", + 63: "couch", + 64: "potted plant", + 65: "bed", + 67: "dining table", + 70: "toilet", + 72: "tv", + 73: "laptop", + 74: "mouse", + 75: "remote", + 76: "keyboard", + 77: "cell phone", + 78: "microwave", + 79: "oven", + 80: "toaster", + 81: "sink", + 82: "refrigerator", + 84: "book", + 85: "clock", + 86: "vase", + 87: "scissors", + 88: "teddy bear", + 89: "hair drier", + 90: "toothbrush", +} + +class ComposeTransform(object): + """Composes several transforms together. 
+ + Args: + transform_list (list of Transform objects): list of transforms to compose + + Returns: + sample (tuple): tuple of processed image and label + """ + + def __init__(self, transform_list): + """Initialize `ComposeTransform` class.""" + self.transform_list = transform_list + + def __call__(self, sample): + """Call transforms in transform_list.""" + for transform in self.transform_list: + sample = transform(sample) + return sample + + +class ResizeTFTransform(object): + """Resize the input image to the given size. + + Args: + size (list or int): Size of the result + interpolation (str, default='bilinear'):Desired interpolation type, + support 'bilinear', 'nearest', 'bicubic' + + Returns: + tuple of processed image and label + """ + + def __init__(self, size, interpolation="bilinear"): + """Initialize `ResizeTFTransform` class.""" + if isinstance(size, int): + self.size = size, size + elif isinstance(size, list): + if len(size) == 1: + self.size = size[0], size[0] + elif len(size) == 2: + self.size = size[0], size[1] + self.interpolation = interpolation + + if self.interpolation not in ["bilinear", "nearest", "bicubic"]: + raise ValueError("Unsupported interpolation type!") + + def __call__(self, sample): + """Resize the input image in sample to the given size.""" + image, label = sample + if isinstance(image, tf.Tensor): + image = tf.image.resize(image, self.size, method=self.interpolation) + else: + image = cv2.resize(image, self.size, interpolation=interpolation_map[self.interpolation]) + return (image, label) + + +class BaseMetric(object): + """The base class of Metric.""" + + def __init__(self, metric, single_output=False, hvd=None): + """Initialize the basic metric. + + Args: + metric: The metric class. + single_output: Whether the output is single or not, defaults to False. + hvd: The Horovod class for distributed training, defaults to None. + """ + self._metric_cls = metric + self._single_output = single_output + self._hvd = hvd + + def __call__(self, *args, **kwargs): + """Evaluate the model predictions, and the reference. + + Returns: + The class itself. + """ + self._metric = self._metric_cls(*args, **kwargs) + return self + + @abstractmethod + def update(self, preds, labels=None, sample_weight=None): + """Update the state that need to be evaluated. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Clear the predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def result(self): + """Evaluate the difference between predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @property + def metric(self): + """Return its metric class. + + Returns: + The metric class. + """ + return self._metric + + @property + def hvd(self): + """Return its hvd class. + + Returns: + The hvd class. + """ + return self._hvd + + @hvd.setter + def hvd(self, hvd): + """Set its hvd. + + Args: + hvd: The Horovod class for distributed training. 
+ """ + self._hvd = hvd + + +class COCOmAPv2(BaseMetric): + """Compute mean average precision of the detection task.""" + + def __init__( + self, + anno_path=None, + iou_thrs="0.5:0.05:0.95", + map_points=101, + map_key="DetectionBoxes_Precision/mAP", + output_index_mapping={"num_detections": -1, "boxes": 0, "scores": 1, "classes": 2}, + ): + """Initialize the metric. + + Args: + anno_path: The path of annotation file. + iou_thrs: Minimal value for intersection over union that allows to make decision + that prediction bounding box is true positive. You can specify one float value + between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + map_key: The key that mapping to pycocotools COCOeval. + Defaults to 'DetectionBoxes_Precision/mAP'. + output_index_mapping: The output index mapping. + Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. + """ + self.output_index_mapping = output_index_mapping + + if anno_path: + import os + import yaml + + assert os.path.exists(anno_path), "Annotation path does not exists!" + with open(anno_path, "r") as f: + label_map = yaml.safe_load(f.read()) + self.category_map_reverse = {k: v for k, v in label_map.items()} + else: + # label: index + self.category_map_reverse = {v: k for k, v in category_map.items()} + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + self.category_map = category_map + self.category_id_set = set([cat for cat in self.category_map]) # index + self.iou_thrs = iou_thrs + self.map_points = map_points + self.map_key = map_key + + def update(self, predicts, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + predicts: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. Defaults to None. 
+ """ + from coco_tools import ExportSingleImageDetectionBoxesToCoco, ExportSingleImageGroundtruthToCoco + + detections = [] + if "num_detections" in self.output_index_mapping and self.output_index_mapping["num_detections"] > -1: + for item in zip(*predicts): + detection = {} + num = int(item[self.output_index_mapping["num_detections"]]) + detection["boxes"] = np.asarray(item[self.output_index_mapping["boxes"]])[0:num] + detection["scores"] = np.asarray(item[self.output_index_mapping["scores"]])[0:num] + detection["classes"] = np.asarray(item[self.output_index_mapping["classes"]])[0:num] + detections.append(detection) + else: + for item in zip(*predicts): + detection = {} + detection["boxes"] = np.asarray(item[self.output_index_mapping["boxes"]]) + detection["scores"] = np.asarray(item[self.output_index_mapping["scores"]]) + detection["classes"] = np.asarray(item[self.output_index_mapping["classes"]]) + detections.append(detection) + + bboxes, str_labels, int_labels, image_ids = labels + labels = [] + if len(int_labels[0]) == 0: + for str_label in str_labels: + str_label = [x if type(x) == "str" else x.decode("utf-8") for x in str_label] + labels.append([self.category_map_reverse[x] for x in str_label]) + elif len(str_labels[0]) == 0: + for int_label in int_labels: + labels.append([x for x in int_label]) + + for idx, image_id in enumerate(image_ids): + image_id = image_id if type(image_id) == "str" else image_id.decode("utf-8") + if image_id in self.image_ids: + continue + self.image_ids.append(image_id) + + ground_truth = {} + ground_truth["boxes"] = np.asarray(bboxes[idx]) + ground_truth["classes"] = np.asarray(labels[idx]) + + self.ground_truth_list.extend( + ExportSingleImageGroundtruthToCoco( + image_id=image_id, + next_annotation_id=self.annotation_id, + category_id_set=self.category_id_set, + groundtruth_boxes=ground_truth["boxes"], + groundtruth_classes=ground_truth["classes"], + ) + ) + self.annotation_id += ground_truth["boxes"].shape[0] + + self.detection_list.extend( + ExportSingleImageDetectionBoxesToCoco( + image_id=image_id, + category_id_set=self.category_id_set, + detection_boxes=detections[idx]["boxes"], + detection_scores=detections[idx]["scores"], + detection_classes=detections[idx]["classes"], + ) + ) + + def reset(self): + """Reset the prediction and labels.""" + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + + def result(self): + """Compute mean average precision. + + Returns: + The mean average precision score. 
+ """ + from coco_tools import COCOEvalWrapper, COCOWrapper + + if len(self.ground_truth_list) == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + else: + groundtruth_dict = { + "annotations": self.ground_truth_list, + "images": [{"id": image_id} for image_id in self.image_ids], + "categories": [{"id": k, "name": v} for k, v in self.category_map.items()], + } + coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations(self.detection_list) + box_evaluator = COCOEvalWrapper( + coco_wrapped_groundtruth, + coco_wrapped_detections, + agnostic_mode=False, + iou_thrs=self.iou_thrs, + map_points=self.map_points, + ) + box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( + include_metrics_per_category=False, all_metrics_per_category=False + ) + box_metrics.update(box_per_category_ap) + box_metrics = {"DetectionBoxes_" + key: value for key, value in iter(box_metrics.items())} + + return box_metrics[self.map_key] + + +class ParseDecodeCoco: # pragma: no cover + """Helper function for TensorflowModelZooBertDataset. + + Parse the features from sample. + """ + + def __call__(self, sample): + """Parse the sample data. + + Args: + sample: Data to be parsed. + """ + # Dense features in Example proto. + feature_map = { + "image/encoded": tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/object/class/text": tf.compat.v1.VarLenFeature(dtype=tf.string), + "image/object/class/label": tf.compat.v1.VarLenFeature(dtype=tf.int64), + "image/source_id": tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=""), + } + sparse_float32 = tf.compat.v1.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) + + features = tf.io.parse_single_example(sample, feature_map) + + xmin = tf.expand_dims(features["image/object/bbox/xmin"].values, 0) + ymin = tf.expand_dims(features["image/object/bbox/ymin"].values, 0) + xmax = tf.expand_dims(features["image/object/bbox/xmax"].values, 0) + ymax = tf.expand_dims(features["image/object/bbox/ymax"].values, 0) + + bbox = tf.concat([ymin, xmin, ymax, xmax], 0) + # Force the variable number of bounding boxes into the shape + # [1, num_boxes, coords]. + bbox = tf.expand_dims(bbox, 0) + bbox = tf.transpose(bbox, [0, 2, 1]) + + encoded_image = features["image/encoded"] + image_tensor = tf.image.decode_image(encoded_image, channels=3) + image_tensor.set_shape([None, None, 3]) + + str_label = features["image/object/class/text"].values + int_label = features["image/object/class/label"].values + image_id = features["image/source_id"] + + return image_tensor, (bbox[0], str_label, int_label, image_id) + + +class COCORecordDataset(object): + """Tensorflow COCO dataset in tf record format. + + Root is a full path to tfrecord file, which contains the file name. + Please use Resize transform when batch_size > 1 + + Args: root (str): Root directory of dataset. + num_cores (int, default=28):The number of input Datasets to interleave from in parallel. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
+ """ + + def __new__(cls, root, num_cores=28, transform=None, filter=filter): + """Build a new object.""" + record_iterator = tf.compat.v1.python_io.tf_record_iterator(root) + example = tf.train.SequenceExample() + for element in record_iterator: + example.ParseFromString(element) + break + feature = example.context.feature + if ( + len(feature["image/object/class/text"].bytes_list.value) == 0 + and len(feature["image/object/class/label"].int64_list.value) == 0 + ): + raise ValueError( + "Tfrecord format is incorrect, please refer\ + 'https://github.com/tensorflow/models/blob/master/research/\ + object_detection/dataset_tools/create_coco_tf_record.py' to\ + create correct tfrecord" + ) + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + + tfrecord_paths = [root] + ds = tf.data.TFRecordDataset.list_files(tfrecord_paths) + ds = ds.apply( + parallel_interleave( + tf.data.TFRecordDataset, + cycle_length=num_cores, + block_length=5, + sloppy=True, + buffer_output_elements=10000, + prefetch_input_elements=10000, + ) + ) + if transform is not None: + transform.transform_list.insert(0, ParseDecodeCoco()) + else: + transform = ParseDecodeCoco() + ds = ds.map(transform, num_parallel_calls=None) + if filter is not None: + ds = ds.filter(filter) + ds = ds.prefetch(buffer_size=1000) + return ds + + +class TFDataLoader(object): + """Tensorflow dataloader class. + + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict + method to do session run, this dataloader is designed to satisfy the usage of feed dict + in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. + + Args: + dataset: obj. wrapper of needed data. + batch_size: int. batch size + """ + + def __init__(self, dataset, batch_size=1, last_batch="rollover"): + """Initialize `TFDataDataLoader` class.""" + self.dataset = dataset + self.last_batch = last_batch + self.batch_size = batch_size + dataset = dataset.batch(batch_size) + + def batch(self, batch_size, last_batch="rollover"): + """Dataset return data per batch.""" + drop_last = False if last_batch == "rollover" else True + self.batch_size = batch_size + self.dataset = self.dataset.batch(batch_size, drop_last) + + def __iter__(self): + """Iterate dataloader.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + ) + + def _generate_dataloader( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=None, + pin_memory=None, + distributed=False, + ): + """Yield data.""" + drop_last = False if last_batch == "rollover" else True + + def check_dynamic_shape(element_spec): + if isinstance(element_spec, collections.abc.Sequence): + return any([check_dynamic_shape(ele) for ele in element_spec]) + elif isinstance(element_spec, tf.TensorSpec): + return True if element_spec.shape.num_elements() is None else False + else: + raise ValueError("unrecognized element spec...") + + def squeeze_output(output): + if isinstance(output, collections.abc.Sequence): + return [squeeze_output(ele) for ele in output] + elif isinstance(output, np.ndarray): + return np.squeeze(output, axis=0) + else: + raise ValueError("not supported output format....") + + if tf.executing_eagerly(): + index = 0 + outputs = [] + for iter_tensors in dataset: + samples = [] + iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] + if isinstance(iter_inputs, tf.Tensor): + 
samples.append(iter_inputs.numpy()) + else: + samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) + if isinstance(iter_labels, tf.Tensor): + samples.append(iter_labels.numpy()) + else: + samples.append([np.array(l) for l in iter_labels]) + index += 1 + outputs.append(samples) + if index == batch_size: + outputs = default_collate(outputs) + yield outputs + outputs = [] + index = 0 + if len(outputs) > 0: + outputs = default_collate(outputs) + yield outputs + else: + try_single_batch = check_dynamic_shape(dataset.element_spec) + dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + iter_tensors = ds_iterator.get_next() + data_config = tf.compat.v1.ConfigProto() + data_config.use_per_session_threads = 1 + data_config.intra_op_parallelism_threads = 1 + data_config.inter_op_parallelism_threads = 16 + data_sess = tf.compat.v1.Session(config=data_config) + # pylint: disable=no-name-in-module + from tensorflow.python.framework.errors_impl import OutOfRangeError + + while True: + if not try_single_batch: + try: + outputs = data_sess.run(iter_tensors) + yield outputs + except OutOfRangeError: + data_sess.close() + return + else: + try: + outputs = [] + for i in range(0, batch_size): + outputs.append(squeeze_output(data_sess.run(iter_tensors))) + outputs = default_collate(outputs) + yield outputs + except OutOfRangeError: + if len(outputs) == 0: + data_sess.close() + return + else: + outputs = default_collate(outputs) + yield outputs + data_sess.close() + return diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/main.py new file mode 100644 index 00000000000..277028064dc --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/main.py @@ -0,0 +1,128 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+#
+
+from __future__ import division
+
+import time
+
+import numpy as np
+import tensorflow as tf
+
+from argparse import ArgumentParser
+from data_process import (
+    COCOmAPv2,
+    COCORecordDataset,
+    ComposeTransform,
+    ResizeTFTransform,
+    TFDataLoader,
+)
+
+arg_parser = ArgumentParser(description='Parse args')
+
+arg_parser.add_argument('-g',
+                        "--input-graph",
+                        help='Specify the input graph.',
+                        dest='input_graph')
+arg_parser.add_argument('--config', type=str, default='')
+arg_parser.add_argument('--dataset_location', type=str, default='')
+arg_parser.add_argument('--output_model', type=str, default='')
+arg_parser.add_argument('--mode', type=str, default='performance')
+arg_parser.add_argument('--batch_size', type=int, default=10)
+arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='iterations')
+arg_parser.add_argument('--tune', action='store_true', default=False)
+arg_parser.add_argument('--benchmark', dest='benchmark',
+                        action='store_true', help='run benchmark')
+args = arg_parser.parse_args()
+
+def evaluate(model):
+    """Custom evaluate function to estimate the accuracy of the model.
+
+    Args:
+        model (tf.Graph): The input model graph.
+
+    Returns:
+        accuracy (float): evaluation result, the larger is better.
+    """
+    from neural_compressor.tensorflow import Model
+    model = Model(model)
+    model.input_tensor_names = ["image_tensor:0"]
+    model.output_tensor_names = ["num_detections:0", "detection_boxes:0", \
+                                 "detection_scores:0", "detection_classes:0"]
+    input_tensor = model.input_tensor
+    output_tensor = model.output_tensor if len(model.output_tensor)>1 else \
+                        model.output_tensor[0]
+    warmup = 5
+    iteration = -1
+    if args.benchmark and args.mode == 'performance':
+        iteration = args.iters
+    metric = COCOmAPv2(output_index_mapping={'num_detections':0, 'boxes':1, 'scores':2, 'classes':3})
+
+    def eval_func(dataloader):
+        latency_list = []
+        for idx, (inputs, labels) in enumerate(dataloader):
+            # dataloader should keep the order and len of inputs same with input_tensor
+            inputs = np.array([inputs])
+            feed_dict = dict(zip(input_tensor, inputs))
+
+            start = time.time()
+            predictions = model.sess.run(output_tensor, feed_dict)
+            end = time.time()
+
+            metric.update(predictions, labels)
+            latency_list.append(end-start)
+            if idx + 1 == iteration:
+                break
+        latency = np.array(latency_list[warmup:]).mean() / args.batch_size
+        return latency
+
+    eval_dataset = COCORecordDataset(root=args.dataset_location, filter=None, \
+            transform=ComposeTransform(transform_list=[ResizeTFTransform(size=600)]))
+    eval_dataloader = TFDataLoader(dataset=eval_dataset, batch_size=args.batch_size)
+    latency = eval_func(eval_dataloader)
+    if args.benchmark and args.mode == 'performance':
+        print("Batch size = {}".format(args.batch_size))
+        print("Latency: {:.3f} ms".format(latency * 1000))
+        print("Throughput: {:.3f} images/sec".format(1.
/ latency)) + acc = metric.result() + return acc + +def main(_): + calib_dataset = COCORecordDataset(root=args.dataset_location, filter=None, \ + transform=ComposeTransform(transform_list=[ResizeTFTransform(size=600)])) + calib_dataloader = TFDataLoader(dataset=calib_dataset, batch_size=args.batch_size) + + if args.tune: + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model + + quant_config = StaticQuantConfig() + model = Model(args.input_graph) + model.input_tensor_names = ['image_tensor'] + model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] + q_model = quantize_model(model, quant_config, calib_dataloader) + q_model.save(args.output_model) + + if args.benchmark: + if args.mode == 'performance': + evaluate(args.input_graph) + else: + accuracy = evaluate(args.input_graph) + print('Batch size = %d' % args.batch_size) + print("Accuracy: %.5f" % accuracy) + +if __name__ == "__main__": + tf.compat.v1.app.run() diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..6c2115f58ff --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_benchmark.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + batch_size=32 + iters=100 + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo "$var" |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + + +# run_tuning +function run_benchmark { + + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location "${dataset_location}" \ + --batch_size ${batch_size} \ + --iters ${iters} \ + --benchmark +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..559d695f768 --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_quant.sh @@ -0,0 +1,41 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo "$var" |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo "$var" |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo "$var" |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input-graph "${input_model}" \ + --output_model "${output_model}" \ + --dataset_location "${dataset_location}" \ + --tune +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md new file mode 100644 index 00000000000..3091b712180 --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md @@ -0,0 +1,140 @@ +Step-by-Step 
+============
+
+This document lists the steps to reproduce the tuning results of TensorFlow object detection models. The example can run on Intel CPUs and GPUs.
+
+# Prerequisite
+
+
+## 1. Environment
+Python 3.6 or a higher version is recommended.
+
+### Install Intel® Neural Compressor
+```shell
+pip install neural-compressor
+```
+
+### Install Intel TensorFlow
+```shell
+pip install intel-tensorflow
+```
+> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).
+
+### Install Dependency Packages
+```shell
+cd examples/3.x_api/tensorflow/object_detection
+pip install -r requirements.txt
+cd mask_rcnn_inception_v2/quantization/ptq
+```
+
+### Install Protocol Buffer Compiler
+
+The `Protocol Buffer Compiler` (version 3.0.0 or higher) is required for the automatic COCO dataset preparation. To install it, please follow the
+[Protobuf installation instructions](https://grpc.io/docs/protoc-installation/#install-using-a-package-manager).
+
+### Install Intel Extension for Tensorflow
+
+#### Quantizing the model on Intel GPU (installing ITEX is mandatory)
+Intel Extension for Tensorflow must be installed to quantize the model on Intel GPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+For more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers).
+
+#### Quantizing the model on Intel CPU (installing ITEX is optional)
+Intel Extension for Tensorflow for Intel CPUs is currently experimental. It is not required for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+
+> **Note**: 
+> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX.
+
+## 2. Prepare Model
+
+```shell
+wget http://download.tensorflow.org/models/object_detection/mask_rcnn_inception_v2_coco_2018_01_28.tar.gz
+tar -xvzf mask_rcnn_inception_v2_coco_2018_01_28.tar.gz
+```
+
+## 3. Prepare Dataset
+
+### Automatic dataset download
+
+> **_Note: the `prepare_dataset.sh` script works with TF version 1.x._**
+
+Run the `prepare_dataset.sh` script located in `examples/3.x_api/tensorflow/object_detection`.
+
+Usage:
+```shell
+cd examples/3.x_api/tensorflow/object_detection/
+. prepare_dataset.sh
+cd mask_rcnn_inception_v2/quantization/ptq
+```
+
+This script downloads the *train*, *validation* and *test* COCO datasets and converts them to
+TensorFlow records using the dedicated script from `https://github.com/tensorflow/models.git`.
+
+### Manual dataset download
+Download the COCO dataset from the [official website](https://cocodataset.org/#download).
+
+
+# Run
+
+Both pb and ckpt input formats are now supported.
+
+## 1. 
Quantization
+### For PB format
+
+  ```shell
+  # The cmd of running mask_rcnn_inception_v2
+  bash run_quant.sh --input_model=./mask_rcnn_inception_v2_coco_2018_01_28/frozen_inference_graph.pb --output_model=./tensorflow-mask_rcnn_inception_v2-tune.pb --dataset_location=/path/to/dataset/coco_val.record
+  ```
+
+### For ckpt format
+
+  ```shell
+  # The cmd of running mask_rcnn_inception_v2
+  bash run_quant.sh --input_model=./mask_rcnn_inception_v2_coco_2018_01_28/ --output_model=./tensorflow-mask_rcnn_inception_v2-tune.pb --dataset_location=/path/to/dataset/coco_val.record
+  ```
+
+## 2. Benchmark
+  ```shell
+  bash run_benchmark.sh --input_model=./tensorflow-mask_rcnn_inception_v2-tune.pb --dataset_location=/path/to/dataset/coco_val.record --mode=performance
+  ```
+
+Details of enabling Intel® Neural Compressor on mask_rcnn_inception_v2 for Tensorflow
+=========================
+
+This is a tutorial of how to enable the mask_rcnn_inception_v2 model with Intel® Neural Compressor.
+## User Code Analysis
+The user specifies the FP32 *model*, a calibration dataloader *calib_dataloader*, and a custom *eval_func* that encapsulates the evaluation dataset and metric.
+
+This approach keeps the changes to the original mask_rcnn_inception_v2 code minimal: we only need to implement the calibration dataloader and make the necessary changes in *eval_func*.
+
+### Code update
+
+After the preparation steps are done, we only need to update `main.py` as below.
+```python
+    if args.tune:
+        from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model
+
+        quant_config = StaticQuantConfig()
+        model = Model(args.input_graph)
+        model.input_tensor_names = ['image_tensor']
+        model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"]
+        q_model = quantize_model(model, quant_config, calib_dataloader)
+        q_model.save(args.output_model)
+
+    if args.benchmark:
+        if args.mode == 'performance':
+            evaluate(args.input_graph)
+        else:
+            accuracy = evaluate(args.input_graph)
+            print('Batch size = %d' % args.batch_size)
+            print("Accuracy: %.5f" % accuracy)
+```
+
+The `quantize_model` API returns the quantized model, which is then saved to the path specified by `--output_model`.
diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/coco_tools.py b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/coco_tools.py
new file mode 100644
index 00000000000..2f9369798df
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/coco_tools.py
@@ -0,0 +1,694 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Wrappers for third party pycocotools to be used within object_detection.
+
+Note that nothing in this file is tensorflow related and thus cannot
+be called directly as a slim metric, for example.
+ +TODO(jonathanhuang): wrap as a slim metric in metrics.py + + +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() +""" + +import copy +import time +from collections import OrderedDict +from typing import Any, Dict, List, Set, Union + +import numpy as np +from pycocotools import coco, cocoeval, mask + +from neural_compressor.utils import logger + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = "bbox"): + """Construct a COCOWrapper. + + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + + Raises: + ValueError: if detection_type is unsupported. + """ + supported_detection_types = ["bbox", "segmentation"] + if detection_type not in supported_detection_types: + raise ValueError( + "Unsupported detection type: {}. " + "Supported values are: {}".format(detection_type, supported_detection_types) + ) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. + + Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + + Returns: + a coco.COCO datastructure holding object detection annotations results + + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. 
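+
+        Example (a short sketch; `groundtruth_dict` and `detections_list` follow
+        the formats described in the module docstring above):
+
+            groundtruth = COCOWrapper(groundtruth_dict)
+            detections = groundtruth.LoadAnnotations(detections_list)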
+ """ + results = coco.COCO() + results.dataset["images"] = [img for img in self.dataset["images"]] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError("annotations is not a list of objects") + annotation_img_ids = [ann["image_id"] for ann in annotations] + if set(annotation_img_ids) != (set(annotation_img_ids) & set(self.getImgIds())): + raise ValueError("Results do not correspond to current coco set") + results.dataset["categories"] = copy.deepcopy(self.dataset["categories"]) + if self._detection_type == "bbox": + for idx, ann in enumerate(annotations): + bb = ann["bbox"] + ann["area"] = bb[2] * bb[3] + ann["id"] = idx + 1 + ann["iscrowd"] = 0 + elif self._detection_type == "segmentation": + for idx, ann in enumerate(annotations): + ann["area"] = mask.area(ann["segmentation"]) + ann["bbox"] = mask.toBbox(ann["segmentation"]) + ann["id"] = idx + 1 + ann["iscrowd"] = 0 + logger.info("DONE (t=%0.2fs)", (time.time() - tic)) + + results.dataset["annotations"] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__( + self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode=False, + iou_type: str = "bbox", + iou_thrs: Union[str, float] = None, + map_points=None, + ): + """Construct a COCOEvalWrapper. + + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. + iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + """ + cocoeval.COCOeval.__init__(self, groundtruth, detections, iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == "0.5:0.05:0.95": + self.params.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.1)) + 1, endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. 
+ + Args: + category_id: integer id + + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. + + Args: + p: input params for evaluation + """ + print("Accumulating evaluation results...") + tic = time.time() + if not self.evalImgs: + print("Please run evaluate() first") + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T, R, K, A, M)) # -1 for the precision of absent categories + recall = -np.ones((T, K, A, M)) + scores = -np.ones((T, R, K, A, M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print("-pe", _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0 * A0 * I0 + for a, a0 in enumerate(a_list): + Na = a0 * I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if e is not None] + if len(E) == 0: + continue + dtScores = np.concatenate([e["dtScores"][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
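+                    # Note added for clarity: from here on, detection scores are sorted in
+                    # descending order and cumulative TP/FP counts are built per IoU
+                    # threshold. When map_points selects 101 or 11 recall points (R > 1),
+                    # precision is interpolated at the fixed recall thresholds set in
+                    # __init__; when map_points is 0 (R == 1), the branch below integrates
+                    # precision over recall changes, i.e. the area under the PR curve.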
+ inds = np.argsort(-dtScores, kind="mergesort") + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e["dtMatches"][:, 0:maxDet] for e in E], axis=1)[:, inds] + dtIg = np.concatenate([e["dtIgnore"][:, 0:maxDet] for e in E], axis=1)[:, inds] + gtIg = np.concatenate([e["gtIgnore"] for e in E]) + npig = np.count_nonzero(gtIg == 0) + if npig == 0: + continue + tps = np.logical_and(dtm, np.logical_not(dtIg)) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg)) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp + tp + np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.0], rc, [1.0])) + pr = np.concatenate(([0.0], pr, [0.0])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) * pr[change_point + 1]) + precision[t, :, k, a, m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist() + q = q.tolist() + + for i in range(nd - 1, 0, -1): + if pr[i] > pr[i - 1]: + pr[i - 1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side="left") + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t, :, k, a, m] = np.array(q) + + # calculate recall + if nd: + recall[t, k, a, m] = rc[-1] + else: + recall[t, k, a, m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side="left") + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t, :, k, a, m] = np.array(ss) + # exit(0) + self.eval = { + "params": p, + "counts": [T, R, K, A, M], + "precision": precision, + "recall": recall, + "scores": scores, + } + toc = time.time() + print("DONE (t={:0.2f}s).".format(toc - tic)) + + def ComputeMetrics( + self, include_metrics_per_category: bool = False, all_metrics_per_category: bool = False + ): # pragma: no cover + """Compute detection metrics. + + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. 
+ + Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict( + [ + ("Precision/mAP", self.stats[0]), + ("Precision/mAP@.50IOU", self.stats[1]), + ("Precision/mAP@.75IOU", self.stats[2]), + ("Precision/mAP (small)", self.stats[3]), + ("Precision/mAP (medium)", self.stats[4]), + ("Precision/mAP (large)", self.stats[5]), + ("Recall/AR@1", self.stats[6]), + ("Recall/AR@10", self.stats[7]), + ("Recall/AR@100", self.stats[8]), + ("Recall/AR@100 (small)", self.stats[9]), + ("Recall/AR@100 (medium)", self.stats[10]), + ("Recall/AR@100 (large)", self.stats[11]), + ] + ) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, "category_stats"): + raise ValueError("Category stats do not exist") + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)["name"] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap["PerformanceByCategory/mAP/{}".format(category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap["Precision mAP ByCategory/{}".format(category)] = self.category_stats[0][category_index] + per_category_ap["Precision mAP@.50IOU ByCategory/{}".format(category)] = self.category_stats[1][ + category_index + ] + per_category_ap["Precision mAP@.75IOU ByCategory/{}".format(category)] = self.category_stats[2][ + category_index + ] + per_category_ap["Precision mAP (small) ByCategory/{}".format(category)] = self.category_stats[3][ + category_index + ] + per_category_ap["Precision mAP (medium) ByCategory/{}".format(category)] = self.category_stats[4][ + category_index + ] + per_category_ap["Precision mAP (large) ByCategory/{}".format(category)] = self.category_stats[5][ + category_index + ] + per_category_ap["Recall AR@1 ByCategory/{}".format(category)] = 
self.category_stats[6][category_index] + per_category_ap["Recall AR@10 ByCategory/{}".format(category)] = self.category_stats[7][category_index] + per_category_ap["Recall AR@100 ByCategory/{}".format(category)] = self.category_stats[8][category_index] + per_category_ap["Recall AR@100 (small) ByCategory/{}".format(category)] = self.category_stats[9][ + category_index + ] + per_category_ap["Recall AR@100 (medium) ByCategory/{}".format(category)] = self.category_stats[10][ + category_index + ] + per_category_ap["Recall AR@100 (large) ByCategory/{}".format(category)] = self.category_stats[11][ + category_index + ] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [float(box[1]), float(box[0]), float(box[3] - box[1]), float(box[2] - box[0])] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco( + image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None, +) -> list: + """Export groundtruth of a single image to COCO format. + + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
+ + Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError("groundtruth_classes is " "expected to be of rank 1.") + if len(groundtruth_boxes.shape) != 2: + raise ValueError("groundtruth_boxes is expected to be of " "rank 2.") + if groundtruth_boxes.shape[1] != 4: + raise ValueError("groundtruth_boxes should have " "shape[1] == 4.") + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + "Corresponding entries in groundtruth_classes, " + "and groundtruth_boxes should have " + "compatible shapes (i.e., agree on the 0th dimension)." + "Classes shape: %d. Boxes shape: %d. Image ID: %s" + % (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], image_id) + ) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError("groundtruth_is_crowd is expected to be of rank 1.") + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + "id": next_annotation_id + i, + "image_id": image_id, + "category_id": int(groundtruth_classes[i]), + "bbox": list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + "area": float( + (groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) + * (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1]) + ), + "iscrowd": iscrowd, + } + if groundtruth_masks is not None: + export_dict["segmentation"] = _RleCompress(groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco( + image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array, +) -> list: + """Export detections of a single image to COCO format. + + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + + Returns: + A list of detection annotations for a single image in the COCO format. + + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError("All entries in detection_classes and detection_scores" "expected to be of rank 1.") + if len(detection_boxes.shape) != 2: + raise ValueError("All entries in detection_boxes expected to be of " "rank 2.") + if detection_boxes.shape[1] != 4: + raise ValueError("All entries in detection_boxes should have " "shape[1] == 4.") + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + "Corresponding entries in detection_classes, " + "detection_scores and detection_boxes should have " + "compatible shapes (i.e., agree on the 0th dimension). " + "Classes shape: %d. Boxes shape: %d. " + "Scores shape: %d" % (detection_classes.shape[0], detection_boxes.shape[0], detection_scores.shape[0]) + ) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append( + { + "image_id": image_id, + "category_id": int(detection_classes[i]), + "bbox": list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + "score": float(detection_scores[i]), + } + ) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco( + image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array, +) -> list: + """Export detection masks of a single image to COCO format. + + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + + Returns: + A list of detection mask annotations for a single image in the COCO format. + + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError("All entries in detection_classes and detection_scores" "expected to be of rank 1.") + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError( + "Corresponding entries in detection_classes, " + "detection_scores and detection_masks should have " + "compatible lengths and shapes " + "Classes length: %d. Masks length: %d. 
" + "Scores length: %d" % (detection_classes.shape[0], len(detection_masks), detection_scores.shape[0]) + ) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append( + { + "image_id": image_id, + "category_id": int(detection_classes[i]), + "segmentation": _RleCompress(detection_masks[i]), + "score": float(detection_scores[i]), + } + ) + return detections_list diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/data_process.py new file mode 100644 index 00000000000..32e55adb3fd --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/data_process.py @@ -0,0 +1,655 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import cv2 +import collections + +import numpy as np +import tensorflow as tf + +from abc import abstractmethod +from neural_compressor.common import logger +from neural_compressor.tensorflow.utils.data import default_collate + +interpolation_map = { + "nearest": cv2.INTER_NEAREST, + "bilinear": cv2.INTER_LINEAR, + "bicubic": cv2.INTER_CUBIC, +} + +category_map = { + 1: "person", + 2: "bicycle", + 3: "car", + 4: "motorcycle", + 5: "airplane", + 6: "bus", + 7: "train", + 8: "truck", + 9: "boat", + 10: "traffic light", + 11: "fire hydrant", + 13: "stop sign", + 14: "parking meter", + 15: "bench", + 16: "bird", + 17: "cat", + 18: "dog", + 19: "horse", + 20: "sheep", + 21: "cow", + 22: "elephant", + 23: "bear", + 24: "zebra", + 25: "giraffe", + 27: "backpack", + 28: "umbrella", + 31: "handbag", + 32: "tie", + 33: "suitcase", + 34: "frisbee", + 35: "skis", + 36: "snowboard", + 37: "sports ball", + 38: "kite", + 39: "baseball bat", + 40: "baseball glove", + 41: "skateboard", + 42: "surfboard", + 43: "tennis racket", + 44: "bottle", + 46: "wine glass", + 47: "cup", + 48: "fork", + 49: "knife", + 50: "spoon", + 51: "bowl", + 52: "banana", + 53: "apple", + 54: "sandwich", + 55: "orange", + 56: "broccoli", + 57: "carrot", + 58: "hot dog", + 59: "pizza", + 60: "donut", + 61: "cake", + 62: "chair", + 63: "couch", + 64: "potted plant", + 65: "bed", + 67: "dining table", + 70: "toilet", + 72: "tv", + 73: "laptop", + 74: "mouse", + 75: "remote", + 76: "keyboard", + 77: "cell phone", + 78: "microwave", + 79: "oven", + 80: "toaster", + 81: "sink", + 82: "refrigerator", + 84: "book", + 85: "clock", + 86: "vase", + 87: "scissors", + 88: "teddy bear", + 89: "hair drier", + 90: "toothbrush", +} + +class ComposeTransform(object): + """Composes several transforms together. 
+ + Args: + transform_list (list of Transform objects): list of transforms to compose + + Returns: + sample (tuple): tuple of processed image and label + """ + + def __init__(self, transform_list): + """Initialize `ComposeTransform` class.""" + self.transform_list = transform_list + + def __call__(self, sample): + """Call transforms in transform_list.""" + for transform in self.transform_list: + sample = transform(sample) + return sample + + +class ResizeTFTransform(object): + """Resize the input image to the given size. + + Args: + size (list or int): Size of the result + interpolation (str, default='bilinear'):Desired interpolation type, + support 'bilinear', 'nearest', 'bicubic' + + Returns: + tuple of processed image and label + """ + + def __init__(self, size, interpolation="bilinear"): + """Initialize `ResizeTFTransform` class.""" + if isinstance(size, int): + self.size = size, size + elif isinstance(size, list): + if len(size) == 1: + self.size = size[0], size[0] + elif len(size) == 2: + self.size = size[0], size[1] + self.interpolation = interpolation + + if self.interpolation not in ["bilinear", "nearest", "bicubic"]: + raise ValueError("Unsupported interpolation type!") + + def __call__(self, sample): + """Resize the input image in sample to the given size.""" + image, label = sample + if isinstance(image, tf.Tensor): + image = tf.image.resize(image, self.size, method=self.interpolation) + else: + image = cv2.resize(image, self.size, interpolation=interpolation_map[self.interpolation]) + return (image, label) + + +class BaseMetric(object): + """The base class of Metric.""" + + def __init__(self, metric, single_output=False, hvd=None): + """Initialize the basic metric. + + Args: + metric: The metric class. + single_output: Whether the output is single or not, defaults to False. + hvd: The Horovod class for distributed training, defaults to None. + """ + self._metric_cls = metric + self._single_output = single_output + self._hvd = hvd + + def __call__(self, *args, **kwargs): + """Evaluate the model predictions, and the reference. + + Returns: + The class itself. + """ + self._metric = self._metric_cls(*args, **kwargs) + return self + + @abstractmethod + def update(self, preds, labels=None, sample_weight=None): + """Update the state that need to be evaluated. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Clear the predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def result(self): + """Evaluate the difference between predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @property + def metric(self): + """Return its metric class. + + Returns: + The metric class. + """ + return self._metric + + @property + def hvd(self): + """Return its hvd class. + + Returns: + The hvd class. + """ + return self._hvd + + @hvd.setter + def hvd(self, hvd): + """Set its hvd. + + Args: + hvd: The Horovod class for distributed training. 
+ """ + self._hvd = hvd + + +class COCOmAPv2(BaseMetric): + """Compute mean average precision of the detection task.""" + + def __init__( + self, + anno_path=None, + iou_thrs="0.5:0.05:0.95", + map_points=101, + map_key="DetectionBoxes_Precision/mAP", + output_index_mapping={"num_detections": -1, "boxes": 0, "scores": 1, "classes": 2}, + ): + """Initialize the metric. + + Args: + anno_path: The path of annotation file. + iou_thrs: Minimal value for intersection over union that allows to make decision + that prediction bounding box is true positive. You can specify one float value + between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + map_key: The key that mapping to pycocotools COCOeval. + Defaults to 'DetectionBoxes_Precision/mAP'. + output_index_mapping: The output index mapping. + Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. + """ + self.output_index_mapping = output_index_mapping + + if anno_path: + import os + import yaml + + assert os.path.exists(anno_path), "Annotation path does not exists!" + with open(anno_path, "r") as f: + label_map = yaml.safe_load(f.read()) + self.category_map_reverse = {k: v for k, v in label_map.items()} + else: + # label: index + self.category_map_reverse = {v: k for k, v in category_map.items()} + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + self.category_map = category_map + self.category_id_set = set([cat for cat in self.category_map]) # index + self.iou_thrs = iou_thrs + self.map_points = map_points + self.map_key = map_key + + def update(self, predicts, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + predicts: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. Defaults to None. 
+ """ + from coco_tools import ExportSingleImageDetectionBoxesToCoco, ExportSingleImageGroundtruthToCoco + + detections = [] + if "num_detections" in self.output_index_mapping and self.output_index_mapping["num_detections"] > -1: + for item in zip(*predicts): + detection = {} + num = int(item[self.output_index_mapping["num_detections"]]) + detection["boxes"] = np.asarray(item[self.output_index_mapping["boxes"]])[0:num] + detection["scores"] = np.asarray(item[self.output_index_mapping["scores"]])[0:num] + detection["classes"] = np.asarray(item[self.output_index_mapping["classes"]])[0:num] + detections.append(detection) + else: + for item in zip(*predicts): + detection = {} + detection["boxes"] = np.asarray(item[self.output_index_mapping["boxes"]]) + detection["scores"] = np.asarray(item[self.output_index_mapping["scores"]]) + detection["classes"] = np.asarray(item[self.output_index_mapping["classes"]]) + detections.append(detection) + + bboxes, str_labels, int_labels, image_ids = labels + labels = [] + if len(int_labels[0]) == 0: + for str_label in str_labels: + str_label = [x if type(x) == "str" else x.decode("utf-8") for x in str_label] + labels.append([self.category_map_reverse[x] for x in str_label]) + elif len(str_labels[0]) == 0: + for int_label in int_labels: + labels.append([x for x in int_label]) + + for idx, image_id in enumerate(image_ids): + image_id = image_id if type(image_id) == "str" else image_id.decode("utf-8") + if image_id in self.image_ids: + continue + self.image_ids.append(image_id) + + ground_truth = {} + ground_truth["boxes"] = np.asarray(bboxes[idx]) + ground_truth["classes"] = np.asarray(labels[idx]) + + self.ground_truth_list.extend( + ExportSingleImageGroundtruthToCoco( + image_id=image_id, + next_annotation_id=self.annotation_id, + category_id_set=self.category_id_set, + groundtruth_boxes=ground_truth["boxes"], + groundtruth_classes=ground_truth["classes"], + ) + ) + self.annotation_id += ground_truth["boxes"].shape[0] + + self.detection_list.extend( + ExportSingleImageDetectionBoxesToCoco( + image_id=image_id, + category_id_set=self.category_id_set, + detection_boxes=detections[idx]["boxes"], + detection_scores=detections[idx]["scores"], + detection_classes=detections[idx]["classes"], + ) + ) + + def reset(self): + """Reset the prediction and labels.""" + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + + def result(self): + """Compute mean average precision. + + Returns: + The mean average precision score. 
+ """ + from coco_tools import COCOEvalWrapper, COCOWrapper + + if len(self.ground_truth_list) == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + else: + groundtruth_dict = { + "annotations": self.ground_truth_list, + "images": [{"id": image_id} for image_id in self.image_ids], + "categories": [{"id": k, "name": v} for k, v in self.category_map.items()], + } + coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations(self.detection_list) + box_evaluator = COCOEvalWrapper( + coco_wrapped_groundtruth, + coco_wrapped_detections, + agnostic_mode=False, + iou_thrs=self.iou_thrs, + map_points=self.map_points, + ) + box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( + include_metrics_per_category=False, all_metrics_per_category=False + ) + box_metrics.update(box_per_category_ap) + box_metrics = {"DetectionBoxes_" + key: value for key, value in iter(box_metrics.items())} + + return box_metrics[self.map_key] + + +class ParseDecodeCoco: # pragma: no cover + """Helper function for TensorflowModelZooBertDataset. + + Parse the features from sample. + """ + + def __call__(self, sample): + """Parse the sample data. + + Args: + sample: Data to be parsed. + """ + # Dense features in Example proto. + feature_map = { + "image/encoded": tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/object/class/text": tf.compat.v1.VarLenFeature(dtype=tf.string), + "image/object/class/label": tf.compat.v1.VarLenFeature(dtype=tf.int64), + "image/source_id": tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=""), + } + sparse_float32 = tf.compat.v1.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) + + features = tf.io.parse_single_example(sample, feature_map) + + xmin = tf.expand_dims(features["image/object/bbox/xmin"].values, 0) + ymin = tf.expand_dims(features["image/object/bbox/ymin"].values, 0) + xmax = tf.expand_dims(features["image/object/bbox/xmax"].values, 0) + ymax = tf.expand_dims(features["image/object/bbox/ymax"].values, 0) + + bbox = tf.concat([ymin, xmin, ymax, xmax], 0) + # Force the variable number of bounding boxes into the shape + # [1, num_boxes, coords]. + bbox = tf.expand_dims(bbox, 0) + bbox = tf.transpose(bbox, [0, 2, 1]) + + encoded_image = features["image/encoded"] + image_tensor = tf.image.decode_image(encoded_image, channels=3) + image_tensor.set_shape([None, None, 3]) + + str_label = features["image/object/class/text"].values + int_label = features["image/object/class/label"].values + image_id = features["image/source_id"] + + return image_tensor, (bbox[0], str_label, int_label, image_id) + + +class COCORecordDataset(object): + """Tensorflow COCO dataset in tf record format. + + Root is a full path to tfrecord file, which contains the file name. + Please use Resize transform when batch_size > 1 + + Args: root (str): Root directory of dataset. + num_cores (int, default=28):The number of input Datasets to interleave from in parallel. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
+ """ + + def __new__(cls, root, num_cores=28, transform=None, filter=filter): + """Build a new object.""" + record_iterator = tf.compat.v1.python_io.tf_record_iterator(root) + example = tf.train.SequenceExample() + for element in record_iterator: + example.ParseFromString(element) + break + feature = example.context.feature + if ( + len(feature["image/object/class/text"].bytes_list.value) == 0 + and len(feature["image/object/class/label"].int64_list.value) == 0 + ): + raise ValueError( + "Tfrecord format is incorrect, please refer\ + 'https://github.com/tensorflow/models/blob/master/research/\ + object_detection/dataset_tools/create_coco_tf_record.py' to\ + create correct tfrecord" + ) + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + + tfrecord_paths = [root] + ds = tf.data.TFRecordDataset.list_files(tfrecord_paths) + ds = ds.apply( + parallel_interleave( + tf.data.TFRecordDataset, + cycle_length=num_cores, + block_length=5, + sloppy=True, + buffer_output_elements=10000, + prefetch_input_elements=10000, + ) + ) + if transform is not None: + transform.transform_list.insert(0, ParseDecodeCoco()) + else: + transform = ParseDecodeCoco() + ds = ds.map(transform, num_parallel_calls=None) + if filter is not None: + ds = ds.filter(filter) + ds = ds.prefetch(buffer_size=1000) + return ds + + +class TFDataLoader(object): + """Tensorflow dataloader class. + + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict + method to do session run, this dataloader is designed to satisfy the usage of feed dict + in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. + + Args: + dataset: obj. wrapper of needed data. + batch_size: int. batch size + """ + + def __init__(self, dataset, batch_size=1, last_batch="rollover"): + """Initialize `TFDataDataLoader` class.""" + self.dataset = dataset + self.last_batch = last_batch + self.batch_size = batch_size + dataset = dataset.batch(batch_size) + + def batch(self, batch_size, last_batch="rollover"): + """Dataset return data per batch.""" + drop_last = False if last_batch == "rollover" else True + self.batch_size = batch_size + self.dataset = self.dataset.batch(batch_size, drop_last) + + def __iter__(self): + """Iterate dataloader.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + ) + + def _generate_dataloader( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=None, + pin_memory=None, + distributed=False, + ): + """Yield data.""" + drop_last = False if last_batch == "rollover" else True + + def check_dynamic_shape(element_spec): + if isinstance(element_spec, collections.abc.Sequence): + return any([check_dynamic_shape(ele) for ele in element_spec]) + elif isinstance(element_spec, tf.TensorSpec): + return True if element_spec.shape.num_elements() is None else False + else: + raise ValueError("unrecognized element spec...") + + def squeeze_output(output): + if isinstance(output, collections.abc.Sequence): + return [squeeze_output(ele) for ele in output] + elif isinstance(output, np.ndarray): + return np.squeeze(output, axis=0) + else: + raise ValueError("not supported output format....") + + if tf.executing_eagerly(): + index = 0 + outputs = [] + for iter_tensors in dataset: + samples = [] + iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] + if isinstance(iter_inputs, tf.Tensor): + 
samples.append(iter_inputs.numpy()) + else: + samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) + if isinstance(iter_labels, tf.Tensor): + samples.append(iter_labels.numpy()) + else: + samples.append([np.array(l) for l in iter_labels]) + index += 1 + outputs.append(samples) + if index == batch_size: + outputs = default_collate(outputs) + yield outputs + outputs = [] + index = 0 + if len(outputs) > 0: + outputs = default_collate(outputs) + yield outputs + else: + try_single_batch = check_dynamic_shape(dataset.element_spec) + dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + iter_tensors = ds_iterator.get_next() + data_config = tf.compat.v1.ConfigProto() + data_config.use_per_session_threads = 1 + data_config.intra_op_parallelism_threads = 1 + data_config.inter_op_parallelism_threads = 16 + data_sess = tf.compat.v1.Session(config=data_config) + # pylint: disable=no-name-in-module + from tensorflow.python.framework.errors_impl import OutOfRangeError + + while True: + if not try_single_batch: + try: + outputs = data_sess.run(iter_tensors) + yield outputs + except OutOfRangeError: + data_sess.close() + return + else: + try: + outputs = [] + for i in range(0, batch_size): + outputs.append(squeeze_output(data_sess.run(iter_tensors))) + outputs = default_collate(outputs) + yield outputs + except OutOfRangeError: + if len(outputs) == 0: + data_sess.close() + return + else: + outputs = default_collate(outputs) + yield outputs + data_sess.close() + return diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py new file mode 100644 index 00000000000..7751c5dadc6 --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py @@ -0,0 +1,131 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+#
+
+from __future__ import division
+
+import time
+
+import numpy as np
+import tensorflow as tf
+
+from argparse import ArgumentParser
+from data_process import (
+    COCOmAPv2,
+    COCORecordDataset,
+    ComposeTransform,
+    ResizeTFTransform,
+    TFDataLoader,
+)
+
+arg_parser = ArgumentParser(description='Parse args')
+
+arg_parser.add_argument('-g',
+                        "--input-graph",
+                        help='Specify the input graph.',
+                        dest='input_graph')
+arg_parser.add_argument('--config', type=str, default='')
+arg_parser.add_argument('--dataset_location', type=str, default='')
+arg_parser.add_argument('--output_model', type=str, default='')
+arg_parser.add_argument('--mode', type=str, default='accuracy')
+arg_parser.add_argument('--batch_size', type=int, default=10)
+arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='iterations')
+arg_parser.add_argument('--tune', action='store_true', default=False)
+arg_parser.add_argument('--benchmark', dest='benchmark',
+                        action='store_true', help='run benchmark')
+args = arg_parser.parse_args()
+
+def evaluate(model):
+    """Custom evaluate function to estimate the accuracy of the model.
+
+    Args:
+        model (tf.Graph or string or INC.model.TensorflowCheckpointModel): The input model.
+
+    Returns:
+        accuracy (float): evaluation result, the larger is better.
+    """
+    from neural_compressor.tensorflow import Model
+    if isinstance(model, str) or isinstance(model, tf.compat.v1.Graph):
+        model = Model(model)
+        model.input_tensor_names = ["image_tensor:0"]
+        model.output_tensor_names = ["num_detections:0", "detection_boxes:0", \
+                                     "detection_scores:0", "detection_classes:0"]
+    input_tensor = model.input_tensor
+    output_tensor = model.output_tensor if len(model.output_tensor)>1 else \
+                        model.output_tensor[0]
+    warmup = 5
+    iteration = -1
+    if args.benchmark and args.mode == 'performance':
+        iteration = args.iters
+    metric = COCOmAPv2(output_index_mapping={'num_detections':0, 'boxes':1, 'scores':2, 'classes':3})
+
+    def eval_func(dataloader):
+        latency_list = []
+        for idx, (inputs, labels) in enumerate(dataloader):
+            # The dataloader must keep the same order and length of inputs as input_tensor.
+            inputs = np.array([inputs])
+            feed_dict = dict(zip(input_tensor, inputs))
+
+            start = time.time()
+            predictions = model.sess.run(output_tensor, feed_dict)
+            end = time.time()
+
+            metric.update(predictions, labels)
+            latency_list.append(end-start)
+            if idx + 1 == iteration:
+                break
+        latency = np.array(latency_list[warmup:]).mean() / args.batch_size
+        return latency
+
+    eval_dataset = COCORecordDataset(root=args.dataset_location, filter=None, \
+            transform=ComposeTransform(transform_list=[TensorflowResizeWithRatio(
+                min_dim=800, max_dim=1356, padding=False)]))
+    batch_size = 1 if args.mode == 'accuracy' else args.batch_size
+    eval_dataloader = TFDataLoader(dataset=eval_dataset, batch_size=batch_size)
+
+    latency = eval_func(eval_dataloader)
+    if args.benchmark and args.mode == 'performance':
+        print("Batch size = {}".format(args.batch_size))
+        print("Latency: {:.3f} ms".format(latency * 1000))
+        print("Throughput: {:.3f} images/sec".format(1.
/ latency)) + acc = metric.result() + return acc + +def main(_): + calib_dataset = COCORecordDataset(root=args.dataset_location, filter=LabelBalanceCOCORecordFilter(size=1)) + calib_dataloader = TFDataLoader(dataset=calib_dataset, batch_size=1) + + if args.tune: + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model + + quant_config = StaticQuantConfig() + model = Model(args.input_graph) + model.input_tensor_names = ['image_tensor'] + model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] + q_model = quantize_model(model, quant_config, calib_dataloader) + q_model.save(args.output_model) + + if args.benchmark: + if args.mode == 'performance': + evaluate(args.input_graph) + else: + accuracy = evaluate(args.input_graph) + print('Batch size = %d' % args.batch_size) + print("Accuracy: %.5f" % accuracy) + +if __name__ == "__main__": + tf.compat.v1.app.run() diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..6c2115f58ff --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_benchmark.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + batch_size=32 + iters=100 + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo "$var" |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + + +# run_tuning +function run_benchmark { + + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location "${dataset_location}" \ + --batch_size ${batch_size} \ + --iters ${iters} \ + --benchmark +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..559d695f768 --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_quant.sh @@ -0,0 +1,41 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo "$var" |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo "$var" |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo "$var" |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input-graph "${input_model}" \ + --output_model "${output_model}" \ + --dataset_location "${dataset_location}" \ + --tune +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/object_detection/prepare_dataset.sh b/examples/3.x_api/tensorflow/object_detection/prepare_dataset.sh new file mode 100644 index 00000000000..fea0ff1c373 --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/prepare_dataset.sh @@ -0,0 +1,136 @@ +!/bin/bash +# set -x + +DATA_DIR="${PWD}/data" +DATA_NAME="val2017" +DATA_URL_LIST='http://images.cocodataset.org/zips/val2017.zip 
http://images.cocodataset.org/annotations/annotations_trainval2017.zip' +PACKAGES_LIST='val2017.zip annotations_trainval2017.zip' +VAL_IMAGE_DIR=$DATA_DIR/val2017 +TRAIN_ANNOTATIONS_FILE=$DATA_DIR/annotations/empty.json +VAL_ANNOTATIONS_FILE=$DATA_DIR/annotations/instances_val2017.json +TESTDEV_ANNOTATIONS_FILE=$DATA_DIR/annotations/empty.json +OUTPUT_DIR=$DATA_DIR + +help() +{ + cat <<- EOF + + Desc: Prepare dataset for Tensorflow COCO object detection. + + -h --help help info + + --dataset_location set dataset location, default is ./data + +EOF + exit 0 +} + +function main { + init_params "$@" + download_dataset + convert_to_tf_record +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --dataset_location=*) + DATA_DIR=$(echo "$var" |cut -f2 -d=) + ;; + -h|--help) help + ;; + *) + echo "Error: No such parameter: ${var}" + exit 1 + ;; + esac + done + +} + +# removes files that will not be used anymore +function remove_zipped_packages { + for package in $PACKAGES_LIST; do + rm "$package" + done +} + +function download_tf_models_repo { + if [ ! -d models ]; then + git clone https://github.com/tensorflow/models.git + fi + cd models || exit + git checkout 7a9934df2afdf95be9405b4e9f1f2480d748dc40 + cd .. +} + +function divide_tf_records_by_dataset { + if [ ! -d "${DATA_DIR}/tf_test2017" ]; then + mkdir "${DATA_DIR}/tf_test2017" + fi + if [ ! -d "${DATA_DIR}/tf_train2017" ]; then + mkdir "${DATA_DIR}/tf_train2017" + fi + if [ ! -d "${DATA_DIR}/tf_val2017" ]; then + mkdir "${DATA_DIR}/tf_val2017" + fi + mv ${DATA_DIR}/coco_testdev.record* ${DATA_DIR}/tf_test2017 + mv ${DATA_DIR}/coco_train.record* ${DATA_DIR}/tf_train2017 + mv ${DATA_DIR}/coco_val.record* ${DATA_DIR}/tf_val2017 +} + +function convert { + cd models/research + protoc object_detection/protos/*.proto --python_out=. + export PYTHONPATH=$PYTHONPATH:$(pwd) + export PYTHONPATH=$PYTHONPATH:$(pwd)/slim + python ./object_detection/dataset_tools/create_coco_tf_record.py --logtostderr \ + --train_image_dir=empty_dir \ + --val_image_dir="${VAL_IMAGE_DIR}" \ + --test_image_dir=empty_dir \ + --train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \ + --val_annotations_file="${VAL_ANNOTATIONS_FILE}" \ + --testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \ + --output_dir="${OUTPUT_DIR}" +} + +function convert_to_tf_record { + download_tf_models_repo + convert + divide_tf_records_by_dataset +} + +# download_dataset +function download_dataset { + if [ ! -d "${DATA_DIR}" ]; then + mkdir "${DATA_DIR}" + fi + + cd "${DATA_DIR}" || exit + if [ ! -f "${VAL_IMAGE_DIR}" ]; then + + for dataset_dowload_link in $DATA_URL_LIST; do + wget "$dataset_dowload_link" + done + for package in $PACKAGES_LIST; do + unzip -o "$package" + done + remove_zipped_packages + if [ ! -d empty_dir ]; then + mkdir empty_dir + fi + + cd annotations || exit + echo "{ \"images\": {}, \"categories\": {}}" > empty.json + cd .. + else + echo "Dataset ${DATA_NAME} is exist!" 
+    fi
+
+    cd ../
+}
+
+main "$@"
diff --git a/examples/3.x_api/tensorflow/object_detection/requirements.txt b/examples/3.x_api/tensorflow/object_detection/requirements.txt
new file mode 100644
index 00000000000..865df0f3a6b
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/requirements.txt
@@ -0,0 +1,8 @@
+Cython
+contextlib2
+pillow>=8.2.0
+lxml>=4.6.2
+matplotlib
+numpy>=1.17.4
+pycocotools
+protobuf
diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/README.md b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/README.md
new file mode 100644
index 00000000000..43026b9002d
--- /dev/null
+++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/README.md
@@ -0,0 +1,158 @@
+Step-by-Step
+============
+
+This document lists the steps to reproduce the tuning results of TensorFlow object detection models. This example can run on Intel CPUs and GPUs.
+
+# Prerequisite
+
+
+## 1. Environment
+Python 3.6 or a higher version is recommended.
+
+### Install Intel® Neural Compressor
+```shell
+pip install neural-compressor
+```
+
+### Install Intel Tensorflow
+```shell
+pip install intel-tensorflow
+```
+> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).
+
+### Install Dependency Packages
+```shell
+cd examples/3.x_api/tensorflow/object_detection
+pip install -r requirements.txt
+cd ssd_mobilenet_v1/quantization/ptq
+```
+
+### Install Protocol Buffer Compiler
+
+`Protocol Buffer Compiler` in a version higher than 3.0.0 is required for the automatic COCO dataset preparation. To install it, please follow the
+[Protobuf installation instructions](https://grpc.io/docs/protoc-installation/#install-using-a-package-manager).
+
+### Install Intel Extension for Tensorflow
+
+#### Quantizing the model on Intel GPU (Mandatory to install ITEX)
+Intel Extension for TensorFlow must be installed to quantize the model on Intel GPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+For more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers).
+
+#### Quantizing the model on Intel CPU (Optional to install ITEX)
+Intel Extension for TensorFlow for Intel CPUs is currently experimental; it is not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+
+> **Note**:
+> The version compatibility of stock TensorFlow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible TensorFlow and ITEX.
+
+## 2. Prepare Model
+
+### Automated approach
+Run the `prepare_model.py` script located in `examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq`.
+
+```
+python prepare_model.py --model_name=ssd_mobilenet_v1 --model_path=./
+
+Prepare pre-trained model for COCO object detection
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --model_name {ssd_resnet50_v1,ssd_mobilenet_v1}
+                        model to download, default is ssd_resnet50_v1
+  --model_path MODEL_PATH
+                        directory to put models, default is ./model
+```
+
+### Manual approach
+
+```shell
+wget http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz
+tar -xvzf ssd_mobilenet_v1_coco_2018_01_28.tar.gz
+```
+
+## 3. Prepare Dataset
+
+### Automatic dataset download
+
+> **_Note: `prepare_dataset.sh` script works with TF version 1.x._**
+
+Run the `prepare_dataset.sh` script located in `examples/3.x_api/tensorflow/object_detection`.
+
+Usage:
+```shell
+cd examples/3.x_api/tensorflow/object_detection
+. prepare_dataset.sh
+cd ssd_mobilenet_v1/quantization/ptq
+```
+
+This script will download the *train*, *validation* and *test* COCO datasets. Furthermore, it will convert them to
+TensorFlow records using the dedicated script from `https://github.com/tensorflow/models.git`.
+
+### Manual dataset download
+Download the COCO dataset from the [official website](https://cocodataset.org/#download).
+
+
+# Run Command
+
+Both pb and ckpt formats are supported.
+
+## 1. Quantization
+### For PB format
+
+  ```shell
+  # The cmd of running ssd_mobilenet_v1
+  bash run_quant.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28/frozen_inference_graph.pb --output_model=./tensorflow-ssd_mobilenet_v1-tune.pb --dataset_location=/path/to/dataset/coco_val.record
+  ```
+
+### For ckpt format
+
+  ```shell
+  # The cmd of running ssd_mobilenet_v1
+  bash run_quant.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28/ --output_model=./tensorflow-ssd_mobilenet_v1-tune.pb --dataset_location=/path/to/dataset/coco_val.record
+  ```
+
+## 2. Benchmark
+  ```shell
+  bash run_benchmark.sh --input_model=./tensorflow-ssd_mobilenet_v1-tune.pb --dataset_location=/path/to/dataset/coco_val.record --mode=performance
+  ```
+
+Details of enabling Intel® Neural Compressor on ssd_mobilenet_v1 for Tensorflow
+=========================
+
+This is a tutorial on how to enable the ssd_mobilenet_v1 model with Intel® Neural Compressor.
+## User Code Analysis
+The user specifies the fp32 *model*, a calibration dataset *q_dataloader* and a custom *eval_func* that encapsulates the evaluation dataset and metric.
+
+For ssd_mobilenet_v1, we take this approach because our philosophy is to enable the model with minimal changes. Hence only two changes to the original code are needed: implement the *q_dataloader* (calibration dataloader) and make the necessary changes to *eval_func*.
+
+### Code update
+
+After the prepare steps are done, we just need to update main.py as below.
+```python
+    if args.tune:
+        from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model
+
+        quant_config = StaticQuantConfig()
+        model = Model(args.input_graph)
+        model.input_tensor_names = ['image_tensor']
+        model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"]
+        q_model = quantize_model(model, quant_config, calib_dataloader)
+        q_model.save(args.output_model)
+
+    if args.benchmark:
+        if args.mode == 'performance':
+            evaluate(args.input_graph)
+        else:
+            accuracy = evaluate(args.input_graph)
+            print('Batch size = %d' % args.batch_size)
+            print("Accuracy: %.5f" % accuracy)
+```
+
+The quantize_model() function applies static post-training quantization with the given calibration dataloader and returns the quantized model.
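+
+For reference, the `calib_dataloader` consumed by `quantize_model()` can be assembled from the helpers shipped in this example's `data_process.py`. The snippet below is a minimal sketch rather than a copy of `main.py`: it assumes the `COCORecordDataset`, `ComposeTransform`, `ResizeTFTransform` and `TFDataLoader` helpers from `data_process.py`, and the record path and the 300x300 resize are placeholder values.
+
+```python
+# Minimal sketch of building the calibration dataloader from the data_process.py helpers.
+# The record path and the resize size are placeholders, not values taken from main.py.
+from data_process import COCORecordDataset, ComposeTransform, ResizeTFTransform, TFDataLoader
+
+calib_dataset = COCORecordDataset(
+    root="/path/to/dataset/coco_val.record",   # TF record produced by prepare_dataset.sh
+    filter=None,
+    transform=ComposeTransform(transform_list=[ResizeTFTransform(size=300)]),
+)
+calib_dataloader = TFDataLoader(dataset=calib_dataset, batch_size=1)
+```
+
+The same dataset and dataloader classes, combined with an evaluation transform and the `COCOmAPv2` metric, also feed the custom `evaluate` function used for the accuracy and benchmark runs.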
diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/coco_tools.py b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/coco_tools.py new file mode 100644 index 00000000000..2f9369798df --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/coco_tools.py @@ -0,0 +1,694 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Wrappers for third party pycocotools to be used within object_detection. + +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. + +TODO(jonathanhuang): wrap as a slim metric in metrics.py + + +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() +""" + +import copy +import time +from collections import OrderedDict +from typing import Any, Dict, List, Set, Union + +import numpy as np +from pycocotools import coco, cocoeval, mask + +from neural_compressor.utils import logger + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = "bbox"): + """Construct a COCOWrapper. + + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + + Raises: + ValueError: if detection_type is unsupported. + """ + supported_detection_types = ["bbox", "segmentation"] + if detection_type not in supported_detection_types: + raise ValueError( + "Unsupported detection type: {}. 
" + "Supported values are: {}".format(detection_type, supported_detection_types) + ) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. + + Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + + Returns: + a coco.COCO datastructure holding object detection annotations results + + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. + """ + results = coco.COCO() + results.dataset["images"] = [img for img in self.dataset["images"]] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError("annotations is not a list of objects") + annotation_img_ids = [ann["image_id"] for ann in annotations] + if set(annotation_img_ids) != (set(annotation_img_ids) & set(self.getImgIds())): + raise ValueError("Results do not correspond to current coco set") + results.dataset["categories"] = copy.deepcopy(self.dataset["categories"]) + if self._detection_type == "bbox": + for idx, ann in enumerate(annotations): + bb = ann["bbox"] + ann["area"] = bb[2] * bb[3] + ann["id"] = idx + 1 + ann["iscrowd"] = 0 + elif self._detection_type == "segmentation": + for idx, ann in enumerate(annotations): + ann["area"] = mask.area(ann["segmentation"]) + ann["bbox"] = mask.toBbox(ann["segmentation"]) + ann["id"] = idx + 1 + ann["iscrowd"] = 0 + logger.info("DONE (t=%0.2fs)", (time.time() - tic)) + + results.dataset["annotations"] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__( + self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode=False, + iou_type: str = "bbox", + iou_thrs: Union[str, float] = None, + map_points=None, + ): + """Construct a COCOEvalWrapper. + + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. + iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. 
+ iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + """ + cocoeval.COCOeval.__init__(self, groundtruth, detections, iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == "0.5:0.05:0.95": + self.params.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.1)) + 1, endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. + + Args: + category_id: integer id + + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. + + Args: + p: input params for evaluation + """ + print("Accumulating evaluation results...") + tic = time.time() + if not self.evalImgs: + print("Please run evaluate() first") + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T, R, K, A, M)) # -1 for the precision of absent categories + recall = -np.ones((T, K, A, M)) + scores = -np.ones((T, R, K, A, M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print("-pe", _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0 * A0 * I0 + for a, a0 in enumerate(a_list): + Na = a0 * I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if e is not None] + if len(E) == 0: + continue + dtScores = np.concatenate([e["dtScores"][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
+ inds = np.argsort(-dtScores, kind="mergesort") + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e["dtMatches"][:, 0:maxDet] for e in E], axis=1)[:, inds] + dtIg = np.concatenate([e["dtIgnore"][:, 0:maxDet] for e in E], axis=1)[:, inds] + gtIg = np.concatenate([e["gtIgnore"] for e in E]) + npig = np.count_nonzero(gtIg == 0) + if npig == 0: + continue + tps = np.logical_and(dtm, np.logical_not(dtIg)) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg)) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp + tp + np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.0], rc, [1.0])) + pr = np.concatenate(([0.0], pr, [0.0])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) * pr[change_point + 1]) + precision[t, :, k, a, m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist() + q = q.tolist() + + for i in range(nd - 1, 0, -1): + if pr[i] > pr[i - 1]: + pr[i - 1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side="left") + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t, :, k, a, m] = np.array(q) + + # calculate recall + if nd: + recall[t, k, a, m] = rc[-1] + else: + recall[t, k, a, m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side="left") + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t, :, k, a, m] = np.array(ss) + # exit(0) + self.eval = { + "params": p, + "counts": [T, R, K, A, M], + "precision": precision, + "recall": recall, + "scores": scores, + } + toc = time.time() + print("DONE (t={:0.2f}s).".format(toc - tic)) + + def ComputeMetrics( + self, include_metrics_per_category: bool = False, all_metrics_per_category: bool = False + ): # pragma: no cover + """Compute detection metrics. + + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. 
+ + Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict( + [ + ("Precision/mAP", self.stats[0]), + ("Precision/mAP@.50IOU", self.stats[1]), + ("Precision/mAP@.75IOU", self.stats[2]), + ("Precision/mAP (small)", self.stats[3]), + ("Precision/mAP (medium)", self.stats[4]), + ("Precision/mAP (large)", self.stats[5]), + ("Recall/AR@1", self.stats[6]), + ("Recall/AR@10", self.stats[7]), + ("Recall/AR@100", self.stats[8]), + ("Recall/AR@100 (small)", self.stats[9]), + ("Recall/AR@100 (medium)", self.stats[10]), + ("Recall/AR@100 (large)", self.stats[11]), + ] + ) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, "category_stats"): + raise ValueError("Category stats do not exist") + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)["name"] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap["PerformanceByCategory/mAP/{}".format(category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap["Precision mAP ByCategory/{}".format(category)] = self.category_stats[0][category_index] + per_category_ap["Precision mAP@.50IOU ByCategory/{}".format(category)] = self.category_stats[1][ + category_index + ] + per_category_ap["Precision mAP@.75IOU ByCategory/{}".format(category)] = self.category_stats[2][ + category_index + ] + per_category_ap["Precision mAP (small) ByCategory/{}".format(category)] = self.category_stats[3][ + category_index + ] + per_category_ap["Precision mAP (medium) ByCategory/{}".format(category)] = self.category_stats[4][ + category_index + ] + per_category_ap["Precision mAP (large) ByCategory/{}".format(category)] = self.category_stats[5][ + category_index + ] + per_category_ap["Recall AR@1 ByCategory/{}".format(category)] = 
self.category_stats[6][category_index] + per_category_ap["Recall AR@10 ByCategory/{}".format(category)] = self.category_stats[7][category_index] + per_category_ap["Recall AR@100 ByCategory/{}".format(category)] = self.category_stats[8][category_index] + per_category_ap["Recall AR@100 (small) ByCategory/{}".format(category)] = self.category_stats[9][ + category_index + ] + per_category_ap["Recall AR@100 (medium) ByCategory/{}".format(category)] = self.category_stats[10][ + category_index + ] + per_category_ap["Recall AR@100 (large) ByCategory/{}".format(category)] = self.category_stats[11][ + category_index + ] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [float(box[1]), float(box[0]), float(box[3] - box[1]), float(box[2] - box[0])] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco( + image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None, +) -> list: + """Export groundtruth of a single image to COCO format. + + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
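As a quick sanity check of the conversion performed by `_ConvertBoxToCOCOFormat` above (toy coordinates, not part of the patch):

```python
import numpy as np

def convert_box_to_coco(box):
    # [ymin, xmin, ymax, xmax] -> COCO [xmin, ymin, width, height]
    return [float(box[1]), float(box[0]), float(box[3] - box[1]), float(box[2] - box[0])]

print(convert_box_to_coco(np.array([10.0, 20.0, 110.0, 220.0])))
# [20.0, 10.0, 200.0, 100.0]
```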
+ + Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError("groundtruth_classes is " "expected to be of rank 1.") + if len(groundtruth_boxes.shape) != 2: + raise ValueError("groundtruth_boxes is expected to be of " "rank 2.") + if groundtruth_boxes.shape[1] != 4: + raise ValueError("groundtruth_boxes should have " "shape[1] == 4.") + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + "Corresponding entries in groundtruth_classes, " + "and groundtruth_boxes should have " + "compatible shapes (i.e., agree on the 0th dimension)." + "Classes shape: %d. Boxes shape: %d. Image ID: %s" + % (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], image_id) + ) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError("groundtruth_is_crowd is expected to be of rank 1.") + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + "id": next_annotation_id + i, + "image_id": image_id, + "category_id": int(groundtruth_classes[i]), + "bbox": list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + "area": float( + (groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) + * (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1]) + ), + "iscrowd": iscrowd, + } + if groundtruth_masks is not None: + export_dict["segmentation"] = _RleCompress(groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco( + image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array, +) -> list: + """Export detections of a single image to COCO format. + + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + + Returns: + A list of detection annotations for a single image in the COCO format. + + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
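A toy call to `ExportSingleImageGroundtruthToCoco` showing the shape contract it enforces (the image id, classes and boxes are invented; assumes this example's `coco_tools.py` is on the Python path):

```python
import numpy as np

from coco_tools import ExportSingleImageGroundtruthToCoco  # this example's coco_tools.py

annotations = ExportSingleImageGroundtruthToCoco(
    image_id="000000000139",                    # hypothetical image id
    next_annotation_id=1,
    category_id_set={1, 18},                    # e.g. person, dog
    groundtruth_boxes=np.array([[0.1, 0.2, 0.5, 0.6]], dtype=np.float32),  # [ymin, xmin, ymax, xmax]
    groundtruth_classes=np.array([18], dtype=np.int32),
)
print(annotations[0]["bbox"])                   # [0.2, 0.1, 0.4, 0.4] as [xmin, ymin, w, h]
print(annotations[0]["area"])                   # ~0.16
```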
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError("All entries in detection_classes and detection_scores" "expected to be of rank 1.") + if len(detection_boxes.shape) != 2: + raise ValueError("All entries in detection_boxes expected to be of " "rank 2.") + if detection_boxes.shape[1] != 4: + raise ValueError("All entries in detection_boxes should have " "shape[1] == 4.") + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + "Corresponding entries in detection_classes, " + "detection_scores and detection_boxes should have " + "compatible shapes (i.e., agree on the 0th dimension). " + "Classes shape: %d. Boxes shape: %d. " + "Scores shape: %d" % (detection_classes.shape[0], detection_boxes.shape[0], detection_scores.shape[0]) + ) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append( + { + "image_id": image_id, + "category_id": int(detection_classes[i]), + "bbox": list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + "score": float(detection_scores[i]), + } + ) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco( + image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array, +) -> list: + """Export detection masks of a single image to COCO format. + + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + + Returns: + A list of detection mask annotations for a single image in the COCO format. + + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError("All entries in detection_classes and detection_scores" "expected to be of rank 1.") + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError( + "Corresponding entries in detection_classes, " + "detection_scores and detection_masks should have " + "compatible lengths and shapes " + "Classes length: %d. Masks length: %d. 
" + "Scores length: %d" % (detection_classes.shape[0], len(detection_masks), detection_scores.shape[0]) + ) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append( + { + "image_id": image_id, + "category_id": int(detection_classes[i]), + "segmentation": _RleCompress(detection_masks[i]), + "score": float(detection_scores[i]), + } + ) + return detections_list diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/data_process.py new file mode 100644 index 00000000000..32e55adb3fd --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/data_process.py @@ -0,0 +1,655 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import cv2 +import collections + +import numpy as np +import tensorflow as tf + +from abc import abstractmethod +from neural_compressor.common import logger +from neural_compressor.tensorflow.utils.data import default_collate + +interpolation_map = { + "nearest": cv2.INTER_NEAREST, + "bilinear": cv2.INTER_LINEAR, + "bicubic": cv2.INTER_CUBIC, +} + +category_map = { + 1: "person", + 2: "bicycle", + 3: "car", + 4: "motorcycle", + 5: "airplane", + 6: "bus", + 7: "train", + 8: "truck", + 9: "boat", + 10: "traffic light", + 11: "fire hydrant", + 13: "stop sign", + 14: "parking meter", + 15: "bench", + 16: "bird", + 17: "cat", + 18: "dog", + 19: "horse", + 20: "sheep", + 21: "cow", + 22: "elephant", + 23: "bear", + 24: "zebra", + 25: "giraffe", + 27: "backpack", + 28: "umbrella", + 31: "handbag", + 32: "tie", + 33: "suitcase", + 34: "frisbee", + 35: "skis", + 36: "snowboard", + 37: "sports ball", + 38: "kite", + 39: "baseball bat", + 40: "baseball glove", + 41: "skateboard", + 42: "surfboard", + 43: "tennis racket", + 44: "bottle", + 46: "wine glass", + 47: "cup", + 48: "fork", + 49: "knife", + 50: "spoon", + 51: "bowl", + 52: "banana", + 53: "apple", + 54: "sandwich", + 55: "orange", + 56: "broccoli", + 57: "carrot", + 58: "hot dog", + 59: "pizza", + 60: "donut", + 61: "cake", + 62: "chair", + 63: "couch", + 64: "potted plant", + 65: "bed", + 67: "dining table", + 70: "toilet", + 72: "tv", + 73: "laptop", + 74: "mouse", + 75: "remote", + 76: "keyboard", + 77: "cell phone", + 78: "microwave", + 79: "oven", + 80: "toaster", + 81: "sink", + 82: "refrigerator", + 84: "book", + 85: "clock", + 86: "vase", + 87: "scissors", + 88: "teddy bear", + 89: "hair drier", + 90: "toothbrush", +} + +class ComposeTransform(object): + """Composes several transforms together. 
+ + Args: + transform_list (list of Transform objects): list of transforms to compose + + Returns: + sample (tuple): tuple of processed image and label + """ + + def __init__(self, transform_list): + """Initialize `ComposeTransform` class.""" + self.transform_list = transform_list + + def __call__(self, sample): + """Call transforms in transform_list.""" + for transform in self.transform_list: + sample = transform(sample) + return sample + + +class ResizeTFTransform(object): + """Resize the input image to the given size. + + Args: + size (list or int): Size of the result + interpolation (str, default='bilinear'):Desired interpolation type, + support 'bilinear', 'nearest', 'bicubic' + + Returns: + tuple of processed image and label + """ + + def __init__(self, size, interpolation="bilinear"): + """Initialize `ResizeTFTransform` class.""" + if isinstance(size, int): + self.size = size, size + elif isinstance(size, list): + if len(size) == 1: + self.size = size[0], size[0] + elif len(size) == 2: + self.size = size[0], size[1] + self.interpolation = interpolation + + if self.interpolation not in ["bilinear", "nearest", "bicubic"]: + raise ValueError("Unsupported interpolation type!") + + def __call__(self, sample): + """Resize the input image in sample to the given size.""" + image, label = sample + if isinstance(image, tf.Tensor): + image = tf.image.resize(image, self.size, method=self.interpolation) + else: + image = cv2.resize(image, self.size, interpolation=interpolation_map[self.interpolation]) + return (image, label) + + +class BaseMetric(object): + """The base class of Metric.""" + + def __init__(self, metric, single_output=False, hvd=None): + """Initialize the basic metric. + + Args: + metric: The metric class. + single_output: Whether the output is single or not, defaults to False. + hvd: The Horovod class for distributed training, defaults to None. + """ + self._metric_cls = metric + self._single_output = single_output + self._hvd = hvd + + def __call__(self, *args, **kwargs): + """Evaluate the model predictions, and the reference. + + Returns: + The class itself. + """ + self._metric = self._metric_cls(*args, **kwargs) + return self + + @abstractmethod + def update(self, preds, labels=None, sample_weight=None): + """Update the state that need to be evaluated. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Clear the predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def result(self): + """Evaluate the difference between predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @property + def metric(self): + """Return its metric class. + + Returns: + The metric class. + """ + return self._metric + + @property + def hvd(self): + """Return its hvd class. + + Returns: + The hvd class. + """ + return self._hvd + + @hvd.setter + def hvd(self, hvd): + """Set its hvd. + + Args: + hvd: The Horovod class for distributed training. 
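A small usage sketch of the transforms defined above (illustration only; `data_process` refers to this example's data_process.py):

```python
import numpy as np

from data_process import ComposeTransform, ResizeTFTransform  # this example's data_process.py

transform = ComposeTransform(transform_list=[ResizeTFTransform(size=300)])
image = np.random.randint(0, 255, size=(480, 640, 3), dtype=np.uint8)  # fake HWC image
resized, label = transform((image, None))
print(resized.shape)  # (300, 300, 3); cv2.resize is used because the input is a numpy array
```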
+ """ + self._hvd = hvd + + +class COCOmAPv2(BaseMetric): + """Compute mean average precision of the detection task.""" + + def __init__( + self, + anno_path=None, + iou_thrs="0.5:0.05:0.95", + map_points=101, + map_key="DetectionBoxes_Precision/mAP", + output_index_mapping={"num_detections": -1, "boxes": 0, "scores": 1, "classes": 2}, + ): + """Initialize the metric. + + Args: + anno_path: The path of annotation file. + iou_thrs: Minimal value for intersection over union that allows to make decision + that prediction bounding box is true positive. You can specify one float value + between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + map_key: The key that mapping to pycocotools COCOeval. + Defaults to 'DetectionBoxes_Precision/mAP'. + output_index_mapping: The output index mapping. + Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. + """ + self.output_index_mapping = output_index_mapping + + if anno_path: + import os + import yaml + + assert os.path.exists(anno_path), "Annotation path does not exists!" + with open(anno_path, "r") as f: + label_map = yaml.safe_load(f.read()) + self.category_map_reverse = {k: v for k, v in label_map.items()} + else: + # label: index + self.category_map_reverse = {v: k for k, v in category_map.items()} + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + self.category_map = category_map + self.category_id_set = set([cat for cat in self.category_map]) # index + self.iou_thrs = iou_thrs + self.map_points = map_points + self.map_key = map_key + + def update(self, predicts, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + predicts: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. Defaults to None. 
+ """ + from coco_tools import ExportSingleImageDetectionBoxesToCoco, ExportSingleImageGroundtruthToCoco + + detections = [] + if "num_detections" in self.output_index_mapping and self.output_index_mapping["num_detections"] > -1: + for item in zip(*predicts): + detection = {} + num = int(item[self.output_index_mapping["num_detections"]]) + detection["boxes"] = np.asarray(item[self.output_index_mapping["boxes"]])[0:num] + detection["scores"] = np.asarray(item[self.output_index_mapping["scores"]])[0:num] + detection["classes"] = np.asarray(item[self.output_index_mapping["classes"]])[0:num] + detections.append(detection) + else: + for item in zip(*predicts): + detection = {} + detection["boxes"] = np.asarray(item[self.output_index_mapping["boxes"]]) + detection["scores"] = np.asarray(item[self.output_index_mapping["scores"]]) + detection["classes"] = np.asarray(item[self.output_index_mapping["classes"]]) + detections.append(detection) + + bboxes, str_labels, int_labels, image_ids = labels + labels = [] + if len(int_labels[0]) == 0: + for str_label in str_labels: + str_label = [x if type(x) == "str" else x.decode("utf-8") for x in str_label] + labels.append([self.category_map_reverse[x] for x in str_label]) + elif len(str_labels[0]) == 0: + for int_label in int_labels: + labels.append([x for x in int_label]) + + for idx, image_id in enumerate(image_ids): + image_id = image_id if type(image_id) == "str" else image_id.decode("utf-8") + if image_id in self.image_ids: + continue + self.image_ids.append(image_id) + + ground_truth = {} + ground_truth["boxes"] = np.asarray(bboxes[idx]) + ground_truth["classes"] = np.asarray(labels[idx]) + + self.ground_truth_list.extend( + ExportSingleImageGroundtruthToCoco( + image_id=image_id, + next_annotation_id=self.annotation_id, + category_id_set=self.category_id_set, + groundtruth_boxes=ground_truth["boxes"], + groundtruth_classes=ground_truth["classes"], + ) + ) + self.annotation_id += ground_truth["boxes"].shape[0] + + self.detection_list.extend( + ExportSingleImageDetectionBoxesToCoco( + image_id=image_id, + category_id_set=self.category_id_set, + detection_boxes=detections[idx]["boxes"], + detection_scores=detections[idx]["scores"], + detection_classes=detections[idx]["classes"], + ) + ) + + def reset(self): + """Reset the prediction and labels.""" + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + + def result(self): + """Compute mean average precision. + + Returns: + The mean average precision score. 
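To make the `update()` input contract concrete, here is a hypothetical single-image flow (shapes and values are invented for illustration; assumes this example's data_process.py and coco_tools.py are importable):

```python
import numpy as np

from data_process import COCOmAPv2  # this example's data_process.py

metric = COCOmAPv2(output_index_mapping={"num_detections": 0, "boxes": 1, "scores": 2, "classes": 3})

# model outputs for a batch of one image, ordered as declared in output_index_mapping
predicts = (
    np.array([1]),                          # num_detections
    np.array([[[0.1, 0.1, 0.5, 0.5]]]),     # boxes  [batch, n, 4] as [ymin, xmin, ymax, xmax]
    np.array([[0.9]]),                      # scores [batch, n]
    np.array([[1]]),                        # classes [batch, n]
)
labels = (
    np.array([[[0.1, 0.1, 0.5, 0.5]]]),     # groundtruth boxes
    np.array([[]]),                         # string labels (empty here, integer labels are used)
    np.array([[1]]),                        # integer labels
    np.array([b"img_0"]),                   # image ids
)
metric.update(predicts, labels)
print(len(metric.ground_truth_list), len(metric.detection_list))  # 1 1
# metric.result() would then run pycocotools and return the value selected by map_key
```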
+ """ + from coco_tools import COCOEvalWrapper, COCOWrapper + + if len(self.ground_truth_list) == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + else: + groundtruth_dict = { + "annotations": self.ground_truth_list, + "images": [{"id": image_id} for image_id in self.image_ids], + "categories": [{"id": k, "name": v} for k, v in self.category_map.items()], + } + coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations(self.detection_list) + box_evaluator = COCOEvalWrapper( + coco_wrapped_groundtruth, + coco_wrapped_detections, + agnostic_mode=False, + iou_thrs=self.iou_thrs, + map_points=self.map_points, + ) + box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( + include_metrics_per_category=False, all_metrics_per_category=False + ) + box_metrics.update(box_per_category_ap) + box_metrics = {"DetectionBoxes_" + key: value for key, value in iter(box_metrics.items())} + + return box_metrics[self.map_key] + + +class ParseDecodeCoco: # pragma: no cover + """Helper function for TensorflowModelZooBertDataset. + + Parse the features from sample. + """ + + def __call__(self, sample): + """Parse the sample data. + + Args: + sample: Data to be parsed. + """ + # Dense features in Example proto. + feature_map = { + "image/encoded": tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/object/class/text": tf.compat.v1.VarLenFeature(dtype=tf.string), + "image/object/class/label": tf.compat.v1.VarLenFeature(dtype=tf.int64), + "image/source_id": tf.compat.v1.FixedLenFeature([], dtype=tf.string, default_value=""), + } + sparse_float32 = tf.compat.v1.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) + + features = tf.io.parse_single_example(sample, feature_map) + + xmin = tf.expand_dims(features["image/object/bbox/xmin"].values, 0) + ymin = tf.expand_dims(features["image/object/bbox/ymin"].values, 0) + xmax = tf.expand_dims(features["image/object/bbox/xmax"].values, 0) + ymax = tf.expand_dims(features["image/object/bbox/ymax"].values, 0) + + bbox = tf.concat([ymin, xmin, ymax, xmax], 0) + # Force the variable number of bounding boxes into the shape + # [1, num_boxes, coords]. + bbox = tf.expand_dims(bbox, 0) + bbox = tf.transpose(bbox, [0, 2, 1]) + + encoded_image = features["image/encoded"] + image_tensor = tf.image.decode_image(encoded_image, channels=3) + image_tensor.set_shape([None, None, 3]) + + str_label = features["image/object/class/text"].values + int_label = features["image/object/class/label"].values + image_id = features["image/source_id"] + + return image_tensor, (bbox[0], str_label, int_label, image_id) + + +class COCORecordDataset(object): + """Tensorflow COCO dataset in tf record format. + + Root is a full path to tfrecord file, which contains the file name. + Please use Resize transform when batch_size > 1 + + Args: root (str): Root directory of dataset. + num_cores (int, default=28):The number of input Datasets to interleave from in parallel. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
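A typical way to wire this dataset into the `TFDataLoader` defined further below (the tfrecord path is a placeholder; illustration only):

```python
from data_process import (  # this example's data_process.py
    COCORecordDataset,
    ComposeTransform,
    ResizeTFTransform,
    TFDataLoader,
)

eval_dataset = COCORecordDataset(
    root="/path/to/coco_val.record",        # placeholder COCO validation tfrecord
    filter=None,
    transform=ComposeTransform(transform_list=[ResizeTFTransform(size=300)]),
)
eval_dataloader = TFDataLoader(dataset=eval_dataset, batch_size=1)
for inputs, labels in eval_dataloader:
    print(inputs.shape)                     # e.g. (1, 300, 300, 3)
    break
```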
+ """ + + def __new__(cls, root, num_cores=28, transform=None, filter=filter): + """Build a new object.""" + record_iterator = tf.compat.v1.python_io.tf_record_iterator(root) + example = tf.train.SequenceExample() + for element in record_iterator: + example.ParseFromString(element) + break + feature = example.context.feature + if ( + len(feature["image/object/class/text"].bytes_list.value) == 0 + and len(feature["image/object/class/label"].int64_list.value) == 0 + ): + raise ValueError( + "Tfrecord format is incorrect, please refer\ + 'https://github.com/tensorflow/models/blob/master/research/\ + object_detection/dataset_tools/create_coco_tf_record.py' to\ + create correct tfrecord" + ) + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + + tfrecord_paths = [root] + ds = tf.data.TFRecordDataset.list_files(tfrecord_paths) + ds = ds.apply( + parallel_interleave( + tf.data.TFRecordDataset, + cycle_length=num_cores, + block_length=5, + sloppy=True, + buffer_output_elements=10000, + prefetch_input_elements=10000, + ) + ) + if transform is not None: + transform.transform_list.insert(0, ParseDecodeCoco()) + else: + transform = ParseDecodeCoco() + ds = ds.map(transform, num_parallel_calls=None) + if filter is not None: + ds = ds.filter(filter) + ds = ds.prefetch(buffer_size=1000) + return ds + + +class TFDataLoader(object): + """Tensorflow dataloader class. + + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict + method to do session run, this dataloader is designed to satisfy the usage of feed dict + in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. + + Args: + dataset: obj. wrapper of needed data. + batch_size: int. batch size + """ + + def __init__(self, dataset, batch_size=1, last_batch="rollover"): + """Initialize `TFDataDataLoader` class.""" + self.dataset = dataset + self.last_batch = last_batch + self.batch_size = batch_size + dataset = dataset.batch(batch_size) + + def batch(self, batch_size, last_batch="rollover"): + """Dataset return data per batch.""" + drop_last = False if last_batch == "rollover" else True + self.batch_size = batch_size + self.dataset = self.dataset.batch(batch_size, drop_last) + + def __iter__(self): + """Iterate dataloader.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + ) + + def _generate_dataloader( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=None, + pin_memory=None, + distributed=False, + ): + """Yield data.""" + drop_last = False if last_batch == "rollover" else True + + def check_dynamic_shape(element_spec): + if isinstance(element_spec, collections.abc.Sequence): + return any([check_dynamic_shape(ele) for ele in element_spec]) + elif isinstance(element_spec, tf.TensorSpec): + return True if element_spec.shape.num_elements() is None else False + else: + raise ValueError("unrecognized element spec...") + + def squeeze_output(output): + if isinstance(output, collections.abc.Sequence): + return [squeeze_output(ele) for ele in output] + elif isinstance(output, np.ndarray): + return np.squeeze(output, axis=0) + else: + raise ValueError("not supported output format....") + + if tf.executing_eagerly(): + index = 0 + outputs = [] + for iter_tensors in dataset: + samples = [] + iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] + if isinstance(iter_inputs, tf.Tensor): + 
samples.append(iter_inputs.numpy()) + else: + samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) + if isinstance(iter_labels, tf.Tensor): + samples.append(iter_labels.numpy()) + else: + samples.append([np.array(l) for l in iter_labels]) + index += 1 + outputs.append(samples) + if index == batch_size: + outputs = default_collate(outputs) + yield outputs + outputs = [] + index = 0 + if len(outputs) > 0: + outputs = default_collate(outputs) + yield outputs + else: + try_single_batch = check_dynamic_shape(dataset.element_spec) + dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + iter_tensors = ds_iterator.get_next() + data_config = tf.compat.v1.ConfigProto() + data_config.use_per_session_threads = 1 + data_config.intra_op_parallelism_threads = 1 + data_config.inter_op_parallelism_threads = 16 + data_sess = tf.compat.v1.Session(config=data_config) + # pylint: disable=no-name-in-module + from tensorflow.python.framework.errors_impl import OutOfRangeError + + while True: + if not try_single_batch: + try: + outputs = data_sess.run(iter_tensors) + yield outputs + except OutOfRangeError: + data_sess.close() + return + else: + try: + outputs = [] + for i in range(0, batch_size): + outputs.append(squeeze_output(data_sess.run(iter_tensors))) + outputs = default_collate(outputs) + yield outputs + except OutOfRangeError: + if len(outputs) == 0: + data_sess.close() + return + else: + outputs = default_collate(outputs) + yield outputs + data_sess.close() + return diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/main.py new file mode 100644 index 00000000000..b217f63a3ec --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/main.py @@ -0,0 +1,129 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+#
+
+from __future__ import division
+
+import time
+
+import numpy as np
+import tensorflow as tf
+
+from argparse import ArgumentParser
+from data_process import (
+    COCOmAPv2,
+    COCORecordDataset,
+    ComposeTransform,
+    ResizeTFTransform,
+    TFDataLoader,
+)
+
+arg_parser = ArgumentParser(description='Parse args')
+
+arg_parser.add_argument('-g',
+                        "--input-graph",
+                        help='Specify the input graph.',
+                        dest='input_graph')
+arg_parser.add_argument('--config', type=str, default='')
+arg_parser.add_argument('--dataset_location', type=str, default='')
+arg_parser.add_argument('--output_model', type=str, default='')
+arg_parser.add_argument('--mode', type=str, default='performance')
+arg_parser.add_argument('--batch_size', type=int, default=10)
+arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='iterations')
+arg_parser.add_argument('--tune', action='store_true', default=False)
+arg_parser.add_argument('--benchmark', dest='benchmark',
+                        action='store_true', help='run benchmark')
+args = arg_parser.parse_args()
+
+def evaluate(model):
+    """Custom evaluate function to estimate the accuracy of the model.
+
+    Args:
+        model (tf.Graph or string or INC.model.TensorflowCheckpointModel): The input model.
+
+    Returns:
+        accuracy (float): evaluation result, the larger is better.
+    """
+    from neural_compressor.tensorflow import Model
+    if isinstance(model, str) or isinstance(model, tf.compat.v1.Graph):
+        model = Model(model)
+    model.input_tensor_names = ["image_tensor:0"]
+    model.output_tensor_names = ["num_detections:0", "detection_boxes:0", \
+                                 "detection_scores:0", "detection_classes:0"]
+    input_tensor = model.input_tensor
+    output_tensor = model.output_tensor if len(model.output_tensor)>1 else \
+                        model.output_tensor[0]
+    warmup = 5
+    iteration = -1
+    if args.benchmark and args.mode == 'performance':
+        iteration = args.iters
+    metric = COCOmAPv2(output_index_mapping={'num_detections':0, 'boxes':1, 'scores':2, 'classes':3})
+
+    def eval_func(dataloader):
+        latency_list = []
+        for idx, (inputs, labels) in enumerate(dataloader):
+            # dataloader should keep the order and len of inputs same with input_tensor
+            inputs = np.array([inputs])
+            feed_dict = dict(zip(input_tensor, inputs))
+
+            start = time.time()
+            predictions = model.sess.run(output_tensor, feed_dict)
+            end = time.time()
+
+            metric.update(predictions, labels)
+            latency_list.append(end-start)
+            if idx + 1 == iteration:
+                break
+        latency = np.array(latency_list[warmup:]).mean() / args.batch_size
+        return latency
+
+    eval_dataset = COCORecordDataset(root=args.dataset_location, filter=None, \
+        transform=ComposeTransform(transform_list=[ResizeTFTransform(size=300)]))
+    eval_dataloader = TFDataLoader(framework='tensorflow', dataset=eval_dataset, batch_size=args.batch_size)
+    latency = eval_func(eval_dataloader)
+    if args.benchmark and args.mode == 'performance':
+        print("Batch size = {}".format(args.batch_size))
+        print("Latency: {:.3f} ms".format(latency * 1000))
+        print("Throughput: {:.3f} images/sec".format(1.
/ latency)) + acc = metric.result() + return acc + +def main(_): + calib_dataset = COCORecordDataset(root=args.dataset_location, filter=None, \ + transform=ComposeTransform(transform_list=[ResizeTFTransform(size=300)])) + calib_dataloader = TFDataLoader(framework='tensorflow', dataset=calib_dataset, batch_size=args.batch_size) + + if args.tune: + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model + + quant_config = StaticQuantConfig() + model = Model(args.input_graph) + model.input_tensor_names = ['image_tensor'] + model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] + q_model = quantize_model(model, quant_config, calib_dataloader) + q_model.save(args.output_model) + + if args.benchmark: + if args.mode == 'performance': + evaluate(args.input_graph) + else: + accuracy = evaluate(args.input_graph) + print('Batch size = %d' % args.batch_size) + print("Accuracy: %.5f" % accuracy) + +if __name__ == "__main__": + tf.compat.v1.app.run() diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/prepare_model.py b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/prepare_model.py new file mode 100644 index 00000000000..51882cf0bfe --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/prepare_model.py @@ -0,0 +1,99 @@ +import os +import argparse +import enum +import tarfile +import abc + + +class SupportedModels(enum.Enum): + """ + Enumeration containing supported models + """ + ssd_resnet50_v1 = 'ssd_resnet50_v1' + ssd_mobilnet_v1 = 'ssd_mobilenet_v1' + + +class Model(abc.ABC): + """ + Base model class used to obtain the model (and perform any necessary operations to make it usable) + """ + + @abc.abstractmethod + def get_pretrained_model(self, destination): + """ + Base method for obtaining a ready to use model + Args: + destination: path to where the file should be stored + """ + pass + + +class SsdMobilenetV1(Model): + """ Concrete implementation of the Model base class for ssd_mobilenet_v1""" + + def get_pretrained_model(self, destination): + """ + Obtains a ready to use ssd_mobilenet_v1 model file. + Args: + destination: path to where the file should be stored + """ + url = 'http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz' + os.system("curl -o ssd_mobilenet_v1_coco_2018_01_28.tar.gz {0}".format(url)) + with tarfile.open("ssd_mobilenet_v1_coco_2018_01_28.tar.gz") as tar: + if not os.path.exists(destination): + os.makedirs(destination) + tar.extractall(destination) + + +class SsdResnet50(Model): + """ Concrete implementation of the Model base class for ssd_resnet_50""" + + def get_pretrained_model(self, destination): + """ + Obtains a ready to use ssd_resnet_50 model file. 
+ Args: + destination: path to where the file should be stored + """ + url = "http://download.tensorflow.org/models/object_detection/" \ + "ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03.tar.gz" + os.system("curl -o ssd_resnet50_v1.tar.gz {0}".format(url)) + with tarfile.open("ssd_resnet50_v1.tar.gz") as tar: + if not os.path.exists(destination): + os.makedirs(destination) + tar.extractall(destination) + + +def get_model(model: SupportedModels) -> Model: + """ + Factory method that returns the requested model object + Args: + model: model from SupportedModels enumeration + + Returns: Concrete object inheriting the Model base class + + """ + if model == SupportedModels.ssd_resnet50_v1: + return SsdResnet50() + if model == SupportedModels.ssd_mobilnet_v1: + return SsdMobilenetV1() + else: + raise AttributeError("The model {0} is not supported. Supported models: {1}" + .format(model_name, SupportedModels.__members__.keys())) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Prepare pre-trained model for COCO object detection') + parser.add_argument('--model_name', type=str, default='ssd_resnet50_v1', + help='model to download, default is ssd_resnet50_v1', + choices=["ssd_resnet50_v1", "ssd_mobilenet_v1"]) + parser.add_argument('--model_path', type=str, default='./model', help='directory to put models, default is ./model') + + args = parser.parse_args() + model_name = args.model_name + model_path = args.model_path + try: + model = get_model(SupportedModels(model_name)) + model.get_pretrained_model(model_path) + except AttributeError: + print("The model {0} is not supported. Supported models: {1}" + .format(model_name, SupportedModels.__members__.keys())) diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..8ee728de373 --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_benchmark.sh @@ -0,0 +1,52 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + batch_size=32 + iters=100 + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + + +# run_tuning +function run_benchmark { + + python main.py \ + --input-graph ${input_model} \ + --mode ${mode} \ + --dataset_location "${dataset_location}" \ + --batch_size ${batch_size} \ + --benchmark \ + --iters ${iters} +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..559d695f768 --- /dev/null +++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_quant.sh @@ -0,0 +1,41 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo "$var" |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo "$var" |cut -f2 -d=) + 
;; + --dataset_location=*) + dataset_location=$(echo "$var" |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input-graph "${input_model}" \ + --output_model "${output_model}" \ + --dataset_location "${dataset_location}" \ + --tune +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md new file mode 100644 index 00000000000..7bff08a2f84 --- /dev/null +++ b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md @@ -0,0 +1,98 @@ +Step-by-Step +============ + +This document is used to list steps of reproducing TensorFlow Wide & Deep tuning zoo result. +This example can run on Intel CPUs and GPUs. + +# Prerequisite + +## 1. Environment + +### Installation +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` +### Install Intel Tensorflow +```shell +pip install intel-tensorflow +``` +> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). + +### Install Intel Extension for Tensorflow +#### Quantizing the model on Intel GPU(Mandatory to install ITEX) +Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[xpu] +``` +Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. +For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers). + +#### Quantizing the model on Intel CPU(Optional to install ITEX) +Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +> **Note**: +> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX. + +### Install Additional Dependency packages +```shell +cd examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq +pip install -r requirements.txt +``` + +### 2. Download Frozen PB +```shell +wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_6/wide_deep_fp32_pretrained_model.pb +``` + +### 3. Prepare Dataset +Download training dataset: (8 million samples) +```bash +$ wget https://storage.googleapis.com/dataset-uploader/criteo-kaggle/large_version/train.csv +``` +Download evaluation dataset (2 million samples) +```bash +$ wget https://storage.googleapis.com/dataset-uploader/criteo-kaggle/large_version/eval.csv +``` + +### 4. 
Process Dataset +Process calib dataset +```bash +python preprocess_csv_tfrecords.py \ + --inputcsv-datafile train.csv \ + --calibrationcsv-datafile eval.csv \ + --outputfile-name processed_data +``` +Process eval dataset +```bash +python preprocess_csv_tfrecords.py \ + --inputcsv-datafile eval.csv \ + --calibrationcsv-datafile train.csv \ + --outputfile-name processed_data +``` +Two .tfrecords files are generated and will be used later on: +1) train_processed_data.tfrecords +2) eval_processed_data.tfrecords + + +# Run Command + +## Quantization + ```shell + bash run_quant.sh --dataset_location=/path/to/datasets --input_model=/path/to/wide_deep_fp32_pretrained_model.pb --output_model=./wnd_int8_opt.pb + ``` + +## Benchmark + ``` + bash run_benchmark.sh --dataset_location=/path/to/datasets --input_model=./wnd_int8_opt.pb --mode=accuracy --batch_size=500 + bash run_benchmark.sh --dataset_location=/path/to/datasets --input_model=./wnd_int8_opt.pb --mode=performance --batch_size=500 + ``` + +# Other +This example takes the reference from https://github.com/IntelAI/models/tree/master/benchmarks/recommendation/tensorflow/wide_deep_large_ds. +The pretrained model was trained with preprocessed data from dataset Criteo. diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/main.py b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/main.py new file mode 100644 index 00000000000..19ee3647f7e --- /dev/null +++ b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/main.py @@ -0,0 +1,348 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+
+#
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import sys
+import os
+import numpy as np
+import argparse
+import collections
+import time
+import math
+import json
+import datetime
+
+import tensorflow as tf
+
+from tensorflow.python.framework import ops
+from tensorflow.core.framework import graph_pb2
+from google.protobuf import text_format
+from argparse import ArgumentParser
+from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference
+from tensorflow.compat.v1 import graph_util
+
+
+def load_graph(model_file):
+    """Load a TF graph from a pb file.
+
+    Args:
+        model_file (string): TF pb file local path
+
+    Returns:
+        graph: TF graph object
+    """
+    graph = tf.Graph()
+    #graph_def = tf.compat.v1.GraphDef()
+    graph_def = graph_pb2.GraphDef()
+
+    file_ext = os.path.splitext(model_file)[1]
+
+    with open(model_file, "rb") as f:
+        if file_ext == '.pbtxt':
+            text_format.Merge(f.read(), graph_def)
+        else:
+            graph_def.ParseFromString(f.read())
+
+    with graph.as_default():
+        tf.import_graph_def(graph_def, name='')
+
+    return graph
+
+
+numeric_feature_names = ["numeric_1"]
+string_feature_names = ["string_1"]
+
+def get_feature_name(compute_accuracy):
+
+    if compute_accuracy:
+        full_features_names = numeric_feature_names + string_feature_names + ["label"]
+        feature_datatypes = [tf.io.FixedLenSequenceFeature([], tf.float32, default_value=0.0, allow_missing=True)]+[tf.io.FixedLenSequenceFeature(
+            [], tf.int64, default_value=0, allow_missing=True)]+[tf.io.FixedLenSequenceFeature([], tf.int64, default_value=0, allow_missing=True)]
+    else:
+        full_features_names = numeric_feature_names + string_feature_names
+        feature_datatypes = [tf.io.FixedLenSequenceFeature([], tf.float32, default_value=0.0, allow_missing=True)]+[tf.io.FixedLenSequenceFeature(
+            [], tf.int64, default_value=0, allow_missing=True)]
+    return full_features_names, feature_datatypes
+
+def input_fn(data_file, num_epochs, shuffle, batch_size, compute_accuracy=True):
+    """Generate an input function for the Estimator."""
+    full_features_names, feature_datatypes = get_feature_name(compute_accuracy)
+    def _parse_function(proto):
+        f = collections.OrderedDict(
+            zip(full_features_names, feature_datatypes))
+        parsed_features = tf.io.parse_example(proto, f)
+        parsed_feature_vals_num = [tf.reshape(
+            parsed_features["numeric_1"], shape=[-1, 13])]
+        parsed_feature_vals_str = [tf.reshape(
+            parsed_features["string_1"], shape=[-1, 2]) for i in string_feature_names]
+        parsed_feature_vals = parsed_feature_vals_num + parsed_feature_vals_str
+        if compute_accuracy:
+            parsed_feature_vals_label = [tf.reshape(parsed_features[i], shape=[-1]) for i in ["label"]]
+            parsed_feature_vals = parsed_feature_vals + parsed_feature_vals_label
+        return parsed_feature_vals
+
+    # Extract lines from input files using the Dataset API.
+ dataset = tf.data.TFRecordDataset([data_file]) + if shuffle: + dataset = dataset.shuffle(buffer_size=20000) + dataset = dataset.batch(batch_size) + dataset = dataset.map(_parse_function, num_parallel_calls=28) + dataset = dataset.prefetch(batch_size*10) + return dataset + +def evaluation_func(model, measurer=None): + evaluate_opt_graph.eval_inference(model) + +class eval_classifier_optimized_graph: + """Evaluate image classifier with optimized TensorFlow graph""" + + def __init__(self): + arg_parser = ArgumentParser(description='Parse args') + arg_parser.add_argument('-i', '--input_graph', type=str, + help='Specify the input of the model', + dest='input_graph', + required=True) + arg_parser.add_argument('-o', '--output_graph', type=str, + help='Specify the output of the model', + dest='output_graph') + arg_parser.add_argument('--calibration_data_location', type=str, + help='full path of calibration data file', + dest='calib_data') + arg_parser.add_argument('--evaluation_data_location', type=str, + help='full path of validation data file', + dest='eval_data', + required=True) + arg_parser.add_argument('--batch_size', type=int, + help='batch size for inference.Default is 512', + default=512, + dest='batch_size') + arg_parser.add_argument('--num_intra_threads', type=int, + help='number of threads for an operator', + required=False, + default=0, + dest='num_intra_threads') + arg_parser.add_argument('--num_inter_threads', type=int, + help='number of threads across operators', + required=False, + default=0, + dest='num_inter_threads') + arg_parser.add_argument('--kmp_blocktime', type=str, + help='KMP_BLOCKTIME value', + required=False, + default=None, + dest='kmp_blocktime') + arg_parser.add_argument('-r', "--accuracy", + help='For accuracy measurement only.', + dest='accuracy', action='store_true') + arg_parser.add_argument("--config", default=None, + help="tuning config") + arg_parser.add_argument('--performance', + dest='performance', + action='store_true', + help='run performance') + arg_parser.add_argument('--tune', + dest='tune', + action='store_true', + help='use neural_compressor to tune.') + arg_parser.add_argument("--warmup-steps", + type=int, default=50, + help="number of warmup steps") + arg_parser.add_argument("--steps", + type=int, default=2000, + help="number of iterations") + + arg_parser.add_argument('--env', + dest='env', + help='specific Tensorflow env', + default='mkl') + + + self.args = arg_parser.parse_args() + + def auto_tune(self): + """This is neural_compressor tuning part to generate a quantized pb + Returns: + graph: it will return a quantized pb + """ + from neural_compressor.common import set_random_seed + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model + + set_random_seed(9527) + infer_graph = load_graph(self.args.input_graph) + model = Model(infer_graph) + model.input_tensor_names = ["new_numeric_placeholder", "new_categorical_placeholder"] + model.output_tensor_names = ["import/head/predictions/probabilities"] + + if self.args.calib_data: + quant_config = StaticQuantConfig() + calib_dataloader=Dataloader(self.args.calib_data, self.args.batch_size) + q_model = quantize_model(model, quant_config, calib_dataloader) + return q_model + print("Please provide calibration dataset!") + + def eval_inference(self, infer_graph): + print("Run inference") + if isinstance(infer_graph, tf.compat.v1.GraphDef): + graph = tf.Graph() + with graph.as_default(): + tf.import_graph_def(infer_graph, name='') + infer_graph = graph + + data_config = 
tf.compat.v1.ConfigProto() + data_config.intra_op_parallelism_threads = self.args.num_intra_threads + data_config.inter_op_parallelism_threads = self.args.num_inter_threads + data_config.use_per_session_threads = 1 + + infer_config = tf.compat.v1.ConfigProto() + if self.args.env == 'mkl': + print("Set inter and intra for mkl: ") + print("intra_op_parallelism_threads = ", self.args.num_intra_threads) + print("inter_op_parallelism_threads = ", self.args.num_inter_threads) + infer_config.intra_op_parallelism_threads = self.args.num_intra_threads + infer_config.inter_op_parallelism_threads = self.args.num_inter_threads + infer_config.use_per_session_threads = 1 + + total_test_samples = sum(1 for _ in tf.compat.v1.python_io.tf_record_iterator(self.args.eval_data)) + total_batches = math.ceil(float(total_test_samples)/self.args.batch_size) + placeholder_list = ['new_numeric_placeholder','new_categorical_placeholder'] + input_tensor = [infer_graph.get_tensor_by_name(name + ":0") for name in placeholder_list] + output_name = "import/head/predictions/probabilities" + output_tensor = infer_graph.get_tensor_by_name(output_name + ":0" ) + correctly_predicted = 0 + evaluate_duration = 0.0 + + features_list = [] + data_graph = tf.Graph() + with data_graph.as_default(): + res_dataset = input_fn(self.args.eval_data, 1, False, self.args.batch_size) + iterator = tf.compat.v1.data.make_one_shot_iterator(res_dataset) + next_element = iterator.get_next() + with tf.compat.v1.Session(config=data_config, graph=data_graph) as data_sess: + for i in range(int(total_batches)): + batch = data_sess.run(next_element) + features=batch[0:3] + features_list.append(features) + + if self.args.performance: + iteration = 0 + warm_up_iteration = self.args.warmup_steps + total_run = self.args.steps + + if total_run > total_batches: + total_run = total_batches + + with tf.compat.v1.Session(config=infer_config, graph=infer_graph) as infer_sess: + i = 0 + for i in range(int(total_run)): + start_time = time.time() + logistic = infer_sess.run(output_tensor, dict(zip(input_tensor, features_list[iteration][0:2]))) + time_consume = time.time() - start_time + + if iteration > warm_up_iteration: + evaluate_duration += time_consume + + iteration += 1 + if iteration > total_batches: + iteration = 0 + test_batches = total_run - warm_up_iteration + else: + with tf.compat.v1.Session(config=infer_config, graph=infer_graph) as infer_sess: + i = 0 + for i in range(int(total_batches)): + start_time = time.time() + logistic = infer_sess.run(output_tensor, dict(zip(input_tensor, features_list[i][0:2]))) + time_consume = time.time() - start_time + evaluate_duration += time_consume + + predicted_labels = np.argmax(logistic,1) + correctly_predicted=correctly_predicted+np.sum(features_list[i][2] == predicted_labels) + + i=i+1 + + accuracy = float(correctly_predicted) / float(total_test_samples) + test_batches = total_batches + + no_of_test_samples = test_batches * self.args.batch_size + latency = 1000 * float(evaluate_duration) / float(test_batches) + throughput = no_of_test_samples / evaluate_duration + + print('--------------------------------------------------') + print('Total test records: %d' % no_of_test_samples) + print('Number of batches: %d' % test_batches) + print('Batch size = %d' % self.args.batch_size) + print('Latency: %.3f ms' % latency) + print('Throughput: %.3f records/sec' % throughput) + print('--------------------------------------------------') + + if self.args.accuracy: + return accuracy + + def run(self): + """ This is 
neural_compressor function include tuning and benchmark option """ + + if self.args.tune: + q_model = evaluate_opt_graph.auto_tune() + q_model.save(self.args.output_graph) + else: + if self.args.accuracy: + infer_graph = load_graph(self.args.input_graph) + acc = evaluation_func(infer_graph) + print("Accuracy: %.5f" % acc) + if self.args.performance: + infer_graph = load_graph(self.args.input_graph) + evaluation_func(infer_graph) + + +class Dataloader(object): + def __init__(self, data_location, batch_size): + """dataloader generator + + Args: + data_location (str): tf recorder local path + batch_size (int): dataloader batch size + """ + self.batch_size = batch_size + self.data_file = data_location + self.total_samples = sum(1 for _ in tf.compat.v1.python_io.tf_record_iterator(data_location)) + self.n = math.ceil(float(self.total_samples) / batch_size) + print("batch size is " + str(self.batch_size) + "," + str(self.n) + " iteration") + + def __iter__(self): + data_graph = tf.Graph() + with data_graph.as_default(): + self.dataset = input_fn(self.data_file, 1, False, self.batch_size) + self.dataset_iterator = tf.compat.v1.data.make_one_shot_iterator(self.dataset) + next_element = self.dataset_iterator.get_next() + + with tf.compat.v1.Session(graph=data_graph) as sess: + for i in range(self.n): + batch = sess.run(next_element) + yield (batch[0:2], batch[2]) + + def __len__(self): + return self.n + + +if __name__ == "__main__": + evaluate_opt_graph = eval_classifier_optimized_graph() + evaluate_opt_graph.run() diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/preprocess_csv_tfrecords.py b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/preprocess_csv_tfrecords.py new file mode 100644 index 00000000000..e1a82cd674c --- /dev/null +++ b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/preprocess_csv_tfrecords.py @@ -0,0 +1,155 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import os +import sys +import pandas +import argparse +import numpy as np +import tensorflow as tf +if tf.version.VERSION < '2.0': + tf.enable_eager_execution() +parser = argparse.ArgumentParser() +parser.add_argument('--inputcsv-datafile', type=str, + help='full path of data file e.g. eval.csv', + dest='evaldatafile_path', + required=True) +parser.add_argument('--calibrationcsv-datafile', type=str, + help='full path of data file of calibration/train dataset to get normalization ranges', + dest='traindatafile_path', + default='NULL', + required=False) + +parser.add_argument('--outputfile-name', type=str, + help='output tfrecord file name e.g. 
processed_eval.[tfrecords]', + dest='outputfile_path', + default="processed_data.tfrecords", + required=False) + +args = parser.parse_args() + +eval_csv_file = args.evaldatafile_path +train_csv_file = args.traindatafile_path +output_file = args.outputfile_path + +if not os.path.isfile(eval_csv_file): + print("Please input a valid csv file") + sys.exit(1) + +filename, file_ext = os.path.splitext(output_file) +in_filename, _ = os.path.splitext(os.path.basename(eval_csv_file)) + +if file_ext != ".tfrecords": + output_file = output_file + ".tfrecords" + +output_file = "{}_{}".format(in_filename,output_file) +csv = pandas.read_csv(eval_csv_file, header=None) +if len(csv.columns)==39: + dataset_type = 'test' +else: + dataset_type = 'eval' +fill_na_dict = {} +if dataset_type=='test': + for i in range(0,13): + fill_na_dict[i]=0.0 + for i in range(13,39): + fill_na_dict[i]="" +else: + for i in range(1,14): + fill_na_dict[i]=0.0 + for i in range(14,40): + fill_na_dict[i]="" +csv=csv.fillna(value=fill_na_dict).values +numeric_feature_names = ["numeric_1"] +string_feature_names = ["string_1"] +LABEL_COLUMN =["clicked"] +CATEGORICAL_COLUMNS1 = ["C"+str(i)+"_embedding" for i in range(1, 27)] +NUMERIC_COLUMNS1 = ["I"+str(i) for i in range(1, 14)] +if dataset_type=='eval': + DATA_COLUMNS = LABEL_COLUMN + NUMERIC_COLUMNS1 + CATEGORICAL_COLUMNS1 +else: + DATA_COLUMNS = NUMERIC_COLUMNS1 + CATEGORICAL_COLUMNS1 +CATEGORICAL_COLUMNS2 = ["C"+str(i)+"_embedding" for i in range(1, 27)] +NUMERIC_COLUMNS2 = ["I"+str(i) for i in range(1, 14)] + +CATEGORICAL_COLUMNS1.sort() +NUMERIC_COLUMNS1.sort() +no_of_rows = 0 +with open(eval_csv_file, 'r') as f: + if not os.path.isfile(train_csv_file): + nums=[line.strip('\n\r').split(',') for line in f.readlines()] + else: + f1 = open(train_csv_file, 'r') + nums=[line.strip('\n\r').split(',') for line in f.readlines( + )]+[line.strip('\n\t').split(',') for line in f1.readlines()] + numpy_arr = np.array(nums) + numpy_arr[numpy_arr=='']='0' + min_list,max_list,range_list = [],[],[] + for i in range(len(DATA_COLUMNS)): + if DATA_COLUMNS[i] in NUMERIC_COLUMNS1: + col_min = numpy_arr[:,i].astype(np.float32).min() + col_max = numpy_arr[:,i].astype(np.float32).max() + min_list.append(col_min) + max_list.append(col_max) + range_list.append(col_max-col_min) + if os.path.isfile(train_csv_file): + f1.close() + print('min list',min_list) + print('max list',max_list) + print('range list',range_list) + + +with tf.compat.v1.python_io.TFRecordWriter(output_file) as writer: + print('*****Processing data******') + for row in csv: + no_of_rows = no_of_rows+1 + if dataset_type == 'eval': + unnormalized_vals = np.array(row[1:14]) + else: + unnormalized_vals = np.array(row[0:13]) + normalized_vals = (unnormalized_vals-min_list)/range_list + if dataset_type == 'eval': + new_categorical_dict = dict(zip(CATEGORICAL_COLUMNS2, row[14:40])) + else: + new_categorical_dict = dict(zip(CATEGORICAL_COLUMNS2, row[13:39])) + new_categorical_list = [] + for i in CATEGORICAL_COLUMNS1: + if pandas.isnull(new_categorical_dict[i]): + new_categorical_list.append("") + else: + new_categorical_list.append(new_categorical_dict[i]) + hash_values = tf.compat.v1.string_to_hash_bucket_fast( + new_categorical_list, 1000).numpy() + new_numerical_dict = dict(zip(NUMERIC_COLUMNS2, normalized_vals)) + example = tf.train.Example() + for i in NUMERIC_COLUMNS1: + example.features.feature[numeric_feature_names[0]].float_list.value.extend([new_numerical_dict[i]]) + for i in range(0, 26): + 
example.features.feature[string_feature_names[0]].int64_list.value.extend([i]) + example.features.feature[string_feature_names[0]].int64_list.value.extend([hash_values[i]]) + if dataset_type == 'eval': + example.features.feature["label"].int64_list.value.append(row[0]) + writer.write(example.SerializeToString()) + +print('Total number of rows ', no_of_rows) +print('Generated output file name :'+output_file) diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/requirements.txt new file mode 100644 index 00000000000..e2f0ef81736 --- /dev/null +++ b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/requirements.txt @@ -0,0 +1,9 @@ +intel-tensorflow>=2.12.0 +Cython +contextlib2 +pillow>=8.2.0 +lxml>=4.6.2 +matplotlib +numpy>=1.17.4 +pycocotools +protobuf diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..72ab01f2a19 --- /dev/null +++ b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_benchmark.sh @@ -0,0 +1,57 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + define_mode + run_benchmark + +} + +# init params +function init_params { + iters=1000 + for var in "$@" + do + case $var in + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +function define_mode { + if [[ ${mode} == "accuracy" ]]; then + mode_cmd=" --accuracy" + elif [[ ${mode} == "performance" ]]; then + mode_cmd=" --performance" + else + echo "Error: No such mode: ${mode}" + exit 1 + fi +} + +# run_tuning +function run_benchmark { + #numactl -N 0 -m 0 \ + python main.py \ + --input_graph ${input_model} \ + --evaluation_data_location ${dataset_location}/eval_processed_data.tfrecords \ + --batch_size ${batch_size} \ + --num_inter_threads 4 \ + ${mode_cmd} +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..a8068917a27 --- /dev/null +++ b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_quant.sh @@ -0,0 +1,48 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + *) + echo "Error: No such parameter: ${var}" + exit 1 + ;; + esac + done + +} + + +# run_tuning +function run_tuning { + python main.py \ + --input_graph ${input_model} \ + --evaluation_data_location ${dataset_location}/eval_processed_data.tfrecords \ + --calibration_data_location ${dataset_location}/train_processed_data.tfrecords \ + --accuracy \ + --batch_size 1000 \ + --output_graph ${output_model} \ + --tune +} + +main "$@" diff --git 
a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md new file mode 100644 index 00000000000..5747ba5b4ac --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md @@ -0,0 +1,73 @@ +Step-by-Step +============ + +This document is used to list steps of reproducing TensorFlow Intel® Neural Compressor tuning zoo result of 3dunet-mlperf. +This example can run on Intel CPUs and GPUs. + +# Prerequisite + +## 1. Environment + +### Installation +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` + +### Install Intel Tensorflow +```shell +pip install intel-tensorflow +``` +> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). + +### Install Intel Extension for Tensorflow +#### Quantizing the model on Intel GPU(Mandatory to install ITEX) +Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[xpu] +``` +Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. +For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers). + +#### Quantizing the model on Intel CPU(Optional to install ITEX) +Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +> **Note**: +> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX. + +## 2. Prepare Pre-trained model + Download the pre-trained model from the + [3DUnetCNN](https://storage.googleapis.com/intel-optimized-tensorflow/models/v2_7_0/3dunet_dynamic_ndhwc.pb). + In this example, we are using the model, + trained using the fold 1 BRATS 2019 data. + The validation files have been copied from [here](https://github.com/mlcommons/inference/tree/r0.7/vision/medical_imaging/3d-unet/folds) + +## 3. Prepare dataset + +### Download BraTS 2019 dataset + Please download [Brats 2019](https://www.med.upenn.edu/cbica/brats2019/data.html) + separately and unzip the dataset. The directory that contains the dataset files will be + passed to the launch script when running the benchmarking script. + +### Prepare Calibration set + The calibration set is the forty images listed in brats_cal_images_list.txt. They are randomly selected from Fold 0, Fold 2, Fold 3, and Fold 4 of BraTS 2019 Training Dataset. 
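+
+With the model and dataset in place, the `run_quant.sh` and `run_benchmark.sh` scripts in this folder can be driven as sketched below. The paths are placeholders for your local layout; the scripts derive the nnU-Net environment variables from the directory passed as `--dataset_location`.
+
+```shell
+# Placeholder paths: adjust to your local setup.
+# FP32 model from the "Prepare Pre-trained model" step above:
+wget https://storage.googleapis.com/intel-optimized-tensorflow/models/v2_7_0/3dunet_dynamic_ndhwc.pb
+
+# ./build is assumed to hold raw_data/, preprocessed_data/ and result/; the run scripts
+# export nnUNet_preprocessed, nnUNet_raw_data_base and RESULTS_FOLDER from this directory.
+bash run_quant.sh --input_model=./3dunet_dynamic_ndhwc.pb --dataset_location=./build --output_model=./3dunet_int8.pb
+bash run_benchmark.sh --input_model=./3dunet_int8.pb --dataset_location=./build --mode=performance --batch_size=1
+```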
+ + +# Run command + +## Quantization + + +## Benchmark + +* `export nnUNet_preprocessed=/build/preprocessed_data` +* `export nnUNet_raw_data_base=/build/raw_data` +* `export RESULTS_FOLDER=/build/result` +* `pip install -r requirements.txt` +* `python run_accuracy.py --input-model= --data-location= --calib-preprocess= --iters=100 --batch-size=1 --mode=benchmark --bfloat16 0` + diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/__init__.py b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/__init__.py new file mode 100644 index 00000000000..0a18c579d8b --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# \ No newline at end of file diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/brats_cal_images_list.txt b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/brats_cal_images_list.txt new file mode 100644 index 00000000000..69276e67b6a --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/brats_cal_images_list.txt @@ -0,0 +1,40 @@ +HGG__BraTS19_2013_18_1 +HGG__BraTS19_2013_20_1 +HGG__BraTS19_CBICA_AAP_1 +HGG__BraTS19_CBICA_ABN_1 +HGG__BraTS19_CBICA_ABO_1 +HGG__BraTS19_CBICA_ALU_1 +HGG__BraTS19_CBICA_ANZ_1 +HGG__BraTS19_CBICA_APY_1 +HGG__BraTS19_CBICA_AQJ_1 +HGG__BraTS19_CBICA_AQZ_1 +HGG__BraTS19_CBICA_ASN_1 +HGG__BraTS19_CBICA_ASY_1 +HGG__BraTS19_CBICA_AUW_1 +HGG__BraTS19_CBICA_AXJ_1 +HGG__BraTS19_CBICA_AXM_1 +HGG__BraTS19_CBICA_AYG_1 +HGG__BraTS19_CBICA_AYU_1 +HGG__BraTS19_CBICA_AZD_1 +HGG__BraTS19_CBICA_BAX_1 +HGG__BraTS19_CBICA_BGR_1 +HGG__BraTS19_CBICA_BHV_1 +HGG__BraTS19_TCIA01_235_1 +HGG__BraTS19_TCIA02_394_1 +HGG__BraTS19_TCIA02_473_1 +HGG__BraTS19_TCIA02_606_1 +HGG__BraTS19_TCIA03_419_1 +HGG__BraTS19_TCIA04_192_1 +HGG__BraTS19_TCIA04_479_1 +HGG__BraTS19_TCIA06_372_1 +HGG__BraTS19_TCIA08_278_1 +LGG__BraTS19_2013_28_1 +LGG__BraTS19_TCIA09_462_1 +LGG__BraTS19_TCIA10_130_1 +LGG__BraTS19_TCIA10_202_1 +LGG__BraTS19_TCIA10_346_1 +LGG__BraTS19_TCIA10_387_1 +LGG__BraTS19_TCIA10_628_1 +LGG__BraTS19_TCIA12_470_1 +LGG__BraTS19_TCIA13_621_1 +LGG__BraTS19_TCIA13_653_1 diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/main.py b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/main.py new file mode 100644 index 00000000000..5ba82875420 --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/main.py @@ -0,0 +1,219 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import time +from argparse import ArgumentParser +import os +import pickle +import sys +import math +import array + +import numpy as np +import tensorflow as tf +from tensorflow.python.framework import dtypes +from tensorflow.core.protobuf import rewriter_config_pb2 +from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference +from nnunet.evaluation.region_based_evaluation import evaluate_regions, get_brats_regions + +from nnUNet.setup import setup +from nnUNet.postprocess import postprocess_output + +INPUTS = 'input' +OUTPUTS = 'Identity' + +if __name__ == "__main__": + """Evaluate 3d_unet with optimized TensorFlow graph""" + def get_args(): + arg_parser = ArgumentParser(description='Parse args') + + arg_parser.add_argument('-m', "--mode", + help="One of three options: 'benchmark'/'accuracy'/'tune'.") + arg_parser.add_argument('-n', "--iters", + help='The number of iteration. shall > warmup num(10)', + type=int, default=20) + arg_parser.add_argument('-e', "--num-inter-threads", + help='The number of inter-thread.', + dest='num_inter_threads', type=int, default=0) + arg_parser.add_argument('-a', "--num-intra-threads", + help='The number of intra-thread.', + dest='num_intra_threads', type=int, default=0) + arg_parser.add_argument('-i', "--input-model", + help='Specify the input graph.', + dest='input_model') + arg_parser.add_argument('-o', "--output-model", + help='Specify the output graph.', + dest='output_model') + arg_parser.add_argument('-c', "--calib-preprocess", + help='Specify calibration preprocess dir.', + dest='calib_preprocess') + arg_parser.add_argument('-d', "--data-location", + help='Specify the location of the data.', + dest="data_location", default=None) + arg_parser.add_argument("--batch-size", dest="batch_size", type=int, default=1) + arg_parser.add_argument("--bfloat16", type=int, default=0) + + args = arg_parser.parse_args() + print(args) + return args + + def eval_func(graph): + print("Run inference for accuracy") + args = get_args() + #setup(args.data_location, args.input_model) + + output_graph = optimize_for_inference(graph.as_graph_def(), [INPUTS], [OUTPUTS], + dtypes.float32.as_datatype_enum, False) + tf.import_graph_def(output_graph, name="") + + input_tensor = graph.get_tensor_by_name('input:0') + output_tensor = graph.get_tensor_by_name('Identity:0') + + config = tf.compat.v1.ConfigProto() + config.intra_op_parallelism_threads=args.num_intra_threads + config.inter_op_parallelism_threads=args.num_inter_threads + if args.bfloat16: + config.graph_options.rewrite_options.auto_mixed_precision_mkl = rewriter_config_pb2.RewriterConfig.ON + + sess = tf.compat.v1.Session(graph=graph, config=config) + if args.mode: + print("Inference with real data") + preprocessed_data_dir = os.path.join(args.data_location, "preprocessed_data") + with open(os.path.join(preprocessed_data_dir, "preprocessed_files.pkl"), "rb") as f: + preprocessed_files = pickle.load(f) + + dictionaries = [] + for preprocessed_file in 
preprocessed_files: + with open(os.path.join(preprocessed_data_dir, preprocessed_file + ".pkl"), "rb") as f: + dct = pickle.load(f)[1] + dictionaries.append(dct) + + count = len(preprocessed_files) + predictions = [None] * count + validation_indices = list(range(0,count)) + print("Found {:d} preprocessed files".format(count)) + loaded_files = {} + batch_size = args.batch_size + + # Get the number of steps based on batch size + steps = count#math.ceil(count/batch_size) + warmup = 10 + assert args.iters >= warmup, 'iteration must be larger than warmup' + time_list=[] + for i in range(steps): + print("Iteration {} ...".format(i)) + test_data_index = validation_indices[i]#validation_indices[i * batch_size:(i + 1) * batch_size] + file_name = preprocessed_files[test_data_index] + with open(os.path.join(preprocessed_data_dir, "{:}.pkl".format(file_name)), "rb") as f: + data = pickle.load(f)[0] + if args.mode == 'performance' and i < args.iters: + time_start = time.time() + predictions[i] = sess.run(output_tensor, feed_dict={input_tensor: data[np.newaxis, ...]})[0].astype(np.float32) + duration = time.time() - time_start + time_list.append(duration) + else: + predictions[i] = sess.run(output_tensor, feed_dict={input_tensor: data[np.newaxis, ...]})[0].astype(np.float32) + if args.mode == 'performance': + latency = np.array(time_list[warmup: ]).mean() / args.batch_size + print('Batch size = {}'.format(args.batch_size)) + print('Latency: {:.3f} ms'.format(latency * 1000)) + print('Throughput: {:.3f} items/sec'.format(1./ latency)) + else: + output_folder = os.path.join(args.data_location, "postprocessed_data") + output_files = preprocessed_files + # Post Process + postprocess_output(predictions, dictionaries, validation_indices, output_folder, output_files) + + ground_truths = os.path.join(args.data_location, \ + "raw_data/nnUNet_raw_data/Task043_BraTS2019/labelsTr") + # Run evaluation + print("Running evaluation...") + evaluate_regions(output_folder, ground_truths, get_brats_regions()) + # Load evaluation summary + print("Loading evaluation summary...") + accuracy=0.0 + with open(os.path.join(output_folder, "summary.csv")) as f: + for line in f: + words = line.split(",") + if words[0] == "mean": + whole = float(words[1]) + core = float(words[2]) + enhancing = float(words[3]) + mean = (whole + core + enhancing) / 3 + accuracy=mean + print("Batch size =", args.batch_size) + print("Accuracy is {:.5f}".format(mean)) + break + print("Done!") + return accuracy + + def load_graph(file_name): + tf.compat.v1.logging.info('Loading graph from: ' + file_name) + with tf.io.gfile.GFile(file_name, "rb") as f: + graph_def = tf.compat.v1.GraphDef() + graph_def.ParseFromString(f.read()) + with tf.Graph().as_default() as graph: + tf.import_graph_def(graph_def, name='') + return graph + + class CalibrationDL(): + def __init__(self): + path = os.path.abspath(os.path.expanduser( + './brats_cal_images_list.txt')) + with open(path, 'r') as f: + self.preprocess_files = [line.rstrip() for line in f] + + self.loaded_files = {} + self.batch_size = 1 + + def __getitem__(self, sample_id): + file_name = self.preprocess_files[sample_id] + print("Loading file {:}".format(file_name)) + with open(os.path.join(args.calib_preprocess, "{:}.pkl".format(file_name)), "rb") as f: + self.loaded_files[sample_id] = pickle.load(f)[0] + # note that calibration phase does not care label, here we return 0 for label free case. 
+            return self.loaded_files[sample_id], 0
+
+        def __len__(self):
+            self.count = len(self.preprocess_files)
+            return self.count
+
+
+    args = get_args()
+    print(args)
+    graph = load_graph(args.input_model)
+    if args.mode == 'tune':
+        from neural_compressor.common import set_random_seed
+        from neural_compressor.tensorflow.utils import BaseDataLoader
+        from neural_compressor.tensorflow import StaticQuantConfig, quantize_model
+
+        set_random_seed(9527)
+        quant_config = StaticQuantConfig()
+        calib_dataloader = BaseDataLoader(dataset=CalibrationDL())
+        q_model = quantize_model(graph, quant_config, calib_dataloader)
+        try:
+            q_model.save(args.output_model)
+        except Exception as e:
+            print("Failed to save model due to {}".format(str(e)))
+    else:
+        eval_func(graph)
diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/Task043_BraTS_2019.py b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/Task043_BraTS_2019.py
new file mode 100644
index 00000000000..d26521276d6
--- /dev/null
+++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/Task043_BraTS_2019.py
@@ -0,0 +1,125 @@
+# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file is copied from nnUnet/nnunet/dataset_conversion/Task043_BraTS_2019.py, except that
+# the validation/test set part is removed and downloaded_data_dir is now configurable.
+
+import argparse
+import numpy as np
+from collections import OrderedDict
+import os
+import sys
+
+from batchgenerators.utilities.file_and_folder_operations import *
+from nnunet.paths import nnUNet_raw_data
+import SimpleITK as sitk
+import shutil
+
+def copy_BraTS_segmentation_and_convert_labels(in_file, out_file):
+    # use this for segmentation only!!!
+    # nnUNet wants the labels to be continuous. BraTS is 0, 1, 2, 4 -> we make that into 0, 1, 2, 3
+    img = sitk.ReadImage(in_file)
+    img_npy = sitk.GetArrayFromImage(img)
+
+    uniques = np.unique(img_npy)
+    for u in uniques:
+        if u not in [0, 1, 2, 4]:
+            raise RuntimeError('unexpected label')
+
+    seg_new = np.zeros_like(img_npy)
+    seg_new[img_npy == 4] = 3
+    seg_new[img_npy == 2] = 1
+    seg_new[img_npy == 1] = 2
+    img_corr = sitk.GetImageFromArray(seg_new)
+    img_corr.CopyInformation(img)
+    sitk.WriteImage(img_corr, out_file)
+
+def task_setup(downloaded_data_dir):
+    """
+    REMEMBER TO CONVERT LABELS BACK TO BRATS CONVENTION AFTER PREDICTION!
+ """ + + task_name = "Task043_BraTS2019" + print(task_name) + print(downloaded_data_dir) + print(nnUNet_raw_data) + + target_base = join(nnUNet_raw_data, task_name) + if not os.path.isdir(target_base): + target_imagesTr = join(target_base, "imagesTr") + target_imagesVal = join(target_base, "imagesVal") + target_imagesTs = join(target_base, "imagesTs") + target_labelsTr = join(target_base, "labelsTr") + + maybe_mkdir_p(target_imagesTr) + maybe_mkdir_p(target_imagesVal) + maybe_mkdir_p(target_imagesTs) + maybe_mkdir_p(target_labelsTr) + + patient_names = [] + for tpe in ["HGG", "LGG"]: + cur = join(downloaded_data_dir, tpe) + for p in subdirs(cur, join=False): + patdir = join(cur, p) + patient_name = tpe + "__" + p + patient_names.append(patient_name) + t1 = join(patdir, p + "_t1.nii.gz") + t1c = join(patdir, p + "_t1ce.nii.gz") + t2 = join(patdir, p + "_t2.nii.gz") + flair = join(patdir, p + "_flair.nii.gz") + seg = join(patdir, p + "_seg.nii.gz") + + assert all([ + isfile(t1), + isfile(t1c), + isfile(t2), + isfile(flair), + isfile(seg) + ]), "%s" % patient_name + + shutil.copy(t1, join(target_imagesTr, patient_name + "_0000.nii.gz")) + shutil.copy(t1c, join(target_imagesTr, patient_name + "_0001.nii.gz")) + shutil.copy(t2, join(target_imagesTr, patient_name + "_0002.nii.gz")) + shutil.copy(flair, join(target_imagesTr, patient_name + "_0003.nii.gz")) + + copy_BraTS_segmentation_and_convert_labels(seg, join(target_labelsTr, patient_name + ".nii.gz")) + + json_dict = OrderedDict() + json_dict['name'] = "BraTS2019" + json_dict['description'] = "nothing" + json_dict['tensorImageSize'] = "4D" + json_dict['reference'] = "see BraTS2019" + json_dict['licence'] = "see BraTS2019 license" + json_dict['release'] = "0.0" + json_dict['modality'] = { + "0": "T1", + "1": "T1ce", + "2": "T2", + "3": "FLAIR" + } + json_dict['labels'] = { + "0": "background", + "1": "edema", + "2": "non-enhancing", + "3": "enhancing", + } + json_dict['numTraining'] = len(patient_names) + json_dict['numTest'] = 0 + json_dict['training'] = [{'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i} for i in + patient_names] + json_dict['test'] = [] + + save_json(json_dict, join(target_base, "dataset.json")) + print("DONE") diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/__init__.py b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/__init__.py new file mode 100644 index 00000000000..0a18c579d8b --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/__init__.py @@ -0,0 +1,19 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# \ No newline at end of file diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold0_validation.txt b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold0_validation.txt new file mode 100644 index 00000000000..57eeeb651c5 --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold0_validation.txt @@ -0,0 +1,67 @@ +HGG__BraTS19_2013_22_1 +HGG__BraTS19_2013_23_1 +HGG__BraTS19_2013_3_1 +HGG__BraTS19_2013_5_1 +HGG__BraTS19_2013_7_1 +HGG__BraTS19_CBICA_AAB_1 +HGG__BraTS19_CBICA_AAL_1 +HGG__BraTS19_CBICA_ABN_1 +HGG__BraTS19_CBICA_ALU_1 +HGG__BraTS19_CBICA_AME_1 +HGG__BraTS19_CBICA_ANG_1 +HGG__BraTS19_CBICA_AOC_1 +HGG__BraTS19_CBICA_AOD_1 +HGG__BraTS19_CBICA_APZ_1 +HGG__BraTS19_CBICA_AQD_1 +HGG__BraTS19_CBICA_AQJ_1 +HGG__BraTS19_CBICA_AQN_1 +HGG__BraTS19_CBICA_ASA_1 +HGG__BraTS19_CBICA_ASK_1 +HGG__BraTS19_CBICA_ASO_1 +HGG__BraTS19_CBICA_AWH_1 +HGG__BraTS19_CBICA_AWV_1 +HGG__BraTS19_CBICA_AYA_1 +HGG__BraTS19_CBICA_AYC_1 +HGG__BraTS19_CBICA_AYI_1 +HGG__BraTS19_CBICA_BFB_1 +HGG__BraTS19_CBICA_BGN_1 +HGG__BraTS19_CBICA_BGR_1 +HGG__BraTS19_CBICA_BJY_1 +HGG__BraTS19_TCIA01_231_1 +HGG__BraTS19_TCIA01_378_1 +HGG__BraTS19_TCIA01_390_1 +HGG__BraTS19_TCIA01_412_1 +HGG__BraTS19_TCIA02_135_1 +HGG__BraTS19_TCIA02_179_1 +HGG__BraTS19_TCIA02_208_1 +HGG__BraTS19_TCIA02_274_1 +HGG__BraTS19_TCIA02_314_1 +HGG__BraTS19_TCIA02_430_1 +HGG__BraTS19_TCIA02_608_1 +HGG__BraTS19_TCIA03_121_1 +HGG__BraTS19_TCIA03_138_1 +HGG__BraTS19_TCIA03_375_1 +HGG__BraTS19_TCIA03_498_1 +HGG__BraTS19_TCIA06_184_1 +HGG__BraTS19_TCIA06_372_1 +HGG__BraTS19_TCIA08_113_1 +HGG__BraTS19_TCIA08_162_1 +HGG__BraTS19_TCIA08_218_1 +HGG__BraTS19_TCIA08_469_1 +LGG__BraTS19_2013_6_1 +LGG__BraTS19_TCIA09_141_1 +LGG__BraTS19_TCIA09_255_1 +LGG__BraTS19_TCIA09_402_1 +LGG__BraTS19_TCIA09_451_1 +LGG__BraTS19_TCIA09_462_1 +LGG__BraTS19_TCIA09_620_1 +LGG__BraTS19_TCIA10_266_1 +LGG__BraTS19_TCIA10_413_1 +LGG__BraTS19_TCIA10_628_1 +LGG__BraTS19_TCIA10_629_1 +LGG__BraTS19_TCIA10_640_1 +LGG__BraTS19_TCIA12_298_1 +LGG__BraTS19_TCIA12_470_1 +LGG__BraTS19_TCIA13_621_1 +LGG__BraTS19_TCIA13_624_1 +LGG__BraTS19_TCIA13_654_1 diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold1_validation.txt b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold1_validation.txt new file mode 100644 index 00000000000..d24f39b67c4 --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold1_validation.txt @@ -0,0 +1,67 @@ +HGG__BraTS19_2013_13_1 +HGG__BraTS19_2013_19_1 +HGG__BraTS19_2013_27_1 +HGG__BraTS19_CBICA_AAG_1 +HGG__BraTS19_CBICA_ALN_1 +HGG__BraTS19_CBICA_ANV_1 +HGG__BraTS19_CBICA_AOH_1 +HGG__BraTS19_CBICA_APK_1 +HGG__BraTS19_CBICA_APR_1 +HGG__BraTS19_CBICA_AQG_1 +HGG__BraTS19_CBICA_AQP_1 +HGG__BraTS19_CBICA_ARZ_1 +HGG__BraTS19_CBICA_ASF_1 +HGG__BraTS19_CBICA_ASG_1 +HGG__BraTS19_CBICA_ATP_1 +HGG__BraTS19_CBICA_ATX_1 +HGG__BraTS19_CBICA_AUA_1 +HGG__BraTS19_CBICA_AVJ_1 +HGG__BraTS19_CBICA_AVV_1 +HGG__BraTS19_CBICA_AWG_1 +HGG__BraTS19_CBICA_AXL_1 +HGG__BraTS19_CBICA_AXQ_1 +HGG__BraTS19_CBICA_BAN_1 +HGG__BraTS19_CBICA_BBG_1 +HGG__BraTS19_CBICA_BGE_1 +HGG__BraTS19_CBICA_BHQ_1 +HGG__BraTS19_CBICA_BIC_1 +HGG__BraTS19_CBICA_BNR_1 +HGG__BraTS19_TCIA01_131_1 +HGG__BraTS19_TCIA01_147_1 +HGG__BraTS19_TCIA01_180_1 
+HGG__BraTS19_TCIA01_190_1 +HGG__BraTS19_TCIA01_221_1 +HGG__BraTS19_TCIA01_335_1 +HGG__BraTS19_TCIA01_411_1 +HGG__BraTS19_TCIA02_151_1 +HGG__BraTS19_TCIA02_321_1 +HGG__BraTS19_TCIA02_331_1 +HGG__BraTS19_TCIA02_368_1 +HGG__BraTS19_TCIA02_471_1 +HGG__BraTS19_TCIA03_257_1 +HGG__BraTS19_TCIA03_474_1 +HGG__BraTS19_TCIA04_111_1 +HGG__BraTS19_TCIA04_328_1 +HGG__BraTS19_TCIA04_343_1 +HGG__BraTS19_TCIA05_277_1 +HGG__BraTS19_TCIA05_478_1 +HGG__BraTS19_TCIA06_165_1 +HGG__BraTS19_TCIA08_105_1 +HGG__BraTS19_TCIA08_280_1 +HGG__BraTS19_TMC_15477_1 +HGG__BraTS19_TMC_21360_1 +HGG__BraTS19_TMC_30014_1 +LGG__BraTS19_TCIA09_428_1 +LGG__BraTS19_TCIA10_175_1 +LGG__BraTS19_TCIA10_276_1 +LGG__BraTS19_TCIA10_393_1 +LGG__BraTS19_TCIA10_408_1 +LGG__BraTS19_TCIA10_410_1 +LGG__BraTS19_TCIA10_449_1 +LGG__BraTS19_TCIA10_490_1 +LGG__BraTS19_TCIA10_625_1 +LGG__BraTS19_TCIA10_637_1 +LGG__BraTS19_TCIA12_249_1 +LGG__BraTS19_TCIA12_466_1 +LGG__BraTS19_TCIA13_615_1 +LGG__BraTS19_TCIA13_630_1 diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold2_validation.txt b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold2_validation.txt new file mode 100644 index 00000000000..c468e57417d --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold2_validation.txt @@ -0,0 +1,67 @@ +HGG__BraTS19_2013_11_1 +HGG__BraTS19_2013_21_1 +HGG__BraTS19_2013_2_1 +HGG__BraTS19_2013_4_1 +HGG__BraTS19_CBICA_ABB_1 +HGG__BraTS19_CBICA_ABE_1 +HGG__BraTS19_CBICA_ABM_1 +HGG__BraTS19_CBICA_ANZ_1 +HGG__BraTS19_CBICA_AOP_1 +HGG__BraTS19_CBICA_APY_1 +HGG__BraTS19_CBICA_AQA_1 +HGG__BraTS19_CBICA_AQO_1 +HGG__BraTS19_CBICA_AQU_1 +HGG__BraTS19_CBICA_ARW_1 +HGG__BraTS19_CBICA_ASV_1 +HGG__BraTS19_CBICA_AUN_1 +HGG__BraTS19_CBICA_AUW_1 +HGG__BraTS19_CBICA_AUX_1 +HGG__BraTS19_CBICA_AVB_1 +HGG__BraTS19_CBICA_AVF_1 +HGG__BraTS19_CBICA_AWX_1 +HGG__BraTS19_CBICA_AXO_1 +HGG__BraTS19_CBICA_AYW_1 +HGG__BraTS19_CBICA_BAX_1 +HGG__BraTS19_CBICA_BEM_1 +HGG__BraTS19_CBICA_BHK_1 +HGG__BraTS19_CBICA_BHM_1 +HGG__BraTS19_CBICA_BLJ_1 +HGG__BraTS19_TCIA01_150_1 +HGG__BraTS19_TCIA01_203_1 +HGG__BraTS19_TCIA01_235_1 +HGG__BraTS19_TCIA01_401_1 +HGG__BraTS19_TCIA01_448_1 +HGG__BraTS19_TCIA01_499_1 +HGG__BraTS19_TCIA02_168_1 +HGG__BraTS19_TCIA02_222_1 +HGG__BraTS19_TCIA02_226_1 +HGG__BraTS19_TCIA02_283_1 +HGG__BraTS19_TCIA02_290_1 +HGG__BraTS19_TCIA02_309_1 +HGG__BraTS19_TCIA02_394_1 +HGG__BraTS19_TCIA02_455_1 +HGG__BraTS19_TCIA02_606_1 +HGG__BraTS19_TCIA03_133_1 +HGG__BraTS19_TCIA04_192_1 +HGG__BraTS19_TCIA04_361_1 +HGG__BraTS19_TCIA06_332_1 +HGG__BraTS19_TCIA08_167_1 +HGG__BraTS19_TCIA08_205_1 +HGG__BraTS19_TCIA08_234_1 +HGG__BraTS19_TCIA08_242_1 +HGG__BraTS19_TCIA08_278_1 +HGG__BraTS19_TCIA08_436_1 +HGG__BraTS19_TMC_12866_1 +LGG__BraTS19_2013_15_1 +LGG__BraTS19_2013_1_1 +LGG__BraTS19_TCIA09_312_1 +LGG__BraTS19_TCIA10_109_1 +LGG__BraTS19_TCIA10_130_1 +LGG__BraTS19_TCIA10_152_1 +LGG__BraTS19_TCIA10_241_1 +LGG__BraTS19_TCIA10_282_1 +LGG__BraTS19_TCIA10_325_1 +LGG__BraTS19_TCIA10_639_1 +LGG__BraTS19_TCIA13_618_1 +LGG__BraTS19_TCIA13_633_1 +LGG__BraTS19_TMC_09043_1 diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold3_validation.txt b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold3_validation.txt new file mode 100644 index 00000000000..171a51a02a8 --- /dev/null +++ 
b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold3_validation.txt @@ -0,0 +1,67 @@ +HGG__BraTS19_2013_12_1 +HGG__BraTS19_2013_14_1 +HGG__BraTS19_2013_18_1 +HGG__BraTS19_2013_20_1 +HGG__BraTS19_2013_26_1 +HGG__BraTS19_CBICA_ABO_1 +HGG__BraTS19_CBICA_ALX_1 +HGG__BraTS19_CBICA_ANP_1 +HGG__BraTS19_CBICA_AOS_1 +HGG__BraTS19_CBICA_AOZ_1 +HGG__BraTS19_CBICA_AQT_1 +HGG__BraTS19_CBICA_ARF_1 +HGG__BraTS19_CBICA_ASE_1 +HGG__BraTS19_CBICA_ASW_1 +HGG__BraTS19_CBICA_ATN_1 +HGG__BraTS19_CBICA_ATV_1 +HGG__BraTS19_CBICA_AUQ_1 +HGG__BraTS19_CBICA_AVG_1 +HGG__BraTS19_CBICA_AVT_1 +HGG__BraTS19_CBICA_AWI_1 +HGG__BraTS19_CBICA_AXW_1 +HGG__BraTS19_CBICA_AYG_1 +HGG__BraTS19_CBICA_AYU_1 +HGG__BraTS19_CBICA_BAP_1 +HGG__BraTS19_CBICA_BCL_1 +HGG__BraTS19_CBICA_BDK_1 +HGG__BraTS19_CBICA_BGG_1 +HGG__BraTS19_CBICA_BGT_1 +HGG__BraTS19_CBICA_BGW_1 +HGG__BraTS19_CBICA_BGX_1 +HGG__BraTS19_TCIA01_186_1 +HGG__BraTS19_TCIA01_429_1 +HGG__BraTS19_TCIA01_460_1 +HGG__BraTS19_TCIA02_171_1 +HGG__BraTS19_TCIA02_370_1 +HGG__BraTS19_TCIA02_374_1 +HGG__BraTS19_TCIA02_377_1 +HGG__BraTS19_TCIA02_473_1 +HGG__BraTS19_TCIA02_491_1 +HGG__BraTS19_TCIA02_607_1 +HGG__BraTS19_TCIA03_296_1 +HGG__BraTS19_TCIA03_338_1 +HGG__BraTS19_TCIA03_419_1 +HGG__BraTS19_TCIA04_437_1 +HGG__BraTS19_TCIA04_479_1 +HGG__BraTS19_TCIA06_247_1 +HGG__BraTS19_TCIA06_603_1 +HGG__BraTS19_TMC_11964_1 +LGG__BraTS19_2013_28_1 +LGG__BraTS19_2013_29_1 +LGG__BraTS19_2013_9_1 +LGG__BraTS19_TCIA09_177_1 +LGG__BraTS19_TCIA09_254_1 +LGG__BraTS19_TCIA10_103_1 +LGG__BraTS19_TCIA10_299_1 +LGG__BraTS19_TCIA10_310_1 +LGG__BraTS19_TCIA10_330_1 +LGG__BraTS19_TCIA10_346_1 +LGG__BraTS19_TCIA10_351_1 +LGG__BraTS19_TCIA10_420_1 +LGG__BraTS19_TCIA10_442_1 +LGG__BraTS19_TCIA10_632_1 +LGG__BraTS19_TCIA10_644_1 +LGG__BraTS19_TCIA12_480_1 +LGG__BraTS19_TCIA13_623_1 +LGG__BraTS19_TCIA13_642_1 +LGG__BraTS19_TCIA13_645_1 diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold4_validation.txt b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold4_validation.txt new file mode 100644 index 00000000000..0fc2a8bc9cc --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold4_validation.txt @@ -0,0 +1,67 @@ +HGG__BraTS19_2013_10_1 +HGG__BraTS19_2013_17_1 +HGG__BraTS19_2013_25_1 +HGG__BraTS19_CBICA_AAP_1 +HGG__BraTS19_CBICA_ABY_1 +HGG__BraTS19_CBICA_AMH_1 +HGG__BraTS19_CBICA_ANI_1 +HGG__BraTS19_CBICA_AOO_1 +HGG__BraTS19_CBICA_AQQ_1 +HGG__BraTS19_CBICA_AQR_1 +HGG__BraTS19_CBICA_AQV_1 +HGG__BraTS19_CBICA_AQY_1 +HGG__BraTS19_CBICA_AQZ_1 +HGG__BraTS19_CBICA_ASH_1 +HGG__BraTS19_CBICA_ASN_1 +HGG__BraTS19_CBICA_ASR_1 +HGG__BraTS19_CBICA_ASU_1 +HGG__BraTS19_CBICA_ASY_1 +HGG__BraTS19_CBICA_ATB_1 +HGG__BraTS19_CBICA_ATD_1 +HGG__BraTS19_CBICA_ATF_1 +HGG__BraTS19_CBICA_AUR_1 +HGG__BraTS19_CBICA_AXJ_1 +HGG__BraTS19_CBICA_AXM_1 +HGG__BraTS19_CBICA_AXN_1 +HGG__BraTS19_CBICA_AZD_1 +HGG__BraTS19_CBICA_AZH_1 +HGG__BraTS19_CBICA_BCF_1 +HGG__BraTS19_CBICA_BFP_1 +HGG__BraTS19_CBICA_BGO_1 +HGG__BraTS19_CBICA_BHB_1 +HGG__BraTS19_CBICA_BHV_1 +HGG__BraTS19_CBICA_BHZ_1 +HGG__BraTS19_CBICA_BKV_1 +HGG__BraTS19_TCIA01_201_1 +HGG__BraTS19_TCIA01_425_1 +HGG__BraTS19_TCIA02_117_1 +HGG__BraTS19_TCIA02_118_1 +HGG__BraTS19_TCIA02_198_1 +HGG__BraTS19_TCIA02_300_1 +HGG__BraTS19_TCIA02_322_1 +HGG__BraTS19_TCIA02_605_1 +HGG__BraTS19_TCIA03_199_1 +HGG__BraTS19_TCIA03_265_1 +HGG__BraTS19_TCIA04_149_1 
+HGG__BraTS19_TCIA05_396_1 +HGG__BraTS19_TCIA05_444_1 +HGG__BraTS19_TCIA06_211_1 +HGG__BraTS19_TCIA06_409_1 +HGG__BraTS19_TCIA08_319_1 +HGG__BraTS19_TCIA08_406_1 +HGG__BraTS19_TMC_06290_1 +HGG__BraTS19_TMC_06643_1 +HGG__BraTS19_TMC_27374_1 +LGG__BraTS19_2013_0_1 +LGG__BraTS19_2013_16_1 +LGG__BraTS19_2013_24_1 +LGG__BraTS19_2013_8_1 +LGG__BraTS19_TCIA09_493_1 +LGG__BraTS19_TCIA10_202_1 +LGG__BraTS19_TCIA10_261_1 +LGG__BraTS19_TCIA10_307_1 +LGG__BraTS19_TCIA10_387_1 +LGG__BraTS19_TCIA12_101_1 +LGG__BraTS19_TCIA13_634_1 +LGG__BraTS19_TCIA13_650_1 +LGG__BraTS19_TCIA13_653_1 diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/postprocess.py b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/postprocess.py new file mode 100644 index 00000000000..e5590bdb338 --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/postprocess.py @@ -0,0 +1,72 @@ +# coding=utf-8 +# Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. +# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +from multiprocessing import Pool +import os +from nnunet.inference.segmentation_export import save_segmentation_nifti_from_softmax + +def load_predictions(predictions, dictionaries, validation_indices): + assert len(predictions) == len(dictionaries),"Number of predictions does not match number of samples in validation set!" + padded_shape = [224,224,160] + results = [None for i in range(len(predictions))] + for i in range(len(predictions)): + qsl_idx = validation_indices[i] + prediction = predictions[qsl_idx] + assert qsl_idx >= 0 and qsl_idx < len(predictions), "Invalid qsl_idx!" + raw_shape = list(dictionaries[qsl_idx]["size_after_cropping"]) + # Remove the padded part + pad_before = [(p - r) // 2 for p, r in zip(padded_shape, raw_shape)] + pad_after = [-(p - r - b) for p, r, b in zip(padded_shape, raw_shape, pad_before)] + result_shape = (4,) + tuple(padded_shape) + result = np.reshape(prediction, result_shape).astype(np.float32) + results[qsl_idx] = result[:, pad_before[0]:pad_after[0], pad_before[1]:pad_after[1], pad_before[2]:pad_after[2]] + assert all([i is not None for i in results]), "Missing some results!" 
+ return results + +def postprocess_output(predictions, dictionaries, validation_indices, output_folder, output_files): + processed_predictions = load_predictions(predictions, dictionaries, validation_indices) + print("Running postprocessing with multiple threads...") + force_separate_z=None + interp_order=3 + interp_order_z=0 + num_threads_nifti_save = 12 + all_in_gpu = "None" + print("Saving predictions...") + pool = Pool(num_threads_nifti_save) + results = [] + for i, output_filename in enumerate(output_files): + print(i, "/", len(output_files)) + output_filename = os.path.join(output_folder, output_filename + ".nii.gz") + softmax_mean = processed_predictions[i] + dct = dictionaries[i] + bytes_per_voxel = 4 + if all_in_gpu: + bytes_per_voxel = 2 # if all_in_gpu then the return value is half (float16) + if np.prod(softmax_mean.shape) > (2e9 / bytes_per_voxel * 0.85): # * 0.85 just to be save + print( + "This output is too large for python process-process communication. Saving output temporarily to disk") + np.save(output_filename[:-7] + ".npy", softmax_mean) + softmax_mean = output_filename[:-7] + ".npy" + + results.append(pool.starmap_async(save_segmentation_nifti_from_softmax, + ((softmax_mean, output_filename, dct, interp_order, None, None, None, + None, None, force_separate_z, interp_order_z),) + )) + + _ = [i.get() for i in results] + pool.close() + pool.join() diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/preprocess.py b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/preprocess.py new file mode 100644 index 00000000000..048eb0e91cb --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/preprocess.py @@ -0,0 +1,109 @@ +# coding=utf-8 +# Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. +# Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# This file has been copied from +# https://github.com/mlcommons/inference/blob/r0.7/vision/medical_imaging/3d-unet/preprocess.py + +import argparse +import numpy +import os +import pickle +import sys +import torch + +from batchgenerators.augmentations.utils import pad_nd_image +from batchgenerators.utilities.file_and_folder_operations import subfiles +from nnunet.training.model_restore import load_model_and_checkpoint_files +from nnunet.inference.predict import preprocess_multithreaded + +def preprocess_MLPerf(model, checkpoint_name, folds, fp16, list_of_lists, output_filenames, preprocessing_folder, num_threads_preprocessing): + assert len(list_of_lists) == len(output_filenames) + print("loading parameters for folds", folds) + trainer, params = load_model_and_checkpoint_files(model, folds, fp16, checkpoint_name=checkpoint_name) + + print("starting preprocessing generator") + preprocessing = preprocess_multithreaded(trainer, list_of_lists, output_filenames, num_threads_preprocessing, None) + print("Preprocessing images...") + all_output_files = [] + + for preprocessed in preprocessing: + output_filename, (d, dct) = preprocessed + + all_output_files.append(output_filename) + if isinstance(d, str): + data = np.load(d) + os.remove(d) + d = data + + # Pad to the desired full volume + d = pad_nd_image(d, trainer.patch_size, "constant", None, False, None) + + with open(os.path.join(preprocessing_folder, output_filename+ ".pkl"), "wb") as f: + pickle.dump([d, dct], f) + f.close() + + return all_output_files + + +def preprocess_setup(preprocessed_data_dir): + print("Preparing for preprocessing data...") + + # Validation set is fold 1 + fold = 1 + import sys + import os + CURRENT_DIR = os.path.split(os.path.abspath(__file__))[0] + #validation_fold_file = '/workspace/intelai_models/inference/nnUNet/folds/fold1_validation.txt' + validation_fold_file = os.path.join(CURRENT_DIR, 'folds/fold1_validation.txt') + # Make sure the model exists + model_dir = 'build/result/nnUNet/3d_fullres/Task043_BraTS2019/nnUNetTrainerV2__nnUNetPlansv2.mlperf.1' + model_path = os.path.join(model_dir, "plans.pkl") + assert os.path.isfile(model_path), "Cannot find the model file {:}!".format(model_path) + checkpoint_name = "model_final_checkpoint" + + # Other settings + fp16 = False + num_threads_preprocessing = 12 + raw_data_dir = 'build/raw_data/nnUNet_raw_data/Task043_BraTS2019/imagesTr' + + # Open list containing validation images from specific fold (e.g. 1) + validation_files = [] + with open(validation_fold_file) as f: + for line in f: + validation_files.append(line.rstrip()) + + # Create output and preprocessed directory + if not os.path.isdir(preprocessed_data_dir): + os.makedirs(preprocessed_data_dir) + + # Create list of images locations (i.e. 
4 images per case => 4 modalities) + all_files = subfiles(raw_data_dir, suffix=".nii.gz", join=False, sort=True) + list_of_lists = [[os.path.join(raw_data_dir, i) for i in all_files if i[:len(j)].startswith(j) and + len(i) == (len(j) + 12)] for j in validation_files] + + # Preprocess images, returns filenames list + # This runs in multiprocess + print("Actually preprocessing data...") + + preprocessed_files = preprocess_MLPerf(model_dir, checkpoint_name, fold, fp16, list_of_lists, + validation_files, preprocessed_data_dir, num_threads_preprocessing) + + print("Saving metadata of the preprocessed data...") + with open(os.path.join(preprocessed_data_dir, "preprocessed_files.pkl"), "wb") as f: + pickle.dump(preprocessed_files, f) + + print("Preprocessed data saved to {:}".format(preprocessed_data_dir)) + print("Done!") diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/setup.py b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/setup.py new file mode 100644 index 00000000000..bf4d5981497 --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/setup.py @@ -0,0 +1,81 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: EPL-2.0 +# + +import os, shutil +import argparse +import sys +import zipfile +#import sys +#print(sys.path) +#sys.path.append('/home/sys_dltest/lpot/lz/frameworks.ai.models.intel-models/models/image_segmentation/tensorflow/3d_unet_mlperf') +from nnUNet.Task043_BraTS_2019 import task_setup +from nnUNet.preprocess import preprocess_setup + +BUILD_DIR = 'build' +RAW_DATA_DIR = BUILD_DIR + '/raw_data' +PREPROCESSED_DATA_DIR = BUILD_DIR + '/preprocessed_data' +POSTPROCESSED_DATA_DIR = BUILD_DIR + '/postprocessed_data' +MODEL_DIR = BUILD_DIR + '/model' +RESULT_DIR = BUILD_DIR + '/result' +TF_MODEL = '224_224_160.pb' +OTHER_FILES = 'fold_1.zip' + +def create_directories(): + print("Creating directories") + if not os.path.isdir(BUILD_DIR): + os.makedirs(BUILD_DIR) + if not os.path.isdir(RAW_DATA_DIR): + os.makedirs(RAW_DATA_DIR) + if not os.path.isdir(PREPROCESSED_DATA_DIR): + os.makedirs(PREPROCESSED_DATA_DIR) + if not os.path.isdir(POSTPROCESSED_DATA_DIR): + os.makedirs(POSTPROCESSED_DATA_DIR) + if not os.path.isdir(RESULT_DIR): + os.makedirs(RESULT_DIR) + if not os.path.isdir(MODEL_DIR): + os.makedirs(MODEL_DIR) + +def download_model(input_graph): + pwd = os.getcwd() + os.chdir(os.path.join(pwd, MODEL_DIR)) + if input_graph == 'NONE': + print("Downloading TF model from Zenodo") + if not os.path.isfile(TF_MODEL): + os.system('wget -O 224_224_160.pb https://zenodo.org/record/3928991/files/224_224_160.pb?download=1;') + os.chdir(os.path.join(pwd, RESULT_DIR)) + if not os.path.isfile(OTHER_FILES): + os.system('wget -O fold_1.zip https://zenodo.org/record/3904106/files/fold_1.zip?download=1;') + zip_file = "fold_1.zip" + #legacy bitmap issue https://bugzilla.redhat.com/show_bug.cgi?id=1802689 + if (not os.path.isfile(OTHER_FILES)): + os.system('curl -O --output fold_1.zip https://zenodo.org/record/3904106/files/fold_1.zip') + try: + with zipfile.ZipFile(zip_file) as z: + z.extractall() + print("Extracted all") + except: + print("Could not extract fold_1.zip") + os.chdir(pwd) + +def setup(downloaded_data_dir, input_graph='NONE'): + create_directories() + download_model(input_graph) + task_setup(downloaded_data_dir) + preprocess_setup(PREPROCESSED_DATA_DIR) diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt new file mode 100644 index 00000000000..4e85853747e --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt @@ -0,0 +1 @@ +nnunet diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..7e869e7cca7 --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh @@ -0,0 +1,61 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + export BUILD_DIR=${dataset_location} + export nnUNet_preprocessed=${BUILD_DIR}/preprocessed_data + export nnUNet_raw_data_base=${BUILD_DIR}/raw_data + export RESULTS_FOLDER=${BUILD_DIR}/result + run_benchmark + +} + +# init params +function init_params { + iters=100 + batch_size=1 + for var in "$@" + do + case $var in + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) 
+ ;; + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + *) + echo "Error: No such parameter: ${var}" + exit 1 + ;; + esac + done + +} + + +# run_benchmark +function run_benchmark { + if [[ ${bfloat16} == "true" ]]; then + extra_cmd="--bfloat16" + else + extra_cmd="" + fi + + python main.py \ + --input-model=${input_model} \ + --data-location=${dataset_location} \ + --calib-preprocess=${BUILD_DIR}/calib_preprocess \ + --batch-size=${batch_size} \ + --mode=${mode} \ + ${extra_cmd} +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..79256545613 --- /dev/null +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_quant.sh @@ -0,0 +1,48 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + export BUILD_DIR=${dataset_location} + export nnUNet_preprocessed=${BUILD_DIR}/preprocessed_data + export nnUNet_raw_data_base=${BUILD_DIR}/raw_data + export RESULTS_FOLDER=${BUILD_DIR}/result + run_tuning + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + *) + echo "Error: No such parameter: ${var}" + exit 1 + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input-model=${input_model} \ + --output-model=${output_model} \ + --data-location=${dataset_location} \ + --calib-preprocess=${BUILD_DIR}/calib_preprocess \ + --mode=tune +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md new file mode 100644 index 00000000000..2802279a8c3 --- /dev/null +++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md @@ -0,0 +1,131 @@ +Step-by-Step +============ + +This document is used to list steps of reproducing TensorFlow style transfer Intel® Neural Compressor tuning zoo result. +This example can run on Intel CPUs and GPUs. + +# Prerequisite + +## Prerequisite + +### Installation +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` +### Install Intel Tensorflow +```shell +pip install intel-tensorflow +``` +> Note: Supported Tensorflow [Version](../../../../../../README.md#supported-frameworks). + +### Install Additional Dependency packages +```shell +cd examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq +pip install -r requirements.txt +``` + +### Install Intel Extension for Tensorflow +#### Quantizing the model on Intel GPU(Mandatory to install ITEX) +Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. 
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[xpu]
+```
+For more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers).
+
+#### Quantizing the model on Intel CPU (Optional to install ITEX)
+Intel Extension for Tensorflow for Intel CPUs is currently experimental. It is not mandatory for quantizing the model on Intel CPUs.
+
+```shell
+pip install --upgrade intel-extension-for-tensorflow[cpu]
+```
+
+> **Note**:
+> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX.
+
+### 2. Prepare Pre-trained model
+
+#### Automated approach
+Run the `prepare_model.py` script located in `./examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq`.
+
+```
+usage: prepare_model.py [-h] [--model_path MODEL_PATH]
+
+optional arguments:
+  -h, --help               show this help message and exit
+  --model_path MODEL_PATH  directory to put models, default is ./model
+```
+
+#### Manual approach
+
+```shell
+wget https://storage.googleapis.com/download.magenta.tensorflow.org/models/arbitrary_style_transfer.tar.gz
+tar -xvzf arbitrary_style_transfer.tar.gz ./model
+```
+
+### 3. Prepare Dataset
+There are two folders named `style_images` and `content_images` in the current folder. Please use these two folders to generate stylized images for testing, or prepare your own style and content images.
+
+# Run Command
+  ```shell
+  python style_tune.py --output_dir=./result --style_images_paths=./style_images --content_images_paths=./content_images --input_model=./model/model.ckpt
+  ```
+
+## Quantization Config
+
+The quantization config class has default parameter settings for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'.
+
+```
+config = PostTrainingQuantConfig(
+    device="gpu",
+    backend="itex",
+    ...
+    )
+```
+
+## Quantization
+  ```shell
+  bash run_quant.sh --dataset_location=style_images/,content_images/ --input_model=./model/model.ckpt --output_model=saved_model
+  ```
+## Benchmark
+  ```shell
+  bash run_benchmark.sh --dataset_location=style_images/,content_images/ --input_model=saved_model.pb --batch_size=1
+  ```
+
+Details of enabling Intel® Neural Compressor on style transfer for Tensorflow.
+=========================
+
+This is a tutorial of how to enable the style_transfer model with Intel® Neural Compressor.
+## User Code Analysis
+1. User specifies fp32 *model*, calibration dataset *q_dataloader*, evaluation dataset *eval_dataloader* and metric in the tuning.metric field of the model-specific yaml config file.
+
+2. User specifies fp32 *model*, calibration dataset *q_dataloader* and a custom *eval_func* which encapsulates the evaluation dataset and metric by itself.
+
+For style transfer, we apply the latter approach because there is no metric for the style transfer model. The first step is to implement the q_dataloader and a fake *eval_func*. Since neural_compressor already provides a style_transfer dataset, only the eval_func needs to be prepared after loading the graph.
+
+### Evaluation Part Adaption
+As style transfer does not have a metric to measure accuracy, we only implement a fake eval_func:
+```python
+def eval_func(model):
+    return 1.
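+    # A constant score means every candidate model passes the accuracy check, so
+    # quantization.fit() only performs calibration and quantization, with no accuracy-driven tuning.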
+``` + +Here we set the input tensor and output tensors name into *inputs* and *outputs* field. In this case we only calibration and quantize the model without tune the accuracy + +### Code update + +After prepare step is done, we just need add 2 lines to get the quantized model. +```python +from neural_compressor import quantization +from neural_compressor.config import PostTrainingQuantConfig +conf = PostTrainingQuantConfig(inputs=['style_input', 'content_input'], + outputs=['transformer/expand/conv3/conv/Sigmoid'], + calibration_sampling_size=[50, 100]) +quantized_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=dataloader, + eval_dataloader==dataloader) +``` + +The Intel® Neural Compressor quantizer.fit() function will return a best quantized model during timeout constrain. diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/colva_beach_sq.jpg b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/colva_beach_sq.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5f6c5a6beb51054070261ed7d4c7ca6e5973f484 GIT binary patch literal 14235 zcmbWebx>PR`2QQE1Sl>o4kc+RQi>H4oKoCfOL2!1f>XRmAq0X$ad&qqPznKBNO6k0 zOL2!^Ki}V#nfuqh_t`mfcIND!+1KpOv%Bwo_F?*A2|%VOs~`)&!U6!W{w2V}EI=B7 zkB0}udxQ@J0tpE435lqPi5@>DqNSiDrDCLGW@4mcU|<1rv$3#pffyKG3bAwX@CpbB zFtb6$Abg_S`~rOc{RE4EfPm;R5e+dh4c{|{XMF#k?V$@mj{jeqaIjbb*yLC^9Y?_^}< zuuV6c!bi zl-AbOH#9aix3qrm`O({l?jIN&9UGsRoSObKv%Ippw!X2swY_t6d~$kresOtq{U6tV zod2i&Z?OLd7x_OfY+PI%T;P9PSlFKb9vpJqN6&ciD5NxiW-v-tz94+6x2T+&ZUPX$ z=3i=a*Ac=eV1ec5NB^Py583}au;Bk+Wd9e~|IIZ6Ai}}=*LgVP00}@#BSNdVDo@l| z1A8>IfGS)42_@B6yUT)J&d46YQKu4K*_X}ba*=OafC9JbGCWWWnT&I+>fxs7F^e|6 zP^rc217N~^5er-8qDw^~q{8Fk`N!C@W?P zkOg23?pIdW^5_d!i6rca0|HDj{NCb@vQq9JbsT; zP1UL?IzI{fSmL@}UXGDJXew*n_%u4-L)h_2h>HAU8jJPFFkR&f7MeZ@;f}kyqIa|6 z<6tYC1ME3B471uXC%s(5{Z#y~X0=my30BKciygVQes#U>HSbWeHMyUqMzl>yyY#A* zGRi8D9O)%Hx{>EI?z%{IX}2ZB_yI{DhYPH`c&%2p`6QVot6hY0`p)l8JkgXJk5gVa zt>hlhb$#SVw>3!x?Nx^CrgITn&TW`@N_i^RL6zs@PsOevK6Ykqd4|n&&(o!Q##a-; z$0H%B49?f?4*;wyA6@^dQX|_Z{vVPsB2_<|X#2dO;#r&H-mf1~>uQzMirm+Dy+p$? z2NjP>_!JnTS~aOW(YR!4CY?XGbqHnPE>Cc+Js1U-oDVTYlNIvL+%^VQc8t$O9spkJ z;S8SQ6Xp&uCn6r2;z!!XA+**ySfl!E0E-b zlc;1!G8lR@9xwvS7FzWV+ac8t7ibSu8T_wDGzS9B}?F7@b1O z(=Ws(h8IBMeXoD+lAPPI?+nvb%&YaNF}f$eGHXG;KpdE(HuiTBDM_%8wAb+ zV5bazX}GQ_=ye`(On_5K%AEb8j@xOd>JP|N&@>R1wdx;`wY_URmc9MW;VJEjtk%mx+7H^;c0^%TG^GI^-$vWrJcE2*k<8 znLXJOp){zIfm8C#EWZ2+|BhHX5NlaYn`lG!bDP^Kp)$1*7NV<6^M1Wg^q!uZ^F-FZ zBY`zj2FiL=-?Xoy?o?b{cMt8|Kd2CypY(9#_m2$FGcT{pL%Nw-mi;VWiK=Vgp+|(TaX_-uHo-mgmDOOiBZI(hDZB{CYGU5vAM zQI)@=w0cZQ$IBLxI^oel5!9QXjs51*&m51vy8ui@o?|(TFx7hao0Y+zpLJW=h0JqSFWK2R6Nnq{mxUES6_GVbV%s62Sa1qypJjFZ{2 zwzPKE1wOhuQG0KH`SN*e$fAuZ|G*ZB#Py#lsnu=tyR12wpRVgfz1yrEuYrcWCtu;^ zBx^Q2)-ynOEZ9ucewy%yWu>-#1@tdA#=n+Izrnag?IU3_LpdOPQ4 zi65RGTVmdoW4(sS$?Mb7tYt@_16mrb?^(`#J}%-7!_iE<_OC0~X6fG5GGW#RGg{n#O>1E?hpu>)Rr=y;%9o2gKB<|Az zs70p2u(v1*5$MXvt{Gi9VxOPh#2UoU5IX*sKpSw2{(WPS89vtXh73{uwH1%P1tWfU zvN+jrD&q93PQc%uvhQi|(&k5J7oc>e&qmpM5G{B<_F(Tv?Xle1mTzYGOilc}2lr&1 zK>Z)nw0X@nj+`rLzX@7^`(dW=>HhY6lW>nCwHL`QS6&G;QA~;t0MDY2C+T_w)}zi` zLZU??F&;E)Sx4s^t`y04WjvTCgGrBhim^F!u(XEc^CUzdOOI{ zx_37V>~Pz7IQt88Dgie)RR;PeKnRE=7`Cq;fjj_=%K^*AuD~LR@JpS2jm))IgQW?6 zIVtRf%(NiL>eSKFRI+fT{UN-TrzSvVLZ^Pja{uNBDnp8aFCCVJIXPc{s+;uvSSS2u z6Smjk=8j_%%hiCSY z3vplhGK1R;TwMv$! 
zpM6mGH@@Q}-0`IX?b%wUP{<<~culqA)T9RQ-csChLIiXO`935v|IvAuxz#mZVGR)6Z~q8y$h{X}v#?9HGj_3=$%8GA7J=GB%< zv5S<&{CQ0C)oh;vE|WuRG&qY7TB_~jTB}41nElhulj#e08Tnzz|NW31%_s?`oT+~` zdOkRz<&`0tT@H@1ivfyAiH558BuPm1vA;S~c$Vl^;b2c=sZ!_E)!%&;^LtfH3^q`i zyD$K<3sk3`bH+hju-xgV9TefKitj=;p)x!0G?n$1c7p2fM(g@t<9D;h130$!pOJ@K zNd_Io0*#4C|CUbqFg$g|o9kcQFwZ}*9+vj=zkC1$>0}767?|@K7aJ8{#^S{KJpk11 zt5=ekNs6`oeMuVhG&3`VxCHF?3c?f_f9|=+9lr?s9{fuZCr)8WJEhdHT~jIleJ=%n zJBsg-NfC(gzF}@Yaag~sjln&-ex|aByNK<&$Z^lv(UwV&$1*0ZjAZzxV9~EsX>mzfym|#hO|NtW@7BzKHKl^U~9-Dg6d^EM8BE&YQBs zz|q;2gr3^Y7dC#*AMEp<=5k1xa&71YJstGwK1uvS0Xn962Gt=fs^s%rtA1Y*Q#M<+ zKrQJ}8f$=5BA?oTg5$d}(DObqkXlW!h_8B0f-PAemLuv@c;npS-i;=jA2e@+Fw?n zZ@X;^3!t20gwSjc_t-k~4iI6<0^o6^%vC3q$;xUkmS*-|5pedeae6)gn7i&p#|;zG zg)=Br$+foJbc=WG@KGW1AoU+a*svj`NEdYgPLVyY$*G$Hlk~*g@c^5hC2AkpwaO)p zgUVd(G--HLu&ZC+c;GoR{C4Omt}mI|TT+SPGEN#p1y*`_NSC=_jMm&{k!XOzMO2FZ zuTbz!O_)0W)MX5m!PIT@2;yza~QRt0Vv=@hQ?o`BWs6V2(L&udPo0(;U? zzb8xQ!Hw@!L42&Z(aDcyfgn%Oy9tK#VKMetyTQ% zCL-aGN-G3eUKs=TA+M`mOp4beTj6qkOx5-;5ApkW@)|C+R+&EG`5^>J=qn+@K*s4 zKYoImJ2LSrF|Jmye8o99)Q~!EcC0W*`i*MI2X(sbw1;08|3f(akZ~?2KS(36d(|2X zUnAIsKn!aw8ja&GR)#E1SQPjszY~IZ%H{9488z_i z1Z@7xN9a#J3gDZ<-R~sU!N?OS}~DyENf*Wrf#`UD2Y*KN*&nl zq8{2wvv{-Y%Wb9{I4DQc%@)!pn^6A&K#p9D-W>$+^S<#7nXBQmkDlfqf zu(YWHSWN4^&$f>-YaBc}!h_Lr(e#EOC`6dP{W;wpx?2(C_gN()17d*o5>S zhNL4r|M}oa&Y|ZXzmCz)zdnd4r!cb*^F21Tah9|1eXDV?+__;pXLaz?q8vN-$riWh za>lG;7Mgk4fQh-?^ijeug+Jd|16p|q?`d&;nWc;_D|d9L_1Qs47q_*QK8r|RwcZz; zMyb=$7LPOXXrbb3lVFdMVz10|qX>=BQAlo#*ubNKTOpLu8f@420T8h_y4Z7#$ob-J z@R)LYgLDg~B~)-jC#~nau?!Toc=N+v0hJboawa6kl`0ulwM~DXE$}QDZtg}7 zPvqW+q80;PnvrjzU2N+RuqPOHcjzqmPed!N!41S>u@Lx^W02#O&RAKX3W2=oFtjfa z!h?@}Ez(M*(XgFN`dgl~osNoAQ#;w$+D5nXxH>75kq{z+>x;JXZ;@5cCKC^ombAc@b&naXw5v882_5;K}KG z@%B8Tu%M|X$gS6Tu>r&0;AgaQ@j<|y_FGn=FPqTg0U{UDDaHW8?S@y_wx>}Jm-4nI zZ{lq~G?(*XK?in`ozc!EZSURq6DEIopi1q35~8V>XWBo$R(4et)r<^DS#Kj z_AEyf;3{r*xPPza#%6-QwmJtJ+wHZSwsQ_!mUH;7uT$W1eRO~)E9~bdfvIH9+rBqy82eP~_uYdu#S(QG z^Yw_e+GnDs^ulg#<4p$ptM$aS`BbHE^WF3Ev{2|(HGRkqvFko1XDyO<3h8U!c4zWi zrTj+_Kvy?{%!Rg8cvSXPm*ClkddxumE7($MntB`w1&HfvoS=ieRs& zNFwL_MVXyYnJ1PpBu*kp1H=%vW&zAZj{zHCLVFI2`JMa~uN~xuB~Sr0`9-K9n#$eu z0xBdUzZ%KqyWnSSzfG~G{_dI>l$v}iEB@(qlTZWZ48R`-+<(}N-}brXGv4mQ{~5jc zs(L9_Vi>kaz0rrEP#~B=h-)=LfS}3tJ>|@Z!=AqVx>vKdbG{D%j`JvoPupa(3UpNm z)UCs9_vH%c>o;W@V1G-fwb}Gb-M#?E&J3SK6Gl!!n-6YCCKcM1jPiMFa+%j( zk13?z{aCUXs%y{LH*0^^^IZ%{pr(H`5ngpri9r_4Nr#_7Ad(Gp+%)y&hksa1Uv27P%#Q zj?20=lVO7y4oe;0uXV&ymF0R?DL@=Cc0jNRsz_w`kxaIpERy zG@@p^zwfsMUEl9li0Tguig|?5xSx)B^2Dc?dHlXkw%VCe7_~#YT%IMdW zUsZ3|07QtGP2-ay?+@c!p+>R`SuTzECDtXw(&9_9JW7-?lNIL0+cIgiO)gI~OU3Z; zINz2LJpka$?_^meI+Ba$G#>#>wW42rgwy}$p*1A+l{F#_UpDJIb2R4BD;2))%iHi` zlp8N8a{~};7Wpuhb|t2M%~jg8ZFIyF>?0$!x(X=x&}(t2^7GPXUVnX+t6~qHwQHnb z0QAZkGE{bE=T)0M4m*S7_v-YT;sE!F&7-n3GE??2%p;&7sSQ zo}dz4CiIn`1IPG;K7?OU%WW+`5T1Fzq!HOa#%&dLcRJfhzp7Oe0flt~$7nEDc#U$P zH<@&GacfyWngps8WKQI)HOJ?A3ytO5TMP9-AWs=gBP`4QMC2=UxxsrbX+g0*)EmRH z=9?aQ_PETwwaL~vwpGg=?Ns zB{VerlA)>(PS9|963K4ctgpxplX?1>KvDc}ZqT@9vwlJ0{;}mu*X=E8x50j0w^}o4 zR=#^q?W2@#84>^8xJu3Ai&`R$NOsu!r@Yk(nj?{(j|u*DQ0}tX%iC=`_Bk#K+vk)w zZx`N}g4*M0(aK8!TW@N#9rN@*epF{jiiIv6p|WIb^d$a|K)ZL_s4l9 zYX_JjdrQHP8_jfaW#=prB5+`B-H2sb!5G=@3cRBB(skpADOci?4|nvX)rOB-u-yYD zvWcx^ZYr)ie~so&yaq$GLgiZY=u?Q&i8AZ=CJS{Ll}6KuUtyVzTYpu}_8=NbW4ANr zqRnQKc}`_40Si=+D0+dozAt(D$Lb zyL4&>uO*HJitD3}=}aV-aZUTW>csuGX}Zwg)Yet=Ef9Wo4o0cfL(g*KGQRgHeOj_$ z3HG_;cs#4qQ6D#-QtU|9QyMD;{L8}py=X=$3`b1v_yIuJKYpuJueL3D&ZT+0$sLvvHI}>)@j7F*<2C=)?d^3<^R1?3pawj3SjzSc#oe&2+D~a zOEJK=b>Rj{V|DgU6lM6|<*#h!hyRlP_SHo?5b@Q&+IRr)+s_QI77qytv{Xi=OdGQE 
z%l;ev?o_GyLn5J}L$vurFYM;H^jZfp=SuoU;qgnW(;PatRCgZ%?D z$9KlhVXoA|pN{40sVJw4KG;VZ=stgi3;|~@Y@l9OYzDr`AKraSSj7sI7y$Ot&>fFs@)lZ8bRX)voOMa;#_k)Rb+<851bW$WG2RE-JH0YC!c=*R6 z<@-0i#xgJV%aMFu^KqROJ{(@0#-mT()%Zra4X-k2#S(1CmHM^b=)T_wF9&*Ndd%g4 z`~OYPMYP~kyo?4Bl?gpfza+l+ZV+L>4EWKViZZcAv+KvDcaBd`(y%dGZV7SD(H|1< zME6<<`)i-OXdX!#9Dob^pWb zsOxIht=m1MqJ4_L)a+Kzwjx-x7)j!Nw@Q9R{+`K!BY>eJKw9CJm)+i{p)iB_ut+8z zs}U!8HnYuVHN^OJ#93Li%)zU;QFnwwz4Fs%u>qvj^TY6v2CvmOTb_?MrPpi2je|8> zh=*a?m2Mq^O1{yPPr}sLBWr?Oix6b6?gCA|_zpAyxWXcE?BMV;!vnV=HuqO4BdruJ zij8f~3D-}Ik7Qi%QB|Jjfa}$(xzjQ>MWl)*XP?DQ!}mf+|9nW+C3)FiWE|(|41C*I z^YaI3uc18YmFM%5sU;PIXmC)@uXor0Q&m~HaR%Q>=8(hJXTJ))npDD)7LRvX(*uam*CEPux|woA=Ly&HMc2Afj525ZUWOS6zm<$; zpUIL@E*Fm>H!*P?iE(Ya-0$-r8GVk{LKg216+0_N0&K&>i(6@LLb-QZXI>9eb0-|d zy~k%RwDlu-XB^cAM@s};vlrF~WX(|KXt=R&ASP%FJu2Cwkf2hG_4nM3td zRG(Uer}(@?WNjOApnUllz^!y$qZ@UjSG3{7*r-_j)$PSMFVo3;LQVw^IG>!z+BYYl z&;qM$qW;N*L{>#rfStcM zf6h7x0$EPU9_aFPxr3@qRa|GHdn+(s8k-j>q|d#7hv5>VUAEJgvbME8%zcoyv2&;T ztl<3B%784?8p41K-(Vox%!F2Yt*{ooZr^f)=1|&bC&{~QXR=ww0;q}y23Xv>&&qeJ zmq1a$M+7`0&7>1!^s11Xo-7N5TUiBYaob!*PUUAyLZy$pN%}D_YO2X_=rR3%%ZdE6 zaKKBzlRo-Nc~I5ZuAXLTYm?SXEjy7@mX671)y=$oEmnXy04VE+2F%CVjr|ZLJMg97 z7~U7~y;9A>LEzpJ1#0OML8+Y<+z$sUWC!!8dq*Aj&B_uS0BJ<;yvx&)))xeoN=C)= zKEfY&ThjiRT)PT05R8?VrI+6gXDfuD;o2$*nFc-kr&m+~y!v%J4*B{jU?0DNF?n2A zFN|sHBt1?K@t1T(#;P;AflVj6K3b4Qd-G>~^fL41t{#1XP|XyLQ=TOGh}956cvgkL7JS~CZT z%p&8MWDZ(XZsa2pfAFVh|M?vBsabTvbemQ7ZNAxU^qbQtHX8E8Yla7a=mavL-IjZh zk1Ab1JXv!1f))Y4`qkO2u%( ze2R0+f>3)cV?@(QLc2qy2}il{`)%Pcf5yU<2Ff7G$E4~-xt@B2jH3?nMo%{ewkcwK zD7qOa$ry&mnXPo0KRhLIF;GIWBXm>Cerw0&G4xlLa}&AN4lsWP)Aa zi*a(;jDtGtz76F0>oJShJAGbNwoZ_wi;GI#}Q6IM~I3>wM#OsR)s&gAJNZV)-Hj3IwtMZnkuwU%DQWe zBARMytkZvjC*rsB0{M(JROGGcDyYnDVC_inEL=m}_Wl{2;@6Akor%M~>aRXl{Q(b; zHtU?wNr#zQS@fR|*}q?S+CH{FVlZh(gpgddF9C^X@k(E6Kb_i8Bk%EF7j0gTkp3p4 z?mb6esSSW%GpTacMmt&SgTo+npwF^WsA_HiS`v>pMpBGjnvtdg)S}tbHs6+FZHn?Gv2u<_#?L1XviHv$Zxl#8`fAN$R5LTL z&Tm!8o$~)sUd{-Zd6!7@d!8NVR1(ihp7uc0o-(B-WJ#e@jZtt7x-iAF<|H+&+h`SG;|iEiY~= zKO-{UC_+&}@7KXTcdYP+G&TJsB=fE$S6z-c{p4RLQh`g2xF68o!v#|OX#tLsw^rv1 zkd{H4JIlOWn+Ooor;LS7)Adg_6je9!$+W!ap$Vd0=<@S%8z}p1oPpxA<4n&A+`Rb^ z_0bv}d_F4gg9u@TU8PTD&wtkMCJ+8Ks|N~H`=poRrMp}oZ9Ay`<3-^2g1bLPV#1%Y zDm;$u`35Q35=DZr+Z=jZlZ_b0*IFFi@XP2zH3A_yBh1M%76<(LcBMR~dwYY>Zpgj% z_8V`Sb#XQ4&e4{u=Wr9-uaD{2=4o#22xVObUr#xieyihhd@PccZxz19NvYo1o*~(o zwyx^UN-Iry)(2dE0PHrXzH3Fv&rJ%<%dTt)ja8a89#1{W!i`m9AujqIklC(Um%AhE zm=xwM+N&ShoS=}-0wnYU02zhNbFz%b$?@sJa+-yFle!prV@%pN(lgn|JwoH8IZB3^ zi4?Zu)UIeN%uA(zt(9nI$k^l*F|JPx$J`~28spk@mi&P60dV(bYp zl6wUaQK24!*tbCaMuwx%%fHPNtlrev7l?rW=&XHgKYr)Vh+hlB*XdS7fC`hzAd z#@((oEarysM&f8bb=`mRJv*;dTo|tpA~+rZ9+ol065s@?s`5Z~WL=<@>7u4NzBD=| zY@BTH!jW4GmbN5L%tAQeoyY1=P$!M(&E_oi$c*ZX&M^EnlHnsVNkHR%J~NuNeP)>T zx}M;7dYsRa8yjOw^-g@q<#)M@alHnJEsW?AP{g(cN+oTn@}K?- z@R;PZuOu|`=X<)3JukjkB}@^{%)xHRxbR)F%^?Y5ywbn^oM zWIWQA;br#~=8P{U{;h$hqxFiWe>t3H=aGc@P1Fo1n}T579{)||ky_Oo zOvm^r?>uJ>DAR>KaDWBy1b%n8D_3*SJ+bE`UPgoIzhZngFbkw+C){{bnFd}hA?H8%p7!`U3 z9@2GIhR~c2o27-7psl}V3@a>C9jglK;uln^$GjG=hSb`6a311@R|_%D8vlQV>sY?A}7~D=EwoW%XYvteaako zCqd{v`YMets!z>C;`Pa{PQhcS4O2## zV-pWwP{UQX?o5Dc!xy2j=l$ZYVQjJRvE#ML$pbOC!iJD^oLF(&mU78=qr8+XdBHdV zXYfarDAd`mvJox${5{5vj3|CR4sZZRreF%(x5Y8-LCIA^vq!sXH*P^8vZ9^ma*E;X z6xDw_4qT#H-g*Q_H9KReQk?_`eAre-%|Bmwd@{FmhB2A{Qn_I3{~L#GiY|U(RKC|t zYU@U|m(C=mquHmNu`)X1w2;AhQxaJ{iOuv06EUKW&xl&!&%j@BM6eoEeDlh)FyVw} zb}mjI`v;k~82`Ojh1}n}(U`dfyn$i;M9g|x%nhZ6CK+GNuqRg_AYkd8<$Zuj{d>=+ zby*gIN~w&P>xL_8*j(tg)hirp;50Qc zf<9+njP`^fiT3Czf`Z;*ETLwPv|cE$)J}?E!vAGum65PX?}{fun9oc|rEn6ntTDR-i>ywf 
zojXoG!Sk9US)Q%(0n0m$Q_r?^Mx4ZaZ$KnXwWQ}>G(;-X1b3l^fAa>)*@p%_jzjQ? zAc^I_~aP2TqQ%ZdTcD<*!Gr}WQpzO?G!Z$kL`YF?%Ed6YoA3K>IX zsgd4uuAK#hsjyVZfo;{bX-NNI;KF$R8IYDOj(rSlyK0&9wUf43m4AxmY)OZc$1dm3 zz|!WTz(k@N2<#Jr-c)8|xDX3BwBCD(!95=m!LMepnGz!wsM%d?EMy}W(U2+Dz=d!e#%SrbSghS0@9gwAEORxJbj@nxEPt_%8+CfH8-shV za8nj0H4iopxzhft zm`t-^0nDv;l=^q=0)3MwR+7`|FKL}K`mLZe>;aJWyv5Q3E51lfZgt#dCXwN;R3_O@ z+4Qy6<+#83-tmUc++x%2%3mn(k`1mcLI6tFyE z9Kum`!LWMNy$Vn72AaO*YT_i;g&%V{wwlQ8f1rRHf??3Y+6H|ZRLp4MoCRD@Sl;NR zTR!5wP$EZ=OyBO<3GQ(X;(ti6bn)}rvQxn0(UDycJR19(BZ8y%l6F%8Orda0bT`s{ zk>A8@7iuWLmX*>Upx8-k0X?B4%v*Pd+&z-;Oe%-~Fi{~%N2_K$-W7YtilxAgTgz5O zwK)s>Ea}%xd9#!!!cc=>Ja}Eiw-_crc}xlE;kFOa40{CgVQufK@+CF_Y0kl!fAamB zXYeGU`?q%-oqzsV&bB9tk??Tz*Cx)=M-~dF-+w!^Ucey)C_O5`l!>ZZq5ikJm2e4uFvs-umhhlwZSzZnjMa} zv59y>&mI8Hi=$DYVVh$J&+H~+RQg3Bg&eL0Tl!fZM9f#Kx>CO}2~wo+d;kX`BE4MG zJ>z9cLIAAfR3w&GR2hQXkGT+{2+JtInx1AyCoHCZ5;NfV&r&SGvZH=>`nS!S$3vRG zb(Oa$)kl(q^r{oPno-#^*}M@c=Rk0JMy{%`RWs+fZd=<}+I-vsMWrXlAwwufvnUfj z2k*f0R}cB07G&}))W&CBIctxDU3hn;W8(zFatbR)pSj#C;xQQ}+EJVTiwiV;V!*$5 z)h(($6p+hR3#o6mJ!S-nNCMX%@n^L~FY*p-W`9gS$?uc@ceTM6X@{jNN|a^K{T==A G=YIi|{hoCI literal 0 HcmV?d00001 diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/golden_gate_sq.jpg b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/golden_gate_sq.jpg new file mode 100644 index 0000000000000000000000000000000000000000..248d9fd31f9e7f8cafd9bede3c346271b0394aea GIT binary patch literal 12423 zcmbWdcT`hd^fq{@p(9N}AVEM85TtiRM5@wzM|u$i0#QH+MSAbjrGxZd6_AefUX>~( z(t9Au#CLw*_pLR5&CJQVYn|Ned)D3UdG^^Sn0d@9a7RT!SpmSo0RSBA2f!==asUw_ zA&8KG2m}HV6BCh;(UOyql9Dk{(@@ef--m#i?=vy6u=8@Tus&g9VtOpf`Gk*ONJt36 zAucT@AjK;vB=Bz$9AaW(GEy>na&mfs2TTtH{yz^)CqPB?@15{)SOHur96TxOPe2GFA|@fl3N+jSaB=YPaPjd72=MW-(!SW=0emU~>IeL?gfyCO zK&)=G0s-;4L~Ji=y6Chfj@Sh)-2;h9?%tzkxc~4G$74<|Az=|wF>wjGm+}gVO3EtQ zI=XuL28Kpf);6|w_709v4^PJ@zR)GK_+86P;HC@DPf?7v( zmhKZIciDy39v=Tw?LW=_-zgUO|4Xy~t=Rw3YY`yB!@)Waj|z|h=9iS47K@gY^uAYa zhr2V^XE-|7XV^EJL^?A%n$}xJ1`jk&G#2(ZFxod$Ip;eoM~K{Ly0R`QET+<0Mh4tn zRBaGhRQ*0}Lex0vT-rGK-#wA(HV2Ujtki!?D>p_4P%Mf}7uqk$cSbrt`tQl$fe5U; z%0y#i%%-H9tWi?M16mW_=X&Y#YS`-@Y4XtNYfkOtqS^`9mS(*~XVu0;XZgVgOUjM8 z9UV%R# z+9VUM54c}*V-1#^x2n&rYEwE;Brw&?7^AC3eBIG1E=*5d*KA2A6Z+1?XC^s}7glAa zT#_ws2rCibr~h$*n4Y6u)Vqk??5I#{Rk_s3uX>L%gd1h1My$$~?M%{7`yY^k7woS& zcGEjk2D+tcS5}m%r3zRe+||5_@?BjUl^|(E>9KIn>?KlVU22e%VJRV>iwGYXWMVKS z{OYE@sji7!G9@cT*-s-l_0OoHI|Y9>)Qs>EO<9y{VBIxD{!`MMAgzqn*~33N)56A| zS9PYkm-^Uj?N-wG*hw$jC_tn!-w;{T#7;Mf2<|gv`eDHp)R$dxq^%)F_LQh%ot+Qg zE`==)DuKcgfsSnh=XNL_X)X@q~s%ay}rqWR&UbP<{!It z$@E|s;1+Z>r?Y!uwj$M2i)c4*LE)8mhTL!c*ONz-cez{VsHI#Z}Tql3NU}4 z&TzE+mz8(+{{=;&^F3Ui`r!Y$MOTzuIy8k10G9s(pjW7ZPvo`0D|EwA$Ff|&Rd!J< zr=Dyu9%&y)&GFY_RMjDzWJ@y&CLvv3>~XO_zVXq5|Es+fIwJ=~%+(ZX<#GJ!s0F{< znid+50iK*9gY??poH{MCl5HwCV}ReU(6nRZPt(>>YUT$&?qL838wRkcVx_oFzRlV6 z;eK6)Jn#(mb32on%u6G~AECZ$%f)=}s`mNVv^SE>+j2UxOKhhozg?1t-QBI;Wo+`B zcZ!=`B;ia9q^skE;CPN!0q`Ksz3 zr_KqGI4|rnS7Y-W2)xsVJv3Y!~nq>M+a?14#>j&k)B^hCVk~;4`SZ)zv-7&2Y1g_ zItIae7&>+{xvxIv-#U#RY`?K#IOXeO?%W1FbE4e3E!$pEmSg0sxVSm+m6qVk#^p;H zKIhH}14CluL&X1ND?Y&OCnd9+gK^{OUZvUjyuI-DP43%G%1zKJt^189j^ZAD8;%}t zV&A?jyX{r974)J<=1jH~hL=peUJap0NHWj42IsCFUY$%4UB6E={BfHb#%>*OrSB3i zbmzK;*DB5>fqy=`ykq)3VorQRNl%iMBx`WAOGo>g`JDBUW z!xXypL1e?Uz?~rlS>_#5wUc2E569Pbhb+v}ooK5L7w>^D0)+T?6l9s^S{QySww!7O zC(rgeu>Sf?PbRJ%wThhL*%BnwSLf9c^W=~TlkWNpkH^>TY#Q= z0Ep8F=DGHe^!HIgSWvY{cDa*GR$lB>Mn?B66a!TBD(O|I3VYbDum2suQdSYCa&6XN z4aI-lhvm8dGkd^|jR#JJAzpk*2a)Nee}or`CA`lSSoRzcnWQVdn4~Rstud+Kt1!f7 
zt9)m<-eA~NsEX*CgdAV>UJV(0_6nimjjb`V1PzAlHY0OYyQC5&WCzO z9Z!~wy$-p!qX`TkSG&nvJ(BZwfd4jj%Pbc6&#cTP=%h;W=!fNq1iZw2X0uB96FjdM zy*HPZ;-FQ0K9fD_dCO&`>mK3z6+iaxaY(ds)X6}8aWgvqWV@m{^->wVlP8Rt^d;~< z)ZR|qY9aG6;~VHo+6C}Q$ULyGIbKJ;R^{$;HnWT@OSPeW=KBu(HBW#*F1)?63Y`7mY% z@UxEN0vMpkLF)=9=)<-S)Am4W6H27yJdOIY=NDURRJFmnJ%fS8mc9d9cADIEMCt0` z9fw8H@T!Az&K@dL#hb>P?uVyAF+S(JT~%D4CVK~wXvJ@4)W}b|t!#5dC`1&T2yNSP z2tHn2nrI4~2=tZt&^~3n4hsG>D$}sRVK4K<`Z-d=)DGEE@yF=4<6`*~P!e%u?E&E?ygaXh8Qd1hO-cZD3WFem3AftHEzt@Rgd< zCHYS)RQOwOxe8ycEJ>r6*b*n&@|y%(>kwi8FCDSlveBOfE!!t3bt|#R&EmZ~B_~!G zz(bQ7y(P0k_eAcJ6TMUL$!}feQt_n9`1AE8^8imjQUWcEUvd#;8^Eyfo^g#iT#;NOAgtU2qeR+I zn!|Tl|5XC2&`-zwkRyE$(9%TC$uo~vI9|oq(r=G*OBeV@~14yDP>#5NO15E z+l^wcDh5!sLA&c`?}-X14TR#rI5%4Q;1%ACnR{kjm+9tz&O4VJ-lxyjkOxPZS)Z`R zkpch-F?)7k`&W|l?UUQs{4!n>A(Fermsn6Y!4lRuxpE;C){Rcf`NQ25w5HI)c z2iKn54>5q{QxvYzS+mS0#2LK_pKM=Iyj9&-yy3?HJ2d)vovQ}Mr?UUKL#XtkywKeTv`PWTnQPjyRwm@n z-oM=~W4V#J1tB%g@3(s6>vK!=tQit-=0^!^f*T(BLC+wwU&SbyYxEZpmj+}TTf1js zrFYj}?=!lt;(|)fAz37*UEQy~;Qd8Fl0Nl)c$|N5JWaWae!bTJRCi(NdHGI`3 zMIIcm;d?T+K!W)kFMI@CtmqLb3|OfmqeRa)IaKr48FDT3H(hF+RqKp=L-+-F(|@J3 z>*%8UTyfY}Db9y~=JV$I!UfIuL^hR(UKw6cHh*97jxDs$QsIP!ZAUl3$VZ@7>Z%IJ zJ-zSkO&CDQV*$TSw`Tcx?v~5?Otg@hQ9_lP|6LK!_w_yoW>TgTIj~n*&z$61_-iBb zazlJBs`%vt;c}P9F%dnwvEN6O>xkZtek!$iE)_*~A`uf>vY(x-u`K1pR4Hp1Cy)#Z zNI~RWIx+CFdEMO{ablljE~btfUiyBF|FJWEN^I4msCJ>-onXoMIn80M3&>%}9gv z4#*EWuyKo~`+0>v;XJcvGsNS|GRB*7BAY3T_xtYLyH!uxg5T!9(9UNDjY=FklLGrX zfqEO~Y8+A~O(HdEYU>p^Y3m-~m-Z?5oB3Yvn>N@kGyEA3)MdT>SrJfS9BY4r0cwO@ zAT)+OdAQb3!8icLiqDN6BL*0NQC@-20m9bH<<2Fh0I~a6Je?HeZC^$2 zoiz+*tFzV>sJ#PRCt%%l?&CWtCw{Nf|FRih8QkX+_x*m>9g>sK!4BDTFxV3xL@3 zZN7bXTzA==;h7YUf0`S~@$;9#1138lRkgA}op%GuLvHF!;1gZD&PTHVikV_v6J z^6qLSO6{80h55ZKo=RKTGQE{J6Rt$I;MtD zxqFyWtZPhI#kx6*zCA3fNaI;rX%fcP&7cr+C36Gscp-e5y&zj~bfo||Sa zG(YoWv5niaOkhEPjem^H_??+dvQ`RuRG8f)oKTHOfdP^tW3;s>7YM7~-;Wrd%Ds7U zAWEgkaa(t4M-_$KT&2iTW7z>JgfhyOal9ezXyFDv-UAkP(SGY3AFE#wZ`a-}h) zNP96|+;Ry=i0#Wyq%#+p1`2;{RCF(HV#_|y@nruxWXbs5u?(hkPkw6WD<4+-arWy5J|0_cZkMXJT}jhBMXl2Y5F#9)$-7Z_K!} z>N*^FDEM8v8AjO=a*_Jcv$NNUb+AZ(>JV zoVn~P8l@@2j*mpd0Y?;+oDQk;T7>gDsxR%VWDT~2&s)6xGMJDv3V`OjlsvbDd&GS7W6ukV< z#S{D7ZSNI$p4^>$cv})Q480Y=0HJ%nUz4X}Qcp|iyG3xATY9|=t#yjIFQnhjH22*- zFs2lU5xy(){d9pjEv^IfP>JP?Va$Y5Wg<~O-QXp3@BLrhZmz;9)>8U@`oGU*%tu?g zC0%69RrH%#_2`)(Di^3vj7HgbW$ZbnRUd?wK1V_t87D!9>^a`ssFg2gmxno4NkY^8 zKRpx___uDXW<#1$7t0hYrO(DiJ)AN)pu!C@+44bC1niA=94%bn~bld#p~LUNv3qoy}@oGa%z3qpE`WcnHjmssql-1k3%=?3u%NKVZg(*YSDzzAb;s`u2|P* zHYLYwSUJz6UANHl(jQ7)(@d1>+Jl=H{oCq`0;c!IhZDrsZRdNcjEif@PSy~Wk& zT#a!B`B9$IshRo-tO zx?6DGkz4jKy*fg{s%4Jkn`|G*#fR_qF?uLOI;Mv*gMJhIJJqNbEI)NNLw)DtjP|Kj zjPmVuMb+Q4w(9og_Z)vu^0+8)mJ3ect1=brD=9_7EjEwJt*N`hY@|Gk#Zi+e^%L4( z(heqg_fNCc;cv{!FBLN=L(Jzik=BFGYh~e^AnW&B`vHmm8tJR=AOg6Qj5OFC2i;=v zZTS6El!wd>kJ~-PKPDJpOb81c@*mN3J|}*5)0R0cv=2*JZL#}@vc!%X9(@mSCS~5a zHz_8o=ZM*oC8nX}*mHOI&ZCN*iiScA;N|TnJfrzt&t3Cs<5XIS23630)-HwtI_R># zx_uo%?_d{qdI@+w9CPZ9y*V~)Ewj?kiN92Y9O0OFX`@at0Oejx)Y6qE2B1c%P$QHs$Edk1p{V?_ybQuHizaD&MCQW%>@(X&i z{1iI3$63=JGl8@?yLIqDx7~C4i9ArDcV@UOy8haH9l9couAgua3V~ui{@LmUWY}atK{+`yF!aM!qvk(RaS8*}OWah`t;0tH7ue(c31XW% z2G|;80&4m|ATjgJX|uOQJ6%(vpqN+OF})_6IQf(V{TTjQ2+`QvIiA->Mr@VYDm4e zK>LU~mlC|b?gw^NawP1@UqhqI{Jklz*qZ32BD!=c3>S%ZJa#7ZOVEdVTX`3iHRUf? 
zlxNs9+oZ@!VGKKh{Y>Db+2&LyFL%Xv(~qOxSrQH4pKTP$f~x3Di+!^VuB{+Hf`D_W zjVkTIk|uW_KjC&hXN2bRR_W)GT?LU-cJ?PDtfdU>f!l6_dr-t-)U-ff-GFr}@BYl^ z1XWIp?jpbXzAk10&mTz-j`Bkay^~G<9K5{HsO2M#I`4*TrRVZAt7kw7HE^TsvS-|t zHdZ?RP*BKH$bwG4ep(B|M|+i&`XsBtRPT#EOdYsRd#)QV(|KRr z&L?x(;^?H%fCTaV2*r;Ina~lKx`&neTI%_Tlb{>;9G{51%FM@gx{XL)~`!MH%VnOI@ar- z*DM8L02+b~w7JYhu(e|kxuq*@PLefthfUxVdxZha%^TtSB-;%ODu`4P!e{2Fg!Z>( zS7Wy_U#0(dDxQcQnpm6{q`Y|aF=t&^!NGXcE%Rgl^@Jde+at!SF+7C1QcB5nDG#LT zI-Ppp**u6@{)Nr6ck0-V&4NP$=MBu&t;Gk$i5veE-Yfxc(@0nLzOO9G@$L^Z_h9`Q znnGq(sP0LuXrsDJcxn2~g;!J?4RL}me^kKlqcUPLf!Eyc@IThu|;C5xg71;9G>C(A1`Ty2WjfniH%)FX<5iTS7Sa= z7x=^P>q{ih%ZrF;R>zB8KO}9tyB3P)Fu>3?%D24UydCz($O)0VLdF)^20L4>!GB>K zPLI=NAO0(%?>V2iIEYzk^*BaQ^QW)@rn*ndJ&Z_x@zM(cOynndECm>+VDn_i&F;}I+S?~Az zTWUL_5Qtg_{1$gz-DBbA)x(nIoAqvE%7HNbrb@sqQlz_@(FNcU%K(Ts@c|= zl(04DCYWklx+s~i?;b03a6M2_HiDN`wV$4jQHFk3vko@4mLAopAlO;3^`?OOnQRHI zi&!!LAmT+74dT#H6sl9M6vX5WE2hcZI4S9-WA{Yazg-2xNlJEJiL`f8a-0>nm#{@` zm&lCPz1;e)1qUZTDWJF9o^8n9@t93GLutZfuBjWS64BKd;BUj=Zl&MK+uwg7etoTB zvA0^@U(Vj1BlWp0L`@cBFDOqPkB-cH{mM?Bg)-kh-0@h4@kgzmtUL=WF_GkBz?S*$ z!b7avS^Z2mBo!Y6K&i=M?xJL;X4#KkEeyhqv-Rj?9PXjsz6Wyl=^$s97@(dTWW?C; zGxkyu?a%EBAJD{Blu)AW&=9HmNAcKNvUIOk@B|Ap9x-}!t)r9(e zB0W6Bbhcjg&daapP4Io?SG-CLSYB@DGJI=lx|M3p?=qO6KtiYv%5H$Ii+_qJqfJ?I z2aB4Ly?yhjCysxLd2l}nA@%atB3QJhxTzN< zU`UWI_;KAqWRt)*f1C+Pv#-%B$e6>+9pE4aa3%ecuw%euKR?`^!mX$%)t4pXBw8f# z&-ffe_@e5eH?HhP^LmCSDyK*-?^td`pmd=QcD@pn9ic!d`-F(2m~QaNOIU~K`6Y8r z_w;+1ro4T{qXdPRm7mGFjE#qAnqZR-h{uudx4K;Fwi@1__%@BMpk^)YawyCP@ok@p0{RpV?hw|T61nws-3@q!!&p&nXT2Jm@dirRA z)NfCO)uhE>9_L4!TQh20lv;sODCC-2SmV1-xve03H%@>!hl{$jV1jJFnwxHMd&>HY zQx($~rNQ0AY-#Si6him+DcNnA=XLirOseOmUK+iz1C!os@$_AShK-7AeHbfYgG9>- z57%*XB`Ym&_R}vGXP(A%#5lILy}u=(SlFbP4k=c*dH=^tw`Rq)v#+DD4IY^|QVX|u8e(O<12x;Y28lBvr@Y9qv1Eh?h-A0i?!IEX%TB;?@&$iz zObP=?ypH*_y~jyvRViIc`m~Y&;AV=V0M!(wNvk=K2y%LCJ9M$w=2I{by$K=At;v0z z)hv|JFD-rIkU-YBwX1o+rEri;sWtK7p)wn7Tj;a1uKF_;%`SYtZrB%IsXLV3uuZmN zgoaI0oVV7&B8B3nw=V0ePzD-5s8X@%ikJ zK~ESDabmIUb;pkFe{q)WSB$B0{H zbD9xr+#zExqflm9U8TUDBb@8gJswKR%rQTabkdDBD5?Bi(kM}s!N-_w4HMJl03otJ zqZ~=o^j^G%=J%L$`3LHpi@Ovj-hM5ObWoT+L=IIL&*m@;yb<+fF+Jv#;;WNc|f;d>S9LIMXqn#xc zJx1Hk)3_b#y*mrkC(d`^qmD6GE_v@iL%%DYDHf zm;H#c8TYGK)L+x3hvDprnK|D6-)H+z3s1f9&MW7&gJpuj&PG!usBL#mHfy|op?7swu7a8!ts?VMHz{|V9iN-wx_Tn1;}eltDs^Kg^eIrV3j@Fst*Teurm9K`o)B#VH`C5z7{Dwk1+REp zYeC8T@^-9gOX+HN{;~*f)cw7m&i=wi?nOZe3!|5w*q0*#eYP2UX@}g;OhF_3lpxn@ zbFp)TlC0)mhfjp!HxMIjchuFL2BLOmX3?|83zN4o1RACcDwoNmVHJ_pF0@vU>( zef;mfYV(1w@ISL%#r?#dpa@VXm+H;gH#>!Y`XRE#6S7j^+LiGEuj;{P4=Jtd+3Kaw z73snsuQj`g2VqQEg-N_B8%)&he9FBaPI?sW%g~^2Wb92DC>7^7!t3%6lJ{BPvIyK^ zWKWC&-(motXUlp~ZnAkjfA+joYDoVmxe_T1atAc8#@~v`@G_@@WWrgh&~hM{*}PatSf37#5u1AB>+h*M_Cyr?{5sqvt-N4c(@0$| zk3X2lqCqrKqp7K_x%vac_u;s<+Uh6@91U8g?EIE~vmg+qX1h zCv+c{ut8m-oj_#q)G4)9x;?=w^(4Gt-ZtVlOj(ySMCm$das1*IiD%!hi zJhLmAp;l}OLkfeU;Nk>tt|~$=@)Bd%H0MC2;&j=>oU@v# z@5ZMfoZ8)k{T+KYD`qQdzrUw)N$~Q4#;x#F`yd>Sc z^4R3eLJlv<+81h+D`T^HwkEgppxmL!5dIi%@B43DmjTKzV?qbdQCDXyFxDu}O&jQ( zG-t)QjR5&$Gh*#gZxUXdwh|8U?HD%|dcFbiTSuwQ%}UTz1&8)Y$EuqAJ*uY~`k-Sv z`ZvuKqP`9<{bO8<$4?%=WdehywVL~JmBn0qIm@@jS(v=1hzC}24%Hqu9&o7yU6y<(Y)gyyK#|LAWK-_87!$iU>Wk5K*VE{F{dqm+J&l^A+RN#WogU-x+t-I)o46W9}V!nJ0>VlIO+jgjOeX2VP>kgQedh8ZH?8yCq%S&hz73 zSA2VwGV*NYE#=&@UE#>-DvApn!ao~P;P2VD6T{E61gDDN_oK4iCD6KT3C+v?WE9Ug z5Mx#5eZeG%KtFAr0@bi?p#S~P+fwymi4b$m*Vb$g)O<^GJT@Soi`WY6U0fjpg^h@Y%$yGb>Gx~=5HSmtqovl&M3VyK%C15LPopbUEY~va zKqpg9`|l%Gr?1sbG!Dpe_oB7?L5Kv=`&;0J1J)+f2)>xDjD#+CC47o5MKJn28Eodo zF_EF~z8RI0D*svN%acr%FGDB;ca00|A1`J)hzEFht17m9%a3aJWdb&SN1?&zn$8?L 
z$+;~s+ciE{iBpsW#rAQEq)%)UV;Vaid!I;j0olblv#&3rj5?poMUv@*vY4Kh%XWbo z-@U)fnT*gWnZEO&^%hY_66gf58;?C$oJ(JBc6AYjy>WzM0O$MFQnGj=`81D=JuE1o znKUSV;Fk zZ(s8O2*JCm>AI})QWpG^X#hzgJ3|DzY0`}{5cgM#l^VS`PPR%iQ}D*?zrN)RD^QM3 z6O+R0%tMkUK^2Z0MLCB;He&LH@5d+3u_ZXkv!z}1Hd~5AqvuJK2E{K)k#3sG_Um_- zXb4!CP;MxByM3*R#-dJjC# z@ou=TK6$81k4Mp0?s+w`s5{I2?|M#ONPu+W14ArY96wgSPAGr-X`lyabM=Ae{D|bN zsC*YX{9==elq}}9<{WQ#I-84YMl-T{jqq3DX%XH#8e^3)$v_o?)g0L?T&PZah<#@D zPHuI+^zqoXQPE3hJ=L1py2qbeK89>kL#G(d5&Eo@a2S}mOei=(T@a>|yr$SF2F^Yo zWYS5vj9#I=5HAWE@+5D=KO+xRtm&^Q&`FzTVH{k=`&CsYC!(bk5#%4;7QMh&*R?;6 z1bn9JjKf~P@n7mQl?R+G9W8pjH7DJAI=p> z&6+JJ5E@N?mwr!hpyH?8*Z`gNfF%*WsA<3W7*t)Px_uZ-J@T)W##RFfk&ys%i8@3s zJPRSa_``~5bCv4?Cz}L{B&f4ZHjYFHePz|IuOl3LGl$fD6p!wkPy_^CDrwi>*#ORw QcBgbsG}NZ|pqPdK3$9ow{r~^~ literal 0 HcmV?d00001 diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/data_process.py new file mode 100644 index 00000000000..e71f6577cac --- /dev/null +++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/data_process.py @@ -0,0 +1,489 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import glob +import collections + +import numpy as np +import tensorflow as tf + +from abc import abstractmethod +from neural_compressor.common import logger +from neural_compressor.tensorflow.utils.data import default_collate + + +class StyleTransferDataset(object): + """Dataset used for style transfer task on tensorflow/inteltensorflow/tensorflow_itex backend. + + This Dataset is to construct a dataset from two specific image holders representing + content image folder and style image folder. + """ + + def __init__( + self, + content_folder, + style_folder, + crop_ratio=0.1, + resize_shape=(256, 256), + image_format="jpg", + transform=None, + filter=None, + ): + """Initialize `StyleTransferDataset` class. + + Args: + content_folder (str): Root directory of content images. + style_folder (str): Root directory of style images. + crop_ratio (float, default=0.1): Cropped ratio to each side. + resize_shape (tuple, default=(256, 256)): Target size of image. + image_format (str, default='jpg'): Target image format. + transform (transform object, default=None): Transform to process input data. + filter (Filter objects, default=None): Filter out examples according to specific conditions. 
+ """ + self.transform = transform + self.content_folder = content_folder + self.style_folder = style_folder + self.resize_shape = resize_shape + self.crop_ratio = crop_ratio + self.content_images = glob.glob(os.path.join(content_folder, "*" + image_format)) + self.style_images = glob.glob(os.path.join(style_folder, "*" + image_format)) + self.image_list = [] + for content in self.content_images: + for style in self.style_images: + self.image_list.append((content, style)) + + def __len__(self): + """Return the length of dataset.""" + return len(self.image_list) + + def __getitem__(self, index): + """Return the item of dataset according to the given index.""" + from PIL import Image + + content_image, style_image = self.image_list[index] + content_image = Image.open(content_image) + style_image = Image.open(style_image) + width, height = style_image.size + crop_ratio = self.crop_ratio + crop_box = (crop_ratio * height, crop_ratio * width, (1 - crop_ratio) * height, (1 - crop_ratio) * width) + content_image = np.asarray(content_image.resize(self.resize_shape)) + style_image = np.asarray(style_image.resize(self.resize_shape)) + if content_image.max() > 1.0: + content_image = content_image / 255.0 + if style_image.max() > 1.0: + style_image = style_image / 255.0 + + return (content_image, style_image), 0 + + +class ComposeTransform(object): + """Composes several transforms together. + + Args: + transform_list (list of Transform objects): list of transforms to compose + + Returns: + sample (tuple): tuple of processed image and label + """ + + def __init__(self, transform_list): + """Initialize `ComposeTransform` class.""" + self.transform_list = transform_list + + def __call__(self, sample): + """Call transforms in transform_list.""" + for transform in self.transform_list: + sample = transform(sample) + return sample + +class ParseDecodeVocTransform(): + """Parse features in Example proto. + + Returns: + tuple of parsed image and labels + """ + + def __call__(self, sample): + """Parse decode voc.""" + + # Currently only supports jpeg and png. + # Need to use this logic because the shape is not known for + # tf.image.decode_image and we rely on this info to + # extend label if necessary. 
+ def _decode_image(content, channels): + """Decode the image with content.""" + return tf.cond( + tf.image.is_jpeg(content), + lambda: tf.image.decode_jpeg(content, channels), + lambda: tf.image.decode_png(content, channels), + ) + + features = { + "image/encoded": tf.compat.v1.FixedLenFeature((), tf.string, default_value=""), + "image/filename": tf.compat.v1.FixedLenFeature((), tf.string, default_value=""), + "image/format": tf.compat.v1.FixedLenFeature((), tf.string, default_value="jpeg"), + "image/height": tf.compat.v1.FixedLenFeature((), tf.int64, default_value=0), + "image/width": tf.compat.v1.FixedLenFeature((), tf.int64, default_value=0), + "image/segmentation/class/encoded": tf.compat.v1.FixedLenFeature((), tf.string, default_value=""), + "image/segmentation/class/format": tf.compat.v1.FixedLenFeature((), tf.string, default_value="png"), + } + + parsed_features = tf.compat.v1.parse_single_example(sample, features) + + image = _decode_image(parsed_features["image/encoded"], channels=3) + + label = None + label = _decode_image(parsed_features["image/segmentation/class/encoded"], channels=1) + + sample = { + "image": image, + } + + label.set_shape([None, None, 1]) + + sample["labels_class"] = label + + return sample["image"], sample["labels_class"] + + +class BaseMetric(object): + """The base class of Metric.""" + + def __init__(self, metric, single_output=False, hvd=None): + """Initialize the basic metric. + + Args: + metric: The metric class. + single_output: Whether the output is single or not, defaults to False. + hvd: The Horovod class for distributed training, defaults to None. + """ + self._metric_cls = metric + self._single_output = single_output + self._hvd = hvd + + def __call__(self, *args, **kwargs): + """Evaluate the model predictions, and the reference. + + Returns: + The class itself. + """ + self._metric = self._metric_cls(*args, **kwargs) + return self + + @abstractmethod + def update(self, preds, labels=None, sample_weight=None): + """Update the state that need to be evaluated. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Clear the predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def result(self): + """Evaluate the difference between predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @property + def metric(self): + """Return its metric class. + + Returns: + The metric class. + """ + return self._metric_cls + + @property + def hvd(self): + """Return its hvd class. + + Returns: + The hvd class. + """ + return self._hvd + + @hvd.setter + def hvd(self, hvd): + """Set its hvd. + + Args: + hvd: The Horovod class for distributed training. + """ + self._hvd = hvd + + +class TopKMetric(BaseMetric): + """Compute Top-k Accuracy classification score for Tensorflow model. + + This metric computes the number of times where the correct label is among + the top k labels predicted. + + Attributes: + k (int): The number of most likely outcomes considered to find the correct label. + num_correct: The number of predictions that were correct classified. + num_sample: The total number of predictions. 
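+        Note: when `hvd` is set, `result()` all-gathers num_correct and num_sample across workers before computing the score.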
+ """ + + def __init__(self, k=1): + """Initialize the k, number of samples and correct predictions. + + Args: + k: The number of most likely outcomes considered to find the correct label. + """ + self.k = k + self.num_correct = 0 + self.num_sample = 0 + + def update(self, preds, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. + """ + preds, labels = TopKMetric._topk_shape_validate(preds, labels) + + labels = labels.reshape([len(labels)]) + with tf.Graph().as_default() as acc_graph: + topk = tf.nn.in_top_k( + predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k + ) + fp32_topk = tf.cast(topk, tf.float32) + correct_tensor = tf.reduce_sum(input_tensor=fp32_topk) + + with tf.compat.v1.Session() as acc_sess: + correct = acc_sess.run(correct_tensor) + + self.num_sample += len(labels) + self.num_correct += correct + + def reset(self): + """Reset the number of samples and correct predictions.""" + self.num_correct = 0 + self.num_sample = 0 + + def result(self): + """Compute the top-k score. + + Returns: + The top-k score. + """ + if self.num_sample == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + elif getattr(self, "_hvd", None) is not None: # pragma: no cover + allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) + allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) + return allgather_num_correct / allgather_num_sample + return self.num_correct / self.num_sample + + @staticmethod + def _topk_shape_validate(preds, labels): + # preds shape can be Nxclass_num or class_num(N=1 by default) + # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax + if isinstance(preds, int): + preds = [preds] + preds = np.array(preds) + elif isinstance(preds, np.ndarray): + preds = np.array(preds) + elif isinstance(preds, list): + preds = np.array(preds) + preds = preds.reshape((-1, preds.shape[-1])) + + # consider labels just int value 1x1 + if isinstance(labels, int): + labels = [labels] + labels = np.array(labels) + elif isinstance(labels, tuple): + labels = np.array([labels]) + labels = labels.reshape((labels.shape[-1], -1)) + elif isinstance(labels, list): + if isinstance(labels[0], int): + labels = np.array(labels) + labels = labels.reshape((labels.shape[0], 1)) + elif isinstance(labels[0], tuple): + labels = np.array(labels) + labels = labels.reshape((labels.shape[-1], -1)) + else: + labels = np.array(labels) + # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot) + # only support 2 dimension one-shot labels + # or 1 dimension one-hot class_num will confuse with N + + if len(preds.shape) == 1: + N = 1 + class_num = preds.shape[0] + preds = preds.reshape([-1, class_num]) + elif len(preds.shape) >= 2: + N = preds.shape[0] + preds = preds.reshape([N, -1]) + class_num = preds.shape[1] + + label_N = labels.shape[0] + assert label_N == N, "labels batch size should same with preds" + labels = labels.reshape([N, -1]) + # one-hot labels will have 2 dimension not equal 1 + if labels.shape[1] != 1: + labels = labels.argsort()[..., -1:] + return preds, labels + + +class TFDataLoader(object): # pragma: no cover + """Tensorflow dataloader class. 
+ + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict + method to do session run, this dataloader is designed to satisfy the usage of feed dict + in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. + + Args: + dataset: obj. wrapper of needed data. + batch_size: int. batch size + """ + + def __init__(self, dataset, batch_size=1, last_batch="rollover"): + """Initialize `TFDataDataLoader` class.""" + self.dataset = dataset + self.last_batch = last_batch + self.batch_size = batch_size + dataset = dataset.batch(batch_size) + + def batch(self, batch_size, last_batch="rollover"): + """Dataset return data per batch.""" + drop_last = False if last_batch == "rollover" else True + self.batch_size = batch_size + self.dataset = self.dataset.batch(batch_size, drop_last) + + def __iter__(self): + """Iterate dataloader.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + ) + + def _generate_dataloader( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=None, + pin_memory=None, + distributed=False, + ): + """Yield data.""" + drop_last = False if last_batch == "rollover" else True + + def check_dynamic_shape(element_spec): + if isinstance(element_spec, collections.abc.Sequence): + return any([check_dynamic_shape(ele) for ele in element_spec]) + elif isinstance(element_spec, tf.TensorSpec): + return True if element_spec.shape.num_elements() is None else False + else: + raise ValueError("unrecognized element spec...") + + def squeeze_output(output): + if isinstance(output, collections.abc.Sequence): + return [squeeze_output(ele) for ele in output] + elif isinstance(output, np.ndarray): + return np.squeeze(output, axis=0) + else: + raise ValueError("not supported output format....") + + if tf.executing_eagerly(): + index = 0 + outputs = [] + for iter_tensors in dataset: + samples = [] + iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] + if isinstance(iter_inputs, tf.Tensor): + samples.append(iter_inputs.numpy()) + else: + samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) + if isinstance(iter_labels, tf.Tensor): + samples.append(iter_labels.numpy()) + else: + samples.append([np.array(l) for l in iter_labels]) + index += 1 + outputs.append(samples) + if index == batch_size: + outputs = default_collate(outputs) + yield outputs + outputs = [] + index = 0 + if len(outputs) > 0: + outputs = default_collate(outputs) + yield outputs + else: + try_single_batch = check_dynamic_shape(dataset.element_spec) + dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + iter_tensors = ds_iterator.get_next() + data_config = tf.compat.v1.ConfigProto() + data_config.use_per_session_threads = 1 + data_config.intra_op_parallelism_threads = 1 + data_config.inter_op_parallelism_threads = 16 + data_sess = tf.compat.v1.Session(config=data_config) + # pylint: disable=no-name-in-module + from tensorflow.python.framework.errors_impl import OutOfRangeError + + while True: + if not try_single_batch: + try: + outputs = data_sess.run(iter_tensors) + yield outputs + except OutOfRangeError: + data_sess.close() + return + else: + try: + outputs = [] + for i in range(0, batch_size): + outputs.append(squeeze_output(data_sess.run(iter_tensors))) + outputs = default_collate(outputs) + yield outputs + except OutOfRangeError: + 
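+                    # the dataset can run out mid-batch; yield the partially filled batch below if any samples were collected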
if len(outputs) == 0: + data_sess.close() + return + else: + outputs = default_collate(outputs) + yield outputs + data_sess.close() + return diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/main.py b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/main.py new file mode 100644 index 00000000000..577e9094209 --- /dev/null +++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/main.py @@ -0,0 +1,208 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# + +import os +import io +import skimage.io +import glob +import numpy as np +import tensorflow.compat.v1 as tf +from PIL import Image +import time + +from data_process import ( + TFDataLoader, + StyleTransferDataset, + ComposeTransform, + ParseDecodeVocTransform, +) + +flags = tf.flags +flags.DEFINE_string('style_images_paths', None, 'Paths to the style images' + 'for evaluation.') +flags.DEFINE_string('content_images_paths', None, 'Paths to the content images' + 'for evaluation.') +flags.DEFINE_string('output_dir', './result', 'Output stylized image directory.') + +flags.DEFINE_string('output_model', None, 'Output model directory.') + +flags.DEFINE_string('input_model', None, 'Output directory.') + +flags.DEFINE_integer('batch_size', 1, 'batch_size') + +flags.DEFINE_bool('tune', False, 'if use tune') + +FLAGS = flags.FLAGS + +def load_img(path, resize_shape=(256, 256), crop_ratio=0.1): + img = Image.open(path) + width, height = img.size + crop_box = (crop_ratio*height, crop_ratio*width, (1-crop_ratio)*height, (1-crop_ratio)*width) + img = np.asarray(img.crop(crop_box).resize(resize_shape)) + if img.max() > 1.0: + img = img / 255. + img = img.astype(np.float32)[np.newaxis, ...] + return img + +def save_image(image, output_file, save_format='jpeg'): + image = np.uint8(image * 255.0) + buf = io.BytesIO() + skimage.io.imsave(buf, np.squeeze(image, 0), format=save_format) + buf.seek(0) + f = tf.gfile.GFile(output_file, 'w') + f.write(buf.getvalue()) + f.close() + +def image_style_transfer(sess, content_img_path, style_img_path): + stylized_images = sess.graph.get_tensor_by_name('import/import/transformer/expand/conv3/conv/Sigmoid:0') + style_img_np = load_img(style_img_path, crop_ratio=0) + content_img_np = load_img(content_img_path, crop_ratio=0) + stylized_image_res = sess.run( + stylized_images, + feed_dict={ + 'import/import/style_input:0': style_img_np, + 'import/import/content_input:0': content_img_np}) + # saves stylized image. 
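+    # the result is written to <output_dir>/stylized_image.jpg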
+ save_image(stylized_image_res, os.path.join(FLAGS.output_dir, 'stylized_image.jpg')) + +def main(args=None): + tf.logging.set_verbosity(tf.logging.INFO) + if not tf.gfile.Exists(FLAGS.output_dir): + tf.gfile.MkDir(FLAGS.output_dir) + + with tf.Session() as sess: + if FLAGS.input_model.rsplit('.', 1)[-1] == 'ckpt': + style_img_ph = tf.placeholder(tf.float32, shape=[None, 256, 256, 3], name='style_input') + content_img_ph = tf.placeholder(tf.float32, shape=[None, 256, 256, 3], name='content_input') + # import meta_graph + meta_data_path = FLAGS.input_model + '.meta' + saver = tf.train.import_meta_graph(meta_data_path, clear_devices=True) + + sess.run(tf.global_variables_initializer()) + saver.restore(sess, FLAGS.input_model) + graph_def = sess.graph.as_graph_def() + + replace_style = 'style_image_processing/ResizeBilinear_2' + replace_content = 'batch_processing/batch' + for node in graph_def.node: + for idx, input_name in enumerate(node.input): + # replace style input and content input nodes to placeholder + if replace_content == input_name: + node.input[idx] = 'content_input' + if replace_style == input_name: + node.input[idx] = 'style_input' + + if FLAGS.tune: + from neural_compressor.tensorflow.quantization.utils.utility import _parse_ckpt_bn_input + _parse_ckpt_bn_input(graph_def) + output_name = 'transformer/expand/conv3/conv/Sigmoid' + frozen_graph = tf.graph_util.convert_variables_to_constants(sess, graph_def, [output_name]) + # use frozen pb instead + elif FLAGS.input_model.rsplit('.', 1)[-1] == 'pb': + with open(FLAGS.input_model, 'rb') as f: + frozen_graph = tf.GraphDef() + frozen_graph.ParseFromString(f.read()) + else: + print("not supported model format") + exit(-1) + + if FLAGS.tune: + with tf.Graph().as_default() as graph: + tf.import_graph_def(frozen_graph, name='') + from neural_compressor.common import set_random_seed + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + + set_random_seed(9527) + dataset = StyleTransferDataset( + content_folder=FLAGS.content_images_paths.strip(), + style_folder=FLAGS.style_images_paths.strip(), + transform=ComposeTransform(transform_list= [ + ParseDecodeVocTransform(), + ] + ) + ) + calib_dataloader = TFDataLoader(dataset=dataset, batch_size=FLAGS.batch_size) + + quant_config = StaticQuantConfig() + q_model = quantize_model(graph, quant_config, calib_dataloader) + q_model.save(FLAGS.output_model) + frozen_graph= q_model.graph_def + + # validate the quantized model here + with tf.Graph().as_default(), tf.Session() as sess: + if FLAGS.tune: + # create dataloader using default style_transfer dataset + # generate stylized images + dataset = StyleTransferDataset( + content_folder=FLAGS.content_images_paths.strip(), + style_folder=FLAGS.style_images_paths.strip(), + crop_ratio=0.2, + resize_shape=(256, 256) + ) + else: + from neural_compressor.tensorflow.utils import DummyDatasetV2 + dataset = DummyDatasetV2(input_shape=[(256, 256, 3), (256, 256, 3)], label_shape=(1, )) + + dataloader = TFDataLoader(dataset=dataset, batch_size=FLAGS.batch_size) + tf.import_graph_def(frozen_graph, name='') + style_transfer(sess, dataloader) + +def add_import_to_name(sess, name, try_cnt=2): + for i in range(0, try_cnt): + try: + sess.graph.get_tensor_by_name(name) + return name + except: + name = 'import/' + name + + raise ValueError('can not find tensor by name') + +# validate and save the files +def style_transfer(sess, dataloader): + time_list = [] + output_name = add_import_to_name(sess, 'transformer/expand/conv3/conv/Sigmoid:0', 3) + 
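+    # each tf.import_graph_def call prepends an 'import/' prefix, so add_import_to_name probes until the tensor name resolves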
style_name = add_import_to_name(sess, 'style_input:0', 3) + content_name = add_import_to_name(sess, 'content_input:0', 3) + + stylized_images = sess.graph.get_tensor_by_name(output_name) + + for idx, ((content_img_np, style_img_np), _) in enumerate(dataloader): + start_time = time.time() + stylized_image_res = sess.run( + stylized_images, + feed_dict={ + style_name: style_img_np, + content_name: content_img_np}) + duration = time.time() - start_time + time_list.append(duration) + if idx + 1 == 20: + break + warm_up = 1 + throughput = (len(time_list) - warm_up)/ np.array(time_list[warm_up:]).sum() + print('Batch size = {}'.format(FLAGS.batch_size)) + print('Latency: {:.3f} ms'.format(np.array(time_list[warm_up:]).mean() * 1000)) + print('Throughput: {:.3f} images/sec'.format(throughput)) + + +def run_tuning(): + tf.disable_v2_behavior() + tf.app.run(main) + +if __name__ == '__main__': + run_tuning() diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/prepare_model.py b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/prepare_model.py new file mode 100644 index 00000000000..74182ad5f37 --- /dev/null +++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/prepare_model.py @@ -0,0 +1,33 @@ +import os +import argparse +import enum +import tarfile +import abc + +def get_pretrained_model(destination): + """ + Obtains a ready to use style_transfer model file. + Args: + destination: path to where the file should be stored + """ + url = "https://storage.googleapis.com/download.magenta.tensorflow.org/models/ \ + arbitrary_style_transfer.tar.gz" + + os.system("curl -o arbitrary_style_transfer.tar.gz {0}".format(url)) + with tarfile.open("arbitrary_style_transfer.tar.gz") as tar: + if not os.path.exists(destination): + os.makedirs(destination) + tar.extractall(destination) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Prepare pre-trained model for style transfer model') + parser.add_argument('--model_path', type=str, default='./model', help='directory to put models, default is ./model') + + args = parser.parse_args() + model_path = args.model_path + try: + get_pretrained_model(model_path) + except AttributeError: + print("The model fetched failed.") + diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/requirements.txt new file mode 100644 index 00000000000..1e5d462dcd4 --- /dev/null +++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/requirements.txt @@ -0,0 +1,2 @@ +scikit-image +Pillow>=8.2.0 diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..41fee820958 --- /dev/null +++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_benchmark.sh @@ -0,0 +1,61 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + iters=100 + for var in "$@" + do + case $var in + --topology=*) + topology=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + 
--mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo ${var} |cut -f2 -d=) + ;; + *) + echo "Error: No such parameter: ${var}" + exit 1 + ;; + esac + done + +} + + +# run_tuning +function run_benchmark { + style_images=$(echo ${dataset_location} | awk -F ',' '{print $1}') + content_images=$(echo ${dataset_location} | awk -F ',' '{print $2}') + echo "$style_images, $content_images" + + python main.py \ + --input_model "${input_model}" \ + --style_images_paths "${style_images}" \ + --content_images_paths "${content_images}" \ + --batch_size "${batch_size}" \ + --tune=False \ + --output_model "${output_model}" + +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..4fdfdd2e8a5 --- /dev/null +++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_quant.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# set -x + +function main { + + init_params "$@" + + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --topology=*) + topology=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo "$var" |cut -f2 -d=) + ;; + --input_model=*) + input_model=$(echo "$var" |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo "$var" |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + style_images=$(echo ${dataset_location} | awk -F ',' '{print $1}') + content_images=$(echo ${dataset_location} | awk -F ',' '{print $2}') + echo "$style_images, $content_images" + + python main.py \ + --input_model "${input_model}" \ + --style_images_paths "${style_images}" \ + --content_images_paths "${content_images}" \ + --config "./conf.yaml" \ + --tune=True \ + --output_model "${output_model}" +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/kanagawa_great_wave.jpg b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/kanagawa_great_wave.jpg new file mode 100644 index 0000000000000000000000000000000000000000..5af5a0eff5980f17081299c28da0903326d380c7 GIT binary patch literal 28352 zcmbT7bx<6^_uv?L)cL4sb0Tck3=;#>eXqXrn z7+6@C*to=axHvet6t9U0h-oP4XlW>^sp*+{*ytI#n5e1Wh_G|<@dJTCIyNyWQ2|LF zL7>2Y20_BY!otPDCC9@f7hs@f5cq$VzdZm#Or$WRFcc(405TyG3L(fx zmjM2!AR(imqM>78VqxR_>(KZLfQ*EKf{coShK7pzuXpgj=KxefwAT#$a_B^Q?=cv` z!~!1^^D&v^YkNubXRnwAZM?#Xl!b3 zX>IH49~c}O9vK~*n_pO5T3-3Sy1lczw|{VWbbNAsb9;CH@c8rT`9EAp0F?j6`gi?r zu>XUL@E;d4Dk=&p#(%hwkp2FhD1@kJ4E*S?<@7M#gNYagK4KEfC+64oVlfHoUy;~& z&0>=>1GibO|AY2lWdC@~Mt{$6c|H4Ss{vWAM4eIbN#l_?ZcXnEDX-yFlsdt1(t%A|ubWh+B#ETh7RL;u>K7JDG?{ z04KJ*Xvy_|=8%`5LJZRe?H+4+m;pH86doFAFDy_O^HP>@7nt`7qB927 z>L*5GU56bcY(X@QN6RorhRp%*nR7by;|PfJ<%na4)!$Kw<%RnfUk(!ogz2Kxe$+^W zzE7YO!A6zw#3*7AlVV4jd^6+(z{%802q@WsIfRkp}K0wZQ$3v#z zasHijq0<-)yHdc7NC+gJNA^)aXQv{9&+96q&&iIuBtzWvd#&M4q}IVV7zls}RnUAC z9>Za^x!|!ZPNt=g)G_AnTsj=vhCF~6Ur=4`t{_S37+2%fx&<=rw5rY!c*Xp)ndFT+igUUhYIleQFo$o^h#m*@fER#%Z#B-{M(7NGp;YEpx zP%dO~N@~j2nf7uclWfEgxNiSELd|B+_gex%pwyQWPdTnpOf-LvWGo5#dNpN*!;6sO&RgLnD@Do=P9njf}#^jE7X2 z-b<71Ya%P>zD@F!9x9Mm0NYyTkm3F!m2#z<_Xy%1(;buCY5lr!NYEro_}0Xu;it!T zKVBWa-mtCOaSmYOD25 zbGz}9JBeB}ZS$RHwrV#S9rsAg^-43dc|v6`hn?0(~gL1Jfi4WHL@)m9@RW4@I` 
[... base85-encoded GIT binary patch data omitted ...]
literal 0
HcmV?d00001

diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/zigzag_colorful.jpg b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/zigzag_colorful.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..bb0c46ea1debbe937188c7ba1811cb5a00a0a7c2
GIT binary patch
literal 19632
[... base85-encoded GIT binary patch data omitted ...]
zIV?{kllqfX>Tzo_Y!+MEJ8)a6c9*M?uEaXeB|w?oQ~Q&x1G zS@V~Kd;b7=N9H~F72lOp5=xG*6O62ga-<%F8u9H< zNxIW+3d*IUC*+h5mA}CIRqqdYn(slGqbg^!ll^78HyQr&`XA@{5!V`uO6OJsH>-u0 zu*B@X_5Nt>>l0hhQ&Ii=C1be*1fGsP{r&#{$mniuWwn!RgM;^2au|D5HRLBMa_rAm zwHQ^4g+-y}6I;s##E{H}IS}EL_N{#rKzsQXdxd1R9D&fu>^;5z055K`?@P13o*Awo z4)Gp6$D#EZ=l=lKYUOlK6x?jLJenCqVO7ebyjIKn#-c1}sEQR-U;r=xr%{ZY4oz5S ztzdZV?yk}s$;M%YzxHoT^dHpmT+xe`$GHefT18y1T;{vs<8vckSf~>oWRIYKaqs;9 z0G~}XQb6S-LaaL)WjM-I<#co6Fz~MqN{I5tQjW;VRg~a@FhQ=HL-6BB;_Ci9Amt%$ zt?t9q+x(t}O)kz|K`)*lXF*w%x=F7f8JZRU0NH(U(Ek8K#bFpqO6c~Z z2}w5*_qrG;T9)Ox~^XJ<4L~)LYJqN$P`8Vw}yE_@?n%)qP zDd*UF4?&)P$g7pDY(t)r*yOmmVi#C}lBXpLh?#Ygx z&F}pG07~!au8SfvI)+vlD#IX(s#RS#1`g+?YW6LC(vimRl2&>-1J)*;Bx5U{rT%J z$SA4cwrQ25W?11)UPm}kC=o$*YS(&n;^NpGG5#eZu0{vf>G{@5oSM1oC{4$mEeeyt z70eF|$R0NsWj*VQ);v8foP6tG^4nu1<1C+FZ-4T8iYe2pDsMxF7lWsXjitYH|Jf>w Bz90Yq literal 0 HcmV?d00001 From 00ef38723ec8e83b5f06a0dab73d78fd3f13c620 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Tue, 9 Jul 2024 13:51:32 +0800 Subject: [PATCH 02/14] add keras models Signed-off-by: zehao-intel --- examples/.config/model_params_keras_3x.json | 25 + .../.config/model_params_tensorflow_3x.json | 7 + .../cv/mobilenet_v2/quantization/ptq/main.py | 1 - .../imagenet_prepare/build_imagenet_data.py | 567 + .../download_and_convert_imagenet.sh | 100 + .../cv/imagenet_prepare/download_imagenet.sh | 99 + .../imagenet_lsvrc_2015_synsets.txt | 1000 + .../cv/imagenet_prepare/imagenet_metadata.txt | 21842 ++++++++++++++++ .../inception_v3/quantization/ptq/README.md | 65 + .../quantization/ptq/data_process.py | 543 + .../cv/inception_v3/quantization/ptq/main.py | 143 + .../quantization/ptq/prepare_model.py | 35 + .../quantization/ptq/requirements.txt | 2 + .../quantization/ptq/run_benchmark.sh | 51 + .../quantization/ptq/run_quant.sh | 40 + .../mobilenet_v2/quantization/ptq/README.md | 75 + .../quantization/ptq/data_process.py | 543 + .../cv/mobilenet_v2/quantization/ptq/main.py | 141 + .../quantization/ptq/prepare_model.py | 35 + .../quantization/ptq/requirements.txt | 2 + .../quantization/ptq/run_benchmark.sh | 51 + .../quantization/ptq/run_quant.sh | 40 + .../tensorflow/keras/cv/prepare_dataset.sh | 71 + .../resnet_v2_50/quantization/ptq/README.md | 76 + .../quantization/ptq/data_process.py | 543 + .../cv/resnet_v2_50/quantization/ptq/main.py | 141 + .../quantization/ptq/prepare_model.py | 35 + .../quantization/ptq/requirements.txt | 2 + .../quantization/ptq/run_benchmark.sh | 50 + .../quantization/ptq/run_quant.sh | 40 + 30 files changed, 26364 insertions(+), 1 deletion(-) create mode 100644 examples/.config/model_params_keras_3x.json create mode 100644 examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/build_imagenet_data.py create mode 100644 examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/download_and_convert_imagenet.sh create mode 100644 examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/download_imagenet.sh create mode 100644 examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt create mode 100644 examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/imagenet_metadata.txt create mode 100644 examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/README.md create mode 100644 examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/data_process.py create mode 100644 examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/main.py create mode 100644 examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/prepare_model.py create mode 100644 
examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/requirements.txt create mode 100644 examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/run_benchmark.sh create mode 100644 examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/run_quant.sh create mode 100644 examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/README.md create mode 100644 examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/data_process.py create mode 100644 examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/main.py create mode 100644 examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/prepare_model.py create mode 100644 examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/requirements.txt create mode 100644 examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/run_benchmark.sh create mode 100644 examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/run_quant.sh create mode 100644 examples/3.x_api/tensorflow/keras/cv/prepare_dataset.sh create mode 100644 examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/README.md create mode 100644 examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/data_process.py create mode 100644 examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/main.py create mode 100644 examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/prepare_model.py create mode 100644 examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/requirements.txt create mode 100644 examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/run_benchmark.sh create mode 100644 examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/run_quant.sh diff --git a/examples/.config/model_params_keras_3x.json b/examples/.config/model_params_keras_3x.json new file mode 100644 index 00000000000..65b0466667e --- /dev/null +++ b/examples/.config/model_params_keras_3x.json @@ -0,0 +1,25 @@ +{ + "keras": { + "resnetv2_50": { + "model_src_dir": "keras/cv/resnet_v2_50/quantization/ptq", + "dataset_location": "/tf_dataset/dataset/imagenet", + "input_model": "/tf_dataset2/models/tensorflow/resnetv2_50_keras/saved_model", + "main_script": "main.py", + "batch_size": 32 + }, + "inception_v3": { + "model_src_dir": "keras/cv/inception_v3/quantization/ptq", + "dataset_location": "/tf_dataset/dataset/imagenet", + "input_model": "/tf_dataset2/models/tensorflow/inception_v3_keras/saved_model", + "main_script": "main.py", + "batch_size": 32 + }, + "mobilenet_v2": { + "model_src_dir": "keras/cv/mobilenet_v2/quantization/ptq", + "dataset_location": "/tf_dataset/dataset/imagenet", + "input_model": "/tf_dataset2/models/tensorflow/mobilenet_v2_keras/saved_model", + "main_script": "main.py", + "batch_size": 32 + } + } +} diff --git a/examples/.config/model_params_tensorflow_3x.json b/examples/.config/model_params_tensorflow_3x.json index f81b8abfeb3..69b5bb11059 100644 --- a/examples/.config/model_params_tensorflow_3x.json +++ b/examples/.config/model_params_tensorflow_3x.json @@ -72,6 +72,13 @@ "main_script": "main.py", "batch_size": 32 }, + "resnetv2_50": { + "model_src_dir": "cv/resnet_v2_50/quantization/ptq", + "dataset_location": "/tf_dataset/dataset/imagenet", + "input_model": "/tf_dataset/pre-train-model-slim/pbfile/frozen_pb/frozen_resnet_v2_50.pb", + "main_script": "main.py", + "batch_size": 32 + }, "vgg16": { "model_src_dir": "cv/vgg16/quantization/ptq", "dataset_location": "/tf_dataset/dataset/imagenet", diff --git 
a/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/main.py b/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/main.py index 024225431f4..d22a9bd4494 100644 --- a/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/main.py @@ -117,7 +117,6 @@ def run(self): q_model.save(args.output_graph) if args.benchmark: - dataloader = create_dataloader('tensorflow', dataloader_args) dataset = ImageRecordDataset( root=args.dataset_location, transform=ComposeTransform(transform_list= [ diff --git a/examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/build_imagenet_data.py b/examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/build_imagenet_data.py new file mode 100644 index 00000000000..c52d2bd4218 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/build_imagenet_data.py @@ -0,0 +1,567 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Converts ImageNet data to TFRecords file format with Example protos. + +The raw ImageNet data set is expected to reside in JPEG files located in the +following directory structure. + + data_dir/n01440764/ILSVRC2012_val_00000293.JPEG + data_dir/n01440764/ILSVRC2012_val_00000543.JPEG + ... + +where 'n01440764' is the unique synset label associated with +these images. + +The training data set consists of 1000 sub-directories (i.e. labels) +each containing 1200 JPEG images for a total of 1.2M JPEG images. + +The evaluation data set consists of 1000 sub-directories (i.e. labels) +each containing 50 JPEG images for a total of 50K JPEG images. + +This TensorFlow script converts the training and evaluation data into +a sharded data set consisting of 1024 and 128 TFRecord files, respectively. + + train_directory/train-00000-of-01024 + train_directory/train-00001-of-01024 + ... + train_directory/train-00127-of-01024 + +and + + validation_directory/validation-00000-of-00128 + validation_directory/validation-00001-of-00128 + ... + validation_directory/validation-00127-of-00128 + +Each validation TFRecord file contains ~390 records. Each training TFREcord +file contains ~1250 records. Each record within the TFRecord file is a +serialized Example proto. The Example proto contains the following fields: + + image/encoded: string containing JPEG encoded image in RGB colorspace + image/height: integer, image height in pixels + image/width: integer, image width in pixels + image/colorspace: string, specifying the colorspace, always 'RGB' + image/channels: integer, specifying the number of channels, always 3 + image/format: string, specifying the format, always'JPEG' + + image/filename: string containing the basename of the image file + e.g. 'n01440764_10026.JPEG' or 'ILSVRC2012_val_00000293.JPEG' + image/class/label: integer specifying the index in a classification layer. + The label ranges from [1, 1000] where 0 is not used. 
+ image/class/synset: string specifying the unique ID of the label, + e.g. 'n01440764' + image/class/text: string specifying the human-readable version of the label + e.g. 'red fox, Vulpes vulpes' + +Note that the length of xmin is identical to the length of xmax, ymin and ymax +for each example. + +Running this script using 16 threads may take around ~2.5 hours on a HP Z420. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from datetime import datetime +import os +import random +import sys +import threading + +import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin +import tensorflow as tf +tf.compat.v1.disable_eager_execution() + + +tf.compat.v1.app.flags.DEFINE_string('raw_directory', None, + 'Raw data directory') + +tf.compat.v1.app.flags.DEFINE_string('output_directory', None, + 'Output data directory') + +tf.compat.v1.app.flags.DEFINE_integer('shards', 1, + 'Number of shards in TFRecord files.') + +tf.compat.v1.app.flags.DEFINE_string('subset', 'validation', + 'Subset of imagenet, can be validation/train') + +tf.compat.v1.app.flags.DEFINE_integer('num_threads', 1, + 'Number of threads to preprocess the images.') + +# The labels file contains a list of valid labels are held in this file. +# Assumes that the file contains entries as such: +# n01440764 +# n01443537 +# n01484850 +# where each line corresponds to a label expressed as a synset. We map +# each synset contained in the file to an integer (based on the alphabetical +# ordering). See below for details. +tf.compat.v1.app.flags.DEFINE_string('labels_file', + 'imagenet_lsvrc_2015_synsets.txt', + 'Labels file') + +# This file containing mapping from synset to human-readable label. +# Assumes each line of the file looks like: +# +# n02119247 black fox +# n02119359 silver fox +# n02119477 red fox, Vulpes fulva +# +# where each line corresponds to a unique mapping. Note that each line is +# formatted as \t. +tf.compat.v1.app.flags.DEFINE_string('imagenet_metadata_file', + 'imagenet_metadata.txt', + 'ImageNet metadata file') + +FLAGS = tf.compat.v1.app.flags.FLAGS + + +def _int64_feature(value): + """Wrapper for inserting int64 features into Example proto.""" + if not isinstance(value, list): + value = [value] + return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) + + +def _float_feature(value): + """Wrapper for inserting float features into Example proto.""" + if not isinstance(value, list): + value = [value] + return tf.train.Feature(float_list=tf.train.FloatList(value=value)) + + +def _bytes_feature(value): + """Wrapper for inserting bytes features into Example proto.""" + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + + +def _convert_to_example(filename, image_buffer, label, synset, human, + height, width): + """Build an Example proto for an example. 
+ + Args: + filename: string, path to an image file, e.g., '/path/to/example.JPG' + image_buffer: string, JPEG encoding of RGB image + label: integer, identifier for the ground truth for the network + synset: string, unique WordNet ID specifying the label, e.g., 'n02323233' + human: string, human-readable label, e.g., 'red fox, Vulpes vulpes' + height: integer, image height in pixels + width: integer, image width in pixels + Returns: + Example proto + """ + + colorspace = b'RGB' + channels = 3 + image_format = b'JPEG' + + example = tf.train.Example(features=tf.train.Features(feature={ + 'image/height': _int64_feature(height), + 'image/width': _int64_feature(width), + 'image/colorspace': _bytes_feature(colorspace), + 'image/channels': _int64_feature(channels), + 'image/class/label': _int64_feature(label), + 'image/class/synset': _bytes_feature(bytes(synset,'utf-8')), + 'image/class/text': _bytes_feature(bytes(human,'utf-8')), + 'image/format': _bytes_feature(image_format), + 'image/filename': _bytes_feature(bytes(os.path.basename(filename),'utf-8')), + 'image/encoded': _bytes_feature(image_buffer)})) + return example + + +class ImageCoder(object): + """Helper class that provides TensorFlow image coding utilities.""" + + def __init__(self): + # Create a single Session to run all image coding calls. + self._sess = tf.compat.v1.Session() + + # Initializes function that converts PNG to JPEG data. + self._png_data = tf.compat.v1.placeholder(dtype=tf.string) + image = tf.image.decode_png(self._png_data, channels=3) + self._png_to_jpeg = tf.image.encode_jpeg(image, format='rgb', quality=100) + + # Initializes function that converts CMYK JPEG data to RGB JPEG data. + self._cmyk_data = tf.compat.v1.placeholder(dtype=tf.string) + image = tf.image.decode_jpeg(self._cmyk_data, channels=0) + self._cmyk_to_rgb = tf.image.encode_jpeg(image, format='rgb', quality=100) + + # Initializes function that decodes RGB JPEG data. + self._decode_jpeg_data = tf.compat.v1.placeholder(dtype=tf.string) + self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3) + + def png_to_jpeg(self, image_data): + return self._sess.run(self._png_to_jpeg, + feed_dict={self._png_data: image_data}) + + def cmyk_to_rgb(self, image_data): + return self._sess.run(self._cmyk_to_rgb, + feed_dict={self._cmyk_data: image_data}) + + def decode_jpeg(self, image_data): + image = self._sess.run(self._decode_jpeg, + feed_dict={self._decode_jpeg_data: image_data}) + assert len(image.shape) == 3 + assert image.shape[2] == 3 + return image + + +def _is_png(filename): + """Determine if a file contains a PNG format image. + + Args: + filename: string, path of the image file. + + Returns: + boolean indicating if the image is a PNG. + """ + # File list from: + # https://groups.google.com/forum/embed/?place=forum/torch7#!topic/torch7/fOSTXHIESSU + return 'n02105855_2933.JPEG' in filename + + +def _is_cmyk(filename): + """Determine if file contains a CMYK JPEG format image. + + Args: + filename: string, path of the image file. + + Returns: + boolean indicating if the image is a JPEG encoded with CMYK color space. 
+ """ + # File list from: + # https://github.com/cytsai/ilsvrc-cmyk-image-list + blacklist = ['n01739381_1309.JPEG', 'n02077923_14822.JPEG', + 'n02447366_23489.JPEG', 'n02492035_15739.JPEG', + 'n02747177_10752.JPEG', 'n03018349_4028.JPEG', + 'n03062245_4620.JPEG', 'n03347037_9675.JPEG', + 'n03467068_12171.JPEG', 'n03529860_11437.JPEG', + 'n03544143_17228.JPEG', 'n03633091_5218.JPEG', + 'n03710637_5125.JPEG', 'n03961711_5286.JPEG', + 'n04033995_2932.JPEG', 'n04258138_17003.JPEG', + 'n04264628_27969.JPEG', 'n04336792_7448.JPEG', + 'n04371774_5854.JPEG', 'n04596742_4225.JPEG', + 'n07583066_647.JPEG', 'n13037406_4650.JPEG'] + return filename.split('/')[-1] in blacklist + + +def _process_image(filename, coder): + """Process a single image file. + + Args: + filename: string, path to an image file e.g., '/path/to/example.JPG'. + coder: instance of ImageCoder to provide TensorFlow image coding utils. + Returns: + image_buffer: string, JPEG encoding of RGB image. + height: integer, image height in pixels. + width: integer, image width in pixels. + """ + # Read the image file. + image_data = tf.io.gfile.GFile(filename, 'rb').read() + + # Clean the dirty data. + if _is_png(filename): + # 1 image is a PNG. + print('Converting PNG to JPEG for %s' % filename) + image_data = coder.png_to_jpeg(image_data) + elif _is_cmyk(filename): + # 22 JPEG images are in CMYK colorspace. + print('Converting CMYK to RGB for %s' % filename) + image_data = coder.cmyk_to_rgb(image_data) + + # Decode the RGB JPEG. + image = coder.decode_jpeg(image_data) + + # Check that image converted to RGB + assert len(image.shape) == 3 + height = image.shape[0] + width = image.shape[1] + assert image.shape[2] == 3 + + return image_data, height, width + + +def _process_image_files_batch(coder, thread_index, ranges, name, filenames, + synsets, labels, humans, num_shards): + """Processes and saves list of images as TFRecord in 1 thread. + + Args: + coder: instance of ImageCoder to provide TensorFlow image coding utils. + thread_index: integer, unique batch to run index is within [0, len(ranges)). + ranges: list of pairs of integers specifying ranges of each batches to + analyze in parallel. + name: string, unique identifier specifying the data set + filenames: list of strings; each string is a path to an image file + synsets: list of strings; each string is a unique WordNet ID + labels: list of integer; each integer identifies the ground truth + humans: list of strings; each string is a human-readable label + num_shards: integer number of shards for this data set. + """ + # Each thread produces N shards where N = int(num_shards / num_threads). + # For instance, if num_shards = 128, and the num_threads = 2, then the first + # thread would produce shards [0, 64). + num_threads = len(ranges) + assert not num_shards % num_threads + num_shards_per_batch = int(num_shards / num_threads) + + shard_ranges = np.linspace(ranges[thread_index][0], + ranges[thread_index][1], + num_shards_per_batch + 1).astype(int) + num_files_in_thread = ranges[thread_index][1] - ranges[thread_index][0] + + counter = 0 + for s in xrange(num_shards_per_batch): + # Generate a sharded version of the file name, e.g. 
'train-00002-of-00010' + shard = thread_index * num_shards_per_batch + s + output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards) + output_file = os.path.join(FLAGS.output_directory, output_filename) + writer = tf.io.TFRecordWriter(output_file) + + shard_counter = 0 + files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int) # HERE + for i in files_in_shard: + filename = filenames[i] + label = labels[i] + synset = synsets[i] + human = humans[i] + + image_buffer, height, width = _process_image(filename, coder) + + example = _convert_to_example(filename, image_buffer, label, synset, human, height, width) + writer.write(example.SerializeToString()) + shard_counter += 1 + counter += 1 + + if not counter % 1000: + print('%s [thread %d]: Processed %d of %d images in thread batch.' % + (datetime.now(), thread_index, counter, num_files_in_thread)) + sys.stdout.flush() + + writer.close() + print('%s [thread %d]: Wrote %d images to %s' % + (datetime.now(), thread_index, shard_counter, output_file)) + sys.stdout.flush() + shard_counter = 0 + print('%s [thread %d]: Wrote %d images to %d shards.' % + (datetime.now(), thread_index, counter, num_files_in_thread)) + sys.stdout.flush() + + +def _process_image_files(name, filenames, synsets, labels, humans, num_shards): + """Process and save list of images as TFRecord of Example protos. + + Args: + name: string, unique identifier specifying the data set + filenames: list of strings; each string is a path to an image file + synsets: list of strings; each string is a unique WordNet ID + labels: list of integer; each integer identifies the ground truth + humans: list of strings; each string is a human-readable label + num_shards: integer number of shards for this data set. + """ + assert len(filenames) == len(synsets) + assert len(filenames) == len(labels) + assert len(filenames) == len(humans) + + # Break all images into batches with a [ranges[i][0], ranges[i][1]]. + spacing = np.linspace(0, len(filenames), FLAGS.num_threads + 1).astype(np.int) + ranges = [] + threads = [] + for i in xrange(len(spacing) - 1): + ranges.append([spacing[i], spacing[i+1]]) + + # Launch a thread for each batch. + print('Launching %d threads for spacings: %s' % (FLAGS.num_threads, ranges)) + sys.stdout.flush() + + # Create a mechanism for monitoring when all threads are finished. + coord = tf.train.Coordinator() + + # Create a generic TensorFlow-based utility for converting all image codings. + coder = ImageCoder() + + threads = [] + for thread_index in xrange(len(ranges)): + args = (coder, thread_index, ranges, name, filenames, + synsets, labels, humans, num_shards) + t = threading.Thread(target=_process_image_files_batch, args=args) + t.start() + threads.append(t) + + # Wait for all the threads to terminate. + coord.join(threads) + print('%s: Finished writing all %d images in data set.' % + (datetime.now(), len(filenames))) + sys.stdout.flush() + + +def _find_image_files(data_dir, labels_file): + """Build a list of all images files and labels in the data set. + + Args: + data_dir: string, path to the root directory of images. + + Assumes that the ImageNet data set resides in JPEG files located in + the following directory structure. + + data_dir/n01440764/ILSVRC2012_val_00000293.JPEG + data_dir/n01440764/ILSVRC2012_val_00000543.JPEG + + where 'n01440764' is the unique synset label associated with these images. + + labels_file: string, path to the labels file. + + The list of valid labels are held in this file. 
Assumes that the file + contains entries as such: + n01440764 + n01443537 + n01484850 + where each line corresponds to a label expressed as a synset. We map + each synset contained in the file to an integer (based on the alphabetical + ordering) starting with the integer 1 corresponding to the synset + contained in the first line. + + The reason we start the integer labels at 1 is to reserve label 0 as an + unused background class. + + Returns: + filenames: list of strings; each string is a path to an image file. + synsets: list of strings; each string is a unique WordNet ID. + labels: list of integer; each integer identifies the ground truth. + """ + print('Determining list of input files and labels from %s.' % data_dir) + challenge_synsets = [l.strip() for l in + tf.compat.v1.gfile.FastGFile(labels_file, 'r').readlines()] + + labels = [] + filenames = [] + synsets = [] + + # Leave label index 0 empty as a background class. + label_index = 1 + + # Construct the list of JPEG files and labels. + for synset in challenge_synsets: + jpeg_file_path = '%s/%s/*.JPEG' % (data_dir, synset) + matching_files = tf.io.gfile.glob(jpeg_file_path) + + labels.extend([label_index] * len(matching_files)) + synsets.extend([synset] * len(matching_files)) + filenames.extend(matching_files) + + if not label_index % 100: + print('Finished finding files in %d of %d classes.' % ( + label_index, len(challenge_synsets))) + label_index += 1 + + # Shuffle the ordering of all image files in order to guarantee + # random ordering of the images with respect to label in the + # saved TFRecord files. Make the randomization repeatable. + shuffled_index = range(len(filenames)) + random.seed(12345) + + random.shuffle(list(range(len(shuffled_index)))) + + filenames = [filenames[i] for i in shuffled_index] + synsets = [synsets[i] for i in shuffled_index] + labels = [labels[i] for i in shuffled_index] + + print('Found %d JPEG files across %d labels inside %s.' % + (len(filenames), len(challenge_synsets), data_dir)) + return filenames, synsets, labels + + +def _find_human_readable_labels(synsets, synset_to_human): + """Build a list of human-readable labels. + + Args: + synsets: list of strings; each string is a unique WordNet ID. + synset_to_human: dict of synset to human labels, e.g., + 'n02119022' --> 'red fox, Vulpes vulpes' + + Returns: + List of human-readable strings corresponding to each synset. + """ + humans = [] + for s in synsets: + assert s in synset_to_human, ('Failed to find: %s' % s) + humans.append(synset_to_human[s]) + return humans + + +def _process_dataset(name, directory, num_shards, synset_to_human): + """Process a complete data set and save it as a TFRecord. + + Args: + name: string, unique identifier specifying the data set. + directory: string, root path to the data set. + num_shards: integer number of shards for this data set. + synset_to_human: dict of synset to human labels, e.g., + 'n02119022' --> 'red fox, Vulpes vulpes' + """ + filenames, synsets, labels = _find_image_files(directory, FLAGS.labels_file) + humans = _find_human_readable_labels(synsets, synset_to_human) + + _process_image_files(name, filenames, synsets, labels, + humans, num_shards) + + +def _build_synset_lookup(imagenet_metadata_file): + """Build lookup for synset to human-readable label. + + Args: + imagenet_metadata_file: string, path to file containing mapping from + synset to human-readable label. 
+ + Assumes each line of the file looks like: + + n02119247 black fox + n02119359 silver fox + n02119477 red fox, Vulpes fulva + + where each line corresponds to a unique mapping. Note that each line is + formatted as \t. + + Returns: + Dictionary of synset to human labels, such as: + 'n02119022' --> 'red fox, Vulpes vulpes' + """ + lines = tf.compat.v1.gfile.FastGFile(imagenet_metadata_file, 'r').readlines() + synset_to_human = {} + for l in lines: + if l: + parts = l.strip().split('\t') + assert len(parts) == 2 + synset = parts[0] + human = parts[1] + synset_to_human[synset] = human + return synset_to_human + + +def main(unused_argv): + assert not FLAGS.shards % FLAGS.num_threads, ( + 'Please make the FLAGS.num_threads commensurate with FLAGS.shards') + + print('Saving results to %s' % FLAGS.output_directory) + + # Build a map from synset to human-readable label. + synset_to_human = _build_synset_lookup(FLAGS.imagenet_metadata_file) + + if(FLAGS.raw_directory != None): + _process_dataset(FLAGS.subset, FLAGS.raw_directory,FLAGS.shards, synset_to_human) + +if __name__ == '__main__': + tf.compat.v1.app.run() diff --git a/examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/download_and_convert_imagenet.sh b/examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/download_and_convert_imagenet.sh new file mode 100644 index 00000000000..f9baa85ab07 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/download_and_convert_imagenet.sh @@ -0,0 +1,100 @@ +#!/bin/bash +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +# Script to download and preprocess ImageNet Challenge 2012 +# training and validation data set. +# +# The final output of this script are sharded TFRecord files containing +# serialized Example protocol buffers. See build_imagenet_data.py for +# details of how the Example protocol buffers contain the ImageNet data. +# +# The final output of this script appears as such: +# +# data_dir/train-00000-of-01024 +# data_dir/train-00001-of-01024 +# ... +# data_dir/train-00127-of-01024 +# +# and +# +# data_dir/validation-00000-of-00128 +# data_dir/validation-00001-of-00128 +# ... +# data_dir/validation-00127-of-00128 +# +# Note that this script may take several hours to run to completion. The +# conversion of the ImageNet data to TFRecords alone takes 2-3 hours depending +# on the speed of your machine. Please be patient. +# +# **IMPORTANT** +# To download the raw images, the user must create an account with image-net.org +# and generate a username and access_key. The latter two are required for +# downloading the raw images. +# + +set -e + +if [ -z "$1" ]; then + echo "usage download_and_convert_imagenet.sh [data dir]" + exit +fi + +# Create the output and temporary directories. 
+DATA_DIR="${1%/}" +SCRATCH_DIR="${DATA_DIR}/raw-data/" +mkdir -p "${DATA_DIR}" +mkdir -p "${SCRATCH_DIR}" +WORK_DIR="$0.runfiles/__main__" + +# Download the ImageNet data. +LABELS_FILE="${WORK_DIR}/datasets/imagenet_lsvrc_2015_synsets.txt" +DOWNLOAD_SCRIPT="${WORK_DIR}/datasets/download_imagenet.sh" +"${DOWNLOAD_SCRIPT}" "${SCRATCH_DIR}" "${LABELS_FILE}" + +# Note the locations of the train and validation data. +TRAIN_DIRECTORY="${SCRATCH_DIR}train/" +VALIDATION_DIRECTORY="${SCRATCH_DIR}validation/" + +# Preprocess the validation data by moving the images into the appropriate +# sub-directory based on the label (synset) of the image. +echo "Organizing the validation data into sub-directories." +PREPROCESS_VAL_SCRIPT="${WORK_DIR}/datasets/preprocess_imagenet_validation_data.py" +VAL_LABELS_FILE="${WORK_DIR}/datasets/imagenet_2012_validation_synset_labels.txt" + +"${PREPROCESS_VAL_SCRIPT}" "${VALIDATION_DIRECTORY}" "${VAL_LABELS_FILE}" + +# Convert the XML files for bounding box annotations into a single CSV. +echo "Extracting bounding box information from XML." +BOUNDING_BOX_SCRIPT="${WORK_DIR}/datasets/process_bounding_boxes.py" +BOUNDING_BOX_FILE="${SCRATCH_DIR}/imagenet_2012_bounding_boxes.csv" +BOUNDING_BOX_DIR="${SCRATCH_DIR}bounding_boxes/" + +"${BOUNDING_BOX_SCRIPT}" "${BOUNDING_BOX_DIR}" "${LABELS_FILE}" \ + | sort >"${BOUNDING_BOX_FILE}" +echo "Finished downloading and preprocessing the ImageNet data." + +# Build the TFRecords version of the ImageNet data. +BUILD_SCRIPT="${WORK_DIR}/build_imagenet_data" +OUTPUT_DIRECTORY="${DATA_DIR}" +IMAGENET_METADATA_FILE="${WORK_DIR}/datasets/imagenet_metadata.txt" + +"${BUILD_SCRIPT}" \ + --train_directory="${TRAIN_DIRECTORY}" \ + --validation_directory="${VALIDATION_DIRECTORY}" \ + --output_directory="${OUTPUT_DIRECTORY}" \ + --imagenet_metadata_file="${IMAGENET_METADATA_FILE}" \ + --labels_file="${LABELS_FILE}" \ + --bounding_box_file="${BOUNDING_BOX_FILE}" diff --git a/examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/download_imagenet.sh b/examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/download_imagenet.sh new file mode 100644 index 00000000000..c780e179f93 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/download_imagenet.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +# Script to download ImageNet Challenge 2012 training and validation data set. +# +# Downloads and decompresses raw images and bounding boxes. +# +# **IMPORTANT** +# To download the raw images, the user must create an account with image-net.org +# and generate a username and access_key. The latter two are required for +# downloading the raw images. 
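+#
+# The credentials are read from the IMAGENET_USERNAME and IMAGENET_ACCESS_KEY
+# environment variables (the script checks that both are set). A sketch with
+# placeholder values:
+#
+#   export IMAGENET_USERNAME=my_username
+#   export IMAGENET_ACCESS_KEY=my_access_key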
+# +# usage: +# ./download_imagenet.sh [dirname] +set -e + +if [ "x$IMAGENET_ACCESS_KEY" == x -o "x$IMAGENET_USERNAME" == x ]; then + cat < Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). + +## 2. Prepare Pretrained model + +The pretrained model is provided by [Keras Applications](https://keras.io/api/applications/). prepare the model, Run as follow: + ``` +python prepare_model.py --output_model=/path/to/model + ``` +`--output_model ` the model should be saved as SavedModel format or H5 format. + + +## 3. Prepare Dataset + + TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. + We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. + + ```shell + cd examples/keras/image_recognition/ + # convert validation subset + bash prepare_dataset.sh --output_dir=/inception_v3/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + # convert train subset + bash prepare_dataset.sh --output_dir=/inception_v3/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train + cd inception_v3/quantization/ptq + ``` +> **Note**: +> The raw ImageNet dataset resides in JPEG files should be in the following directory structure. Taking validation set as an example:
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000293.JPEG
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000543.JPEG
+> where 'n01440764' is the unique synset label associated with these images. + +# Run Command + +## Quantization + ```shell + bash run_quant.sh --input_model=./inception_v3_keras/ --output_model=./result --dataset_location=/path/to/evaluation/dataset + ``` + +## Benchmark + ```shell + bash run_benchmark.sh --input_model=./result --dataset_location=/path/to/evaluation/dataset --mode=performance --batch_size=1 + bash run_benchmark.sh --input_model=./result --dataset_location=/path/to/evaluation/dataset --mode=accuracy --batch_size=32 + ``` diff --git a/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/data_process.py new file mode 100644 index 00000000000..b8cd01593c6 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/data_process.py @@ -0,0 +1,543 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import collections + +import numpy as np +import tensorflow as tf + +from abc import abstractmethod +from neural_compressor.common import logger +from neural_compressor.tensorflow.utils.data import default_collate + +class ParseDecodeImagenet: + """Parse features in Example proto. + + Returns: + tuple of parsed image and label + """ + + def __call__(self, sample): + """Parse features in example.""" + # Dense features in Example proto. + feature_map = { + "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1), + } + + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) + + features = tf.io.parse_single_example(serialized=sample, features=feature_map) + label = tf.cast(features["image/class/label"], dtype=tf.int32) + image = features["image/encoded"] + image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST") + return (image, label) + + +class BilinearImagenetTransform(object): + """Combination of a series of transforms which is applicable to images in Imagenet. 
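+
+    The pipeline applied in __call__ is: convert the decoded image to float32,
+    take a central crop of `central_fraction`, resize to (height, width) with
+    bilinear interpolation, rescale pixel values to [-1, 1], then subtract
+    `mean_value` and multiply by `scale`.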
+ + Args: + height: Height of the result + width:Width of the result + central_fraction(float, default=0.875):fraction of size to crop + mean_value(list, default=[0.0,0.0,0.0]):means for each channel + scale(float, default=1.0):std value + + Returns: + tuple of processed image and label + """ + + def __init__(self, height, width, central_fraction=0.875, mean_value=[0.0, 0.0, 0.0], scale=1.0): + """Initialize `BilinearImagenetTransform` class.""" + self.height = height + self.width = width + self.mean_value = mean_value + self.scale = scale + self.central_fraction = central_fraction + + # sample is (images, labels) + def __call__(self, sample): + """Convert `BilinearImagenetTransform` feature.""" + image, label = sample + if image.dtype is not tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image containing 87.5% area of the original image. + if self.central_fraction: + image = tf.image.central_crop(image, central_fraction=self.central_fraction) + + if self.height and self.width: + # Resize the image to the specified height and width. + image = tf.expand_dims(image, 0) + image = tf.image.resize(image, [self.height, self.width], method=tf.image.ResizeMethod.BILINEAR) + image = tf.squeeze(image, [0]) + + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + means = tf.broadcast_to(self.mean_value, tf.shape(input=image)) + image = (image - means) * self.scale + return (image, label) + + +class ComposeTransform(object): + """Composes several transforms together. + + Args: + transform_list (list of Transform objects): list of transforms to compose + + Returns: + sample (tuple): tuple of processed image and label + """ + + def __init__(self, transform_list): + """Initialize `ComposeTransform` class.""" + self.transform_list = transform_list + + def __call__(self, sample): + """Call transforms in transform_list.""" + for transform in self.transform_list: + sample = transform(sample) + return sample + + +class ShiftRescale(object): + """Label shift by 1 and rescale. + + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + image, label = sample + label -= 1 + image = (image - 127.5) / 127.5 + return (image, label) + + +class LabelShift(object): + """Convert label to label - label_shift. + + Args: + label_shift(int, default=0): number of label shift + + Returns: + tuple of processed image and label + """ + + def __init__(self, label_shift=0): + """Initialize `LabelShift` class.""" + self.label_shift = label_shift + + def __call__(self, sample): + """Convert label to label_shift.""" + images, labels = sample + if isinstance(labels, np.ndarray): + labels = labels - self.label_shift + elif isinstance(labels, list): + if isinstance(labels[0], tuple): + labels = [tuple(np.array(label) - self.label_shift) for label in labels] + elif isinstance(labels[0], np.ndarray): + labels = [label - self.label_shift for label in labels] + else: + labels = np.array(labels) - self.label_shift + labels = labels.tolist() + else: + labels = np.array(labels) - self.label_shift + return images, labels + + +class ImageRecordDataset(object): + """Tensorflow imageNet database in tf record format. + + Please arrange data in this way: + root/validation-000-of-100 + root/validation-001-of-100 + ... + root/validation-099-of-100 + The file name needs to follow this pattern: '* - * -of- *' + + Args: root (str): Root directory of dataset. + transform (transform object, default=None): transform to process input data. 
+ filter (Filter objects, default=None): filter out examples according + to specific conditions. + """ + + """Configuration for Imagenet dataset.""" + + def __new__(cls, root, transform=None, filter=None): + """Build a new object of TensorflowImageRecord class.""" + from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module + + glob_pattern = os.path.join(root, "*-*-of-*") + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError("Found no files in --root matching: {}".format(glob_pattern)) + + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) + ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names))) + + if transform is not None: + transform.transform_list.insert(0, ParseDecodeImagenet()) + else: + transform = ParseDecodeImagenet() + ds = ds.map(transform, num_parallel_calls=None) + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned + return ds + + +class BaseMetric(object): + """The base class of Metric.""" + + def __init__(self, metric, single_output=False, hvd=None): + """Initialize the basic metric. + + Args: + metric: The metric class. + single_output: Whether the output is single or not, defaults to False. + hvd: The Horovod class for distributed training, defaults to None. + """ + self._metric_cls = metric + self._single_output = single_output + self._hvd = hvd + + def __call__(self, *args, **kwargs): + """Evaluate the model predictions, and the reference. + + Returns: + The class itself. + """ + self._metric = self._metric_cls(*args, **kwargs) + return self + + @abstractmethod + def update(self, preds, labels=None, sample_weight=None): + """Update the state that need to be evaluated. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Clear the predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def result(self): + """Evaluate the difference between predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @property + def metric(self): + """Return its metric class. + + Returns: + The metric class. + """ + return self._metric_cls + + @property + def hvd(self): + """Return its hvd class. + + Returns: + The hvd class. + """ + return self._hvd + + @hvd.setter + def hvd(self, hvd): + """Set its hvd. + + Args: + hvd: The Horovod class for distributed training. + """ + self._hvd = hvd + + +class TopKMetric(BaseMetric): + """Compute Top-k Accuracy classification score for Tensorflow model. + + This metric computes the number of times where the correct label is among + the top k labels predicted. + + Attributes: + k (int): The number of most likely outcomes considered to find the correct label. + num_correct: The number of predictions that were correct classified. + num_sample: The total number of predictions. + """ + + def __init__(self, k=1): + """Initialize the k, number of samples and correct predictions. + + Args: + k: The number of most likely outcomes considered to find the correct label. 
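+
+            A tiny usage sketch (values are illustrative only):
+
+                metric = TopKMetric(k=1)
+                metric.update(preds=np.array([[0.1, 0.9]]), labels=[1])
+                assert metric.result() == 1.0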
+ """ + self.k = k + self.num_correct = 0 + self.num_sample = 0 + + def update(self, preds, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. + """ + preds, labels = TopKMetric._topk_shape_validate(preds, labels) + + labels = labels.reshape([len(labels)]) + with tf.Graph().as_default() as acc_graph: + topk = tf.nn.in_top_k( + predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k + ) + fp32_topk = tf.cast(topk, tf.float32) + correct_tensor = tf.reduce_sum(input_tensor=fp32_topk) + + with tf.compat.v1.Session() as acc_sess: + correct = acc_sess.run(correct_tensor) + + self.num_sample += len(labels) + self.num_correct += correct + + def reset(self): + """Reset the number of samples and correct predictions.""" + self.num_correct = 0 + self.num_sample = 0 + + def result(self): + """Compute the top-k score. + + Returns: + The top-k score. + """ + if self.num_sample == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + elif getattr(self, "_hvd", None) is not None: # pragma: no cover + allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) + allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) + return allgather_num_correct / allgather_num_sample + return self.num_correct / self.num_sample + + @staticmethod + def _topk_shape_validate(preds, labels): + # preds shape can be Nxclass_num or class_num(N=1 by default) + # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax + if isinstance(preds, int): + preds = [preds] + preds = np.array(preds) + elif isinstance(preds, np.ndarray): + preds = np.array(preds) + elif isinstance(preds, list): + preds = np.array(preds) + preds = preds.reshape((-1, preds.shape[-1])) + + # consider labels just int value 1x1 + if isinstance(labels, int): + labels = [labels] + labels = np.array(labels) + elif isinstance(labels, tuple): + labels = np.array([labels]) + labels = labels.reshape((labels.shape[-1], -1)) + elif isinstance(labels, list): + if isinstance(labels[0], int): + labels = np.array(labels) + labels = labels.reshape((labels.shape[0], 1)) + elif isinstance(labels[0], tuple): + labels = np.array(labels) + labels = labels.reshape((labels.shape[-1], -1)) + else: + labels = np.array(labels) + # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot) + # only support 2 dimension one-shot labels + # or 1 dimension one-hot class_num will confuse with N + + if len(preds.shape) == 1: + N = 1 + class_num = preds.shape[0] + preds = preds.reshape([-1, class_num]) + elif len(preds.shape) >= 2: + N = preds.shape[0] + preds = preds.reshape([N, -1]) + class_num = preds.shape[1] + + label_N = labels.shape[0] + assert label_N == N, "labels batch size should same with preds" + labels = labels.reshape([N, -1]) + # one-hot labels will have 2 dimension not equal 1 + if labels.shape[1] != 1: + labels = labels.argsort()[..., -1:] + return preds, labels + + +class TFDataLoader(object): # pragma: no cover + """Tensorflow dataloader class. + + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict + method to do session run, this dataloader is designed to satisfy the usage of feed dict + in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. + + Args: + dataset: obj. wrapper of needed data. + batch_size: int. 
batch size + """ + + def __init__(self, dataset, batch_size=1, last_batch="rollover"): + """Initialize `TFDataDataLoader` class.""" + self.dataset = dataset + self.last_batch = last_batch + self.batch_size = batch_size + dataset = dataset.batch(batch_size) + + def batch(self, batch_size, last_batch="rollover"): + """Dataset return data per batch.""" + drop_last = False if last_batch == "rollover" else True + self.batch_size = batch_size + self.dataset = self.dataset.batch(batch_size, drop_last) + + def __iter__(self): + """Iterate dataloader.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + ) + + def _generate_dataloader( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=None, + pin_memory=None, + distributed=False, + ): + """Yield data.""" + drop_last = False if last_batch == "rollover" else True + + def check_dynamic_shape(element_spec): + if isinstance(element_spec, collections.abc.Sequence): + return any([check_dynamic_shape(ele) for ele in element_spec]) + elif isinstance(element_spec, tf.TensorSpec): + return True if element_spec.shape.num_elements() is None else False + else: + raise ValueError("unrecognized element spec...") + + def squeeze_output(output): + if isinstance(output, collections.abc.Sequence): + return [squeeze_output(ele) for ele in output] + elif isinstance(output, np.ndarray): + return np.squeeze(output, axis=0) + else: + raise ValueError("not supported output format....") + + if tf.executing_eagerly(): + index = 0 + outputs = [] + for iter_tensors in dataset: + samples = [] + iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] + if isinstance(iter_inputs, tf.Tensor): + samples.append(iter_inputs.numpy()) + else: + samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) + if isinstance(iter_labels, tf.Tensor): + samples.append(iter_labels.numpy()) + else: + samples.append([np.array(l) for l in iter_labels]) + index += 1 + outputs.append(samples) + if index == batch_size: + outputs = default_collate(outputs) + yield outputs + outputs = [] + index = 0 + if len(outputs) > 0: + outputs = default_collate(outputs) + yield outputs + else: + try_single_batch = check_dynamic_shape(dataset.element_spec) + dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + iter_tensors = ds_iterator.get_next() + data_config = tf.compat.v1.ConfigProto() + data_config.use_per_session_threads = 1 + data_config.intra_op_parallelism_threads = 1 + data_config.inter_op_parallelism_threads = 16 + data_sess = tf.compat.v1.Session(config=data_config) + # pylint: disable=no-name-in-module + from tensorflow.python.framework.errors_impl import OutOfRangeError + + while True: + if not try_single_batch: + try: + outputs = data_sess.run(iter_tensors) + yield outputs + except OutOfRangeError: + data_sess.close() + return + else: + try: + outputs = [] + for i in range(0, batch_size): + outputs.append(squeeze_output(data_sess.run(iter_tensors))) + outputs = default_collate(outputs) + yield outputs + except OutOfRangeError: + if len(outputs) == 0: + data_sess.close() + return + else: + outputs = default_collate(outputs) + yield outputs + data_sess.close() + return diff --git a/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/main.py b/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/main.py new file mode 100644 index 
00000000000..2a8ba68a6cf --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/main.py @@ -0,0 +1,143 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import time + +import numpy as np +import tensorflow as tf + +from neural_compressor.utils import logger +from data_process import ( + ImageRecordDataset, + ComposeTransform, + BilinearImagenetTransform, + TFDataLoader, + TopKMetric, + LabelShift, +) + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +flags = tf.compat.v1.flags +FLAGS = flags.FLAGS + +## Required parameters +flags.DEFINE_string( + 'input_model', None, 'Run inference with specified keras model.') + +flags.DEFINE_string( + 'output_model', None, 'The output quantized model.') + +flags.DEFINE_string( + 'mode', 'performance', 'define benchmark mode for accuracy or performance') + +flags.DEFINE_bool( + 'tune', False, 'whether to tune the model') + +flags.DEFINE_bool( + 'benchmark', False, 'whether to benchmark the model') + +flags.DEFINE_string( + 'calib_data', None, 'location of calibration dataset') + +flags.DEFINE_string( + 'eval_data', None, 'location of evaluate dataset') + +flags.DEFINE_integer('batch_size', 32, 'batch_size') + +flags.DEFINE_integer( + 'iters', 100, 'maximum iteration when evaluating performance') + +height = width = 299 +eval_dataset = ImageRecordDataset(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \ + [BilinearImagenetTransform(height=height, width=width)])) + +eval_dataloader = TFDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) + +if FLAGS.calib_data: + calib_dataset = ImageRecordDataset(root=FLAGS.calib_data, transform= \ + ComposeTransform(transform_list= [BilinearImagenetTransform(height=height, width=width)])) + calib_dataloader = TFDataLoader(dataset=calib_dataset, batch_size=10) + +def evaluate(model): + """ + Custom evaluate function to inference the model for specified metric on validation dataset. + + Args: + model (tf.keras.Model): The input model will be the objection of tf.keras.Model. + + Returns: + accuracy (float): evaluation result, the larger is better. 
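+
+        A usage sketch mirroring the benchmark path in main() below
+        (the SavedModel path is a placeholder):
+
+            from neural_compressor.tensorflow import Model
+            acc = evaluate(Model("./inception_v3_keras/").model)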
+ """ + latency_list = [] + metric = TopKMetric() + postprocess = LabelShift(label_shift=1) + + def eval_func(dataloader, metric): + warmup = 5 + iteration = None + if FLAGS.benchmark and FLAGS.mode == 'performance': + iteration = FLAGS.iters + for idx, (inputs, labels) in enumerate(dataloader): + start = time.time() + predictions = model.predict_on_batch(inputs) + end = time.time() + latency_list.append(end - start) + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + if iteration and idx >= iteration: + break + latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size + return latency + + latency = eval_func(eval_dataloader, metric) + if FLAGS.benchmark: + logger.info("\n{} mode benchmark result:".format(FLAGS.mode)) + for i, res in enumerate(latency_list): + logger.debug("Iteration {} result {}:".format(i, res)) + if FLAGS.benchmark and FLAGS.mode == 'performance': + logger.info("Batch size = {}".format(eval_dataloader.batch_size)) + logger.info("Latency: {:.3f} ms".format(latency * 1000)) + logger.info("Throughput: {:.3f} images/sec".format(1. / latency)) + acc = metric.result() + return acc + +def main(_): + if FLAGS.tune: + from neural_compressor.common import set_random_seed + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + + set_random_seed(9527) + quant_config = StaticQuantConfig() + q_model = quantize_model(FLAGS.input_model, quant_config, calib_dataloader) + q_model.save(FLAGS.output_model) + + + if FLAGS.benchmark: + from neural_compressor.tensorflow import Model + + inc_model = Model(FLAGS.input_model) + if FLAGS.mode == 'performance': + evaluate(inc_model.model) + else: + accuracy = evaluate(inc_model.model) + logger.info('Batch size = %d' % FLAGS.batch_size) + logger.info("Accuracy: %.5f" % accuracy) + + +if __name__ == "__main__": + tf.compat.v1.app.run() diff --git a/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/prepare_model.py b/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/prepare_model.py new file mode 100644 index 00000000000..abf63dc93b4 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/prepare_model.py @@ -0,0 +1,35 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import argparse +from tensorflow.keras.applications.inception_v3 import InceptionV3 +def get_inception_v3_model(saved_path): + model = InceptionV3(weights='imagenet') + model.save(saved_path) + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Export pretained keras model', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--output_model', + type=str, + help='path to exported model file') + + args = parser.parse_args() + get_inception_v3_model(args.output_model) diff --git a/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/requirements.txt new file mode 100644 index 00000000000..2f0697d8502 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/requirements.txt @@ -0,0 +1,2 @@ +tensorflow +intel-extension-for-tensorflow[cpu] diff --git a/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..43b1636c839 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/run_benchmark.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + batch_size=32 + iters=100 + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_benchmark { + + python main.py \ + --input_model ${input_model} \ + --benchmark \ + --mode ${mode} \ + --eval_data ${dataset_location} \ + --batch_size ${batch_size} \ + --iters ${iters} +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..7e3ed727f71 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/run_quant.sh @@ -0,0 +1,40 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input_model ${input_model} \ + --output_model ${output_model} \ + --eval_data ${dataset_location} \ + --calib_data ${dataset_location} \ + --tune +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/README.md b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/README.md new file mode 100644 index 00000000000..cd7a564b505 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/README.md @@ -0,0 +1,75 @@ +Step-by-Step +============ + +This document is used to enable Tensorflow Keras model mobilenet_v2 quantization and benchmark using Intel® Neural Compressor. +This example can run on Intel CPUs and GPUs. 
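+
+Under the hood, quantization in this example is driven by the Intel® Neural Compressor 3.x TensorFlow API, mirroring what `main.py` in this directory does. A minimal sketch (the model path, output path and `calib_dataloader` are placeholders for this illustration):
+
+```python
+from neural_compressor.tensorflow import StaticQuantConfig, quantize_model
+
+# calib_dataloader yields (image, label) batches read from the ImageNet TF records.
+quant_config = StaticQuantConfig()
+q_model = quantize_model("./mobilenet_v2_keras/", quant_config, calib_dataloader)
+q_model.save("./result")
+```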
+ + +# Prerequisite + +## 1. Environment + +### Installation +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` + +### Install Requirements +The Tensorflow and intel-extension-for-tensorflow is mandatory to be installed to run this example. +The Intel Extension for Tensorflow for Intel CPUs is installed as default. +```shell +pip install -r requirements.txt +``` +> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). + +## 2. Prepare Pretrained model + +The pretrained model is provided by [Keras Applications](https://keras.io/api/applications/). prepare the model, Run as follow: + ``` +python prepare_model.py --output_model=/path/to/model + ``` +`--output_model ` the model should be saved as SavedModel format or H5 format. + +## 3. Prepare Dataset + + TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. + We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. + + ```shell + cd examples/keras/image_recognition/ + # convert validation subset + bash prepare_dataset.sh --output_dir=/mobilenet_v2/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + # convert train subset + bash prepare_dataset.sh --output_dir=/mobilenet_v2/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train + cd mobilenet_v2/quantization/ptq + ``` +> **Note**: +> The raw ImageNet dataset resides in JPEG files should be in the following directory structure. Taking validation set as an example:
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000293.JPEG
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000543.JPEG
+> where 'n01440764' is the unique synset label associated with these images. + +# Run Command + +## Quantization Config +The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. + +``` +config = PostTrainingQuantConfig( + device="gpu", + backend="itex", + ... + ) +``` + +## Quantization + ```shell + bash run_quant.sh --input_model=./mobilenet_v2_keras/ --output_model=./result --dataset_location=/path/to/evaluation/dataset + ``` + +## Benchmark + ```shell + bash run_benchmark.sh --input_model=./result --mode=accuracy --dataset_location=/path/to/evaluation/dataset --batch_size=32 + bash run_benchmark.sh --input_model=./result --mode=performance --dataset_location=/path/to/evaluation/dataset --batch_size=1 + ``` diff --git a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/data_process.py new file mode 100644 index 00000000000..a655a5ce9a8 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/data_process.py @@ -0,0 +1,543 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import collections + +import numpy as np +import tensorflow as tf + +from abc import abstractmethod +from neural_compressor.common import logger +from neural_compressor.tensorflow.utils.data import default_collate + +class ParseDecodeImagenet: + """Parse features in Example proto. + + Returns: + tuple of parsed image and label + """ + + def __call__(self, sample): + """Parse features in example.""" + # Dense features in Example proto. + feature_map = { + "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1), + } + + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) + + features = tf.io.parse_single_example(serialized=sample, features=feature_map) + label = tf.cast(features["image/class/label"], dtype=tf.int32) + image = features["image/encoded"] + image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST") + return (image, label) + + +class BilinearImagenetTransform(object): + """Combination of a series of transforms which is applicable to images in Imagenet. 
+ + Args: + height: Height of the result + width:Width of the result + central_fraction(float, default=0.875):fraction of size to crop + mean_value(list, default=[0.0,0.0,0.0]):means for each channel + scale(float, default=1.0):std value + + Returns: + tuple of processed image and label + """ + + def __init__(self, height, width, central_fraction=0.875, mean_value=[0.0, 0.0, 0.0], scale=1.0): + """Initialize `BilinearImagenetTransform` class.""" + self.height = height + self.width = width + self.mean_value = mean_value + self.scale = scale + self.central_fraction = central_fraction + + # sample is (images, labels) + def __call__(self, sample): + """Convert `BilinearImagenetTransform` feature.""" + image, label = sample + if image.dtype is not tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image containing 87.5% area of the original image. + if self.central_fraction: + image = tf.image.central_crop(image, central_fraction=self.central_fraction) + + if self.height and self.width: + # Resize the image to the specified height and width. + image = tf.expand_dims(image, 0) + image = tf.image.resize(image, [self.height, self.width], method=tf.image.ResizeMethod.BILINEAR) + image = tf.squeeze(image, [0]) + + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + means = tf.broadcast_to(self.mean_value, tf.shape(input=image)) + image = (image - means) * self.scale + return (image, label) + + +class ComposeTransform(object): + """Composes several transforms together. + + Args: + transform_list (list of Transform objects): list of transforms to compose + + Returns: + sample (tuple): tuple of processed image and label + """ + + def __init__(self, transform_list): + """Initialize `ComposeTransform` class.""" + self.transform_list = transform_list + + def __call__(self, sample): + """Call transforms in transform_list.""" + for transform in self.transform_list: + sample = transform(sample) + return sample + + +class LabelShift(object): + """Convert label to label - label_shift. + + Args: + label_shift(int, default=0): number of label shift + + Returns: + tuple of processed image and label + """ + + def __init__(self, label_shift=0): + """Initialize `LabelShift` class.""" + self.label_shift = label_shift + + def __call__(self, sample): + """Convert label to label_shift.""" + images, labels = sample + if isinstance(labels, np.ndarray): + labels = labels - self.label_shift + elif isinstance(labels, list): + if isinstance(labels[0], tuple): + labels = [tuple(np.array(label) - self.label_shift) for label in labels] + elif isinstance(labels[0], np.ndarray): + labels = [label - self.label_shift for label in labels] + else: + labels = np.array(labels) - self.label_shift + labels = labels.tolist() + else: + labels = np.array(labels) - self.label_shift + return images, labels + + +class ShiftRescale(object): + """Label shift by 1 and rescale. + + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + image, label = sample + label -= 1 + image = (image - 127.5) / 127.5 + return (image, label) + + +class ImageRecordDataset(object): + """Tensorflow imageNet database in tf record format. + + Please arrange data in this way: + root/validation-000-of-100 + root/validation-001-of-100 + ... + root/validation-099-of-100 + The file name needs to follow this pattern: '* - * -of- *' + + Args: root (str): Root directory of dataset. + transform (transform object, default=None): transform to process input data. 
+ filter (Filter objects, default=None): filter out examples according + to specific conditions. + """ + + """Configuration for Imagenet dataset.""" + + def __new__(cls, root, transform=None, filter=None): + """Build a new object of TensorflowImageRecord class.""" + from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module + + glob_pattern = os.path.join(root, "*-*-of-*") + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError("Found no files in --root matching: {}".format(glob_pattern)) + + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) + ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names))) + + if transform is not None: + transform.transform_list.insert(0, ParseDecodeImagenet()) + else: + transform = ParseDecodeImagenet() + ds = ds.map(transform, num_parallel_calls=None) + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned + return ds + + +class BaseMetric(object): + """The base class of Metric.""" + + def __init__(self, metric, single_output=False, hvd=None): + """Initialize the basic metric. + + Args: + metric: The metric class. + single_output: Whether the output is single or not, defaults to False. + hvd: The Horovod class for distributed training, defaults to None. + """ + self._metric_cls = metric + self._single_output = single_output + self._hvd = hvd + + def __call__(self, *args, **kwargs): + """Evaluate the model predictions, and the reference. + + Returns: + The class itself. + """ + self._metric = self._metric_cls(*args, **kwargs) + return self + + @abstractmethod + def update(self, preds, labels=None, sample_weight=None): + """Update the state that need to be evaluated. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Clear the predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def result(self): + """Evaluate the difference between predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @property + def metric(self): + """Return its metric class. + + Returns: + The metric class. + """ + return self._metric_cls + + @property + def hvd(self): + """Return its hvd class. + + Returns: + The hvd class. + """ + return self._hvd + + @hvd.setter + def hvd(self, hvd): + """Set its hvd. + + Args: + hvd: The Horovod class for distributed training. + """ + self._hvd = hvd + + +class TopKMetric(BaseMetric): + """Compute Top-k Accuracy classification score for Tensorflow model. + + This metric computes the number of times where the correct label is among + the top k labels predicted. + + Attributes: + k (int): The number of most likely outcomes considered to find the correct label. + num_correct: The number of predictions that were correct classified. + num_sample: The total number of predictions. + """ + + def __init__(self, k=1): + """Initialize the k, number of samples and correct predictions. + + Args: + k: The number of most likely outcomes considered to find the correct label. 
+ """ + self.k = k + self.num_correct = 0 + self.num_sample = 0 + + def update(self, preds, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. + """ + preds, labels = TopKMetric._topk_shape_validate(preds, labels) + + labels = labels.reshape([len(labels)]) + with tf.Graph().as_default() as acc_graph: + topk = tf.nn.in_top_k( + predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k + ) + fp32_topk = tf.cast(topk, tf.float32) + correct_tensor = tf.reduce_sum(input_tensor=fp32_topk) + + with tf.compat.v1.Session() as acc_sess: + correct = acc_sess.run(correct_tensor) + + self.num_sample += len(labels) + self.num_correct += correct + + def reset(self): + """Reset the number of samples and correct predictions.""" + self.num_correct = 0 + self.num_sample = 0 + + def result(self): + """Compute the top-k score. + + Returns: + The top-k score. + """ + if self.num_sample == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + elif getattr(self, "_hvd", None) is not None: # pragma: no cover + allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) + allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) + return allgather_num_correct / allgather_num_sample + return self.num_correct / self.num_sample + + @staticmethod + def _topk_shape_validate(preds, labels): + # preds shape can be Nxclass_num or class_num(N=1 by default) + # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax + if isinstance(preds, int): + preds = [preds] + preds = np.array(preds) + elif isinstance(preds, np.ndarray): + preds = np.array(preds) + elif isinstance(preds, list): + preds = np.array(preds) + preds = preds.reshape((-1, preds.shape[-1])) + + # consider labels just int value 1x1 + if isinstance(labels, int): + labels = [labels] + labels = np.array(labels) + elif isinstance(labels, tuple): + labels = np.array([labels]) + labels = labels.reshape((labels.shape[-1], -1)) + elif isinstance(labels, list): + if isinstance(labels[0], int): + labels = np.array(labels) + labels = labels.reshape((labels.shape[0], 1)) + elif isinstance(labels[0], tuple): + labels = np.array(labels) + labels = labels.reshape((labels.shape[-1], -1)) + else: + labels = np.array(labels) + # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot) + # only support 2 dimension one-shot labels + # or 1 dimension one-hot class_num will confuse with N + + if len(preds.shape) == 1: + N = 1 + class_num = preds.shape[0] + preds = preds.reshape([-1, class_num]) + elif len(preds.shape) >= 2: + N = preds.shape[0] + preds = preds.reshape([N, -1]) + class_num = preds.shape[1] + + label_N = labels.shape[0] + assert label_N == N, "labels batch size should same with preds" + labels = labels.reshape([N, -1]) + # one-hot labels will have 2 dimension not equal 1 + if labels.shape[1] != 1: + labels = labels.argsort()[..., -1:] + return preds, labels + + +class TFDataLoader(object): # pragma: no cover + """Tensorflow dataloader class. + + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict + method to do session run, this dataloader is designed to satisfy the usage of feed dict + in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. + + Args: + dataset: obj. wrapper of needed data. + batch_size: int. 
batch size + """ + + def __init__(self, dataset, batch_size=1, last_batch="rollover"): + """Initialize `TFDataDataLoader` class.""" + self.dataset = dataset + self.last_batch = last_batch + self.batch_size = batch_size + dataset = dataset.batch(batch_size) + + def batch(self, batch_size, last_batch="rollover"): + """Dataset return data per batch.""" + drop_last = False if last_batch == "rollover" else True + self.batch_size = batch_size + self.dataset = self.dataset.batch(batch_size, drop_last) + + def __iter__(self): + """Iterate dataloader.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + ) + + def _generate_dataloader( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=None, + pin_memory=None, + distributed=False, + ): + """Yield data.""" + drop_last = False if last_batch == "rollover" else True + + def check_dynamic_shape(element_spec): + if isinstance(element_spec, collections.abc.Sequence): + return any([check_dynamic_shape(ele) for ele in element_spec]) + elif isinstance(element_spec, tf.TensorSpec): + return True if element_spec.shape.num_elements() is None else False + else: + raise ValueError("unrecognized element spec...") + + def squeeze_output(output): + if isinstance(output, collections.abc.Sequence): + return [squeeze_output(ele) for ele in output] + elif isinstance(output, np.ndarray): + return np.squeeze(output, axis=0) + else: + raise ValueError("not supported output format....") + + if tf.executing_eagerly(): + index = 0 + outputs = [] + for iter_tensors in dataset: + samples = [] + iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] + if isinstance(iter_inputs, tf.Tensor): + samples.append(iter_inputs.numpy()) + else: + samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) + if isinstance(iter_labels, tf.Tensor): + samples.append(iter_labels.numpy()) + else: + samples.append([np.array(l) for l in iter_labels]) + index += 1 + outputs.append(samples) + if index == batch_size: + outputs = default_collate(outputs) + yield outputs + outputs = [] + index = 0 + if len(outputs) > 0: + outputs = default_collate(outputs) + yield outputs + else: + try_single_batch = check_dynamic_shape(dataset.element_spec) + dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + iter_tensors = ds_iterator.get_next() + data_config = tf.compat.v1.ConfigProto() + data_config.use_per_session_threads = 1 + data_config.intra_op_parallelism_threads = 1 + data_config.inter_op_parallelism_threads = 16 + data_sess = tf.compat.v1.Session(config=data_config) + # pylint: disable=no-name-in-module + from tensorflow.python.framework.errors_impl import OutOfRangeError + + while True: + if not try_single_batch: + try: + outputs = data_sess.run(iter_tensors) + yield outputs + except OutOfRangeError: + data_sess.close() + return + else: + try: + outputs = [] + for i in range(0, batch_size): + outputs.append(squeeze_output(data_sess.run(iter_tensors))) + outputs = default_collate(outputs) + yield outputs + except OutOfRangeError: + if len(outputs) == 0: + data_sess.close() + return + else: + outputs = default_collate(outputs) + yield outputs + data_sess.close() + return diff --git a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/main.py b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/main.py new file mode 100644 index 
00000000000..0e8767eb0a9 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/main.py @@ -0,0 +1,141 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import time + +import numpy as np +import tensorflow as tf + +from neural_compressor.utils import logger +from data_process import ( + ImageRecordDataset, + ComposeTransform, + BilinearImagenetTransform, + TFDataLoader, + TopKMetric, + LabelShift, +) + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +flags = tf.compat.v1.flags +FLAGS = flags.FLAGS + +## Required parameters +flags.DEFINE_string( + 'input_model', None, 'Run inference with specified keras model.') + +flags.DEFINE_string( + 'output_model', None, 'The output quantized model.') + +flags.DEFINE_string( + 'mode', 'performance', 'define benchmark mode for accuracy or performance') + +flags.DEFINE_bool( + 'tune', False, 'whether to tune the model') + +flags.DEFINE_bool( + 'benchmark', False, 'whether to benchmark the model') + +flags.DEFINE_string( + 'calib_data', None, 'location of calibration dataset') + +flags.DEFINE_string( + 'eval_data', None, 'location of evaluate dataset') + +flags.DEFINE_integer('batch_size', 32, 'batch_size') + +flags.DEFINE_integer( + 'iters', 100, 'maximum iteration when evaluating performance') + +height = width = 224 +eval_dataset = ImageRecordDataset(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \ + [BilinearImagenetTransform(height=height, width=width)])) + +eval_dataloader = TFDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) + +if FLAGS.calib_data: + calib_dataset = ImageRecordDataset(root=FLAGS.calib_data, transform= \ + ComposeTransform(transform_list= [BilinearImagenetTransform(height=height, width=width)])) + calib_dataloader = TFDataLoader(dataset=calib_dataset, batch_size=10) + +def evaluate(model): + """ + Custom evaluate function to inference the model for specified metric on validation dataset. + + Args: + model (tf.keras.Model): The input model will be the objection of tf.keras.Model. + + Returns: + accuracy (float): evaluation result, the larger is better. 
+ """ + latency_list = [] + metric = TopKMetric() + postprocess = LabelShift(label_shift=1) + + def eval_func(dataloader, metric): + warmup = 5 + iteration = None + if FLAGS.benchmark and FLAGS.mode == 'performance': + iteration = FLAGS.iters + for idx, (inputs, labels) in enumerate(dataloader): + start = time.time() + predictions = model.predict_on_batch(inputs) + end = time.time() + latency_list.append(end - start) + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + if iteration and idx >= iteration: + break + latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size + return latency + + latency = eval_func(eval_dataloader, metric) + if FLAGS.benchmark: + logger.info("\n{} mode benchmark result:".format(FLAGS.mode)) + for i, res in enumerate(latency_list): + logger.debug("Iteration {} result {}:".format(i, res)) + if FLAGS.benchmark and FLAGS.mode == 'performance': + logger.info("Batch size = {}".format(eval_dataloader.batch_size)) + logger.info("Latency: {:.3f} ms".format(latency * 1000)) + logger.info("Throughput: {:.3f} images/sec".format(1. / latency)) + acc = metric.result() + return acc + +def main(_): + if FLAGS.tune: + from neural_compressor.common import set_random_seed + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + + set_random_seed(9527) + quant_config = StaticQuantConfig() + q_model = quantize_model(FLAGS.input_model, quant_config, calib_dataloader) + q_model.save(FLAGS.output_model) + + if FLAGS.benchmark: + from neural_compressor.tensorflow import Model + + inc_model = Model(FLAGS.input_model) + if FLAGS.mode == 'performance': + evaluate(inc_model.model) + else: + accuracy = evaluate(inc_model.model) + logger.info('Batch size = %d' % FLAGS.batch_size) + logger.info("Accuracy: %.5f" % accuracy) + +if __name__ == "__main__": + tf.compat.v1.app.run() diff --git a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/prepare_model.py b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/prepare_model.py new file mode 100644 index 00000000000..e31b3e83de0 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/prepare_model.py @@ -0,0 +1,35 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import argparse +from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2 +def get_mobilenet_v2_model(saved_path): + model = MobileNetV2(weights='imagenet') + model.save(saved_path) + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Export pretained keras model', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--output_model', + type=str, + help='path to exported model file') + + args = parser.parse_args() + get_mobilenet_v2_model(args.output_model) diff --git a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/requirements.txt new file mode 100644 index 00000000000..8b7b47da969 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/requirements.txt @@ -0,0 +1,2 @@ +tensorflow>=2.11.1 +intel-extension-for-tensorflow[cpu] diff --git a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..43b1636c839 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/run_benchmark.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + batch_size=32 + iters=100 + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_benchmark { + + python main.py \ + --input_model ${input_model} \ + --benchmark \ + --mode ${mode} \ + --eval_data ${dataset_location} \ + --batch_size ${batch_size} \ + --iters ${iters} +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..7e3ed727f71 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/run_quant.sh @@ -0,0 +1,40 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input_model ${input_model} \ + --output_model ${output_model} \ + --eval_data ${dataset_location} \ + --calib_data ${dataset_location} \ + --tune +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/keras/cv/prepare_dataset.sh b/examples/3.x_api/tensorflow/keras/cv/prepare_dataset.sh new file mode 100644 index 00000000000..4aad5d69a3f --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/prepare_dataset.sh @@ -0,0 +1,71 @@ +#!/bin/bash +# set -x + +OUTPUT_DIR="./data" +SUBSET="validation" +SHARDS=1 + +help() +{ + cat <<- EOF + Desc: Convert prepared raw imagnet dataset to tfrecord + -h --help help info + --output_dir Output data directory + default: './data' + --raw_dir Raw data directory + --shards 
Number of shards in TFRecord files. + default: '1' + --subset Subset of imagenet, can be validation/train. + default: 'validation' +EOF + exit 0 +} + +function main { + init_params "$@" + convert_dataset +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --output_dir=*) + OUTPUT_DIR=$(echo $var |cut -f2 -d=) + ;; + --raw_dir=*) + RAW_DIR=$(echo $var |cut -f2 -d=) + ;; + --shards=*) + SHARDS=$(echo $var |cut -f2 -d=) + ;; + --subset=*) + SUBSET=$(echo $var |cut -f2 -d=) + ;; + -h|--help) help + ;; + *) + echo "Error: No such parameter: ${var}" + exit 1 + ;; + esac + done +} + +# convert dataset +function convert_dataset { + if [ ! -d ${OUTPUT_DIR} ]; then + mkdir ${OUTPUT_DIR} + fi + python imagenet_prepare/build_imagenet_data.py \ + --imagenet_metadata_file "imagenet_prepare/imagenet_metadata.txt" \ + --labels_file "imagenet_prepare/imagenet_lsvrc_2015_synsets.txt" \ + --output_directory ${OUTPUT_DIR} \ + --subset ${SUBSET} \ + --raw_directory ${RAW_DIR} \ + --shards ${SHARDS} +} + +main "$@" + diff --git a/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/README.md b/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/README.md new file mode 100644 index 00000000000..54ab588faf4 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/README.md @@ -0,0 +1,76 @@ +Step-by-Step +============ + +This document is used to enable Tensorflow Keras models using Intel® Neural Compressor. +This example can run on Intel CPUs and GPUs. + + +# Prerequisite + +## 1. Environment + +### Installation +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` + +### Install Requirements +The Tensorflow and intel-extension-for-tensorflow is mandatory to be installed to run this example. +The Intel Extension for Tensorflow for Intel CPUs is installed as default. +```shell +pip install -r requirements.txt +``` +> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). + +## 2. Prepare Pretrained model + +The pretrained model is provided by [Keras Applications](https://keras.io/api/applications/). prepare the model, Run as follow: + ``` +python prepare_model.py --output_model=/path/to/model + ``` +`--output_model ` the model should be saved as SavedModel format or H5 format. + +## 3. Prepare Dataset + + TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. + We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. 
+ + ```shell + cd examples/keras/image_recognition/ + # convert validation subset + bash prepare_dataset.sh --output_dir=/resnetv2_50/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + # convert train subset + bash prepare_dataset.sh --output_dir=/resnetv2_50/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train + cd resnetv2_50/quantization/ptq + ``` +> **Note**: +> The raw ImageNet dataset, which resides in JPEG files, should be organized in the following directory structure. Taking the validation set as an example:
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000293.JPEG
+>         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000543.JPEG
+> where 'n01440764' is the unique synset label associated with these images. + +# Run Command + +## Quantization Config +The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. + +``` +config = PostTrainingQuantConfig( + device="gpu", + backend="itex", + ... + ) +``` + +## Quantization + ```shell + bash run_quant.sh --input_model=./resnetv2_50_keras/ --output_model=./result --dataset_location=/path/to/evaluation/dataset + ``` + +## Benchmark + ```shell + bash run_benchmark.sh --input_model=./result --mode=accuracy --dataset_location=/path/to/evaluation/dataset --batch_size=32 + bash run_benchmark.sh --input_model=./result --mode=performance --dataset_location=/path/to/evaluation/dataset --batch_size=1 + ``` + diff --git a/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/data_process.py new file mode 100644 index 00000000000..b8cd01593c6 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/data_process.py @@ -0,0 +1,543 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import collections + +import numpy as np +import tensorflow as tf + +from abc import abstractmethod +from neural_compressor.common import logger +from neural_compressor.tensorflow.utils.data import default_collate + +class ParseDecodeImagenet: + """Parse features in Example proto. + + Returns: + tuple of parsed image and label + """ + + def __call__(self, sample): + """Parse features in example.""" + # Dense features in Example proto. + feature_map = { + "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""), + "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1), + } + + sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) + # Sparse features in Example proto. + feature_map.update( + { + k: sparse_float32 + for k in [ + "image/object/bbox/xmin", + "image/object/bbox/ymin", + "image/object/bbox/xmax", + "image/object/bbox/ymax", + ] + } + ) + + features = tf.io.parse_single_example(serialized=sample, features=feature_map) + label = tf.cast(features["image/class/label"], dtype=tf.int32) + image = features["image/encoded"] + image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST") + return (image, label) + + +class BilinearImagenetTransform(object): + """Combination of a series of transforms which is applicable to images in Imagenet. 
+ + Args: + height: Height of the result + width:Width of the result + central_fraction(float, default=0.875):fraction of size to crop + mean_value(list, default=[0.0,0.0,0.0]):means for each channel + scale(float, default=1.0):std value + + Returns: + tuple of processed image and label + """ + + def __init__(self, height, width, central_fraction=0.875, mean_value=[0.0, 0.0, 0.0], scale=1.0): + """Initialize `BilinearImagenetTransform` class.""" + self.height = height + self.width = width + self.mean_value = mean_value + self.scale = scale + self.central_fraction = central_fraction + + # sample is (images, labels) + def __call__(self, sample): + """Convert `BilinearImagenetTransform` feature.""" + image, label = sample + if image.dtype is not tf.float32: + image = tf.image.convert_image_dtype(image, dtype=tf.float32) + # Crop the central region of the image containing 87.5% area of the original image. + if self.central_fraction: + image = tf.image.central_crop(image, central_fraction=self.central_fraction) + + if self.height and self.width: + # Resize the image to the specified height and width. + image = tf.expand_dims(image, 0) + image = tf.image.resize(image, [self.height, self.width], method=tf.image.ResizeMethod.BILINEAR) + image = tf.squeeze(image, [0]) + + image = tf.subtract(image, 0.5) + image = tf.multiply(image, 2.0) + means = tf.broadcast_to(self.mean_value, tf.shape(input=image)) + image = (image - means) * self.scale + return (image, label) + + +class ComposeTransform(object): + """Composes several transforms together. + + Args: + transform_list (list of Transform objects): list of transforms to compose + + Returns: + sample (tuple): tuple of processed image and label + """ + + def __init__(self, transform_list): + """Initialize `ComposeTransform` class.""" + self.transform_list = transform_list + + def __call__(self, sample): + """Call transforms in transform_list.""" + for transform in self.transform_list: + sample = transform(sample) + return sample + + +class ShiftRescale(object): + """Label shift by 1 and rescale. + + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + image, label = sample + label -= 1 + image = (image - 127.5) / 127.5 + return (image, label) + + +class LabelShift(object): + """Convert label to label - label_shift. + + Args: + label_shift(int, default=0): number of label shift + + Returns: + tuple of processed image and label + """ + + def __init__(self, label_shift=0): + """Initialize `LabelShift` class.""" + self.label_shift = label_shift + + def __call__(self, sample): + """Convert label to label_shift.""" + images, labels = sample + if isinstance(labels, np.ndarray): + labels = labels - self.label_shift + elif isinstance(labels, list): + if isinstance(labels[0], tuple): + labels = [tuple(np.array(label) - self.label_shift) for label in labels] + elif isinstance(labels[0], np.ndarray): + labels = [label - self.label_shift for label in labels] + else: + labels = np.array(labels) - self.label_shift + labels = labels.tolist() + else: + labels = np.array(labels) - self.label_shift + return images, labels + + +class ImageRecordDataset(object): + """Tensorflow imageNet database in tf record format. + + Please arrange data in this way: + root/validation-000-of-100 + root/validation-001-of-100 + ... + root/validation-099-of-100 + The file name needs to follow this pattern: '* - * -of- *' + + Args: root (str): Root directory of dataset. + transform (transform object, default=None): transform to process input data. 
+ filter (Filter objects, default=None): filter out examples according + to specific conditions. + """ + + """Configuration for Imagenet dataset.""" + + def __new__(cls, root, transform=None, filter=None): + """Build a new object of TensorflowImageRecord class.""" + from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module + + glob_pattern = os.path.join(root, "*-*-of-*") + file_names = gfile.Glob(glob_pattern) + if not file_names: + raise ValueError("Found no files in --root matching: {}".format(glob_pattern)) + + # pylint: disable=no-name-in-module + from tensorflow.python.data.experimental import parallel_interleave + + ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) + ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names))) + + if transform is not None: + transform.transform_list.insert(0, ParseDecodeImagenet()) + else: + transform = ParseDecodeImagenet() + ds = ds.map(transform, num_parallel_calls=None) + ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned + return ds + + +class BaseMetric(object): + """The base class of Metric.""" + + def __init__(self, metric, single_output=False, hvd=None): + """Initialize the basic metric. + + Args: + metric: The metric class. + single_output: Whether the output is single or not, defaults to False. + hvd: The Horovod class for distributed training, defaults to None. + """ + self._metric_cls = metric + self._single_output = single_output + self._hvd = hvd + + def __call__(self, *args, **kwargs): + """Evaluate the model predictions, and the reference. + + Returns: + The class itself. + """ + self._metric = self._metric_cls(*args, **kwargs) + return self + + @abstractmethod + def update(self, preds, labels=None, sample_weight=None): + """Update the state that need to be evaluated. + + Args: + preds: The prediction result. + labels: The reference. Defaults to None. + sample_weight: The sampling weight. Defaults to None. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def reset(self): + """Clear the predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @abstractmethod + def result(self): + """Evaluate the difference between predictions and labels. + + Raises: + NotImplementedError: The method should be implemented by subclass. + """ + raise NotImplementedError + + @property + def metric(self): + """Return its metric class. + + Returns: + The metric class. + """ + return self._metric_cls + + @property + def hvd(self): + """Return its hvd class. + + Returns: + The hvd class. + """ + return self._hvd + + @hvd.setter + def hvd(self, hvd): + """Set its hvd. + + Args: + hvd: The Horovod class for distributed training. + """ + self._hvd = hvd + + +class TopKMetric(BaseMetric): + """Compute Top-k Accuracy classification score for Tensorflow model. + + This metric computes the number of times where the correct label is among + the top k labels predicted. + + Attributes: + k (int): The number of most likely outcomes considered to find the correct label. + num_correct: The number of predictions that were correct classified. + num_sample: The total number of predictions. + """ + + def __init__(self, k=1): + """Initialize the k, number of samples and correct predictions. + + Args: + k: The number of most likely outcomes considered to find the correct label. 
+ """ + self.k = k + self.num_correct = 0 + self.num_sample = 0 + + def update(self, preds, labels, sample_weight=None): + """Add the predictions and labels. + + Args: + preds: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. + """ + preds, labels = TopKMetric._topk_shape_validate(preds, labels) + + labels = labels.reshape([len(labels)]) + with tf.Graph().as_default() as acc_graph: + topk = tf.nn.in_top_k( + predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k + ) + fp32_topk = tf.cast(topk, tf.float32) + correct_tensor = tf.reduce_sum(input_tensor=fp32_topk) + + with tf.compat.v1.Session() as acc_sess: + correct = acc_sess.run(correct_tensor) + + self.num_sample += len(labels) + self.num_correct += correct + + def reset(self): + """Reset the number of samples and correct predictions.""" + self.num_correct = 0 + self.num_sample = 0 + + def result(self): + """Compute the top-k score. + + Returns: + The top-k score. + """ + if self.num_sample == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + elif getattr(self, "_hvd", None) is not None: # pragma: no cover + allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) + allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) + return allgather_num_correct / allgather_num_sample + return self.num_correct / self.num_sample + + @staticmethod + def _topk_shape_validate(preds, labels): + # preds shape can be Nxclass_num or class_num(N=1 by default) + # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax + if isinstance(preds, int): + preds = [preds] + preds = np.array(preds) + elif isinstance(preds, np.ndarray): + preds = np.array(preds) + elif isinstance(preds, list): + preds = np.array(preds) + preds = preds.reshape((-1, preds.shape[-1])) + + # consider labels just int value 1x1 + if isinstance(labels, int): + labels = [labels] + labels = np.array(labels) + elif isinstance(labels, tuple): + labels = np.array([labels]) + labels = labels.reshape((labels.shape[-1], -1)) + elif isinstance(labels, list): + if isinstance(labels[0], int): + labels = np.array(labels) + labels = labels.reshape((labels.shape[0], 1)) + elif isinstance(labels[0], tuple): + labels = np.array(labels) + labels = labels.reshape((labels.shape[-1], -1)) + else: + labels = np.array(labels) + # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot) + # only support 2 dimension one-shot labels + # or 1 dimension one-hot class_num will confuse with N + + if len(preds.shape) == 1: + N = 1 + class_num = preds.shape[0] + preds = preds.reshape([-1, class_num]) + elif len(preds.shape) >= 2: + N = preds.shape[0] + preds = preds.reshape([N, -1]) + class_num = preds.shape[1] + + label_N = labels.shape[0] + assert label_N == N, "labels batch size should same with preds" + labels = labels.reshape([N, -1]) + # one-hot labels will have 2 dimension not equal 1 + if labels.shape[1] != 1: + labels = labels.argsort()[..., -1:] + return preds, labels + + +class TFDataLoader(object): # pragma: no cover + """Tensorflow dataloader class. + + In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict + method to do session run, this dataloader is designed to satisfy the usage of feed dict + in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. + + Args: + dataset: obj. wrapper of needed data. + batch_size: int. 
batch size + """ + + def __init__(self, dataset, batch_size=1, last_batch="rollover"): + """Initialize `TFDataDataLoader` class.""" + self.dataset = dataset + self.last_batch = last_batch + self.batch_size = batch_size + dataset = dataset.batch(batch_size) + + def batch(self, batch_size, last_batch="rollover"): + """Dataset return data per batch.""" + drop_last = False if last_batch == "rollover" else True + self.batch_size = batch_size + self.dataset = self.dataset.batch(batch_size, drop_last) + + def __iter__(self): + """Iterate dataloader.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + ) + + def _generate_dataloader( + self, + dataset, + batch_size=1, + last_batch="rollover", + collate_fn=None, + sampler=None, + batch_sampler=None, + num_workers=None, + pin_memory=None, + distributed=False, + ): + """Yield data.""" + drop_last = False if last_batch == "rollover" else True + + def check_dynamic_shape(element_spec): + if isinstance(element_spec, collections.abc.Sequence): + return any([check_dynamic_shape(ele) for ele in element_spec]) + elif isinstance(element_spec, tf.TensorSpec): + return True if element_spec.shape.num_elements() is None else False + else: + raise ValueError("unrecognized element spec...") + + def squeeze_output(output): + if isinstance(output, collections.abc.Sequence): + return [squeeze_output(ele) for ele in output] + elif isinstance(output, np.ndarray): + return np.squeeze(output, axis=0) + else: + raise ValueError("not supported output format....") + + if tf.executing_eagerly(): + index = 0 + outputs = [] + for iter_tensors in dataset: + samples = [] + iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] + if isinstance(iter_inputs, tf.Tensor): + samples.append(iter_inputs.numpy()) + else: + samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) + if isinstance(iter_labels, tf.Tensor): + samples.append(iter_labels.numpy()) + else: + samples.append([np.array(l) for l in iter_labels]) + index += 1 + outputs.append(samples) + if index == batch_size: + outputs = default_collate(outputs) + yield outputs + outputs = [] + index = 0 + if len(outputs) > 0: + outputs = default_collate(outputs) + yield outputs + else: + try_single_batch = check_dynamic_shape(dataset.element_spec) + dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) + ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) + iter_tensors = ds_iterator.get_next() + data_config = tf.compat.v1.ConfigProto() + data_config.use_per_session_threads = 1 + data_config.intra_op_parallelism_threads = 1 + data_config.inter_op_parallelism_threads = 16 + data_sess = tf.compat.v1.Session(config=data_config) + # pylint: disable=no-name-in-module + from tensorflow.python.framework.errors_impl import OutOfRangeError + + while True: + if not try_single_batch: + try: + outputs = data_sess.run(iter_tensors) + yield outputs + except OutOfRangeError: + data_sess.close() + return + else: + try: + outputs = [] + for i in range(0, batch_size): + outputs.append(squeeze_output(data_sess.run(iter_tensors))) + outputs = default_collate(outputs) + yield outputs + except OutOfRangeError: + if len(outputs) == 0: + data_sess.close() + return + else: + outputs = default_collate(outputs) + yield outputs + data_sess.close() + return diff --git a/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/main.py b/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/main.py new file mode 100644 index 
00000000000..97254a194c1 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/main.py @@ -0,0 +1,141 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import time + +import numpy as np +import tensorflow as tf + +from neural_compressor.utils import logger +from data_process import ( + ImageRecordDataset, + ComposeTransform, + BilinearImagenetTransform, + TFDataLoader, + TopKMetric, + LabelShift +) + +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +flags = tf.compat.v1.flags +FLAGS = flags.FLAGS + +## Required parameters +flags.DEFINE_string( + 'input_model', None, 'Run inference with specified keras model.') + +flags.DEFINE_string( + 'output_model', None, 'The output quantized model.') + +flags.DEFINE_string( + 'mode', 'performance', 'define benchmark mode for accuracy or performance') + +flags.DEFINE_bool( + 'tune', False, 'whether to tune the model') + +flags.DEFINE_bool( + 'benchmark', False, 'whether to benchmark the model') + +flags.DEFINE_string( + 'calib_data', None, 'location of calibration dataset') + +flags.DEFINE_string( + 'eval_data', None, 'location of evaluate dataset') + +flags.DEFINE_integer('batch_size', 32, 'batch_size') + +flags.DEFINE_integer( + 'iters', 100, 'maximum iteration when evaluating performance') + +height = width = 224 +eval_dataset = ImageRecordDataset(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \ + [BilinearImagenetTransform(height=height, width=width)])) + +eval_dataloader = TFDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) + +if FLAGS.calib_data: + calib_dataset = ImageRecordDataset(root=FLAGS.calib_data, transform= \ + ComposeTransform(transform_list= [BilinearImagenetTransform(height=height, width=width)])) + calib_dataloader = TFDataLoader(dataset=calib_dataset, batch_size=10) + +def evaluate(model): + """ + Custom evaluate function to inference the model for specified metric on validation dataset. + + Args: + model (tf.keras.Model): The input model will be the objection of tf.keras.Model. + + Returns: + accuracy (float): evaluation result, the larger is better. 
+ """ + latency_list = [] + metric = TopKMetric() + postprocess = LabelShift(label_shift=1) + + def eval_func(dataloader, metric): + warmup = 5 + iteration = None + if FLAGS.benchmark and FLAGS.mode == 'performance': + iteration = FLAGS.iters + for idx, (inputs, labels) in enumerate(dataloader): + start = time.time() + predictions = model.predict_on_batch(inputs) + end = time.time() + latency_list.append(end - start) + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + if iteration and idx >= iteration: + break + latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size + return latency + + latency = eval_func(eval_dataloader, metric) + if FLAGS.benchmark: + logger.info("\n{} mode benchmark result:".format(FLAGS.mode)) + for i, res in enumerate(latency_list): + logger.debug("Iteration {} result {}:".format(i, res)) + if FLAGS.benchmark and FLAGS.mode == 'performance': + logger.info("Batch size = {}".format(eval_dataloader.batch_size)) + logger.info("Latency: {:.3f} ms".format(latency * 1000)) + logger.info("Throughput: {:.3f} images/sec".format(1. / latency)) + acc = metric.result() + return acc + +def main(_): + if FLAGS.tune: + from neural_compressor.common import set_random_seed + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + + set_random_seed(9527) + quant_config = StaticQuantConfig() + q_model = quantize_model(FLAGS.input_model, quant_config, calib_dataloader) + q_model.save(FLAGS.output_model) + + if FLAGS.benchmark: + from neural_compressor.tensorflow import Model + + inc_model = Model(FLAGS.input_model) + if FLAGS.mode == 'performance': + evaluate(inc_model.model) + else: + accuracy = evaluate(inc_model.model) + logger.info('Batch size = %d' % FLAGS.batch_size) + logger.info("Accuracy: %.5f" % accuracy) + +if __name__ == "__main__": + tf.compat.v1.app.run() diff --git a/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/prepare_model.py b/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/prepare_model.py new file mode 100644 index 00000000000..f8cd505f965 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/prepare_model.py @@ -0,0 +1,35 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import argparse +import tensorflow as tf +def get_resnet50_v2_model(saved_path): + model = tf.keras.applications.ResNet50V2(weights='imagenet') + model.save(saved_path) + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Export pretained keras model', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--output_model', + type=str, + help='path to exported model file') + + args = parser.parse_args() + get_resnet50_v2_model(args.output_model) diff --git a/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/requirements.txt new file mode 100644 index 00000000000..8b7b47da969 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/requirements.txt @@ -0,0 +1,2 @@ +tensorflow>=2.11.1 +intel-extension-for-tensorflow[cpu] diff --git a/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/run_benchmark.sh new file mode 100644 index 00000000000..d464b019f8e --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/run_benchmark.sh @@ -0,0 +1,50 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + batch_size=32 + iters=100 + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) + esac + done + +} + +# run_tuning +function run_benchmark { + + python main.py \ + --input_model ${input_model} \ + --benchmark \ + --mode ${mode} \ + --eval_data ${dataset_location} \ + --batch_size ${batch_size} \ + --iters ${iters} +} + +main "$@" diff --git a/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/run_quant.sh new file mode 100644 index 00000000000..7e3ed727f71 --- /dev/null +++ b/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/run_quant.sh @@ -0,0 +1,40 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --input_model ${input_model} \ + --output_model ${output_model} \ + --eval_data ${dataset_location} \ + --calib_data ${dataset_location} \ + --tune +} + +main "$@" From e82746f8fe473ad488b7f4e7343d4eabef1b5707 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Thu, 11 Jul 2024 14:30:14 +0800 Subject: [PATCH 03/14] fix import issues Signed-off-by: zehao-intel --- .../tensorflow/cv/resnet_v2_50/quantization/ptq/main.py | 1 - .../nlp/transformer_lt/quantization/ptq/main.py | 4 +--- .../faster_rcnn_resnet50/quantization/ptq/main.py | 2 +- .../mask_rcnn_inception_v2/quantization/ptq/main.py | 2 +- .../ssd_mobilenet_v1/quantization/ptq/main.py | 6 +++--- .../wide_deep_large_ds/quantization/ptq/main.py | 8 ++++---- 6 files changed, 10 
insertions(+), 13 deletions(-) diff --git a/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/main.py b/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/main.py index 3b9595476e6..bb82476fced 100644 --- a/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/main.py @@ -118,7 +118,6 @@ def run(self): q_model.save(args.output_graph) if args.benchmark: - dataloader = create_dataloader('tensorflow', dataloader_args) dataset = ImageRecordDataset( root=args.dataset_location, transform=ComposeTransform(transform_list= [ diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/main.py b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/main.py index d4d1ff54055..99ac63d2c02 100644 --- a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/main.py @@ -143,9 +143,7 @@ def eval_func(infer_graph, iteration=-1): 'model/Transformer/strided_slice_19:0') ds = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file) - dataloader = DataLoader(framework='tensorflow', dataset=ds, - batch_size=FLAGS.batch_size, collate_fn=collate_fn) - + dataloader = BaseDataLoader(dataset=ds, batch_size=FLAGS.batch_size, collate_fn=collate_fn) config = tf.compat.v1.ConfigProto() config.use_per_session_threads = 1 config.inter_op_parallelism_threads = 1 diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/main.py index 277028064dc..2493297e476 100644 --- a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/main.py @@ -16,13 +16,13 @@ # limitations under the License. # # +from __future__ import division import time import numpy as np import tensorflow as tf -from __future__ import division from argparse import ArgumentParser from data_process import( COCOmAPv2, diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py index 7751c5dadc6..7031a257521 100644 --- a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py @@ -16,13 +16,13 @@ # limitations under the License. # # +from __future__ import division import time import numpy as np import tensorflow as tf -from __future__ import division from argparse import ArgumentParser from data_process import( COCOmAPv2, diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/main.py index b217f63a3ec..cd9f943d374 100644 --- a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/main.py @@ -16,13 +16,13 @@ # limitations under the License. 
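The import fixes in this commit move the `from __future__ import ...` statements above the other imports because Python only accepts them as the very first statement of a module (after an optional docstring). A minimal illustration of that rule, independent of the example code:

```python
"""A module docstring may precede it, but nothing else may."""
from __future__ import division  # must come before all other imports

import sys  # ordinary imports follow

# If the __future__ import were placed after `import sys`, as in the original
# files, CPython would refuse to import the module with:
#   SyntaxError: from __future__ imports must occur at the beginning of the file
print(1 / 2)  # 0.5 -- true division (the default on Python 3 in any case)
```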
# # +from __future__ import division import time import numpy as np import tensorflow as tf -from __future__ import division from argparse import ArgumentParser from data_process import( COCOmAPv2, @@ -93,7 +93,7 @@ def eval_func(dataloader): eval_dataset = COCORecordDataset(root=args.dataset_location, filter=None, \ transform=ComposeTransform(transform_list=[ResizeTFTransform(size=300)])) - eval_dataloader=TFDataLoader(framework='tensorflow', dataset=eval_dataset, batch_size=args.batch_size) + eval_dataloader=TFDataLoader(dataset=eval_dataset, batch_size=args.batch_size) latency = eval_func(eval_dataloader) if args.benchmark and args.mode == 'performance': print("Batch size = {}".format(args.batch_size)) @@ -105,7 +105,7 @@ def eval_func(dataloader): def main(_): calib_dataset = COCORecordDataset(root=args.dataset_location, filter=None, \ transform=ComposeTransform(transform_list=[ResizeTFTransform(size=300)])) - calib_dataloader = TFDataLoader(framework='tensorflow', dataset=calib_dataset, batch_size=args.batch_size) + calib_dataloader = TFDataLoader(dataset=calib_dataset, batch_size=args.batch_size) if args.tune: from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/main.py b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/main.py index 19ee3647f7e..7ea26e60ff1 100644 --- a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/main.py @@ -18,6 +18,10 @@ # +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + import sys import os import numpy as np @@ -30,10 +34,6 @@ import tensorflow as tf -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - from tensorflow.python.framework import ops from tensorflow.core.framework import graph_pb2 from google.protobuf import text_format From 8c9076670c7c15c3b26a0111c9637e5f7b11275c Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Thu, 11 Jul 2024 16:16:29 +0800 Subject: [PATCH 04/14] fix config Signed-off-by: zehao-intel --- .../.config/model_params_tensorflow_3x.json | 10 +++++----- .../quantization/ptq/data_process.py | 17 +++++++++++++++++ .../quantization/ptq/main.py | 1 + 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/examples/.config/model_params_tensorflow_3x.json b/examples/.config/model_params_tensorflow_3x.json index 69b5bb11059..1e7f8bdab1a 100644 --- a/examples/.config/model_params_tensorflow_3x.json +++ b/examples/.config/model_params_tensorflow_3x.json @@ -12,14 +12,14 @@ "model_src_dir": "nlp/distilbert_base/quantization/ptq", "dataset_location": "/tf_dataset2/datasets/sst2_validation_dataset", "input_model": "/tf_dataset2/models/tensorflow/distilbert_base/fp32/distilbert_base_fp32.pb", - "main_script": "run_inference.py", + "main_script": "main.py", "batch_size": 128 }, "distilbert_base_sq": { "model_src_dir": "nlp/distilbert_base/quantization/ptq", "dataset_location": "/tf_dataset2/datasets/sst2_validation_dataset", "input_model": "/tf_dataset2/models/tensorflow/distilbert_base/fp32/distilbert_base_fp32.pb", - "main_script": "run_inference.py", + "main_script": "main.py", "batch_size": 128 }, "opt_125m_sq": { @@ -139,7 +139,7 @@ "model_src_dir": "recommendation/wide_deep_large_ds/quantization/ptq", "dataset_location": 
"/tf_dataset/tensorflow/wide_deep_large_ds/dataset", "input_model": "/tf_dataset/tensorflow/wide_deep_large_ds/fp32_optimized_graph.pb", - "main_script": "inference.py", + "main_script": "main.py", "batch_size": 256, "fp32_model_url": "https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/wide_deep_fp32_pretrained_model.pb" }, @@ -147,14 +147,14 @@ "model_src_dir": "semantic_image_segmentation/3dunet-mlperf/quantization/ptq", "dataset_location": "/tf_dataset2/models/tensorflow/3dunet/build", "input_model": "/tf_dataset2/models/tensorflow/3dunet/3dunet_dynamic_ndhwc.pb", - "main_script": "run_accuracy.py", + "main_script": "main.py", "batch_size": 100 }, "style_transfer": { "model_src_dir": "style_transfer/arbitrary_style_transfer/quantization/ptq", "dataset_location": "style_images,content_images", "input_model": "/tf_dataset/tensorflow/style_transfer/arbitrary_style_transfer/model.ckpt", - "main_script": "style_tune.py", + "main_script": "main.py", "batch_size": 1 } } diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/data_process.py index 32e55adb3fd..089227554a5 100644 --- a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/data_process.py +++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/data_process.py @@ -256,6 +256,23 @@ def hvd(self, hvd): self._hvd = hvd +class LabelBalanceCOCORecordFilter(object): + """The label balance filter for COCO Record.""" + + def __init__(self, size=1): + """Initialize the attribute of class.""" + self.size = size + + def __call__(self, image, label): + """Execute the filter. + + Args: + image: Not used. + label: label of a sample. 
+ """ + return tf.math.equal(len(label[0]), self.size) + + class COCOmAPv2(BaseMetric): """Compute mean average precision of the detection task.""" diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py index 7031a257521..fe30e45edba 100644 --- a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py @@ -30,6 +30,7 @@ ComposeTransform, ResizeTFTransform, TFDataLoader, + LabelBalanceCOCORecordFilter, ) arg_parser = ArgumentParser(description='Parse args') From 60fc76c04d87cac6b30fab838e8f3ccb9840ecff Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Thu, 11 Jul 2024 17:05:23 +0800 Subject: [PATCH 05/14] fix keras config Signed-off-by: zehao-intel --- .../cv/inception_v3/quantization/ptq/README.md | 2 +- .../keras/cv/inception_v3/quantization/ptq/main.py | 3 ++- .../cv/mobilenet_v2/quantization/ptq/README.md | 13 +------------ .../keras/cv/mobilenet_v2/quantization/ptq/main.py | 3 ++- .../cv/resnet_v2_50/quantization/ptq/README.md | 13 +------------ .../keras/cv/resnet_v2_50/quantization/ptq/main.py | 3 ++- 6 files changed, 9 insertions(+), 28 deletions(-) diff --git a/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/README.md b/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/README.md index 4fa176c3852..b3dfbb967fa 100644 --- a/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/README.md +++ b/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/README.md @@ -38,7 +38,7 @@ python prepare_model.py --output_model=/path/to/model We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. 
```shell - cd examples/keras/image_recognition/ + cd examples/3.x_api/tensorflow/keras/cv/ # convert validation subset bash prepare_dataset.sh --output_dir=/inception_v3/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation # convert train subset diff --git a/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/main.py b/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/main.py index 2a8ba68a6cf..abccf02d8f5 100644 --- a/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/main.py @@ -119,7 +119,8 @@ def eval_func(dataloader, metric): def main(_): if FLAGS.tune: from neural_compressor.common import set_random_seed - from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + from neural_compressor.tensorflow import quantize_model + from neural_compressor.tensorflow.keras import StaticQuantConfig set_random_seed(9527) quant_config = StaticQuantConfig() diff --git a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/README.md b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/README.md index cd7a564b505..9c85a116ce6 100644 --- a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/README.md +++ b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/README.md @@ -37,7 +37,7 @@ python prepare_model.py --output_model=/path/to/model We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. ```shell - cd examples/keras/image_recognition/ + cd examples/3.x_api/tensorflow/keras/cv/ # convert validation subset bash prepare_dataset.sh --output_dir=/mobilenet_v2/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation # convert train subset @@ -52,17 +52,6 @@ python prepare_model.py --output_model=/path/to/model # Run Command -## Quantization Config -The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. - -``` -config = PostTrainingQuantConfig( - device="gpu", - backend="itex", - ... 
- ) -``` - ## Quantization ```shell bash run_quant.sh --input_model=./mobilenet_v2_keras/ --output_model=./result --dataset_location=/path/to/evaluation/dataset diff --git a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/main.py b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/main.py index 0e8767eb0a9..b577b24892f 100644 --- a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/main.py @@ -119,7 +119,8 @@ def eval_func(dataloader, metric): def main(_): if FLAGS.tune: from neural_compressor.common import set_random_seed - from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + from neural_compressor.tensorflow import quantize_model + from neural_compressor.tensorflow.keras import StaticQuantConfig set_random_seed(9527) quant_config = StaticQuantConfig() diff --git a/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/README.md b/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/README.md index 54ab588faf4..251b13b97b3 100644 --- a/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/README.md +++ b/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/README.md @@ -37,7 +37,7 @@ python prepare_model.py --output_model=/path/to/model We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. ```shell - cd examples/keras/image_recognition/ + cd examples/3.x_api/tensorflow/keras/cv/ # convert validation subset bash prepare_dataset.sh --output_dir=/resnetv2_50/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation # convert train subset @@ -52,17 +52,6 @@ python prepare_model.py --output_model=/path/to/model # Run Command -## Quantization Config -The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. - -``` -config = PostTrainingQuantConfig( - device="gpu", - backend="itex", - ... 
- ) -``` - ## Quantization ```shell bash run_quant.sh --input_model=./resnetv2_50_keras/ --output_model=./result --dataset_location=/path/to/evaluation/dataset diff --git a/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/main.py b/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/main.py index 97254a194c1..48f30a79fb4 100644 --- a/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/main.py @@ -119,7 +119,8 @@ def eval_func(dataloader, metric): def main(_): if FLAGS.tune: from neural_compressor.common import set_random_seed - from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + from neural_compressor.tensorflow import quantize_model + from neural_compressor.tensorflow.keras import StaticQuantConfig set_random_seed(9527) quant_config = StaticQuantConfig() From b78e20b0e9f80ef082b2ac38ba8376be5da6f0c5 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Fri, 12 Jul 2024 15:02:25 +0800 Subject: [PATCH 06/14] fix multiple issues Signed-off-by: zehao-intel --- .../quantization/ptq/README.md | 2 + .../quantization/ptq/data_process.py | 137 +++++++++++++++--- .../quantization/ptq/main.py | 7 +- .../quantization/ptq/main.py | 5 +- .../3dunet-mlperf/quantization/ptq/README.md | 17 ++- .../3dunet-mlperf/quantization/ptq/main.py | 4 +- .../quantization/ptq/requirements.txt | 1 + .../quantization/ptq/README.md | 28 +--- .../quantization/ptq/data_process.py | 127 ---------------- .../quantization/ptq/main.py | 7 +- .../algorithms/static_quant/keras.py | 2 + 11 files changed, 149 insertions(+), 188 deletions(-) diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md index 3091b712180..c672d5e6148 100644 --- a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md +++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md @@ -103,6 +103,8 @@ Now we support both pb and ckpt formats. ## 2. Benchmark ```shell bash run_benchmark.sh --input_model=./tensorflow-mask_rcnn_inception_v2-tune.pb --dataset_location=/path/to/dataset/coco_val.record --mode=performance + + bash run_benchmark.sh --input_model=./tensorflow-mask_rcnn_inception_v2-tune.pb --dataset_location=/path/to/dataset/coco_val.record --mode=accuracy ``` Details of enabling Intel® Neural Compressor on mask_rcnn_inception_v2 for Tensorflow. diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/data_process.py index 089227554a5..8d0a074ee82 100644 --- a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/data_process.py +++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/data_process.py @@ -136,40 +136,135 @@ def __call__(self, sample): return sample -class ResizeTFTransform(object): - """Resize the input image to the given size. +class ResizeWithRatio(): + """Resize image with aspect ratio and pad it to max shape(optional). + + If the image is padded, the label will be processed at the same time. + The input image should be np.array. 
Args: - size (list or int): Size of the result - interpolation (str, default='bilinear'):Desired interpolation type, - support 'bilinear', 'nearest', 'bicubic' + min_dim (int, default=800): + Resizes the image such that its smaller dimension == min_dim + max_dim (int, default=1365): + Ensures that the image longest side doesn't exceed this value + padding (bool, default=False): + If true, pads image with zeros so its size is max_dim x max_dim Returns: tuple of processed image and label """ - def __init__(self, size, interpolation="bilinear"): - """Initialize `ResizeTFTransform` class.""" - if isinstance(size, int): - self.size = size, size - elif isinstance(size, list): - if len(size) == 1: - self.size = size[0], size[0] - elif len(size) == 2: - self.size = size[0], size[1] - self.interpolation = interpolation + def __init__(self, min_dim=800, max_dim=1365, padding=False, constant_value=0): + """Initialize `ResizeWithRatio` class.""" + self.min_dim = min_dim + self.max_dim = max_dim + self.padding = padding + self.constant_value = constant_value - if self.interpolation not in ["bilinear", "nearest", "bicubic"]: - raise ValueError("Unsupported interpolation type!") + def __call__(self, sample): + """Resize the image with ratio in sample.""" + image, label = sample + height, width = image.shape[:2] + scale = 1 + if self.min_dim: + scale = max(1, self.min_dim / min(height, width)) + if self.max_dim: + image_max = max(height, width) + if round(image_max * scale) > self.max_dim: + scale = self.max_dim / image_max + if scale != 1: + image = cv2.resize(image, (round(height * scale), round(width * scale))) + + bbox, str_label, int_label, image_id = label + + if self.padding: + h, w = image.shape[:2] + pad_param = [ + [(self.max_dim - h) // 2, self.max_dim - h - (self.max_dim - h) // 2], + [(self.max_dim - w) // 2, self.max_dim - w - (self.max_dim - w) // 2], + [0, 0], + ] + if not isinstance(bbox, np.ndarray): + bbox = np.array(bbox) + resized_box = bbox * [height, width, height, width] * scale + moved_box = resized_box + [ + (self.max_dim - h) // 2, + (self.max_dim - w) // 2, + (self.max_dim - h) // 2, + (self.max_dim - w) // 2, + ] + bbox = moved_box / [self.max_dim, self.max_dim, self.max_dim, self.max_dim] + image = np.pad(image, pad_param, mode="constant", constant_values=self.constant_value) + return image, (bbox, str_label, int_label, image_id) + + +class TensorflowResizeWithRatio(): + """Resize image with aspect ratio and pad it to max shape(optional). + + If the image is padded, the label will be processed at the same time. + The input image should be np.array or tf.Tensor. 
+ + Args: + min_dim (int, default=800): + Resizes the image such that its smaller dimension == min_dim + max_dim (int, default=1365): + Ensures that the image longest side doesn't exceed this value + padding (bool, default=False): + If true, pads image with zeros so its size is max_dim x max_dim + + Returns: + tuple of processed image and label + """ + + def __init__(self, min_dim=800, max_dim=1365, padding=False, constant_value=0): + """Initialize `TensorflowResizeWithRatio` class.""" + self.min_dim = min_dim + self.max_dim = max_dim + self.padding = padding + self.constant_value = constant_value def __call__(self, sample): - """Resize the input image in sample to the given size.""" + """Resize the image with ratio in sample.""" image, label = sample if isinstance(image, tf.Tensor): - image = tf.image.resize(image, self.size, method=self.interpolation) + shape = tf.shape(input=image) + height = tf.cast(shape[0], dtype=tf.float32) + width = tf.cast(shape[1], dtype=tf.float32) + scale = 1 + if self.min_dim: + scale = tf.maximum(1.0, tf.cast(self.min_dim / tf.math.minimum(height, width), dtype=tf.float32)) + if self.max_dim: + image_max = tf.cast(tf.maximum(height, width), dtype=tf.float32) + scale = tf.cond( + pred=tf.greater(tf.math.round(image_max * scale), self.max_dim), + true_fn=lambda: self.max_dim / image_max, + false_fn=lambda: scale, + ) + image = tf.image.resize(image, (tf.math.round(height * scale), tf.math.round(width * scale))) + bbox, str_label, int_label, image_id = label + + if self.padding: + shape = tf.shape(input=image) + h = tf.cast(shape[0], dtype=tf.float32) + w = tf.cast(shape[1], dtype=tf.float32) + pad_param = [ + [(self.max_dim - h) // 2, self.max_dim - h - (self.max_dim - h) // 2], + [(self.max_dim - w) // 2, self.max_dim - w - (self.max_dim - w) // 2], + [0, 0], + ] + resized_box = bbox * [height, width, height, width] * scale + moved_box = resized_box + [ + (self.max_dim - h) // 2, + (self.max_dim - w) // 2, + (self.max_dim - h) // 2, + (self.max_dim - w) // 2, + ] + bbox = moved_box / [self.max_dim, self.max_dim, self.max_dim, self.max_dim] + image = tf.pad(image, pad_param, constant_values=self.constant_value) else: - image = cv2.resize(image, self.size, interpolation=interpolation_map[self.interpolation]) - return (image, label) + transform = ResizeWithRatio(self.min_dim, self.max_dim, self.padding) + image, (bbox, str_label, int_label, image_id) = transform(sample) + return image, (bbox, str_label, int_label, image_id) class BaseMetric(object): diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py index fe30e45edba..06498ca56e9 100644 --- a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py @@ -28,9 +28,9 @@ COCOmAPv2, COCORecordDataset, ComposeTransform, - ResizeTFTransform, TFDataLoader, LabelBalanceCOCORecordFilter, + TensorflowResizeWithRatio, ) arg_parser = ArgumentParser(description='Parse args') @@ -92,11 +92,12 @@ def eval_func(dataloader): latency = np.array(latency_list[warmup:]).mean() / args.batch_size return latency + use_padding = True if args.mode == 'performance' else False eval_dataset = COCORecordDataset(root=args.dataset_location, filter=None, \ transform=ComposeTransform(transform_list=[TensorflowResizeWithRatio( - min_dim=800, max_dim=1356, 
padding=False)])) + min_dim=800, max_dim=1356, padding=use_padding)])) batch_size = 1 if args.mode == 'accuracy' else args.batch_size - eval_dataloader=TFDataLoader(dataset=eval_dataset, batch_size=args.batch_size) + eval_dataloader=TFDataLoader(dataset=eval_dataset, batch_size=batch_size) latency = eval_func(eval_dataloader) if args.benchmark and args.mode == 'performance': diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/main.py b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/main.py index 7ea26e60ff1..a89efd25537 100644 --- a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/main.py @@ -111,7 +111,7 @@ def _parse_function(proto): return dataset def evaluation_func(model, measurer=None): - evaluate_opt_graph.eval_inference(model) + return evaluate_opt_graph.eval_inference(model) class eval_classifier_optimized_graph: """Evaluate image classifier with optimized TensorFlow graph""" @@ -294,8 +294,7 @@ def eval_inference(self, infer_graph): print('Throughput: %.3f records/sec' % throughput) print('--------------------------------------------------') - if self.args.accuracy: - return accuracy + return accuracy def run(self): """ This is neural_compressor function include tuning and benchmark option """ diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md index 5747ba5b4ac..e4618e7604c 100644 --- a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md @@ -14,9 +14,9 @@ This example can run on Intel CPUs and GPUs. pip install neural-compressor ``` -### Install Intel Tensorflow +### Install requirements ```shell -pip install intel-tensorflow +pip install -r requirements.txt ``` > Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). 
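To make the aspect-ratio logic of the `ResizeWithRatio`/`TensorflowResizeWithRatio` transforms added above easier to follow, here is a standalone restatement of just the scale computation with two worked examples. The helper below is illustrative only (it mirrors the class defaults of min_dim=800, max_dim=1365) and is not part of the example code: the short side is scaled up to min_dim unless that would push the long side past max_dim, in which case the long side is capped instead.

```python
# Illustrative restatement of the scaling rule used by ResizeWithRatio.
def resize_scale(height, width, min_dim=800, max_dim=1365):
    scale = max(1.0, min_dim / min(height, width))      # grow short side to min_dim
    if round(max(height, width) * scale) > max_dim:     # but never exceed max_dim
        scale = max_dim / max(height, width)
    return scale

print(resize_scale(600, 900))    # 1.333... -> image becomes roughly 800 x 1200
print(resize_scale(500, 2000))   # 0.6825   -> long side capped at 1365
```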
@@ -59,15 +59,18 @@ pip install --upgrade intel-extension-for-tensorflow[cpu] # Run command +Please set the following environment variables before running quantization or benchmark commands: + +* `export nnUNet_preprocessed=/build/preprocessed_data` +* `export nnUNet_raw_data_base=/build/raw_data` +* `export RESULTS_FOLDER=/build/result` ## Quantization +`bash run_quant.sh --input_model=3dunet_dynamic_ndhwc.pb --dataset_location=/build --output_model=3dunet_dynamic_ndhwc_int8.pb` ## Benchmark -* `export nnUNet_preprocessed=/build/preprocessed_data` -* `export nnUNet_raw_data_base=/build/raw_data` -* `export RESULTS_FOLDER=/build/result` -* `pip install -r requirements.txt` -* `python run_accuracy.py --input-model= --data-location= --calib-preprocess= --iters=100 --batch-size=1 --mode=benchmark --bfloat16 0` +`bash run_benchmark.sh --input_model=3dunet_dynamic_ndhwc_int8.pb --dataset_location=/build --batch_size=100 --mode=benchmark` +`bash run_benchmark.sh --input_model=3dunet_dynamic_ndhwc_int8.pb --dataset_location=/build --batch_size=1 --mode=accuracy` diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/main.py b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/main.py index 5ba82875420..bc8ce8edc07 100644 --- a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/main.py @@ -51,7 +51,7 @@ def get_args(): help="One of three options: 'benchmark'/'accuracy'/'tune'.") arg_parser.add_argument('-n', "--iters", help='The number of iteration. shall > warmup num(10)', - type=int, default=20) + type=int, default=100) arg_parser.add_argument('-e', "--num-inter-threads", help='The number of inter-thread.', dest='num_inter_threads', type=int, default=0) @@ -209,7 +209,7 @@ def __len__(self): set_random_seed(9527) quant_config = StaticQuantConfig() - calib_dataloader=BaseDataloader(dataset=CalibrationDL()) + calib_dataloader=BaseDataLoader(dataset=CalibrationDL()) q_model = quantize_model(graph, quant_config, calib_dataloader) try: q_model.save(args.output_model) diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt index 4e85853747e..d5069f8038f 100644 --- a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt @@ -1 +1,2 @@ nnunet +tensorflow \ No newline at end of file diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md index 2802279a8c3..6fa291d0b36 100644 --- a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md +++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md @@ -61,7 +61,7 @@ optional arguments: ```shell wget https://storage.googleapis.com/download.magenta.tensorflow.org/models/arbitrary_style_transfer.tar.gz -tar -xvzf arbitrary_style_transfer.tar.gz ./model +tar -xvzf arbitrary_style_transfer.tar.gz ``` ### 3. 
Prepare Dataset @@ -70,22 +70,12 @@ There are two folders named style_images and content_images in current folder. P # Run Command ```shell - python style_tune.py --output_dir=./result --style_images_paths=./style_images --content_images_paths=./content_images --input_model=./model/model.ckpt + python main.py --output_dir=./result --style_images_paths=./style_images --content_images_paths=./content_images --input_model=./model/model.ckpt ``` ## Quantization Config -The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. - -``` -config = PostTrainingQuantConfig( - device="gpu", - backend="itex", - ... - ) -``` - ## Quantization ```shell bash run_quant.sh --dataset_location=style_images/,content_images/ --input_model=./model/model.ckpt --output_model=saved_model @@ -119,13 +109,9 @@ Here we set the input tensor and output tensors name into *inputs* and *outputs* After prepare step is done, we just need add 2 lines to get the quantized model. ```python -from neural_compressor import quantization -from neural_compressor.config import PostTrainingQuantConfig -conf = PostTrainingQuantConfig(inputs=['style_input', 'content_input'], - outputs=['transformer/expand/conv3/conv/Sigmoid'], - calibration_sampling_size=[50, 100]) -quantized_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=dataloader, - eval_dataloader==dataloader) -``` +from neural_compressor.tensorflow import StaticQuantConfig, quantize_model -The Intel® Neural Compressor quantizer.fit() function will return a best quantized model during timeout constrain. +quant_config = StaticQuantConfig() +q_model = quantize_model(graph, quant_config, calib_dataloader) +q_model.save(FLAGS.output_model) +``` diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/data_process.py index e71f6577cac..d49c262bbec 100644 --- a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/data_process.py +++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/data_process.py @@ -360,130 +360,3 @@ def _topk_shape_validate(preds, labels): if labels.shape[1] != 1: labels = labels.argsort()[..., -1:] return preds, labels - - -class TFDataLoader(object): # pragma: no cover - """Tensorflow dataloader class. - - In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict - method to do session run, this dataloader is designed to satisfy the usage of feed dict - in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. - - Args: - dataset: obj. wrapper of needed data. - batch_size: int. 
batch size - """ - - def __init__(self, dataset, batch_size=1, last_batch="rollover"): - """Initialize `TFDataDataLoader` class.""" - self.dataset = dataset - self.last_batch = last_batch - self.batch_size = batch_size - dataset = dataset.batch(batch_size) - - def batch(self, batch_size, last_batch="rollover"): - """Dataset return data per batch.""" - drop_last = False if last_batch == "rollover" else True - self.batch_size = batch_size - self.dataset = self.dataset.batch(batch_size, drop_last) - - def __iter__(self): - """Iterate dataloader.""" - return self._generate_dataloader( - self.dataset, - batch_size=self.batch_size, - last_batch=self.last_batch, - ) - - def _generate_dataloader( - self, - dataset, - batch_size=1, - last_batch="rollover", - collate_fn=None, - sampler=None, - batch_sampler=None, - num_workers=None, - pin_memory=None, - distributed=False, - ): - """Yield data.""" - drop_last = False if last_batch == "rollover" else True - - def check_dynamic_shape(element_spec): - if isinstance(element_spec, collections.abc.Sequence): - return any([check_dynamic_shape(ele) for ele in element_spec]) - elif isinstance(element_spec, tf.TensorSpec): - return True if element_spec.shape.num_elements() is None else False - else: - raise ValueError("unrecognized element spec...") - - def squeeze_output(output): - if isinstance(output, collections.abc.Sequence): - return [squeeze_output(ele) for ele in output] - elif isinstance(output, np.ndarray): - return np.squeeze(output, axis=0) - else: - raise ValueError("not supported output format....") - - if tf.executing_eagerly(): - index = 0 - outputs = [] - for iter_tensors in dataset: - samples = [] - iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] - if isinstance(iter_inputs, tf.Tensor): - samples.append(iter_inputs.numpy()) - else: - samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) - if isinstance(iter_labels, tf.Tensor): - samples.append(iter_labels.numpy()) - else: - samples.append([np.array(l) for l in iter_labels]) - index += 1 - outputs.append(samples) - if index == batch_size: - outputs = default_collate(outputs) - yield outputs - outputs = [] - index = 0 - if len(outputs) > 0: - outputs = default_collate(outputs) - yield outputs - else: - try_single_batch = check_dynamic_shape(dataset.element_spec) - dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) - ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) - iter_tensors = ds_iterator.get_next() - data_config = tf.compat.v1.ConfigProto() - data_config.use_per_session_threads = 1 - data_config.intra_op_parallelism_threads = 1 - data_config.inter_op_parallelism_threads = 16 - data_sess = tf.compat.v1.Session(config=data_config) - # pylint: disable=no-name-in-module - from tensorflow.python.framework.errors_impl import OutOfRangeError - - while True: - if not try_single_batch: - try: - outputs = data_sess.run(iter_tensors) - yield outputs - except OutOfRangeError: - data_sess.close() - return - else: - try: - outputs = [] - for i in range(0, batch_size): - outputs.append(squeeze_output(data_sess.run(iter_tensors))) - outputs = default_collate(outputs) - yield outputs - except OutOfRangeError: - if len(outputs) == 0: - data_sess.close() - return - else: - outputs = default_collate(outputs) - yield outputs - data_sess.close() - return diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/main.py 
b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/main.py index 577e9094209..440b0cee4af 100644 --- a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/main.py @@ -26,8 +26,8 @@ from PIL import Image import time +from neural_compressor.tensorflow.utils import BaseDataLoader, DummyDatasetV2 from data_process import ( - TFDataLoader, StyleTransferDataset, ComposeTransform, ParseDecodeVocTransform, @@ -137,7 +137,7 @@ def main(args=None): ] ) ) - calib_dataloader = TFDataLoader(dataset=dataset, batch_size=FLAGS.batch_size) + calib_dataloader = BaseDataLoader(dataset=dataset, batch_size=FLAGS.batch_size) quant_config = StaticQuantConfig() q_model = quantize_model(graph, quant_config, calib_dataloader) @@ -156,10 +156,9 @@ def main(args=None): resize_shape=(256, 256) ) else: - from neural_compressor.tensorflow.utils import DummyDatasetV2 dataset = DummyDatasetV2(input_shape=[(256, 256, 3), (256, 256, 3)], label_shape=(1, )) - dataloader = TFDataLoader(dataset=dataset, batch_size=FLAGS.batch_size) + dataloader = BaseDataLoader(dataset=dataset, batch_size=FLAGS.batch_size) tf.import_graph_def(frozen_graph, name='') style_transfer(sess, dataloader) diff --git a/neural_compressor/tensorflow/algorithms/static_quant/keras.py b/neural_compressor/tensorflow/algorithms/static_quant/keras.py index 004393c8c27..6e8df825df2 100644 --- a/neural_compressor/tensorflow/algorithms/static_quant/keras.py +++ b/neural_compressor/tensorflow/algorithms/static_quant/keras.py @@ -204,6 +204,8 @@ def _fuse_bn_keras2(self, fuse_conv_bn, fp32_layers): # pragma: no cover else: for bound_node in layer._inbound_nodes: inbound_layer = bound_node.inbound_layers + if isinstance(inbound_layer, list) and len(inbound_layer) == 0: + continue if inbound_layer in self.bn_weights.keys(): for bn_inbound_node in inbound_layer._inbound_nodes: bn_inbound_layer = bn_inbound_node.inbound_layers From 88d86002cdcf73f71dd787c7cf1e43a415151d47 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Mon, 15 Jul 2024 16:51:23 +0800 Subject: [PATCH 07/14] fix transformer_lt Signed-off-by: zehao-intel --- .../transformer_lt/quantization/ptq/README.md | 22 +++++++++---------- .../transformer_lt/quantization/ptq/main.py | 5 ++--- .../algorithms/static_quant/keras.py | 2 +- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/README.md b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/README.md index 9aad1dda2a9..544e954371e 100644 --- a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/README.md +++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/README.md @@ -58,22 +58,22 @@ bash prepare_dataset_model.sh ``` ## Run Command +### Quantization ```shell -python main.py --input_graph=/path/to/fp32_graphdef.pb --inputs_file=/path/to/newstest2014.en --reference_file=/path/to/newstest2014.de --vocab_file=/path/to/vocab.txt --tune +bash run_quant.sh --input_model=./model/fp32_graphdef.pb --dataset_location=./data --output_model=./model/int8_graphdef.pb +``` +### Benchmark +```shell +bash run_benchmark.sh --input_model=./model/int8_graphdef.pb --dataset_location=./data --mode=performance + +bash run_benchmark.sh --input_model=./model/int8_graphdef.pb --dataset_location=./data --mode=accuracy --batch_size=1 ``` Details of enabling Intel® Neural Compressor on transformer-lt for 
Tensorflow. ========================= This is a tutorial of how to enable transformer-lt model with Intel® Neural Compressor. -## User Code Analysis -1. User specifies fp32 *model*, calibration dataset *q_dataloader*, evaluation dataset *eval_dataloader* and metric in tuning.metric field of model-specific yaml config file. - -2. User specifies fp32 *model*, calibration dataset *q_dataloader* and a custom *eval_func* which encapsulates the evaluation dataset and metric by itself. - -For transformer-lt, we applied the latter one because we don't have dataset and metric for transformer-lt. The task is to implement the *q_dataloader* and *eval_func*. - ### q_dataloader Part Adaption Below dataset class uses getitem to provide the model with input. @@ -124,9 +124,7 @@ After prepare step is done, we add tune code to generate quantized model. if FLAGS.benchmark: assert FLAGS.mode == 'performance' or FLAGS.mode == 'accuracy', \ "Benchmark only supports performance or accuracy mode." - eval_func(graph) - elif FLAGS.mode == 'accuracy': acc = eval_func(graph) - print('Accuracy is {:.3f}'.format(acc)) + if FLAGS.mode == 'accuracy': + print('Accuracy is {:.3f}'.format(acc)) ``` -The Intel® Neural Compressor quantization.fit() function will return a best quantized model under time constraint. diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/main.py b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/main.py index 99ac63d2c02..58a93090e7a 100644 --- a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/main.py @@ -250,10 +250,9 @@ def main(_): if FLAGS.benchmark: assert FLAGS.mode == 'performance' or FLAGS.mode == 'accuracy', \ "Benchmark only supports performance or accuracy mode." 
- eval_func(graph) - elif FLAGS.mode == 'accuracy': acc = eval_func(graph) - print('Accuracy is {:.3f}'.format(acc)) + if FLAGS.mode == 'accuracy': + print('Accuracy is {:.3f}'.format(acc)) if __name__ == "__main__": tf.compat.v1.app.run() diff --git a/neural_compressor/tensorflow/algorithms/static_quant/keras.py b/neural_compressor/tensorflow/algorithms/static_quant/keras.py index 6e8df825df2..32700e4c977 100644 --- a/neural_compressor/tensorflow/algorithms/static_quant/keras.py +++ b/neural_compressor/tensorflow/algorithms/static_quant/keras.py @@ -87,7 +87,7 @@ def __init__(self, framework_specific_info): self.fold_conv = [] self.keras3 = True if version1_gte_version2(tf.__version__, "2.16.1") else False if not os.path.exists(DEFAULT_WORKSPACE): - os.mkdir(DEFAULT_WORKSPACE) + os.makedirs(DEFAULT_WORKSPACE) self.tmp_dir = (DEFAULT_WORKSPACE + "tmp_model.keras") if self.keras3 else (DEFAULT_WORKSPACE + "tmp_model") def _set_weights(self, qmodel, layer_weights): From b92543c8d682c516e2cf31eb54c6072b03676e02 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Mon, 15 Jul 2024 22:35:32 +0800 Subject: [PATCH 08/14] fix keras Signed-off-by: zehao-intel --- .../inception_v3/quantization/ptq/README.md | 8 +++--- .../mobilenet_v2/quantization/ptq/README.md | 8 +++--- .../resnet_v2_50/quantization/ptq/README.md | 8 +++--- .../algorithms/static_quant/keras.py | 27 ++++++++++--------- 4 files changed, 26 insertions(+), 25 deletions(-) diff --git a/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/README.md b/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/README.md index b3dfbb967fa..a9275ab13ce 100644 --- a/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/README.md +++ b/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/README.md @@ -27,7 +27,7 @@ pip install -r requirements.txt The pretrained model is provided by [Keras Applications](https://keras.io/api/applications/). prepare the model, Run as follow: ``` -python prepare_model.py --output_model=/path/to/model +python prepare_model.py --output_model=./inception_v3_keras ``` `--output_model ` the model should be saved as SavedModel format or H5 format. 
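The `prepare_model.py` step referenced above saves the pretrained Keras Applications network to disk. A hypothetical sketch of what such a script can look like follows; the body is an assumption for illustration, not the script shipped with the example, and the saving behavior noted in the comments (SavedModel for a plain directory path, HDF5 for a `*.h5` path) assumes TF2 with Keras 2.

```python
# Hypothetical stand-in for prepare_model.py (illustration only, not the
# script shipped with this example).
import argparse

import tensorflow as tf


def main():
    parser = argparse.ArgumentParser(description="Export pretrained InceptionV3")
    parser.add_argument("--output_model", default="./inception_v3_keras",
                        help="Where to save the model (directory for SavedModel, *.h5 for HDF5).")
    args = parser.parse_args()

    # Downloads the ImageNet-pretrained weights on first use.
    model = tf.keras.applications.InceptionV3(weights="imagenet")

    # With TF2/Keras 2: a plain directory path -> SavedModel, a *.h5 path -> HDF5.
    model.save(args.output_model)
    print("Saved pretrained model to {}".format(args.output_model))


if __name__ == "__main__":
    main()
```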
@@ -40,9 +40,9 @@ python prepare_model.py --output_model=/path/to/model ```shell cd examples/3.x_api/tensorflow/keras/cv/ # convert validation subset - bash prepare_dataset.sh --output_dir=/inception_v3/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + bash prepare_dataset.sh --output_dir=./inception_v3/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation # convert train subset - bash prepare_dataset.sh --output_dir=/inception_v3/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train + bash prepare_dataset.sh --output_dir=./inception_v3/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train cd inception_v3/quantization/ptq ``` > **Note**: @@ -55,7 +55,7 @@ python prepare_model.py --output_model=/path/to/model ## Quantization ```shell - bash run_quant.sh --input_model=./inception_v3_keras/ --output_model=./result --dataset_location=/path/to/evaluation/dataset + bash run_quant.sh --input_model=./inception_v3_keras --output_model=./result --dataset_location=/path/to/evaluation/dataset ``` ## Benchmark diff --git a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/README.md b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/README.md index 9c85a116ce6..5f7da9d595f 100644 --- a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/README.md +++ b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/README.md @@ -27,7 +27,7 @@ pip install -r requirements.txt The pretrained model is provided by [Keras Applications](https://keras.io/api/applications/). prepare the model, Run as follow: ``` -python prepare_model.py --output_model=/path/to/model +python prepare_model.py --output_model=./mobilenet_v2_keras ``` `--output_model ` the model should be saved as SavedModel format or H5 format. @@ -39,9 +39,9 @@ python prepare_model.py --output_model=/path/to/model ```shell cd examples/3.x_api/tensorflow/keras/cv/ # convert validation subset - bash prepare_dataset.sh --output_dir=/mobilenet_v2/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + bash prepare_dataset.sh --output_dir=./mobilenet_v2/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation # convert train subset - bash prepare_dataset.sh --output_dir=/mobilenet_v2/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train + bash prepare_dataset.sh --output_dir=./mobilenet_v2/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train cd mobilenet_v2/quantization/ptq ``` > **Note**: @@ -54,7 +54,7 @@ python prepare_model.py --output_model=/path/to/model ## Quantization ```shell - bash run_quant.sh --input_model=./mobilenet_v2_keras/ --output_model=./result --dataset_location=/path/to/evaluation/dataset + bash run_quant.sh --input_model=./mobilenet_v2_keras --output_model=./result --dataset_location=/path/to/evaluation/dataset ``` ## Benchmark diff --git a/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/README.md b/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/README.md index 251b13b97b3..a276ef7cd0d 100644 --- a/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/README.md +++ b/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/README.md @@ -27,7 +27,7 @@ pip install -r requirements.txt The pretrained model is provided by [Keras Applications](https://keras.io/api/applications/). 
prepare the model, Run as follow: ``` -python prepare_model.py --output_model=/path/to/model +python prepare_model.py --output_model=./resnetv2_50_keras ``` `--output_model ` the model should be saved as SavedModel format or H5 format. @@ -39,9 +39,9 @@ python prepare_model.py --output_model=/path/to/model ```shell cd examples/3.x_api/tensorflow/keras/cv/ # convert validation subset - bash prepare_dataset.sh --output_dir=/resnetv2_50/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + bash prepare_dataset.sh --output_dir=./resnetv2_50/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation # convert train subset - bash prepare_dataset.sh --output_dir=/resnetv2_50/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train + bash prepare_dataset.sh --output_dir=./resnetv2_50/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train cd resnetv2_50/quantization/ptq ``` > **Note**: @@ -54,7 +54,7 @@ python prepare_model.py --output_model=/path/to/model ## Quantization ```shell - bash run_quant.sh --input_model=./resnetv2_50_keras/ --output_model=./result --dataset_location=/path/to/evaluation/dataset + bash run_quant.sh --input_model=./resnetv2_50_keras --output_model=./result --dataset_location=/path/to/evaluation/dataset ``` ## Benchmark diff --git a/neural_compressor/tensorflow/algorithms/static_quant/keras.py b/neural_compressor/tensorflow/algorithms/static_quant/keras.py index 32700e4c977..f6803c03604 100644 --- a/neural_compressor/tensorflow/algorithms/static_quant/keras.py +++ b/neural_compressor/tensorflow/algorithms/static_quant/keras.py @@ -203,19 +203,20 @@ def _fuse_bn_keras2(self, fuse_conv_bn, fp32_layers): # pragma: no cover fuse_layers.append(layer) else: for bound_node in layer._inbound_nodes: - inbound_layer = bound_node.inbound_layers - if isinstance(inbound_layer, list) and len(inbound_layer) == 0: - continue - if inbound_layer in self.bn_weights.keys(): - for bn_inbound_node in inbound_layer._inbound_nodes: - bn_inbound_layer = bn_inbound_node.inbound_layers - if bn_inbound_layer.name in self.conv_weights.keys(): - new_bound_nodes.append(bn_inbound_node) - else: - if bound_node not in new_bound_nodes: - new_bound_nodes.append(bound_node) - else: - new_bound_nodes.append(bound_node) + inbound_layers = bound_node.inbound_layers + if not isinstance(inbound_layers, list): + inbound_layers = [inbound_layers] + for inbound_layer in inbound_layers: + if inbound_layer in self.bn_weights.keys(): + for bn_inbound_node in inbound_layer._inbound_nodes: + bn_inbound_layer = bn_inbound_node.inbound_layers + if bn_inbound_layer.name in self.conv_weights.keys(): + new_bound_nodes.append(bn_inbound_node) + else: + if bound_node not in new_bound_nodes: + new_bound_nodes.append(bound_node) + else: + new_bound_nodes.append(bound_node) layer._inbound_nodes.clear() for bound_node in new_bound_nodes: From 6c5ae4c8a5489d4d42ee99a089fb26c8577c18d7 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Tue, 16 Jul 2024 12:01:44 +0800 Subject: [PATCH 09/14] remove distillbert Signed-off-by: zehao-intel --- .../quantization/ptq/README.md | 187 ---------- .../quantization/ptq/download_dataset.py | 38 -- .../distilbert_base/quantization/ptq/main.py | 324 ------------------ .../quantization/ptq/requirements.txt | 7 - .../quantization/ptq/run_benchmark.sh | 88 ----- .../quantization/ptq/run_quant.sh | 83 ----- .../3dunet-mlperf/quantization/ptq/README.md | 2 +- .../quantization/ptq/run_benchmark.sh | 4 + .../tensorflow/utils/model_wrappers.py | 
1 + 9 files changed, 6 insertions(+), 728 deletions(-) delete mode 100644 examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/README.md delete mode 100644 examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/download_dataset.py delete mode 100644 examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/main.py delete mode 100644 examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/requirements.txt delete mode 100644 examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/run_benchmark.sh delete mode 100644 examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/run_quant.sh diff --git a/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/README.md b/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/README.md deleted file mode 100644 index 83d17508bb0..00000000000 --- a/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/README.md +++ /dev/null @@ -1,187 +0,0 @@ -Step-by-Step -============ - -This document is used to list steps of reproducing TensorFlow Intel® Neural Compressor tuning zoo result of DistilBERT base. This example can be run on Intel CPUs and GPUs. - -## Model Details -This DistilBERT base model is based on the paper [*DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter*](https://arxiv.org/abs/1910.01108). \ -The [pretrained-model](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english?text=I+like+you.+I+love+you) thus used, was taken from [Hugging face model repository](https://huggingface.co/models). \ -The frozen model pb can be found at [Model Zoo for Intel® Architecture](https://github.com/IntelAI/models/tree/master/models/language_modeling/tensorflow/distilbert_base/inference). - -## Dataset Details -We use a part of Stanford Sentiment Treebank corpus for our task. Specifically, the validation split present in the SST2 dataset in the hugging face [repository](https://huggingface.co/datasets/sst2). It contains 872 labeled English sentences. The details for downloading the dataset are given below. - -## Prerequisite - -### 1. Install Intel® Neural Compressor -```shell -pip install neural-compressor -``` - -### 2. Install TensorFlow 2.11.dev202242 -Build a TensorFlow pip package from [intel-tensorflow spr_ww42 branch](https://github.com/Intel-tensorflow/tensorflow/tree/spr_ww42) and install it. How to build a TensorFlow pip package from source please refer to this [tutorial](https://www.tensorflow.org/install/source). - -### 3. Install Requirements -```shell -pip install -r requirements.txt -``` - -### 4. Install Intel® Extension for TensorFlow - -#### Quantizing the model on Intel GPU(Mandatory to install ITEX) -Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. - -```shell -pip install --upgrade intel-extension-for-tensorflow[xpu] -``` -Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. -For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers). - -#### Quantizing the model on Intel CPU(Optional to install ITEX) -Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. 
- -```shell -pip install --upgrade intel-extension-for-tensorflow[cpu] -``` - -> **Note**: -> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX. - -### 5. Download Dataset -```shell -python download_dataset.py --path_to_save_dataset -``` - -### 6. Download Model -Download Frozen graph: -```shell -wget https://storage.googleapis.com/intel-optimized-tensorflow/models/2_10_0/distilbert_frozen_graph_fp32_final.pb -``` - -## Run Command -### Run Tuning: -```shell -bash run_quant.sh \ - --input_model=$INPUT_MODEL \ - --dataset_location=$DATASET_DIR \ - --output_model=$OUTPUT_MODEL \ - --batch_size=$BATCH_SIZE \ - --max_seq_length=$MAX_SEQ \ - --warmup_steps=$WARMUPS \ - --num_inter=$INTER_THREADS \ - --num_intra=$INTRA_THREADS -``` -### Run Benchmark: -```shell -# performance mode: get performance -bash run_benchmark.sh \ - --input_model=$INPUT_MODEL \ - --dataset_location=$DATASET_DIR \ - --mode=performance \ - --batch_size=$BATCH_SIZE \ - --max_seq_length=$MAX_SEQ \ - --iters=$ITERS \ - --warmup_steps=$WARMUPS \ - --num_inter=$INTER_THREADS \ - --num_intra=$INTRA_THREADS -``` - -```shell -# accuracy mode: get accuracy -bash run_benchmark.sh \ - --input_model=$INPUT_MODEL \ - --dataset_location=$DATASET_DIR \ - --mode=accuracy \ - --batch_size=$BATCH_SIZE \ - --max_seq_length=$MAX_SEQ \ - --warmup_steps=$WARMUPS \ - --num_inter=$INTER_THREADS \ - --num_intra=$INTRA_THREADS -``` - -Where (Default values are shown in the square brackets): - * $INPUT_MODEL ["./distilbert_base_fp32.pb"]-- The path to input FP32 frozen model .pb file to load - * $DATASET_DIR ["./sst2_validation_dataset"]-- The path to input dataset directory - * $OUTPUT_MODEL ["./output_distilbert_base_int8.pb"]-- The user-specified export path to the output INT8 quantized model - * $BATCH_SIZE [128]-- The batch size for model inference - * $MAX_SEQ [128]-- The maximum total sequence length after tokenization - * $ITERS [872]-- The number of iterations to run in benchmark mode, maximum value is 872 - * $WARMUPS [10]-- The number of warmup steps before benchmarking the model, maximum value is 22 - * $INTER_THREADS [2]-- The number of inter op parallelism thread to use, which can be set to the number of sockets - * $INTRA_THREADS [28]-- The number of intra op parallelism thread to use, which can be set to the number of physical core per socket - - -### Run Smooth Quant to improve int8 accuracy - -#### Tuning -```shell -bash run_quant.sh \ - --input_model=$INPUT_MODEL \ - --dataset_location=$DATASET_DIR \ - --output_model=$OUTPUT_MODEL \ - --batch_size=$BATCH_SIZE \ - --max_seq_length=$MAX_SEQ \ - --warmup_steps=$WARMUPS \ - --num_inter=$INTER_THREADS \ - --num_intra=$INTRA_THREADS \ - --sq=True -``` - - -Details of enabling Intel® Neural Compressor on DistilBERT base for TensorFlow -========================= - -This is a tutorial of how to enable DistilBERT base model with Intel® Neural Compressor. -## User Code Analysis -1. User specifies fp32 *model*, calibration dataloader *q_dataloader*, evaluation dataloader *eval_dataloader* and metric. - -2. User specifies fp32 *model*, calibration dataloader *q_dataloader* and a custom *eval_func* which encapsulates the evaluation dataloader and metric by itself. - -For DistilBERT base, we applied the latter one. The task is to implement the *q_dataloader* and *eval_func*. 
- - -### q_dataloader Part Adaption -Below dataloader class uses generator function to provide the model with input. - -```python -class Dataloader(object): - def __init__(self, data_location, batch_size, steps): - self.batch_size = batch_size - self.data_location = data_location - self.num_batch = math.ceil(steps / batch_size) - - def __iter__(self): - return self.generate_dataloader(self.data_location).__iter__() - - def __len__(self): - return self.num_batch - - def generate_dataloader(self, data_location): - dataset = load_dataset(data_location) - for batch_id in range(self.num_batch): - feed_dict, labels = create_feed_dict_and_labels(dataset, batch_id, self.num_batch) - yield feed_dict, labels -``` - -### Code Update -After prepare step is done, we add the code for quantization tuning to generate quantized model. - -#### Tune -```python - from neural_compressor.tensorflow import StaticQuantConfig, SmoothQuantConfig, quantize_model - - quant_config = SmoothQuantConfig(alpha=0.6) if ARGS.sq else StaticQuantConfig() - q_model = quantize_model(graph, quant_config, self.dataloader) - try: - q_model.save(ARGS.output_graph) - except Exception as e: - tf.compat.v1.logging.error("Failed to save model due to {}".format(str(e))) -``` -#### Benchmark -```python - if ARGS.mode == 'performance': - self.eval_func(graph) - elif ARGS.mode == 'accuracy': - accuracy = self.eval_func(graph) - logger.info("Accuracy: {:.4f}".format(accuracy)) -``` diff --git a/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/download_dataset.py b/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/download_dataset.py deleted file mode 100644 index b92cf39874f..00000000000 --- a/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/download_dataset.py +++ /dev/null @@ -1,38 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -''' -Script to download and save dataset -''' -from datasets import load_dataset -from argparse import ArgumentParser -import os - -def main(): - arg_parser = ArgumentParser(description="Download and save dataset") - arg_parser.add_argument("-p", "--path_to_save_dataset", type=str, - help="path to save the dataset", - default="./") - args = arg_parser.parse_args() - dataset = load_dataset("glue", "sst2", split= "validation") - path = os.path.join(args.path_to_save_dataset, "sst2_validation_dataset") - dataset.save_to_disk(path) - print("Dataset saved in location: {}".format(path)) - -if __name__ == "__main__": - main() diff --git a/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/main.py b/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/main.py deleted file mode 100644 index e0fce11736e..00000000000 --- a/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/main.py +++ /dev/null @@ -1,324 +0,0 @@ - -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -'''DistilBERT base inference, implementation adapted from Hugging Face Library https://huggingface.co/''' -import time -import os -import math - -import tensorflow as tf -import numpy as np - -from argparse import ArgumentParser -from transformers import AutoTokenizer -from datasets import load_from_disk -from tensorflow.core.protobuf import saved_model_pb2 -from tensorflow.python.client import timeline - -from neural_compressor.common import logger -from neural_compressor.tensorflow.utils import dump_elapsed_time - - - -def boolean_string(s): - if s not in {'False', 'True'}: - raise ValueError('Not a valid boolean string') - return s == 'True' - -arg_parser = ArgumentParser(description="Distilbert inference") -arg_parser.add_argument("--task-name", type=str, - help="Name of the task to run benchmark.", - dest="task_name", - default="sst2" - ) -arg_parser.add_argument("-c", "--config", type=str, - help="Quantization configuration file to load.", - dest="config", - default="distilbert_base.yaml" - ) -arg_parser.add_argument("-g", "--in-graph", type=str, - help="Full path to the input graph.", - dest="input_graph", - default=None - ) -arg_parser.add_argument("--data-location", type=str, - help="Path to the dataset.", - dest="data_location" - ) -arg_parser.add_argument("-o", "--output-graph", type=str, - help="The output path of quantized graph.", - dest="output_graph", - default="output_distilbert_base_int8.pb" - ) -arg_parser.add_argument("-m", "--mode", type=str, - choices=['performance', 'accuracy'], - help="One of two options: 'performance'/'accuracy'.", - dest="mode", - default="performance" - ) -arg_parser.add_argument("--tune", type=boolean_string, - help="whether to apply quantization", - dest="tune", - default=False - ) -arg_parser.add_argument('--sq', type=boolean_string, dest='sq', help='smooth quantization', default=False) -arg_parser.add_argument("--benchmark", type=boolean_string, - help="whether to do benchmark", 
- dest="benchmark", - default=False - ) -arg_parser.add_argument('-e', "--num-inter-threads", type=int, - help="The number of inter-thread.", - dest="num_inter_threads", - default=2 - ) -arg_parser.add_argument('-a', "--num-intra-threads", type=int, - help="The number of intra-thread.", - dest="num_intra_threads", - default=28 - ) -arg_parser.add_argument("--pad-to-max-length", type=boolean_string, - help="Padding option.", - dest="pad_to_max_length", - default=True - ) -arg_parser.add_argument("--warmup-steps", type=int, - help="Number of warmup steps.", - dest="warmup_steps", - default=10 - ) -arg_parser.add_argument("--max-seq-length", type=int, - help="Maximum total sequence length after tokenization.", - dest="max_seq_length", - default=128 - ) -arg_parser.add_argument("--steps", type=int, - help="Number of steps.", - dest="steps", - default=872 - ) -arg_parser.add_argument("--batch-size", type=int, - help="Inference batch-size.", - dest="batch_size", - default=128 - ) -arg_parser.add_argument("--profile", dest='profile', - type=boolean_string, help="profile", - default=False) - -ARGS = arg_parser.parse_args() -MAX_STEPS = 872 -MAX_WARMUP_STEPS = 22 - -def create_feed_dict_and_labels(dataset, batch_id= None, num_batch= None, idx= None): - """Return the input dictionary for the given batch.""" - if idx is None: - start_idx = batch_id * ARGS.batch_size - if batch_id == num_batch - 1: - end_idx = ARGS.steps - else: - end_idx = start_idx + ARGS.batch_size - input_ids = np.array(dataset["input_ids"])[start_idx:end_idx, :] - attention_mask = np.array(dataset["attention_mask"])[start_idx:end_idx, :] - feed_dict = {"input_ids:0": input_ids, - "attention_mask:0": attention_mask, - } - labels = np.array(dataset["label"])[start_idx: end_idx] - else: - input_ids = np.array(dataset["input_ids"])[idx, :].reshape(1, -1) - attention_mask = np.array(dataset["attention_mask"])[idx, :].reshape(1, -1) - feed_dict = {"input_ids:0": input_ids, - "attention_mask:0": attention_mask, - } - labels = np.array(dataset["label"])[idx] - return feed_dict, labels - -def load_dataset(data_location): - def preprocess_function(examples): - """Tokenize the texts.""" - sentence1_key, sentence2_key = "sentence", None - args = ( - (examples[sentence1_key],) if sentence2_key is None - else (examples[sentence1_key], examples[sentence2_key]) - ) - result = tokenizer(*args, padding="max_length", - max_length=ARGS.max_seq_length, - truncation=True - ) - return result - - # Load dataset (only validation split for inference) - dataset = load_from_disk(data_location) - # Load tokenizer - tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-sst-2-english") - # Set max sequence length - if ARGS.max_seq_length > tokenizer.model_max_length: - logger.info(f"The max sequence length passed ({ARGS.max_seq_length}) \ - is larger than the max supported by model \ - ({tokenizer.model_max_length}).Using max_seq_length = \ - {tokenizer.model_max_length}") - ARGS.max_seq_length = min(ARGS.max_seq_length, tokenizer.model_max_length) - # Tokenize the dataset - dataset = dataset.map(preprocess_function, batched=True) - return dataset - -class Dataloader(object): - def __init__(self, data_location, batch_size, steps): - self.batch_size = batch_size - self.data_location = data_location - self.num_batch = math.ceil(steps / batch_size) - - def __iter__(self): - return self.generate_dataloader(self.data_location).__iter__() - - def __len__(self): - return self.num_batch - - def generate_dataloader(self, data_location): - dataset 
= load_dataset(data_location) - for batch_id in range(self.num_batch): - feed_dict, labels = create_feed_dict_and_labels(dataset, batch_id, self.num_batch) - yield feed_dict, labels - -class Distilbert_base(object): - def __init__(self): - self.validate_args() - self.dataset = load_dataset(ARGS.data_location) - self.dataloader = Dataloader(ARGS.data_location, ARGS.batch_size, ARGS.steps) - - def validate_args(self): - if ARGS.warmup_steps > MAX_WARMUP_STEPS: - logger.warning("Warmup steps greater than max possible value of 22." + \ - " Setting to max value of ", MAX_WARMUP_STEPS) - ARGS.warmup_steps = MAX_WARMUP_STEPS - if ARGS.tune or ARGS.sq or (ARGS.benchmark and ARGS.mode == "accuracy"): - ARGS.steps = MAX_STEPS - elif ARGS.benchmark: - if ARGS.steps > (MAX_STEPS - MAX_WARMUP_STEPS): - logger.warning("Steps greater than max possible value of {}.".format(MAX_STEPS - MAX_WARMUP_STEPS)) - logger.warning("Setting to max value of {}".format(MAX_STEPS - MAX_WARMUP_STEPS)) - ARGS.steps = MAX_STEPS - MAX_WARMUP_STEPS - if not ARGS.data_location: - raise SystemExit("Missing dataset path.") - - def load_graph(self): - """Load the frozen model.""" - graph_def = tf.compat.v1.GraphDef() - sm = saved_model_pb2.SavedModel() - with tf.io.gfile.GFile(ARGS.input_graph, "rb") as f: - try: - content = f.read() - graph_def.ParseFromString(content) - except Exception: - sm.ParseFromString(content) - graph_def = sm.meta_graphs[0].graph_def - with tf.Graph().as_default() as graph: - tf.import_graph_def(graph_def, name="") - logger.info("Loaded graph from: " + ARGS.input_graph) - return graph - - def get_correct_predictions(self, preds, label_ids): - """Evaluate the predictions. - - return the total number of correct predictions. - """ - preds = np.argmax(preds, axis=1) - correct_preds = 0 - for pred, label in zip(preds, label_ids): - if pred == label: - correct_preds += 1 - return correct_preds - - @dump_elapsed_time(customized_msg="Customized eval_func") - def eval_func(self, graph): - # Set the config for running - config = tf.compat.v1.ConfigProto() - config.intra_op_parallelism_threads=ARGS.num_intra_threads - config.inter_op_parallelism_threads=ARGS.num_inter_threads - run_options = tf.compat.v1.RunOptions(trace_level=tf.compat.v1.RunOptions.FULL_TRACE) - run_metadata = tf.compat.v1.RunMetadata() - - output = graph.get_tensor_by_name('Identity:0') - total_time = 0 - accuracy = 0 - logger.info("Started warmup for {} steps...".format(ARGS.warmup_steps)) - start_step_idx = MAX_STEPS - MAX_WARMUP_STEPS - with tf.compat.v1.Session(graph=graph, config=config) as sess: - # Warm up - for step in range(start_step_idx, start_step_idx + ARGS.warmup_steps): - feed_dict, _ = create_feed_dict_and_labels(self.dataset, idx=step) - _ = sess.run(output, feed_dict= feed_dict) - logger.info("Warmup completed.") - # Inference - logger.info("Starting inference for {} steps...".format(ARGS.steps)) - total_correct_predictions = 0 - iter = 0 - for feed_dict, labels in self.dataloader: - iter += 1 - start_time = time.time() - if ARGS.profile: - pred = sess.run(output, feed_dict=feed_dict, options=run_options, run_metadata=run_metadata) - else: - pred = sess.run(output, feed_dict=feed_dict) - run_time = time.time() - start_time - if ARGS.tune or ARGS.sq or (ARGS.benchmark and ARGS.mode == "accuracy"): - total_correct_predictions += self.get_correct_predictions(pred, labels) - total_time += run_time - # save profiling file - if ARGS.profile and iter == int(self.dataloader.num_batch / 2): - trace = 
timeline.Timeline(step_stats=run_metadata.step_stats) - model_dir = str(os.path.dirname(os.path.realpath(__file__))) + '/timeline' - if not os.path.exists(model_dir): - try: - os.makedirs(model_dir) - except: - pass - profiling_file = model_dir + '/timeline-' + str(iter + 1) + '-' + str(os.getpid()) + '.json' - with open(profiling_file, 'w') as trace_file: - trace_file.write(trace.generate_chrome_trace_format(show_memory=False)) - time_per_batch = total_time / float(ARGS.steps / ARGS.batch_size) - accuracy = total_correct_predictions / ARGS.steps - if ARGS.benchmark and ARGS.mode == 'performance': - logger.info("Latency: {:.4f} ms".format(time_per_batch * 1000)) - logger.info("Throughput: {:.4f} sentences/sec".format(self.dataloader.batch_size / time_per_batch)) - return accuracy - - def run(self): - graph = self.load_graph() - if ARGS.tune or ARGS.sq: - from neural_compressor.tensorflow import StaticQuantConfig, SmoothQuantConfig, quantize_model - - quant_config = SmoothQuantConfig(alpha=0.6) if ARGS.sq else StaticQuantConfig() - q_model = quantize_model(graph, quant_config, self.dataloader) - try: - q_model.save(ARGS.output_graph) - except Exception as e: - tf.compat.v1.logging.error("Failed to save model due to {}".format(str(e))) - elif ARGS.benchmark: - assert ARGS.mode == 'performance' or ARGS.mode == 'accuracy', \ - "Benchmark only supports performance or accuracy mode." - if ARGS.mode == 'performance': - self.eval_func(graph) - elif ARGS.mode == 'accuracy': - accuracy = self.eval_func(graph) - logger.info("Accuracy: {:.4f}".format(accuracy)) - -if __name__ == "__main__": - distilbert_ob = Distilbert_base() - distilbert_ob.run() diff --git a/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/requirements.txt deleted file mode 100644 index e60cdb323dd..00000000000 --- a/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -datasets>=2.6.1 -scikit-learn>=1.1.2 -scipy>=1.9.3 -sklearn==0.0 -tokenizers==0.13.1 -transformers>=4.31.0 -intel-tensorflow>=2.12.0 diff --git a/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/run_benchmark.sh deleted file mode 100644 index fed5b300182..00000000000 --- a/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/run_benchmark.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# set -x - -function main { - - init_params "$@" - - run_benchmark - -} - -# init params -function init_params { - # set default value - input_model="./distilbert_base_fp32.pb" - dataset_location="./sst2_validation_dataset" - mode="performance" - batch_size=128 - max_seq_length=128 - iters=872 - warmup_steps=10 - num_inter=2 - num_intra=28 - benchmark=True - profile=False - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo "$var" |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo "$var" |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - ;; - --max_seq_length=*) - max_seq_length=$(echo ${var} |cut -f2 -d=) - ;; - --iters=*) - iters=$(echo ${var} |cut -f2 -d=) - ;; - --warmup_steps=*) - warmup_steps=$(echo ${var} |cut -f2 -d=) - ;; - --num_inter=*) - num_inter=$(echo ${var} |cut -f2 -d=) - ;; - --num_intra=*) - num_intra=$(echo ${var} |cut -f2 -d=) - ;; - --benchmark=*) - benchmark=$(echo ${var} |cut 
-f2 -d=) - ;; - --profile=*) - profile=$(echo ${var} |cut -f2 -d=) - ;; - esac - done - -} - -# run_benchmark -function run_benchmark { - cmd=" - python main.py \ - --in-graph=${input_model} \ - --data-location=${dataset_location} \ - --benchmark=${benchmark} \ - --profile=${profile} \ - --mode=${mode} \ - --steps=${iters} \ - --warmup-steps=${warmup_steps} \ - --batch-size=${batch_size} \ - --max-seq-length=${max_seq_length} \ - --num-inter-threads=${num_inter} \ - --num-intra-threads=${num_intra} - " - echo $cmd - eval $cmd -} - -main "$@" diff --git a/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/run_quant.sh deleted file mode 100644 index 31571c5ff4c..00000000000 --- a/examples/3.x_api/tensorflow/nlp/distilbert_base/quantization/ptq/run_quant.sh +++ /dev/null @@ -1,83 +0,0 @@ -#!/bin/bash -# set -x - -function main { - - init_params "$@" - - run_tuning - -} - -# init params -function init_params { - # set default value - input_model="./distilbert_base_fp32.pb" - dataset_location="./sst2_validation_dataset" - output_model="./output_distilbert_base_int8.pb" - batch_size=128 - max_seq_length=128 - warmup_steps=10 - num_inter=2 - num_intra=28 - tune=True - sq=False - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo ${var} |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo ${var} |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo ${var} |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo ${var} |cut -f2 -d=) - ;; - --max_seq_length=*) - max_seq_length=$(echo ${var} |cut -f2 -d=) - ;; - --warmup_steps=*) - warmup_steps=$(echo ${var} |cut -f2 -d=) - ;; - --num_inter=*) - num_inter=$(echo ${var} |cut -f2 -d=) - ;; - --num_intra=*) - num_intra=$(echo ${var} |cut -f2 -d=) - ;; - --tune=*) - tune=$(echo ${var} |cut -f2 -d=) - ;; - --sq=*) - sq=$(echo ${var} |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_tuning { - cmd=" - python main.py \ - --in-graph=${input_model} \ - --data-location=${dataset_location} \ - --output-graph=${output_model} \ - --tune=${tune} \ - --sq=${sq} \ - --warmup-steps=${warmup_steps} \ - --batch-size=${batch_size} \ - --max-seq-length=${max_seq_length} \ - --num-inter-threads=${num_inter} \ - --num-intra-threads=${num_intra} - " - echo $cmd - eval $cmd -} - -main "$@" diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md index e4618e7604c..4307ec85480 100644 --- a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md @@ -71,6 +71,6 @@ Please set the following environment variables before running quantization or be ## Benchmark -`bash run_benchmark.sh --input_model=3dunet_dynamic_ndhwc_int8.pb --dataset_location=/build --batch_size=100 --mode=benchmark` +`bash run_benchmark.sh --input_model=3dunet_dynamic_ndhwc_int8.pb --dataset_location=/build --batch_size=100 --iters=500 --mode=benchmark` `bash run_benchmark.sh --input_model=3dunet_dynamic_ndhwc_int8.pb --dataset_location=/build --batch_size=1 --mode=accuracy` diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh 
b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh index 7e869e7cca7..36f8d8502f0 100644 --- a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh +++ b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh @@ -31,6 +31,9 @@ function init_params { --batch_size=*) batch_size=$(echo $var |cut -f2 -d=) ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) + ;; *) echo "Error: No such parameter: ${var}" exit 1 @@ -55,6 +58,7 @@ function run_benchmark { --calib-preprocess=${BUILD_DIR}/calib_preprocess \ --batch-size=${batch_size} \ --mode=${mode} \ + --iters=${iters} \ ${extra_cmd} } diff --git a/neural_compressor/tensorflow/utils/model_wrappers.py b/neural_compressor/tensorflow/utils/model_wrappers.py index e1a58f2f53b..baeaa746914 100644 --- a/neural_compressor/tensorflow/utils/model_wrappers.py +++ b/neural_compressor/tensorflow/utils/model_wrappers.py @@ -1429,6 +1429,7 @@ def graph_info(self): def save(self, root, *args, **kwargs): """Save Keras model.""" self._model_object.save(root) + logger.info("Save quantized model to {}.".format(root)) @property def input_node_names(self): From 5f50a34b18eefb67d5a43f529f1000201b212168 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Tue, 16 Jul 2024 14:35:48 +0800 Subject: [PATCH 10/14] add save log for keras Signed-off-by: zehao-intel --- .../tensorflow/keras/cv/inception_v3/quantization/ptq/main.py | 2 +- .../tensorflow/keras/cv/mobilenet_v2/quantization/ptq/main.py | 1 + .../tensorflow/keras/cv/resnet_v2_50/quantization/ptq/main.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/main.py b/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/main.py index abccf02d8f5..5f8b08d6e11 100644 --- a/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/main.py @@ -126,7 +126,7 @@ def main(_): quant_config = StaticQuantConfig() q_model = quantize_model(FLAGS.input_model, quant_config, calib_dataloader) q_model.save(FLAGS.output_model) - + logger.info("Save quantized model to {}.".format(FLAGS.output_model)) if FLAGS.benchmark: from neural_compressor.tensorflow import Model diff --git a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/main.py b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/main.py index b577b24892f..804dd81ef5a 100644 --- a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/main.py @@ -126,6 +126,7 @@ def main(_): quant_config = StaticQuantConfig() q_model = quantize_model(FLAGS.input_model, quant_config, calib_dataloader) q_model.save(FLAGS.output_model) + logger.info("Save quantized model to {}.".format(FLAGS.output_model)) if FLAGS.benchmark: from neural_compressor.tensorflow import Model diff --git a/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/main.py b/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/main.py index 48f30a79fb4..7fc6a2cdf10 100644 --- a/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/main.py @@ -126,6 +126,7 @@ def main(_): quant_config = StaticQuantConfig() q_model = quantize_model(FLAGS.input_model, 
quant_config, calib_dataloader) q_model.save(FLAGS.output_model) + logger.info("Save quantized model to {}.".format(FLAGS.output_model)) if FLAGS.benchmark: from neural_compressor.tensorflow import Model From cb40371719f81a1253a6a4acaf266a79b6734b33 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Thu, 18 Jul 2024 15:43:23 +0800 Subject: [PATCH 11/14] fix accuracy using per-channel Signed-off-by: zehao-intel --- .../tensorflow/cv/densenet121/quantization/ptq/main.py | 2 +- .../tensorflow/cv/mobilenet_v2/quantization/ptq/main.py | 2 +- .../faster_rcnn_resnet50/quantization/ptq/README.md | 7 +++++-- .../faster_rcnn_resnet50/quantization/ptq/main.py | 2 +- .../mask_rcnn_inception_v2/quantization/ptq/README.md | 6 +++--- .../mask_rcnn_inception_v2/quantization/ptq/main.py | 2 +- .../ssd_mobilenet_v1/quantization/ptq/README.md | 8 +++++--- .../ssd_mobilenet_v1/quantization/ptq/main.py | 2 +- neural_compressor/tensorflow/quantization/config.py | 4 ++-- 9 files changed, 20 insertions(+), 15 deletions(-) diff --git a/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/main.py b/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/main.py index 21ca3bcb3d8..ed2013159d6 100644 --- a/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/main.py @@ -116,7 +116,7 @@ def run(self): ) calib_dataloader = TFDataLoader(dataset=dataset, batch_size=10) - quant_config = StaticQuantConfig() + quant_config = StaticQuantConfig(weight_granularity="per_channel") q_model = quantize_model(args.input_graph, quant_config, calib_dataloader) q_model.save(args.output_graph) diff --git a/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/main.py b/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/main.py index d22a9bd4494..fd3a07937de 100644 --- a/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/main.py @@ -112,7 +112,7 @@ def run(self): ) calib_dataloader = TFDataLoader(dataset=dataset, batch_size=10) - quant_config = StaticQuantConfig() + quant_config = StaticQuantConfig(weight_granularity="per_channel") q_model = quantize_model(args.input_graph, quant_config, calib_dataloader) q_model.save(args.output_graph) diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/README.md b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/README.md index a2c4fbfbbb8..b7b90b6f8ec 100644 --- a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/README.md +++ b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/README.md @@ -86,13 +86,16 @@ Download CoCo Dataset from [Official Website](https://cocodataset.org/#download) ## 1. Quantization ```shell - # The cmd of running faster_rcnn_resnet50 bash run_quant.sh --input_model=./faster_rcnn_resnet50_fp32_coco_pretrained_model/frozen_inference_graph.pb --output_model=./tensorflow-faster_rcnn_resnet50-tune.pb --dataset_location=/path/to/dataset/coco_val.record ``` ## 2. 
Benchmark ```shell + # run performance benchmark bash run_benchmark.sh --input_model=./tensorflow-faster_rcnn_resnet50-tune.pb --dataset_location=/path/to/dataset/coco_val.record --mode=performance + + # run accuracy benchmark + bash run_benchmark.sh --input_model=./tensorflow-faster_rcnn_resnet50-tune.pb --dataset_location=/path/to/dataset/coco_val.record --mode=accuracy ``` Details of enabling Intel® Neural Compressor on faster_rcnn_resnet50 for Tensorflow. @@ -111,7 +114,7 @@ After prepare step is done, we just need update main.py like below. if args.tune: from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model - quant_config = StaticQuantConfig() + quant_config = StaticQuantConfig(weight_granularity="per_channel") model = Model(args.input_graph) model.input_tensor_names = ['image_tensor'] model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/main.py index 2493297e476..0ca37671fd6 100644 --- a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/main.py @@ -109,7 +109,7 @@ def main(_): if args.tune: from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model - quant_config = StaticQuantConfig() + quant_config = StaticQuantConfig(weight_granularity="per_channel") model = Model(args.input_graph) model.input_tensor_names = ['image_tensor'] model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md index c672d5e6148..9ec8ae2ad78 100644 --- a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md +++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md @@ -89,21 +89,21 @@ Now we support both pb and ckpt formats. ### For PB format ```shell - # The cmd of running mask_rcnn_inception_v2 bash run_quant.sh --input_model=./mask_rcnn_inception_v2_coco_2018_01_28/frozen_inference_graph.pb --output_model=./tensorflow-mask_rcnn_inception_v2-tune.pb --dataset_location=/path/to/dataset/coco_val.record ``` ### For ckpt format ```shell - # The cmd of running mask_rcnn_inception_v2 bash run_quant.sh --input_model=./mask_rcnn_inception_v2_coco_2018_01_28/ --output_model=./tensorflow-mask_rcnn_inception_v2-tune.pb --dataset_location=/path/to/dataset/coco_val.record ``` ## 2. Benchmark ```shell + # run performance benchmark bash run_benchmark.sh --input_model=./tensorflow-mask_rcnn_inception_v2-tune.pb --dataset_location=/path/to/dataset/coco_val.record --mode=performance + # run accuracy benchmark bash run_benchmark.sh --input_model=./tensorflow-mask_rcnn_inception_v2-tune.pb --dataset_location=/path/to/dataset/coco_val.record --mode=accuracy ``` @@ -123,7 +123,7 @@ After prepare step is done, we just need update main.py like below. 
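For readers following this series, here is a minimal, self-contained sketch of the flow these object-detection READMEs describe, using the per-channel weight setting introduced by this patch. The function name, graph paths, and the calibration dataloader are illustrative placeholders, not part of the patch itself.

```python
# Hedged sketch of the updated PTQ flow for a frozen object-detection graph.
# quantize_frozen_graph, its paths, and calib_dataloader are placeholders.
from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model

def quantize_frozen_graph(input_graph, output_graph, calib_dataloader):
    # Weights are quantized per output channel (this patch's change);
    # activations keep the default per-tensor granularity.
    quant_config = StaticQuantConfig(weight_granularity="per_channel")

    # Wrap the frozen inference graph and declare its input/output tensors,
    # mirroring the object-detection examples updated in this series.
    model = Model(input_graph)
    model.input_tensor_names = ["image_tensor"]
    model.output_tensor_names = [
        "num_detections", "detection_boxes", "detection_scores", "detection_classes"
    ]

    # Calibrate on the provided dataloader, quantize, and save the INT8 graph.
    q_model = quantize_model(model, quant_config, calib_dataloader)
    q_model.save(output_graph)
    return q_model
```

The diff hunks that follow apply the same one-line configuration change to each example's main.py and README.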
if args.tune: from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model - quant_config = StaticQuantConfig() + quant_config = StaticQuantConfig(weight_granularity="per_channel") model = Model(args.input_graph) model.input_tensor_names = ['image_tensor'] model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py index 06498ca56e9..632d66ac25a 100644 --- a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py @@ -114,7 +114,7 @@ def main(_): if args.tune: from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model - quant_config = StaticQuantConfig() + quant_config = StaticQuantConfig(weight_granularity="per_channel") model = Model(args.input_graph) model.input_tensor_names = ['image_tensor'] model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/README.md b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/README.md index 43026b9002d..1b52ecf8b17 100644 --- a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/README.md +++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/README.md @@ -107,20 +107,22 @@ Now we support both pb and ckpt formats. ### For PB format ```shell - # The cmd of running ssd_mobilenet_v1 bash run_quant.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28/frozen_inference_graph.pb --output_model=./tensorflow-ssd_mobilenet_v1-tune.pb --dataset_location=/path/to/dataset/coco_val.record ``` ### For ckpt format ```shell - # The cmd of running ssd_mobilenet_v1 bash run_quant.sh --input_model=./ssd_mobilenet_v1_coco_2018_01_28/ --output_model=./tensorflow-ssd_mobilenet_v1-tune.pb --dataset_location=/path/to/dataset/coco_val.record ``` ## 2. Benchmark ```shell + # run performance benchmark bash run_benchmark.sh --input_model=./tensorflow-ssd_mobilenet_v1-tune.pb --dataset_location=/path/to/dataset/coco_val.record --mode=performance + + # run accuracy benchmark + bash run_benchmark.sh --input_model=./tensorflow-ssd_mobilenet_v1-tune.pb --dataset_location=/path/to/dataset/coco_val.record --mode=accuracy ``` Details of enabling Intel® Neural Compressor on ssd_mobilenet_v1 for Tensorflow. @@ -139,7 +141,7 @@ After prepare step is done, we just need update main.py like below. 
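Note that the companion change to neural_compressor/tensorflow/quantization/config.py (shown further below) narrows the tunable activation granularity to per_tensor, so the combination this patch targets is per-channel weights with per-tensor activations. A hedged illustration follows, assuming both fields are accepted as StaticQuantConfig keyword arguments, as the fields used in register_supported_configs suggest:

```python
# Illustrative only: spell out the granularity combination implied by this patch.
# Assumes act_granularity is a valid StaticQuantConfig keyword argument, as
# suggested by the registered supported configs in config.py below.
from neural_compressor.tensorflow import StaticQuantConfig

quant_config = StaticQuantConfig(
    weight_granularity="per_channel",  # per-channel weight scales (this patch)
    act_granularity="per_tensor",      # activations remain per-tensor
)
```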
if args.tune: from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model - quant_config = StaticQuantConfig() + quant_config = StaticQuantConfig(weight_granularity="per_channel") model = Model(args.input_graph) model.input_tensor_names = ['image_tensor'] model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/main.py b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/main.py index cd9f943d374..dbced65f2d7 100644 --- a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/main.py +++ b/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/main.py @@ -110,7 +110,7 @@ def main(_): if args.tune: from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model - quant_config = StaticQuantConfig() + quant_config = StaticQuantConfig(weight_granularity="per_channel") model = Model(args.input_graph) model.input_tensor_names = ['image_tensor'] model.output_tensor_names = ["num_detections", "detection_boxes", "detection_scores", "detection_classes"] diff --git a/neural_compressor/tensorflow/quantization/config.py b/neural_compressor/tensorflow/quantization/config.py index 752f8d4ecbe..c7b2d5b0549 100644 --- a/neural_compressor/tensorflow/quantization/config.py +++ b/neural_compressor/tensorflow/quantization/config.py @@ -113,7 +113,7 @@ def register_supported_configs(cls) -> List[OperatorConfig]: weight_algorithm=["minmax", "kl"], act_dtype=["int8", "bf16", "fp32"], act_sym=[True, False], - act_granularity=["per_tensor", "per_channel"], + act_granularity=["per_tensor"], act_algorithm=["minmax", "kl"], ) operators = [ @@ -173,7 +173,7 @@ def get_config_set_for_tuning(cls) -> Union[None, "StaticQuantConfig", List["Sta weight_algorithm=["minmax", "kl"], act_dtype=["int8", "fp32"], act_sym=[True, False], - act_granularity=["per_tensor", "per_channel"], + act_granularity=["per_tensor"], act_algorithm=["minmax", "kl"], ) From 5307820c408498efb2ad8302dcc4ec073b1d8ea0 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Tue, 23 Jul 2024 14:03:31 +0800 Subject: [PATCH 12/14] fix keras accuracy Signed-off-by: zehao-intel --- .../mobilenet_v2/quantization/ptq/README.md | 64 --- .../quantization/ptq/data_process.py | 543 ------------------ .../cv/mobilenet_v2/quantization/ptq/main.py | 143 ----- .../quantization/ptq/prepare_model.py | 35 -- .../quantization/ptq/requirements.txt | 2 - .../quantization/ptq/run_benchmark.sh | 51 -- .../quantization/ptq/run_quant.sh | 40 -- .../algorithms/static_quant/keras.py | 7 +- 8 files changed, 2 insertions(+), 883 deletions(-) delete mode 100644 examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/README.md delete mode 100644 examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/data_process.py delete mode 100644 examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/main.py delete mode 100644 examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/prepare_model.py delete mode 100644 examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/requirements.txt delete mode 100644 examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/run_benchmark.sh delete mode 100644 examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/run_quant.sh diff --git a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/README.md 
b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/README.md deleted file mode 100644 index 5f7da9d595f..00000000000 --- a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/README.md +++ /dev/null @@ -1,64 +0,0 @@ -Step-by-Step -============ - -This document is used to enable Tensorflow Keras model mobilenet_v2 quantization and benchmark using Intel® Neural Compressor. -This example can run on Intel CPUs and GPUs. - - -# Prerequisite - -## 1. Environment - -### Installation -```shell -# Install Intel® Neural Compressor -pip install neural-compressor -``` - -### Install Requirements -The Tensorflow and intel-extension-for-tensorflow is mandatory to be installed to run this example. -The Intel Extension for Tensorflow for Intel CPUs is installed as default. -```shell -pip install -r requirements.txt -``` -> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). - -## 2. Prepare Pretrained model - -The pretrained model is provided by [Keras Applications](https://keras.io/api/applications/). prepare the model, Run as follow: - ``` -python prepare_model.py --output_model=./mobilenet_v2_keras - ``` -`--output_model ` the model should be saved as SavedModel format or H5 format. - -## 3. Prepare Dataset - - TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. - We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. - - ```shell - cd examples/3.x_api/tensorflow/keras/cv/ - # convert validation subset - bash prepare_dataset.sh --output_dir=./mobilenet_v2/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation - # convert train subset - bash prepare_dataset.sh --output_dir=./mobilenet_v2/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train - cd mobilenet_v2/quantization/ptq - ``` -> **Note**: -> The raw ImageNet dataset resides in JPEG files should be in the following directory structure. Taking validation set as an example:
->         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000293.JPEG
->         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000543.JPEG
-> where 'n01440764' is the unique synset label associated with these images. - -# Run Command - -## Quantization - ```shell - bash run_quant.sh --input_model=./mobilenet_v2_keras --output_model=./result --dataset_location=/path/to/evaluation/dataset - ``` - -## Benchmark - ```shell - bash run_benchmark.sh --input_model=./result --mode=accuracy --dataset_location=/path/to/evaluation/dataset --batch_size=32 - bash run_benchmark.sh --input_model=./result --mode=performance --dataset_location=/path/to/evaluation/dataset --batch_size=1 - ``` diff --git a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/data_process.py deleted file mode 100644 index a655a5ce9a8..00000000000 --- a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/data_process.py +++ /dev/null @@ -1,543 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import os -import collections - -import numpy as np -import tensorflow as tf - -from abc import abstractmethod -from neural_compressor.common import logger -from neural_compressor.tensorflow.utils.data import default_collate - -class ParseDecodeImagenet: - """Parse features in Example proto. - - Returns: - tuple of parsed image and label - """ - - def __call__(self, sample): - """Parse features in example.""" - # Dense features in Example proto. - feature_map = { - "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""), - "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1), - } - - sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) - # Sparse features in Example proto. - feature_map.update( - { - k: sparse_float32 - for k in [ - "image/object/bbox/xmin", - "image/object/bbox/ymin", - "image/object/bbox/xmax", - "image/object/bbox/ymax", - ] - } - ) - - features = tf.io.parse_single_example(serialized=sample, features=feature_map) - label = tf.cast(features["image/class/label"], dtype=tf.int32) - image = features["image/encoded"] - image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST") - return (image, label) - - -class BilinearImagenetTransform(object): - """Combination of a series of transforms which is applicable to images in Imagenet. 
- - Args: - height: Height of the result - width:Width of the result - central_fraction(float, default=0.875):fraction of size to crop - mean_value(list, default=[0.0,0.0,0.0]):means for each channel - scale(float, default=1.0):std value - - Returns: - tuple of processed image and label - """ - - def __init__(self, height, width, central_fraction=0.875, mean_value=[0.0, 0.0, 0.0], scale=1.0): - """Initialize `BilinearImagenetTransform` class.""" - self.height = height - self.width = width - self.mean_value = mean_value - self.scale = scale - self.central_fraction = central_fraction - - # sample is (images, labels) - def __call__(self, sample): - """Convert `BilinearImagenetTransform` feature.""" - image, label = sample - if image.dtype is not tf.float32: - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - # Crop the central region of the image containing 87.5% area of the original image. - if self.central_fraction: - image = tf.image.central_crop(image, central_fraction=self.central_fraction) - - if self.height and self.width: - # Resize the image to the specified height and width. - image = tf.expand_dims(image, 0) - image = tf.image.resize(image, [self.height, self.width], method=tf.image.ResizeMethod.BILINEAR) - image = tf.squeeze(image, [0]) - - image = tf.subtract(image, 0.5) - image = tf.multiply(image, 2.0) - means = tf.broadcast_to(self.mean_value, tf.shape(input=image)) - image = (image - means) * self.scale - return (image, label) - - -class ComposeTransform(object): - """Composes several transforms together. - - Args: - transform_list (list of Transform objects): list of transforms to compose - - Returns: - sample (tuple): tuple of processed image and label - """ - - def __init__(self, transform_list): - """Initialize `ComposeTransform` class.""" - self.transform_list = transform_list - - def __call__(self, sample): - """Call transforms in transform_list.""" - for transform in self.transform_list: - sample = transform(sample) - return sample - - -class LabelShift(object): - """Convert label to label - label_shift. - - Args: - label_shift(int, default=0): number of label shift - - Returns: - tuple of processed image and label - """ - - def __init__(self, label_shift=0): - """Initialize `LabelShift` class.""" - self.label_shift = label_shift - - def __call__(self, sample): - """Convert label to label_shift.""" - images, labels = sample - if isinstance(labels, np.ndarray): - labels = labels - self.label_shift - elif isinstance(labels, list): - if isinstance(labels[0], tuple): - labels = [tuple(np.array(label) - self.label_shift) for label in labels] - elif isinstance(labels[0], np.ndarray): - labels = [label - self.label_shift for label in labels] - else: - labels = np.array(labels) - self.label_shift - labels = labels.tolist() - else: - labels = np.array(labels) - self.label_shift - return images, labels - - -class ShiftRescale(object): - """Label shift by 1 and rescale. - - Returns: - tuple of processed image and label - """ - - def __call__(self, sample): - image, label = sample - label -= 1 - image = (image - 127.5) / 127.5 - return (image, label) - - -class ImageRecordDataset(object): - """Tensorflow imageNet database in tf record format. - - Please arrange data in this way: - root/validation-000-of-100 - root/validation-001-of-100 - ... - root/validation-099-of-100 - The file name needs to follow this pattern: '* - * -of- *' - - Args: root (str): Root directory of dataset. - transform (transform object, default=None): transform to process input data. 
- filter (Filter objects, default=None): filter out examples according - to specific conditions. - """ - - """Configuration for Imagenet dataset.""" - - def __new__(cls, root, transform=None, filter=None): - """Build a new object of TensorflowImageRecord class.""" - from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module - - glob_pattern = os.path.join(root, "*-*-of-*") - file_names = gfile.Glob(glob_pattern) - if not file_names: - raise ValueError("Found no files in --root matching: {}".format(glob_pattern)) - - # pylint: disable=no-name-in-module - from tensorflow.python.data.experimental import parallel_interleave - - ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) - ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names))) - - if transform is not None: - transform.transform_list.insert(0, ParseDecodeImagenet()) - else: - transform = ParseDecodeImagenet() - ds = ds.map(transform, num_parallel_calls=None) - ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned - return ds - - -class BaseMetric(object): - """The base class of Metric.""" - - def __init__(self, metric, single_output=False, hvd=None): - """Initialize the basic metric. - - Args: - metric: The metric class. - single_output: Whether the output is single or not, defaults to False. - hvd: The Horovod class for distributed training, defaults to None. - """ - self._metric_cls = metric - self._single_output = single_output - self._hvd = hvd - - def __call__(self, *args, **kwargs): - """Evaluate the model predictions, and the reference. - - Returns: - The class itself. - """ - self._metric = self._metric_cls(*args, **kwargs) - return self - - @abstractmethod - def update(self, preds, labels=None, sample_weight=None): - """Update the state that need to be evaluated. - - Args: - preds: The prediction result. - labels: The reference. Defaults to None. - sample_weight: The sampling weight. Defaults to None. - - Raises: - NotImplementedError: The method should be implemented by subclass. - """ - raise NotImplementedError - - @abstractmethod - def reset(self): - """Clear the predictions and labels. - - Raises: - NotImplementedError: The method should be implemented by subclass. - """ - raise NotImplementedError - - @abstractmethod - def result(self): - """Evaluate the difference between predictions and labels. - - Raises: - NotImplementedError: The method should be implemented by subclass. - """ - raise NotImplementedError - - @property - def metric(self): - """Return its metric class. - - Returns: - The metric class. - """ - return self._metric_cls - - @property - def hvd(self): - """Return its hvd class. - - Returns: - The hvd class. - """ - return self._hvd - - @hvd.setter - def hvd(self, hvd): - """Set its hvd. - - Args: - hvd: The Horovod class for distributed training. - """ - self._hvd = hvd - - -class TopKMetric(BaseMetric): - """Compute Top-k Accuracy classification score for Tensorflow model. - - This metric computes the number of times where the correct label is among - the top k labels predicted. - - Attributes: - k (int): The number of most likely outcomes considered to find the correct label. - num_correct: The number of predictions that were correct classified. - num_sample: The total number of predictions. - """ - - def __init__(self, k=1): - """Initialize the k, number of samples and correct predictions. - - Args: - k: The number of most likely outcomes considered to find the correct label. 
- """ - self.k = k - self.num_correct = 0 - self.num_sample = 0 - - def update(self, preds, labels, sample_weight=None): - """Add the predictions and labels. - - Args: - preds: The predictions. - labels: The labels corresponding to the predictions. - sample_weight: The sample weight. - """ - preds, labels = TopKMetric._topk_shape_validate(preds, labels) - - labels = labels.reshape([len(labels)]) - with tf.Graph().as_default() as acc_graph: - topk = tf.nn.in_top_k( - predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k - ) - fp32_topk = tf.cast(topk, tf.float32) - correct_tensor = tf.reduce_sum(input_tensor=fp32_topk) - - with tf.compat.v1.Session() as acc_sess: - correct = acc_sess.run(correct_tensor) - - self.num_sample += len(labels) - self.num_correct += correct - - def reset(self): - """Reset the number of samples and correct predictions.""" - self.num_correct = 0 - self.num_sample = 0 - - def result(self): - """Compute the top-k score. - - Returns: - The top-k score. - """ - if self.num_sample == 0: - logger.warning("Sample num during evaluation is 0.") - return 0 - elif getattr(self, "_hvd", None) is not None: # pragma: no cover - allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) - allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) - return allgather_num_correct / allgather_num_sample - return self.num_correct / self.num_sample - - @staticmethod - def _topk_shape_validate(preds, labels): - # preds shape can be Nxclass_num or class_num(N=1 by default) - # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax - if isinstance(preds, int): - preds = [preds] - preds = np.array(preds) - elif isinstance(preds, np.ndarray): - preds = np.array(preds) - elif isinstance(preds, list): - preds = np.array(preds) - preds = preds.reshape((-1, preds.shape[-1])) - - # consider labels just int value 1x1 - if isinstance(labels, int): - labels = [labels] - labels = np.array(labels) - elif isinstance(labels, tuple): - labels = np.array([labels]) - labels = labels.reshape((labels.shape[-1], -1)) - elif isinstance(labels, list): - if isinstance(labels[0], int): - labels = np.array(labels) - labels = labels.reshape((labels.shape[0], 1)) - elif isinstance(labels[0], tuple): - labels = np.array(labels) - labels = labels.reshape((labels.shape[-1], -1)) - else: - labels = np.array(labels) - # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot) - # only support 2 dimension one-shot labels - # or 1 dimension one-hot class_num will confuse with N - - if len(preds.shape) == 1: - N = 1 - class_num = preds.shape[0] - preds = preds.reshape([-1, class_num]) - elif len(preds.shape) >= 2: - N = preds.shape[0] - preds = preds.reshape([N, -1]) - class_num = preds.shape[1] - - label_N = labels.shape[0] - assert label_N == N, "labels batch size should same with preds" - labels = labels.reshape([N, -1]) - # one-hot labels will have 2 dimension not equal 1 - if labels.shape[1] != 1: - labels = labels.argsort()[..., -1:] - return preds, labels - - -class TFDataLoader(object): # pragma: no cover - """Tensorflow dataloader class. - - In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict - method to do session run, this dataloader is designed to satisfy the usage of feed dict - in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. - - Args: - dataset: obj. wrapper of needed data. - batch_size: int. 
batch size - """ - - def __init__(self, dataset, batch_size=1, last_batch="rollover"): - """Initialize `TFDataDataLoader` class.""" - self.dataset = dataset - self.last_batch = last_batch - self.batch_size = batch_size - dataset = dataset.batch(batch_size) - - def batch(self, batch_size, last_batch="rollover"): - """Dataset return data per batch.""" - drop_last = False if last_batch == "rollover" else True - self.batch_size = batch_size - self.dataset = self.dataset.batch(batch_size, drop_last) - - def __iter__(self): - """Iterate dataloader.""" - return self._generate_dataloader( - self.dataset, - batch_size=self.batch_size, - last_batch=self.last_batch, - ) - - def _generate_dataloader( - self, - dataset, - batch_size=1, - last_batch="rollover", - collate_fn=None, - sampler=None, - batch_sampler=None, - num_workers=None, - pin_memory=None, - distributed=False, - ): - """Yield data.""" - drop_last = False if last_batch == "rollover" else True - - def check_dynamic_shape(element_spec): - if isinstance(element_spec, collections.abc.Sequence): - return any([check_dynamic_shape(ele) for ele in element_spec]) - elif isinstance(element_spec, tf.TensorSpec): - return True if element_spec.shape.num_elements() is None else False - else: - raise ValueError("unrecognized element spec...") - - def squeeze_output(output): - if isinstance(output, collections.abc.Sequence): - return [squeeze_output(ele) for ele in output] - elif isinstance(output, np.ndarray): - return np.squeeze(output, axis=0) - else: - raise ValueError("not supported output format....") - - if tf.executing_eagerly(): - index = 0 - outputs = [] - for iter_tensors in dataset: - samples = [] - iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] - if isinstance(iter_inputs, tf.Tensor): - samples.append(iter_inputs.numpy()) - else: - samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) - if isinstance(iter_labels, tf.Tensor): - samples.append(iter_labels.numpy()) - else: - samples.append([np.array(l) for l in iter_labels]) - index += 1 - outputs.append(samples) - if index == batch_size: - outputs = default_collate(outputs) - yield outputs - outputs = [] - index = 0 - if len(outputs) > 0: - outputs = default_collate(outputs) - yield outputs - else: - try_single_batch = check_dynamic_shape(dataset.element_spec) - dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) - ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) - iter_tensors = ds_iterator.get_next() - data_config = tf.compat.v1.ConfigProto() - data_config.use_per_session_threads = 1 - data_config.intra_op_parallelism_threads = 1 - data_config.inter_op_parallelism_threads = 16 - data_sess = tf.compat.v1.Session(config=data_config) - # pylint: disable=no-name-in-module - from tensorflow.python.framework.errors_impl import OutOfRangeError - - while True: - if not try_single_batch: - try: - outputs = data_sess.run(iter_tensors) - yield outputs - except OutOfRangeError: - data_sess.close() - return - else: - try: - outputs = [] - for i in range(0, batch_size): - outputs.append(squeeze_output(data_sess.run(iter_tensors))) - outputs = default_collate(outputs) - yield outputs - except OutOfRangeError: - if len(outputs) == 0: - data_sess.close() - return - else: - outputs = default_collate(outputs) - yield outputs - data_sess.close() - return diff --git a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/main.py b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/main.py deleted file mode 100644 
index 804dd81ef5a..00000000000 --- a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/main.py +++ /dev/null @@ -1,143 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2018 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import time - -import numpy as np -import tensorflow as tf - -from neural_compressor.utils import logger -from data_process import ( - ImageRecordDataset, - ComposeTransform, - BilinearImagenetTransform, - TFDataLoader, - TopKMetric, - LabelShift, -) - -tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) - -flags = tf.compat.v1.flags -FLAGS = flags.FLAGS - -## Required parameters -flags.DEFINE_string( - 'input_model', None, 'Run inference with specified keras model.') - -flags.DEFINE_string( - 'output_model', None, 'The output quantized model.') - -flags.DEFINE_string( - 'mode', 'performance', 'define benchmark mode for accuracy or performance') - -flags.DEFINE_bool( - 'tune', False, 'whether to tune the model') - -flags.DEFINE_bool( - 'benchmark', False, 'whether to benchmark the model') - -flags.DEFINE_string( - 'calib_data', None, 'location of calibration dataset') - -flags.DEFINE_string( - 'eval_data', None, 'location of evaluate dataset') - -flags.DEFINE_integer('batch_size', 32, 'batch_size') - -flags.DEFINE_integer( - 'iters', 100, 'maximum iteration when evaluating performance') - -height = width = 224 -eval_dataset = ImageRecordDataset(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \ - [BilinearImagenetTransform(height=height, width=width)])) - -eval_dataloader = TFDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) - -if FLAGS.calib_data: - calib_dataset = ImageRecordDataset(root=FLAGS.calib_data, transform= \ - ComposeTransform(transform_list= [BilinearImagenetTransform(height=height, width=width)])) - calib_dataloader = TFDataLoader(dataset=calib_dataset, batch_size=10) - -def evaluate(model): - """ - Custom evaluate function to inference the model for specified metric on validation dataset. - - Args: - model (tf.keras.Model): The input model will be the objection of tf.keras.Model. - - Returns: - accuracy (float): evaluation result, the larger is better. 
- """ - latency_list = [] - metric = TopKMetric() - postprocess = LabelShift(label_shift=1) - - def eval_func(dataloader, metric): - warmup = 5 - iteration = None - if FLAGS.benchmark and FLAGS.mode == 'performance': - iteration = FLAGS.iters - for idx, (inputs, labels) in enumerate(dataloader): - start = time.time() - predictions = model.predict_on_batch(inputs) - end = time.time() - latency_list.append(end - start) - predictions, labels = postprocess((predictions, labels)) - metric.update(predictions, labels) - if iteration and idx >= iteration: - break - latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size - return latency - - latency = eval_func(eval_dataloader, metric) - if FLAGS.benchmark: - logger.info("\n{} mode benchmark result:".format(FLAGS.mode)) - for i, res in enumerate(latency_list): - logger.debug("Iteration {} result {}:".format(i, res)) - if FLAGS.benchmark and FLAGS.mode == 'performance': - logger.info("Batch size = {}".format(eval_dataloader.batch_size)) - logger.info("Latency: {:.3f} ms".format(latency * 1000)) - logger.info("Throughput: {:.3f} images/sec".format(1. / latency)) - acc = metric.result() - return acc - -def main(_): - if FLAGS.tune: - from neural_compressor.common import set_random_seed - from neural_compressor.tensorflow import quantize_model - from neural_compressor.tensorflow.keras import StaticQuantConfig - - set_random_seed(9527) - quant_config = StaticQuantConfig() - q_model = quantize_model(FLAGS.input_model, quant_config, calib_dataloader) - q_model.save(FLAGS.output_model) - logger.info("Save quantized model to {}.".format(FLAGS.output_model)) - - if FLAGS.benchmark: - from neural_compressor.tensorflow import Model - - inc_model = Model(FLAGS.input_model) - if FLAGS.mode == 'performance': - evaluate(inc_model.model) - else: - accuracy = evaluate(inc_model.model) - logger.info('Batch size = %d' % FLAGS.batch_size) - logger.info("Accuracy: %.5f" % accuracy) - -if __name__ == "__main__": - tf.compat.v1.app.run() diff --git a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/prepare_model.py b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/prepare_model.py deleted file mode 100644 index e31b3e83de0..00000000000 --- a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/prepare_model.py +++ /dev/null @@ -1,35 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import argparse -from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2 -def get_mobilenet_v2_model(saved_path): - model = MobileNetV2(weights='imagenet') - model.save(saved_path) - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description='Export pretained keras model', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument( - '--output_model', - type=str, - help='path to exported model file') - - args = parser.parse_args() - get_mobilenet_v2_model(args.output_model) diff --git a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/requirements.txt deleted file mode 100644 index 8b7b47da969..00000000000 --- a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -tensorflow>=2.11.1 -intel-extension-for-tensorflow[cpu] diff --git a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/run_benchmark.sh deleted file mode 100644 index 43b1636c839..00000000000 --- a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/run_benchmark.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - batch_size=32 - iters=100 - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - ;; - --iters=*) - iters=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_benchmark { - - python main.py \ - --input_model ${input_model} \ - --benchmark \ - --mode ${mode} \ - --eval_data ${dataset_location} \ - --batch_size ${batch_size} \ - --iters ${iters} -} - -main "$@" diff --git a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/run_quant.sh deleted file mode 100644 index 7e3ed727f71..00000000000 --- a/examples/3.x_api/tensorflow/keras/cv/mobilenet_v2/quantization/ptq/run_quant.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_tuning - -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_tuning { - python main.py \ - --input_model ${input_model} \ - --output_model ${output_model} \ - --eval_data ${dataset_location} \ - --calib_data ${dataset_location} \ - --tune -} - -main "$@" diff --git a/neural_compressor/tensorflow/algorithms/static_quant/keras.py b/neural_compressor/tensorflow/algorithms/static_quant/keras.py index f6803c03604..c92ee43d1c2 100644 --- a/neural_compressor/tensorflow/algorithms/static_quant/keras.py +++ b/neural_compressor/tensorflow/algorithms/static_quant/keras.py @@ -721,12 +721,9 @@ def _parse_inputs(self, BN_fused_layers=None, conv_names=None): for out_layer_name in out_layer_names: if out_layer_name not in input_layer_dict: - input_layer_dict[out_layer_name] = set([layer.name]) + 
input_layer_dict[out_layer_name] = [layer.name] else: - input_layer_dict[out_layer_name].add(layer.name) - - for key in input_layer_dict.keys(): - input_layer_dict[key] = list(input_layer_dict[key]) + input_layer_dict[out_layer_name].append(layer.name) try: model_input = self.model.input From 150b7b43f8c3c814583f8009c6ea94a880db8816 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Wed, 24 Jul 2024 17:49:51 +0800 Subject: [PATCH 13/14] remove densenet Signed-off-by: zehao-intel --- examples/.config/model_params_keras_3x.json | 7 - .../.config/model_params_tensorflow_3x.json | 7 - .../cv/densenet121/quantization/ptq/README.md | 78 --- .../quantization/ptq/data_process.py | 581 ------------------ .../cv/densenet121/quantization/ptq/main.py | 147 ----- .../quantization/ptq/requirements.txt | 2 - .../quantization/ptq/run_benchmark.sh | 51 -- .../densenet121/quantization/ptq/run_quant.sh | 39 -- 8 files changed, 912 deletions(-) delete mode 100644 examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/README.md delete mode 100644 examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/data_process.py delete mode 100644 examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/main.py delete mode 100644 examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/requirements.txt delete mode 100644 examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/run_benchmark.sh delete mode 100644 examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/run_quant.sh diff --git a/examples/.config/model_params_keras_3x.json b/examples/.config/model_params_keras_3x.json index 65b0466667e..40bca7c9e22 100644 --- a/examples/.config/model_params_keras_3x.json +++ b/examples/.config/model_params_keras_3x.json @@ -13,13 +13,6 @@ "input_model": "/tf_dataset2/models/tensorflow/inception_v3_keras/saved_model", "main_script": "main.py", "batch_size": 32 - }, - "mobilenet_v2": { - "model_src_dir": "keras/cv/mobilenet_v2/quantization/ptq", - "dataset_location": "/tf_dataset/dataset/imagenet", - "input_model": "/tf_dataset2/models/tensorflow/mobilenet_v2_keras/saved_model", - "main_script": "main.py", - "batch_size": 32 } } } diff --git a/examples/.config/model_params_tensorflow_3x.json b/examples/.config/model_params_tensorflow_3x.json index 1e7f8bdab1a..6bd525ff1b9 100644 --- a/examples/.config/model_params_tensorflow_3x.json +++ b/examples/.config/model_params_tensorflow_3x.json @@ -50,13 +50,6 @@ "main_script": "main.py", "batch_size": 64 }, - "densenet121": { - "model_src_dir": "cv/densenet121/quantization/ptq", - "dataset_location": "/tf_dataset/dataset/imagenet", - "input_model": "/tf_dataset/tensorflow/densenet/densenet-121.pb", - "main_script": "main.py", - "batch_size": 32 - }, "inception_v3": { "model_src_dir": "cv/inception_v3/quantization/ptq", "dataset_location": "/tf_dataset/dataset/imagenet", diff --git a/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/README.md b/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/README.md deleted file mode 100644 index 7dbe04de7ca..00000000000 --- a/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/README.md +++ /dev/null @@ -1,78 +0,0 @@ -Step-by-Step -============ - -This document list steps of reproducing densenet121 model tuning and benchmark results via Neural Compressor. -This example can run on Intel CPUs and GPUs. - -> **Note**: -> The models is supported in Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). -# Prerequisite - -## 1. 
Environment - -### Installation -Recommend python 3.9 or higher version. -```shell -pip install -r requirements.txt -``` - -### Install Intel Extension for Tensorflow -#### Quantizing the model on Intel GPU(Mandatory to install ITEX) -Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. - -```shell -pip install --upgrade intel-extension-for-tensorflow[xpu] -``` -For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel/intel-extension-for-tensorflow/blob/main/docs/install/install_for_xpu.md#install-gpu-drivers) - -#### Quantizing the model on Intel CPU(Optional to install ITEX) -Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. - -```shell -pip install --upgrade intel-extension-for-tensorflow[cpu] -``` -> **Note**: -> The version compatibility of stock Tensorflow and ITEX can be checked [here](https://github.com/intel/intel-extension-for-tensorflow#compatibility-table). Please make sure you have installed compatible Tensorflow and ITEX. - -## 2. Prepare pre-trained model -The densenet-series comes from [tensorflow-densenet](https://github.com/pudae/tensorflow-densenet), please also follow the step [Prepare pre-trained model](#3-prepare-pre-trained-model) to get the pb files or use openvino download tools. - ```shell - git clone https://github.com/openvinotoolkit/open_model_zoo.git - cd open_model_zoo/tools/downloader - git checkout tags/2021.2 - pip install -r requirements.in - python downloader.py --name densenet-{121|161|169}-tf -o /PATH/TO/MODEL - ``` - -## 3. Prepare Dataset - - TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. - We also prepared related scripts in ` examples/3.x_api/tensorflow/cv` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. - - ```shell - cd examples/3.x_api/tensorflow/cv - # convert validation subset - bash prepare_dataset.sh --output_dir=./densenet121/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation - # convert train subset - bash prepare_dataset.sh --output_dir=./densenet121/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train - ``` -> **Note**: -> The raw ImageNet dataset resides in JPEG files should be in the following directory structure. Taking validation set as an example:
->         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000293.JPEG
->         /PATH/TO/img_raw/val/n01440764/ILSVRC2012_val_00000543.JPEG
-> where 'n01440764' is the unique synset label associated with these images. - -# Run - -## 1 Quantization - - ```shell - bash run_quant.sh --input_model=/PATH/TO/densenet-121.pb \ - --output_model=./nc_densenet121.pb --dataset_location=/path/to/ImageNet/ - ``` - -## 2. Benchmark - ```shell - bash run_benchmark.sh --input_model=./nc_densenet121.pb --mode=accuracy --dataset_location=/path/to/ImageNet/ --batch_size=32 - bash run_benchmark.sh --input_model=./nc_densenet121.pb --mode=performance --dataset_location=/path/to/ImageNet/ --batch_size=1 - ``` diff --git a/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/data_process.py deleted file mode 100644 index 17b4d9cec5e..00000000000 --- a/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/data_process.py +++ /dev/null @@ -1,581 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import os -import collections - -import numpy as np -import tensorflow as tf - -from abc import abstractmethod -from neural_compressor.common import logger -from neural_compressor.tensorflow.utils.data import default_collate - -class ParseDecodeImagenet: - """Parse features in Example proto. - - Returns: - tuple of parsed image and label - """ - - def __call__(self, sample): - """Parse features in example.""" - # Dense features in Example proto. - feature_map = { - "image/encoded": tf.io.FixedLenFeature([], dtype=tf.string, default_value=""), - "image/class/label": tf.io.FixedLenFeature([1], dtype=tf.int64, default_value=-1), - } - - sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32) - # Sparse features in Example proto. - feature_map.update( - { - k: sparse_float32 - for k in [ - "image/object/bbox/xmin", - "image/object/bbox/ymin", - "image/object/bbox/xmax", - "image/object/bbox/ymax", - ] - } - ) - - features = tf.io.parse_single_example(serialized=sample, features=feature_map) - label = tf.cast(features["image/class/label"], dtype=tf.int32) - image = features["image/encoded"] - image = tf.image.decode_jpeg(image, channels=3, fancy_upscaling=False, dct_method="INTEGER_FAST") - return (image, label) - - -class ResizeCropImagenet(object): - """Combination of a series of transforms which is applicable to images in Imagenet. 
- - Args: - height (int): Height of the result - width (int): Width of the result - random_crop (bool, default=False): whether to random crop - resize_side (int, default=256):desired shape after resize operation - random_flip_left_right (bool, default=False): whether to random flip left and right - mean_value (list, default=[0.0,0.0,0.0]):means for each channel - scale (float, default=1.0):std value - - Returns: - tuple of processed image and label - """ - - def __init__( - self, - height, - width, - random_crop=False, - resize_side=256, - resize_method="bilinear", - random_flip_left_right=False, - mean_value=[0.0, 0.0, 0.0], - scale=1.0, - data_format="channels_last", - subpixels="RGB", - ): - """Initialize `TensorflowResizeCropImagenetTransform` class.""" - self.height = height - self.width = width - self.mean_value = mean_value - self.scale = scale - self.random_crop = random_crop - self.random_flip_left_right = random_flip_left_right - self.resize_side = resize_side - self.resize_method = resize_method - self.data_format = data_format - self.subpixels = subpixels - - # sample is (images, labels) - def __call__(self, sample): - """Convert `TensorflowResizeCropImagenetTransform` feature.""" - image, label = sample - shape = tf.shape(input=image) - - height = ( - tf.cast(shape[0], dtype=tf.float32) - if self.data_format == "channels_last" - else tf.cast(shape[1], dtype=tf.float32) - ) - width = ( - tf.cast(shape[1], dtype=tf.float32) - if self.data_format == "channels_last" - else tf.cast(shape[2], dtype=tf.float32) - ) - scale = tf.cond( - pred=tf.greater(height, width), - true_fn=lambda: self.resize_side / width, - false_fn=lambda: self.resize_side / height, - ) - - scale = tf.cast(scale, dtype=tf.float32) - new_height = tf.cast(tf.math.rint(height * scale), dtype=tf.int32) - new_width = tf.cast(tf.math.rint(width * scale), dtype=tf.int32) - - if self.subpixels == "BGR" and self.data_format == "channels_first": - # 'RGB'->'BGR' - image = tf.cond( - tf.equal(tf.rank(image), 3), - lambda: tf.experimental.numpy.moveaxis(image[::-1, ...], 0, -1), - lambda: tf.experimental.numpy.moveaxis(image[:, ::-1, ...], 1, -1), - ) - elif self.subpixels == "BGR": - # 'RGB'->'BGR' - image = image[..., ::-1] - image = tf.expand_dims(image, 0) - image = tf.image.resize(image, [new_height, new_width], method=self.resize_method) - image = tf.squeeze(image) - shape = tf.shape(input=image) - if self.random_crop: - y0 = tf.random.uniform(shape=[], minval=0, maxval=(shape[0] - self.height + 1), dtype=tf.dtypes.int32) - x0 = tf.random.uniform(shape=[], minval=0, maxval=(shape[1] - self.width + 1), dtype=tf.dtypes.int32) - else: - y0 = (shape[0] - self.height) // 2 - x0 = (shape[1] - self.width) // 2 - - image = tf.image.crop_to_bounding_box(image, y0, x0, self.height, self.width) - image.set_shape([self.height, self.width, 3]) - if self.random_flip_left_right: - image = tf.image.random_flip_left_right(image) - means = tf.broadcast_to(self.mean_value, tf.shape(input=image)) - image = (image - means) * self.scale - return (image, label) - - -class ComposeTransform(object): - """Composes several transforms together. 
- - Args: - transform_list (list of Transform objects): list of transforms to compose - - Returns: - sample (tuple): tuple of processed image and label - """ - - def __init__(self, transform_list): - """Initialize `ComposeTransform` class.""" - self.transform_list = transform_list - - def __call__(self, sample): - """Call transforms in transform_list.""" - for transform in self.transform_list: - sample = transform(sample) - return sample - - -class LabelShift(object): - """Convert label to label - label_shift. - - Args: - label_shift(int, default=0): number of label shift - - Returns: - tuple of processed image and label - """ - - def __init__(self, label_shift=0): - """Initialize `LabelShift` class.""" - self.label_shift = label_shift - - def __call__(self, sample): - """Convert label to label_shift.""" - images, labels = sample - if isinstance(labels, np.ndarray): - labels = labels - self.label_shift - elif isinstance(labels, list): - if isinstance(labels[0], tuple): - labels = [tuple(np.array(label) - self.label_shift) for label in labels] - elif isinstance(labels[0], np.ndarray): - labels = [label - self.label_shift for label in labels] - else: - labels = np.array(labels) - self.label_shift - labels = labels.tolist() - else: - labels = np.array(labels) - self.label_shift - return images, labels - - -class ImageRecordDataset(object): - """Tensorflow imageNet database in tf record format. - - Please arrange data in this way: - root/validation-000-of-100 - root/validation-001-of-100 - ... - root/validation-099-of-100 - The file name needs to follow this pattern: '* - * -of- *' - - Args: root (str): Root directory of dataset. - transform (transform object, default=None): transform to process input data. - filter (Filter objects, default=None): filter out examples according - to specific conditions. - """ - - """Configuration for Imagenet dataset.""" - - def __new__(cls, root, transform=None, filter=None): - """Build a new object of TensorflowImageRecord class.""" - from tensorflow.python.platform import gfile # pylint: disable=no-name-in-module - - glob_pattern = os.path.join(root, "*-*-of-*") - file_names = gfile.Glob(glob_pattern) - if not file_names: - raise ValueError("Found no files in --root matching: {}".format(glob_pattern)) - - # pylint: disable=no-name-in-module - from tensorflow.python.data.experimental import parallel_interleave - - ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False) - ds = ds.apply(parallel_interleave(tf.data.TFRecordDataset, cycle_length=len(file_names))) - - if transform is not None: - transform.transform_list.insert(0, ParseDecodeImagenet()) - else: - transform = ParseDecodeImagenet() - ds = ds.map(transform, num_parallel_calls=None) - ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) # this number can be tuned - return ds - - -class BaseMetric(object): - """The base class of Metric.""" - - def __init__(self, metric, single_output=False, hvd=None): - """Initialize the basic metric. - - Args: - metric: The metric class. - single_output: Whether the output is single or not, defaults to False. - hvd: The Horovod class for distributed training, defaults to None. - """ - self._metric_cls = metric - self._single_output = single_output - self._hvd = hvd - - def __call__(self, *args, **kwargs): - """Evaluate the model predictions, and the reference. - - Returns: - The class itself. 
- """ - self._metric = self._metric_cls(*args, **kwargs) - return self - - @abstractmethod - def update(self, preds, labels=None, sample_weight=None): - """Update the state that need to be evaluated. - - Args: - preds: The prediction result. - labels: The reference. Defaults to None. - sample_weight: The sampling weight. Defaults to None. - - Raises: - NotImplementedError: The method should be implemented by subclass. - """ - raise NotImplementedError - - @abstractmethod - def reset(self): - """Clear the predictions and labels. - - Raises: - NotImplementedError: The method should be implemented by subclass. - """ - raise NotImplementedError - - @abstractmethod - def result(self): - """Evaluate the difference between predictions and labels. - - Raises: - NotImplementedError: The method should be implemented by subclass. - """ - raise NotImplementedError - - @property - def metric(self): - """Return its metric class. - - Returns: - The metric class. - """ - return self._metric_cls - - @property - def hvd(self): - """Return its hvd class. - - Returns: - The hvd class. - """ - return self._hvd - - @hvd.setter - def hvd(self, hvd): - """Set its hvd. - - Args: - hvd: The Horovod class for distributed training. - """ - self._hvd = hvd - - -class TopKMetric(BaseMetric): - """Compute Top-k Accuracy classification score for Tensorflow model. - - This metric computes the number of times where the correct label is among - the top k labels predicted. - - Attributes: - k (int): The number of most likely outcomes considered to find the correct label. - num_correct: The number of predictions that were correct classified. - num_sample: The total number of predictions. - """ - - def __init__(self, k=1): - """Initialize the k, number of samples and correct predictions. - - Args: - k: The number of most likely outcomes considered to find the correct label. - """ - self.k = k - self.num_correct = 0 - self.num_sample = 0 - - def update(self, preds, labels, sample_weight=None): - """Add the predictions and labels. - - Args: - preds: The predictions. - labels: The labels corresponding to the predictions. - sample_weight: The sample weight. - """ - preds, labels = TopKMetric._topk_shape_validate(preds, labels) - - labels = labels.reshape([len(labels)]) - with tf.Graph().as_default() as acc_graph: - topk = tf.nn.in_top_k( - predictions=tf.constant(preds, dtype=tf.float32), targets=tf.constant(labels, dtype=tf.int32), k=self.k - ) - fp32_topk = tf.cast(topk, tf.float32) - correct_tensor = tf.reduce_sum(input_tensor=fp32_topk) - - with tf.compat.v1.Session() as acc_sess: - correct = acc_sess.run(correct_tensor) - - self.num_sample += len(labels) - self.num_correct += correct - - def reset(self): - """Reset the number of samples and correct predictions.""" - self.num_correct = 0 - self.num_sample = 0 - - def result(self): - """Compute the top-k score. - - Returns: - The top-k score. 
- """ - if self.num_sample == 0: - logger.warning("Sample num during evaluation is 0.") - return 0 - elif getattr(self, "_hvd", None) is not None: # pragma: no cover - allgather_num_correct = sum(self._hvd.allgather_object(self.num_correct)) - allgather_num_sample = sum(self._hvd.allgather_object(self.num_sample)) - return allgather_num_correct / allgather_num_sample - return self.num_correct / self.num_sample - - @staticmethod - def _topk_shape_validate(preds, labels): - # preds shape can be Nxclass_num or class_num(N=1 by default) - # it's more suitable for 'Accuracy' with preds shape Nx1(or 1) output from argmax - if isinstance(preds, int): - preds = [preds] - preds = np.array(preds) - elif isinstance(preds, np.ndarray): - preds = np.array(preds) - elif isinstance(preds, list): - preds = np.array(preds) - preds = preds.reshape((-1, preds.shape[-1])) - - # consider labels just int value 1x1 - if isinstance(labels, int): - labels = [labels] - labels = np.array(labels) - elif isinstance(labels, tuple): - labels = np.array([labels]) - labels = labels.reshape((labels.shape[-1], -1)) - elif isinstance(labels, list): - if isinstance(labels[0], int): - labels = np.array(labels) - labels = labels.reshape((labels.shape[0], 1)) - elif isinstance(labels[0], tuple): - labels = np.array(labels) - labels = labels.reshape((labels.shape[-1], -1)) - else: - labels = np.array(labels) - # labels most have 2 axis, 2 cases: N(or Nx1 sparse) or Nxclass_num(one-hot) - # only support 2 dimension one-shot labels - # or 1 dimension one-hot class_num will confuse with N - - if len(preds.shape) == 1: - N = 1 - class_num = preds.shape[0] - preds = preds.reshape([-1, class_num]) - elif len(preds.shape) >= 2: - N = preds.shape[0] - preds = preds.reshape([N, -1]) - class_num = preds.shape[1] - - label_N = labels.shape[0] - assert label_N == N, "labels batch size should same with preds" - labels = labels.reshape([N, -1]) - # one-hot labels will have 2 dimension not equal 1 - if labels.shape[1] != 1: - labels = labels.argsort()[..., -1:] - return preds, labels - - -class TFDataLoader(object): # pragma: no cover - """Tensorflow dataloader class. - - In tensorflow1.x dataloader is coupled with the graph, but it also support feed_dict - method to do session run, this dataloader is designed to satisfy the usage of feed dict - in tf1.x. Although it's a general dataloader and can be used in MXNet and PyTorch. - - Args: - dataset: obj. wrapper of needed data. - batch_size: int. 
batch size - """ - - def __init__(self, dataset, batch_size=1, last_batch="rollover"): - """Initialize `TFDataDataLoader` class.""" - self.dataset = dataset - self.last_batch = last_batch - self.batch_size = batch_size - dataset = dataset.batch(batch_size) - - def batch(self, batch_size, last_batch="rollover"): - """Dataset return data per batch.""" - drop_last = False if last_batch == "rollover" else True - self.batch_size = batch_size - self.dataset = self.dataset.batch(batch_size, drop_last) - - def __iter__(self): - """Iterate dataloader.""" - return self._generate_dataloader( - self.dataset, - batch_size=self.batch_size, - last_batch=self.last_batch, - ) - - def _generate_dataloader( - self, - dataset, - batch_size=1, - last_batch="rollover", - collate_fn=None, - sampler=None, - batch_sampler=None, - num_workers=None, - pin_memory=None, - distributed=False, - ): - """Yield data.""" - drop_last = False if last_batch == "rollover" else True - - def check_dynamic_shape(element_spec): - if isinstance(element_spec, collections.abc.Sequence): - return any([check_dynamic_shape(ele) for ele in element_spec]) - elif isinstance(element_spec, tf.TensorSpec): - return True if element_spec.shape.num_elements() is None else False - else: - raise ValueError("unrecognized element spec...") - - def squeeze_output(output): - if isinstance(output, collections.abc.Sequence): - return [squeeze_output(ele) for ele in output] - elif isinstance(output, np.ndarray): - return np.squeeze(output, axis=0) - else: - raise ValueError("not supported output format....") - - if tf.executing_eagerly(): - index = 0 - outputs = [] - for iter_tensors in dataset: - samples = [] - iter_inputs, iter_labels = iter_tensors[0], iter_tensors[1] - if isinstance(iter_inputs, tf.Tensor): - samples.append(iter_inputs.numpy()) - else: - samples.append(tuple(iter_input.numpy() for iter_input in iter_inputs)) - if isinstance(iter_labels, tf.Tensor): - samples.append(iter_labels.numpy()) - else: - samples.append([np.array(l) for l in iter_labels]) - index += 1 - outputs.append(samples) - if index == batch_size: - outputs = default_collate(outputs) - yield outputs - outputs = [] - index = 0 - if len(outputs) > 0: - outputs = default_collate(outputs) - yield outputs - else: - try_single_batch = check_dynamic_shape(dataset.element_spec) - dataset = dataset.batch(1 if try_single_batch else batch_size, drop_last) - ds_iterator = tf.compat.v1.data.make_one_shot_iterator(dataset) - iter_tensors = ds_iterator.get_next() - data_config = tf.compat.v1.ConfigProto() - data_config.use_per_session_threads = 1 - data_config.intra_op_parallelism_threads = 1 - data_config.inter_op_parallelism_threads = 16 - data_sess = tf.compat.v1.Session(config=data_config) - # pylint: disable=no-name-in-module - from tensorflow.python.framework.errors_impl import OutOfRangeError - - while True: - if not try_single_batch: - try: - outputs = data_sess.run(iter_tensors) - yield outputs - except OutOfRangeError: - data_sess.close() - return - else: - try: - outputs = [] - for i in range(0, batch_size): - outputs.append(squeeze_output(data_sess.run(iter_tensors))) - outputs = default_collate(outputs) - yield outputs - except OutOfRangeError: - if len(outputs) == 0: - data_sess.close() - return - else: - outputs = default_collate(outputs) - yield outputs - data_sess.close() - return diff --git a/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/main.py b/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/main.py deleted file mode 100644 index 
ed2013159d6..00000000000 --- a/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/main.py +++ /dev/null @@ -1,147 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import time - -import numpy as np -import tensorflow as tf - -from argparse import ArgumentParser -from data_process import ( - ImageRecordDataset, - ComposeTransform, - ResizeCropImagenet, - LabelShift, - TFDataLoader, - TopKMetric -) - -tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) - -arg_parser = ArgumentParser(description='Parse args') -arg_parser.add_argument('-g', "--input-graph", - help='Specify the input graph for the transform tool', - dest='input_graph') -arg_parser.add_argument("--output-graph", - help='Specify tune result model save dir', - dest='output_graph') -arg_parser.add_argument('--benchmark', dest='benchmark', action='store_true', help='run benchmark') -arg_parser.add_argument('--mode', dest='mode', default='performance', help='benchmark mode') -arg_parser.add_argument('--tune', dest='tune', action='store_true', help='use neural_compressor to tune.') -arg_parser.add_argument('--dataset_location', dest='dataset_location', - help='location of calibration dataset and evaluate dataset') -arg_parser.add_argument('--batch_size', type=int, default=32, dest='batch_size', help='batch_size of benchmark') -arg_parser.add_argument('--iters', type=int, default=100, dest='iters', help='interations') -args = arg_parser.parse_args() - -def evaluate(model, eval_dataloader, metric, postprocess=None): - """Custom evaluate function to estimate the accuracy of the model. - - Args: - model (tf.Graph_def): The input model graph - - Returns: - accuracy (float): evaluation result, the larger is better. - """ - from neural_compressor.tensorflow import Model - model = Model(model) - input_tensor = model.input_tensor - output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ - model.output_tensor[0] - iteration = -1 - if args.benchmark and args.mode == 'performance': - iteration = args.iters - - def eval_func(dataloader): - latency_list = [] - for idx, (inputs, labels) in enumerate(dataloader): - # dataloader should keep the order and len of inputs same with input_tensor - inputs = np.array([inputs]) - feed_dict = dict(zip(input_tensor, inputs)) - - start = time.time() - predictions = model.sess.run(output_tensor, feed_dict) - end = time.time() - - if postprocess: - predictions, labels = postprocess((predictions, labels)) - - metric.update(predictions, labels) - latency_list.append(end-start) - if idx + 1 == iteration: - break - latency = np.array(latency_list).mean() / args.batch_size - return latency - - latency = eval_func(eval_dataloader) - if args.benchmark and args.mode == 'performance': - print("Batch size = {}".format(args.batch_size)) - print("Latency: {:.3f} ms".format(latency * 1000)) - print("Throughput: {:.3f} images/sec".format(1. 
/ latency)) - acc = metric.result() - return acc - -class eval_classifier_optimized_graph: - """Evaluate image classifier with optimized TensorFlow graph.""" - - def run(self): - """This is neural_compressor function include tuning, export and benchmark option.""" - from neural_compressor.common import set_random_seed - set_random_seed(9527) - - if args.tune: - from neural_compressor.tensorflow import StaticQuantConfig, quantize_model - - dataset = ImageRecordDataset( - root=args.dataset_location, - transform=ComposeTransform(transform_list= [ - ResizeCropImagenet(height=224, width=224, scale=0.017, mean_value=[123.68, 116.78, 103.94]), - ] - ) - ) - calib_dataloader = TFDataLoader(dataset=dataset, batch_size=10) - - quant_config = StaticQuantConfig(weight_granularity="per_channel") - q_model = quantize_model(args.input_graph, quant_config, calib_dataloader) - q_model.save(args.output_graph) - - if args.benchmark: - dataset = ImageRecordDataset( - root=args.dataset_location, - transform=ComposeTransform(transform_list= [ - ResizeCropImagenet(height=224, width=224, scale=0.017, mean_value=[123.68, 116.78, 103.94]), - ] - ) - ) - dataloader = TFDataLoader(dataset=dataset, batch_size=args.batch_size) - - def eval(model): - top1 = TopKMetric(k=1) - postprocess = LabelShift(label_shift=1) - return evaluate(model, dataloader, top1, postprocess) - - if args.mode == 'performance': - eval(args.input_graph) - elif args.mode == 'accuracy': - acc_result = eval(args.input_graph) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) - -if __name__ == "__main__": - evaluate_opt_graph = eval_classifier_optimized_graph() - evaluate_opt_graph.run() diff --git a/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/requirements.txt deleted file mode 100644 index 2755e1a41ac..00000000000 --- a/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -tensorflow -neural-compressor diff --git a/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/run_benchmark.sh deleted file mode 100644 index 8ecac837cf7..00000000000 --- a/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/run_benchmark.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/bash -set -x - -function main { - - init_params "$@" - run_benchmark - -} - -# init params -function init_params { - batch_size=32 - iters=100 - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --mode=*) - mode=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - --batch_size=*) - batch_size=$(echo $var |cut -f2 -d=) - ;; - --iters=*) - iters=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_benchmark { - - python main.py \ - --input-graph ${input_model} \ - --mode ${mode} \ - --dataset_location ${dataset_location} \ - --batch_size ${batch_size} \ - --benchmark \ - --iters ${iters} -} - -main "$@" diff --git a/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/run_quant.sh deleted file mode 100644 index 6a9e1b859c9..00000000000 --- a/examples/3.x_api/tensorflow/cv/densenet121/quantization/ptq/run_quant.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -set -x - -function main { - init_params "$@" - run_tuning 
- -} - -# init params -function init_params { - - for var in "$@" - do - case $var in - --input_model=*) - input_model=$(echo $var |cut -f2 -d=) - ;; - --output_model=*) - output_model=$(echo $var |cut -f2 -d=) - ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; - esac - done - -} - -# run_tuning -function run_tuning { - python main.py \ - --input-graph ${input_model} \ - --output-graph ${output_model} \ - --dataset_location ${dataset_location} \ - --tune -} - -main "$@" From f37f34143b17d65aa6726065be5842571957f826 Mon Sep 17 00:00:00 2001 From: zehao-intel Date: Thu, 25 Jul 2024 13:59:24 +0800 Subject: [PATCH 14/14] change folder name of examples Signed-off-by: zehao-intel --- examples/.config/model_params_keras_3x.json | 4 ++-- examples/.config/model_params_tensorflow_3x.json | 10 +++++----- .../inception_v3/quantization/ptq/README.md | 0 .../inception_v3/quantization/ptq/data_process.py | 0 .../inception_v3/quantization/ptq/main.py | 0 .../inception_v3/quantization/ptq/requirements.txt | 0 .../inception_v3/quantization/ptq/run_benchmark.sh | 0 .../inception_v3/quantization/ptq/run_quant.sh | 0 .../mobilenet_v2/quantization/ptq/README.md | 0 .../mobilenet_v2/quantization/ptq/data_process.py | 0 .../mobilenet_v2/quantization/ptq/main.py | 0 .../mobilenet_v2/quantization/ptq/requirements.txt | 0 .../mobilenet_v2/quantization/ptq/run_benchmark.sh | 0 .../mobilenet_v2/quantization/ptq/run_quant.sh | 0 .../{cv => image_recognition}/prepare_dataset.sh | 0 .../resnet_v2_50/quantization/ptq/README.md | 0 .../resnet_v2_50/quantization/ptq/data_process.py | 0 .../resnet_v2_50/quantization/ptq/main.py | 0 .../resnet_v2_50/quantization/ptq/requirements.txt | 0 .../resnet_v2_50/quantization/ptq/run_benchmark.sh | 0 .../resnet_v2_50/quantization/ptq/run_quant.sh | 0 .../vgg16/quantization/ptq/README.md | 0 .../vgg16/quantization/ptq/data_process.py | 0 .../vgg16/quantization/ptq/main.py | 0 .../vgg16/quantization/ptq/requirements.txt | 0 .../vgg16/quantization/ptq/run_benchmark.sh | 0 .../vgg16/quantization/ptq/run_quant.sh | 0 .../vision_transformer/quantization/ptq/README.md | 0 .../vision_transformer/quantization/ptq/__init__.py | 0 .../quantization/ptq/data_process.py | 0 .../vision_transformer/quantization/ptq/main.py | 0 .../quantization/ptq/requirements.txt | 0 .../quantization/ptq/run_benchmark.sh | 0 .../vision_transformer/quantization/ptq/run_quant.sh | 0 .../imagenet_prepare/build_imagenet_data.py | 0 .../imagenet_prepare/download_and_convert_imagenet.sh | 0 .../imagenet_prepare/download_imagenet.sh | 0 .../imagenet_prepare/imagenet_lsvrc_2015_synsets.txt | 0 .../imagenet_prepare/imagenet_metadata.txt | 0 .../inception_v3/quantization/ptq/README.md | 0 .../inception_v3/quantization/ptq/data_process.py | 0 .../inception_v3/quantization/ptq/main.py | 0 .../inception_v3/quantization/ptq/prepare_model.py | 0 .../inception_v3/quantization/ptq/requirements.txt | 0 .../inception_v3/quantization/ptq/run_benchmark.sh | 0 .../inception_v3/quantization/ptq/run_quant.sh | 0 .../keras/{cv => image_recognition}/prepare_dataset.sh | 0 .../resnet_v2_50/quantization/ptq/README.md | 0 .../resnet_v2_50/quantization/ptq/data_process.py | 0 .../resnet_v2_50/quantization/ptq/main.py | 0 .../resnet_v2_50/quantization/ptq/prepare_model.py | 0 .../resnet_v2_50/quantization/ptq/requirements.txt | 0 .../resnet_v2_50/quantization/ptq/run_benchmark.sh | 0 .../resnet_v2_50/quantization/ptq/run_quant.sh | 0 54 files changed, 7 insertions(+), 7 deletions(-) rename 
examples/3.x_api/tensorflow/{cv => image_recognition}/inception_v3/quantization/ptq/README.md (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/inception_v3/quantization/ptq/data_process.py (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/inception_v3/quantization/ptq/main.py (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/inception_v3/quantization/ptq/requirements.txt (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/inception_v3/quantization/ptq/run_benchmark.sh (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/inception_v3/quantization/ptq/run_quant.sh (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/mobilenet_v2/quantization/ptq/README.md (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/mobilenet_v2/quantization/ptq/data_process.py (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/mobilenet_v2/quantization/ptq/main.py (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/mobilenet_v2/quantization/ptq/requirements.txt (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/mobilenet_v2/quantization/ptq/run_benchmark.sh (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/mobilenet_v2/quantization/ptq/run_quant.sh (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/prepare_dataset.sh (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/resnet_v2_50/quantization/ptq/README.md (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/resnet_v2_50/quantization/ptq/data_process.py (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/resnet_v2_50/quantization/ptq/main.py (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/resnet_v2_50/quantization/ptq/requirements.txt (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/resnet_v2_50/quantization/ptq/run_benchmark.sh (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/resnet_v2_50/quantization/ptq/run_quant.sh (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/vgg16/quantization/ptq/README.md (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/vgg16/quantization/ptq/data_process.py (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/vgg16/quantization/ptq/main.py (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/vgg16/quantization/ptq/requirements.txt (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/vgg16/quantization/ptq/run_benchmark.sh (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/vgg16/quantization/ptq/run_quant.sh (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/vision_transformer/quantization/ptq/README.md (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/vision_transformer/quantization/ptq/__init__.py (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/vision_transformer/quantization/ptq/data_process.py (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/vision_transformer/quantization/ptq/main.py (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/vision_transformer/quantization/ptq/requirements.txt (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/vision_transformer/quantization/ptq/run_benchmark.sh (100%) rename examples/3.x_api/tensorflow/{cv => image_recognition}/vision_transformer/quantization/ptq/run_quant.sh (100%) rename 
examples/3.x_api/tensorflow/keras/{cv => image_recognition}/imagenet_prepare/build_imagenet_data.py (100%) rename examples/3.x_api/tensorflow/keras/{cv => image_recognition}/imagenet_prepare/download_and_convert_imagenet.sh (100%) rename examples/3.x_api/tensorflow/keras/{cv => image_recognition}/imagenet_prepare/download_imagenet.sh (100%) rename examples/3.x_api/tensorflow/keras/{cv => image_recognition}/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt (100%) rename examples/3.x_api/tensorflow/keras/{cv => image_recognition}/imagenet_prepare/imagenet_metadata.txt (100%) rename examples/3.x_api/tensorflow/keras/{cv => image_recognition}/inception_v3/quantization/ptq/README.md (100%) rename examples/3.x_api/tensorflow/keras/{cv => image_recognition}/inception_v3/quantization/ptq/data_process.py (100%) rename examples/3.x_api/tensorflow/keras/{cv => image_recognition}/inception_v3/quantization/ptq/main.py (100%) rename examples/3.x_api/tensorflow/keras/{cv => image_recognition}/inception_v3/quantization/ptq/prepare_model.py (100%) rename examples/3.x_api/tensorflow/keras/{cv => image_recognition}/inception_v3/quantization/ptq/requirements.txt (100%) rename examples/3.x_api/tensorflow/keras/{cv => image_recognition}/inception_v3/quantization/ptq/run_benchmark.sh (100%) rename examples/3.x_api/tensorflow/keras/{cv => image_recognition}/inception_v3/quantization/ptq/run_quant.sh (100%) rename examples/3.x_api/tensorflow/keras/{cv => image_recognition}/prepare_dataset.sh (100%) rename examples/3.x_api/tensorflow/keras/{cv => image_recognition}/resnet_v2_50/quantization/ptq/README.md (100%) rename examples/3.x_api/tensorflow/keras/{cv => image_recognition}/resnet_v2_50/quantization/ptq/data_process.py (100%) rename examples/3.x_api/tensorflow/keras/{cv => image_recognition}/resnet_v2_50/quantization/ptq/main.py (100%) rename examples/3.x_api/tensorflow/keras/{cv => image_recognition}/resnet_v2_50/quantization/ptq/prepare_model.py (100%) rename examples/3.x_api/tensorflow/keras/{cv => image_recognition}/resnet_v2_50/quantization/ptq/requirements.txt (100%) rename examples/3.x_api/tensorflow/keras/{cv => image_recognition}/resnet_v2_50/quantization/ptq/run_benchmark.sh (100%) rename examples/3.x_api/tensorflow/keras/{cv => image_recognition}/resnet_v2_50/quantization/ptq/run_quant.sh (100%) diff --git a/examples/.config/model_params_keras_3x.json b/examples/.config/model_params_keras_3x.json index 40bca7c9e22..bac8a06b4a3 100644 --- a/examples/.config/model_params_keras_3x.json +++ b/examples/.config/model_params_keras_3x.json @@ -1,14 +1,14 @@ { "keras": { "resnetv2_50": { - "model_src_dir": "keras/cv/resnet_v2_50/quantization/ptq", + "model_src_dir": "keras/image_recognition/resnet_v2_50/quantization/ptq", "dataset_location": "/tf_dataset/dataset/imagenet", "input_model": "/tf_dataset2/models/tensorflow/resnetv2_50_keras/saved_model", "main_script": "main.py", "batch_size": 32 }, "inception_v3": { - "model_src_dir": "keras/cv/inception_v3/quantization/ptq", + "model_src_dir": "keras/image_recognition/inception_v3/quantization/ptq", "dataset_location": "/tf_dataset/dataset/imagenet", "input_model": "/tf_dataset2/models/tensorflow/inception_v3_keras/saved_model", "main_script": "main.py", diff --git a/examples/.config/model_params_tensorflow_3x.json b/examples/.config/model_params_tensorflow_3x.json index 6bd525ff1b9..550f4d01219 100644 --- a/examples/.config/model_params_tensorflow_3x.json +++ b/examples/.config/model_params_tensorflow_3x.json @@ -51,7 +51,7 @@ "batch_size": 64 }, "inception_v3": 
{ - "model_src_dir": "cv/inception_v3/quantization/ptq", + "model_src_dir": "image_recognition/inception_v3/quantization/ptq", "dataset_location": "/tf_dataset/dataset/imagenet", "input_model": "/tf_dataset/pre-trained-models/inceptionv3/fp32/freezed_inceptionv3.pb", "main_script": "main.py", @@ -59,28 +59,28 @@ "fp32_model_url": "https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_8/inceptionv3_fp32_pretrained_model.pb" }, "mobilenetv2": { - "model_src_dir": "cv/mobilenet_v2/quantization/ptq", + "model_src_dir": "image_recognition/mobilenet_v2/quantization/ptq", "dataset_location": "/tf_dataset/dataset/imagenet", "input_model": "/tf_dataset/pre-train-model-slim/pbfile/frozen_pb/frozen_mobilenet_v2.pb", "main_script": "main.py", "batch_size": 32 }, "resnetv2_50": { - "model_src_dir": "cv/resnet_v2_50/quantization/ptq", + "model_src_dir": "image_recognition/resnet_v2_50/quantization/ptq", "dataset_location": "/tf_dataset/dataset/imagenet", "input_model": "/tf_dataset/pre-train-model-slim/pbfile/frozen_pb/frozen_resnet_v2_50.pb", "main_script": "main.py", "batch_size": 32 }, "vgg16": { - "model_src_dir": "cv/vgg16/quantization/ptq", + "model_src_dir": "image_recognition/vgg16/quantization/ptq", "dataset_location": "/tf_dataset/dataset/imagenet", "input_model": "/tf_dataset/pre-train-model-slim/pbfile/frozen_pb/frozen_vgg16.pb", "main_script": "main.py", "batch_size": 32 }, "ViT": { - "model_src_dir": "cv/vision_transformer/quantization/ptq", + "model_src_dir": "image_recognition/vision_transformer/quantization/ptq", "dataset_location": "/tf_dataset/dataset/imagenet", "input_model": "/tf_dataset/tensorflow/vit/HF-ViT-Base16-Img224-frozen.pb", "main_script": "main.py", diff --git a/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/README.md b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/README.md similarity index 100% rename from examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/README.md rename to examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/README.md diff --git a/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/data_process.py similarity index 100% rename from examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/data_process.py rename to examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/data_process.py diff --git a/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/main.py b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/main.py similarity index 100% rename from examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/main.py rename to examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/main.py diff --git a/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/requirements.txt similarity index 100% rename from examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/requirements.txt rename to examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/requirements.txt diff --git a/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/run_benchmark.sh rename to 
examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh diff --git a/examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/run_quant.sh similarity index 100% rename from examples/3.x_api/tensorflow/cv/inception_v3/quantization/ptq/run_quant.sh rename to examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/run_quant.sh diff --git a/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/README.md b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/README.md similarity index 100% rename from examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/README.md rename to examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/README.md diff --git a/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/data_process.py similarity index 100% rename from examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/data_process.py rename to examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/data_process.py diff --git a/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/main.py b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/main.py similarity index 100% rename from examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/main.py rename to examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/main.py diff --git a/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/requirements.txt similarity index 100% rename from examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/requirements.txt rename to examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/requirements.txt diff --git a/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/run_benchmark.sh rename to examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_benchmark.sh diff --git a/examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_quant.sh similarity index 100% rename from examples/3.x_api/tensorflow/cv/mobilenet_v2/quantization/ptq/run_quant.sh rename to examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_quant.sh diff --git a/examples/3.x_api/tensorflow/cv/prepare_dataset.sh b/examples/3.x_api/tensorflow/image_recognition/prepare_dataset.sh similarity index 100% rename from examples/3.x_api/tensorflow/cv/prepare_dataset.sh rename to examples/3.x_api/tensorflow/image_recognition/prepare_dataset.sh diff --git a/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/README.md b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/README.md similarity index 100% rename from examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/README.md rename to examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/README.md diff --git a/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/data_process.py 
b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/data_process.py similarity index 100% rename from examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/data_process.py rename to examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/data_process.py diff --git a/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/main.py b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/main.py similarity index 100% rename from examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/main.py rename to examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/main.py diff --git a/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt similarity index 100% rename from examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/requirements.txt rename to examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt diff --git a/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/run_benchmark.sh rename to examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh diff --git a/examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh similarity index 100% rename from examples/3.x_api/tensorflow/cv/resnet_v2_50/quantization/ptq/run_quant.sh rename to examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh diff --git a/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/README.md b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/README.md similarity index 100% rename from examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/README.md rename to examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/README.md diff --git a/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/data_process.py similarity index 100% rename from examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/data_process.py rename to examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/data_process.py diff --git a/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/main.py b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/main.py similarity index 100% rename from examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/main.py rename to examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/main.py diff --git a/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/requirements.txt similarity index 100% rename from examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/requirements.txt rename to examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/requirements.txt diff --git a/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/run_benchmark.sh similarity index 100% rename from 
examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/run_benchmark.sh rename to examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/run_benchmark.sh diff --git a/examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/run_quant.sh similarity index 100% rename from examples/3.x_api/tensorflow/cv/vgg16/quantization/ptq/run_quant.sh rename to examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/run_quant.sh diff --git a/examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/README.md b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/README.md similarity index 100% rename from examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/README.md rename to examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/README.md diff --git a/examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/__init__.py b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/__init__.py similarity index 100% rename from examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/__init__.py rename to examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/__init__.py diff --git a/examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/data_process.py similarity index 100% rename from examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/data_process.py rename to examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/data_process.py diff --git a/examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/main.py b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/main.py similarity index 100% rename from examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/main.py rename to examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/main.py diff --git a/examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/requirements.txt similarity index 100% rename from examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/requirements.txt rename to examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/requirements.txt diff --git a/examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/run_benchmark.sh rename to examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/run_benchmark.sh diff --git a/examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/run_quant.sh similarity index 100% rename from examples/3.x_api/tensorflow/cv/vision_transformer/quantization/ptq/run_quant.sh rename to examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/run_quant.sh diff --git a/examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/build_imagenet_data.py 
b/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/build_imagenet_data.py similarity index 100% rename from examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/build_imagenet_data.py rename to examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/build_imagenet_data.py diff --git a/examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/download_and_convert_imagenet.sh b/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/download_and_convert_imagenet.sh similarity index 100% rename from examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/download_and_convert_imagenet.sh rename to examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/download_and_convert_imagenet.sh diff --git a/examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/download_imagenet.sh b/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/download_imagenet.sh similarity index 100% rename from examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/download_imagenet.sh rename to examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/download_imagenet.sh diff --git a/examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt b/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt similarity index 100% rename from examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt rename to examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt diff --git a/examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/imagenet_metadata.txt b/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/imagenet_metadata.txt similarity index 100% rename from examples/3.x_api/tensorflow/keras/cv/imagenet_prepare/imagenet_metadata.txt rename to examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/imagenet_metadata.txt diff --git a/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/README.md b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/README.md similarity index 100% rename from examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/README.md rename to examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/README.md diff --git a/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/data_process.py similarity index 100% rename from examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/data_process.py rename to examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/data_process.py diff --git a/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/main.py b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/main.py similarity index 100% rename from examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/main.py rename to examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/main.py diff --git a/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/prepare_model.py b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/prepare_model.py similarity index 100% rename from examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/prepare_model.py rename to 
examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/prepare_model.py diff --git a/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/requirements.txt similarity index 100% rename from examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/requirements.txt rename to examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/requirements.txt diff --git a/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/run_benchmark.sh rename to examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh diff --git a/examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_quant.sh similarity index 100% rename from examples/3.x_api/tensorflow/keras/cv/inception_v3/quantization/ptq/run_quant.sh rename to examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_quant.sh diff --git a/examples/3.x_api/tensorflow/keras/cv/prepare_dataset.sh b/examples/3.x_api/tensorflow/keras/image_recognition/prepare_dataset.sh similarity index 100% rename from examples/3.x_api/tensorflow/keras/cv/prepare_dataset.sh rename to examples/3.x_api/tensorflow/keras/image_recognition/prepare_dataset.sh diff --git a/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/README.md b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/README.md similarity index 100% rename from examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/README.md rename to examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/README.md diff --git a/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/data_process.py b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/data_process.py similarity index 100% rename from examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/data_process.py rename to examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/data_process.py diff --git a/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/main.py b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/main.py similarity index 100% rename from examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/main.py rename to examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/main.py diff --git a/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/prepare_model.py b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/prepare_model.py similarity index 100% rename from examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/prepare_model.py rename to examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/prepare_model.py diff --git a/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt similarity index 100% rename from 
examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/requirements.txt rename to examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt diff --git a/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/run_benchmark.sh b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/run_benchmark.sh rename to examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh diff --git a/examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/run_quant.sh b/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh similarity index 100% rename from examples/3.x_api/tensorflow/keras/cv/resnet_v2_50/quantization/ptq/run_quant.sh rename to examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh