diff --git a/testdata/dnn/onnx/data/input_quantized_conv_int8_weights_qdq.npy b/testdata/dnn/onnx/data/input_quantized_conv_int8_weights_qdq.npy
new file mode 100644
index 000000000..df711356e
Binary files /dev/null and b/testdata/dnn/onnx/data/input_quantized_conv_int8_weights_qdq.npy differ
diff --git a/testdata/dnn/onnx/data/input_quantized_conv_per_channel_weights_qdq.npy b/testdata/dnn/onnx/data/input_quantized_conv_per_channel_weights_qdq.npy
new file mode 100644
index 000000000..d659c0821
Binary files /dev/null and b/testdata/dnn/onnx/data/input_quantized_conv_per_channel_weights_qdq.npy differ
diff --git a/testdata/dnn/onnx/data/input_quantized_conv_uint8_weights_qdq.npy b/testdata/dnn/onnx/data/input_quantized_conv_uint8_weights_qdq.npy
new file mode 100644
index 000000000..ff2418d7a
Binary files /dev/null and b/testdata/dnn/onnx/data/input_quantized_conv_uint8_weights_qdq.npy differ
diff --git a/testdata/dnn/onnx/data/output_quantized_conv_int8_weights_qdq.npy b/testdata/dnn/onnx/data/output_quantized_conv_int8_weights_qdq.npy
new file mode 100644
index 000000000..38d0d37aa
Binary files /dev/null and b/testdata/dnn/onnx/data/output_quantized_conv_int8_weights_qdq.npy differ
diff --git a/testdata/dnn/onnx/data/output_quantized_conv_per_channel_weights_qdq.npy b/testdata/dnn/onnx/data/output_quantized_conv_per_channel_weights_qdq.npy
new file mode 100644
index 000000000..a9210ca1e
Binary files /dev/null and b/testdata/dnn/onnx/data/output_quantized_conv_per_channel_weights_qdq.npy differ
diff --git a/testdata/dnn/onnx/data/output_quantized_conv_uint8_weights_qdq.npy b/testdata/dnn/onnx/data/output_quantized_conv_uint8_weights_qdq.npy
new file mode 100644
index 000000000..bb7a8b9b3
Binary files /dev/null and b/testdata/dnn/onnx/data/output_quantized_conv_uint8_weights_qdq.npy differ
diff --git a/testdata/dnn/onnx/generate_quantized_onnx_models.py b/testdata/dnn/onnx/generate_quantized_onnx_models.py
index 33a90190c..6d812b883 100644
--- a/testdata/dnn/onnx/generate_quantized_onnx_models.py
+++ b/testdata/dnn/onnx/generate_quantized_onnx_models.py
@@ -5,9 +5,9 @@ import torch.nn.functional as F
 import numpy as np
 import os
 
-import onnx
+import onnx  # version >= 1.12.0
 import onnxruntime as rt
-from onnxruntime.quantization import quantize_static, CalibrationDataReader, QuantType
+from onnxruntime.quantization import quantize_static, CalibrationDataReader, QuantType, QuantFormat
 
 class DataReader(CalibrationDataReader):
     def __init__(self, model_path, batchsize=5):
@@ -20,16 +20,16 @@ def __init__(self, model_path, batchsize=5):
     def get_next(self):
         return next(self.enum_data_dicts, None)
 
-def quantize_and_save_model(name, input, model, act_type="uint8", wt_type="uint8", per_channel=False):
+def quantize_and_save_model(name, input, model, act_type="uint8", wt_type="uint8", per_channel=False, ops_version=13, quanFormat=QuantFormat.QOperator):
     float_model_path = os.path.join("models", "dummy.onnx")
     quantized_model_path = os.path.join("models", name + ".onnx")
 
     type_dict = {"uint8" : QuantType.QUInt8, "int8" : QuantType.QInt8}
 
     model.eval()
-    torch.onnx.export(model, input, float_model_path, export_params=True, opset_version=12)
+    torch.onnx.export(model, input, float_model_path, export_params=True, opset_version=ops_version)
 
     dr = DataReader(float_model_path)
-    quantize_static(float_model_path, quantized_model_path, dr, per_channel=per_channel,
+    quantize_static(float_model_path, quantized_model_path, dr, quant_format=quanFormat, per_channel=per_channel,
                     activation_type=type_dict[act_type], weight_type=type_dict[wt_type])
     os.remove(float_model_path)
@@ -53,10 +53,16 @@ def quantize_and_save_model(name, input, model, act_type="uint8", wt_type="uint8
 
 input = Variable(torch.randn(1, 3, 10, 10))
 conv = nn.Conv2d(3, 5, kernel_size=3, stride=2, padding=1)
+# generate QOperator quantized models
 quantize_and_save_model("quantized_conv_uint8_weights", input, conv)
 quantize_and_save_model("quantized_conv_int8_weights", input, conv, wt_type="int8")
 quantize_and_save_model("quantized_conv_per_channel_weights", input, conv, per_channel=True)
 
+# generate QDQ quantized models
+quantize_and_save_model("quantized_conv_uint8_weights_qdq", input, conv, quanFormat=QuantFormat.QDQ)
+quantize_and_save_model("quantized_conv_int8_weights_qdq", input, conv, wt_type="int8", quanFormat=QuantFormat.QDQ)
+quantize_and_save_model("quantized_conv_per_channel_weights_qdq", input, conv, per_channel=True, quanFormat=QuantFormat.QDQ)
+
 input = Variable(torch.randn(1, 3))
 linear = nn.Linear(3, 4, bias=True)
 quantize_and_save_model("quantized_matmul_uint8_weights", input, linear)
diff --git a/testdata/dnn/onnx/models/quantized_conv_int8_weights_qdq.onnx b/testdata/dnn/onnx/models/quantized_conv_int8_weights_qdq.onnx
new file mode 100644
index 000000000..1f2a30980
Binary files /dev/null and b/testdata/dnn/onnx/models/quantized_conv_int8_weights_qdq.onnx differ
diff --git a/testdata/dnn/onnx/models/quantized_conv_per_channel_weights_qdq.onnx b/testdata/dnn/onnx/models/quantized_conv_per_channel_weights_qdq.onnx
new file mode 100644
index 000000000..fae86694f
Binary files /dev/null and b/testdata/dnn/onnx/models/quantized_conv_per_channel_weights_qdq.onnx differ
diff --git a/testdata/dnn/onnx/models/quantized_conv_uint8_weights_qdq.onnx b/testdata/dnn/onnx/models/quantized_conv_uint8_weights_qdq.onnx
new file mode 100644
index 000000000..5213aa32f
Binary files /dev/null and b/testdata/dnn/onnx/models/quantized_conv_uint8_weights_qdq.onnx differ
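Note: with QuantFormat.QOperator the quantizer emits fused quantized operators (e.g. QLinearConv), while with QuantFormat.QDQ it keeps the float Conv and brackets its tensors with QuantizeLinear/DequantizeLinear pairs; the new *_qdq models exercise the latter path. Below is a minimal sketch of how the new models and their stored input/output blobs could be sanity-checked with onnxruntime; the check_qdq_model helper and the tolerances are illustrative assumptions, not part of the generator script.

# Illustrative sanity check for the new QDQ test files; not part of
# generate_quantized_onnx_models.py. Paths mirror the layout used by this
# patch; the helper name and tolerances are assumptions.
import os
import numpy as np
import onnx
import onnxruntime as rt

def check_qdq_model(name):
    model_path = os.path.join("models", name + ".onnx")

    # A QDQ graph keeps a plain Conv surrounded by QuantizeLinear/
    # DequantizeLinear nodes; the QOperator variant would instead show
    # a fused QLinearConv here.
    model = onnx.load(model_path)
    print(name, "->", [node.op_type for node in model.graph.node])

    # Replay the stored input blob and compare against the stored output.
    inp = np.load(os.path.join("data", "input_" + name + ".npy"))
    ref = np.load(os.path.join("data", "output_" + name + ".npy"))
    sess = rt.InferenceSession(model_path, providers=["CPUExecutionProvider"])
    out = sess.run(None, {sess.get_inputs()[0].name: inp})[0]
    np.testing.assert_allclose(out, ref, rtol=1e-3, atol=1e-5)

for name in ("quantized_conv_uint8_weights_qdq",
             "quantized_conv_int8_weights_qdq",
             "quantized_conv_per_channel_weights_qdq"):
    check_qdq_model(name)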