import onnx
from onnx import onnx_pb as onnx_proto
from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry
from neural_compressor.adaptor.ox_utils.util import find_by_name, attribute_to_kwarg

@op_registry(op_types="Conv, FusedConv")
@@ -156,6 +156,7 @@ def convert(self, convert_format):
156156 if attribute .name == 'activation_params' : # pragma: no cover
157157 continue
158158 kwargs .update (attribute_to_kwarg (attribute ))
159+
159160 qlinear_conv_node = onnx .helper .make_node ("QLinearConv" , qlinear_conv_inputs ,
160161 [qlinear_conv_output ],
161162 node .name , ** kwargs )
@@ -164,4 +165,71 @@ def convert(self, convert_format):
164165 self .quantizer .remove_nodes .append (child )
165166 self .quantizer .remove_nodes .append (node )
166167
@qop_registry(op_types="QLinearConv")
class QConvOperator(QOperator):
    """Rewrites a QLinearConv node into the equivalent QDQ-style subgraph:
    DequantizeLinear (activation, weight, optional bias) -> Conv -> QuantizeLinear.
    """

    def __init__(self, onnx_node, children, initializers):
        super().__init__(onnx_node, children, initializers)

    def convert(self):
        """Build the replacement nodes/initializers for this QLinearConv.

        Returns:
            tuple: (True, new_nodes, new_initializers) where new_nodes are the
            DequantizeLinear/Conv/QuantizeLinear nodes to insert and
            new_initializers hold the derived bias scale/zero-point tensors.
        """
        node = self.node
        new_nodes = []
        new_inits = []

        # Dequantize the activation input triple (tensor, scale, zero_point).
        act_dq = onnx.helper.make_node(
            'DequantizeLinear', node.input[:3],
            [node.name + '_in_dequant1'],
            node.name + '_in_dequant1')
        # Dequantize the weight input triple.
        wgt_dq = onnx.helper.make_node(
            'DequantizeLinear', node.input[3:6],
            [node.name + '_in_dequant2'],
            node.name + '_in_dequant2')
        new_nodes.extend([act_dq, wgt_dq])

        float_inputs = [node.name + '_in_dequant1', node.name + '_in_dequant2']

        # A 9th input means an int32 bias is present. It has no stored
        # scale/zero_point, so derive scale = input_scale * weight_scale
        # and use an all-zero zero-point.
        if len(node.input) == 9:
            import numpy as np
            act_scale = onnx.numpy_helper.to_array(
                find_by_name(node.input[1], self.initializers))
            wgt_scale = onnx.numpy_helper.to_array(
                find_by_name(node.input[4], self.initializers))
            b_scale = act_scale * wgt_scale

            # New scale initializer for the bias DequantizeLinear.
            scale_init = onnx.numpy_helper.from_array(
                np.asarray(b_scale, dtype=np.float32).reshape(-1),
                node.input[8] + '_scale')
            new_inits.extend([scale_init])

            # Matching zero-point initializer (all zeros, int32 like the bias).
            zp_init = onnx.numpy_helper.from_array(
                np.zeros(b_scale.shape, dtype=np.int32).reshape(-1),
                node.input[8] + '_zero_point')
            new_inits.extend([zp_init])

            bias_dq = onnx.helper.make_node(
                'DequantizeLinear',
                [node.input[8], scale_init.name, zp_init.name],
                [node.name + '_in_dequant3'],
                node.name + '_in_dequant3')
            # Node name equals its output tensor name here, so appending
            # bias_dq.name wires the Conv to the dequantized bias.
            float_inputs.append(bias_dq.name)
            new_nodes.append(bias_dq)

        # Re-quantize the float Conv result back onto the original output
        # name, using the original output scale/zero_point (inputs 6 and 7).
        out_q = onnx.helper.make_node(
            'QuantizeLinear',
            [node.name + '_out', node.input[6], node.input[7]],
            node.output,
            node.name + '_out_quant')
        float_outputs = [node.name + '_out']
        new_nodes.append(out_q)

        # Carry every QLinearConv attribute over to the float Conv.
        kwargs = {}
        for attribute in node.attribute:  # pragma: no cover
            kwargs.update(attribute_to_kwarg(attribute))

        # 'QLinearConv'.split('QLinear')[-1] -> 'Conv'.
        conv_node = onnx.helper.make_node(
            node.op_type.split('QLinear')[-1], float_inputs,
            float_outputs, node.name + '_convert', **kwargs)
        new_nodes.append(conv_node)
        return True, new_nodes, new_inits
0 commit comments