Commit d89d2ff

mengniwang95 authored and zehao-intel committed

Export Qlinear to QDQ (#224)

Signed-off-by: mengniwa <[email protected]>
Signed-off-by: zehao-intel <[email protected]>

1 parent e5e077e · commit d89d2ff

24 files changed: +1390 −35 lines
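
This commit registers a QOperator counterpart for each supported quantized op so that models quantized in QLinear (QOperator) format can be exported to QDQ format: each QLinear* node is rewritten as DequantizeLinear on every quantized input, the corresponding float op, and QuantizeLinear on the output. Each registered class takes (onnx_node, children, initializers) and implements convert(), returning a (converted, nodes_to_add, initializers_to_add) tuple. A minimal driver sketch, assuming QOPERATORS maps op types to the registered classes the way OPERATORS does (the loop itself is illustrative, not part of this diff):

    import onnx
    from neural_compressor.adaptor.ox_utils.operators import QOPERATORS

    def qlinear_to_qdq(model: onnx.ModelProto) -> onnx.ModelProto:
        """Illustrative only: rewrite registered QLinear-format nodes as QDQ."""
        graph = model.graph
        inits = list(graph.initializer)
        kept, added_inits = [], []
        for node in graph.node:
            if node.op_type in QOPERATORS:
                # child tracking is elided here; the real exporter supplies it
                op = QOPERATORS[node.op_type](node, [], inits)
                converted, new_nodes, new_inits = op.convert()
                if converted:
                    kept.extend(new_nodes)
                    added_inits.extend(new_inits)
                    continue
            kept.append(node)
        del graph.node[:]
        graph.node.extend(kept)
        graph.initializer.extend(added_inits)
        return model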

neural_compressor/adaptor/ox_utils/operators/__init__.py

Lines changed: 2 additions & 2 deletions

@@ -18,12 +18,12 @@
 
 from os.path import dirname, basename, isfile, join
 import glob
-from .ops import OPERATORS
+from .ops import OPERATORS, QOPERATORS
 
 modules = glob.glob(join(dirname(__file__), "*.py"))
 
 for f in modules:
     if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'):
         __import__(basename(f)[:-3], globals(), locals(), level=1)
 
-__all__ = ["OPERATORS"]
+__all__ = ["OPERATORS", "QOPERATORS"]

neural_compressor/adaptor/ox_utils/operators/activation.py

Lines changed: 38 additions & 2 deletions

@@ -17,7 +17,7 @@
 #
 
 import onnx
-from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator
+from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry
 from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain
 
 @op_registry(op_types="LeakyRelu, Sigmoid")
@@ -87,4 +87,40 @@ def quantize(self):
             self.quantizer.dequantize_tensor(node, node.input[0])
         else:
             self.quantizer.model.replace_input_of_all_nodes(node.output[0], node.input[0])
-        self.quantizer.remove_nodes.append(node)
+        self.quantizer.remove_nodes.append(node)
+
+@qop_registry(op_types="QLinearLeakyRelu, QLinearSigmoid")
+class QActivationOperator(QOperator):
+    def __init__(self, onnx_node, children, initializers):
+        super().__init__(onnx_node, children, initializers)
+
+    def convert(self):
+        node = self.node
+        add_nodes = []
+        inits = []
+        # input dq
+        in_dq = onnx.helper.make_node(
+            'DequantizeLinear',
+            node.input[:3],
+            [node.name + '_in_dequant'],
+            node.name + '_in_dequant')
+        inputs = [node.name + '_in_dequant']
+        add_nodes.append(in_dq)
+        # output q
+        out_q = onnx.helper.make_node(
+            'QuantizeLinear',
+            [node.name + '_out', node.input[3], node.input[4]],
+            node.output,
+            node.name + '_out_quant')
+        outputs = [node.name + '_out']
+        add_nodes.append(out_q)
+
+        kwargs = {}
+        for attribute in node.attribute: # pragma: no cover
+            kwargs.update(attribute_to_kwarg(attribute))
+
+        activation_node = onnx.helper.make_node(
+            node.op_type.split('QLinear')[-1], inputs,
+            outputs, node.name + '_convert', **kwargs)
+        add_nodes.append(activation_node)
+        return True, add_nodes, inits
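
For example, given a QLinearSigmoid node named "sig" with inputs [X, X_scale, X_zp, Y_scale, Y_zp] and output [Y], the convert() above emits the three-node QDQ pattern below (hand-built here for illustration; the tensor names are hypothetical, the real ones come from node.input and node.name):

    import onnx

    # Hypothetical names standing in for node.input / node.name.
    dq = onnx.helper.make_node('DequantizeLinear',
                               ['X', 'X_scale', 'X_zp'],
                               ['sig_in_dequant'], 'sig_in_dequant')
    act = onnx.helper.make_node('Sigmoid',
                                ['sig_in_dequant'], ['sig_out'], 'sig_convert')
    q = onnx.helper.make_node('QuantizeLinear',
                              ['sig_out', 'Y_scale', 'Y_zp'],
                              ['Y'], 'sig_out_quant')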

neural_compressor/adaptor/ox_utils/operators/argmax.py

Lines changed: 7 additions & 5 deletions

@@ -15,9 +15,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-
-
-from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator
+from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry
 
 @op_registry(op_types="ArgMax")
 class ArgMaxOperator(Operator):
@@ -35,5 +33,9 @@ def convert(self, convert_format):
         origin_name = node.input[0].split('_argmax_node')[0]
 
         if origin_name in self.quantizer.quantized_value_map:
-            node.input[0] = self.quantizer.quantized_value_map[origin_name].q_name
-            node.name = node.name + '_quant'
+            node.name = node.name + '_quant'
+
+@qop_registry(op_types="ArgMax")
+class QArgMaxOperator(QOperator):
+    def __init__(self, onnx_node, children, initializers):
+        super().__init__(onnx_node, children, initializers)
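
QArgMaxOperator adds no convert() of its own: ArgMax reads the quantized tensor directly and produces integer indices, so an ArgMax in a QLinear-format graph presumably needs no DequantizeLinear/QuantizeLinear wrapping during QDQ export, and the bare QOperator registration simply makes it visible to the QOPERATORS registry.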

neural_compressor/adaptor/ox_utils/operators/attention.py

Lines changed: 45 additions & 2 deletions

@@ -17,8 +17,8 @@
 #
 
 import onnx
-from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator
-from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain
+from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, qop_registry, QOperator
+from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain, find_by_name
 
 @op_registry(op_types="Attention")
 class AttentionOperator(Operator):
@@ -74,3 +74,46 @@ def convert(self, convert_format):
         self.quantizer.new_nodes.append(qattention_node)
 
         self.quantizer.remove_nodes.append(node)
+
+@qop_registry(op_types="QAttention")
+class QAttentionOperator(QOperator):
+    def __init__(self, onnx_node, children, initializers):
+        super().__init__(onnx_node, children, initializers)
+
+    def convert(self):
+        node = self.node
+        add_nodes = []
+        inputs = []
+        inits = []
+        if find_by_name(node.input[3], self.initializers) is None:
+            return False, add_nodes, inits
+        # input dq
+        in_dq1 = onnx.helper.make_node(
+            'DequantizeLinear',
+            [node.input[0], node.input[3], node.input[6]],
+            [node.name + '_in_dequant1'],
+            node.name + '_in_dequant1')
+
+        in_dq2 = onnx.helper.make_node(
+            'DequantizeLinear',
+            [node.input[1], node.input[4], node.input[7]],
+            [node.name + '_in_dequant2'],
+            node.name + '_in_dequant2')
+        inputs = [node.name + '_in_dequant1',
+                  node.name + '_in_dequant2',
+                  node.input[2],
+                  node.input[5]]
+
+        add_nodes.extend([in_dq1, in_dq2])
+
+        outputs = node.output
+        kwargs = {}
+        for attribute in node.attribute: # pragma: no cover
+            kwargs.update(attribute_to_kwarg(attribute))
+        kwargs["domain"] = ms_domain
+
+        binary_node = onnx.helper.make_node(
+            'Attention', inputs,
+            outputs, node.name + '_convert', **kwargs)
+        add_nodes.append(binary_node)
+        return True, add_nodes, inits
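
Per the com.microsoft QAttention contract, the inputs are (input, weight, bias, input_scale, weight_scale, mask_index, input_zero_point, weight_zero_point, ...), so in_dq1 dequantizes the activation with input[3]/input[6] and in_dq2 dequantizes the weight with input[4]/input[7], while bias (input[2]) and mask_index (input[5]) pass through unchanged in float form. The early return False bails out when the input scale is not a constant initializer, presumably the dynamic-quantization case where this static rewrite cannot be applied.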

neural_compressor/adaptor/ox_utils/operators/binary_op.py

Lines changed: 45 additions & 2 deletions

@@ -17,7 +17,7 @@
 #
 
 import onnx
-from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator
+from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry
 from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain
 
 @op_registry(op_types="Add, Mul")
@@ -77,4 +77,47 @@ def convert(self, convert_format):
         self.quantizer.new_nodes += [qlinear_binary_math_node]
         self.quantizer.remove_nodes.extend(parents)
         self.quantizer.remove_nodes.append(child)
-        self.quantizer.remove_nodes.append(node)
+        self.quantizer.remove_nodes.append(node)
+
+@qop_registry(op_types="QLinearAdd, QLinearMul")
+class QBinaryOperator(QOperator):
+    def __init__(self, onnx_node, children, initializers):
+        super().__init__(onnx_node, children, initializers)
+
+    def convert(self):
+        node = self.node
+        add_nodes = []
+        inits = []
+        # input dq
+        in_dq1 = onnx.helper.make_node(
+            'DequantizeLinear',
+            node.input[:3],
+            [node.name + '_in_dequant1'],
+            node.name + '_in_dequant1')
+
+        in_dq2 = onnx.helper.make_node(
+            'DequantizeLinear',
+            node.input[3:6],
+            [node.name + '_in_dequant2'],
+            node.name + '_in_dequant2')
+        inputs = [node.name + '_in_dequant1', node.name + '_in_dequant2']
+
+        add_nodes.extend([in_dq1, in_dq2])
+        # output q
+        out_q = onnx.helper.make_node(
+            'QuantizeLinear',
+            [node.name + '_out', node.input[6], node.input[7]],
+            node.output,
+            node.name + '_out_quant')
+        outputs = [node.name + '_out']
+        add_nodes.append(out_q)
+
+        kwargs = {}
+        for attribute in node.attribute: # pragma: no cover
+            kwargs.update(attribute_to_kwarg(attribute))
+
+        binary_node = onnx.helper.make_node(
+            node.op_type.split('QLinear')[-1], inputs,
+            outputs, node.name + '_convert', **kwargs)
+        add_nodes.append(binary_node)
+        return True, add_nodes, inits
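
QLinearAdd and QLinearMul pack their inputs as (A, A_scale, A_zero_point, B, B_scale, B_zero_point, C_scale, C_zero_point), hence the two 3-element slices feeding the input DequantizeLinear nodes and input[6]/input[7] feeding the output QuantizeLinear; the float op type is recovered by stripping the 'QLinear' prefix from node.op_type.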

neural_compressor/adaptor/ox_utils/operators/concat.py

Lines changed: 40 additions & 1 deletion

@@ -17,7 +17,7 @@
 #
 
 import onnx
-from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator
+from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry
 from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain
 
 @op_registry(op_types="Concat")
@@ -96,3 +96,42 @@ def cast(self): # pragma: no cover
         if node.input[0] not in [i.tensor_name for i in self.quantizer.new_value_info.values()]:
             return
         self.quantizer.dtype_cast(self.node, self.dtype)
+
+@qop_registry(op_types="QLinearConcat")
+class QConcatOperator(QOperator):
+    def __init__(self, onnx_node, children, initializers):
+        super().__init__(onnx_node, children, initializers)
+
+    def convert(self):
+        node = self.node
+        add_nodes = []
+        inputs = []
+        inits = []
+        # input dq
+        for i in range(int((len(node.input) - 2) / 3 - 1)):
+            in_dq = onnx.helper.make_node(
+                'DequantizeLinear',
+                node.input[2 + i*3 : 2 + (i+1)*3],
+                [node.name + '_in_dequant_' + str(i)],
+                node.name + '_in_dequant_' + str(i))
+            inputs.append(node.name + '_in_dequant_' + str(i))
+            add_nodes.append(in_dq)
+
+        # output q
+        out_q = onnx.helper.make_node(
+            'QuantizeLinear',
+            [node.name + '_out', node.input[0], node.input[1]],
+            node.output,
+            node.name + '_out_quant')
+        outputs = [node.name + '_out']
+        add_nodes.append(out_q)
+
+        kwargs = {}
+        for attribute in node.attribute: # pragma: no cover
+            kwargs.update(attribute_to_kwarg(attribute))
+
+        concat_node = onnx.helper.make_node(
+            'Concat', inputs,
+            outputs, node.name + '_convert', **kwargs)
+        add_nodes.append(concat_node)
+        return True, add_nodes, inits
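
Unlike the binary ops, QLinearConcat puts the output quantization parameters first: its inputs are (Y_scale, Y_zero_point) followed by one (X_i, X_i_scale, X_i_zero_point) triple per data input. That layout is why the output QuantizeLinear reads node.input[0] and node.input[1] while each DequantizeLinear takes the 3-slice starting at index 2 + i*3.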

neural_compressor/adaptor/ox_utils/operators/conv.py

Lines changed: 69 additions & 1 deletion

@@ -19,7 +19,7 @@
 
 import onnx
 from onnx import onnx_pb as onnx_proto
-from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator
+from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry
 from neural_compressor.adaptor.ox_utils.util import find_by_name, attribute_to_kwarg
 
 @op_registry(op_types="Conv, FusedConv")
@@ -156,6 +156,7 @@ def convert(self, convert_format):
             if attribute.name == 'activation_params': # pragma: no cover
                 continue
             kwargs.update(attribute_to_kwarg(attribute))
+
         qlinear_conv_node = onnx.helper.make_node("QLinearConv", qlinear_conv_inputs,
                                                   [qlinear_conv_output],
                                                   node.name, **kwargs)
@@ -164,4 +165,71 @@ def convert(self, convert_format):
         self.quantizer.remove_nodes.append(child)
         self.quantizer.remove_nodes.append(node)
 
+@qop_registry(op_types="QLinearConv")
+class QConvOperator(QOperator):
+    def __init__(self, onnx_node, children, initializers):
+        super().__init__(onnx_node, children, initializers)
 
+    def convert(self):
+        node = self.node
+        add_nodes = []
+        inits = []
+        # input dq
+        in_dq1 = onnx.helper.make_node(
+            'DequantizeLinear',
+            node.input[:3],
+            [node.name + '_in_dequant1'],
+            node.name + '_in_dequant1')
+
+        in_dq2 = onnx.helper.make_node(
+            'DequantizeLinear',
+            node.input[3:6],
+            [node.name + '_in_dequant2'],
+            node.name + '_in_dequant2')
+
+        add_nodes.extend([in_dq1, in_dq2])
+        inputs = [node.name + '_in_dequant1', node.name + '_in_dequant2']
+        if len(node.input) == 9:
+            import numpy as np
+            input_scale = onnx.numpy_helper.to_array(
+                find_by_name(node.input[1], self.initializers))
+            weight_scale = onnx.numpy_helper.to_array(
+                find_by_name(node.input[4], self.initializers))
+            bias_scale = input_scale * weight_scale
+
+            # update scale initializer
+            bias_scale_data = np.asarray(bias_scale, dtype=np.float32).reshape(-1)
+            bias_scale_initializer = onnx.numpy_helper.from_array(bias_scale_data,
+                                                                  node.input[8] + '_scale')
+            inits.extend([bias_scale_initializer])
+
+            # update zero initializer
+            bias_zp_data = np.zeros(bias_scale.shape, dtype=np.int32).reshape(-1)
+            bias_zp_initializer = onnx.numpy_helper.from_array(
+                bias_zp_data, node.input[8] + '_zero_point')
+            inits.extend([bias_zp_initializer])
+            in_dq3 = onnx.helper.make_node(
+                'DequantizeLinear',
+                [node.input[8], bias_scale_initializer.name, bias_zp_initializer.name],
+                [node.name + '_in_dequant3'],
+                node.name + '_in_dequant3')
+            inputs.append(in_dq3.name)
+            add_nodes.append(in_dq3)
+        # output q
+        out_q = onnx.helper.make_node(
+            'QuantizeLinear',
+            [node.name + '_out', node.input[6], node.input[7]],
+            node.output,
+            node.name + '_out_quant')
+        outputs = [node.name + '_out']
+        add_nodes.append(out_q)
+
+        kwargs = {}
+        for attribute in node.attribute: # pragma: no cover
+            kwargs.update(attribute_to_kwarg(attribute))
+
+        binary_node = onnx.helper.make_node(
+            node.op_type.split('QLinear')[-1], inputs,
+            outputs, node.name + '_convert', **kwargs)
+        add_nodes.append(binary_node)
+        return True, add_nodes, inits
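
The len(node.input) == 9 branch handles QLinearConv's optional int32 bias: the op carries no bias scale or zero point of its own, so they are reconstructed as input_scale * weight_scale and zero, matching how the bias was originally quantized. A small worked example with hypothetical values:

    import numpy as np
    import onnx

    # Hypothetical per-tensor input scale and per-channel weight scales.
    input_scale = np.float32(0.02)
    weight_scale = np.asarray([0.5, 0.25], dtype=np.float32)

    bias_scale = np.asarray(input_scale * weight_scale,
                            dtype=np.float32).reshape(-1)   # [0.01, 0.005]
    scale_init = onnx.numpy_helper.from_array(bias_scale, 'bias_scale')
    zp_init = onnx.numpy_helper.from_array(
        np.zeros(bias_scale.shape, dtype=np.int32).reshape(-1),
        'bias_zero_point')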

neural_compressor/adaptor/ox_utils/operators/direct_q8.py

Lines changed: 6 additions & 1 deletion

@@ -16,7 +16,7 @@
 # limitations under the License.
 #
 
-from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator
+from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, qop_registry, QOperator
 
 @op_registry(op_types="Reshape, Transpose, Squeeze, Unsqueeze")
 class Direct8BitOperator(Operator):
@@ -83,3 +83,8 @@ def cast(self):
         if node.input[0] not in [i.tensor_name for i in self.quantizer.new_value_info.values()]:
             return
         self.quantizer.dtype_cast(self.node, self.dtype)
+
+@qop_registry(op_types="Reshape, Transpose, Squeeze, Unsqueeze")
+class QDirectOperator(QOperator):
+    def __init__(self, onnx_node, children, initializers):
+        super().__init__(onnx_node, children, initializers)
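
Reshape, Transpose, Squeeze, and Unsqueeze are pure data-movement ops that operate on the quantized tensor directly, so, like ArgMax above, they presumably need no DQ/Q wrapping during QDQ export; the bare registration just routes them through the QOPERATORS registry unchanged.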
