4 changes: 2 additions & 2 deletions neural_compressor/adaptor/ox_utils/operators/__init__.py
@@ -18,12 +18,12 @@

from os.path import dirname, basename, isfile, join
import glob
-from .ops import OPERATORS
+from .ops import OPERATORS, QOPERATORS

modules = glob.glob(join(dirname(__file__), "*.py"))

for f in modules:
if isfile(f) and not f.startswith('__') and not f.endswith('__init__.py'):
__import__(basename(f)[:-3], globals(), locals(), level=1)

__all__ = ["OPERATORS"]
__all__ = ["OPERATORS", "QOPERATORS"]
40 changes: 38 additions & 2 deletions neural_compressor/adaptor/ox_utils/operators/activation.py
@@ -17,7 +17,7 @@
#

import onnx
-from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator
+from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry
from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain

@op_registry(op_types="LeakyRelu, Sigmoid")
@@ -87,4 +87,40 @@ def quantize(self):
self.quantizer.dequantize_tensor(node, node.input[0])
else:
self.quantizer.model.replace_input_of_all_nodes(node.output[0], node.input[0])
-self.quantizer.remove_nodes.append(node)
+self.quantizer.remove_nodes.append(node)

@qop_registry(op_types="QLinearLeakyRelu, QLinearSigmoid")
class QActivationOperator(QOperator):
def __init__(self, onnx_node, children, initializers):
super().__init__(onnx_node, children, initializers)

def convert(self):
node = self.node
add_nodes = []
inits = []
# input dq
in_dq = onnx.helper.make_node(
'DequantizeLinear',
node.input[:3],
[node.name + '_in_dequant'],
node.name + '_in_dequant')
inputs = [node.name + '_in_dequant']
add_nodes.append(in_dq)
# output q
out_q = onnx.helper.make_node(
'QuantizeLinear',
[node.name + '_out', node.input[3], node.input[4]],
node.output,
node.name + '_out_quant')
outputs = [node.name + '_out']
add_nodes.append(out_q)

kwargs = {}
for attribute in node.attribute: # pragma: no cover
kwargs.update(attribute_to_kwarg(attribute))

activation_node = onnx.helper.make_node(
node.op_type.split('QLinear')[-1], inputs,
outputs, node.name + '_convert', **kwargs)
add_nodes.append(activation_node)
return True, add_nodes, inits
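
Each convert() added in this PR rewrites a QOperator-format node into the equivalent QDQ chain DequantizeLinear -> float op -> QuantizeLinear. A minimal sketch of what QActivationOperator.convert() produces for a QLinearSigmoid node (tensor and node names here are illustrative, not from the PR):

import onnx

# QOperator form: QLinearSigmoid(x, x_scale, x_zp, y_scale, y_zp) -> y
qnode = onnx.helper.make_node(
    'QLinearSigmoid', ['x', 'x_scale', 'x_zp', 'y_scale', 'y_zp'], ['y'],
    name='sig', domain='com.microsoft')

# Equivalent QDQ chain emitted by convert():
in_dq = onnx.helper.make_node(
    'DequantizeLinear', ['x', 'x_scale', 'x_zp'],
    ['sig_in_dequant'], 'sig_in_dequant')
act = onnx.helper.make_node(
    'Sigmoid', ['sig_in_dequant'], ['sig_out'], 'sig_convert')
out_q = onnx.helper.make_node(
    'QuantizeLinear', ['sig_out', 'y_scale', 'y_zp'], ['y'], 'sig_out_quant')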
12 changes: 7 additions & 5 deletions neural_compressor/adaptor/ox_utils/operators/argmax.py
@@ -15,9 +15,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#


-from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator
+from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry

@op_registry(op_types="ArgMax")
class ArgMaxOperator(Operator):
@@ -35,5 +33,9 @@ def convert(self, convert_format):
origin_name = node.input[0].split('_argmax_node')[0]

if origin_name in self.quantizer.quantized_value_map:
node.input[0] = self.quantizer.quantized_value_map[origin_name].q_name
-node.name = node.name + '_quant'
+node.name = node.name + '_quant'

@qop_registry(op_types="ArgMax")
class QArgMaxOperator(QOperator):
def __init__(self, onnx_node, children, initializers):
super().__init__(onnx_node, children, initializers)
47 changes: 45 additions & 2 deletions neural_compressor/adaptor/ox_utils/operators/attention.py
@@ -17,8 +17,8 @@
#

import onnx
-from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator
-from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain
+from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, qop_registry, QOperator
+from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain, find_by_name

@op_registry(op_types="Attention")
class AttentionOperator(Operator):
@@ -74,3 +74,46 @@ def convert(self, convert_format):
self.quantizer.new_nodes.append(qattention_node)

self.quantizer.remove_nodes.append(node)

@qop_registry(op_types="QAttention")
class QAttentionOperator(QOperator):
def __init__(self, onnx_node, children, initializers):
super().__init__(onnx_node, children, initializers)

def convert(self):
node = self.node
add_nodes = []
inputs = []
inits = []
if find_by_name(node.input[3], self.initializers) is None:
return False, add_nodes, inits
# input dq
in_dq1 = onnx.helper.make_node(
'DequantizeLinear',
[node.input[0], node.input[3], node.input[6]],
[node.name + '_in_dequant1'],
node.name + '_in_dequant1')

in_dq2 = onnx.helper.make_node(
'DequantizeLinear',
[node.input[1], node.input[4], node.input[7]],
[node.name + '_in_dequant2'],
node.name + '_in_dequant2')
inputs = [node.name + '_in_dequant1',
node.name + '_in_dequant2',
node.input[2],
node.input[5]]

add_nodes.extend([in_dq1, in_dq2])

outputs = node.output
kwargs = {}
for attribute in node.attribute: # pragma: no cover
kwargs.update(attribute_to_kwarg(attribute))
kwargs["domain"] = ms_domain

binary_node = onnx.helper.make_node(
'Attention', inputs,
outputs, node.name + '_convert', **kwargs)
add_nodes.append(binary_node)
return True, add_nodes, inits
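
QAttention (com.microsoft domain) lays out its inputs as input(0), weight(1), bias(2), input_scale(3), weight_scale(4), mask_index(5), input_zero_point(6), weight_zero_point(7), and its output is already float, so the rewrite only needs the two input DequantizeLinear nodes and no trailing QuantizeLinear. The early return covers the case where input_scale is not a static initializer; a sketch of that guard under the same reading (helper name is illustrative):

from neural_compressor.adaptor.ox_utils.util import find_by_name

def can_convert_qattention(node, initializers):
    # node.input[3] is input_scale; the conversion is skipped when it is
    # not a graph initializer (i.e. not statically known).
    return find_by_name(node.input[3], initializers) is not None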
47 changes: 45 additions & 2 deletions neural_compressor/adaptor/ox_utils/operators/binary_op.py
@@ -17,7 +17,7 @@
#

import onnx
-from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator
+from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry
from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain

@op_registry(op_types="Add, Mul")
@@ -77,4 +77,47 @@ def convert(self, convert_format):
self.quantizer.new_nodes += [qlinear_binary_math_node]
self.quantizer.remove_nodes.extend(parents)
self.quantizer.remove_nodes.append(child)
-self.quantizer.remove_nodes.append(node)
+self.quantizer.remove_nodes.append(node)

@qop_registry(op_types="QLinearAdd, QLinearMul")
class QBinaryOperator(QOperator):
def __init__(self, onnx_node, children, initializers):
super().__init__(onnx_node, children, initializers)

def convert(self):
node = self.node
add_nodes = []
inits = []
# input dq
in_dq1 = onnx.helper.make_node(
'DequantizeLinear',
node.input[:3],
[node.name + '_in_dequant1'],
node.name + '_in_dequant1')

in_dq2 = onnx.helper.make_node(
'DequantizeLinear',
node.input[3:6],
[node.name + '_in_dequant2'],
node.name + '_in_dequant2')
inputs = [node.name + '_in_dequant1', node.name + '_in_dequant2']

add_nodes.extend([in_dq1, in_dq2])
# output q
out_q = onnx.helper.make_node(
'QuantizeLinear',
[node.name + '_out', node.input[6], node.input[7]],
node.output,
node.name + '_out_quant')
outputs = [node.name + '_out']
add_nodes.append(out_q)

kwargs = {}
for attribute in node.attribute: # pragma: no cover
kwargs.update(attribute_to_kwarg(attribute))

binary_node = onnx.helper.make_node(
node.op_type.split('QLinear')[-1], inputs,
outputs, node.name + '_convert', **kwargs)
add_nodes.append(binary_node)
return True, add_nodes, inits
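
A quick numeric sanity check of this rewrite (illustrative only; uint8 per-tensor quantization assumed): QLinearAdd matches dequantize -> Add -> quantize with the same scales and zero points.

import numpy as np

def dq(x_q, scale, zp):   # DequantizeLinear
    return (x_q.astype(np.int32) - zp) * scale

def q(x, scale, zp):      # QuantizeLinear, uint8
    return np.clip(np.round(x / scale) + zp, 0, 255).astype(np.uint8)

a = dq(np.array([100, 200], dtype=np.uint8), 0.1, 128)  # [-2.8, 7.2]
b = dq(np.array([50, 30], dtype=np.uint8), 0.2, 64)     # [-2.8, -6.8]
y_q = q(a + b, 0.3, 0)    # float Add between the DQ and Q nodes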
41 changes: 40 additions & 1 deletion neural_compressor/adaptor/ox_utils/operators/concat.py
@@ -17,7 +17,7 @@
#

import onnx
-from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator
+from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry
from neural_compressor.adaptor.ox_utils.util import attribute_to_kwarg, ms_domain

@op_registry(op_types="Concat")
@@ -96,3 +96,42 @@ def cast(self): # pragma: no cover
if node.input[0] not in [i.tensor_name for i in self.quantizer.new_value_info.values()]:
return
self.quantizer.dtype_cast(self.node, self.dtype)

@qop_registry(op_types="QLinearConcat")
class QConcatOperator(QOperator):
def __init__(self, onnx_node, children, initializers):
super().__init__(onnx_node, children, initializers)

def convert(self):
node = self.node
add_nodes = []
inputs = []
inits = []
# input dq
for i in range(int((len(node.input) - 2) / 3)):
in_dq = onnx.helper.make_node(
'DequantizeLinear',
node.input[2 + i*3 : 2 + (i+1)*3],
[node.name + '_in_dequant_' + str(i)],
node.name + '_in_dequant_' + str(i))
inputs.append(node.name + '_in_dequant_' + str(i))
add_nodes.append(in_dq)

# output q
out_q = onnx.helper.make_node(
'QuantizeLinear',
[node.name + '_out', node.input[0], node.input[1]],
node.output,
node.name + '_out_quant')
outputs = [node.name + '_out']
add_nodes.append(out_q)

kwargs = {}
for attribute in node.attribute: # pragma: no cover
kwargs.update(attribute_to_kwarg(attribute))

concat_node = onnx.helper.make_node(
'Concat', inputs,
outputs, node.name + '_convert', **kwargs)
add_nodes.append(concat_node)
return True, add_nodes, inits
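
The loop above relies on QLinearConcat's input layout: [Y_scale, Y_zero_point, X0, X0_scale, X0_zero_point, X1, ...], i.e. two output-quantization tensors followed by one (data, scale, zero_point) triplet per concatenated input, so the triplet count is (len(inputs) - 2) / 3. A small helper (illustrative, not in the PR) makes the slicing explicit:

def qlinearconcat_triplets(inputs):
    # inputs: [Y_scale, Y_zp, X0, X0_scale, X0_zp, X1, X1_scale, X1_zp, ...]
    n = (len(inputs) - 2) // 3
    return [inputs[2 + i*3 : 2 + (i+1)*3] for i in range(n)]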
70 changes: 69 additions & 1 deletion neural_compressor/adaptor/ox_utils/operators/conv.py
@@ -19,7 +19,7 @@

import onnx
from onnx import onnx_pb as onnx_proto
-from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator
+from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, QOperator, qop_registry
from neural_compressor.adaptor.ox_utils.util import find_by_name, attribute_to_kwarg

@op_registry(op_types="Conv, FusedConv")
@@ -156,6 +156,7 @@ def convert(self, convert_format):
if attribute.name == 'activation_params': # pragma: no cover
continue
kwargs.update(attribute_to_kwarg(attribute))

qlinear_conv_node = onnx.helper.make_node("QLinearConv", qlinear_conv_inputs,
[qlinear_conv_output],
node.name, **kwargs)
Expand All @@ -164,4 +165,71 @@ def convert(self, convert_format):
self.quantizer.remove_nodes.append(child)
self.quantizer.remove_nodes.append(node)

@qop_registry(op_types="QLinearConv")
class QConvOperator(QOperator):
def __init__(self, onnx_node, children, initializers):
super().__init__(onnx_node, children, initializers)

def convert(self):
node = self.node
add_nodes = []
inits = []
# input dq
in_dq1 = onnx.helper.make_node(
'DequantizeLinear',
node.input[:3],
[node.name + '_in_dequant1'],
node.name + '_in_dequant1')

in_dq2 = onnx.helper.make_node(
'DequantizeLinear',
node.input[3:6],
[node.name + '_in_dequant2'],
node.name + '_in_dequant2')

add_nodes.extend([in_dq1, in_dq2])
inputs = [node.name + '_in_dequant1', node.name + '_in_dequant2']
if len(node.input) == 9:
import numpy as np
input_scale = onnx.numpy_helper.to_array(
find_by_name(node.input[1], self.initializers))
weight_scale = onnx.numpy_helper.to_array(
find_by_name(node.input[4], self.initializers))
bias_scale = input_scale * weight_scale

# update scale initializer
bias_scale_data = np.asarray(bias_scale, dtype=np.float32).reshape(-1)
bias_scale_initializer = onnx.numpy_helper.from_array(bias_scale_data,
node.input[8] + '_scale')
inits.extend([bias_scale_initializer])

# update zero initializer
bias_zp_data = np.zeros(bias_scale.shape, dtype=np.int32).reshape(-1)
bias_zp_initializer = onnx.numpy_helper.from_array(
bias_zp_data, node.input[8] + '_zero_point')
inits.extend([bias_zp_initializer])
in_dq3 = onnx.helper.make_node(
'DequantizeLinear',
[node.input[8], bias_scale_initializer.name, bias_zp_initializer.name],
[node.name + '_in_dequant3'],
node.name + '_in_dequant3')
inputs.append(in_dq3.name)
add_nodes.append(in_dq3)
# output q
out_q = onnx.helper.make_node(
'QuantizeLinear',
[node.name + '_out', node.input[6], node.input[7]],
node.output,
node.name + '_out_quant')
outputs = [node.name + '_out']
add_nodes.append(out_q)

kwargs = {}
for attribute in node.attribute: # pragma: no cover
kwargs.update(attribute_to_kwarg(attribute))

binary_node = onnx.helper.make_node(
node.op_type.split('QLinear')[-1], inputs,
outputs, node.name + '_convert', **kwargs)
add_nodes.append(binary_node)
return True, add_nodes, inits
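
Background for the bias branch above: a nine-input QLinearConv carries an int32 bias (input[8]) quantized with scale = input_scale * weight_scale and zero point 0, which is why the code synthesizes fresh scale and zero-point initializers before adding the third DequantizeLinear; the reshape(-1) keeps this working when weight_scale is a per-channel vector. Illustrative numbers (not from the PR):

import numpy as np

input_scale, weight_scale = np.float32(0.02), np.float32(0.5)
bias_scale = input_scale * weight_scale          # 0.01
bias_q = np.array([300, -1200], dtype=np.int32)  # stored int32 bias
bias_float = bias_q * bias_scale                 # [3.0, -12.0]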
7 changes: 6 additions & 1 deletion neural_compressor/adaptor/ox_utils/operators/direct_q8.py
@@ -16,7 +16,7 @@
# limitations under the License.
#

-from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator
+from neural_compressor.adaptor.ox_utils.operators.ops import op_registry, Operator, qop_registry, QOperator

@op_registry(op_types="Reshape, Transpose, Squeeze, Unsqueeze")
class Direct8BitOperator(Operator):
@@ -83,3 +83,8 @@ def cast(self):
if node.input[0] not in [i.tensor_name for i in self.quantizer.new_value_info.values()]:
return
self.quantizer.dtype_cast(self.node, self.dtype)

@qop_registry(op_types="Reshape, Transpose, Squeeze, Unsqueeze")
class QDirectOperator(QOperator):
def __init__(self, onnx_node, children, initializers):
super().__init__(onnx_node, children, initializers)