Skip to content

Commit f10e263

Browse files
authored
Fix ONNXRT diagnosis bug (#1061)
Signed-off-by: yuwenzho <[email protected]>
1 parent 9d7546f commit f10e263

File tree

5 files changed

+142
-32
lines changed

5 files changed

+142
-32
lines changed

neural_compressor/adaptor/ox_utils/calibration.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -139,17 +139,22 @@ def augment_graph(self, activation_only=False, weight_only=False):
139139
(node.name in self.white_nodes)
140140
if should_be_dump:
141141
if not weight_only and not activation_only:
142-
tensors_to_dump.update(node.input)
142+
tensors_to_dump.update([input for input in node.input if len(input) != 0])
143+
tensors_to_dump.update([output for output in node.output if len(output) != 0])
143144
tensors_to_dump.update(node.output)
144145
elif weight_only:
145146
for input in node.input:
146147
if self.already_quantized and \
147-
input.replace('_dequantized', '_quantized') in initializers:
148+
input.replace('_dequantized', '_quantized') in initializers and \
149+
len(input) != 0:
148150
tensors_to_dump.add(input)
149-
elif not self.already_quantized and input in initializers:
151+
elif not self.already_quantized and \
152+
input in initializers and \
153+
len(input) != 0:
150154
tensors_to_dump.add(input)
151155
elif activation_only:
152-
tensors_to_dump.update([node.input[0]])
156+
if len(node.input[0]) != 0:
157+
tensors_to_dump.update([node.input[0]])
153158

154159
model_inputs = [i.name for i in model.graph.input]
155160
for tensor in tensors_to_dump:
@@ -525,6 +530,8 @@ def dump_tensor(self, activation=True, weight=False, format=None):
525530
for i in range(iters):
526531
if node.op_type in ['Attention', 'QAttention'] and tensor_name not in node.input[:2]:
527532
continue
533+
if node.op_type in ['MatMul', 'QLinearMatMul'] and tensor_name != node.input[0]:
534+
continue
528535
if is_qdq:
529536
map_node_activation[i][node_name] = \
530537
{tensor_name.replace('_dequantized', '').replace('_' + node_name, ''): tensors[i]}

neural_compressor/adaptor/ox_utils/quantizer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ def quantize_model(self):
149149
"""Quantize onnx model."""
150150
# step 1: insert q-dq, cast-cast pairs
151151
self.insert_qdq()
152-
152+
153153
# step 2: remove redundant pairs -> qdq model
154154
self.remove_redundant_pairs()
155155

@@ -158,7 +158,7 @@ def quantize_model(self):
158158

159159
self.merge_dedicated_qdq_pair()
160160

161-
self.model.remove_unused_constant()
161+
self.model.remove_unused_nodes()
162162

163163
self.model.model.producer_name = __producer__
164164
self.model.model.producer_version = __version__

neural_compressor/adaptor/ox_utils/smooth_quant.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ def transform(self, alpha=0.5, folding=True, percentile=99.999, op_types=['Gemm'
171171
if folding:
172172
self._fold_scale(scales)
173173
self.model.topological_sort()
174-
self.model.remove_unused_constant()
174+
self.model.remove_unused_nodes()
175175
return self.model
176176

177177
def recover(self):

neural_compressor/model/onnx_model.py

Lines changed: 51 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -330,32 +330,45 @@ def get_scale_zero(self, tensor):
330330
if not tensor.endswith('_quantized'):
331331
logger.debug("Find {} in the quantized graph is not quantized.".format(tensor))
332332
return None, None
333+
334+
def _searcher(tensor_name):
335+
"""Search scale and zero point tensor recursively."""
336+
node = self._input_name_to_nodes[tensor_name][0]
337+
parent = self._output_name_to_node[tensor_name] if tensor_name in self._output_name_to_node else None
338+
direct_int8 = ['Reshape', 'Transpose', 'Squeeze', 'Unsqueeze', 'MaxPool', 'Pad', 'Split']
339+
if parent is not None and parent.op_type in direct_int8:
340+
fp32_tensor_name = \
341+
parent.input[0].replace('_quantized', '')\
342+
.replace('_QuantizeLinear', '').replace('_QuantizeInput', '')
343+
elif node.op_type in ['Gather']: # pragma: no cover
344+
fp32_tensor_name = \
345+
node.output[0].replace('_quantized', '')\
346+
.replace('_QuantizeLinear', '').replace('_QuantizeInput', '')
347+
else:
348+
fp32_tensor_name = \
349+
tensor_name.replace('_quantized', '')\
350+
.replace('_QuantizeLinear', '').replace('_QuantizeInput', '')
351+
scale = fp32_tensor_name + '_scale'
352+
scale_tensor = self.get_initializer(scale)
353+
zo = fp32_tensor_name + '_zero_point'
354+
zo_tensor = self.get_initializer(zo)
355+
356+
if scale_tensor is None or zo_tensor is None:
357+
if parent is not None:
358+
scale_tensor, zo_tensor = _searcher(parent.input[0])
359+
return scale_tensor, zo_tensor
360+
333361
node = self._input_name_to_nodes[tensor][0]
334-
parent = self._output_name_to_node[tensor] if tensor in self._output_name_to_node else None
335-
direct_int8 = ['Reshape', 'Transpose', 'Squeeze', 'Unsqueeze', 'MaxPool', 'Pad']
336-
if parent is not None and parent.op_type in direct_int8:
337-
fp32_tensor_name = \
338-
parent.input[0].replace('_quantized', '').replace('_QuantizeLinear', '').replace('_QuantizeInput', '')
339-
elif node.op_type in ['Gather']:
340-
fp32_tensor_name = \
341-
node.output[0].replace('_quantized', '').replace('_QuantizeLinear', '').replace('_QuantizeInput', '')
342-
else:
343-
fp32_tensor_name = \
344-
tensor.replace('_quantized', '').replace('_QuantizeLinear', '').replace('_QuantizeInput', '')
345-
scale = fp32_tensor_name + '_scale'
346-
scale_tensor = self.get_initializer(scale)
347-
zo = fp32_tensor_name + '_zero_point'
348-
zo_tensor = self.get_initializer(zo)
349-
350362
# TODO: check whether scale_tensor and zero_point are needed
351363
# for bias of qlinearconv, scale and zero_point is not needed
352364
if (node.op_type == 'QLinearConv' and tensor == node.input[-1]) or \
353365
(node.op_type == 'QGemm' and tensor == node.input[-3]):
354-
pass
366+
return None, None
355367
else:
368+
scale_tensor, zo_tensor = _searcher(tensor)
356369
assert scale_tensor, 'missing scale for tensor {}'.format(tensor)
357370
assert zo_tensor, 'missing zero point for tensor {}'.format(tensor)
358-
return scale_tensor, zo_tensor
371+
return scale_tensor, zo_tensor
359372

360373
def save_model_to_file(self, output_path, use_external_data_format=False):
361374
"""Save model to external data, which is needed for model size > 2GB."""
@@ -406,8 +419,8 @@ def replace_output_of_all_nodes(self, old_output_name, new_output_name,
406419
if node.op_type not in black_optype:
407420
ONNXModel.replace_node_output(node, old_output_name, new_output_name)
408421

409-
def remove_unused_constant(self):
410-
"""Remove unused constant."""
422+
def remove_unused_nodes(self):
423+
"""Remove unused nodes."""
411424
unused_nodes = []
412425
nodes = self.nodes()
413426
for node in nodes:
@@ -420,6 +433,23 @@ def remove_unused_constant(self):
420433
self.get_children(node)[0].output[0] not in self._input_name_to_nodes:
421434
unused_nodes.append(node)
422435
unused_nodes.extend(self.get_children(node))
436+
else:
437+
# remove the node if it does not serve as the input or output of any other nodes
438+
unused = True
439+
for output in node.output:
440+
if output in self._input_name_to_nodes or \
441+
output in self.output():
442+
unused = False
443+
break
444+
for input in node.input:
445+
if self.get_initializer(input) is not None:
446+
continue
447+
elif input in self._output_name_to_node or \
448+
input in self.input():
449+
unused = False
450+
break
451+
if unused:
452+
unused_nodes.append(node)
423453
self.remove_nodes(unused_nodes)
424454

425455
ununsed_weights = []
@@ -616,7 +646,7 @@ def export(self, save_path, conf):
616646
self.remove_nodes(remove_nodes)
617647
self.add_initializers(inits)
618648
self.update()
619-
self.remove_unused_constant()
649+
self.remove_unused_nodes()
620650
self.topological_sort()
621651
self.save(save_path)
622652
else:

test/model/test_onnx_model.py

Lines changed: 77 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55
import unittest
66
import numpy as np
77

8-
sys.path.append('..')
98
from neural_compressor.model.onnx_model import ONNXModel
9+
from neural_compressor.data import Datasets, DATALOADERS
10+
from neural_compressor import quantization, PostTrainingQuantConfig
1011

1112
def get_onnx_model():
1213
model = torchvision.models.resnet18()
@@ -109,6 +110,48 @@ def setUp(self):
109110
model = helper.make_model(graph)
110111
self.q_model = ONNXModel(model)
111112

113+
# MatMul
114+
# |
115+
# Add
116+
# |
117+
# Reshape
118+
# |
119+
# Reshape
120+
# |
121+
# MatMul
122+
# |
123+
# Add
124+
125+
input = onnx.helper.make_tensor_value_info('input', onnx.TensorProto.FLOAT, [2, 4])
126+
127+
W1 = onnx.helper.make_tensor_value_info('W1', onnx.TensorProto.FLOAT, [4, 5])
128+
w1 = generate_input_initializer([4, 5], np.float32, 'W1')
129+
B1 = onnx.helper.make_tensor_value_info('b1', onnx.TensorProto.FLOAT, [5])
130+
b1 = generate_input_initializer([5], np.float32, 'b1')
131+
shape = numpy_helper.from_array(np.array((2, 5)).astype(np.int64), name='shape')
132+
W2 = onnx.helper.make_tensor_value_info('W2', onnx.TensorProto.FLOAT, [5, 6])
133+
w2 = generate_input_initializer([5, 6], np.float32, 'W2')
134+
B2 = onnx.helper.make_tensor_value_info('b2', onnx.TensorProto.FLOAT, [6])
135+
b2 = generate_input_initializer([6], np.float32, 'b2')
136+
output = onnx.helper.make_tensor_value_info('output', onnx.TensorProto.FLOAT, [2, 6])
137+
138+
node1 = onnx.helper.make_node('MatMul', inputs=['input', 'W1'], outputs=['y1'])
139+
node2 = onnx.helper.make_node('Add', inputs=['y1', 'b1'], outputs=['y1_add_b1'])
140+
node3 = onnx.helper.make_node('Reshape', inputs=['y1_add_b1', 'shape'], outputs=['y2'])
141+
node4 = onnx.helper.make_node('Reshape', inputs=['y2', 'shape'], outputs=['y3'])
142+
node5 = onnx.helper.make_node('MatMul', inputs=['y3', 'W2'], outputs=['y4'])
143+
node6 = onnx.helper.make_node('Add', inputs=['y4', 'b2'], outputs=['output'])
144+
145+
graph = onnx.helper.make_graph([node1, node2, node3, node4, node5, node6], 'test_matmul_reshape_graph', [input, W1, B1, W2, B2], [output])
146+
graph.initializer.add().CopyFrom(w1)
147+
graph.initializer.add().CopyFrom(b1)
148+
graph.initializer.add().CopyFrom(w2)
149+
graph.initializer.add().CopyFrom(b2)
150+
graph.initializer.add().CopyFrom(shape)
151+
152+
model = onnx.helper.make_model(graph, **{'opset_imports': [onnx.helper.make_opsetid('', 14)]})
153+
self.matmul_reshape_model = model
154+
112155
def test_nodes(self):
113156
self.assertEqual(len(self.model.nodes()), 6)
114157
nodes_name = [node.name for node in self.model.nodes()]
@@ -254,9 +297,29 @@ def test_find_nodes_by_initializer(self):
254297
self.assertEqual(nodes[0].name, "Conv1")
255298

256299
def test_get_scale_zero(self):
257-
input_scale, input_zero = self.q_model.get_scale_zero('B_quantized')
258-
weight_scale, weight_zero = self.q_model.get_scale_zero('C_quantized')
259-
bias_scale, bias_zero = self.q_model.get_scale_zero('E')
300+
import time
301+
result = [0.1]
302+
def sub_eval(model, result):
303+
time.sleep(0.001 * len(result))
304+
return result[0]
305+
306+
def eval(model):
307+
return sub_eval(model, result)
308+
309+
dataset = Datasets("onnxrt_qdq")["dummy"]((4, 4), low=0., high=0., dtype='float32')
310+
dataloader = DATALOADERS["onnxrt_qdq"](dataset, 2)
311+
config = PostTrainingQuantConfig()
312+
q_model = quantization.fit(self.matmul_reshape_model, config,
313+
calib_dataloader=dataloader, eval_func=eval)
314+
q_model.save('test.onnx')
315+
scale, zp = q_model.get_scale_zero('y3_QuantizeInput_quantized')
316+
self.assertEqual(scale.name, 'y1_add_b1_scale')
317+
self.assertEqual(zp.name, 'y1_add_b1_zero_point')
318+
319+
scale, zp = q_model.get_scale_zero('input_quantized')
320+
self.assertEqual(scale.name, 'input_scale')
321+
self.assertEqual(zp.name, 'input_zero_point')
322+
260323

261324
def test_save(self):
262325
self.model.save_model_to_file('./test_model_6.onnx', use_external_data_format=True)
@@ -268,5 +331,15 @@ def test_find_by_name(self):
268331
initializer = find_by_name('X1', self.model.initializer())
269332
self.assertIsNone(initializer)
270333

334+
def test_remove_unused_nodes(self):
335+
self.assertEqual(len(self.model.nodes()), 6)
336+
node_to_add = onnx.helper.make_node('Relu', ['output1'], ['output2'], keepdims=0, name='added_relu')
337+
self.model.add_node(node_to_add)
338+
self.assertEqual(len(self.model.nodes()), 7)
339+
self.model.remove_unused_nodes()
340+
self.assertEqual(len(self.model.nodes()), 6)
341+
342+
343+
271344
if __name__ == "__main__":
272345
unittest.main()

0 commit comments

Comments
 (0)