Skip to content

Commit dfe8c4d

Browse files
committed
add output scale
Signed-off-by: Xin He <[email protected]>
1 parent 5170483 commit dfe8c4d

File tree

1 file changed

5 additions and 5 deletions (10 lines changed)

1 file changed

5 additions and 5 deletions (10 lines changed)

neural_compressor/experimental/export/torch2onnx.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -232,7 +232,7 @@ def get_quantizable_onnx_ops(
232232
'Embedding' in str(module.__class__.__name__) or \
233233
'Linear' in str(module.__class__.__name__):
234234
if hasattr(module, 'weight') and callable(module.weight):
235-
if module.weight().dtype == torch.qint8:
235+
if module.weight().dtype in [torch.qint8, torch.quint8]:
236236
node = module_node_mapping[name.split('.module')[0]]
237237
quantize_nodes.append(node)
238238
return quantize_nodes
@@ -300,10 +300,10 @@ def build_scale_mapping(
300300
scale_zp_dict[input_scale_args] = recoder['input_scale']
301301
scale_zp_dict[input_zp_args] = recoder['input_zeropoint']
302302
### We need Matmul+Add to match Linear for output scale and zero-point
303-
# output_scale_args = node.output[0] + '_scale'
304-
# output_zp_args = node.output[0] + '_zero_point'
305-
# scale_zp_dict[output_scale_args] = recoder['output_scale']
306-
# scale_zp_dict[output_zp_args] = recoder['output_zeropoint']
303+
output_scale_args = node.output[0] + '_scale'
304+
output_zp_args = node.output[0] + '_zero_point'
305+
scale_zp_dict[output_scale_args] = recoder['output_scale']
306+
scale_zp_dict[output_zp_args] = recoder['output_zeropoint']
307307
return scale_zp_dict
308308

309309

0 commit comments

Comments
 (0)