Add output scale and zero-point to scale mapping; accept quint8 weights

xin3he · xin3he · commit dfe8c4dfd96a · 2022-12-02T16:20:04.000+08:00
Signed-off-by: Xin He <xin3.he@intel.com>
diff --git a/neural_compressor/experimental/export/torch2onnx.py b/neural_compressor/experimental/export/torch2onnx.py
@@ -232,7 +232,7 @@ def get_quantizable_onnx_ops(
           'Embedding' in str(module.__class__.__name__) or \
           'Linear' in str(module.__class__.__name__):
             if hasattr(module, 'weight') and callable(module.weight):
-                if module.weight().dtype == torch.qint8:
+                if module.weight().dtype in [torch.qint8, torch.quint8]:
                     node = module_node_mapping[name.split('.module')[0]]
                     quantize_nodes.append(node)
     return quantize_nodes
@@ -300,10 +300,10 @@ def build_scale_mapping(
                 scale_zp_dict[input_scale_args] = recoder['input_scale']
                 scale_zp_dict[input_zp_args] = recoder['input_zeropoint']
                 ### We need Matmul+Add to match Linear for output scale and zero-point
-                # output_scale_args = node.output[0] + '_scale'
-                # output_zp_args = node.output[0] + '_zero_point'
-                # scale_zp_dict[output_scale_args] = recoder['output_scale']
-                # scale_zp_dict[output_zp_args] = recoder['output_zeropoint']
+                output_scale_args = node.output[0] + '_scale'
+                output_zp_args = node.output[0] + '_zero_point'
+                scale_zp_dict[output_scale_args] = recoder['output_scale']
+                scale_zp_dict[output_zp_args] = recoder['output_zeropoint']
     return scale_zp_dict