Commit 7ffbbf1

xin3he authored and yiliu30 committed

Add export examples for new API (#225)

Signed-off-by: Xin He <[email protected]>

1 parent 620c5f1 · commit 7ffbbf1

File tree

12 files changed: +143, -8 lines


examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/README.md

Lines changed: 5 additions & 0 deletions
```diff
@@ -198,5 +198,10 @@ Shapley values originate from cooperative game theory that come with desirable p
 > **Note** : run_glue_tune_with_shap.py is the example of "SST2" task. If you want to execute other glue task, you may take some slight change under "ShapleyMSE" class.
 
 
+# Appendix
 
+## Export to ONNX
 
+Right now, we experimentally support exporting a PyTorch model to an ONNX model, including both FP32 and INT8 models.
+
+By enabling the `--onnx` argument, Intel Neural Compressor will export an FP32 ONNX model, an INT8 QDQ ONNX model, and an INT8 QLinear ONNX model.
```
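As a quick sanity check that the exported file loads and runs, a minimal sketch with onnxruntime (not part of this commit) could look like the following; the file name matches the export call added to run_glue_tune.py below, and the all-ones int64 inputs assume a BERT-style tokenizer:

```python
import numpy as np
import onnxruntime as ort

# Load the INT8 model produced by --onnx and run one dummy batch.
# batch_size and max_seq_len were declared as dynamic axes at export time,
# so any (batch, seq) shape is accepted here.
sess = ort.InferenceSession("int8-nlp-model.onnx")
feed = {inp.name: np.ones((1, 128), dtype=np.int64) for inp in sess.get_inputs()}
logits = sess.run(None, feed)[0]
print(logits.shape)  # expected: (1, num_labels) for a sequence-classification head
```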

examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/requirements.txt

Lines changed: 3 additions & 1 deletion
```diff
@@ -6,4 +6,6 @@ torch >= 1.3
 transformers>=4.10.0
 shap
 scipy
-sacremoses
+sacremoses
+onnx
+onnxruntime
```

examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_glue_tune.py

Lines changed: 30 additions & 5 deletions
```diff
@@ -144,18 +144,25 @@ class ModelArguments:
     tune: bool = field(
         default=False,
         metadata={
-            "help": "tune quantized model with Intel Neural Compressor)."
-        },
+            "help": "tune quantized model with Intel Neural Compressor)."},
     )
     benchmark: bool = field(
         default=False,
-        metadata={"help": "run benchmark."})
+        metadata={"help": "run benchmark."},
+    )
     int8: bool = field(
         default=False,
-        metadata={"help":"run benchmark."})
+        metadata={"help":"initialize int8 model."},
+    )
     accuracy_only: bool = field(
         default=False,
-        metadata={"help":"Whether to only test accuracy for model tuned by Neural Compressor."})
+        metadata={"help":"Whether to only test accuracy for model tuned by Neural Compressor."},
+    )
+    onnx: bool = field(
+        default=False, metadata={"help": "convert PyTorch model to ONNX"}
+    )
+
+
 def main():
     # See all possible arguments in src/transformers/training_args.py
     # or by passing the --help flag to this script.
@@ -439,6 +446,24 @@ def eval_func_for_nc(model_tuned):
         q_model = fit(model, conf=conf, eval_func=eval_func_for_nc)
         from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream
         save_for_huggingface_upstream(q_model, tokenizer, training_args.output_dir)
+
+        if model_args.onnx:
+            eval_dataloader = trainer.get_eval_dataloader()
+            it = iter(eval_dataloader)
+            input = next(it)
+            input.pop('labels')
+            symbolic_names = {0: 'batch_size', 1: 'max_seq_len'}
+            dynamic_axes = {k: symbolic_names for k in input.keys()}
+            from neural_compressor.config import Torch2ONNXConfig
+            int8_onnx_config = Torch2ONNXConfig(
+                dtype="int8",
+                opset_version=14,
+                example_inputs=tuple(input.values()),
+                input_names=list(input.keys()),
+                output_names=['labels'],
+                dynamic_axes=dynamic_axes,
+            )
+            q_model.export('int8-nlp-model.onnx', int8_onnx_config)
         exit(0)
 
     if model_args.accuracy_only:
```
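The added block derives `example_inputs`, `input_names`, and `dynamic_axes` from one batch of the evaluation dataloader. A standalone sketch of the same preparation, assuming a BERT-style tokenizer (the checkpoint name and sample text here are placeholders, not part of this commit):

```python
from transformers import AutoTokenizer

# Tokenize one example; the result is a dict of tensors keyed by input name
# (input_ids, token_type_ids, attention_mask for a BERT tokenizer).
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
encoded = tokenizer("a placeholder sentence", return_tensors="pt")

# Mark dim 0 (batch) and dim 1 (sequence) of every input as dynamic,
# mirroring what the diff builds from the dataloader batch.
symbolic_names = {0: "batch_size", 1: "max_seq_len"}
dynamic_axes = {name: symbolic_names for name in encoded.keys()}
example_inputs = tuple(encoded.values())
input_names = list(encoded.keys())
```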

examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/eager/run_tuning.sh

Lines changed: 1 addition & 0 deletions
```diff
@@ -86,6 +86,7 @@ function run_tuning {
         --no_cuda \
         --output_dir ${tuned_checkpoint} \
         --tune \
+        --onnx \
         ${extra_cmd}
 }
```

examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/README.md

Lines changed: 8 additions & 0 deletions
````diff
@@ -187,3 +187,11 @@ quantizer.model = common.Model(model)
 model = quantizer.fit()
 model.save(training_args.output_dir)
 ```
+
+# Appendix
+
+## Export to ONNX
+
+Right now, we experimentally support exporting a PyTorch model to an ONNX model, including both FP32 and INT8 models.
+
+By enabling the `--onnx` argument, Intel Neural Compressor will export an FP32 ONNX model, an INT8 QDQ ONNX model, and an INT8 QLinear ONNX model.
````

examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/requirements.txt

Lines changed: 2 additions & 0 deletions
```diff
@@ -4,6 +4,8 @@ protobuf
 scipy
 scikit-learn
 Keras-Preprocessing
+onnx
+onnxruntime
 transformers >= 4.16.0
 --find-links https://download.pytorch.org/whl/torch_stable.html
 torch >= 1.8.0+cpu
```

examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_glue.py

Lines changed: 42 additions & 2 deletions
```diff
@@ -195,6 +195,9 @@ class ModelArguments:
     accuracy_only: bool = field(
         default=False, metadata={"help": "get accuracy"}
     )
+    onnx: bool = field(
+        default=False, metadata={"help": "convert PyTorch model to ONNX"}
+    )
 
 
 def main():
@@ -502,9 +505,46 @@ def eval_func(model):
         from neural_compressor.config import PostTrainingQuantConfig, TuningCriterion
         tuning_criterion = TuningCriterion(max_trials=600)
         conf = PostTrainingQuantConfig(approach="static", backend="pytorch_fx", tuning_criterion=tuning_criterion)
-        model = fit(model, conf=conf, calib_dataloader=eval_dataloader, eval_func=eval_func)
+        q_model = fit(model, conf=conf, calib_dataloader=eval_dataloader, eval_func=eval_func)
         from neural_compressor.utils.load_huggingface import save_for_huggingface_upstream
-        save_for_huggingface_upstream(model, tokenizer, training_args.output_dir)
+        save_for_huggingface_upstream(q_model, tokenizer, training_args.output_dir)
+
+        if model_args.onnx:
+            it = iter(eval_dataloader)
+            input = next(it)
+            input.pop('labels')
+            symbolic_names = {0: 'batch_size', 1: 'max_seq_len'}
+            dynamic_axes = {k: symbolic_names for k in input.keys()}
+            from neural_compressor.config import Torch2ONNXConfig
+            fp32_onnx_config = Torch2ONNXConfig(
+                dtype="fp32",
+                opset_version=14,
+                example_inputs=tuple(input.values()),
+                input_names=list(input.keys()),
+                output_names=['labels'],
+                dynamic_axes=dynamic_axes,
+            )
+            q_model.export('fp32-model.onnx', fp32_onnx_config)
+            int8_onnx_config = Torch2ONNXConfig(
+                dtype="int8",
+                opset_version=14,
+                quant_format="QDQ",
+                example_inputs=tuple(input.values()),
+                input_names=list(input.keys()),
+                output_names=['labels'],
+                dynamic_axes=dynamic_axes,
+            )
+            q_model.export('int8-nlp-qdq-model.onnx', int8_onnx_config)
+            int8_onnx_config = Torch2ONNXConfig(
+                dtype="int8",
+                opset_version=14,
+                quant_format="QLinear",
+                example_inputs=tuple(input.values()),
+                input_names=list(input.keys()),
+                output_names=['labels'],
+                dynamic_axes=dynamic_axes,
+            )
+            q_model.export('int8-nlp-qlinear-model.onnx', int8_onnx_config)
         return
 
     if model_args.benchmark or model_args.accuracy_only:
```
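The static-quantization script now exports both INT8 formats. A QDQ graph keeps explicit QuantizeLinear/DequantizeLinear pairs around operators, while a QLinear graph folds quantization into fused QLinear* ops such as QLinearMatMul. A small sketch (not part of this commit) to inspect that difference in the files exported above, assuming both exports succeeded:

```python
from collections import Counter

import onnx

# Count op types in each exported graph and print the quantization-related
# ones, which is where the QDQ and QLinear formats visibly diverge.
for path in ["int8-nlp-qdq-model.onnx", "int8-nlp-qlinear-model.onnx"]:
    graph = onnx.load(path).graph
    ops = Counter(node.op_type for node in graph.node)
    print(path, {op: n for op, n in ops.items()
                 if "quant" in op.lower() or op.startswith("QLinear")})
```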

examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_tuning.sh

Lines changed: 1 addition & 0 deletions
```diff
@@ -92,6 +92,7 @@ function run_tuning {
         --no_cuda \
         --output_dir ${tuned_checkpoint} \
         --tune \
+        --onnx \
         --overwrite_output_dir \
         ${extra_cmd}
 }
```

examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/README.md

Lines changed: 8 additions & 0 deletions
````diff
@@ -117,3 +117,11 @@ model = OptimizedModel.from_pretrained(
 ```
 
 We also upstreamed several int8 models into HuggingFace [model hub](https://huggingface.co/models?other=Intel%C2%AE%20Neural%20Compressor) for users to ramp up.
+
+# Appendix
+
+## Export to ONNX
+
+Right now, we experimentally support exporting a PyTorch model to an ONNX model, including both FP32 and INT8 models.
+
+By enabling the `--onnx` argument, Intel Neural Compressor will export an FP32 ONNX model, an INT8 QDQ ONNX model, and an INT8 QLinear ONNX model.
````
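One way to gauge the numerical cost of quantization on the exported graphs is to run the FP32 and INT8 models on the same batch and compare logits. A minimal sketch, assuming the file names produced by the export calls shown earlier and BERT-style int64 inputs (dummy data only, so this checks numerical drift, not task accuracy):

```python
import numpy as np
import onnxruntime as ort

def run(path, token_batch):
    # Feed the same dummy tensor to every model input (ids / mask / type ids).
    sess = ort.InferenceSession(path)
    feed = {inp.name: token_batch for inp in sess.get_inputs()}
    return sess.run(None, feed)[0]

dummy = np.ones((1, 128), dtype=np.int64)
fp32_logits = run("fp32-model.onnx", dummy)
int8_logits = run("int8-nlp-qdq-model.onnx", dummy)
# Quantization error shows up as a small drift between the two outputs.
print("max abs diff:", np.abs(fp32_logits - int8_logits).max())
```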

examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/requirements.txt

Lines changed: 2 additions & 0 deletions
```diff
@@ -4,5 +4,7 @@ datasets == 1.18.0
 sentencepiece != 0.1.92
 protobuf
 scipy
+onnx
+onnxruntime
 --find-links https://download.pytorch.org/whl/torch_stable.html
 torch >= 1.8.0+cpu
```
