From 574021fbbb55d47591f86554e8856cfb92979188 Mon Sep 17 00:00:00 2001 From: Kaihui-intel Date: Tue, 21 Nov 2023 15:59:57 +0800 Subject: [PATCH 1/5] support for restoring ipex model Signed-off-by: Kaihui-intel --- neural_compressor/utils/pytorch.py | 46 +++++++++++++++++++++++++++++ test/algorithm/test_smooth_quant.py | 19 ++++++++++++ 2 files changed, 65 insertions(+) diff --git a/neural_compressor/utils/pytorch.py b/neural_compressor/utils/pytorch.py index cda525f1cc6..462a39a1c33 100644 --- a/neural_compressor/utils/pytorch.py +++ b/neural_compressor/utils/pytorch.py @@ -464,3 +464,49 @@ def load(checkpoint_dir=None, model=None, layer_wise=False, history_cfg=None, ** assert len(mismatch_log.unexpected_keys) == 0, "Loading state_dict failed: {}".format(mismatch_log) util.get_embedding_contiguous(model) return model + +def recover_model_from_json(model, json_file_path, example_inputs): + """Recover ipex model from JSON file. + + Args: + model (object): fp32 model need to do quantization. + json_file_path (json): configuration JSON file for ipex. + example_inputs (tuple or torch.Tensor): example inputs that will be passed to the ipex function. + + Returns: + (object): quantized model + """ + from ..utils.utility import LazyImport + ipex = LazyImport("intel_extension_for_pytorch") + from torch.ao.quantization.observer import MinMaxObserver + qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping( + alpha=0.5, act_observer=MinMaxObserver() + ) + if isinstance(example_inputs, dict): + model = ipex.quantization.prepare(model, qconfig, example_kwarg_inputs=example_inputs, inplace=True) + else: + model = ipex.quantization.prepare(model, qconfig, example_inputs=example_inputs, inplace=True) + model.load_qconf_summary(qconf_summary=json_file_path) + model = ipex.quantization.convert(model, inplace=True) + with torch.no_grad(): + try: + if isinstance(example_inputs, dict): + model = torch.jit.trace(model, example_kwarg_inputs=example_inputs) + else: + model = torch.jit.trace(model, example_inputs) + model = torch.jit.freeze(model.eval()) + except: + if isinstance(example_inputs, dict): + model = torch.jit.trace( + model, example_kwarg_inputs=example_inputs, strict=False, check_trace=False + ) + else: + model = torch.jit.trace(model, example_inputs, strict=False) + model = torch.jit.freeze(model.eval()) + if isinstance(example_inputs, dict): + model(**example_inputs) + model(**example_inputs) + else: + model(example_inputs) + model(example_inputs) + return model diff --git a/test/algorithm/test_smooth_quant.py b/test/algorithm/test_smooth_quant.py index a6c99e66a69..c4740b43507 100644 --- a/test/algorithm/test_smooth_quant.py +++ b/test/algorithm/test_smooth_quant.py @@ -880,6 +880,25 @@ def calib_func(model): calib_func=calib_func, ) q_model.save("saved") + # test recover_model_from_json + from neural_compressor.utils.pytorch import recover_model_from_json + tmp_model = copy.deepcopy(fp32_model) + + ipex_model = recover_model_from_json(tmp_model, "./saved/best_configure.json", example_inputs=input_ids) + inc_output = q_model.model(input_ids) + ipex_output = ipex_model(input_ids) + self.assertTrue(torch.allclose(inc_output, ipex_output, atol=1e-05)) + + example_tuple = (input_ids, ) + ipex_model = recover_model_from_json(tmp_model, "./saved/best_configure.json", example_inputs=example_tuple) + ipex_output = ipex_model(input_ids) + self.assertTrue(torch.allclose(inc_output, ipex_output, atol=1e-05)) + + example_dict = {"x": input_ids} + ipex_model = recover_model_from_json(tmp_model, "./saved/best_configure.json", example_inputs=example_dict) + ipex_output = ipex_model(input_ids) + self.assertTrue(torch.allclose(inc_output, ipex_output, atol=1e-05)) + # compare ipex and inc quantization with open("saved/best_configure.json", "r") as f: inc_config_json = json.load(f) From cdddd3ccf0ca42e730560a8319b39da92aa4dd01 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 21 Nov 2023 08:43:25 +0000 Subject: [PATCH 2/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- neural_compressor/utils/pytorch.py | 11 +++++------ test/algorithm/test_smooth_quant.py | 3 ++- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/neural_compressor/utils/pytorch.py b/neural_compressor/utils/pytorch.py index 462a39a1c33..7e3a1b15a95 100644 --- a/neural_compressor/utils/pytorch.py +++ b/neural_compressor/utils/pytorch.py @@ -465,6 +465,7 @@ def load(checkpoint_dir=None, model=None, layer_wise=False, history_cfg=None, ** util.get_embedding_contiguous(model) return model + def recover_model_from_json(model, json_file_path, example_inputs): """Recover ipex model from JSON file. @@ -477,11 +478,11 @@ def recover_model_from_json(model, json_file_path, example_inputs): (object): quantized model """ from ..utils.utility import LazyImport + ipex = LazyImport("intel_extension_for_pytorch") from torch.ao.quantization.observer import MinMaxObserver - qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping( - alpha=0.5, act_observer=MinMaxObserver() - ) + + qconfig = ipex.quantization.get_smooth_quant_qconfig_mapping(alpha=0.5, act_observer=MinMaxObserver()) if isinstance(example_inputs, dict): model = ipex.quantization.prepare(model, qconfig, example_kwarg_inputs=example_inputs, inplace=True) else: @@ -497,9 +498,7 @@ def recover_model_from_json(model, json_file_path, example_inputs): model = torch.jit.freeze(model.eval()) except: if isinstance(example_inputs, dict): - model = torch.jit.trace( - model, example_kwarg_inputs=example_inputs, strict=False, check_trace=False - ) + model = torch.jit.trace(model, example_kwarg_inputs=example_inputs, strict=False, check_trace=False) else: model = torch.jit.trace(model, example_inputs, strict=False) model = torch.jit.freeze(model.eval()) diff --git a/test/algorithm/test_smooth_quant.py b/test/algorithm/test_smooth_quant.py index c4740b43507..45ef03af420 100644 --- a/test/algorithm/test_smooth_quant.py +++ b/test/algorithm/test_smooth_quant.py @@ -882,6 +882,7 @@ def calib_func(model): q_model.save("saved") # test recover_model_from_json from neural_compressor.utils.pytorch import recover_model_from_json + tmp_model = copy.deepcopy(fp32_model) ipex_model = recover_model_from_json(tmp_model, "./saved/best_configure.json", example_inputs=input_ids) @@ -889,7 +890,7 @@ def calib_func(model): ipex_output = ipex_model(input_ids) self.assertTrue(torch.allclose(inc_output, ipex_output, atol=1e-05)) - example_tuple = (input_ids, ) + example_tuple = (input_ids,) ipex_model = recover_model_from_json(tmp_model, "./saved/best_configure.json", example_inputs=example_tuple) ipex_output = ipex_model(input_ids) self.assertTrue(torch.allclose(inc_output, ipex_output, atol=1e-05)) From a92ba34170964711ae00cb3ccb57fa221542767b Mon Sep 17 00:00:00 2001 From: Kaihui-intel Date: Tue, 21 Nov 2023 16:06:33 +0800 Subject: [PATCH 3/5] update docstring Signed-off-by: Kaihui-intel --- neural_compressor/utils/pytorch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_compressor/utils/pytorch.py b/neural_compressor/utils/pytorch.py index 7e3a1b15a95..f9cd72505e4 100644 --- a/neural_compressor/utils/pytorch.py +++ b/neural_compressor/utils/pytorch.py @@ -472,7 +472,7 @@ def recover_model_from_json(model, json_file_path, example_inputs): Args: model (object): fp32 model need to do quantization. json_file_path (json): configuration JSON file for ipex. - example_inputs (tuple or torch.Tensor): example inputs that will be passed to the ipex function. + example_inputs (tuple or torch.Tensor or dict): example inputs that will be passed to the ipex function. Returns: (object): quantized model From b33b523c7c2ad9eb6bc0676992675bf2250f33f2 Mon Sep 17 00:00:00 2001 From: Kaihui-intel Date: Tue, 21 Nov 2023 16:43:18 +0800 Subject: [PATCH 4/5] fix pylint Signed-off-by: Kaihui-intel --- neural_compressor/utils/pytorch.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/neural_compressor/utils/pytorch.py b/neural_compressor/utils/pytorch.py index f9cd72505e4..a5216f1d519 100644 --- a/neural_compressor/utils/pytorch.py +++ b/neural_compressor/utils/pytorch.py @@ -492,12 +492,14 @@ def recover_model_from_json(model, json_file_path, example_inputs): with torch.no_grad(): try: if isinstance(example_inputs, dict): + # pylint: disable=E1120 model = torch.jit.trace(model, example_kwarg_inputs=example_inputs) else: model = torch.jit.trace(model, example_inputs) model = torch.jit.freeze(model.eval()) except: if isinstance(example_inputs, dict): + # pylint: disable=E1120 model = torch.jit.trace(model, example_kwarg_inputs=example_inputs, strict=False, check_trace=False) else: model = torch.jit.trace(model, example_inputs, strict=False) From 113153331f5987878da3eb586a08eaf4a0932ea1 Mon Sep 17 00:00:00 2001 From: Kaihui-intel Date: Tue, 21 Nov 2023 16:51:03 +0800 Subject: [PATCH 5/5] fix pylint Signed-off-by: Kaihui-intel --- neural_compressor/utils/pytorch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/neural_compressor/utils/pytorch.py b/neural_compressor/utils/pytorch.py index a5216f1d519..f2267b1ca1a 100644 --- a/neural_compressor/utils/pytorch.py +++ b/neural_compressor/utils/pytorch.py @@ -492,14 +492,14 @@ def recover_model_from_json(model, json_file_path, example_inputs): with torch.no_grad(): try: if isinstance(example_inputs, dict): - # pylint: disable=E1120 + # pylint: disable=E1120,E1123 model = torch.jit.trace(model, example_kwarg_inputs=example_inputs) else: model = torch.jit.trace(model, example_inputs) model = torch.jit.freeze(model.eval()) except: if isinstance(example_inputs, dict): - # pylint: disable=E1120 + # pylint: disable=E1120,E1123 model = torch.jit.trace(model, example_kwarg_inputs=example_inputs, strict=False, check_trace=False) else: model = torch.jit.trace(model, example_inputs, strict=False)