diff --git a/neural_compressor/utils/pytorch.py b/neural_compressor/utils/pytorch.py
index 98d79570c6a..ad2d090b0a8 100644
--- a/neural_compressor/utils/pytorch.py
+++ b/neural_compressor/utils/pytorch.py
@@ -507,6 +507,9 @@ def recover_model_from_json(model, json_file_path, example_inputs):
     if isinstance(example_inputs, dict):
         model(**example_inputs)
         model(**example_inputs)
+    elif isinstance(example_inputs, tuple) or isinstance(example_inputs, list):
+        model(*example_inputs)
+        model(*example_inputs)
     else:
         model(example_inputs)
         model(example_inputs)
diff --git a/test/model/test_model_pytorch.py b/test/model/test_model_pytorch.py
index f0990b6558c..34ac9f51596 100644
--- a/test/model/test_model_pytorch.py
+++ b/test/model/test_model_pytorch.py
@@ -114,19 +114,19 @@ def test_WeightOnlyLinear(self):
         for dtype in compression_dtype:
             new_model = Model()
             inc_model = INCModel(new_model)
-            inc_model.export_compressed_model(
+            compressed_model = inc_model.export_compressed_model(
                 qweight_config_path="saved/qconfig.json",
                 compression_dtype=dtype,
                 scale_dtype=torch.float32,
                 use_optimum_format=False,
             )
             out2 = q_model(input)
-            torch.save(inc_model.state_dict(), "saved/tmp.pt")
+            torch.save(compressed_model.state_dict(), "saved/tmp.pt")
             model_size2 = os.path.getsize("saved/tmp.pt") / 1024
             print("WeightOnlyLinear Model size:{:.3f}M".format(model_size2))
-            self.assertTrue(isinstance(inc_model.model.fc1, WeightOnlyLinear))
-            self.assertTrue(inc_model.model.fc1.qweight.dtype == dtype)
-            self.assertTrue(inc_model.model.fc1.scales.dtype == torch.float32)
+            self.assertTrue(isinstance(compressed_model.fc1, WeightOnlyLinear))
+            self.assertTrue(compressed_model.fc1.qweight.dtype == dtype)
+            self.assertTrue(compressed_model.fc1.scales.dtype == torch.float32)
             self.assertTrue(model_size1 / model_size2 > 2)
             self.assertTrue(torch.all(torch.isclose(out1, out2, atol=5e-1)))

@@ -135,35 +135,35 @@ def test_WeightOnlyLinear(self):
         for dim in compress_dims:
             new_model = Model()
             inc_model = INCModel(new_model)
-            inc_model.export_compressed_model(
+            compressed_model = inc_model.export_compressed_model(
                 qweight_config_path="saved/qconfig.json",
                 compression_dim=dim,
                 use_optimum_format=False,
             )
             out2 = q_model(input)
-            torch.save(inc_model.state_dict(), "saved/tmp.pt")
+            torch.save(compressed_model.state_dict(), "saved/tmp.pt")
             model_size2 = os.path.getsize("saved/tmp.pt") / 1024
             print("WeightOnlyLinear Model size:{:.3f}M".format(model_size2))
-            self.assertTrue(isinstance(inc_model.model.fc1, WeightOnlyLinear))
+            self.assertTrue(isinstance(compressed_model.fc1, WeightOnlyLinear))
             if dim == 1:
-                self.assertTrue(inc_model.model.fc1.qweight.shape[0] == inc_model.model.fc1.out_features)
+                self.assertTrue(compressed_model.fc1.qweight.shape[1] != compressed_model.fc1.in_features)
             else:
-                self.assertTrue(inc_model.model.fc1.qweight.shape[1] == inc_model.model.fc1.in_features)
+                self.assertTrue(compressed_model.fc1.qweight.shape[0] != compressed_model.fc1.out_features)
             self.assertTrue(model_size1 / model_size2 > 2)
             self.assertTrue(torch.all(torch.isclose(out1, out2, atol=5e-1)))

         # test half dtype
         new_model = Model()
         inc_model = INCModel(new_model)
-        inc_model.export_compressed_model(
+        compressed_model = inc_model.export_compressed_model(
             qweight_config_path="saved/qconfig.json",
         )
         out2 = q_model(input)
-        torch.save(inc_model.state_dict(), "saved/tmp.pt")
+        torch.save(compressed_model.state_dict(), "saved/tmp.pt")
         model_size2 = os.path.getsize("saved/tmp.pt") / 1024
         print("WeightOnlyLinear Model size:{:.3f}M".format(model_size2))
-        self.assertTrue(isinstance(inc_model.model.fc1, WeightOnlyLinear))
-        self.assertTrue(inc_model.model.fc1.scales.dtype == torch.float16)
+        self.assertTrue(isinstance(compressed_model.fc1, WeightOnlyLinear))
+        self.assertTrue(compressed_model.fc1.scales.dtype == torch.float16)
         self.assertTrue(model_size1 / model_size2 > 2)
         self.assertTrue(torch.all(torch.isclose(out1, out2, atol=5e-1)))
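Note: a minimal sketch of how the new tuple/list branch in recover_model_from_json would be exercised. TwoInputModel, the tensor shapes, and the JSON path below are illustrative assumptions, not taken from this patch:

import torch
from neural_compressor.utils.pytorch import recover_model_from_json

# Hypothetical module whose forward takes positional arguments; any such
# model needs its example inputs unpacked with *, which is what the new
# elif branch does.
class TwoInputModel(torch.nn.Module):
    def forward(self, x, y):
        return x + y

# Before this patch, tuple/list example inputs fell through to the final
# else branch and were passed as a single positional argument
# (model(example_inputs)), which fails for multi-input models.
example_inputs = (torch.randn(1, 8), torch.randn(1, 8))
recovered = recover_model_from_json(TwoInputModel(), "saved/qconfig.json", example_inputs)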