Skip to content

Commit c7b8739

Browse files
committed
Cleaned and rebased PR (quic#481) to change the hash creation module for all models in QEfficient.
This PR contains changes made to modeling_qeff and modeling_auto to allow usage of certain export parameters and kwargs passed during model creation. The hashing module is now independent of the calling class, and the test scripts are updated accordingly to test this functionality. Added functionality to have an overarching parent directory in the cache that contains all the different exported model configs belonging to the same architecture. In case the architecture isn't present in the config of the model, we instead proceed with self.model_name-based parent directory creation. The hash is now created during export, so as to incorporate all the additional params needed for unique hash creation; thus, the test scripts have been modified to test the hashing functionality accordingly. We maintain an exclusion list of kwargs params to be discarded during hashing parameter selection. We'll need to look into the alternate approach of maintaining an inclusion list instead. There was a comment to use metaclasses to raise a warning whenever someone loads a model without using the 'from_pretrained' method, but the current class architecture of VLMs and SpeechSeq2Seq models doesn't allow for this; this use case will be handled in a different PR. Signed-off-by: Dhiraj Kumar Sah <[email protected]>
1 parent ccc1923 commit c7b8739

File tree

16 files changed

+487
-276
lines changed

16 files changed

+487
-276
lines changed

QEfficient/base/modeling_qeff.py

Lines changed: 51 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#
66
# ----------------------------------------------------------------------------
77

8-
import hashlib
8+
import copy
99
import inspect
1010
import logging
1111
import shutil
@@ -22,8 +22,16 @@
2222
from QEfficient.base.pytorch_transforms import PytorchTransform
2323
from QEfficient.compile.qnn_compiler import compile as qnn_compile
2424
from QEfficient.generation.cloud_infer import QAICInferenceSession
25-
from QEfficient.utils import constants, create_json, dump_qconfig, generate_mdp_partition_config, load_json
26-
from QEfficient.utils.cache import QEFF_HOME, to_hashable
25+
from QEfficient.utils import (
26+
constants,
27+
create_json,
28+
dump_qconfig,
29+
filter_and_create_export_hash,
30+
generate_mdp_partition_config,
31+
hash_compile_params,
32+
load_json,
33+
)
34+
from QEfficient.utils.cache import QEFF_HOME
2735

2836
logger = logging.getLogger(__name__)
2937

@@ -45,12 +53,16 @@ class QEFFBaseModel(ABC):
4553
def _transform_names(cls) -> List[str]:
4654
return [x.__name__ for x in cls._pytorch_transforms + cls._onnx_transforms]
4755

48-
def __init__(self, model: torch.nn.Module) -> None:
56+
def __init__(self, model: torch.nn.Module, **kwargs) -> None:
4957
super().__init__()
5058
self.model = model
59+
self.hash_params = self.create_model_params(**kwargs)
5160
self.onnx_path: Optional[str] = None
5261
self.qpc_path: Optional[str] = None
5362
self.qpc_session: Optional[QAICInferenceSession] = None
63+
if hasattr(self.model.config, "architectures"):
64+
model_architecture = getattr(self.model.config, "architectures", None)
65+
self.model_architecture = model_architecture[0] if isinstance(model_architecture, list) else None
5466

5567
# Apply the transformations
5668
any_transformed = False
@@ -63,13 +75,16 @@ def __init__(self, model: torch.nn.Module) -> None:
6375
else:
6476
logger.info(f"Pytorch transforms applied to model: {self.model_name}")
6577

66-
@property
67-
@abstractmethod
68-
def model_name(self) -> str: ...
78+
def create_model_params(self, **kwargs) -> Dict:
79+
model_params = copy.deepcopy(kwargs)
80+
model_params["config"] = self.model.config.to_diff_dict()
81+
model_params["peft_config"] = getattr(self.model, "active_peft_config", None)
82+
model_params["applied_transform_names"] = self._transform_names()
83+
return model_params
6984

7085
@property
7186
@abstractmethod
72-
def model_hash(self) -> str: ...
87+
def model_name(self) -> str: ...
7388

7489
@abstractmethod
7590
def export(self, export_dir: Optional[str] = None) -> Path:
@@ -134,8 +149,17 @@ def _export(
134149
:onnx_transform_kwargs (dict): Additional arguments to be passed to `Transform.apply` for this class.
135150
:export_dir (str): Specify the export directory. The export_dir will be suffixed with a hash corresponding to current model.
136151
"""
137-
export_dir = Path(export_dir or (QEFF_HOME / self.model_name))
138-
export_dir = export_dir.with_name(export_dir.name + "-" + self.model_hash)
152+
parent_dir = self.model_architecture or self.model_name
153+
export_dir = Path(export_dir or (QEFF_HOME / parent_dir / self.model_name))
154+
export_hash, filtered_hash_params = filter_and_create_export_hash(
155+
model_params=self.hash_params,
156+
output_names=output_names,
157+
dynamic_axes=dynamic_axes,
158+
export_kwargs=export_kwargs,
159+
onnx_transform_kwargs=onnx_transform_kwargs,
160+
)
161+
self.export_hash = export_hash
162+
export_dir = export_dir.with_name(export_dir.name + "-" + export_hash)
139163
onnx_path = export_dir / f"{self.model_name}.onnx"
140164
if onnx_path.is_file():
141165
self.onnx_path = onnx_path
@@ -210,6 +234,11 @@ def _export(
210234
finally:
211235
shutil.rmtree(tmp_onnx_dir, ignore_errors=True)
212236

237+
# Dump JSON file with hashed parameters
238+
hashed_params_export_path = export_dir / "hashed_export_params.json"
239+
create_json(hashed_params_export_path, filtered_hash_params)
240+
logger.info("Hashed parameters exported successfully.")
241+
213242
self.onnx_path = onnx_path
214243
return onnx_path
215244

@@ -299,23 +328,15 @@ def _compile(
299328
else:
300329
mdp_ts_json = None
301330

302-
compile_hash = hashlib.sha256(to_hashable(command))
303-
304-
if specializations is not None:
305-
compile_hash.update(to_hashable(specializations))
306-
307-
if custom_io is not None:
308-
compile_hash.update(to_hashable(custom_io))
309-
310-
if num_speculative_tokens:
311-
compile_hash.update(to_hashable({"num_speculative_tokens": num_speculative_tokens}))
312-
313-
# Hash the MDP partition config and the number of devices.
314-
compile_hash.update(to_hashable(mdp_ts_json))
315-
compile_hash.update(to_hashable({"mdp_ts_num_devices": mdp_ts_num_devices}))
331+
compile_hash, hashed_params = hash_compile_params(
332+
command=command,
333+
specializations=specializations,
334+
custom_io=custom_io,
335+
mdp_ts_num_devices=mdp_ts_num_devices,
336+
mdp_ts_json=mdp_ts_json,
337+
num_speculative_tokens=num_speculative_tokens,
338+
)
316339

317-
# Check if already compiled
318-
compile_hash = compile_hash.hexdigest()[:16]
319340
compile_dir = qpc_path.with_name(qpc_path.name + "-" + compile_hash)
320341
qpc_path = compile_dir / "qpc"
321342
qpc_path.mkdir(parents=True, exist_ok=True)
@@ -366,6 +387,10 @@ def _compile(
366387
]
367388
)
368389
)
390+
# Dump JSON file with hashed parameters
391+
hashed_compile_params_path = compile_dir / "hashed_compile_params.json"
392+
create_json(hashed_compile_params_path, hashed_params)
393+
logger.info("Hashed parameters exported successfully.")
369394

370395
self.qpc_path = qpc_path
371396

QEfficient/compile/qnn_compiler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,12 @@
1212
from typing import Dict, List, Optional
1313

1414
from QEfficient.utils._utils import create_json, execute_command, load_json
15-
from QEfficient.utils.cache import to_hashable
1615
from QEfficient.utils.constants import QnnConstants
1716
from QEfficient.utils.generate_qnn_network_specialization_config import (
1817
generate_data_format_config,
1918
generate_qnn_specialization,
2019
)
20+
from QEfficient.utils.hash_utils import to_hashable
2121
from QEfficient.utils.logging_utils import logger
2222

2323

QEfficient/peft/auto.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from QEfficient.transformers.models.pytorch_transforms import CustomOpsTransform, KVCacheTransform
2828
from QEfficient.utils import constants
2929
from QEfficient.utils._utils import get_padding_shape_from_config
30-
from QEfficient.utils.cache import to_hashable
30+
from QEfficient.utils.hash_utils import to_hashable
3131

3232
logger = logging.getLogger(__name__)
3333

QEfficient/peft/lora/auto.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from QEfficient import QEFFAutoModelForCausalLM
1919
from QEfficient.peft.lora.pytorch_transforms import LoraModelInputsTransform, TargetModulesTransform
2020
from QEfficient.utils import constants, get_padding_shape_from_config
21-
from QEfficient.utils.cache import to_hashable
21+
from QEfficient.utils.hash_utils import to_hashable
2222
from QEfficient.utils.logging_utils import logger
2323

2424

0 commit comments

Comments
 (0)