@@ -169,7 +169,7 @@ def __init__(self, model: nn.Module, pooling=None, **kwargs):
             self.model, _ = PoolingTransform.apply(self.model, pooling)

         self.model.base_model.config.use_cache = True
-        self.hash_params["qeff_class"] = self.__class__.__name__
+        self.hash_params["qeff_auto_class"] = self.__class__.__name__

     @classmethod
     @with_replaced_quantizers
@@ -430,7 +430,7 @@ class QEffVisionEncoderForTextImageToTextModel(QEFFBaseModel):
     def __init__(self, model: nn.modules, **kwargs):
         super().__init__(model, **kwargs)
         self.model = model.get_qeff_vision_encoder()
-        self.hash_params["qeff_class"] = self.__class__.__name__
+        self.hash_params["qeff_auto_class"] = self.__class__.__name__

     def export(self, inputs, output_names, dynamic_axes, export_dir=None):
         return self._export(inputs, output_names, dynamic_axes, export_dir)
@@ -485,7 +485,7 @@ class QEffCausalLMForTextImageToTextModel(QEFFBaseModel):
     def __init__(self, model, **kwargs):
         super().__init__(model, **kwargs)
         self.model = model.get_qeff_language_decoder()
-        self.hash_params["qeff_class"] = self.__class__.__name__
+        self.hash_params["qeff_auto_class"] = self.__class__.__name__

     def export(self, inputs, output_names, dynamic_axes, export_dir=None):
         return self._export(inputs, output_names, dynamic_axes, export_dir)
@@ -773,7 +773,7 @@ def kv_offload_generate(
         inputs["input_ids"],
         (0, padded_len - input_ids_length),
         "constant",
-        1,
+        pad_token_id,
     )
     inputs["attention_mask"] = torch.nn.functional.pad(
         inputs["attention_mask"], (0, padded_len - input_ids_length), "constant", 0
@@ -911,7 +911,7 @@ def __init__(
             self.model.config.vision_config.use_flash_attn = "false"
         else:
             self.model.config.text_config.use_cache = True
-        self.hash_params["qeff_class"] = self.__class__.__name__
+        self.hash_params["qeff_auto_class"] = self.__class__.__name__

     @classmethod
     def from_pretrained(
@@ -1091,7 +1091,7 @@ def cloud_ai_100_generate(
         inputs["input_ids"],
         (0, padded_len - input_ids_length),
         "constant",
-        1,
+        pad_token_id,
     )
     inputs["attention_mask"] = torch.nn.functional.pad(
         inputs["attention_mask"], (0, padded_len - input_ids_length), "constant", 0
@@ -1360,7 +1360,7 @@ def __init__(
         self.pretrained_model_name_or_path = kwargs.get("pretrained_model_name_or_path", None)
         self.model, transformed = SpDTransform.apply(self.model, qaic_config, **kwargs)
         self.is_tlm = transformed
-        self.hash_params["qeff_class"] = self.__class__.__name__
+        self.hash_params["qeff_auto_class"] = self.__class__.__name__
         # ---Sampling---
         # Note: SamplerTransform should be applied after all other transforms
         # are done. The role of the sampler is to just add nodes at the output of the
@@ -1901,7 +1901,7 @@ def __init__(self, model: nn.Module, **kwargs):
         super().__init__(model, **kwargs)
         self.model.config.use_cache = True
         self.num_layers = model.config.num_hidden_layers
-        self.hash_params["qeff_class"] = self.__class__.__name__
+        self.hash_params["qeff_auto_class"] = self.__class__.__name__

     @property
     def get_model_config(self) -> dict:
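A minimal sketch of the padding change in kv_offload_generate and cloud_ai_100_generate, assuming a toy pad_token_id of 0 and a hypothetical 4-token input (both are illustrative, not taken from the commit): input_ids are right-padded to padded_len with the model's pad token instead of the hard-coded 1, while attention_mask is padded with 0 so the extra positions stay masked.

    import torch

    pad_token_id = 0  # assumed for illustration; the library reads this from the tokenizer/model config
    padded_len = 8
    input_ids = torch.tensor([[101, 2009, 2003, 102]])  # hypothetical token ids
    attention_mask = torch.ones_like(input_ids)
    input_ids_length = input_ids.shape[1]

    # Right-pad input_ids with the pad token (previously a hard-coded 1).
    input_ids = torch.nn.functional.pad(
        input_ids,
        (0, padded_len - input_ids_length),
        "constant",
        pad_token_id,
    )
    # Pad attention_mask with 0 so padded positions are ignored by attention.
    attention_mask = torch.nn.functional.pad(
        attention_mask, (0, padded_len - input_ids_length), "constant", 0
    )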