Commit 4d5fa97

[None][feat] Reference pytorch implementation for nemotron H
Signed-off-by: William Zhang <[email protected]>
1 parent 9b2abb8 commit 4d5fa97

6 files changed (+717, -49 lines)

tensorrt_llm/_torch/auto_deploy/export/export.py

Lines changed: 18 additions & 5 deletions

@@ -93,18 +93,31 @@ def _deduplicate_params_and_buffers(gm: fx.GraphModule) -> None:
 
 def _add_missing_load_hooks(gm: fx.GraphModule, model: nn.Module) -> None:
     """Adds back the state dict load hooks stripped away during export."""
-    hooks = {
+    pre_hooks = {
         k: mod._load_state_dict_pre_hooks
         for k, mod in model.named_modules()
         if mod._load_state_dict_pre_hooks
     }
 
     for mod_name, mod in gm.named_modules():
-        if mod_name in hooks:
-            for hook in hooks.pop(mod_name).values():
+        if mod_name in pre_hooks:
+            for hook in pre_hooks.pop(mod_name).values():
                 mod._register_load_state_dict_pre_hook(hook.hook, with_module=hook.with_module)
-    assert not (bool(hooks)), f"""Mismatch in names of exported and source modules with hooks.
-        The following module names were not found in exported module {list(hooks.keys())}"""
+    assert not (bool(pre_hooks)), f"""Mismatch in names of exported and source modules with hooks.
+        The following module names were not found in exported module {list(pre_hooks.keys())}"""
+
+    post_hooks = {
+        k: mod._load_state_dict_post_hooks
+        for k, mod in model.named_modules()
+        if mod._load_state_dict_post_hooks
+    }
+
+    for mod_name, mod in gm.named_modules():
+        if mod_name in post_hooks:
+            for hook in post_hooks.pop(mod_name).values():
+                mod.register_load_state_dict_post_hook(hook)
+    assert not (bool(post_hooks)), f"""Mismatch in names of exported and source modules with hooks.
+        The following module names were not found in exported module {list(post_hooks.keys())}"""
 
 
 def _add_load_hook_for_aliased_params(gm: fx.GraphModule, model: nn.Module) -> None:
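
For context on what the new half of this function handles: load-state-dict post hooks live in `nn.Module._load_state_dict_post_hooks` and run after `load_state_dict()` completes, but export produces a fresh `GraphModule` whose submodules carry none of them, so they must be copied over by matching submodule names. A minimal sketch of the mechanism, with illustrative module and hook names that are not part of this commit:

import torch
import torch.nn as nn


class Block(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(4, 4)

    def forward(self, x):
        return self.linear(x)


def report_keys(module, incompatible_keys):
    # Runs after module.load_state_dict(); may inspect or clear the
    # missing/unexpected key lists.
    print("missing:", incompatible_keys.missing_keys)


model = Block()
model.register_load_state_dict_post_hook(report_keys)
assert model._load_state_dict_post_hooks  # the dict the new code collects from

# The exported module starts with no such hooks; _add_missing_load_hooks is
# what re-attaches them on the submodules with matching names.
gm = torch.export.export(model, (torch.randn(1, 4),)).module()
assert not gm._load_state_dict_post_hooks

Note the asymmetry the diff preserves: pre hooks are stored in wrapped form (hence `hook.hook` and `hook.with_module`), while post hooks are stored as plain callables and can be re-registered directly through the public `register_load_state_dict_post_hook`.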
tensorrt_llm/_torch/auto_deploy/models/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -1,2 +1,4 @@
+# TODO: When getting rid of the nemotron H patches, import `modeling_nemotron_h` here to ensure the
+# custom model implementation is registered.
 from . import hf, patches
 from .factory import *
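
The TODO above relies on the register-on-import pattern: a module-level call runs exactly once, when the module is first imported, so importing `modeling_nemotron_h` from this `__init__` would be enough to populate the factory's registry. A hypothetical sketch of how that module's tail could look (the file name, class name, and "nemotron_h" key are assumptions, not contents of this commit):

# modeling_nemotron_h.py (hypothetical reference implementation)
from transformers import PreTrainedModel

from .hf import AutoModelForCausalLMFactory


class NemotronHForCausalLM(PreTrainedModel):  # assumed class name
    ...


# Module-level side effect: the first import registers this implementation for
# any config whose `model_type` field equals "nemotron_h" (assumed key).
AutoModelForCausalLMFactory.register_custom_model_cls(
    "nemotron_h", NemotronHForCausalLM
)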

tensorrt_llm/_torch/auto_deploy/models/hf.py

Lines changed: 39 additions & 7 deletions

@@ -110,6 +110,10 @@ class AutoModelForCausalLMFactory(AutoModelFactory):
         "use_cache": False,
     }
 
+    # Maps a model config's `model_type` value to the alternative
+    # `AutoModelForCausalLM` implementation to use in place of the default.
+    _custom_model_mapping: Dict[str, Type[AutoModelForCausalLM]] = {}
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self._quant_config_reader: QuantConfigReader | None = None
@@ -212,14 +216,25 @@ def _build_model(self, device: DeviceLikeType) -> nn.Module:
         """Build the model on the desired device."""
         model_config, unused_kwargs = self._get_model_config()
 
+        model_type = getattr(model_config, "model_type", "")
+        custom_model_cls = self._custom_model_mapping.get(model_type, None)
         with (init_empty_weights if device == "meta" else nullcontext)():
-            model = self.automodel_cls.from_config(
-                model_config,
-                **{
-                    "trust_remote_code": True,
-                    **unused_kwargs,
-                },
-            )
+            if custom_model_cls is not None:
+                # Prefer `_from_config` (defined on the `PreTrainedModel` mixin): it
+                # applies setup, e.g. dtype handling, that direct construction skips.
+                if hasattr(custom_model_cls, "_from_config"):
+                    model = custom_model_cls._from_config(model_config, **unused_kwargs)
+                else:
+                    model = custom_model_cls(model_config, **unused_kwargs)
+            else:
+                model = self.automodel_cls.from_config(
+                    model_config,
+                    **{
+                        "trust_remote_code": True,
+                        **unused_kwargs,
+                    },
+                )
+
         if device == "meta":
             # post-init --> this must be called explicitly for HF models the way we initialize them
             # since this "gets lost" with the init_empty_weights context manager.
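
For reference, `_from_config` is the config-only construction path defined on transformers' `PreTrainedModel`; unlike calling the class directly, it honors kwargs such as `torch_dtype`, which is why the new branch prefers it when present. A rough sketch of the two paths, using `gpt2` purely as a stand-in config:

import torch
from transformers import AutoConfig, AutoModelForCausalLM, GPT2LMHeadModel

config = AutoConfig.from_pretrained("gpt2")  # stand-in; any CausalLM config works

# Default path, taken when no custom class is registered for config.model_type:
default_model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)

# Custom-class path: `_from_config` applies dtype handling that a bare
# GPT2LMHeadModel(config) call would skip.
custom_model = GPT2LMHeadModel._from_config(config, torch_dtype=torch.bfloat16)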
@@ -482,6 +497,23 @@ def _remap_param_names_load_hook(self, model, state_dict, *args, **kwargs) -> None:
     def get_export_infos(self, model: nn.Module) -> List[SubModuleExportInfo]:
         return [FullModelExportInfo()]
 
+    @classmethod
+    def register_custom_model_cls(
+        cls, model_type: str, custom_model_cls: Type[AutoModelForCausalLM]
+    ) -> None:
+        """Register a custom model implementation.
+
+        This is useful when the default `AutoModelForCausalLM` is not the one we want to use,
+        for example when the model's code lives in a HuggingFace repo that is out of date or
+        has dependencies that TensorRT-LLM does not provide.
+
+        Args:
+            model_type: The value of the `model_type` field in the model's config.
+            custom_model_cls: The `AutoModelForCausalLM` implementation that should be used
+                for `model_type`.
+        """
+        cls._custom_model_mapping[model_type] = custom_model_cls
+
 
 class _StateDictParamNameConverter:
     """Helper class for applying param name conversions to a state dict.
