Skip to content

Commit 77ccb30

Browse files
committed
[None][feat] Reference PyTorch implementation for Nemotron H
Signed-off-by: William Zhang <[email protected]>
1 parent b51258a commit 77ccb30

File tree

5 files changed

+686
-21
lines changed

5 files changed

+686
-21
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
1+
# TODO: When getting rid of the nemotron H patches, import `modeling_nemotron_h` here to ensure the
2+
# custom model implementation is registered.
13
from . import hf, patches
24
from .factory import *

tensorrt_llm/_torch/auto_deploy/models/hf.py

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,10 @@ class AutoModelForCausalLMFactory(AutoModelFactory):
110110
"use_cache": False,
111111
}
112112

113+
# The below maps from an entry in a model's config dict's `model_type` to the alternative
114+
# `AutoModelForCausalLM` we would like to use.
115+
_custom_model_mapping: Dict[str, Type[AutoModelForCausalLM]] = {}
116+
113117
def __init__(self, *args, **kwargs):
114118
super().__init__(*args, **kwargs)
115119
self._quant_config_reader: QuantConfigReader | None = None
@@ -205,14 +209,25 @@ def _build_model(self, device: DeviceLikeType) -> nn.Module:
205209
"""Build the model on the desired device."""
206210
model_config, unused_kwargs = self._get_model_config()
207211

212+
model_type = getattr(model_config, "model_type", "")
213+
custom_model_cls = self._custom_model_mapping.get(model_type, None)
208214
with (init_empty_weights if device == "meta" else nullcontext)():
209-
model = self.automodel_cls.from_config(
210-
model_config,
211-
**{
212-
"trust_remote_code": True,
213-
**unused_kwargs,
214-
},
215-
)
215+
if custom_model_cls is not None:
216+
# `_from_config` has some behavior we would like to use where possible. It is
217+
# defined in the `PreTrainedModel` mixin.
218+
if hasattr(custom_model_cls, "_from_config"):
219+
model = custom_model_cls._from_config(model_config, **unused_kwargs)
220+
else:
221+
model = custom_model_cls(model_config, **unused_kwargs)
222+
else:
223+
model = self.automodel_cls.from_config(
224+
model_config,
225+
**{
226+
"trust_remote_code": True,
227+
**unused_kwargs,
228+
},
229+
)
230+
216231
if device == "meta":
217232
# post-init --> this must be called explicitly for HF models the way we initialize them
218233
# since this "gets lost" with the init_empty_weights context manager.
@@ -475,6 +490,23 @@ def _remap_param_names_load_hook(self, model, state_dict, *args, **kwargs) -> No
475490
def get_export_infos(self, model: nn.Module) -> List[SubModuleExportInfo]:
476491
return [FullModelExportInfo()]
477492

493+
@classmethod
494+
def register_custom_model_cls(
495+
cls, model_type: str, custom_model_cls: Type[AutoModelForCausalLM]
496+
) -> None:
497+
"""Register a custom model implementation.
498+
499+
This is useful when the default `AutoModelForCausalLM` is not the one we want to use. For
500+
example, when the model's code is in a HuggingFace repo that is out of date, or has
501+
dependencies that TensorRT-LLM does not have, etc.
502+
503+
Args:
504+
model_type: This should be the value for the `model_type` field in the model's config.
505+
custom_model_cls: The `AutoModelForCausalLM` implementation that should be used for
506+
`model_type`.
507+
"""
508+
cls._custom_model_mapping[model_type] = custom_model_cls
509+
478510

479511
class _StateDictParamNameConverter:
480512
"""Helper class for applying param name conversions to a state dict.

0 commit comments

Comments (0)