37 changes: 37 additions & 0 deletions docs/source/models/regnet.rst
@@ -0,0 +1,37 @@
RegNet
======

.. currentmodule:: torchvision.models

The RegNet model is based on the `Designing Network Design Spaces
<https://arxiv.org/abs/2003.13678>`_ paper.


Model builders
--------------

The following model builders can be used to instantiate a RegNet model, with or
without pre-trained weights. All the model builders internally rely on the
``torchvision.models.regnet.RegNet`` base class. Please refer to the `source code
<https://github.com/pytorch/vision/blob/main/torchvision/models/regnet.py>`_ for
more details about this class.

.. autosummary::
:toctree: generated/
:template: function.rst

regnet_y_400mf
regnet_y_800mf
regnet_y_1_6gf
regnet_y_3_2gf
regnet_y_8gf
regnet_y_16gf
regnet_y_32gf
regnet_y_128gf
regnet_x_400mf
regnet_x_800mf
regnet_x_1_6gf
regnet_x_3_2gf
regnet_x_8gf
regnet_x_16gf
regnet_x_32gf
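
For example, a minimal usage sketch (the weight enum member name is assumed
from the multi-weight API and shown for illustration only):

.. code-block:: python

    import torch
    from torchvision.models import regnet_y_400mf, RegNet_Y_400MF_Weights

    # Build with ImageNet pre-trained weights (enum member assumed).
    model = regnet_y_400mf(weights=RegNet_Y_400MF_Weights.IMAGENET1K_V1)
    model.eval()

    # Or build an untrained network by passing weights=None (the default).
    scratch = regnet_y_400mf(weights=None)

    # Forward pass on a dummy batch.
    x = torch.rand(1, 3, 224, 224)
    with torch.no_grad():
        logits = model(x)  # ImageNet logits of shape (1, 1000)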
2 changes: 1 addition & 1 deletion docs/source/models/resnet.rst
@@ -10,7 +10,7 @@ The ResNet model is based on the `Deep Residual Learning for Image Recognition
Model builders
--------------

The following model builders can be used to instanciate a ResNet model, with or
The following model builders can be used to instantiate a ResNet model, with or
without pre-trained weights. All the model builders internally rely on the
``torchvision.models.resnet.ResNet`` base class. Please refer to the `source
code
2 changes: 1 addition & 1 deletion docs/source/models/squeezenet.rst
@@ -11,7 +11,7 @@ paper.
Model builders
--------------

The following model builders can be used to instanciate a SqueezeNet model, with or
The following model builders can be used to instantiate a SqueezeNet model, with or
without pre-trained weights. All the model builders internally rely on the
``torchvision.models.squeezenet.SqueezeNet`` base class. Please refer to the `source
code
2 changes: 1 addition & 1 deletion docs/source/models/vgg.rst
@@ -10,7 +10,7 @@ Image Recognition <https://arxiv.org/abs/1409.1556>`_ paper.
Model builders
--------------

The following model builders can be used to instanciate a VGG model, with or
The following model builders can be used to instantiate a VGG model, with or
without pre-trained weights. All the model builders internally rely on the
``torchvision.models.vgg.VGG`` base class. Please refer to the `source code
<https://github.com/pytorch/vision/blob/main/torchvision/models/vgg.py>`_ for
28 changes: 28 additions & 0 deletions docs/source/models/vision_transformer.rst
@@ -0,0 +1,28 @@
VisionTransformer
=================

.. currentmodule:: torchvision.models

The VisionTransformer model is based on the `An Image is Worth 16x16 Words:
Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>`_ paper.


Model builders
--------------

The following model builders can be used to instantiate a VisionTransformer model, with or
without pre-trained weights. All the model builders internally rely on the
``torchvision.models.vision_transformer.VisionTransformer`` base class.
Please refer to the `source code
<https://github.com/pytorch/vision/blob/main/torchvision/models/vision_transformer.py>`_ for
more details about this class.

.. autosummary::
:toctree: generated/
:template: function.rst

vit_b_16
vit_b_32
vit_l_16
vit_l_32
vit_h_14
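
As with the other builders, a minimal usage sketch (the weight enum member and
its ``transforms()`` preset are assumed from the multi-weight API and shown for
illustration only):

.. code-block:: python

    import torch
    from torchvision.models import vit_b_16, ViT_B_16_Weights

    # Pick a weights entry (member name assumed) and build the model.
    weights = ViT_B_16_Weights.IMAGENET1K_V1
    model = vit_b_16(weights=weights)
    model.eval()

    # Each weights entry bundles the preprocessing it was trained with.
    preprocess = weights.transforms()

    # vit_b_16 takes 224x224 inputs: 16x16 patches -> 196 tokens + 1 class token.
    batch = preprocess(torch.rand(1, 3, 224, 224))
    with torch.no_grad():
        logits = model(batch)  # shape (1, 1000)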
2 changes: 2 additions & 0 deletions docs/source/models_new.rst
@@ -36,9 +36,11 @@ weights:
.. toctree::
:maxdepth: 1

models/regnet
models/resnet
models/squeezenet
models/vgg
models/vision_transformer


Table of all available classification weights
226 changes: 190 additions & 36 deletions torchvision/models/regnet.py

Large diffs are not rendered by default.

75 changes: 60 additions & 15 deletions torchvision/models/vision_transformer.py
@@ -490,11 +490,20 @@ class ViT_H_14_Weights(WeightsEnum):
def vit_b_16(*, weights: Optional[ViT_B_16_Weights] = None, progress: bool = True, **kwargs: Any) -> VisionTransformer:
"""
Constructs a vit_b_16 architecture from
`"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale" <https://arxiv.org/abs/2010.11929>`_.
`An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>`_.

Args:
weights (ViT_B_16_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
weights (:class:`~torchvision.models.vision_transformer.ViT_B_16_Weights`, optional): The pretrained
weights to use. See :class:`~torchvision.models.vision_transformer.ViT_B_16_Weights`
below for more details and possible values. By default, no pre-trained weights are used.
progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True.
**kwargs: parameters passed to the ``torchvision.models.vision_transformer.VisionTransformer``
base class. Please refer to the `source code
<https://github.com/pytorch/vision/blob/main/torchvision/models/vision_transformer.py>`_
for more details about this class.

.. autoclass:: torchvision.models.vision_transformer.ViT_B_16_Weights
:members:
"""
weights = ViT_B_16_Weights.verify(weights)
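# Illustrative caller-side sketch (enum member name assumed): the weights
# argument replaces the old ``pretrained`` boolean, e.g.
#   vit_b_16(weights=ViT_B_16_Weights.IMAGENET1K_V1)  # pre-trained
#   vit_b_16(weights=None)                            # random init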

@@ -514,11 +523,20 @@ def vit_b_16(*, weights: Optional[ViT_B_16_Weights] = None, progress: bool = True, **kwargs: Any) -> VisionTransformer:
def vit_b_32(*, weights: Optional[ViT_B_32_Weights] = None, progress: bool = True, **kwargs: Any) -> VisionTransformer:
"""
Constructs a vit_b_32 architecture from
`"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale" <https://arxiv.org/abs/2010.11929>`_.
`An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>`_.

Args:
weights (ViT_B_32_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
weights (:class:`~torchvision.models.vision_transformer.ViT_B_32_Weights`, optional): The pretrained
weights to use. See :class:`~torchvision.models.vision_transformer.ViT_B_32_Weights`
below for more details and possible values. By default, no pre-trained weights are used.
progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True.
**kwargs: parameters passed to the ``torchvision.models.vision_transformer.VisionTransformer``
base class. Please refer to the `source code
<https://github.com/pytorch/vision/blob/main/torchvision/models/vision_transformer.py>`_
for more details about this class.

.. autoclass:: torchvision.models.vision_transformer.ViT_B_32_Weights
:members:
"""
weights = ViT_B_32_Weights.verify(weights)

@@ -538,11 +556,20 @@ def vit_b_32(*, weights: Optional[ViT_B_32_Weights] = None, progress: bool = True, **kwargs: Any) -> VisionTransformer:
def vit_l_16(*, weights: Optional[ViT_L_16_Weights] = None, progress: bool = True, **kwargs: Any) -> VisionTransformer:
"""
Constructs a vit_l_16 architecture from
`"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale" <https://arxiv.org/abs/2010.11929>`_.
`An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>`_.

Args:
weights (ViT_L_16_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
weights (:class:`~torchvision.models.vision_transformer.ViT_L_16_Weights`, optional): The pretrained
weights to use. See :class:`~torchvision.models.vision_transformer.ViT_L_16_Weights`
below for more details and possible values. By default, no pre-trained weights are used.
progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True.
**kwargs: parameters passed to the ``torchvision.models.vision_transformer.VisionTransformer``
base class. Please refer to the `source code
<https://github.com/pytorch/vision/blob/main/torchvision/models/vision_transformer.py>`_
for more details about this class.

.. autoclass:: torchvision.models.vision_transformer.ViT_L_16_Weights
:members:
"""
weights = ViT_L_16_Weights.verify(weights)

@@ -562,11 +589,20 @@ def vit_l_16(*, weights: Optional[ViT_L_16_Weights] = None, progress: bool = True, **kwargs: Any) -> VisionTransformer:
def vit_l_32(*, weights: Optional[ViT_L_32_Weights] = None, progress: bool = True, **kwargs: Any) -> VisionTransformer:
"""
Constructs a vit_l_32 architecture from
`"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale" <https://arxiv.org/abs/2010.11929>`_.
`An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>`_.

Args:
weights (ViT_L_32_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
weights (:class:`~torchvision.models.vision_transformer.ViT_L_32_Weights`, optional): The pretrained
weights to use. See :class:`~torchvision.models.vision_transformer.ViT_L_32_Weights`
below for more details and possible values. By default, no pre-trained weights are used.
progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True.
**kwargs: parameters passed to the ``torchvision.models.vision_transformer.VisionTransformer``
base class. Please refer to the `source code
<https://github.com/pytorch/vision/blob/main/torchvision/models/vision_transformer.py>`_
for more details about this class.

.. autoclass:: torchvision.models.vision_transformer.ViT_L_32_Weights
:members:
"""
weights = ViT_L_32_Weights.verify(weights)

@@ -585,11 +621,20 @@ def vit_l_32(*, weights: Optional[ViT_L_32_Weights] = None, progress: bool = True, **kwargs: Any) -> VisionTransformer:
def vit_h_14(*, weights: Optional[ViT_H_14_Weights] = None, progress: bool = True, **kwargs: Any) -> VisionTransformer:
"""
Constructs a vit_h_14 architecture from
`"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale" <https://arxiv.org/abs/2010.11929>`_.
`An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>`_.

Args:
weights (ViT_H_14_Weights, optional): The pretrained weights for the model
progress (bool): If True, displays a progress bar of the download to stderr
weights (:class:`~torchvision.models.vision_transformer.ViT_H_14_Weights`, optional): The pretrained
weights to use. See :class:`~torchvision.models.vision_transformer.ViT_H_14_Weights`
below for more details and possible values. By default, no pre-trained weights are used.
progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True.
**kwargs: parameters passed to the ``torchvision.models.vision_transformer.VisionTransformer``
base class. Please refer to the `source code
<https://github.com/pytorch/vision/blob/main/torchvision/models/vision_transformer.py>`_
for more details about this class.

.. autoclass:: torchvision.models.vision_transformer.ViT_H_14_Weights
:members:
"""
weights = ViT_H_14_Weights.verify(weights)
