
Commit 8283332

Merge branch 'main' into gtsrb_prototype
2 parents: 1ef84e0 + 8886a3c

Note: this is a large commit, so some of the changed files are hidden by default and not shown below.

60 files changed, +1669 −428 lines

docs/source/models.rst

Lines changed: 15 additions & 2 deletions
@@ -41,6 +41,7 @@ architectures for image classification:
 - `EfficientNet`_
 - `RegNet`_
 - `VisionTransformer`_
+- `ConvNeXt`_
 
 You can construct a model with random weights by calling its constructor:
 
@@ -88,7 +89,6 @@ You can construct a model with random weights by calling its constructor:
     vit_b_32 = models.vit_b_32()
     vit_l_16 = models.vit_l_16()
     vit_l_32 = models.vit_l_32()
-    vit_h_14 = models.vit_h_14()
 
 We provide pre-trained models, using the PyTorch :mod:`torch.utils.model_zoo`.
 These can be constructed by passing ``pretrained=True``:
@@ -248,6 +248,7 @@ vit_b_16 81.072 95.318
 vit_b_32                         75.912        92.466
 vit_l_16                         79.662        94.638
 vit_l_32                         76.972        93.070
+convnext_tiny (prototype)        82.520        96.146
 ================================ ============= =============
 
 
@@ -266,6 +267,7 @@ vit_l_32 76.972 93.070
 .. _EfficientNet: https://arxiv.org/abs/1905.11946
 .. _RegNet: https://arxiv.org/abs/2003.13678
 .. _VisionTransformer: https://arxiv.org/abs/2010.11929
+.. _ConvNeXt: https://arxiv.org/abs/2201.03545
 
 .. currentmodule:: torchvision.models
 
@@ -461,7 +463,6 @@ VisionTransformer
     vit_b_32
     vit_l_16
     vit_l_32
-    vit_h_14
 
 Quantized Models
 ----------------
@@ -594,6 +595,7 @@ The models subpackage contains definitions for the following model
 architectures for detection:
 
 - `Faster R-CNN <https://arxiv.org/abs/1506.01497>`_
+- `FCOS <https://arxiv.org/abs/1904.01355>`_
 - `Mask R-CNN <https://arxiv.org/abs/1703.06870>`_
 - `RetinaNet <https://arxiv.org/abs/1708.02002>`_
 - `SSD <https://arxiv.org/abs/1512.02325>`_
@@ -639,6 +641,7 @@ Network box AP mask AP keypoint AP
 Faster R-CNN ResNet-50 FPN                 37.0     -        -
 Faster R-CNN MobileNetV3-Large FPN         32.8     -        -
 Faster R-CNN MobileNetV3-Large 320 FPN     22.8     -        -
+FCOS ResNet-50 FPN                         39.2     -        -
 RetinaNet ResNet-50 FPN                    36.4     -        -
 SSD300 VGG16                               25.1     -        -
 SSDlite320 MobileNetV3-Large               21.3     -        -
@@ -699,6 +702,7 @@ Network train time (s / it) test time (s / it)
 Faster R-CNN ResNet-50 FPN                 0.2288   0.0590   5.2
 Faster R-CNN MobileNetV3-Large FPN         0.1020   0.0415   1.0
 Faster R-CNN MobileNetV3-Large 320 FPN     0.0978   0.0376   0.6
+FCOS ResNet-50 FPN                         0.1450   0.0539   3.3
 RetinaNet ResNet-50 FPN                    0.2514   0.0939   4.1
 SSD300 VGG16                               0.2093   0.0744   1.5
 SSDlite320 MobileNetV3-Large               0.1773   0.0906   1.5
@@ -718,6 +722,15 @@ Faster R-CNN
     torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn
     torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn
 
+FCOS
+----
+
+.. autosummary::
+    :toctree: generated/
+    :template: function.rst
+
+    torchvision.models.detection.fcos_resnet50_fpn
+
 
 RetinaNet
 ---------
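
The docs above keep torchvision's usual builder pattern: call the constructor for random weights, or pass `pretrained=True` where weights ship with the main package. Below is a minimal sketch (not part of the commit) exercising the two builders the docs now list, assuming a nightly build from around this change in which `convnext_tiny` is exposed under `torchvision.models` (as the reference training command further down suggests) and `fcos_resnet50_fpn` under `torchvision.models.detection`:

```python
# Sketch only, not from the commit: exercises the builders documented above.
import torch
import torchvision

# ConvNeXt image classifier with random weights, per the constructor section of models.rst.
convnext = torchvision.models.convnext_tiny()

# FCOS detector from the new autosummary entry; num_classes=91 matches the COCO default.
fcos = torchvision.models.detection.fcos_resnet50_fpn(pretrained=False, num_classes=91)
fcos.eval()

with torch.no_grad():
    # Detection models take a list of CHW tensors and return one dict per image.
    predictions = fcos([torch.rand(3, 480, 640)])
print(predictions[0].keys())  # boxes, scores, labels
```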

hubconf.py

Lines changed: 0 additions & 1 deletion
@@ -63,5 +63,4 @@
     vit_b_32,
     vit_l_16,
     vit_l_32,
-    vit_h_14,
 )

mypy.ini

Lines changed: 4 additions & 0 deletions
@@ -70,6 +70,10 @@ ignore_errors = True
 
 ignore_errors = True
 
+[mypy-torchvision.models.detection.fcos]
+
+ignore_errors = True
+
 [mypy-torchvision.ops.*]
 
 ignore_errors = True

references/classification/README.md

Lines changed: 14 additions & 0 deletions
@@ -197,6 +197,20 @@ Note that the above command corresponds to training on a single node with 8 GPUs
 For generating the pre-trained weights, we trained with 8 nodes, each with 8 GPUs (for a total of 64 GPUs),
 and `--batch_size 64`.
 
+
+### ConvNeXt
+```
+torchrun --nproc_per_node=8 train.py\
+--model convnext_tiny --batch-size 128 --opt adamw --lr 1e-3 --lr-scheduler cosineannealinglr \
+--lr-warmup-epochs 5 --lr-warmup-method linear --auto-augment ta_wide --epochs 600 --random-erase 0.1 \
+--label-smoothing 0.1 --mixup-alpha 0.2 --cutmix-alpha 1.0 --weight-decay 0.05 --norm-weight-decay 0.0 \
+--train-crop-size 176 --model-ema --val-resize-size 236 --ra-sampler --ra-reps 4
+```
+
+Note that the above command corresponds to training on a single node with 8 GPUs.
+For generating the pre-trained weights, we trained with 2 nodes, each with 8 GPUs (for a total of 16 GPUs),
+and `--batch_size 64`.
+
 ## Mixed precision training
 Automatic Mixed Precision (AMP) training on GPU for Pytorch can be enabled with the [torch.cuda.amp](https://pytorch.org/docs/stable/amp.html?highlight=amp#module-torch.cuda.amp).

references/classification/train.py

Lines changed: 21 additions & 8 deletions
@@ -16,9 +16,9 @@
 
 
 try:
-    from torchvision.prototype import models as PM
+    from torchvision import prototype
 except ImportError:
-    PM = None
+    prototype = None
 
 
 def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, args, model_ema=None, scaler=None):
@@ -154,13 +154,18 @@ def load_data(traindir, valdir, args):
         print(f"Loading dataset_test from {cache_path}")
         dataset_test, _ = torch.load(cache_path)
     else:
-        if not args.weights:
+        if not args.prototype:
             preprocessing = presets.ClassificationPresetEval(
                 crop_size=val_crop_size, resize_size=val_resize_size, interpolation=interpolation
             )
         else:
-            weights = PM.get_weight(args.weights)
-            preprocessing = weights.transforms()
+            if args.weights:
+                weights = prototype.models.get_weight(args.weights)
+                preprocessing = weights.transforms()
+            else:
+                preprocessing = prototype.transforms.ImageNetEval(
+                    crop_size=val_crop_size, resize_size=val_resize_size, interpolation=interpolation
+                )
 
         dataset_test = torchvision.datasets.ImageFolder(
             valdir,
@@ -186,8 +191,10 @@ def load_data(traindir, valdir, args):
 
 
 def main(args):
-    if args.weights and PM is None:
+    if args.prototype and prototype is None:
         raise ImportError("The prototype module couldn't be found. Please install the latest torchvision nightly.")
+    if not args.prototype and args.weights:
+        raise ValueError("The weights parameter works only in prototype mode. Please pass the --prototype argument.")
     if args.output_dir:
         utils.mkdir(args.output_dir)
 
@@ -229,10 +236,10 @@
     )
 
     print("Creating model")
-    if not args.weights:
+    if not args.prototype:
         model = torchvision.models.__dict__[args.model](pretrained=args.pretrained, num_classes=num_classes)
     else:
-        model = PM.__dict__[args.model](weights=args.weights, num_classes=num_classes)
+        model = prototype.models.__dict__[args.model](weights=args.weights, num_classes=num_classes)
     model.to(device)
 
     if args.distributed and args.sync_bn:
@@ -491,6 +498,12 @@ def get_args_parser(add_help=True):
     )
 
     # Prototype models only
+    parser.add_argument(
+        "--prototype",
+        dest="prototype",
+        help="Use prototype model builders instead those from main area",
+        action="store_true",
+    )
     parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load")
 
     return parser
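
Across the reference scripts, this commit moves the `--weights` flag behind a new `--prototype` switch. The sketch below condenses the gating added above into a hypothetical standalone helper (`check_prototype_args` is not in the diff; the import pattern and error messages are):

```python
# Condensed from the diff above: how the new --prototype flag gates the prototype builders.
try:
    from torchvision import prototype
except ImportError:
    prototype = None  # released torchvision builds without the prototype area


def check_prototype_args(args):
    # --prototype requires a nightly that ships torchvision.prototype.
    if args.prototype and prototype is None:
        raise ImportError("The prototype module couldn't be found. Please install the latest torchvision nightly.")
    # --weights (a weights enum name) is only honored by the prototype builders.
    if not args.prototype and args.weights:
        raise ValueError("The weights parameter works only in prototype mode. Please pass the --prototype argument.")
```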

references/detection/README.md

Lines changed: 7 additions & 0 deletions
@@ -41,6 +41,13 @@ torchrun --nproc_per_node=8 train.py\
 --lr-steps 16 22 --aspect-ratio-group-factor 3
 ```
 
+### FCOS ResNet-50 FPN
+```
+torchrun --nproc_per_node=8 train.py\
+    --dataset coco --model fcos_resnet50_fpn --epochs 26\
+    --lr-steps 16 22 --aspect-ratio-group-factor 3 --lr 0.01 --amp
+```
+
 ### RetinaNet
 ```
 torchrun --nproc_per_node=8 train.py\
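
Once the FCOS run above finishes, the saved checkpoint can be reloaded for evaluation. A hedged sketch, assuming the reference detection script's checkpoint layout (state dict stored under the "model" key) and an illustrative file name:

```python
# Sketch only: reload an FCOS checkpoint produced by the command above.
# Assumes the reference script's checkpoint format ({"model": state_dict, ...}).
import torch
import torchvision

model = torchvision.models.detection.fcos_resnet50_fpn(num_classes=91)
checkpoint = torch.load("model_25.pth", map_location="cpu")  # illustrative path
model.load_state_dict(checkpoint["model"])
model.eval()
```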

references/detection/train.py

Lines changed: 19 additions & 8 deletions
@@ -34,9 +34,9 @@
 
 
 try:
-    from torchvision.prototype import models as PM
+    from torchvision import prototype
 except ImportError:
-    PM = None
+    prototype = None
 
 
 def get_dataset(name, image_set, transform, data_path):
@@ -50,11 +50,14 @@ def get_dataset(name, image_set, transform, data_path):
 def get_transform(train, args):
     if train:
         return presets.DetectionPresetTrain(args.data_augmentation)
-    elif not args.weights:
+    elif not args.prototype:
         return presets.DetectionPresetEval()
     else:
-        weights = PM.get_weight(args.weights)
-        return weights.transforms()
+        if args.weights:
+            weights = prototype.models.get_weight(args.weights)
+            return weights.transforms()
+        else:
+            return prototype.transforms.CocoEval()
 
 
 def get_args_parser(add_help=True):
@@ -141,6 +144,12 @@ def get_args_parser(add_help=True):
     parser.add_argument("--dist-url", default="env://", type=str, help="url used to set up distributed training")
 
     # Prototype models only
+    parser.add_argument(
+        "--prototype",
+        dest="prototype",
+        help="Use prototype model builders instead those from main area",
+        action="store_true",
+    )
     parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load")
 
     # Mixed precision training parameters
@@ -150,8 +159,10 @@
 
 
 def main(args):
-    if args.weights and PM is None:
+    if args.prototype and prototype is None:
         raise ImportError("The prototype module couldn't be found. Please install the latest torchvision nightly.")
+    if not args.prototype and args.weights:
+        raise ValueError("The weights parameter works only in prototype mode. Please pass the --prototype argument.")
     if args.output_dir:
         utils.mkdir(args.output_dir)
 
@@ -193,12 +204,12 @@
     if "rcnn" in args.model:
         if args.rpn_score_thresh is not None:
             kwargs["rpn_score_thresh"] = args.rpn_score_thresh
-    if not args.weights:
+    if not args.prototype:
         model = torchvision.models.detection.__dict__[args.model](
             pretrained=args.pretrained, num_classes=num_classes, **kwargs
         )
     else:
-        model = PM.detection.__dict__[args.model](weights=args.weights, num_classes=num_classes, **kwargs)
+        model = prototype.models.detection.__dict__[args.model](weights=args.weights, num_classes=num_classes, **kwargs)
     model.to(device)
     if args.distributed and args.sync_bn:
         model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

references/optical_flow/train.py

Lines changed: 23 additions & 9 deletions
@@ -10,10 +10,9 @@
 from torchvision.datasets import KittiFlow, FlyingChairs, FlyingThings3D, Sintel, HD1K
 
 try:
-    from torchvision.prototype import models as PM
-    from torchvision.prototype.models import optical_flow as PMOF
+    from torchvision import prototype
 except ImportError:
-    PM = PMOF = None
+    prototype = None
 
 
 def get_train_dataset(stage, dataset_root):
@@ -133,9 +132,12 @@ def inner_loop(blob):
 def validate(model, args):
     val_datasets = args.val_dataset or []
 
-    if args.weights:
-        weights = PM.get_weight(args.weights)
-        preprocessing = weights.transforms()
+    if args.prototype:
+        if args.weights:
+            weights = prototype.models.get_weight(args.weights)
+            preprocessing = weights.transforms()
+        else:
+            preprocessing = prototype.transforms.RaftEval()
     else:
         preprocessing = OpticalFlowPresetEval()
 
@@ -192,10 +194,14 @@ def train_one_epoch(model, optimizer, scheduler, train_loader, logger, args):
 
 
 def main(args):
+    if args.prototype and prototype is None:
+        raise ImportError("The prototype module couldn't be found. Please install the latest torchvision nightly.")
+    if not args.prototype and args.weights:
+        raise ValueError("The weights parameter works only in prototype mode. Please pass the --prototype argument.")
     utils.setup_ddp(args)
 
-    if args.weights:
-        model = PMOF.__dict__[args.model](weights=args.weights)
+    if args.prototype:
+        model = prototype.models.optical_flow.__dict__[args.model](weights=args.weights)
     else:
         model = torchvision.models.optical_flow.__dict__[args.model](pretrained=args.pretrained)
 
@@ -317,7 +323,6 @@ def get_args_parser(add_help=True):
     )
     # TODO: resume, pretrained, and weights should be in an exclusive arg group
     parser.add_argument("--pretrained", action="store_true", help="Whether to use pretrained weights")
-    parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load.")
 
     parser.add_argument(
         "--num_flow_updates",
@@ -336,6 +341,15 @@
         required=True,
     )
 
+    # Prototype models only
+    parser.add_argument(
+        "--prototype",
+        dest="prototype",
+        help="Use prototype model builders instead those from main area",
+        action="store_true",
+    )
+    parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load.")
+
     return parser