
Commit 8283332

Merge branch 'main' into gtsrb_prototype
2 parents: 1ef84e0 + 8886a3c

Note: this is a large commit, so some of the changed files are hidden by default and not shown below.

60 files changed, +1669 −428 lines

docs/source/models.rst

Lines changed: 15 additions & 2 deletions
@@ -41,6 +41,7 @@ architectures for image classification:
 - `EfficientNet`_
 - `RegNet`_
 - `VisionTransformer`_
+- `ConvNeXt`_
 
 You can construct a model with random weights by calling its constructor:
 
@@ -88,7 +89,6 @@ You can construct a model with random weights by calling its constructor:
     vit_b_32 = models.vit_b_32()
     vit_l_16 = models.vit_l_16()
     vit_l_32 = models.vit_l_32()
-    vit_h_14 = models.vit_h_14()
 
 We provide pre-trained models, using the PyTorch :mod:`torch.utils.model_zoo`.
 These can be constructed by passing ``pretrained=True``:
@@ -248,6 +248,7 @@ vit_b_16 81.072 95.318
 vit_b_32                         75.912        92.466
 vit_l_16                         79.662        94.638
 vit_l_32                         76.972        93.070
+convnext_tiny (prototype)        82.520        96.146
 ================================ ============= =============
 
 
@@ -266,6 +267,7 @@ vit_l_32 76.972 93.070
 .. _EfficientNet: https://arxiv.org/abs/1905.11946
 .. _RegNet: https://arxiv.org/abs/2003.13678
 .. _VisionTransformer: https://arxiv.org/abs/2010.11929
+.. _ConvNeXt: https://arxiv.org/abs/2201.03545
 
 .. currentmodule:: torchvision.models
 
@@ -461,7 +463,6 @@ VisionTransformer
     vit_b_32
     vit_l_16
     vit_l_32
-    vit_h_14
 
 Quantized Models
 ----------------
@@ -594,6 +595,7 @@ The models subpackage contains definitions for the following model
 architectures for detection:
 
 - `Faster R-CNN <https://arxiv.org/abs/1506.01497>`_
+- `FCOS <https://arxiv.org/abs/1904.01355>`_
 - `Mask R-CNN <https://arxiv.org/abs/1703.06870>`_
 - `RetinaNet <https://arxiv.org/abs/1708.02002>`_
 - `SSD <https://arxiv.org/abs/1512.02325>`_
@@ -639,6 +641,7 @@ Network box AP mask AP keypoint AP
 Faster R-CNN ResNet-50 FPN                 37.0     -        -
 Faster R-CNN MobileNetV3-Large FPN         32.8     -        -
 Faster R-CNN MobileNetV3-Large 320 FPN     22.8     -        -
+FCOS ResNet-50 FPN                         39.2     -        -
 RetinaNet ResNet-50 FPN                    36.4     -        -
 SSD300 VGG16                               25.1     -        -
 SSDlite320 MobileNetV3-Large               21.3     -        -
@@ -699,6 +702,7 @@ Network train time (s / it) test time (s / it)
 Faster R-CNN ResNet-50 FPN                 0.2288   0.0590   5.2
 Faster R-CNN MobileNetV3-Large FPN         0.1020   0.0415   1.0
 Faster R-CNN MobileNetV3-Large 320 FPN     0.0978   0.0376   0.6
+FCOS ResNet-50 FPN                         0.1450   0.0539   3.3
 RetinaNet ResNet-50 FPN                    0.2514   0.0939   4.1
 SSD300 VGG16                               0.2093   0.0744   1.5
 SSDlite320 MobileNetV3-Large               0.1773   0.0906   1.5
@@ -718,6 +722,15 @@ Faster R-CNN
     torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn
     torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn
 
+FCOS
+----
+
+.. autosummary::
+    :toctree: generated/
+    :template: function.rst
+
+    torchvision.models.detection.fcos_resnet50_fpn
+
 
 RetinaNet
 ---------
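
The docs above keep torchvision's usual builder pattern: call the constructor for random weights, or pass `pretrained=True` where weights ship with the main package. Below is a minimal sketch (not part of the commit) exercising the two builders the docs now list, assuming a nightly build from around this change in which `convnext_tiny` is exposed under `torchvision.models` (as the reference training command further down suggests) and `fcos_resnet50_fpn` under `torchvision.models.detection`:

```python
# Sketch only, not from the commit: exercises the builders documented above.
import torch
import torchvision

# ConvNeXt image classifier with random weights, per the constructor section of models.rst.
convnext = torchvision.models.convnext_tiny()

# FCOS detector from the new autosummary entry; num_classes=91 matches the COCO default.
fcos = torchvision.models.detection.fcos_resnet50_fpn(pretrained=False, num_classes=91)
fcos.eval()

with torch.no_grad():
    # Detection models take a list of CHW tensors and return one dict per image.
    predictions = fcos([torch.rand(3, 480, 640)])
print(predictions[0].keys())  # boxes, scores, labels
```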

hubconf.py

Lines changed: 0 additions & 1 deletion
@@ -63,5 +63,4 @@
     vit_b_32,
     vit_l_16,
     vit_l_32,
-    vit_h_14,
 )

mypy.ini

Lines changed: 4 additions & 0 deletions
@@ -70,6 +70,10 @@ ignore_errors = True
 
 ignore_errors = True
 
+[mypy-torchvision.models.detection.fcos]
+
+ignore_errors = True
+
 [mypy-torchvision.ops.*]
 
 ignore_errors = True

references/classification/README.md

Lines changed: 14 additions & 0 deletions
@@ -197,6 +197,20 @@ Note that the above command corresponds to training on a single node with 8 GPUs
 For generating the pre-trained weights, we trained with 8 nodes, each with 8 GPUs (for a total of 64 GPUs),
 and `--batch_size 64`.
 
+
+### ConvNeXt
+```
+torchrun --nproc_per_node=8 train.py\
+--model convnext_tiny --batch-size 128 --opt adamw --lr 1e-3 --lr-scheduler cosineannealinglr \
+--lr-warmup-epochs 5 --lr-warmup-method linear --auto-augment ta_wide --epochs 600 --random-erase 0.1 \
+--label-smoothing 0.1 --mixup-alpha 0.2 --cutmix-alpha 1.0 --weight-decay 0.05 --norm-weight-decay 0.0 \
+--train-crop-size 176 --model-ema --val-resize-size 236 --ra-sampler --ra-reps 4
+```
+
+Note that the above command corresponds to training on a single node with 8 GPUs.
+For generating the pre-trained weights, we trained with 2 nodes, each with 8 GPUs (for a total of 16 GPUs),
+and `--batch_size 64`.
+
 ## Mixed precision training
 Automatic Mixed Precision (AMP) training on GPU for Pytorch can be enabled with the [torch.cuda.amp](https://pytorch.org/docs/stable/amp.html?highlight=amp#module-torch.cuda.amp).

references/classification/train.py

Lines changed: 21 additions & 8 deletions
@@ -16,9 +16,9 @@
 
 
 try:
-    from torchvision.prototype import models as PM
+    from torchvision import prototype
 except ImportError:
-    PM = None
+    prototype = None
 
 
 def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, args, model_ema=None, scaler=None):
@@ -154,13 +154,18 @@ def load_data(traindir, valdir, args):
         print(f"Loading dataset_test from {cache_path}")
         dataset_test, _ = torch.load(cache_path)
     else:
-        if not args.weights:
+        if not args.prototype:
             preprocessing = presets.ClassificationPresetEval(
                 crop_size=val_crop_size, resize_size=val_resize_size, interpolation=interpolation
             )
         else:
-            weights = PM.get_weight(args.weights)
-            preprocessing = weights.transforms()
+            if args.weights:
+                weights = prototype.models.get_weight(args.weights)
+                preprocessing = weights.transforms()
+            else:
+                preprocessing = prototype.transforms.ImageNetEval(
+                    crop_size=val_crop_size, resize_size=val_resize_size, interpolation=interpolation
+                )
 
         dataset_test = torchvision.datasets.ImageFolder(
             valdir,
@@ -186,8 +191,10 @@ def load_data(traindir, valdir, args):
 
 
 def main(args):
-    if args.weights and PM is None:
+    if args.prototype and prototype is None:
         raise ImportError("The prototype module couldn't be found. Please install the latest torchvision nightly.")
+    if not args.prototype and args.weights:
+        raise ValueError("The weights parameter works only in prototype mode. Please pass the --prototype argument.")
     if args.output_dir:
         utils.mkdir(args.output_dir)
 
@@ -229,10 +236,10 @@
     )
 
     print("Creating model")
-    if not args.weights:
+    if not args.prototype:
         model = torchvision.models.__dict__[args.model](pretrained=args.pretrained, num_classes=num_classes)
     else:
-        model = PM.__dict__[args.model](weights=args.weights, num_classes=num_classes)
+        model = prototype.models.__dict__[args.model](weights=args.weights, num_classes=num_classes)
     model.to(device)
 
     if args.distributed and args.sync_bn:
@@ -491,6 +498,12 @@ def get_args_parser(add_help=True):
     )
 
     # Prototype models only
+    parser.add_argument(
+        "--prototype",
+        dest="prototype",
+        help="Use prototype model builders instead those from main area",
+        action="store_true",
+    )
     parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load")
 
     return parser
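
Across the reference scripts, this commit moves the `--weights` flag behind a new `--prototype` switch. The sketch below condenses the gating added above into a hypothetical standalone helper (`check_prototype_args` is not in the diff; the import pattern and error messages are):

```python
# Condensed from the diff above: how the new --prototype flag gates the prototype builders.
try:
    from torchvision import prototype
except ImportError:
    prototype = None  # released torchvision builds without the prototype area


def check_prototype_args(args):
    # --prototype requires a nightly that ships torchvision.prototype.
    if args.prototype and prototype is None:
        raise ImportError("The prototype module couldn't be found. Please install the latest torchvision nightly.")
    # --weights (a weights enum name) is only honored by the prototype builders.
    if not args.prototype and args.weights:
        raise ValueError("The weights parameter works only in prototype mode. Please pass the --prototype argument.")
```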

references/detection/README.md

Lines changed: 7 additions & 0 deletions
@@ -41,6 +41,13 @@ torchrun --nproc_per_node=8 train.py\
 --lr-steps 16 22 --aspect-ratio-group-factor 3
 ```
 
+### FCOS ResNet-50 FPN
+```
+torchrun --nproc_per_node=8 train.py\
+    --dataset coco --model fcos_resnet50_fpn --epochs 26\
+    --lr-steps 16 22 --aspect-ratio-group-factor 3 --lr 0.01 --amp
+```
+
 ### RetinaNet
 ```
 torchrun --nproc_per_node=8 train.py\
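
Once the FCOS run above finishes, the saved checkpoint can be reloaded for evaluation. A hedged sketch, assuming the reference detection script's checkpoint layout (state dict stored under the "model" key) and an illustrative file name:

```python
# Sketch only: reload an FCOS checkpoint produced by the command above.
# Assumes the reference script's checkpoint format ({"model": state_dict, ...}).
import torch
import torchvision

model = torchvision.models.detection.fcos_resnet50_fpn(num_classes=91)
checkpoint = torch.load("model_25.pth", map_location="cpu")  # illustrative path
model.load_state_dict(checkpoint["model"])
model.eval()
```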

references/detection/train.py

Lines changed: 19 additions & 8 deletions
@@ -34,9 +34,9 @@
 
 
 try:
-    from torchvision.prototype import models as PM
+    from torchvision import prototype
 except ImportError:
-    PM = None
+    prototype = None
 
 
 def get_dataset(name, image_set, transform, data_path):
@@ -50,11 +50,14 @@ def get_dataset(name, image_set, transform, data_path):
 def get_transform(train, args):
     if train:
         return presets.DetectionPresetTrain(args.data_augmentation)
-    elif not args.weights:
+    elif not args.prototype:
         return presets.DetectionPresetEval()
     else:
-        weights = PM.get_weight(args.weights)
-        return weights.transforms()
+        if args.weights:
+            weights = prototype.models.get_weight(args.weights)
+            return weights.transforms()
+        else:
+            return prototype.transforms.CocoEval()
 
 
 def get_args_parser(add_help=True):
@@ -141,6 +144,12 @@ def get_args_parser(add_help=True):
     parser.add_argument("--dist-url", default="env://", type=str, help="url used to set up distributed training")
 
     # Prototype models only
+    parser.add_argument(
+        "--prototype",
+        dest="prototype",
+        help="Use prototype model builders instead those from main area",
+        action="store_true",
+    )
     parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load")
 
     # Mixed precision training parameters
@@ -150,8 +159,10 @@
 
 
 def main(args):
-    if args.weights and PM is None:
+    if args.prototype and prototype is None:
         raise ImportError("The prototype module couldn't be found. Please install the latest torchvision nightly.")
+    if not args.prototype and args.weights:
+        raise ValueError("The weights parameter works only in prototype mode. Please pass the --prototype argument.")
     if args.output_dir:
         utils.mkdir(args.output_dir)
 
@@ -193,12 +204,12 @@
     if "rcnn" in args.model:
         if args.rpn_score_thresh is not None:
             kwargs["rpn_score_thresh"] = args.rpn_score_thresh
-    if not args.weights:
+    if not args.prototype:
         model = torchvision.models.detection.__dict__[args.model](
             pretrained=args.pretrained, num_classes=num_classes, **kwargs
         )
     else:
-        model = PM.detection.__dict__[args.model](weights=args.weights, num_classes=num_classes, **kwargs)
+        model = prototype.models.detection.__dict__[args.model](weights=args.weights, num_classes=num_classes, **kwargs)
     model.to(device)
     if args.distributed and args.sync_bn:
         model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

references/optical_flow/train.py

Lines changed: 23 additions & 9 deletions
@@ -10,10 +10,9 @@
 from torchvision.datasets import KittiFlow, FlyingChairs, FlyingThings3D, Sintel, HD1K
 
 try:
-    from torchvision.prototype import models as PM
-    from torchvision.prototype.models import optical_flow as PMOF
+    from torchvision import prototype
 except ImportError:
-    PM = PMOF = None
+    prototype = None
 
 
 def get_train_dataset(stage, dataset_root):
@@ -133,9 +132,12 @@ def inner_loop(blob):
 def validate(model, args):
     val_datasets = args.val_dataset or []
 
-    if args.weights:
-        weights = PM.get_weight(args.weights)
-        preprocessing = weights.transforms()
+    if args.prototype:
+        if args.weights:
+            weights = prototype.models.get_weight(args.weights)
+            preprocessing = weights.transforms()
+        else:
+            preprocessing = prototype.transforms.RaftEval()
     else:
         preprocessing = OpticalFlowPresetEval()
 
@@ -192,10 +194,14 @@ def train_one_epoch(model, optimizer, scheduler, train_loader, logger, args):
 
 
 def main(args):
+    if args.prototype and prototype is None:
+        raise ImportError("The prototype module couldn't be found. Please install the latest torchvision nightly.")
+    if not args.prototype and args.weights:
+        raise ValueError("The weights parameter works only in prototype mode. Please pass the --prototype argument.")
     utils.setup_ddp(args)
 
-    if args.weights:
-        model = PMOF.__dict__[args.model](weights=args.weights)
+    if args.prototype:
+        model = prototype.models.optical_flow.__dict__[args.model](weights=args.weights)
     else:
         model = torchvision.models.optical_flow.__dict__[args.model](pretrained=args.pretrained)
 
@@ -317,7 +323,6 @@ def get_args_parser(add_help=True):
     )
     # TODO: resume, pretrained, and weights should be in an exclusive arg group
     parser.add_argument("--pretrained", action="store_true", help="Whether to use pretrained weights")
-    parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load.")
 
     parser.add_argument(
         "--num_flow_updates",
@@ -336,6 +341,15 @@
         required=True,
     )
 
+    # Prototype models only
+    parser.add_argument(
+        "--prototype",
+        dest="prototype",
+        help="Use prototype model builders instead those from main area",
+        action="store_true",
+    )
+    parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load.")
+
     return parser