From 4e7fc2e38103c409a18402dc3863db25cca8f4cc Mon Sep 17 00:00:00 2001 From: Khushi Agrawal Date: Mon, 18 Oct 2021 11:53:21 +0530 Subject: [PATCH 1/3] add typing in faster_rcnn, initial commit. --- torchvision/models/detection/faster_rcnn.py | 96 ++++++++++----------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/torchvision/models/detection/faster_rcnn.py b/torchvision/models/detection/faster_rcnn.py index 02da39e8c73..618542a4ca2 100644 --- a/torchvision/models/detection/faster_rcnn.py +++ b/torchvision/models/detection/faster_rcnn.py @@ -143,39 +143,39 @@ class FasterRCNN(GeneralizedRCNN): def __init__( self, - backbone, - num_classes=None, + backbone: nn.Module, + num_classes: int = None, # transform parameters - min_size=800, - max_size=1333, - image_mean=None, - image_std=None, + min_size: int = 800, + max_size: int = 1333, + image_mean: Tuple[float, float, float] = None, + image_std: Tuple[float, float, float] = None, # RPN parameters - rpn_anchor_generator=None, - rpn_head=None, - rpn_pre_nms_top_n_train=2000, - rpn_pre_nms_top_n_test=1000, - rpn_post_nms_top_n_train=2000, - rpn_post_nms_top_n_test=1000, - rpn_nms_thresh=0.7, - rpn_fg_iou_thresh=0.7, - rpn_bg_iou_thresh=0.3, - rpn_batch_size_per_image=256, - rpn_positive_fraction=0.5, - rpn_score_thresh=0.0, + rpn_anchor_generator: AnchorGenerator = None, + rpn_head: nn.Module = None, + rpn_pre_nms_top_n_train: int = 2000, + rpn_pre_nms_top_n_test: int = 1000, + rpn_post_nms_top_n_train: int = 2000, + rpn_post_nms_top_n_test: int = 1000, + rpn_nms_thresh: float = 0.7, + rpn_fg_iou_thresh: float = 0.7, + rpn_bg_iou_thresh: float = 0.3, + rpn_batch_size_per_image: int = 256, + rpn_positive_fraction: float = 0.5, + rpn_score_thresh: float = 0.0, # Box parameters - box_roi_pool=None, - box_head=None, - box_predictor=None, - box_score_thresh=0.05, - box_nms_thresh=0.5, - box_detections_per_img=100, - box_fg_iou_thresh=0.5, - box_bg_iou_thresh=0.5, - box_batch_size_per_image=512, - box_positive_fraction=0.25, - bbox_reg_weights=None, - ): + box_roi_pool: MultiScaleRoIAlign = None, + box_head: nn.Module = None, + box_predictor: nn.Module = None, + box_score_thresh: float = 0.05, + box_nms_thresh: float = 0.5, + box_detections_per_img: int = 100, + box_fg_iou_thresh: float = 0.5, + box_bg_iou_thresh: float = 0.5, + box_batch_size_per_image: int = 512, + box_positive_fraction: float = 0.25, + bbox_reg_weights: Tuple[float, float, float, float] = None, + ) -> None: if not hasattr(backbone, "out_channels"): raise ValueError( @@ -264,13 +264,13 @@ class TwoMLPHead(nn.Module): representation_size (int): size of the intermediate representation """ - def __init__(self, in_channels, representation_size): + def __init__(self, in_channels: int, representation_size: int) -> None: super(TwoMLPHead, self).__init__() self.fc6 = nn.Linear(in_channels, representation_size) self.fc7 = nn.Linear(representation_size, representation_size) - def forward(self, x): + def forward(self, x: Tensor) -> nn.Module: x = x.flatten(start_dim=1) x = F.relu(self.fc6(x)) @@ -289,12 +289,12 @@ class FastRCNNPredictor(nn.Module): num_classes (int): number of output classes (including background) """ - def __init__(self, in_channels, num_classes): + def __init__(self, in_channels: int, num_classes: int) -> None: super(FastRCNNPredictor, self).__init__() self.cls_score = nn.Linear(in_channels, num_classes) self.bbox_pred = nn.Linear(in_channels, num_classes * 4) - def forward(self, x): + def forward(self, x: Tensor) -> Tuple[nn.Module, nn.Module]: if x.dim() == 
4: assert list(x.shape[2:]) == [1, 1] x = x.flatten(start_dim=1) @@ -312,8 +312,8 @@ def forward(self, x): def fasterrcnn_resnet50_fpn( - pretrained=False, progress=True, num_classes=91, pretrained_backbone=True, trainable_backbone_layers=None, **kwargs -): + pretrained: bool = False, progress: bool = True, num_classes: int = 91, pretrained_backbone: bool = True, trainable_backbone_layers: int = None, **kwargs +) -> FasterRCNN: """ Constructs a Faster R-CNN model with a ResNet-50-FPN backbone. @@ -395,14 +395,14 @@ def fasterrcnn_resnet50_fpn( def _fasterrcnn_mobilenet_v3_large_fpn( - weights_name, - pretrained=False, - progress=True, - num_classes=91, - pretrained_backbone=True, - trainable_backbone_layers=None, - **kwargs, -): + weights_name: str, + pretrained: bool = False, + progress: bool = True, + num_classes: int = 91, + pretrained_backbone: bool = True, + trainable_backbone_layers=None, + **kwargs, +) -> FasterRCNN: trainable_backbone_layers = _validate_trainable_layers( pretrained or pretrained_backbone, trainable_backbone_layers, 6, 3 ) @@ -436,8 +436,8 @@ def _fasterrcnn_mobilenet_v3_large_fpn( def fasterrcnn_mobilenet_v3_large_320_fpn( - pretrained=False, progress=True, num_classes=91, pretrained_backbone=True, trainable_backbone_layers=None, **kwargs -): + pretrained: bool = False, progress: bool = True, num_classes: int = 91, pretrained_backbone: bool = True, trainable_backbone_layers: int = None, **kwargs +) -> FasterRCNN: """ Constructs a low resolution Faster R-CNN model with a MobileNetV3-Large FPN backbone tunned for mobile use-cases. It works similarly to Faster R-CNN with ResNet-50 FPN backbone. See @@ -481,8 +481,8 @@ def fasterrcnn_mobilenet_v3_large_320_fpn( def fasterrcnn_mobilenet_v3_large_fpn( - pretrained=False, progress=True, num_classes=91, pretrained_backbone=True, trainable_backbone_layers=None, **kwargs -): + pretrained: bool = False, progress: bool = True, num_classes: int = 91, pretrained_backbone: bool = True, trainable_backbone_layers: int = None, **kwargs +) -> FasterRCNN: """ Constructs a high resolution Faster R-CNN model with a MobileNetV3-Large FPN backbone. It works similarly to Faster R-CNN with ResNet-50 FPN backbone. See From a14380ffe6b8e5af147844c8ba56a019fa26a893 Mon Sep 17 00:00:00 2001 From: Aditya Oke Date: Mon, 18 Oct 2021 20:52:04 +0530 Subject: [PATCH 2/3] Enable mypy and type frcnn correctly --- mypy.ini | 4 -- torchvision/models/detection/faster_rcnn.py | 65 +++++++++++++-------- 2 files changed, 41 insertions(+), 28 deletions(-) diff --git a/mypy.ini b/mypy.ini index a2733d3ae3b..4b7fc09b83c 100644 --- a/mypy.ini +++ b/mypy.ini @@ -37,10 +37,6 @@ ignore_errors = True ignore_errors = True -[mypy-torchvision.models.detection.faster_rcnn] - -ignore_errors = True - [mypy-torchvision.models.detection.mask_rcnn] ignore_errors = True diff --git a/torchvision/models/detection/faster_rcnn.py b/torchvision/models/detection/faster_rcnn.py index 618542a4ca2..b0e2183165d 100644 --- a/torchvision/models/detection/faster_rcnn.py +++ b/torchvision/models/detection/faster_rcnn.py @@ -1,5 +1,7 @@ +from typing import Tuple, List, Optional, Any + import torch.nn.functional as F -from torch import nn +from torch import nn, Tensor from torchvision.ops import MultiScaleRoIAlign from ..._internally_replaced_utils import load_state_dict_from_url @@ -55,10 +57,10 @@ class FasterRCNN(GeneralizedRCNN): If box_predictor is specified, num_classes should be None. 
min_size (int): minimum size of the image to be rescaled before feeding it to the backbone max_size (int): maximum size of the image to be rescaled before feeding it to the backbone - image_mean (Tuple[float, float, float]): mean values used for input normalization. + image_mean (List[float]): mean values used for input normalization. They are generally the mean values of the dataset on which the backbone has been trained on - image_std (Tuple[float, float, float]): std values used for input normalization. + image_std (List[float]): std values used for input normalization. They are generally the std values of the dataset on which the backbone has been trained on rpn_anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature maps. @@ -144,15 +146,15 @@ class FasterRCNN(GeneralizedRCNN): def __init__( self, backbone: nn.Module, - num_classes: int = None, + num_classes: Optional[int] = None, # transform parameters min_size: int = 800, max_size: int = 1333, - image_mean: Tuple[float, float, float] = None, - image_std: Tuple[float, float, float] = None, + image_mean: Optional[List[float]] = None, + image_std: Optional[List[float]] = None, # RPN parameters - rpn_anchor_generator: AnchorGenerator = None, - rpn_head: nn.Module = None, + rpn_anchor_generator: Optional[AnchorGenerator] = None, + rpn_head: Optional[nn.Module] = None, rpn_pre_nms_top_n_train: int = 2000, rpn_pre_nms_top_n_test: int = 1000, rpn_post_nms_top_n_train: int = 2000, @@ -164,9 +166,9 @@ def __init__( rpn_positive_fraction: float = 0.5, rpn_score_thresh: float = 0.0, # Box parameters - box_roi_pool: MultiScaleRoIAlign = None, - box_head: nn.Module = None, - box_predictor: nn.Module = None, + box_roi_pool: Optional[MultiScaleRoIAlign] = None, + box_head: Optional[nn.Module] = None, + box_predictor: Optional[nn.Module] = None, box_score_thresh: float = 0.05, box_nms_thresh: float = 0.5, box_detections_per_img: int = 100, @@ -174,7 +176,7 @@ def __init__( box_bg_iou_thresh: float = 0.5, box_batch_size_per_image: int = 512, box_positive_fraction: float = 0.25, - bbox_reg_weights: Tuple[float, float, float, float] = None, + bbox_reg_weights: Optional[Tuple[float, float, float, float]] = None, ) -> None: if not hasattr(backbone, "out_channels"): @@ -270,7 +272,7 @@ def __init__(self, in_channels: int, representation_size: int) -> None: self.fc6 = nn.Linear(in_channels, representation_size) self.fc7 = nn.Linear(representation_size, representation_size) - def forward(self, x: Tensor) -> nn.Module: + def forward(self, x: Tensor) -> Tensor: x = x.flatten(start_dim=1) x = F.relu(self.fc6(x)) @@ -294,7 +296,7 @@ def __init__(self, in_channels: int, num_classes: int) -> None: self.cls_score = nn.Linear(in_channels, num_classes) self.bbox_pred = nn.Linear(in_channels, num_classes * 4) - def forward(self, x: Tensor) -> Tuple[nn.Module, nn.Module]: + def forward(self, x: Tensor) -> Tuple[Tensor, Tensor]: if x.dim() == 4: assert list(x.shape[2:]) == [1, 1] x = x.flatten(start_dim=1) @@ -312,7 +314,12 @@ def forward(self, x: Tensor) -> Tuple[nn.Module, nn.Module]: def fasterrcnn_resnet50_fpn( - pretrained: bool = False, progress: bool = True, num_classes: int = 91, pretrained_backbone: bool = True, trainable_backbone_layers: int = None, **kwargs + pretrained: bool = False, + progress: bool = True, + num_classes: int = 91, + pretrained_backbone: bool = True, + trainable_backbone_layers: Optional[int] = None, + **kwargs: Any, ) -> FasterRCNN: """ Constructs a Faster R-CNN model with a ResNet-50-FPN backbone. 
@@ -395,13 +402,13 @@ def fasterrcnn_resnet50_fpn( def _fasterrcnn_mobilenet_v3_large_fpn( - weights_name: str, - pretrained: bool = False, - progress: bool = True, - num_classes: int = 91, - pretrained_backbone: bool = True, - trainable_backbone_layers=None, - **kwargs, + weights_name: str, + pretrained: bool = False, + progress: bool = True, + num_classes: int = 91, + pretrained_backbone: bool = True, + trainable_backbone_layers: Optional[int] = None, + **kwargs: Any, ) -> FasterRCNN: trainable_backbone_layers = _validate_trainable_layers( pretrained or pretrained_backbone, trainable_backbone_layers, 6, 3 @@ -436,7 +443,12 @@ def _fasterrcnn_mobilenet_v3_large_fpn( def fasterrcnn_mobilenet_v3_large_320_fpn( - pretrained: bool = False, progress: bool = True, num_classes: int = 91, pretrained_backbone: bool = True, trainable_backbone_layers: int = None, **kwargs + pretrained: bool = False, + progress: bool = True, + num_classes: int = 91, + pretrained_backbone: bool = True, + trainable_backbone_layers: Optional[int] = None, + **kwargs: Any, ) -> FasterRCNN: """ Constructs a low resolution Faster R-CNN model with a MobileNetV3-Large FPN backbone tunned for mobile use-cases. @@ -481,7 +493,12 @@ def fasterrcnn_mobilenet_v3_large_320_fpn( def fasterrcnn_mobilenet_v3_large_fpn( - pretrained: bool = False, progress: bool = True, num_classes: int = 91, pretrained_backbone: bool = True, trainable_backbone_layers: int = None, **kwargs + pretrained: bool = False, + progress: bool = True, + num_classes: int = 91, + pretrained_backbone: bool = True, + trainable_backbone_layers: Optional[int] = None, + **kwargs: Any, ) -> FasterRCNN: """ Constructs a high resolution Faster R-CNN model with a MobileNetV3-Large FPN backbone. From 321bb66a550c295827efed94f55436e77a533d03 Mon Sep 17 00:00:00 2001 From: Aditya Oke Date: Mon, 18 Oct 2021 20:55:45 +0530 Subject: [PATCH 3/3] Make mypy happy --- torchvision/models/detection/faster_rcnn.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torchvision/models/detection/faster_rcnn.py b/torchvision/models/detection/faster_rcnn.py index b0e2183165d..48b1366fe00 100644 --- a/torchvision/models/detection/faster_rcnn.py +++ b/torchvision/models/detection/faster_rcnn.py @@ -1,4 +1,4 @@ -from typing import Tuple, List, Optional, Any +from typing import Tuple, List, Optional, Any, cast import torch.nn.functional as F from torch import nn, Tensor @@ -196,7 +196,7 @@ def __init__( if box_predictor is None: raise ValueError("num_classes should not be None when box_predictor " "is not specified") - out_channels = backbone.out_channels + out_channels = cast(int, backbone.out_channels) if rpn_anchor_generator is None: anchor_sizes = ((32,), (64,), (128,), (256,), (512,)) @@ -231,7 +231,7 @@ def __init__( if box_predictor is None: representation_size = 1024 - box_predictor = FastRCNNPredictor(representation_size, num_classes) + box_predictor = FastRCNNPredictor(representation_size, num_classes) # type: ignore[arg-type] roi_heads = RoIHeads( # Box