From 4e7fc2e38103c409a18402dc3863db25cca8f4cc Mon Sep 17 00:00:00 2001 From: Khushi Agrawal Date: Mon, 18 Oct 2021 11:53:21 +0530 Subject: [PATCH 1/3] add typing in faster_rcnn, initial commit. --- torchvision/models/detection/faster_rcnn.py | 96 ++++++++++----------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/torchvision/models/detection/faster_rcnn.py b/torchvision/models/detection/faster_rcnn.py index 02da39e8c73..618542a4ca2 100644 --- a/torchvision/models/detection/faster_rcnn.py +++ b/torchvision/models/detection/faster_rcnn.py @@ -143,39 +143,39 @@ class FasterRCNN(GeneralizedRCNN): def __init__( self, - backbone, - num_classes=None, + backbone: nn.Module, + num_classes: int = None, # transform parameters - min_size=800, - max_size=1333, - image_mean=None, - image_std=None, + min_size: int = 800, + max_size: int = 1333, + image_mean: Tuple[float, float, float] = None, + image_std: Tuple[float, float, float] = None, # RPN parameters - rpn_anchor_generator=None, - rpn_head=None, - rpn_pre_nms_top_n_train=2000, - rpn_pre_nms_top_n_test=1000, - rpn_post_nms_top_n_train=2000, - rpn_post_nms_top_n_test=1000, - rpn_nms_thresh=0.7, - rpn_fg_iou_thresh=0.7, - rpn_bg_iou_thresh=0.3, - rpn_batch_size_per_image=256, - rpn_positive_fraction=0.5, - rpn_score_thresh=0.0, + rpn_anchor_generator: AnchorGenerator = None, + rpn_head: nn.Module = None, + rpn_pre_nms_top_n_train: int = 2000, + rpn_pre_nms_top_n_test: int = 1000, + rpn_post_nms_top_n_train: int = 2000, + rpn_post_nms_top_n_test: int = 1000, + rpn_nms_thresh: float = 0.7, + rpn_fg_iou_thresh: float = 0.7, + rpn_bg_iou_thresh: float = 0.3, + rpn_batch_size_per_image: int = 256, + rpn_positive_fraction: float = 0.5, + rpn_score_thresh: float = 0.0, # Box parameters - box_roi_pool=None, - box_head=None, - box_predictor=None, - box_score_thresh=0.05, - box_nms_thresh=0.5, - box_detections_per_img=100, - box_fg_iou_thresh=0.5, - box_bg_iou_thresh=0.5, - box_batch_size_per_image=512, - box_positive_fraction=0.25, - bbox_reg_weights=None, - ): + box_roi_pool: MultiScaleRoIAlign = None, + box_head: nn.Module = None, + box_predictor: nn.Module = None, + box_score_thresh: float = 0.05, + box_nms_thresh: float = 0.5, + box_detections_per_img: int = 100, + box_fg_iou_thresh: float = 0.5, + box_bg_iou_thresh: float = 0.5, + box_batch_size_per_image: int = 512, + box_positive_fraction: float = 0.25, + bbox_reg_weights: Tuple[float, float, float, float] = None, + ) -> None: if not hasattr(backbone, "out_channels"): raise ValueError( @@ -264,13 +264,13 @@ class TwoMLPHead(nn.Module): representation_size (int): size of the intermediate representation """ - def __init__(self, in_channels, representation_size): + def __init__(self, in_channels: int, representation_size: int) -> None: super(TwoMLPHead, self).__init__() self.fc6 = nn.Linear(in_channels, representation_size) self.fc7 = nn.Linear(representation_size, representation_size) - def forward(self, x): + def forward(self, x: Tensor) -> nn.Module: x = x.flatten(start_dim=1) x = F.relu(self.fc6(x)) @@ -289,12 +289,12 @@ class FastRCNNPredictor(nn.Module): num_classes (int): number of output classes (including background) """ - def __init__(self, in_channels, num_classes): + def __init__(self, in_channels: int, num_classes: int) -> None: super(FastRCNNPredictor, self).__init__() self.cls_score = nn.Linear(in_channels, num_classes) self.bbox_pred = nn.Linear(in_channels, num_classes * 4) - def forward(self, x): + def forward(self, x: Tensor) -> Tuple[nn.Module, nn.Module]: if x.dim() == 
4: assert list(x.shape[2:]) == [1, 1] x = x.flatten(start_dim=1) @@ -312,8 +312,8 @@ def forward(self, x): def fasterrcnn_resnet50_fpn( - pretrained=False, progress=True, num_classes=91, pretrained_backbone=True, trainable_backbone_layers=None, **kwargs -): + pretrained: bool = False, progress: bool = True, num_classes: int = 91, pretrained_backbone: bool = True, trainable_backbone_layers: int = None, **kwargs +) -> FasterRCNN: """ Constructs a Faster R-CNN model with a ResNet-50-FPN backbone. @@ -395,14 +395,14 @@ def fasterrcnn_resnet50_fpn( def _fasterrcnn_mobilenet_v3_large_fpn( - weights_name, - pretrained=False, - progress=True, - num_classes=91, - pretrained_backbone=True, - trainable_backbone_layers=None, - **kwargs, -): + weights_name: str, + pretrained: bool = False, + progress: bool = True, + num_classes: int = 91, + pretrained_backbone: bool = True, + trainable_backbone_layers=None, + **kwargs, +) -> FasterRCNN: trainable_backbone_layers = _validate_trainable_layers( pretrained or pretrained_backbone, trainable_backbone_layers, 6, 3 ) @@ -436,8 +436,8 @@ def _fasterrcnn_mobilenet_v3_large_fpn( def fasterrcnn_mobilenet_v3_large_320_fpn( - pretrained=False, progress=True, num_classes=91, pretrained_backbone=True, trainable_backbone_layers=None, **kwargs -): + pretrained: bool = False, progress: bool = True, num_classes: int = 91, pretrained_backbone: bool = True, trainable_backbone_layers: int = None, **kwargs +) -> FasterRCNN: """ Constructs a low resolution Faster R-CNN model with a MobileNetV3-Large FPN backbone tunned for mobile use-cases. It works similarly to Faster R-CNN with ResNet-50 FPN backbone. See @@ -481,8 +481,8 @@ def fasterrcnn_mobilenet_v3_large_320_fpn( def fasterrcnn_mobilenet_v3_large_fpn( - pretrained=False, progress=True, num_classes=91, pretrained_backbone=True, trainable_backbone_layers=None, **kwargs -): + pretrained: bool = False, progress: bool = True, num_classes: int = 91, pretrained_backbone: bool = True, trainable_backbone_layers: int = None, **kwargs +) -> FasterRCNN: """ Constructs a high resolution Faster R-CNN model with a MobileNetV3-Large FPN backbone. It works similarly to Faster R-CNN with ResNet-50 FPN backbone. See From a14380ffe6b8e5af147844c8ba56a019fa26a893 Mon Sep 17 00:00:00 2001 From: Aditya Oke Date: Mon, 18 Oct 2021 20:52:04 +0530 Subject: [PATCH 2/3] Enable mypy and type frcnn correctly --- mypy.ini | 4 -- torchvision/models/detection/faster_rcnn.py | 65 +++++++++++++-------- 2 files changed, 41 insertions(+), 28 deletions(-) diff --git a/mypy.ini b/mypy.ini index a2733d3ae3b..4b7fc09b83c 100644 --- a/mypy.ini +++ b/mypy.ini @@ -37,10 +37,6 @@ ignore_errors = True ignore_errors = True -[mypy-torchvision.models.detection.faster_rcnn] - -ignore_errors = True - [mypy-torchvision.models.detection.mask_rcnn] ignore_errors = True diff --git a/torchvision/models/detection/faster_rcnn.py b/torchvision/models/detection/faster_rcnn.py index 618542a4ca2..b0e2183165d 100644 --- a/torchvision/models/detection/faster_rcnn.py +++ b/torchvision/models/detection/faster_rcnn.py @@ -1,5 +1,7 @@ +from typing import Tuple, List, Optional, Any + import torch.nn.functional as F -from torch import nn +from torch import nn, Tensor from torchvision.ops import MultiScaleRoIAlign from ..._internally_replaced_utils import load_state_dict_from_url @@ -55,10 +57,10 @@ class FasterRCNN(GeneralizedRCNN): If box_predictor is specified, num_classes should be None. 
min_size (int): minimum size of the image to be rescaled before feeding it to the backbone max_size (int): maximum size of the image to be rescaled before feeding it to the backbone - image_mean (Tuple[float, float, float]): mean values used for input normalization. + image_mean (List[float]): mean values used for input normalization. They are generally the mean values of the dataset on which the backbone has been trained on - image_std (Tuple[float, float, float]): std values used for input normalization. + image_std (List[float]): std values used for input normalization. They are generally the std values of the dataset on which the backbone has been trained on rpn_anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature maps. @@ -144,15 +146,15 @@ class FasterRCNN(GeneralizedRCNN): def __init__( self, backbone: nn.Module, - num_classes: int = None, + num_classes: Optional[int] = None, # transform parameters min_size: int = 800, max_size: int = 1333, - image_mean: Tuple[float, float, float] = None, - image_std: Tuple[float, float, float] = None, + image_mean: Optional[List[float]] = None, + image_std: Optional[List[float]] = None, # RPN parameters - rpn_anchor_generator: AnchorGenerator = None, - rpn_head: nn.Module = None, + rpn_anchor_generator: Optional[AnchorGenerator] = None, + rpn_head: Optional[nn.Module] = None, rpn_pre_nms_top_n_train: int = 2000, rpn_pre_nms_top_n_test: int = 1000, rpn_post_nms_top_n_train: int = 2000, @@ -164,9 +166,9 @@ def __init__( rpn_positive_fraction: float = 0.5, rpn_score_thresh: float = 0.0, # Box parameters - box_roi_pool: MultiScaleRoIAlign = None, - box_head: nn.Module = None, - box_predictor: nn.Module = None, + box_roi_pool: Optional[MultiScaleRoIAlign] = None, + box_head: Optional[nn.Module] = None, + box_predictor: Optional[nn.Module] = None, box_score_thresh: float = 0.05, box_nms_thresh: float = 0.5, box_detections_per_img: int = 100, @@ -174,7 +176,7 @@ def __init__( box_bg_iou_thresh: float = 0.5, box_batch_size_per_image: int = 512, box_positive_fraction: float = 0.25, - bbox_reg_weights: Tuple[float, float, float, float] = None, + bbox_reg_weights: Optional[Tuple[float, float, float, float]] = None, ) -> None: if not hasattr(backbone, "out_channels"): @@ -270,7 +272,7 @@ def __init__(self, in_channels: int, representation_size: int) -> None: self.fc6 = nn.Linear(in_channels, representation_size) self.fc7 = nn.Linear(representation_size, representation_size) - def forward(self, x: Tensor) -> nn.Module: + def forward(self, x: Tensor) -> Tensor: x = x.flatten(start_dim=1) x = F.relu(self.fc6(x)) @@ -294,7 +296,7 @@ def __init__(self, in_channels: int, num_classes: int) -> None: self.cls_score = nn.Linear(in_channels, num_classes) self.bbox_pred = nn.Linear(in_channels, num_classes * 4) - def forward(self, x: Tensor) -> Tuple[nn.Module, nn.Module]: + def forward(self, x: Tensor) -> Tuple[Tensor, Tensor]: if x.dim() == 4: assert list(x.shape[2:]) == [1, 1] x = x.flatten(start_dim=1) @@ -312,7 +314,12 @@ def forward(self, x: Tensor) -> Tuple[nn.Module, nn.Module]: def fasterrcnn_resnet50_fpn( - pretrained: bool = False, progress: bool = True, num_classes: int = 91, pretrained_backbone: bool = True, trainable_backbone_layers: int = None, **kwargs + pretrained: bool = False, + progress: bool = True, + num_classes: int = 91, + pretrained_backbone: bool = True, + trainable_backbone_layers: Optional[int] = None, + **kwargs: Any, ) -> FasterRCNN: """ Constructs a Faster R-CNN model with a ResNet-50-FPN backbone. 
@@ -395,13 +402,13 @@ def fasterrcnn_resnet50_fpn( def _fasterrcnn_mobilenet_v3_large_fpn( - weights_name: str, - pretrained: bool = False, - progress: bool = True, - num_classes: int = 91, - pretrained_backbone: bool = True, - trainable_backbone_layers=None, - **kwargs, + weights_name: str, + pretrained: bool = False, + progress: bool = True, + num_classes: int = 91, + pretrained_backbone: bool = True, + trainable_backbone_layers: Optional[int] = None, + **kwargs: Any, ) -> FasterRCNN: trainable_backbone_layers = _validate_trainable_layers( pretrained or pretrained_backbone, trainable_backbone_layers, 6, 3 @@ -436,7 +443,12 @@ def _fasterrcnn_mobilenet_v3_large_fpn( def fasterrcnn_mobilenet_v3_large_320_fpn( - pretrained: bool = False, progress: bool = True, num_classes: int = 91, pretrained_backbone: bool = True, trainable_backbone_layers: int = None, **kwargs + pretrained: bool = False, + progress: bool = True, + num_classes: int = 91, + pretrained_backbone: bool = True, + trainable_backbone_layers: Optional[int] = None, + **kwargs: Any, ) -> FasterRCNN: """ Constructs a low resolution Faster R-CNN model with a MobileNetV3-Large FPN backbone tunned for mobile use-cases. @@ -481,7 +493,12 @@ def fasterrcnn_mobilenet_v3_large_320_fpn( def fasterrcnn_mobilenet_v3_large_fpn( - pretrained: bool = False, progress: bool = True, num_classes: int = 91, pretrained_backbone: bool = True, trainable_backbone_layers: int = None, **kwargs + pretrained: bool = False, + progress: bool = True, + num_classes: int = 91, + pretrained_backbone: bool = True, + trainable_backbone_layers: Optional[int] = None, + **kwargs: Any, ) -> FasterRCNN: """ Constructs a high resolution Faster R-CNN model with a MobileNetV3-Large FPN backbone. From 321bb66a550c295827efed94f55436e77a533d03 Mon Sep 17 00:00:00 2001 From: Aditya Oke Date: Mon, 18 Oct 2021 20:55:45 +0530 Subject: [PATCH 3/3] Make mypy happy --- torchvision/models/detection/faster_rcnn.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torchvision/models/detection/faster_rcnn.py b/torchvision/models/detection/faster_rcnn.py index b0e2183165d..48b1366fe00 100644 --- a/torchvision/models/detection/faster_rcnn.py +++ b/torchvision/models/detection/faster_rcnn.py @@ -1,4 +1,4 @@ -from typing import Tuple, List, Optional, Any +from typing import Tuple, List, Optional, Any, cast import torch.nn.functional as F from torch import nn, Tensor @@ -196,7 +196,7 @@ def __init__( if box_predictor is None: raise ValueError("num_classes should not be None when box_predictor " "is not specified") - out_channels = backbone.out_channels + out_channels = cast(int, backbone.out_channels) if rpn_anchor_generator is None: anchor_sizes = ((32,), (64,), (128,), (256,), (512,)) @@ -231,7 +231,7 @@ def __init__( if box_predictor is None: representation_size = 1024 - box_predictor = FastRCNNPredictor(representation_size, num_classes) + box_predictor = FastRCNNPredictor(representation_size, num_classes) # type: ignore[arg-type] roi_heads = RoIHeads( # Box