pytorch · datumbox · Jun 28, 2021 · Jun 28, 2021 · Jun 28, 2021
diff --git a/torchvision/models/detection/retinanet.py b/torchvision/models/detection/retinanet.py
@@ -256,7 +256,7 @@ class RetinaNet(nn.Module):
             It should contain an out_channels attribute, which indicates the number of output
             channels that each feature map has (and it should be the same for all feature maps).
             The backbone should return a single Tensor or an OrderedDict[Tensor].
-        num_classes (int): number of output classes of the model (excluding the background).
+        num_classes (int): number of output classes of the model (including the background).
         min_size (int): minimum size of the image to be rescaled before feeding it to the backbone
         max_size (int): maximum size of the image to be rescaled before feeding it to the backbone
         image_mean (Tuple[float, float, float]): mean values used for input normalization.

diff --git a/torchvision/models/detection/ssd.py b/torchvision/models/detection/ssd.py
@@ -141,7 +141,7 @@ class SSD(nn.Module):
             set of feature maps.
         size (Tuple[int, int]): the width and height to which images will be rescaled before feeding them
             to the backbone.
-        num_classes (int): number of output classes of the model (excluding the background).
+        num_classes (int): number of output classes of the model (including the background).
         image_mean (Tuple[float, float, float]): mean values used for input normalization.
             They are generally the mean values of the dataset on which the backbone has been trained
             on