From 501b2c1a5b1967123128b997102612dafabd2d2d Mon Sep 17 00:00:00 2001
From: Vasilis Vryniotis <vvryniotis@fb.com>
Date: Mon, 28 Jun 2021 15:12:45 +0100
Subject: [PATCH] Fix num_classes docs in SSD and RetinaNet to include the background.

---
 torchvision/models/detection/retinanet.py | 2 +-
 torchvision/models/detection/ssd.py       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/torchvision/models/detection/retinanet.py b/torchvision/models/detection/retinanet.py
index aab7daa828a..4dd95285dbc 100644
--- a/torchvision/models/detection/retinanet.py
+++ b/torchvision/models/detection/retinanet.py
@@ -256,7 +256,7 @@ class RetinaNet(nn.Module):
             It should contain an out_channels attribute, which indicates the number of output
             channels that each feature map has (and it should be the same for all feature maps).
             The backbone should return a single Tensor or an OrderedDict[Tensor].
-        num_classes (int): number of output classes of the model (excluding the background).
+        num_classes (int): number of output classes of the model (including the background).
         min_size (int): minimum size of the image to be rescaled before feeding it to the backbone
         max_size (int): maximum size of the image to be rescaled before feeding it to the backbone
         image_mean (Tuple[float, float, float]): mean values used for input normalization.
diff --git a/torchvision/models/detection/ssd.py b/torchvision/models/detection/ssd.py
index 3589dd45f4c..e67c4930b30 100644
--- a/torchvision/models/detection/ssd.py
+++ b/torchvision/models/detection/ssd.py
@@ -141,7 +141,7 @@ class SSD(nn.Module):
             set of feature maps.
         size (Tuple[int, int]): the width and height to which images will be rescaled before feeding them
             to the backbone.
-        num_classes (int): number of output classes of the model (excluding the background).
+        num_classes (int): number of output classes of the model (including the background).
         image_mean (Tuple[float, float, float]): mean values used for input normalization.
             They are generally the mean values of the dataset on which the backbone has been trained
             on