diff --git a/MANIFEST.in b/MANIFEST.in
index 75f238c0a2c..782bc542766 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,6 @@
 include README.rst
 include LICENSE
+include torchvision/assets/grace_hopper_517x606.jpg
 
 recursive-exclude * __pycache__
 recursive-exclude * *.py[co]
diff --git a/docs/requirements.txt b/docs/requirements.txt
index f649853cd03..20b722ecdf1 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,3 +1,4 @@
+matplotlib
 sphinx==1.7.3
 sphinxcontrib-googleanalytics
 -e git+git://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 47f37c4fe25..2b644e6050f 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -44,6 +44,7 @@
     'sphinx.ext.mathjax',
     'sphinx.ext.napoleon',
     'sphinx.ext.viewcode',
+    'matplotlib.sphinxext.plot_directive',
     'sphinxcontrib.googleanalytics',
 ]
 
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 9de82b6e7fc..1d5c4cc86e9 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -13,6 +13,7 @@ architectures, and common image transformations for computer vision.
    models
    ops
    transforms
+   transforms_functional
    utils
 
 .. automodule:: torchvision
diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst
index 2e0c6cefb8d..2162b9fd056 100644
--- a/docs/source/transforms.rst
+++ b/docs/source/transforms.rst
@@ -9,6 +9,10 @@ Functional transforms give fine-grained control over the transformations.
 This is useful if you have to build a more complex transformation pipeline
 (e.g. in the case of segmentation tasks).
 
+.. Note::
+    Most transform classes have an equivalent in :mod:`torchvision.transforms.functional`.
+
+
 .. autoclass:: Compose
 
 Transforms on PIL Image
@@ -16,117 +20,352 @@ Transforms on PIL Image
 
 .. autoclass:: CenterCrop
 
+**Example**
+
+.. plot::
+   :include-source:
+
+    from torchvision.transforms import CenterCrop
+    from torchvision.utils import _plot_images, _sample_image
+    import torch
+    import numpy as np
+
+    np.random.seed(0); torch.manual_seed(0);
+    _plot_images(
+        _sample_image(),
+        CenterCrop(256)(_sample_image()),
+        CenterCrop((200, 300))(_sample_image()),
+    )
+
+
 .. autoclass:: ColorJitter
 
+**Example**
+
+.. plot::
+   :include-source:
+
+    from torchvision.transforms import ColorJitter
+    from torchvision.utils import _plot_images, _sample_image
+    import torch
+    import numpy as np
+
+    np.random.seed(0); torch.manual_seed(0);
+
+    transform = ColorJitter(brightness=0.5)
+    _plot_images(*[transform(_sample_image()) for _ in range(5)])
+
+    transform = ColorJitter(contrast=0.5)
+    _plot_images(*[transform(_sample_image()) for _ in range(5)])
+
+    transform = ColorJitter(saturation=0.5)
+    _plot_images(*[transform(_sample_image()) for _ in range(5)])
+
+    transform = ColorJitter(hue=0.1)
+    _plot_images(*[transform(_sample_image()) for _ in range(5)])
+
+
 .. autoclass:: FiveCrop
 
+**Example**
+
+.. plot::
+   :include-source:
+
+    from torchvision.transforms import FiveCrop
+    from torchvision.utils import _plot_images, _sample_image
+    import torch
+    import numpy as np
+
+    np.random.seed(0); torch.manual_seed(0);
+
+    transform = FiveCrop(256)
+    _plot_images(*transform(_sample_image()))
+
+
 .. autoclass:: Grayscale
 
+**Example**
+
+.. plot::
+   :include-source:
+
+    from torchvision.transforms import Grayscale
+    from torchvision.utils import _plot_images, _sample_image
+    import torch
+    import numpy as np
+
+    np.random.seed(0); torch.manual_seed(0);
+
+    transform = Grayscale(3)
+    _plot_images(transform(_sample_image()))
+
+
 .. autoclass:: Pad
 
+**Example**
+
+.. plot::
+   :include-source:
+
+    from torchvision.transforms import Pad
+    from torchvision.utils import _plot_images, _sample_image
+    import torch
+    import numpy as np
+
+    np.random.seed(0); torch.manual_seed(0);
+
+    transform = Pad(100)
+    _plot_images(
+        Pad(100, padding_mode="constant", fill=0)(_sample_image()),
+        Pad(100, padding_mode="constant", fill=(128, 128, 128))(_sample_image()),
+        Pad(100, padding_mode="constant", fill=(255, 255, 255))(_sample_image()),
+        Pad(100, padding_mode="edge")(_sample_image()),
+        Pad(100, padding_mode="reflect")(_sample_image()),
+        Pad(100, padding_mode="symmetric")(_sample_image()),
+    )
+
+
 .. autoclass:: RandomAffine
 
+**Example**
+
+.. plot::
+   :include-source:
+
+    from torchvision.transforms import RandomAffine
+    from torchvision.utils import _plot_images, _sample_image
+    import torch
+    import numpy as np
+
+    np.random.seed(0); torch.manual_seed(0);
+
+    transform = RandomAffine(degrees=30)
+    _plot_images(*[transform(_sample_image()) for _ in range(5)])
+
+    transform = RandomAffine(degrees=0, translate=(.4, .2))
+    _plot_images(*[transform(_sample_image()) for _ in range(5)])
+
+    transform = RandomAffine(degrees=0, scale=(.5, 1.5))
+    _plot_images(*[transform(_sample_image()) for _ in range(5)])
+
+    transform = RandomAffine(degrees=0, shear=30)
+    _plot_images(*[transform(_sample_image()) for _ in range(5)])
+
+
+
 .. autoclass:: RandomApply
 
 .. autoclass:: RandomChoice
 
 .. autoclass:: RandomCrop
 
+**Example**
+
+.. plot::
+   :include-source:
+
+    from torchvision.transforms import RandomCrop
+    from torchvision.utils import _plot_images, _sample_image
+    import torch
+    import numpy as np
+
+    np.random.seed(0); torch.manual_seed(0);
+
+    transform = RandomCrop(150)
+    _plot_images(*[transform(_sample_image()) for _ in range(5)])
+
+
 .. autoclass:: RandomGrayscale
 
+**Example**
+
+.. plot::
+   :include-source:
+
+    from torchvision.transforms import RandomGrayscale
+    from torchvision.utils import _plot_images, _sample_image
+    import torch
+    import numpy as np
+
+    np.random.seed(0); torch.manual_seed(0);
+
+    _plot_images(
+        RandomGrayscale(p=1.0)(_sample_image()),
+        RandomGrayscale(p=0.0)(_sample_image()),
+    )
+
+
 .. autoclass:: RandomHorizontalFlip
 
+**Example**
+
+.. plot::
+   :include-source:
+
+    from torchvision.transforms import RandomHorizontalFlip
+    from torchvision.utils import _plot_images, _sample_image
+    import torch
+    import numpy as np
+
+    np.random.seed(0); torch.manual_seed(0);
+
+    _plot_images(
+        RandomHorizontalFlip(p=1.0)(_sample_image()),
+        RandomHorizontalFlip(p=0.0)(_sample_image()),
+    )
+
+
 .. autoclass:: RandomOrder
 
 .. autoclass:: RandomPerspective
 
+**Example**
+
+.. plot::
+   :include-source:
+
+    from torchvision.transforms import RandomPerspective
+    from torchvision.utils import _plot_images, _sample_image
+    import torch
+    import numpy as np
+
+    np.random.seed(0); torch.manual_seed(0);
+
+    transform = RandomPerspective(p=1.0)
+    _plot_images(*[transform(_sample_image()) for _ in range(5)])
+
+
 .. autoclass:: RandomResizedCrop
 
+**Example**
+
+.. plot::
+   :include-source:
+
+    from torchvision.transforms import RandomResizedCrop
+    from torchvision.utils import _plot_images, _sample_image
+    import torch
+    import numpy as np
+
+    np.random.seed(0); torch.manual_seed(0);
+
+    transform = RandomResizedCrop(250)
+    _plot_images(*[transform(_sample_image()) for _ in range(5)])
+
+
 .. autoclass:: RandomRotation
 
+**Example**
+
+.. plot::
+   :include-source:
+
+    from torchvision.transforms import RandomRotation
+    from torchvision.utils import _plot_images, _sample_image
+    import torch
+    import numpy as np
+
+    np.random.seed(0); torch.manual_seed(0);
+
+    transform = RandomRotation(30)
+    _plot_images(*[transform(_sample_image()) for _ in range(5)])
+
+
 .. autoclass:: RandomSizedCrop
 
+
 .. autoclass:: RandomVerticalFlip
 
-.. autoclass:: Resize
+**Example**
 
-.. autoclass:: Scale
+.. plot::
+   :include-source:
 
-.. autoclass:: TenCrop
+    from torchvision.transforms import RandomVerticalFlip
+    from torchvision.utils import _plot_images, _sample_image
+    import torch
+    import numpy as np
 
-Transforms on torch.\*Tensor
-----------------------------
+    np.random.seed(0); torch.manual_seed(0);
 
-.. autoclass:: LinearTransformation
+    transform = RandomVerticalFlip()
+    _plot_images(
+        RandomVerticalFlip(p=0.0)(_sample_image()),
+        RandomVerticalFlip(p=1.0)(_sample_image()),
+    )
 
-.. autoclass:: Normalize
-	:members: __call__
-	:special-members:
 
-.. autoclass:: RandomErasing
+.. autoclass:: Resize
 
-Conversion Transforms
----------------------
+**Example**
 
-.. autoclass:: ToPILImage
-	:members: __call__
-	:special-members:
+.. plot::
+   :include-source:
 
-.. autoclass:: ToTensor
-	:members: __call__
-	:special-members:
+    from torchvision.transforms import Resize
+    from torchvision.utils import _plot_images, _sample_image
+    import torch
+    import numpy as np
 
-Generic Transforms
-------------------
+    np.random.seed(0); torch.manual_seed(0);
 
-.. autoclass:: Lambda
+    _plot_images(
+        Resize(200)(_sample_image()),
+        Resize((200, 200))(_sample_image()),
+        Resize(32)(_sample_image()),
+    )
 
 
-Functional Transforms
----------------------
+.. autoclass:: Scale
 
-Functional transforms give you fine-grained control of the transformation pipeline.
-As opposed to the transformations above, functional transforms don't contain a random number
-generator for their parameters.
-That means you have to specify/generate all parameters, but you can reuse the functional transform.
+.. autoclass:: TenCrop
 
-Example:
-you can apply a functional transform with the same parameters to multiple images like this:
+**Example**
 
-.. code:: python
+.. plot::
+   :include-source:
 
-    import torchvision.transforms.functional as TF
-    import random
+    from torchvision.transforms import TenCrop
+    from torchvision.utils import _plot_images, _sample_image
+    import torch
+    import numpy as np
 
-    def my_segmentation_transforms(image, segmentation):
-        if random.random() > 0.5:
-            angle = random.randint(-30, 30)
-            image = TF.rotate(image, angle)
-            segmentation = TF.rotate(segmentation, angle)
-        # more transforms ...
-        return image, segmentation
+    np.random.seed(0); torch.manual_seed(0);
 
+    _plot_images(*TenCrop(200)(_sample_image()))
 
-Example:
-you can use a functional transform to build transform classes with custom behavior:
 
-.. code:: python
+Transforms on torch.\*Tensor
+----------------------------
 
-    import torchvision.transforms.functional as TF
-    import random
+.. autoclass:: LinearTransformation
+    :members: __call__
+    :special-members:
+
+.. autoclass:: Normalize
+    :members: __call__
+    :special-members:
+
+.. autoclass:: RandomErasing
+    :members: __call__
+    :special-members:
+
+
+Conversion Transforms
+---------------------
 
-    class MyRotationTransform:
-        """Rotate by one of the given angles."""
+.. autoclass:: ToPILImage
+    :members: __call__
+    :special-members:
 
-        def __init__(self, angles):
-            self.angles = angles
+.. autoclass:: ToTensor
+    :members: __call__
+    :special-members:
 
-        def __call__(self, x):
-            angle = random.choice(self.angles)
-            return TF.rotate(x, angle)
 
-    rotation_transform = MyRotationTransform(angles=[-30, -15, 0, 15, 30])
+Generic Transforms
+------------------
 
+.. autoclass:: Lambda
 
-.. automodule:: torchvision.transforms.functional
-    :members:
diff --git a/docs/source/transforms_functional.rst b/docs/source/transforms_functional.rst
new file mode 100644
index 00000000000..d3d9cfc5a73
--- /dev/null
+++ b/docs/source/transforms_functional.rst
@@ -0,0 +1,52 @@
+torchvision.transforms.functional
+=================================
+
+Functional transforms give you fine-grained control of the transformation
+pipeline.
+As opposed to the stateful transformations from :mod:`torchvision.transforms`,
+functional transforms don't contain any state or random number generator for
+their parameters.
+That means that you have to specify/generate all parameters,
+but you can reuse a functional transform.
+
+Example:
+you can apply a functional transform with the same parameters to multiple
+images like this:
+
+.. code:: python
+
+    import torchvision.transforms.functional as TF
+    import random
+
+    def my_segmentation_transforms(image, segmentation):
+        if random.random() > 0.5:
+            angle = random.randint(-30, 30)
+            image = TF.rotate(image, angle)
+            segmentation = TF.rotate(segmentation, angle)
+        # more transforms ...
+        return image, segmentation
+
+
+Example:
+you can use a functional transform to build transform classes with custom behavior:
+
+.. code:: python
+
+    import torchvision.transforms.functional as TF
+    import random
+
+    class MyRotationTransform:
+        """Rotate by one of the given angles."""
+
+        def __init__(self, angles):
+            self.angles = angles
+
+        def __call__(self, x):
+            angle = random.choice(self.angles)
+            return TF.rotate(x, angle)
+
+    rotation_transform = MyRotationTransform(angles=[-30, -15, 0, 15, 30])
+
+
+.. automodule:: torchvision.transforms.functional
+    :members:
diff --git a/mypy.ini b/mypy.ini
index c0d6fbb0840..c590a56c40b 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -28,3 +28,11 @@ ignore_errors = True
 
 ignore_missing_imports = True
 
+[mypy-numpy.*]
+
+ignore_missing_imports = True
+
+[mypy-matplotlib.*]
+
+ignore_missing_imports = True
+
diff --git a/setup.py b/setup.py
index 85a692120b3..544083223b2 100644
--- a/setup.py
+++ b/setup.py
@@ -245,6 +245,7 @@ def run(self):
     packages=find_packages(exclude=('test',)),
 
     zip_safe=False,
+    include_package_data=True,
     install_requires=requirements,
     extras_require={
         "scipy": ["scipy"],
diff --git a/torchvision/assets/__init__.py b/torchvision/assets/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/torchvision/assets/grace_hopper_517x606.jpg b/torchvision/assets/grace_hopper_517x606.jpg
new file mode 100644
index 00000000000..d2a427810f6
Binary files /dev/null and b/torchvision/assets/grace_hopper_517x606.jpg differ
diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py
index 5c202f384ee..f99ec05f530 100644
--- a/torchvision/transforms/transforms.py
+++ b/torchvision/transforms/transforms.py
@@ -156,6 +156,7 @@ def __repr__(self):
 
 class Normalize(object):
     """Normalize a tensor image with mean and standard deviation.
+
     Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` for ``n``
     channels, this transform will normalize each channel of the input
     ``torch.*Tensor`` i.e.,
@@ -168,7 +169,6 @@ class Normalize(object):
         mean (sequence): Sequence of means for each channel.
         std (sequence): Sequence of standard deviations for each channel.
         inplace(bool,optional): Bool to make this operation in-place.
-
     """
 
     def __init__(self, mean, std, inplace=False):
@@ -361,7 +361,7 @@ def __repr__(self):
 
 
 class RandomApply(RandomTransforms):
-    """Apply randomly a list of transformations with a given probability
+    """Apply randomly a list of transformations with a given probability.
 
     Args:
         transforms (list or tuple): list of transformations
@@ -390,7 +390,7 @@ def __repr__(self):
 
 
 class RandomOrder(RandomTransforms):
-    """Apply a list of transformations in a random order
+    """Apply a list of transformations in a random order.
     """
     def __call__(self, img):
         order = list(range(len(self.transforms)))
@@ -401,7 +401,7 @@ def __call__(self, img):
 
 
 class RandomChoice(RandomTransforms):
-    """Apply single transformation randomly picked from a list
+    """Apply single transformation randomly picked from a list.
     """
     def __call__(self, img):
         t = random.choice(self.transforms)
@@ -729,7 +729,7 @@ def __init__(self, *args, **kwargs):
 
 
 class FiveCrop(object):
-    """Crop the given PIL Image into four corners and the central crop
+    """Crop the given PIL Image into four corners and the central crop.
 
     .. Note::
          This transform returns a tuple of images and there may be a mismatch in the number of
@@ -769,7 +769,7 @@ def __repr__(self):
 
 class TenCrop(object):
     """Crop the given PIL Image into four corners and the central crop plus the flipped version of
-    these (horizontal flipping is used by default)
+    these (horizontal flipping is used by default).
 
     .. Note::
          This transform returns a tuple of images and there may be a mismatch in the number of
@@ -813,6 +813,7 @@ def __repr__(self):
 class LinearTransformation(object):
     """Transform a tensor image with a square transformation matrix and a mean_vector computed
     offline.
+
     Given transformation_matrix and mean_vector, will flatten the torch.*Tensor and
     subtract mean_vector from it which is then followed by computing the dot
     product with the transformation matrix and then reshaping the tensor to its
@@ -866,7 +867,7 @@ def __repr__(self):
 
 
 class ColorJitter(object):
-    """Randomly change the brightness, contrast and saturation of an image.
+    """Randomly change the brightness, contrast, saturation, and hue of an image.
 
     Args:
         brightness (float or tuple of float (min, max)): How much to jitter brightness.
@@ -963,7 +964,7 @@ def __repr__(self):
 
 
 class RandomRotation(object):
-    """Rotate the image by angle.
+    """Rotate the image by degrees.
 
     Args:
         degrees (sequence or float or int): Range of degrees to select from.
@@ -1037,7 +1038,7 @@ def __repr__(self):
 
 
 class RandomAffine(object):
-    """Random affine transformation of the image keeping center invariant
+    """Random affine transformation of the image keeping center invariant.
 
     Args:
         degrees (sequence or float or int): Range of degrees to select from.
@@ -1236,6 +1237,7 @@ def __repr__(self):
 
 
 class RandomErasing(object):
+
     """ Randomly selects a rectangle region in an image and erases its pixels.
     'Random Erasing Data Augmentation' by Zhong et al. See https://arxiv.org/pdf/1708.04896.pdf
 
diff --git a/torchvision/utils.py b/torchvision/utils.py
index be373138c5f..84f0581e04c 100644
--- a/torchvision/utils.py
+++ b/torchvision/utils.py
@@ -3,6 +3,8 @@
 import pathlib
 import torch
 import math
+import numpy as np
+
 irange = range
 
 
@@ -128,3 +130,35 @@ def save_image(
     ndarr = grid.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).to('cpu', torch.uint8).numpy()
     im = Image.fromarray(ndarr)
     im.save(fp, format=format)
+
+
+def _sample_image():
+    """Private helper function to load a sample PIL image.
+
+    This function might change and/or break. Don't depend on it.
+    """
+    import os.path
+    from PIL import Image
+
+    data_dir = os.path.abspath(os.path.dirname(__file__))
+    return Image.open(os.path.join(data_dir, "assets", "grace_hopper_517x606.jpg"))
+
+
+def _plot_images(*imgs):
+    """Private helper function to display images in the docs.
+
+    This function might change and/or break. Don't depend on it.
+    """
+    import matplotlib.pyplot as plt
+    import matplotlib
+
+    n = len(imgs)
+    fig, axes = plt.subplots(1, n, figsize=(n * 2, 2))
+    if isinstance(axes, matplotlib.axes.Axes):
+        axes = np.array(axes)
+
+    for img, ax in zip(imgs, axes.flat):
+        ax.imshow(img)
+        ax.axis("off")
+
+    return fig