From f9ca6c16a63f4abbea6153c32ebde9fc1f0fdaf4 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Thu, 24 Dec 2020 08:48:37 -0700 Subject: [PATCH 01/10] Add a ton of missing docs --- captum/optim/__init__.py | 1 + captum/optim/_core/output_hook.py | 13 + captum/optim/_param/image/transform.py | 897 +++++++++++++------------ captum/optim/_utils/image/dataset.py | 27 +- captum/optim/_utils/models.py | 20 + captum/optim/_utils/reducer.py | 24 +- 6 files changed, 565 insertions(+), 417 deletions(-) diff --git a/captum/optim/__init__.py b/captum/optim/__init__.py index 5d1b90f76a..7edcc2ed88 100755 --- a/captum/optim/__init__.py +++ b/captum/optim/__init__.py @@ -7,5 +7,6 @@ from captum.optim._param.image import transform # noqa: F401 from captum.optim._param.image.images import ImageTensor # noqa: F401 from captum.optim._utils import circuits, models, reducer # noqa: F401 +from captum.optim._utils.image import dataset as image_dataset # noqa: F401 from captum.optim._utils.image.common import nchannels_to_rgb # noqa: F401 from captum.optim._utils.image.common import weights_to_heatmap_2d # noqa: F401 diff --git a/captum/optim/_core/output_hook.py b/captum/optim/_core/output_hook.py index 9f4db55031..8521e0e730 100755 --- a/captum/optim/_core/output_hook.py +++ b/captum/optim/_core/output_hook.py @@ -94,6 +94,11 @@ def __del__(self) -> None: class ActivationFetcher: """ Simple module for collecting activations from model targets. + + Args: + model (nn.Module): The reference to PyTorch model instance. + targets (nn.module or list of nn.module): The target layers to + collect activations from. """ def __init__(self, model, targets: Union[nn.Module, List[nn.Module]]) -> None: @@ -102,6 +107,14 @@ def __init__(self, model, targets: Union[nn.Module, List[nn.Module]]) -> None: self.layers = ModuleOutputsHook(targets) def __call__(self, input_t: ModelInputType) -> ModuleOutputMapping: + """ + Args: + input_t (tensor or tuple of tensors, optional): The input to use + with the specified model. + Returns: + *dict*: An dict containing the collected activations. + """ + try: with suppress(AbortForwardException): self.model(input_t) diff --git a/captum/optim/_param/image/transform.py b/captum/optim/_param/image/transform.py index a4b9c1d98d..819c9f3742 100644 --- a/captum/optim/_param/image/transform.py +++ b/captum/optim/_param/image/transform.py @@ -1,413 +1,484 @@ -import math -import numbers -from typing import List, Optional, Sequence, Tuple, Union - -import numpy as np -import torch -import torch.nn as nn -import torch.nn.functional as F - -from captum.optim._utils.image.common import nchannels_to_rgb -from captum.optim._utils.typing import TransformSize, TransformVal, TransformValList - -device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - - -class BlendAlpha(nn.Module): - r"""Blends a 4 channel input parameterization into an RGB image. - - You can specify a fixed background, or a random one will be used by default. - """ - - def __init__(self, background: Optional[torch.Tensor] = None) -> None: - super().__init__() - self.background = background - - def forward(self, x: torch.Tensor) -> torch.Tensor: - assert x.dim() == 4 - assert x.size(1) == 4 - rgb, alpha = x[:, :3, ...], x[:, 3:4, ...] 
- background = ( - self.background if self.background is not None else torch.rand_like(rgb) - ) - blended = alpha * rgb + (1 - alpha) * background - return blended - - -class IgnoreAlpha(nn.Module): - r"""Ignores a 4th channel""" - - def forward(self, x: torch.Tensor) -> torch.Tensor: - assert x.dim() == 4 - assert x.size(1) == 4 - rgb = x[:, :3, ...] - return rgb - - -class ToRGB(nn.Module): - """Transforms arbitrary channels to RGB. We use this to ensure our - image parameteriaztion itself can be decorrelated. So this goes between - the image parameterization and the normalization/sigmoid step. - We offer two transforms: Karhunen-Loève (KLT) and I1I2I3. - KLT corresponds to the empirically measured channel correlations on imagenet. - I1I2I3 corresponds to an aproximation for natural images from Ohta et al.[0] - [0] Y. Ohta, T. Kanade, and T. Sakai, "Color information for region segmentation," - Computer Graphics and Image Processing, vol. 13, no. 3, pp. 222–241, 1980 - https://www.sciencedirect.com/science/article/pii/0146664X80900477 - """ - - @staticmethod - def klt_transform() -> torch.Tensor: - """Karhunen-Loève transform (KLT) measured on ImageNet""" - KLT = [[0.26, 0.09, 0.02], [0.27, 0.00, -0.05], [0.27, -0.09, 0.03]] - transform = torch.Tensor(KLT).float() - transform = transform / torch.max(torch.norm(transform, dim=0)) - return transform - - @staticmethod - def i1i2i3_transform() -> torch.Tensor: - i1i2i3_matrix = [ - [1 / 3, 1 / 3, 1 / 3], - [1 / 2, 0, -1 / 2], - [-1 / 4, 1 / 2, -1 / 4], - ] - return torch.Tensor(i1i2i3_matrix) - - def __init__(self, transform_name: str = "klt") -> None: - super().__init__() - - if transform_name == "klt": - self.register_buffer("transform", ToRGB.klt_transform()) - elif transform_name == "i1i2i3": - self.register_buffer("transform", ToRGB.i1i2i3_transform()) - else: - raise ValueError("transform_name has to be either 'klt' or 'i1i2i3'") - - def forward(self, x: torch.Tensor, inverse: bool = False) -> torch.Tensor: - assert x.dim() == 3 or x.dim() == 4 - - # alpha channel is taken off... - has_alpha = x.size("C") == 4 - if has_alpha: - if x.dim() == 3: - x, alpha_channel = x[:3], x[3:] - elif x.dim() == 4: - x, alpha_channel = x[:, :3], x[:, 3:] - assert x.dim() == alpha_channel.dim() # ensure we "keep_dim" - - h, w = x.size("H"), x.size("W") - flat = x.flatten(("H", "W"), "spatials") - if inverse: - correct = torch.inverse(self.transform) @ flat - else: - correct = self.transform @ flat - chw = correct.unflatten("spatials", (("H", h), ("W", w))) - - if x.dim() == 3: - chw = chw.refine_names("C", ...) - elif x.dim() == 4: - chw = chw.refine_names("B", "C", ...) - - # ...alpha channel is concatenated on again. - if has_alpha: - d = 0 if x.dim() == 3 else 1 - chw = torch.cat([chw, alpha_channel], d) - - return chw - - -class CenterCrop(torch.nn.Module): - """ - Center crop the specified amount of pixels from the edges. - Arguments: - size (int, sequence) or (int): Number of pixels to center crop away. - """ - - def __init__(self, size: TransformSize = 0) -> None: - super(CenterCrop, self).__init__() - if type(size) is list or type(size) is tuple: - assert len(size) == 2, ( - "CenterCrop requires a single crop value or a tuple of (height,width)" - + "in pixels for cropping." 
- ) - self.crop_val = size - else: - self.crop_val = [size] * 2 - - def forward(self, input: torch.Tensor) -> torch.Tensor: - assert ( - input.dim() == 3 or input.dim() == 4 - ), "Input to CenterCrop must be 3D or 4D" - if input.dim() == 4: - h, w = input.size(2), input.size(3) - elif input.dim() == 3: - h, w = input.size(1), input.size(2) - h_crop = h - self.crop_val[0] - w_crop = w - self.crop_val[1] - sw, sh = w // 2 - (w_crop // 2), h // 2 - (h_crop // 2) - return input[..., sh : sh + h_crop, sw : sw + w_crop] - - -def center_crop_shape(input: torch.Tensor, output_size: List[int]) -> torch.Tensor: - """ - Crop NCHW & CHW outputs by specifying the desired output shape. - """ - - assert input.dim() == 4 or input.dim() == 3 - output_size = [output_size] if not hasattr(output_size, "__iter__") else output_size - assert len(output_size) == 1 or len(output_size) == 2 - output_size = output_size * 2 if len(output_size) == 1 else output_size - - if input.dim() == 4: - h, w = input.size(2), input.size(3) - if input.dim() == 3: - h, w = input.size(1), input.size(2) - - h_crop = h - int(round((h - output_size[0]) / 2.0)) - w_crop = w - int(round((w - output_size[1]) / 2.0)) - - return input[ - ..., h_crop - output_size[0] : h_crop, w_crop - output_size[1] : w_crop - ] - - -def rand_select(transform_values: TransformValList) -> TransformVal: - """ - Randomly return a value from the provided tuple or list - """ - n = torch.randint(low=0, high=len(transform_values) - 1, size=[1]).item() - return transform_values[n] - - -class RandomScale(nn.Module): - """ - Apply random rescaling on a NCHW tensor. - Arguments: - scale (float, sequence): Tuple of rescaling values to randomly select from. - """ - - def __init__(self, scale: TransformValList) -> None: - super(RandomScale, self).__init__() - self.scale = scale - - def get_scale_mat( - self, m: TransformVal, device: torch.device, dtype: torch.dtype - ) -> torch.Tensor: - scale_mat = torch.tensor( - [[m, 0.0, 0.0], [0.0, m, 0.0]], device=device, dtype=dtype - ) - return scale_mat - - def scale_tensor(self, x: torch.Tensor, scale: TransformVal) -> torch.Tensor: - scale_matrix = self.get_scale_mat(scale, x.device, x.dtype)[None, ...].repeat( - x.shape[0], 1, 1 - ) - grid = F.affine_grid(scale_matrix, x.size()) - x = F.grid_sample(x, grid) - return x - - def forward(self, input: torch.Tensor) -> torch.Tensor: - scale = rand_select(self.scale) - return self.scale_tensor(input, scale=scale) - - -class RandomSpatialJitter(torch.nn.Module): - """ - Apply random spatial translations on a NCHW tensor. - Arguments: - translate (int): - """ - - def __init__(self, translate: int) -> None: - super(RandomSpatialJitter, self).__init__() - self.pad_range = 2 * translate - self.pad = nn.ReflectionPad2d(translate) - - def translate_tensor(self, x: torch.Tensor, insets: torch.Tensor) -> torch.Tensor: - padded = self.pad(x) - tblr = [ - -insets[0], - -(self.pad_range - insets[0]), - -insets[1], - -(self.pad_range - insets[1]), - ] - cropped = F.pad(padded, pad=tblr) - assert cropped.shape == x.shape - return cropped - - def forward(self, input: torch.Tensor) -> torch.Tensor: - insets = torch.randint(high=self.pad_range, size=(2,)) - return self.translate_tensor(input, insets) - - -class ScaleInputRange(nn.Module): - """ - Multiplies the input by a specified multiplier for models with input ranges other - than [0,1]. 
- """ - - def __init__(self, multiplier: float = 1.0) -> None: - super().__init__() - self.multiplier = multiplier - - def forward(self, x: torch.Tensor) -> torch.Tensor: - return x * self.multiplier - - -class RGBToBGR(nn.Module): - """ - Converts an NCHW RGB image tensor to BGR by switching the red and blue channels. - """ - - def forward(self, x: torch.Tensor) -> torch.Tensor: - assert x.dim() == 4 - assert x.size(1) == 3 - return x[:, [2, 1, 0]] - - -# class TransformationRobustness(nn.Module): -# def __init__(self, jitter=False, scale=False): -# super().__init__() -# if jitter: -# self.jitter = RandomSpatialJitter(4) -# if scale: -# self.scale = RandomScale() - -# def forward(self, x): -# original_shape = x.shape -# if hasattr(self, "jitter"): -# x = self.jitter(x) -# if hasattr(self, "scale"): -# x = self.scale(x) -# cropped = center_crop(x, original_shape) -# return cropped - - -# class RandomHomography(nn.Module): -# def __init__(self): -# super().__init__() - -# def forward(self, x): -# _, _, H, W = x.shape -# self.homography_warper = HomographyWarper( -# height=H, width=W, padding_mode="reflection" -# ) -# homography = -# return self.homography_warper(x, homography) - - -# via https://discuss.pytorch.org/t/is-there-anyway-to-do-gaussian- -# filtering-for-an-image-2d-3d-in-pytorch/12351/9 -class GaussianSmoothing(nn.Module): - """ - Apply gaussian smoothing on a - 1d, 2d or 3d tensor. Filtering is performed seperately for each channel - in the input using a depthwise convolution. - Arguments: - channels (int, sequence): Number of channels of the input tensors. Output will - have this number of channels as well. - kernel_size (int, sequence): Size of the gaussian kernel. - sigma (float, sequence): Standard deviation of the gaussian kernel. - dim (int, optional): The number of dimensions of the data. - Default value is 2 (spatial). - """ - - def __init__( - self, - channels: int, - kernel_size: Union[int, Sequence[int]], - sigma: Union[float, Sequence[float]], - dim: int = 2, - ) -> None: - super().__init__() - if isinstance(kernel_size, numbers.Number): - kernel_size = [kernel_size] * dim - if isinstance(sigma, numbers.Number): - sigma = [sigma] * dim - - # The gaussian kernel is the product of the - # gaussian function of each dimension. - kernel = 1 - meshgrids = torch.meshgrid( - [torch.arange(size, dtype=torch.float32) for size in kernel_size] - ) - for size, std, mgrid in zip(kernel_size, sigma, meshgrids): - mean = (size - 1) / 2 - kernel *= ( - 1 - / (std * math.sqrt(2 * math.pi)) - * torch.exp(-(((mgrid - mean) / std) ** 2) / 2) - ) - - # Make sure sum of values in gaussian kernel equals 1. - kernel = kernel / torch.sum(kernel) - - # Reshape to depthwise convolutional weight - kernel = kernel.view(1, 1, *kernel.size()) - kernel = kernel.repeat(channels, *[1] * (kernel.dim() - 1)) - - self.register_buffer("weight", kernel) - self.groups = channels - - if dim == 1: - self.conv = F.conv1d - elif dim == 2: - self.conv = F.conv2d - elif dim == 3: - self.conv = F.conv3d - else: - raise RuntimeError( - "Only 1, 2 and 3 dimensions are supported. Received {}.".format(dim) - ) - - def forward(self, input: torch.Tensor) -> torch.Tensor: - """ - Apply gaussian filter to input. - Arguments: - input (torch.Tensor): Input to apply gaussian filter on. - Returns: - filtered (torch.Tensor): Filtered output. 
- """ - return self.conv(input, weight=self.weight, groups=self.groups) - - -class SymmetricPadding(torch.autograd.Function): - """ - Autograd compatible symmetric padding that uses NumPy's pad function. - """ - - @staticmethod - def forward(ctx, x: torch.Tensor, padding: List[List[int]]) -> torch.Tensor: - ctx.padding = padding - x_device = x.device - x = x.cpu() - x.data = torch.as_tensor( - np.pad(x.data.numpy(), pad_width=padding, mode="symmetric") - ) - x = x.to(x_device) - return x - - @staticmethod - def backward(ctx, grad_output: torch.Tensor) -> Tuple[torch.Tensor, None]: - grad_input = grad_output.clone() - B, C, H, W = grad_input.size() - b1, b2 = ctx.padding[0] - c1, c2 = ctx.padding[1] - h1, h2 = ctx.padding[2] - w1, w2 = ctx.padding[3] - grad_input = grad_input[b1 : B - b2, c1 : C - c2, h1 : H - h2, w1 : W - w2] - return grad_input, None - - -class NChannelsToRGB(nn.Module): - """ - Convert an NCHW image with n channels into a 3 channel RGB image. - """ - - def __init__(self, warp: bool = False) -> None: - super().__init__() - self.warp = warp - - def forward(self, x: torch.Tensor) -> torch.Tensor: - assert x.dim() == 4 - return nchannels_to_rgb(x, self.warp) +import math +import numbers +from typing import List, Optional, Sequence, Tuple, Union + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F + +from captum.optim._utils.image.common import nchannels_to_rgb +from captum.optim._utils.typing import TransformSize, TransformVal, TransformValList + +device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + +class BlendAlpha(nn.Module): + r"""Blends a 4 channel input parameterization into an RGB image. + You can specify a fixed background, or a random one will be used by default. + + Args: + background (tensor, optional): An NCHW image tensor to be used as the + Alpha channel's background. + """ + + def __init__(self, background: Optional[torch.Tensor] = None) -> None: + super().__init__() + self.background = background + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Blend the Alpha channel into the RGB channels. + Arguments: + x (torch.Tensor): RGBA image tensor to blend into an RGB image tensor. + Returns: + blended (torch.Tensor): RGB image tensor. + """ + assert x.dim() == 4 + assert x.size(1) == 4 + rgb, alpha = x[:, :3, ...], x[:, 3:4, ...] + background = ( + self.background if self.background is not None else torch.rand_like(rgb) + ) + blended = alpha * rgb + (1 - alpha) * background + return blended + + +class IgnoreAlpha(nn.Module): + r"""Ignores a 4th channel""" + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Ignore the alpha channel. + Arguments: + x (torch.Tensor): RGBA image tensor. + Returns: + rgb (torch.Tensor): RGB image tensor. + """ + assert x.dim() == 4 + assert x.size(1) == 4 + rgb = x[:, :3, ...] + return rgb + + +class ToRGB(nn.Module): + """Transforms arbitrary channels to RGB. We use this to ensure our + image parameteriaztion itself can be decorrelated. So this goes between + the image parameterization and the normalization/sigmoid step. + We offer two transforms: Karhunen-Loève (KLT) and I1I2I3. + KLT corresponds to the empirically measured channel correlations on imagenet. + I1I2I3 corresponds to an aproximation for natural images from Ohta et al.[0] + [0] Y. Ohta, T. Kanade, and T. Sakai, "Color information for region segmentation," + Computer Graphics and Image Processing, vol. 13, no. 3, pp. 
222–241, 1980 + https://www.sciencedirect.com/science/article/pii/0146664X80900477 + """ + + @staticmethod + def klt_transform() -> torch.Tensor: + """Karhunen-Loève transform (KLT) measured on ImageNet""" + KLT = [[0.26, 0.09, 0.02], [0.27, 0.00, -0.05], [0.27, -0.09, 0.03]] + transform = torch.Tensor(KLT).float() + transform = transform / torch.max(torch.norm(transform, dim=0)) + return transform + + @staticmethod + def i1i2i3_transform() -> torch.Tensor: + i1i2i3_matrix = [ + [1 / 3, 1 / 3, 1 / 3], + [1 / 2, 0, -1 / 2], + [-1 / 4, 1 / 2, -1 / 4], + ] + return torch.Tensor(i1i2i3_matrix) + + def __init__(self, transform_name: str = "klt") -> None: + super().__init__() + + if transform_name == "klt": + self.register_buffer("transform", ToRGB.klt_transform()) + elif transform_name == "i1i2i3": + self.register_buffer("transform", ToRGB.i1i2i3_transform()) + else: + raise ValueError("transform_name has to be either 'klt' or 'i1i2i3'") + + def forward(self, x: torch.Tensor, inverse: bool = False) -> torch.Tensor: + """ + Args: + x (tensor): A CHW or NCHW RGB or RGBA image tensor. + inverse (bool): Whether to recorrelate or decorrelate colors. + Returns: + *tensor*: A tensor with it's colors recorrelated or decorrelated. + """ + + assert x.dim() == 3 or x.dim() == 4 + + # alpha channel is taken off... + has_alpha = x.size("C") == 4 + if has_alpha: + if x.dim() == 3: + x, alpha_channel = x[:3], x[3:] + elif x.dim() == 4: + x, alpha_channel = x[:, :3], x[:, 3:] + assert x.dim() == alpha_channel.dim() # ensure we "keep_dim" + + h, w = x.size("H"), x.size("W") + flat = x.flatten(("H", "W"), "spatials") + if inverse: + correct = torch.inverse(self.transform) @ flat + else: + correct = self.transform @ flat + chw = correct.unflatten("spatials", (("H", h), ("W", w))) + + if x.dim() == 3: + chw = chw.refine_names("C", ...) + elif x.dim() == 4: + chw = chw.refine_names("B", "C", ...) + + # ...alpha channel is concatenated on again. + if has_alpha: + d = 0 if x.dim() == 3 else 1 + chw = torch.cat([chw, alpha_channel], d) + + return chw + + +class CenterCrop(torch.nn.Module): + """ + Center crop the specified amount of pixels from the edges. + Arguments: + size (int, sequence) or (int): Number of pixels to center crop away. + """ + + def __init__(self, size: TransformSize = 0) -> None: + super(CenterCrop, self).__init__() + if type(size) is list or type(size) is tuple: + assert len(size) == 2, ( + "CenterCrop requires a single crop value or a tuple of (height,width)" + + "in pixels for cropping." + ) + self.crop_val = size + else: + self.crop_val = [size] * 2 + + def forward(self, input: torch.Tensor) -> torch.Tensor: + """ + Center crop an input. + Arguments: + input (torch.Tensor): Input to center crop. + Returns: + tensor (torch.Tensor): A center cropped tensor. + """ + + assert ( + input.dim() == 3 or input.dim() == 4 + ), "Input to CenterCrop must be 3D or 4D" + if input.dim() == 4: + h, w = input.size(2), input.size(3) + elif input.dim() == 3: + h, w = input.size(1), input.size(2) + h_crop = h - self.crop_val[0] + w_crop = w - self.crop_val[1] + sw, sh = w // 2 - (w_crop // 2), h // 2 - (h_crop // 2) + return input[..., sh : sh + h_crop, sw : sw + w_crop] + + +def center_crop_shape(input: torch.Tensor, output_size: List[int]) -> torch.Tensor: + """ + Crop NCHW & CHW outputs by specifying the desired output shape. + + Args: + tensor (tensor): A CHW or NCHW image tensor to center crop. + output_size (int or list of int): The desired H and W output dimensions. 
+ Returns: + *tensor*: A center cropped tensor. + """ + + assert input.dim() == 4 or input.dim() == 3 + output_size = [output_size] if not hasattr(output_size, "__iter__") else output_size + assert len(output_size) == 1 or len(output_size) == 2 + output_size = output_size * 2 if len(output_size) == 1 else output_size + + if input.dim() == 4: + h, w = input.size(2), input.size(3) + if input.dim() == 3: + h, w = input.size(1), input.size(2) + + h_crop = h - int(round((h - output_size[0]) / 2.0)) + w_crop = w - int(round((w - output_size[1]) / 2.0)) + + return input[ + ..., h_crop - output_size[0] : h_crop, w_crop - output_size[1] : w_crop + ] + + +def rand_select(transform_values: TransformValList) -> TransformVal: + """ + Randomly return a value from the provided tuple or list. + + Args: + transform_values (sequence): A sequence of values to randomly select from. + Returns: + *value*: A single value from the specified sequence. + """ + n = torch.randint(low=0, high=len(transform_values) - 1, size=[1]).item() + return transform_values[n] + + +class RandomScale(nn.Module): + """ + Apply random rescaling on a NCHW tensor. + Arguments: + scale (float, sequence): Tuple of rescaling values to randomly select from. + """ + + def __init__(self, scale: TransformValList) -> None: + super(RandomScale, self).__init__() + self.scale = scale + + def get_scale_mat( + self, m: TransformVal, device: torch.device, dtype: torch.dtype + ) -> torch.Tensor: + scale_mat = torch.tensor( + [[m, 0.0, 0.0], [0.0, m, 0.0]], device=device, dtype=dtype + ) + return scale_mat + + def scale_tensor(self, x: torch.Tensor, scale: TransformVal) -> torch.Tensor: + scale_matrix = self.get_scale_mat(scale, x.device, x.dtype)[None, ...].repeat( + x.shape[0], 1, 1 + ) + grid = F.affine_grid(scale_matrix, x.size()) + x = F.grid_sample(x, grid) + return x + + def forward(self, input: torch.Tensor) -> torch.Tensor: + scale = rand_select(self.scale) + return self.scale_tensor(input, scale=scale) + + +class RandomSpatialJitter(torch.nn.Module): + """ + Apply random spatial translations on a NCHW tensor. + Arguments: + translate (int): The amount to translate the H and W dimensions + of an NCHW tensor. + """ + + def __init__(self, translate: int) -> None: + super(RandomSpatialJitter, self).__init__() + self.pad_range = 2 * translate + self.pad = nn.ReflectionPad2d(translate) + + def translate_tensor(self, x: torch.Tensor, insets: torch.Tensor) -> torch.Tensor: + padded = self.pad(x) + tblr = [ + -insets[0], + -(self.pad_range - insets[0]), + -insets[1], + -(self.pad_range - insets[1]), + ] + cropped = F.pad(padded, pad=tblr) + assert cropped.shape == x.shape + return cropped + + def forward(self, input: torch.Tensor) -> torch.Tensor: + insets = torch.randint(high=self.pad_range, size=(2,)) + return self.translate_tensor(input, insets) + + +class ScaleInputRange(nn.Module): + """ + Multiplies the input by a specified multiplier for models with input ranges other + than [0,1]. + + Args: + multiplier (float): A float value used to scale the input. + """ + + def __init__(self, multiplier: float = 1.0) -> None: + super().__init__() + self.multiplier = multiplier + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Scale an input tensor's values. + Arguments: + x (torch.Tensor): Input to scale values of. + Returns: + tensor (torch.Tensor): tensor with it's values scaled. + """ + return x * self.multiplier + + +class RGBToBGR(nn.Module): + """ + Converts an NCHW RGB image tensor to BGR by switching the red and blue channels. 
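+
+    Example; a minimal usage sketch (the input size below is an arbitrary choice):
+
+        rgb_to_bgr = RGBToBGR()
+        x = torch.rand(1, 3, 224, 224)
+        x_bgr = rgb_to_bgr(x)  # channels are now ordered as [B, G, R]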
+ """ + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Perform RGB to BGR conversion on an input + Arguments: + x (torch.Tensor): RGB image tensor to convert to BGR. + Returns: + BGR tensor (torch.Tensor): A BGR tensor. + """ + assert x.dim() == 4 + assert x.size(1) == 3 + return x[:, [2, 1, 0]] + + +# class RandomHomography(nn.Module): +# def __init__(self): +# super().__init__() + +# def forward(self, x): +# _, _, H, W = x.shape +# self.homography_warper = HomographyWarper( +# height=H, width=W, padding_mode="reflection" +# ) +# homography = +# return self.homography_warper(x, homography) + + +# via https://discuss.pytorch.org/t/is-there-anyway-to-do-gaussian- +# filtering-for-an-image-2d-3d-in-pytorch/12351/9 +class GaussianSmoothing(nn.Module): + """ + Apply gaussian smoothing on a + 1d, 2d or 3d tensor. Filtering is performed seperately for each channel + in the input using a depthwise convolution. + Arguments: + channels (int, sequence): Number of channels of the input tensors. Output will + have this number of channels as well. + kernel_size (int, sequence): Size of the gaussian kernel. + sigma (float, sequence): Standard deviation of the gaussian kernel. + dim (int, optional): The number of dimensions of the data. + Default value is 2 (spatial). + """ + + def __init__( + self, + channels: int, + kernel_size: Union[int, Sequence[int]], + sigma: Union[float, Sequence[float]], + dim: int = 2, + ) -> None: + super().__init__() + if isinstance(kernel_size, numbers.Number): + kernel_size = [kernel_size] * dim + if isinstance(sigma, numbers.Number): + sigma = [sigma] * dim + + # The gaussian kernel is the product of the + # gaussian function of each dimension. + kernel = 1 + meshgrids = torch.meshgrid( + [torch.arange(size, dtype=torch.float32) for size in kernel_size] + ) + for size, std, mgrid in zip(kernel_size, sigma, meshgrids): + mean = (size - 1) / 2 + kernel *= ( + 1 + / (std * math.sqrt(2 * math.pi)) + * torch.exp(-(((mgrid - mean) / std) ** 2) / 2) + ) + + # Make sure sum of values in gaussian kernel equals 1. + kernel = kernel / torch.sum(kernel) + + # Reshape to depthwise convolutional weight + kernel = kernel.view(1, 1, *kernel.size()) + kernel = kernel.repeat(channels, *[1] * (kernel.dim() - 1)) + + self.register_buffer("weight", kernel) + self.groups = channels + + if dim == 1: + self.conv = F.conv1d + elif dim == 2: + self.conv = F.conv2d + elif dim == 3: + self.conv = F.conv3d + else: + raise RuntimeError( + "Only 1, 2 and 3 dimensions are supported. Received {}.".format(dim) + ) + + def forward(self, input: torch.Tensor) -> torch.Tensor: + """ + Apply gaussian filter to input. + Arguments: + input (torch.Tensor): Input to apply gaussian filter on. + Returns: + filtered (torch.Tensor): Filtered output. + """ + return self.conv(input, weight=self.weight, groups=self.groups) + + +class SymmetricPadding(torch.autograd.Function): + """ + Autograd compatible symmetric padding that uses NumPy's pad function. + """ + + @staticmethod + def forward(ctx, x: torch.Tensor, padding: List[List[int]]) -> torch.Tensor: + """ + Apply NumPy symmetric padding to an input tensor while preserving the gradient. + Arguments: + x (torch.Tensor): Input to apply symmetric padding on. + Returns: + tensor (torch.Tensor): Padded tensor. 
+ """ + + ctx.padding = padding + x_device = x.device + x = x.cpu() + x.data = torch.as_tensor( + np.pad(x.data.numpy(), pad_width=padding, mode="symmetric") + ) + x = x.to(x_device) + return x + + @staticmethod + def backward(ctx, grad_output: torch.Tensor) -> Tuple[torch.Tensor, None]: + """ + Crop away symmetric padding. + Arguments: + grad_output (torch.Tensor): Input to remove symmetric padding from. + Returns: + grad_input (torch.Tensor): Unpadded tensor. + """ + + grad_input = grad_output.clone() + B, C, H, W = grad_input.size() + b1, b2 = ctx.padding[0] + c1, c2 = ctx.padding[1] + h1, h2 = ctx.padding[2] + w1, w2 = ctx.padding[3] + grad_input = grad_input[b1 : B - b2, c1 : C - c2, h1 : H - h2, w1 : W - w2] + return grad_input, None + + +class NChannelsToRGB(nn.Module): + """ + Convert an NCHW image with n channels into a 3 channel RGB image. + + Args: + warp (bool): Whether or not to make the resulting RGB colors more distict + from each other. + """ + + def __init__(self, warp: bool = False) -> None: + super().__init__() + self.warp = warp + + def forward(self, x: torch.Tensor) -> torch.Tensor: + """ + Reduce any number of channels down to 3. + Arguments: + x (torch.Tensor): Input to reduce channel dimensions on. + Returns: + 3 channel RGB tensor (torch.Tensor): RGB image tensor. + """ + assert x.dim() == 4 + return nchannels_to_rgb(x, self.warp) diff --git a/captum/optim/_utils/image/dataset.py b/captum/optim/_utils/image/dataset.py index 69a2be3453..fcc6d03742 100644 --- a/captum/optim/_utils/image/dataset.py +++ b/captum/optim/_utils/image/dataset.py @@ -3,7 +3,12 @@ def image_cov(tensor: torch.Tensor) -> torch.Tensor: """ - Calculate a tensor's RGB covariance matrix + Calculate a tensor's RGB covariance matrix. + + Args: + tensor (tensor): An NCHW image tensor. + Returns: + *tensor*: An RGB covariance matrix for the specified tensor. """ tensor = tensor.reshape(-1, 3) @@ -14,6 +19,12 @@ def image_cov(tensor: torch.Tensor) -> torch.Tensor: def dataset_cov_matrix(loader: torch.utils.data.DataLoader) -> torch.Tensor: """ Calculate the covariance matrix for an image dataset. + + Args: + loader (torch.utils.data.DataLoader): The reference to a PyTorch + dataloader instance. + Returns: + *tensor*: A covariance matrix for the specified dataset. """ cov_mtx = torch.zeros(3, 3) @@ -30,6 +41,13 @@ def cov_matrix_to_klt( ) -> torch.Tensor: """ Convert a cov matrix to a klt matrix. + + Args: + cov_mtx (tensor): A 3 by 3 covariance matrix generated from a dataset. + normalize (bool): Whether or not to normalize the resulting KLT matrix. + epsilon (float): + Returns: + *tensor*: A KLT matrix for the specified covariance matrix. """ U, S, V = torch.svd(cov_mtx) @@ -47,6 +65,13 @@ def dataset_klt_matrix( a Karhunen-Loève transform (KLT) matrix, for a dataset. The color correlation matrix can then used in color decorrelation transforms for models trained on the dataset. + + Args: + loader (torch.utils.data.DataLoader): The reference to a PyTorch + dataloader instance. + normalize (bool): Whether or not to normalize the resulting KLT matrix. + Returns: + *tensor*: A KLT matrix for the specified dataset. """ cov_mtx = dataset_cov_matrix(loader) diff --git a/captum/optim/_utils/models.py b/captum/optim/_utils/models.py index bd5c175b75..a6738bb389 100644 --- a/captum/optim/_utils/models.py +++ b/captum/optim/_utils/models.py @@ -12,6 +12,11 @@ def get_model_layers(model) -> List[str]: """ Return a list of hookable layers for the target model. 
+ + Args: + model (nn.Module): The reference to PyTorch model instance. + Returns: + *str*: A list of all possible layer targets for the specified model. """ layers = [] @@ -79,6 +84,12 @@ def replace_layers(model, old_layer=ReluLayer, new_layer=RedirectedReluLayer) -> Replace all target layers with new layers. The most common use case is replacing activation layers with activation layers that can handle gradient flow issues. + + Args: + model (nn.Module): The reference to PyTorch model instance. + old_layer (nn.module type): The layer type you are looking to remove. + new_layer (nn.module type): The layer type you are looking to replace + old_layer with. """ for name, child in model._modules.items(): @@ -163,6 +174,15 @@ def collect_activations( ) -> ModuleOutputMapping: """ Collect target activations for a model. + + Args: + model (nn.Module): The reference to PyTorch model instance. + targets (nn.module or list of nn.module): The target layers to + collect activations from. + model_input (tensor or tuple of tensors, optional): The input to use + with the specified model. + Returns: + *dict*: An dict containing the collected activations. """ catch_activ = ActivationFetcher(model, targets) diff --git a/captum/optim/_utils/reducer.py b/captum/optim/_utils/reducer.py index ccc4079df8..2c270b18e2 100644 --- a/captum/optim/_utils/reducer.py +++ b/captum/optim/_utils/reducer.py @@ -17,9 +17,15 @@ class ChannelReducer: """ Dimensionality reduction for the channel dimension of an input. - Olah, et al., "The Building Blocks of Interpretability", Distill, 2018. See: https://distill.pub/2018/building-blocks/ + + Args: + n_components (int): The number of channels to reduce the target + dimension to. + reduction_alg (str or callable): The desired dimensionality + reduction algorithm to use. + **kwargs: Arbitrary keyword arguments used by the specified reduction_alg. """ def __init__( @@ -54,8 +60,13 @@ def fit_transform( """ Perform dimensionality reduction on an input tensor. - If swap_2nd_and_last_dims is true, input channels are expected to be in the - second dimension unless the input tensor has a shape of CHW. + Args: + tensor (tensor): A tensor to perform dimensionality reduction on. + swap_2nd_and_last_dims (bool): If true, input channels are expected + to be in the second dimension unless the input tensor has a shape + of CHW. + Returns: + *tensor*: A tensor with one of it's dimensions reduced. """ if x.dim() == 3 and swap_2nd_and_last_dims: @@ -107,6 +118,13 @@ def posneg(x: torch.Tensor, dim: int = 0) -> torch.Tensor: """ Hack that makes a matrix positive by concatination in order to simulate one-sided NMF with regular NMF + + Args: + x (tensor): A tensor to make positive. + dim (int): The dimension to concatinate the two tensor halves at. + + Returns: + *tensor*: A positive tensor for one-sided dimensionality reduction. 
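+
+    Example; a minimal usage sketch (the input values below are arbitrary):
+
+        x = torch.tensor([[1.0, -2.0], [-3.0, 4.0]])
+        x_posneg = posneg(x, dim=0)  # shape: [4, 2], with no negative values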
""" return torch.cat([F.relu(x), F.relu(-x)], dim=dim) From ab19efb0ce146933e09a3187333e06148c48fc00 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Thu, 24 Dec 2020 09:07:43 -0700 Subject: [PATCH 02/10] Add docs for NumPy helpers --- tests/optim/helpers/image_dataset.py | 22 +++++++++++++- tests/optim/helpers/numpy_common.py | 7 +++++ tests/optim/helpers/numpy_transforms.py | 40 +++++++++++++++++++++++-- 3 files changed, 65 insertions(+), 4 deletions(-) diff --git a/tests/optim/helpers/image_dataset.py b/tests/optim/helpers/image_dataset.py index 9b5e73ad48..a8cef03b87 100644 --- a/tests/optim/helpers/image_dataset.py +++ b/tests/optim/helpers/image_dataset.py @@ -5,6 +5,14 @@ class ImageTestDataset(torch.utils.data.Dataset): + """ + Create a simple tensor dataset for testing image dataset classes + and functions. + + Args: + tensors (list): A list of tensors to use in the dataset. + """ + def __init__(self, tensors: List[torch.Tensor]) -> None: assert all(t.size(0) == 1 for t in tensors if t.dim() == 4) @@ -23,7 +31,12 @@ def __len__(self) -> int: def image_cov_np(array: np.ndarray) -> np.ndarray: """ - Calculate an array's RGB covariance matrix + Calculate an array's RGB covariance matrix. + + Args: + array (array): An NCHW image array. + Returns: + *array*: An RGB covariance matrix for the specified array. """ array = array.reshape(-1, 3) @@ -36,6 +49,13 @@ def cov_matrix_to_klt_np( ) -> np.ndarray: """ Convert a cov matrix to a klt matrix. + + Args: + cov_mtx (array): A 3 by 3 covariance matrix generated from a dataset. + normalize (bool): Whether or not to normalize the resulting KLT matrix. + epsilon (float): + Returns: + *array*: A KLT matrix for the specified covariance matrix. """ U, S, V = np.linalg.svd(cov_mtx) diff --git a/tests/optim/helpers/numpy_common.py b/tests/optim/helpers/numpy_common.py index 6013600eb7..b432829694 100644 --- a/tests/optim/helpers/numpy_common.py +++ b/tests/optim/helpers/numpy_common.py @@ -12,6 +12,13 @@ def weights_to_heatmap_2d( By default red represents excitatory values, blue represents inhibitory values, and white represents no excitation or inhibition. + + Args: + weight (array): A 2d array to create the heatmap from. + colors (List of strings): A list of strings containing color + hex values to use for coloring the heatmap. + Returns: + *array*: A weight heatmap. """ assert array.ndim == 2 diff --git a/tests/optim/helpers/numpy_transforms.py b/tests/optim/helpers/numpy_transforms.py index 8fe6a7b70f..9d43a1cd2c 100644 --- a/tests/optim/helpers/numpy_transforms.py +++ b/tests/optim/helpers/numpy_transforms.py @@ -5,7 +5,11 @@ class BlendAlpha(object): """ - NumPy version of the BlendAlpha transform + NumPy version of the BlendAlpha transform. + + Args: + background (array, optional): An NCHW image array to be used as the + Alpha channel's background. """ def __init__(self, background: Optional[np.ndarray] = None) -> None: @@ -13,6 +17,13 @@ def __init__(self, background: Optional[np.ndarray] = None) -> None: self.background = background def blend_alpha(self, x: np.ndarray) -> np.ndarray: + """ + Blend the Alpha channel into the RGB channels. + Arguments: + x (array): RGBA image array to blend into an RGB image array. + Returns: + blended (array): RGB image array. + """ assert x.shape[1] == 4 assert x.ndim == 4 rgb, alpha = x[:, :3, ...], x[:, 3:4, ...] 
@@ -27,7 +38,11 @@ def blend_alpha(self, x: np.ndarray) -> np.ndarray: class RandomSpatialJitter(object): """ - NumPy version of the RandomSpatialJitter transform + NumPy version of the RandomSpatialJitter transform. + + Arguments: + translate (int): The amount to translate the H and W dimensions + of an CHW or NCHW array. """ def __init__(self, translate: int) -> None: @@ -55,7 +70,10 @@ def jitter(self, x: np.ndarray) -> np.ndarray: class CenterCrop(object): """ - NumPy version of the CenterCrop transform + NumPy version of the CenterCrop transform. + + Arguments: + size (int, sequence) or (int): Number of pixels to center crop away. """ def __init__(self, size=0) -> None: @@ -71,6 +89,14 @@ def __init__(self, size=0) -> None: assert len(self.crop_val) == 2 def crop(self, input: np.ndarray) -> np.ndarray: + """ + Center crop an input. + Arguments: + input (array): Input to center crop. + Returns: + cropped input (array): A center cropped array. + """ + assert input.ndim == 3 or input.ndim == 4 if input.ndim == 4: h, w = input.shape[2], input.shape[3] @@ -114,6 +140,14 @@ def __init__(self, transform_name: str = "klt") -> None: raise ValueError("transform_name has to be either 'klt' or 'i1i2i3'") def to_rgb(self, x: np.ndarray, inverse: bool = False) -> np.ndarray: + """ + Args: + x (array): A CHW or NCHW RGB or RGBA image array. + inverse (bool): Whether to recorrelate or decorrelate colors. + Returns: + *array*: An array with it's colors recorrelated or decorrelated. + """ + assert x.ndim == 3 or x.ndim == 4 # alpha channel is taken off... From 395ab63495e925370e71a86254b8934197ef9b5d Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Thu, 24 Dec 2020 18:14:21 -0700 Subject: [PATCH 03/10] Add image docs --- captum/optim/_param/image/images.py | 76 ++++++++++++++++++++++++- captum/optim/_utils/circuits.py | 4 +- tests/optim/helpers/numpy_image.py | 2 +- tests/optim/helpers/numpy_transforms.py | 8 +-- 4 files changed, 80 insertions(+), 10 deletions(-) diff --git a/captum/optim/_param/image/images.py b/captum/optim/_param/image/images.py index 0cb1123b45..8e8d061ab4 100755 --- a/captum/optim/_param/image/images.py +++ b/captum/optim/_param/image/images.py @@ -179,7 +179,17 @@ def setup_batch( class FFTImage(ImageParameterization): - """Parameterize an image using inverse real 2D FFT""" + """ + Parameterize an image using inverse real 2D FFT + + Arguments: + size (list of int): The H & W dimensions to use for creating + the nn.Parameter tensor. + channels (int): The number of channels to create. + batch (int): The number of batches to create. + init (torch.tensor, optional): Optionally specify a tensor to + use instead of creating one. + """ def __init__( self, @@ -224,7 +234,16 @@ def __init__( @staticmethod def rfft2d_freqs(height: int, width: int) -> torch.Tensor: - """Computes 2D spectrum frequencies.""" + """ + Computes 2D spectrum frequencies. + + Arguments: + height (int): The h dimension of the 2d frequency scale. + width (int): The w dimension of the 2d frequency scale. + Returns: + tensor (tensor): A 2d frequency scale tensor. + """ + fy = FFTImage.pytorch_fftfreq(height)[:, None] # on odd input dimensions we need to keep one additional frequency wadd = 2 if width % 2 == 1 else 1 @@ -241,7 +260,14 @@ def pytorch_fftfreq(v: int, d: float = 1.0) -> torch.Tensor: return results * (1.0 / (v * d)) def get_fft_funcs(self) -> Tuple[Callable, Callable]: - """Support older versions of PyTorch""" + """ + Support older versions of PyTorch. 
+ + Returns: + fft functions (tuple of Callable): A list of FFT functions + to use for irfft and rfft operations. + """ + try: import torch.fft @@ -262,6 +288,11 @@ def torch_irfft(x: torch.Tensor) -> torch.Tensor: return torch_rfft, torch_irfft def forward(self) -> torch.Tensor: + """ + Returns: + spatially correlated tensor (tensor): A spatially recorrelated tensor. + """ + h, w = self.size scaled_spectrum = self.fourier_coeffs * self.spectrum_scale output = self.torch_irfft(scaled_spectrum) @@ -269,6 +300,18 @@ def forward(self) -> torch.Tensor: class PixelImage(ImageParameterization): + """ + Parameterize a simple image tensor. + + Arguments: + size (list of int): The H & W dimensions to use for creating + the nn.Parameter tensor. + channels (int): The number of channels to create. + batch (int): The number of batches to create. + init (torch.tensor, optional): Optionally specify a tensor to + use instead of creating one. + """ + def __init__( self, size: InitSize = None, @@ -290,6 +333,18 @@ def forward(self) -> torch.Tensor: class LaplacianImage(ImageParameterization): + """ + Parameterize an image with a laplacian pyramid. + + Arguments: + size (list of int): The H & W dimensions to use for creating + the nn.Parameter tensor. + channels (int): The number of channels to create. + batch (int): The number of batches to create. + init (torch.tensor, optional): Optionally specify a tensor to + use instead of creating one. + """ + def __init__( self, size: InitSize = None, @@ -365,6 +420,13 @@ class SharedImage(ImageParameterization): Mordvintsev, et al., "Differentiable Image Parameterizations", Distill, 2018. https://distill.pub/2018/differentiable-parameterizations/ + + Arguments: + shapes (list of int or list of list of ints): The shapes of the shared tensors + to use for creating the nn.Parameter tensors. + parameterization (ImageParameterization): An image parameterization instance. + offset (int or list of int or list of list of ints): The offsets to use for the + shared tensors. """ def __init__( @@ -400,6 +462,14 @@ def get_offset(self, offset: Union[int, Tuple[int]], n: int) -> List[List[int]]: return offset def apply_offset(self, x_list: List[torch.Tensor]) -> List[torch.Tensor]: + """ + Apply list of offsets to list of tensors. + Arguments: + x_list (list of torch.Tensor): list of tensors to offset. + Returns: + A (list of torch.Tensor): list of offset tensors. + """ + A = [] for x, offset in zip(x_list, self.offset): assert x.dim() == 4 diff --git a/captum/optim/_utils/circuits.py b/captum/optim/_utils/circuits.py index c882607570..f6d8f49af8 100644 --- a/captum/optim/_utils/circuits.py +++ b/captum/optim/_utils/circuits.py @@ -28,8 +28,8 @@ def get_expanded_weights( specified for target2. target2 (nn.Module): The end target layer. Must be above the layer specified for target1. - crop_shape (int or tuple of ints, optional): Specify the output weight - size to enter crop away padding. + crop_shape (int or tuple of ints, optional): Specify the exact output size + to crop out. model_input (tensor or tuple of tensors, optional): The input to use with the specified model. 
Returns: diff --git a/tests/optim/helpers/numpy_image.py b/tests/optim/helpers/numpy_image.py index 08e397446e..5d6fb91d64 100644 --- a/tests/optim/helpers/numpy_image.py +++ b/tests/optim/helpers/numpy_image.py @@ -12,7 +12,7 @@ def setup_batch(x: np.ndarray, batch: int = 1, dim: int = 3) -> np.ndarray: return x -class FFTImage(object): +class FFTImage: """Parameterize an image using inverse real 2D FFT""" def __init__( diff --git a/tests/optim/helpers/numpy_transforms.py b/tests/optim/helpers/numpy_transforms.py index 9d43a1cd2c..f7f5a200c7 100644 --- a/tests/optim/helpers/numpy_transforms.py +++ b/tests/optim/helpers/numpy_transforms.py @@ -3,7 +3,7 @@ import numpy as np -class BlendAlpha(object): +class BlendAlpha: """ NumPy version of the BlendAlpha transform. @@ -36,7 +36,7 @@ def blend_alpha(self, x: np.ndarray) -> np.ndarray: return blended -class RandomSpatialJitter(object): +class RandomSpatialJitter: """ NumPy version of the RandomSpatialJitter transform. @@ -68,7 +68,7 @@ def jitter(self, x: np.ndarray) -> np.ndarray: return self.translate_array(x, insets) -class CenterCrop(object): +class CenterCrop: """ NumPy version of the CenterCrop transform. @@ -108,7 +108,7 @@ def crop(self, input: np.ndarray) -> np.ndarray: return input[..., sh : sh + h_crop, sw : sw + w_crop] -class ToRGB(object): +class ToRGB: """ NumPy version of the ToRGB transform """ From 5a5b285109ba480563f29bbe4534b0feee8cd3b3 Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Fri, 25 Dec 2020 12:46:20 -0700 Subject: [PATCH 04/10] Add more docs --- captum/optim/_param/image/transform.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/captum/optim/_param/image/transform.py b/captum/optim/_param/image/transform.py index 819c9f3742..b83c97c482 100644 --- a/captum/optim/_param/image/transform.py +++ b/captum/optim/_param/image/transform.py @@ -251,6 +251,13 @@ def scale_tensor(self, x: torch.Tensor, scale: TransformVal) -> torch.Tensor: return x def forward(self, input: torch.Tensor) -> torch.Tensor: + """ + Randomly scale / zoom in or out of a tensor. + Arguments: + input (torch.Tensor): Input to randomly scale. + Returns: + tensor (torch.Tensor): Scaled tensor. + """ scale = rand_select(self.scale) return self.scale_tensor(input, scale=scale) @@ -281,6 +288,13 @@ def translate_tensor(self, x: torch.Tensor, insets: torch.Tensor) -> torch.Tenso return cropped def forward(self, input: torch.Tensor) -> torch.Tensor: + """ + Randomly translate an input tensor's height and width dimensions. + Arguments: + input (torch.Tensor): Input to randomly translate. + Returns: + tensor (torch.Tensor): A randomly translated tensor. 
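+
+        Example; a minimal usage sketch (translate=4 and the input size are
+        arbitrary choices):
+
+            jitter = RandomSpatialJitter(translate=4)
+            x = torch.rand(1, 3, 224, 224)
+            x_jittered = jitter(x)  # same shape, contents randomly shifted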
+ """ insets = torch.randint(high=self.pad_range, size=(2,)) return self.translate_tensor(input, insets) From 66985f8c187fe0077f7e8cfb4070ae2974044b1b Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Sun, 9 May 2021 15:49:24 -0600 Subject: [PATCH 05/10] Improve docs --- captum/optim/_core/output_hook.py | 7 ++-- captum/optim/_param/image/images.py | 48 ++++++++++++++----------- captum/optim/_param/image/transforms.py | 8 +++-- captum/optim/_utils/reducer.py | 35 +++++++++--------- tests/optim/helpers/numpy_transforms.py | 3 +- 5 files changed, 56 insertions(+), 45 deletions(-) diff --git a/captum/optim/_core/output_hook.py b/captum/optim/_core/output_hook.py index 79b59ac270..13497f41df 100644 --- a/captum/optim/_core/output_hook.py +++ b/captum/optim/_core/output_hook.py @@ -92,14 +92,15 @@ def __call__(self, input_t: TupleOfTensorsOrTensorType) -> ModuleOutputMapping: input_t (tensor or tuple of tensors, optional): The input to use with the specified model. Returns: - *dict*: An dict containing the collected activations. + activations_dict: An dict containing the collected activations. The keys + for the returned dictionary are the target layers. """ try: with warnings.catch_warnings(): warnings.simplefilter("ignore") self.model(input_t) - activations = self.layers.consume_outputs() + activations_dict = self.layers.consume_outputs() finally: self.layers.remove_hooks() - return activations + return activations_dict diff --git a/captum/optim/_param/image/images.py b/captum/optim/_param/image/images.py index d0ccff2aa9..4a932c7076 100644 --- a/captum/optim/_param/image/images.py +++ b/captum/optim/_param/image/images.py @@ -93,10 +93,12 @@ class FFTImage(ImageParameterization): Parameterize an image using inverse real 2D FFT Args: - size (list of int): The H & W dimensions to use for creating - the nn.Parameter tensor. - channels (int): The number of channels to create. - batch (int): The number of batches to create. + size (Tuple[int, int]): The height & width dimensions to use for the + parameterized output image tensor. + channels (int, optional): The number of channels to use for each image. Default + is set to 3. + batch (int, optional): The number of images to stack along the batch dimension. + Default is set to 1. init (torch.tensor, optional): Optionally specify a tensor to use instead of creating one. """ @@ -168,7 +170,7 @@ def get_fft_funcs(self) -> Tuple[Callable, Callable, Callable]: Returns: fft functions (tuple of Callable): A list of FFT functions - to use for irfft and rfft operations. + to use for irfft, rfft, and fftfreq operations. """ if TORCH_VERSION >= "1.7.0": @@ -209,7 +211,7 @@ def torch_fftfreq(v: int, d: float = 1.0) -> torch.Tensor: def forward(self) -> torch.Tensor: """ Returns: - spatially correlated tensor (tensor): A spatially recorrelated tensor. + output (torch.tensor): A spatially recorrelated tensor. """ h, w = self.size @@ -223,10 +225,12 @@ class PixelImage(ImageParameterization): Parameterize a simple image tensor. Args: - size (list of int): The H & W dimensions to use for creating - the nn.Parameter tensor. - channels (int): The number of channels to create. - batch (int): The number of batches to create. + size (Tuple[int, int]): The height & width dimensions to use for the + parameterized output image tensor. + channels (int, optional): The number of channels to use for each image. Default + is set to 3. + batch (int, optional): The number of images to stack along the batch dimension. + Default is set to 1. 
init (torch.tensor, optional): Optionally specify a tensor to use instead of creating one. """ @@ -256,13 +260,15 @@ def forward(self) -> torch.Tensor: class LaplacianImage(ImageParameterization): """ - Parameterize an image with a laplacian pyramid. + Parameterize an image tensor with a laplacian pyramid. Args: - size (list of int): The H & W dimensions to use for creating - the nn.Parameter tensor. - channels (int): The number of channels to create. - batch (int): The number of batches to create. + size (Tuple[int, int]): The height & width dimensions to use for the + parameterized output image tensor. + channels (int, optional): The number of channels to use for each image. Default + is set to 3. + batch (int, optional): The number of images to stack along the batch dimension. + Default is set to 1. init (torch.tensor, optional): Optionally specify a tensor to use instead of creating one. """ @@ -347,8 +353,8 @@ class SharedImage(ImageParameterization): shapes (list of int or list of list of ints): The shapes of the shared tensors to use for creating the nn.Parameter tensors. parameterization (ImageParameterization): An image parameterization instance. - offset (int or list of int or list of list of ints): The offsets to use for the - shared tensors. + offset (int or list of int or list of list of ints , optional): The offsets to + use for the shared tensors. """ def __init__( @@ -474,10 +480,10 @@ class NaturalImage(ImageParameterization): Arguments: size (Tuple[int, int]): The height and width to use for the nn.Parameter image tensor. - channels (int): The number of channels to use when creating the - nn.Parameter tensor. - batch (int): The number of channels to use when creating the - nn.Parameter tensor, or stacking init images. + channels (int, optional): The number of channels to use when creating the + nn.Parameter tensor. Default is set to 3. + batch (int, optional): The number of channels to use when creating the + nn.Parameter tensor, or stacking init images. Default is set to 1. parameterization (ImageParameterization, optional): An image parameterization class. squash_func (Callable[[torch.Tensor], torch.Tensor]], optional): The squash diff --git a/captum/optim/_param/image/transforms.py b/captum/optim/_param/image/transforms.py index ff536b1dca..7a17f342ae 100644 --- a/captum/optim/_param/image/transforms.py +++ b/captum/optim/_param/image/transforms.py @@ -113,10 +113,12 @@ def __init__(self, transform: Union[str, torch.Tensor] = "klt") -> None: def forward(self, x: torch.Tensor, inverse: bool = False) -> torch.Tensor: """ Args: - x (tensor): A CHW or NCHW RGB or RGBA image tensor. - inverse (bool): Whether to recorrelate or decorrelate colors. + x (torch.tensor): A CHW or NCHW RGB or RGBA image tensor. + inverse (bool, optional): Whether to recorrelate or decorrelate colors. + Default is set to False. Returns: - *tensor*: A tensor with it's colors recorrelated or decorrelated. + chw (torch.tensor): A tensor with it's colors recorrelated or + decorrelated. """ assert x.dim() == 3 or x.dim() == 4 diff --git a/captum/optim/_utils/reducer.py b/captum/optim/_utils/reducer.py index 21ddfada4e..2696d003d6 100644 --- a/captum/optim/_utils/reducer.py +++ b/captum/optim/_utils/reducer.py @@ -16,18 +16,20 @@ class ChannelReducer: """ - Dimensionality reduction for the channel dimension of an input. - The default reduction_alg is NMF from sklearn, which requires users - to put input on CPU before passing to fit_transform. 
+ Dimensionality reduction for the channel dimension of an input tensor. Olah, et al., "The Building Blocks of Interpretability", Distill, 2018. - See: https://distill.pub/2018/building-blocks/ + + See here for more information: https://distill.pub/2018/building-blocks/ Args: - n_components (int): The number of channels to reduce the target + n_components (int, optional): The number of channels to reduce the target dimension to. - reduction_alg (str or callable): The desired dimensionality - reduction algorithm to use. - **kwargs: Arbitrary keyword arguments used by the specified reduction_alg. + reduction_alg (str or callable, optional): The desired dimensionality + reduction algorithm to use. The default reduction_alg is set to NMF from + sklearn, which requires users to put inputs on CPU before passing them to + fit_transform. + **kwargs (optional): Arbitrary keyword arguments used by the specified + reduction_alg. """ def __init__( @@ -69,12 +71,11 @@ def fit_transform( ) -> torch.Tensor: """ Perform dimensionality reduction on an input tensor. - Args: tensor (tensor): A tensor to perform dimensionality reduction on. - swap_2nd_and_last_dims (bool): If true, input channels are expected - to be in the second dimension unless the input tensor has a shape - of CHW. + swap_2nd_and_last_dims (bool, optional): If true, input channels are + expected to be in the second dimension unless the input tensor has a + shape of CHW. Default is set to True. Returns: *tensor*: A tensor with one of it's dimensions reduced. """ @@ -126,15 +127,15 @@ def __dir__(self) -> List: def posneg(x: torch.Tensor, dim: int = 0) -> torch.Tensor: """ - Hack that makes a matrix positive by concatination in order to simulate - one-sided NMF with regular NMF + Hack that makes a matrix positive by concatination in order to simulate one-sided + NMF with regular NMF. Args: x (tensor): A tensor to make positive. - dim (int): The dimension to concatinate the two tensor halves at. - + dim (int, optional): The dimension to concatinate the two tensor halves at. Returns: - *tensor*: A positive tensor for one-sided dimensionality reduction. + tensor (torch.tensor): A positive tensor for one-sided dimensionality + reduction. """ return torch.cat([F.relu(x), F.relu(-x)], dim=dim) diff --git a/tests/optim/helpers/numpy_transforms.py b/tests/optim/helpers/numpy_transforms.py index b28c1498e5..eec0afebac 100644 --- a/tests/optim/helpers/numpy_transforms.py +++ b/tests/optim/helpers/numpy_transforms.py @@ -201,7 +201,8 @@ def to_rgb(self, x: np.ndarray, inverse: bool = False) -> np.ndarray: """ Args: x (array): A CHW or NCHW RGB or RGBA image array. - inverse (bool): Whether to recorrelate or decorrelate colors. + inverse (bool, optional): Whether to recorrelate or decorrelate colors. + Default is set to False. Returns: *array*: An array with it's colors recorrelated or decorrelated. """ From f5a755d0e63c3bc9e764869513d6e375176a3a8e Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Mon, 10 May 2021 09:18:00 -0600 Subject: [PATCH 06/10] Add missing optional to doc --- captum/optim/_param/image/transforms.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/captum/optim/_param/image/transforms.py b/captum/optim/_param/image/transforms.py index 7a17f342ae..2adfd902a3 100644 --- a/captum/optim/_param/image/transforms.py +++ b/captum/optim/_param/image/transforms.py @@ -536,8 +536,8 @@ class NChannelsToRGB(nn.Module): Convert an NCHW image with n channels into a 3 channel RGB image. 
     Args:
-        warp (bool): Whether or not to make the resulting RGB colors more distict
-            from each other.
+        warp (bool, optional): Whether or not to make the resulting RGB colors more
+            distinct from each other. Default is set to False.
     """
 
     def __init__(self, warp: bool = False) -> None:
         super().__init__()
         self.warp = warp

From e020ef789963cb40c0086c15c6a748a8f1e52cc8 Mon Sep 17 00:00:00 2001
From: ProGamerGov
Date: Mon, 17 May 2021 15:40:25 -0600
Subject: [PATCH 07/10] Update docs to reflect new changes

* Some minor changes that I forgot when I pulled from the optim-wip
master branch.
---
 captum/optim/_core/optimization.py  | 7 +++----
 captum/optim/_param/image/images.py | 4 ++--
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/captum/optim/_core/optimization.py b/captum/optim/_core/optimization.py
index 63f30a016e..4876820a49 100644
--- a/captum/optim/_core/optimization.py
+++ b/captum/optim/_core/optimization.py
@@ -123,10 +123,9 @@ def optimize(
             optimizer (Optimizer, optional): An torch.optim.Optimizer used to
                 optimize the input based on the loss function.
         Returns:
-            *list* of *np.arrays* representing the **history**:
-            - **history** (*list*):
-                    A list of loss values per iteration.
-                    Length of the list corresponds to the number of iterations
+            history (torch.Tensor): A stack of loss values per iteration. The size
+                of the dimension on which loss values are stacked corresponds to
+                the number of iterations.
         """
         stop_criteria = stop_criteria or n_steps(512)
         optimizer = optimizer or optim.Adam(self.parameters(), lr=lr)
diff --git a/captum/optim/_param/image/images.py b/captum/optim/_param/image/images.py
index 4a932c7076..f6b36551a2 100644
--- a/captum/optim/_param/image/images.py
+++ b/captum/optim/_param/image/images.py
@@ -478,8 +478,8 @@ class NaturalImage(ImageParameterization):
         inside its computation.
 
     Arguments:
-        size (Tuple[int, int]): The height and width to use for the nn.Parameter image
-            tensor.
+        size (Tuple[int, int], optional): The height and width to use for the
+            nn.Parameter image tensor.
         channels (int, optional): The number of channels to use when creating the
             nn.Parameter tensor. Default is set to 3.
         batch (int, optional): The number of channels to use when creating the
             nn.Parameter tensor, or stacking init images. Default is set to 1.

From 7c6ef63faeec2c9a58eb3d8a3e7084efbf56b3db Mon Sep 17 00:00:00 2001
From: ProGamerGov
Date: Thu, 29 Jul 2021 14:25:22 -0600
Subject: [PATCH 08/10] Changes based on feedback

---
 captum/optim/_core/optimization.py      |  13 ++
 captum/optim/_core/output_hook.py       |  46 ++++-
 captum/optim/_param/image/images.py     | 253 +++++++++++++++++-------
 captum/optim/_param/image/transforms.py | 198 +++++++++++++------
 tests/optim/param/test_images.py        |  14 +-
 5 files changed, 371 insertions(+), 153 deletions(-)

diff --git a/captum/optim/_core/optimization.py b/captum/optim/_core/optimization.py
index 7be67ee5df..6f2eda25df 100644
--- a/captum/optim/_core/optimization.py
+++ b/captum/optim/_core/optimization.py
@@ -46,6 +46,7 @@ def __init__(
     ) -> None:
         r"""
         Args:
+            model (nn.Module): The reference to PyTorch model instance.
             input_param (nn.Module, optional): A module that generates an input,
                 consumed by the model.
@@ -71,6 +72,7 @@ def __init__(
 
     def loss(self) -> torch.Tensor:
         r"""Compute loss value for current iteration.
+
         Returns:
             *tensor* representing **loss**:
             - **loss** (*tensor*):
@@ -115,13 +117,22 @@ def optimize(
         lr: float = 0.025,
     ) -> torch.Tensor:
         r"""Optimize input based on loss function and objectives.
From 7c6ef63faeec2c9a58eb3d8a3e7084efbf56b3db Mon Sep 17 00:00:00 2001
From: ProGamerGov
Date: Thu, 29 Jul 2021 14:25:22 -0600
Subject: [PATCH 08/10] Changes based on feedback

---
 captum/optim/_core/optimization.py      |  13 ++
 captum/optim/_core/output_hook.py       |  46 ++++-
 captum/optim/_param/image/images.py     | 253 +++++++++++++++++-------
 captum/optim/_param/image/transforms.py | 198 +++++++++++++------
 tests/optim/param/test_images.py        |  14 +-
 5 files changed, 371 insertions(+), 153 deletions(-)

diff --git a/captum/optim/_core/optimization.py b/captum/optim/_core/optimization.py
index 7be67ee5df..6f2eda25df 100644
--- a/captum/optim/_core/optimization.py
+++ b/captum/optim/_core/optimization.py
@@ -46,6 +46,7 @@ def __init__(
     ) -> None:
         r"""
         Args:
+
             model (nn.Module): The reference to PyTorch model instance.
             input_param (nn.Module, optional): A module that generates an input,
                 consumed by the model.
@@ -71,6 +72,7 @@ def __init__(
     def loss(self) -> torch.Tensor:
         r"""Compute loss value for current iteration.
+
         Returns:
             *tensor* representing **loss**:
             - **loss** (*tensor*):
@@ -115,13 +117,22 @@ def optimize(
         lr: float = 0.025,
     ) -> torch.Tensor:
         r"""Optimize input based on loss function and objectives.
+
         Args:
+
             stop_criteria (StopCriteria, optional): A function that is called
                 every iteration and returns a bool that determines whether to
                 stop the optimization.
                 See captum.optim.typing.StopCriteria for details.
             optimizer (Optimizer, optional): An torch.optim.Optimizer used to
                 optimize the input based on the loss function.
+            loss_summarize_fn (Callable, optional): The function to use for summarizing
+                tensor outputs from loss functions.
+                Default: default_loss_summarize
+            lr (float): If no optimizer is given, then lr is used as the learning rate
+                for the Adam optimizer.
+                Default: 0.025
+
         Returns:
             history (torch.Tensor): A stack of loss values per iteration. The size
                 of the dimension on which loss values are stacked corresponds to
                 the number of iterations.
         """
         stop_criteria = stop_criteria or n_steps(512)
         optimizer = optimizer or optim.Adam(self.parameters(), lr=lr)
@@ -149,10 +160,12 @@ def optimize(
 
 def n_steps(n: int, show_progress: bool = True) -> StopCriteria:
     """StopCriteria generator that uses number of steps as a stop criteria.
+
     Args:
         n (int): Number of steps to run optimization.
         show_progress (bool, optional): Whether or not to show progress bar.
             Default: True
+
     Returns:
         *StopCriteria* callable
     """

diff --git a/captum/optim/_core/output_hook.py b/captum/optim/_core/output_hook.py
index 13497f41df..6cfbc4ff2e 100644
--- a/captum/optim/_core/output_hook.py
+++ b/captum/optim/_core/output_hook.py
@@ -8,12 +8,13 @@
 from captum.optim._utils.typing import ModuleOutputMapping, TupleOfTensorsOrTensorType
 
 
-class ModuleReuseException(Exception):
-    pass
-
-
 class ModuleOutputsHook:
     def __init__(self, target_modules: Iterable[nn.Module]) -> None:
+        """
+        Args:
+
+            target_modules (Iterable of nn.Module): A list of nn.Module targets.
+        """
         self.outputs: ModuleOutputMapping = dict.fromkeys(target_modules, None)
         self.hooks = [
             module.register_forward_hook(self._forward_hook())
@@ -21,6 +22,9 @@ def __init__(self, target_modules: Iterable[nn.Module]) -> None:
         ]
 
     def _reset_outputs(self) -> None:
+        """
+        Delete captured activations.
+        """
         self.outputs = dict.fromkeys(self.outputs.keys(), None)
 
     @property
@@ -28,6 +32,13 @@ def is_ready(self) -> bool:
         return all(value is not None for value in self.outputs.values())
 
     def _forward_hook(self) -> Callable:
+        """
+        Return the forward_hook function.
+
+        Returns:
+            forward_hook (Callable): The forward_hook function.
+        """
+
         def forward_hook(
             module: nn.Module, input: Tuple[torch.Tensor], output: torch.Tensor
         ) -> None:
@@ -49,6 +60,12 @@ def forward_hook(
         return forward_hook
 
     def consume_outputs(self) -> ModuleOutputMapping:
+        """
+        Collect target activations and return them.
+
+        Returns:
+            outputs (ModuleOutputMapping): The captured outputs.
+        """
         if not self.is_ready:
             warn(
                 "Consume captured outputs, but not all requested target outputs "
@@ -63,25 +80,32 @@ def targets(self) -> Iterable[nn.Module]:
         return self.outputs.keys()
 
     def remove_hooks(self) -> None:
+        """
+        Remove hooks.
+        """
         for hook in self.hooks:
             hook.remove()
 
     def __del__(self) -> None:
-        # print(f"DEL HOOKS!: {list(self.outputs.keys())}")
+        """
+        Ensure that using 'del' properly deletes hooks.
+        """
         self.remove_hooks()
 
 
 class ActivationFetcher:
     """
     Simple module for collecting activations from model targets.
-
-    Args:
-        model (nn.Module): The reference to PyTorch model instance.
-        targets (nn.module or list of nn.module): The target layers to
-            collect activations from.
     """
 
     def __init__(self, model: nn.Module, targets: Iterable[nn.Module]) -> None:
+        """
+        Args:
+
+            model (nn.Module): A reference to the PyTorch model instance.
+            targets (nn.Module or list of nn.Module): The target layers to
+                collect activations from.
+        """
        super(ActivationFetcher, self).__init__()
        self.model = model
        self.layers = ModuleOutputsHook(targets)
@@ -89,8 +113,10 @@ def __init__(self, model: nn.Module, targets: Iterable[nn.Module]) -> None:
     def __call__(self, input_t: TupleOfTensorsOrTensorType) -> ModuleOutputMapping:
         """
         Args:
+
             input_t (tensor or tuple of tensors, optional): The input to use
                 with the specified model.
+
         Returns:
             activations_dict: An dict containing the collected activations.
             The keys for the returned dictionary are the target layers.
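A sketch of the ActivationFetcher contract documented above; the model and target layer below are illustrative stand-ins, not part of this diff:

    # Assumed usage of the documented ActivationFetcher class.
    import torch
    import torch.nn as nn
    from captum.optim._core.output_hook import ActivationFetcher

    model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.Conv2d(8, 8, 3))
    catch_activ = ActivationFetcher(model, targets=[model[1]])
    activ_dict = catch_activ(torch.randn(1, 3, 32, 32))
    relu_output = activ_dict[model[1]]  # keys are the target layer modules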
+ """ super(ActivationFetcher, self).__init__() self.model = model self.layers = ModuleOutputsHook(targets) @@ -89,8 +113,10 @@ def __init__(self, model: nn.Module, targets: Iterable[nn.Module]) -> None: def __call__(self, input_t: TupleOfTensorsOrTensorType) -> ModuleOutputMapping: """ Args: + input_t (tensor or tuple of tensors, optional): The input to use with the specified model. + Returns: activations_dict: An dict containing the collected activations. The keys for the returned dictionary are the target layers. diff --git a/captum/optim/_param/image/images.py b/captum/optim/_param/image/images.py index 0f98f2c1a6..eee209806d 100644 --- a/captum/optim/_param/image/images.py +++ b/captum/optim/_param/image/images.py @@ -27,6 +27,15 @@ def __new__( *args, **kwargs, ) -> torch.Tensor: + """ + Args: + + x (list or np.ndarray or torch.Tensor): A list, NumPy array, or PyTorch + tensor to create an `ImageTensor` from. + + Returns: + x (ImageTensor): An `ImageTensor` instance. + """ if isinstance(x, torch.Tensor) and x.is_cuda: x.show = MethodType(cls.show, x) x.export = MethodType(cls.export, x) @@ -36,6 +45,20 @@ def __new__( @classmethod def open(cls, path: str, scale: float = 255.0, mode: str = "RGB") -> "ImageTensor": + """ + Load an image file from a URL or local filepath directly into an `ImageTensor`. + + Args: + + path (str): A URL or filepath to an image. + scale (float): The image scale to use. + Default: 255.0 + mode (str:) The image loading mode to use. + Default: "RGB" + + Returns: + x (ImageTensor): An `ImageTensor` instance. + """ if path.startswith("https://") or path.startswith("http://"): response = requests.get(path, stream=True) img = Image.open(response.raw) @@ -73,9 +96,31 @@ def __torch_function__( def show( self, figsize: Optional[Tuple[int, int]] = None, scale: float = 255.0 ) -> None: + """ + Display an `ImageTensor`. + + Args: + + figsize (Tuple[int, int], optional): height & width to use + for displaying the `ImageTensor` figure. + scale (float): Value to multiply the `ImageTensor` by so that + it's value range is [0-255] for display. + Default: 255.0 + """ show(self, figsize=figsize, scale=scale) def export(self, filename: str, scale: float = 255.0) -> None: + """ + Save an `ImageTensor` as an image file. + + Args: + + filename (str): The filename to use when saving the `ImageTensor` as an + image file. + scale (float): Value to multiply the `ImageTensor` by so that + it's value range is [0-255] for saving. + Default: 255.0 + """ save_tensor_as_image(self, filename=filename, scale=scale) @@ -91,16 +136,6 @@ class ImageParameterization(InputParameterization): class FFTImage(ImageParameterization): """ Parameterize an image using inverse real 2D FFT - - Args: - size (Tuple[int, int]): The height & width dimensions to use for the - parameterized output image tensor. - channels (int, optional): The number of channels to use for each image. Default - is set to 3. - batch (int, optional): The number of images to stack along the batch dimension. - Default is set to 1. - init (torch.tensor, optional): Optionally specify a tensor to - use instead of creating one. """ def __init__( @@ -110,6 +145,19 @@ def __init__( batch: int = 1, init: Optional[torch.Tensor] = None, ) -> None: + """ + Args: + + size (Tuple[int, int]): The height & width dimensions to use for the + parameterized output image tensor. + channels (int, optional): The number of channels to use for each image. + Default: 3 + batch (int, optional): The number of images to stack along the batch dimension. 
+ Default: 1 + init (torch.tensor, optional): Optionally specify a tensor to + use instead of creating one. + Default: None + """ super().__init__() if init is None: assert len(size) == 2 @@ -153,10 +201,12 @@ def rfft2d_freqs(self, height: int, width: int) -> torch.Tensor: Computes 2D spectrum frequencies. Args: + height (int): The h dimension of the 2d frequency scale. width (int): The w dimension of the 2d frequency scale. + Returns: - tensor (tensor): A 2d frequency scale tensor. + **tensor** (tensor): A 2d frequency scale tensor. """ fy = self.torch_fftfreq(height)[:, None] @@ -165,7 +215,9 @@ def rfft2d_freqs(self, height: int, width: int) -> torch.Tensor: def get_fft_funcs(self) -> Tuple[Callable, Callable, Callable]: """ - Support older versions of PyTorch. + Support older versions of PyTorch. This function ensures that the same FFT + operations are carried regardless of whether your PyTorch version has the + torch.fft update. Returns: fft functions (tuple of Callable): A list of FFT functions @@ -210,7 +262,7 @@ def torch_fftfreq(v: int, d: float = 1.0) -> torch.Tensor: def forward(self) -> torch.Tensor: """ Returns: - output (torch.tensor): A spatially recorrelated tensor. + **output** (torch.tensor): A spatially recorrelated tensor. """ scaled_spectrum = self.fourier_coeffs * self.spectrum_scale @@ -220,17 +272,7 @@ def forward(self) -> torch.Tensor: class PixelImage(ImageParameterization): """ - Parameterize a simple image tensor. - - Args: - size (Tuple[int, int]): The height & width dimensions to use for the - parameterized output image tensor. - channels (int, optional): The number of channels to use for each image. Default - is set to 3. - batch (int, optional): The number of images to stack along the batch dimension. - Default is set to 1. - init (torch.tensor, optional): Optionally specify a tensor to - use instead of creating one. + Parameterize a simple pixel image tensor that requires no additional transforms. """ def __init__( @@ -240,6 +282,19 @@ def __init__( batch: int = 1, init: Optional[torch.Tensor] = None, ) -> None: + """ + Args: + + size (Tuple[int, int]): The height & width dimensions to use for the + parameterized output image tensor. + channels (int, optional): The number of channels to use for each image. + Default: 3 + batch (int, optional): The number of images to stack along the batch dimension. + Default: 1 + init (torch.tensor, optional): Optionally specify a tensor to + use instead of creating one. + Default: None + """ super().__init__() if init is None: assert size is not None and channels is not None and batch is not None @@ -260,16 +315,6 @@ class LaplacianImage(ImageParameterization): """ TODO: Fix divison by 6 in setup_input when init is not None. Parameterize an image tensor with a laplacian pyramid. - - Args: - size (Tuple[int, int]): The height & width dimensions to use for the - parameterized output image tensor. - channels (int, optional): The number of channels to use for each image. Default - is set to 3. - batch (int, optional): The number of images to stack along the batch dimension. - Default is set to 1. - init (torch.tensor, optional): Optionally specify a tensor to - use instead of creating one. """ def __init__( @@ -279,11 +324,24 @@ def __init__( batch: int = 1, init: Optional[torch.Tensor] = None, ) -> None: + """ + Args: + + size (Tuple[int, int]): The height & width dimensions to use for the + parameterized output image tensor. + channels (int, optional): The number of channels to use for each image. 
+ Default: 3 + batch (int, optional): The number of images to stack along the batch dimension. + Default: 1 + init (torch.tensor, optional): Optionally specify a tensor to + use instead of creating one. + Default: None + """ super().__init__() power = 0.1 if init is None: - tensor_params, self.scaler = self.setup_input(size, channels, power, init) + tensor_params, self.scaler = self._setup_input(size, channels, power, init) self.tensor_params = torch.nn.ModuleList( [deepcopy(tensor_params) for b in range(batch)] @@ -292,13 +350,13 @@ def __init__( init = init.unsqueeze(0) if init.dim() == 3 else init P = [] for b in range(init.size(0)): - tensor_params, self.scaler = self.setup_input( + tensor_params, self.scaler = self._setup_input( size, channels, power, init[b].unsqueeze(0) ) P.append(tensor_params) self.tensor_params = torch.nn.ModuleList(P) - def setup_input( + def _setup_input( self, size: Tuple[int, int], channels: int, @@ -322,16 +380,26 @@ def setup_input( tensor_params = torch.nn.ParameterList(tensor_params) return tensor_params, scaler - def create_tensor(self, params_list: torch.nn.ParameterList) -> torch.Tensor: - A = [] + def _create_tensor(self, params_list: torch.nn.ParameterList) -> torch.Tensor: + """ + Resize tensor parameters to the target size. + + Args: + + params_list (torch.nn.ParameterList): List of tensors to resize. + + Returns: + **tensor** (torch.Tensor): The sum of all tensor parameters. + """ + A: List[torch.Tensor] = [] for xi, upsamplei in zip(params_list, self.scaler): A.append(upsamplei(xi)) return torch.sum(torch.cat(A), 0) + 0.5 def forward(self) -> torch.Tensor: - A = [] + A: List[torch.Tensor] = [] for params_list in self.tensor_params: - tensor = self.create_tensor(params_list) + tensor = self._create_tensor(params_list) A.append(tensor) return torch.stack(A).refine_names("B", "C", "H", "W") @@ -347,13 +415,6 @@ class SharedImage(ImageParameterization): Mordvintsev, et al., "Differentiable Image Parameterizations", Distill, 2018. https://distill.pub/2018/differentiable-parameterizations/ - - Args: - shapes (list of int or list of list of ints): The shapes of the shared tensors - to use for creating the nn.Parameter tensors. - parameterization (ImageParameterization): An image parameterization instance. - offset (int or list of int or list of list of ints , optional): The offsets to - use for the shared tensors. """ def __init__( @@ -362,6 +423,17 @@ def __init__( parameterization: ImageParameterization = None, offset: Union[int, Tuple[int], Tuple[Tuple[int]], None] = None, ) -> None: + """ + Args: + + shapes (list of int or list of list of ints): The shapes of the shared + tensors to use for creating the nn.Parameter tensors. + parameterization (ImageParameterization): An image parameterization + instance. + offset (int or list of int or list of list of ints , optional): The offsets + to use for the shared tensors. + Default: None + """ super().__init__() assert shapes is not None A = [] @@ -373,9 +445,21 @@ def __init__( A.append(torch.nn.Parameter(torch.randn([batch, channels, height, width]))) self.shared_init = torch.nn.ParameterList(A) self.parameterization = parameterization - self.offset = self.get_offset(offset, len(A)) if offset is not None else None + self.offset = self._get_offset(offset, len(A)) if offset is not None else None + + def _get_offset(self, offset: Union[int, Tuple[int]], n: int) -> List[List[int]]: + """ + Given offset values, return a list of offsets for _apply_offset to use. 
+ + Args: - def get_offset(self, offset: Union[int, Tuple[int]], n: int) -> List[List[int]]: + offset (int or list of int or list of list of ints , optional): The offsets + to use for the shared tensors. + n (int): The number of tensors needing offset values. + + Returns: + **offset** (list of list of int): A list of offset values. + """ if type(offset) is tuple or type(offset) is list: if type(offset[0]) is tuple or type(offset[0]) is list: assert len(offset) == n and all(len(t) == 4 for t in offset) @@ -388,17 +472,19 @@ def get_offset(self, offset: Union[int, Tuple[int]], n: int) -> List[List[int]]: assert all([all([type(o) is int for o in v]) for v in offset]) return offset - def apply_offset(self, x_list: List[torch.Tensor]) -> List[torch.Tensor]: + def _apply_offset(self, x_list: List[torch.Tensor]) -> List[torch.Tensor]: """ Apply list of offsets to list of tensors. Args: + x_list (list of torch.Tensor): list of tensors to offset. + Returns: - A (list of torch.Tensor): list of offset tensors. + **A** (list of torch.Tensor): list of offset tensors. """ - A = [] + A: List[torch.Tensor] = [] for x, offset in zip(x_list, self.offset): assert x.dim() == 4 size = list(x.size()) @@ -419,13 +505,23 @@ def apply_offset(self, x_list: List[torch.Tensor]) -> List[torch.Tensor]: A.append(x) return A - def interpolate_tensor( + def _interpolate_tensor( self, x: torch.Tensor, batch: int, channels: int, height: int, width: int ) -> torch.Tensor: """ - Linear interpolation for 4D, 5D, and 6D tensors. - If the batch dimension needs to be resized, - we move it's location temporarily for F.interpolate. + Linear interpolation for 4D, 5D, and 6D tensors. If the batch dimension needs + to be resized, we move it's location temporarily for F.interpolate. + + Args: + + x (torch.Tensor): The tensor to resize. + batch (int): The batch size to resize the tensor to. + channels (int): The channel size to resize the tensor to. + height (int): The height to resize the tensor to. + width (int): The width to resize the tensor to. + + Returns: + **tensor** (torch.Tensor): A resized tensor. """ if x.size(1) == channels: @@ -450,7 +546,7 @@ def interpolate_tensor( def forward(self) -> torch.Tensor: image = self.parameterization() x = [ - self.interpolate_tensor( + self._interpolate_tensor( shared_tensor, image.size(0), image.size(1), @@ -460,7 +556,7 @@ def forward(self) -> torch.Tensor: for shared_tensor in self.shared_init ] if self.offset is not None: - x = self.apply_offset(x) + x = self._apply_offset(x) return (image + sum(x)).refine_names("B", "C", "H", "W") @@ -475,21 +571,6 @@ class NaturalImage(ImageParameterization): If a model requires a normalization step, such as normalizing imagenet RGB values, or rescaling to [0,255], it can perform those steps with the provided transforms or inside its computation. - - Arguments: - size (Tuple[int, int], optional): The height and width to use for the - nn.Parameter image tensor. - channels (int, optional): The number of channels to use when creating the - nn.Parameter tensor. Default is set to 3. - batch (int, optional): The number of channels to use when creating the - nn.Parameter tensor, or stacking init images. Default is set to 1. - parameterization (ImageParameterization, optional): An image parameterization - class. - squash_func (Callable[[torch.Tensor], torch.Tensor]], optional): The squash - function to use after color recorrelation. A funtion or lambda function. - decorrelation_module (nn.Module, optional): A ToRGB instance. 
- decorrelate_init (bool, optional): Whether or not to apply color decorrelation - to the init tensor input. """ def __init__( @@ -503,6 +584,30 @@ def __init__( decorrelation_module: Optional[nn.Module] = ToRGB(transform="klt"), decorrelate_init: bool = True, ) -> None: + """ + Args: + + size (Tuple[int, int], optional): The height and width to use for the + nn.Parameter image tensor. + Default: (224, 224) + channels (int, optional): The number of channels to use when creating the + nn.Parameter tensor. + Default: 3 + batch (int, optional): The number of channels to use when creating the + nn.Parameter tensor, or stacking init images. + Default: 1 + parameterization (ImageParameterization, optional): An image parameterization + class. + Default: FFTImage + squash_func (Callable[[torch.Tensor], torch.Tensor]], optional): The squash + function to use after color recorrelation. A funtion or lambda function. + Default: None + decorrelation_module (nn.Module, optional): A ToRGB instance. + Default: ToRGB + decorrelate_init (bool, optional): Whether or not to apply color decorrelation + to the init tensor input. + Default: True + """ super().__init__() self.decorrelate = decorrelation_module if init is not None: diff --git a/captum/optim/_param/image/transforms.py b/captum/optim/_param/image/transforms.py index 300c9d186b..74cf645ad5 100644 --- a/captum/optim/_param/image/transforms.py +++ b/captum/optim/_param/image/transforms.py @@ -13,25 +13,30 @@ class BlendAlpha(nn.Module): r"""Blends a 4 channel input parameterization into an RGB image. - You can specify a fixed background, or a random one will be used by default. - - Args: - background (tensor, optional): An NCHW image tensor to be used as the - Alpha channel's background. """ def __init__(self, background: Optional[torch.Tensor] = None) -> None: + """ + Args: + + background (tensor, optional): An NCHW image tensor to be used as the + Alpha channel's background. + Default: None + """ super().__init__() self.background = background def forward(self, x: torch.Tensor) -> torch.Tensor: """ Blend the Alpha channel into the RGB channels. + Args: + x (torch.Tensor): RGBA image tensor to blend into an RGB image tensor. + Returns: - blended (torch.Tensor): RGB image tensor. + **blended** (torch.Tensor): RGB image tensor. """ assert x.dim() == 4 assert x.size(1) == 4 @@ -49,10 +54,13 @@ class IgnoreAlpha(nn.Module): def forward(self, x: torch.Tensor) -> torch.Tensor: """ Ignore the alpha channel. - Arguments: + + Args: + x (torch.Tensor): RGBA image tensor. + Returns: - rgb (torch.Tensor): RGB image tensor without the alpha channel. + **rgb** (torch.Tensor): RGB image tensor without the alpha channel. """ assert x.dim() == 4 assert x.size(1) == 4 @@ -70,16 +78,17 @@ class ToRGB(nn.Module): [0] Y. Ohta, T. Kanade, and T. Sakai, "Color information for region segmentation," Computer Graphics and Image Processing, vol. 13, no. 3, pp. 222–241, 1980 https://www.sciencedirect.com/science/article/pii/0146664X80900477 - - Arguments: - transform (str or tensor): Either a string for one of the precalculated - transform matrices, or a 3x3 matrix for the 3 RGB channels of input - tensors. """ @staticmethod def klt_transform() -> torch.Tensor: - """Karhunen-Loève transform (KLT) measured on ImageNet""" + """ + Karhunen-Loève transform (KLT) measured on ImageNet + + Returns: + **transform** (torch.Tensor): A Karhunen-Loève transform (KLT) measured on + the ImageNet dataset. 
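A hedged sketch of the parameterizations documented above, with NaturalImage wrapping the default FFTImage backend; the sizes and the printed shape are illustrative assumptions:

    # Assumed end-to-end use of the documented NaturalImage class.
    from captum.optim._param.image.images import NaturalImage

    image_param = NaturalImage(size=(224, 224), channels=3, batch=1)
    output = image_param()  # FFTImage -> ToRGB recorrelation -> squash function
    print(output.shape)  # expected to be [1, 3, 224, 224] in NCHW order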
+ """ KLT = [[0.26, 0.09, 0.02], [0.27, 0.00, -0.05], [0.27, -0.09, 0.03]] transform = torch.Tensor(KLT).float() transform = transform / torch.max(torch.norm(transform, dim=0)) @@ -87,6 +96,11 @@ def klt_transform() -> torch.Tensor: @staticmethod def i1i2i3_transform() -> torch.Tensor: + """ + Returns: + **transform** (torch.Tensor): An approximation of natural colors transform + (i1i2i3). + """ i1i2i3_matrix = [ [1 / 3, 1 / 3, 1 / 3], [1 / 2, 0, -1 / 2], @@ -95,6 +109,13 @@ def i1i2i3_transform() -> torch.Tensor: return torch.Tensor(i1i2i3_matrix) def __init__(self, transform: Union[str, torch.Tensor] = "klt") -> None: + """ + Args: + + transform (str or tensor): Either a string for one of the precalculated + transform matrices, or a 3x3 matrix for the 3 RGB channels of input + tensors. + """ super().__init__() assert isinstance(transform, str) or torch.is_tensor(transform) if torch.is_tensor(transform): @@ -113,9 +134,11 @@ def __init__(self, transform: Union[str, torch.Tensor] = "klt") -> None: def forward(self, x: torch.Tensor, inverse: bool = False) -> torch.Tensor: """ Args: + x (torch.tensor): A CHW or NCHW RGB or RGBA image tensor. inverse (bool, optional): Whether to recorrelate or decorrelate colors. - Default is set to False. + Default: False. + Returns: chw (torch.tensor): A tensor with it's colors recorrelated or decorrelated. @@ -156,15 +179,6 @@ def forward(self, x: torch.Tensor, inverse: bool = False) -> torch.Tensor: class CenterCrop(torch.nn.Module): """ Center crop a specified amount from a tensor. - Arguments: - size (int, sequence, int): Number of pixels to center crop away. - pixels_from_edges (bool, optional): Whether to treat crop size - values as the number of pixels from the tensor's edge, or an - exact shape in the center. - offset_left (bool, optional): If the cropped away sides are not - equal in size, offset center by +1 to the left and/or top. - Default is set to False. This parameter is only valid when - pixels_from_edges is False. """ def __init__( @@ -173,6 +187,18 @@ def __init__( pixels_from_edges: bool = False, offset_left: bool = False, ) -> None: + """ + Args: + + size (int, sequence, int): Number of pixels to center crop away. + pixels_from_edges (bool, optional): Whether to treat crop size + values as the number of pixels from the tensor's edge, or an + exact shape in the center. + offset_left (bool, optional): If the cropped away sides are not + equal in size, offset center by +1 to the left and/or top. + This parameter is only valid when `pixels_from_edges` is False. + Default: False + """ super().__init__() self.crop_vals = size self.pixels_from_edges = pixels_from_edges @@ -181,10 +207,12 @@ def __init__( def forward(self, input: torch.Tensor) -> torch.Tensor: """ Center crop an input. - Arguments: + + Args: input (torch.Tensor): Input to center crop. + Returns: - tensor (torch.Tensor): A center cropped tensor. + **tensor** (torch.Tensor): A center cropped *tensor*. """ return center_crop( @@ -200,18 +228,22 @@ def center_crop( ) -> torch.Tensor: """ Center crop a specified amount from a tensor. - Arguments: + + Args: + input (tensor): A CHW or NCHW image tensor to center crop. size (int, sequence, int): Number of pixels to center crop away. pixels_from_edges (bool, optional): Whether to treat crop size values as the number of pixels from the tensor's edge, or an exact shape in the center. + Default: False offset_left (bool, optional): If the cropped away sides are not equal in size, offset center by +1 to the left and/or top. 
- Default is set to False. This parameter is only valid when - pixels_from_edges is False. + This parameter is only valid when `pixels_from_edges` is False. + Default: False + Returns: - *tensor*: A center cropped tensor. + **tensor**: A center cropped *tensor*. """ assert input.dim() == 3 or input.dim() == 4 @@ -246,10 +278,13 @@ def _rand_select( ) -> Union[int, float, torch.Tensor]: """ Randomly return a single value from the provided tuple, list, or tensor. + Args: + transform_values (sequence): A sequence of values to randomly select from. + Returns: - *value*: A single value from the specified sequence. + **value**: A single value from the specified sequence. """ n = torch.randint(low=0, high=len(transform_values), size=[1]).item() return transform_values[n] @@ -258,11 +293,14 @@ def _rand_select( class RandomScale(nn.Module): """ Apply random rescaling on a NCHW tensor. - Arguments: - scale (float, sequence): Tuple of rescaling values to randomly select from. """ def __init__(self, scale: NumSeqOrTensorType) -> None: + """ + Args: + + scale (float, sequence): Tuple of rescaling values to randomly select from. + """ super().__init__() self.scale = scale @@ -292,10 +330,13 @@ def scale_tensor( def forward(self, input: torch.Tensor) -> torch.Tensor: """ Randomly scale / zoom in or out of a tensor. + Args: + input (torch.Tensor): Input to randomly scale. + Returns: - tensor (torch.Tensor): Scaled tensor. + **tensor** (torch.Tensor): Scaled *tensor*. """ scale = _rand_select(self.scale) return self.scale_tensor(input, scale=scale) @@ -304,11 +345,14 @@ def forward(self, input: torch.Tensor) -> torch.Tensor: class RandomSpatialJitter(torch.nn.Module): """ Apply random spatial translations on a NCHW tensor. - Arguments: - translate (int): """ def __init__(self, translate: int) -> None: + """ + Args: + + translate (int): The max horizontal and vertical translation to use. + """ super().__init__() self.pad_range = 2 * translate self.pad = nn.ReflectionPad2d(translate) @@ -328,10 +372,13 @@ def translate_tensor(self, x: torch.Tensor, insets: torch.Tensor) -> torch.Tenso def forward(self, input: torch.Tensor) -> torch.Tensor: """ Randomly translate an input tensor's height and width dimensions. + Args: + input (torch.Tensor): Input to randomly translate. + Returns: - tensor (torch.Tensor): A randomly translated tensor. + **tensor** (torch.Tensor): A randomly translated *tensor*. """ insets = torch.randint(high=self.pad_range, size=(2,)) return self.translate_tensor(input, insets) @@ -341,22 +388,27 @@ class ScaleInputRange(nn.Module): """ Multiplies the input by a specified multiplier for models with input ranges other than [0,1]. - - Args: - multiplier (float): A float value used to scale the input. """ def __init__(self, multiplier: float = 1.0) -> None: + """ + Args: + + multiplier (float): A float value used to scale the input. + """ super().__init__() self.multiplier = multiplier def forward(self, x: torch.Tensor) -> torch.Tensor: """ Scale an input tensor's values. + Args: + x (torch.Tensor): Input to scale values of. + Returns: - tensor (torch.Tensor): tensor with it's values scaled. + **tensor** (torch.Tensor): tensor with it's values scaled. """ return x * self.multiplier @@ -369,10 +421,13 @@ class RGBToBGR(nn.Module): def forward(self, x: torch.Tensor) -> torch.Tensor: """ Perform RGB to BGR conversion on an input + Args: + x (torch.Tensor): RGB image tensor to convert to BGR. + Returns: - BGR tensor (torch.Tensor): A BGR tensor. + **BGR tensor** (torch.Tensor): A BGR tensor. 
""" assert x.dim() == 4 assert x.size(1) == 3 @@ -417,13 +472,6 @@ class GaussianSmoothing(nn.Module): Apply gaussian smoothing on a 1d, 2d or 3d tensor. Filtering is performed seperately for each channel in the input using a depthwise convolution. - Arguments: - channels (int, sequence): Number of channels of the input tensors. Output will - have this number of channels as well. - kernel_size (int, sequence): Size of the gaussian kernel. - sigma (float, sequence): Standard deviation of the gaussian kernel. - dim (int, optional): The number of dimensions of the data. - Default value is 2 (spatial). """ def __init__( @@ -433,6 +481,16 @@ def __init__( sigma: Union[float, Sequence[float]], dim: int = 2, ) -> None: + """ + Args: + + channels (int, sequence): Number of channels of the input tensors. Output + will have this number of channels as well. + kernel_size (int, sequence): Size of the gaussian kernel. + sigma (float, sequence): Standard deviation of the gaussian kernel. + dim (int, optional): The number of dimensions of the data. + Default value is 2 (spatial). + """ super().__init__() if isinstance(kernel_size, numbers.Number): kernel_size = [kernel_size] * dim @@ -477,10 +535,13 @@ def __init__( def forward(self, input: torch.Tensor) -> torch.Tensor: """ Apply gaussian filter to input. - Arguments: + + Args: + input (torch.Tensor): Input to apply gaussian filter on. + Returns: - filtered (torch.Tensor): Filtered output. + **filtered** (torch.Tensor): Filtered output. """ return self.conv(input, weight=self.weight, groups=self.groups) @@ -496,10 +557,13 @@ def forward( ) -> torch.Tensor: """ Apply NumPy symmetric padding to an input tensor while preserving the gradient. + Args: + x (torch.Tensor): Input to apply symmetric padding on. + Returns: - tensor (torch.Tensor): Padded tensor. + **tensor** (torch.Tensor): Padded tensor. """ ctx.padding = padding x_device = x.device @@ -516,10 +580,13 @@ def backward( ) -> Tuple[torch.Tensor, None]: """ Crop away symmetric padding. + Args: + grad_output (torch.Tensor): Input to remove symmetric padding from. + Returns: - grad_input (torch.Tensor): Unpadded tensor. + **grad_input** (torch.Tensor): Unpadded tensor. """ grad_input = grad_output.clone() B, C, H, W = grad_input.size() @@ -534,23 +601,28 @@ def backward( class NChannelsToRGB(nn.Module): """ Convert an NCHW image with n channels into a 3 channel RGB image. - - Args: - warp (bool, optional): Whether or not to make the resulting RGB colors more - distict from each other. Default is set to False. """ def __init__(self, warp: bool = False) -> None: + """ + Args: + + warp (bool, optional): Whether or not to make the resulting RGB colors more + distict from each other. Default is set to False. + """ super().__init__() self.warp = warp def forward(self, x: torch.Tensor) -> torch.Tensor: """ Reduce any number of channels down to 3. + Args: + x (torch.Tensor): Input to reduce channel dimensions on. + Returns: - 3 channel RGB tensor (torch.Tensor): RGB image tensor. + **3 channel RGB tensor** (torch.Tensor): RGB image tensor. """ assert x.dim() == 4 return nchannels_to_rgb(x, self.warp) @@ -558,16 +630,18 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: class RandomCrop(nn.Module): """ - Randomly crop out a specific size from an NCHW image tensor. - ​ - Args: - crop_size (int, sequence, int): The desired cropped output size. + Randomly crop out a specific size from an NCHW image tensor. 
""" def __init__( self, crop_size: IntSeqOrIntType, ) -> None: + """ + Args: + + crop_size (int, sequence, int): The desired cropped output size. + """ super().__init__() crop_size = [crop_size] * 2 if not hasattr(crop_size, "__iter__") else crop_size crop_size = list(crop_size) * 2 if len(crop_size) == 1 else crop_size diff --git a/tests/optim/param/test_images.py b/tests/optim/param/test_images.py index 525d6277aa..7c420aa579 100644 --- a/tests/optim/param/test_images.py +++ b/tests/optim/param/test_images.py @@ -337,7 +337,7 @@ def test_sharedimage_get_offset_single_number(self) -> None: shapes=shared_shapes, parameterization=test_param ) - offset = image_param.get_offset(4, 3) + offset = image_param._get_offset(4, 3) self.assertEqual(len(offset), 3) self.assertEqual(offset, [[4, 4, 4, 4]] * 3) @@ -354,7 +354,7 @@ def test_sharedimage_get_offset_exact(self) -> None: ) offset_vals = ((1, 2, 3, 4), (4, 3, 2, 1), (1, 2, 3, 4)) - offset = image_param.get_offset(offset_vals, 3) + offset = image_param._get_offset(offset_vals, 3) self.assertEqual(len(offset), 3) self.assertEqual(offset, [[int(o) for o in v] for v in offset_vals]) @@ -371,7 +371,7 @@ def test_sharedimage_get_offset_single_set_four_numbers(self) -> None: ) offset_vals = (1, 2, 3, 4) - offset = image_param.get_offset(offset_vals, 3) + offset = image_param._get_offset(offset_vals, 3) self.assertEqual(len(offset), 3) self.assertEqual(offset, [list(offset_vals)] * 3) @@ -388,7 +388,7 @@ def test_sharedimage_get_offset_single_set_three_numbers(self) -> None: ) offset_vals = (2, 3, 4) - offset = image_param.get_offset(offset_vals, 3) + offset = image_param._get_offset(offset_vals, 3) self.assertEqual(len(offset), 3) self.assertEqual(offset, [[0] + list(offset_vals)] * 3) @@ -405,7 +405,7 @@ def test_sharedimage_get_offset_single_set_two_numbers(self) -> None: ) offset_vals = (3, 4) - offset = image_param.get_offset(offset_vals, 3) + offset = image_param._get_offset(offset_vals, 3) self.assertEqual(len(offset), 3) self.assertEqual(offset, [[0, 0] + list(offset_vals)] * 3) @@ -448,7 +448,7 @@ def test_apply_offset(self): ) test_x_list = [torch.ones(*size) for x in range(size[0])] - output_A = image_param.apply_offset(test_x_list) + output_A = image_param._apply_offset(test_x_list) x_list = [torch.ones(*size) for x in range(size[0])] self.assertEqual(image_param.offset, [list(offset_vals)]) @@ -475,7 +475,7 @@ def test_interpolate_tensor(self) -> None: batch = 1 test_tensor = torch.ones(6, 4, 128, 128) - output_tensor = image_param.interpolate_tensor( + output_tensor = image_param._interpolate_tensor( test_tensor, batch, channels, size[0], size[1] ) From 1a4c0bfa277eb297b9320ed6d4599d67a411093b Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Thu, 29 Jul 2021 15:45:54 -0600 Subject: [PATCH 09/10] Fix Flake8 --- captum/optim/_param/image/images.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/captum/optim/_param/image/images.py b/captum/optim/_param/image/images.py index eee209806d..f16064f83f 100644 --- a/captum/optim/_param/image/images.py +++ b/captum/optim/_param/image/images.py @@ -152,7 +152,8 @@ def __init__( parameterized output image tensor. channels (int, optional): The number of channels to use for each image. Default: 3 - batch (int, optional): The number of images to stack along the batch dimension. + batch (int, optional): The number of images to stack along the batch + dimension. Default: 1 init (torch.tensor, optional): Optionally specify a tensor to use instead of creating one. 
@@ -289,7 +290,8 @@ def __init__( parameterized output image tensor. channels (int, optional): The number of channels to use for each image. Default: 3 - batch (int, optional): The number of images to stack along the batch dimension. + batch (int, optional): The number of images to stack along the batch + dimension. Default: 1 init (torch.tensor, optional): Optionally specify a tensor to use instead of creating one. @@ -331,7 +333,8 @@ def __init__( parameterized output image tensor. channels (int, optional): The number of channels to use for each image. Default: 3 - batch (int, optional): The number of images to stack along the batch dimension. + batch (int, optional): The number of images to stack along the batch + dimension. Default: 1 init (torch.tensor, optional): Optionally specify a tensor to use instead of creating one. @@ -596,16 +599,16 @@ def __init__( batch (int, optional): The number of channels to use when creating the nn.Parameter tensor, or stacking init images. Default: 1 - parameterization (ImageParameterization, optional): An image parameterization - class. + parameterization (ImageParameterization, optional): An image + parameterization class. Default: FFTImage squash_func (Callable[[torch.Tensor], torch.Tensor]], optional): The squash function to use after color recorrelation. A funtion or lambda function. Default: None decorrelation_module (nn.Module, optional): A ToRGB instance. Default: ToRGB - decorrelate_init (bool, optional): Whether or not to apply color decorrelation - to the init tensor input. + decorrelate_init (bool, optional): Whether or not to apply color + decorrelation to the init tensor input. Default: True """ super().__init__() From bdc3f17cc6f24051b7e7f7098e5b92f9db871ffc Mon Sep 17 00:00:00 2001 From: ProGamerGov Date: Fri, 30 Jul 2021 19:45:24 -0600 Subject: [PATCH 10/10] Add missing 'optional's to docs --- captum/optim/_core/optimization.py | 4 ++-- captum/optim/_param/image/images.py | 8 ++++---- captum/optim/_param/image/transforms.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/captum/optim/_core/optimization.py b/captum/optim/_core/optimization.py index 6f2eda25df..c251dfc8ec 100644 --- a/captum/optim/_core/optimization.py +++ b/captum/optim/_core/optimization.py @@ -129,8 +129,8 @@ def optimize( loss_summarize_fn (Callable, optional): The function to use for summarizing tensor outputs from loss functions. Default: default_loss_summarize - lr: (float): If no optimizer is given, then lr is used as the learning rate - for the Adam optimizer. + lr: (float, optional): If no optimizer is given, then lr is used as the + learning rate for the Adam optimizer. Default: 0.025 Returns: diff --git a/captum/optim/_param/image/images.py b/captum/optim/_param/image/images.py index f16064f83f..cf4b01da0d 100644 --- a/captum/optim/_param/image/images.py +++ b/captum/optim/_param/image/images.py @@ -51,9 +51,9 @@ def open(cls, path: str, scale: float = 255.0, mode: str = "RGB") -> "ImageTenso Args: path (str): A URL or filepath to an image. - scale (float): The image scale to use. + scale (float, optional): The image scale to use. Default: 255.0 - mode (str:) The image loading mode to use. + mode (str, optional): The image loading mode to use. Default: "RGB" Returns: @@ -103,7 +103,7 @@ def show( figsize (Tuple[int, int], optional): height & width to use for displaying the `ImageTensor` figure. 
- scale (float): Value to multiply the `ImageTensor` by so that + scale (float, optional): Value to multiply the `ImageTensor` by so that it's value range is [0-255] for display. Default: 255.0 """ @@ -117,7 +117,7 @@ def export(self, filename: str, scale: float = 255.0) -> None: filename (str): The filename to use when saving the `ImageTensor` as an image file. - scale (float): Value to multiply the `ImageTensor` by so that + scale (float, optional): Value to multiply the `ImageTensor` by so that it's value range is [0-255] for saving. Default: 255.0 """ diff --git a/captum/optim/_param/image/transforms.py b/captum/optim/_param/image/transforms.py index 74cf645ad5..93df78243e 100644 --- a/captum/optim/_param/image/transforms.py +++ b/captum/optim/_param/image/transforms.py @@ -394,7 +394,7 @@ def __init__(self, multiplier: float = 1.0) -> None: """ Args: - multiplier (float): A float value used to scale the input. + multiplier (float, optional): A float value used to scale the input. """ super().__init__() self.multiplier = multiplier
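A closing sketch tying together the ImageTensor helpers whose docstrings the series finishes above; the URL and filenames are placeholders:

    # Assumed ImageTensor workflow per the documented open/show/export methods.
    from captum.optim import ImageTensor

    img = ImageTensor.open("https://example.com/sample.png", scale=255.0, mode="RGB")
    img.show(figsize=(4, 4))       # values remapped to [0-255] for display
    img.export("sample_copy.png")  # values remapped to [0-255] when saving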