diff --git a/docs/source/backend.rst b/docs/source/backend.rst
new file mode 100644
index 0000000000..5ec0891c19
--- /dev/null
+++ b/docs/source/backend.rst
@@ -0,0 +1,138 @@
+.. _backend:
+
+torchaudio.backend
+==================
+
+:mod:`torchaudio.backend` module provides implementations for audio file I/O, using different backend libraries.
+To switch backend, use :py:func:`torchaudio.set_audio_backend`. To check the current backend use :py:func:`torchaudio.get_audio_backend`.
+
+.. warning::
+   Although the ``sox`` backend is the default for backward compatibility reasons, it has a number of issues; it is therefore highly recommended to use the ``sox_io`` backend instead. Note, however, that due to the interface refinement, functions defined in ``sox`` backend and those defined in ``sox_io`` backend do not have the same signatures.
+
+.. note::
+   Instead of calling functions in :mod:`torchaudio.backend` directly, please use ``torchaudio.info``, ``torchaudio.load``, ``torchaudio.load_wav`` and ``torchaudio.save`` with proper backend set with :func:`torchaudio.set_audio_backend`.
+
+There are currently three implementations available.
+
+    * :ref:`sox<sox_backend>`
+    * :ref:`sox_io<sox_io_backend>`
+    * :ref:`soundfile<soundfile_backend>`
+
+``sox`` backend is the original backend which is built on ``libsox``. This module is currently the default but is known to have a number of issues, such as wrong handling of WAV files other than 16-bit signed integer. Users are encouraged to use ``sox_io`` backend. This backend requires C++ extension module and is not available on Windows system.
+
+``sox_io`` backend is the new backend which is built on ``libsox`` and bound to Python with ``Torchscript``. This module addresses all the known issues ``sox`` backend has. Function calls to this backend can be Torchscriptable. This backend requires C++ extension module and is not available on Windows system.
+
+``soundfile`` backend is built on ``PySoundFile``. You need to install ``PySoundFile`` separately.
+
+Common Data Structure
+~~~~~~~~~~~~~~~~~~~~~
+
+Structures used to exchange data between Python interface and ``libsox``. They are used by :ref:`sox<sox_backend>` and :ref:`soundfile<soundfile_backend>` but not by :ref:`sox_io<sox_io_backend>`.
+
+.. autoclass:: torchaudio.backend.common.SignalInfo
+
+.. autoclass:: torchaudio.backend.common.EncodingInfo
+
+.. _sox_backend:
+
+Sox Backend
+~~~~~~~~~~~
+
+``sox`` backend is available on ``torchaudio`` installation with C++ extension. It is currently not available on Windows system.
+
+It is currently default backend when it's available. You can switch from another backend to ``sox`` backend with the following;
+
+.. code::
+
+   torchaudio.set_audio_backend("sox")
+
+info
+----
+
+.. autofunction:: torchaudio.backend.sox_backend.info
+
+load
+----
+
+.. autofunction:: torchaudio.backend.sox_backend.load
+
+.. autofunction:: torchaudio.backend.sox_backend.load_wav
+
+
+save
+----
+
+.. autofunction:: torchaudio.backend.sox_backend.save
+
+others
+------
+
+.. automodule:: torchaudio.backend.sox_backend
+   :members:
+   :exclude-members: info, load, load_wav, save
+
+.. _sox_io_backend:
+
+Sox IO Backend
+~~~~~~~~~~~~~~
+
+``sox_io`` backend is available on ``torchaudio`` installation with C++ extension. It is currently not available on Windows system.
+
+This new backend is recommended over ``sox`` backend. You can switch from another backend to ``sox_io`` backend with the following;
+
+.. code::
+
+   torchaudio.set_audio_backend("sox_io")
+
+The function call to this backend can be Torchscript-able. You can apply :func:`torch.jit.script` and dump the object to file, then call it from C++ application.
+
+info
+----
+
+.. autoclass:: torchaudio.backend.sox_io_backend.AudioMetaData
+
+.. autofunction:: torchaudio.backend.sox_io_backend.info
+
+load
+----
+
+.. autofunction:: torchaudio.backend.sox_io_backend.load
+
+.. autofunction:: torchaudio.backend.sox_io_backend.load_wav
+
+
+save
+----
+
+.. autofunction:: torchaudio.backend.sox_io_backend.save
+
+.. _soundfile_backend:
+
+Soundfile Backend
+~~~~~~~~~~~~~~~~~
+
+``soundfile`` backend is available when ``PySoundFile`` is installed. This backend works on ``torchaudio`` installation without C++ extension. (i.e. Windows)
+
+You can switch from another backend to ``soundfile`` backend with the following;
+
+.. code::
+
+   torchaudio.set_audio_backend("soundfile")
+
+info
+----
+
+.. autofunction:: torchaudio.backend.soundfile_backend.info
+
+load
+----
+
+.. autofunction:: torchaudio.backend.soundfile_backend.load
+
+.. autofunction:: torchaudio.backend.soundfile_backend.load_wav
+
+
+save
+----
+
+.. autofunction:: torchaudio.backend.soundfile_backend.save
diff --git a/docs/source/index.rst b/docs/source/index.rst
index c6d0efde69..8146e545c3 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -1,5 +1,5 @@
 torchaudio
-===========
+==========
 
 The :mod:`torchaudio` package consists of I/O, popular datasets and common audio transformations.
 
@@ -7,12 +7,13 @@ The :mod:`torchaudio` package consists of I/O, popular datasets and common audio
    :maxdepth: 2
    :caption: Package Reference
 
-   sox_effects
+   torchaudio
+   backend
+   functional
+   transforms
    datasets
+   models
+   sox_effects
    compliance.kaldi
    kaldi_io
-   transforms
-   functional
-
-.. automodule:: torchaudio
-   :members:
+   utils
diff --git a/docs/source/sox_effects.rst b/docs/source/sox_effects.rst
index 56cd985d0a..c5bb24cd73 100644
--- a/docs/source/sox_effects.rst
+++ b/docs/source/sox_effects.rst
@@ -1,5 +1,4 @@
-.. role:: hidden
-    :class: hidden-section
+.. _sox_effects:
 
 torchaudio.sox_effects
 ======================
@@ -14,8 +13,8 @@ Create SoX effects chain for preprocessing audio.
 .. autoclass:: SoxEffect
   :members:
 
-:hidden:`SoxEffectsChain`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+SoxEffectsChain
+---------------
 
 .. autoclass:: SoxEffectsChain
   :members: append_effect_to_chain, sox_build_flow_effects, clear_chain, set_input_file
diff --git a/docs/source/torchaudio.rst b/docs/source/torchaudio.rst
new file mode 100644
index 0000000000..f7b826b571
--- /dev/null
+++ b/docs/source/torchaudio.rst
@@ -0,0 +1,43 @@
+torchaudio
+==========
+
+I/O functionalities
+~~~~~~~~~~~~~~~~~~~
+
+Audio I/O functions are implemented in :ref:`torchaudio.backend<backend>` module, but for the ease of use, the following functions are made available on :mod:`torchaudio` module. There are different backends available and you can switch backends with :func:`set_audio_backend`.
+
+Refer to :ref:`backend` for the detail.
+
+.. function:: torchaudio.info(filepath: str, ...)
+
+   Fetch meta data of an audio file. Refer to :ref:`backend` for the detail.
+
+.. function:: torchaudio.load(filepath: str, ...)
+
+   Load audio file into torch.Tensor object. Refer to :ref:`backend` for the detail.
+
+.. function:: torchaudio.load_wav(filepath: str, ...)
+
+   Load audio file into torch.Tensor. Refer to :ref:`backend` for the detail.
+
+.. function:: torchaudio.save(filepath: str, src: torch.Tensor, sample_rate: int, ...)
+
+   Save torch.Tensor object into an audio format. Refer to :ref:`backend` for the detail.
+
+.. currentmodule:: torchaudio
+
+Backend Utilities
+~~~~~~~~~~~~~~~~~
+
+.. autofunction:: list_audio_backends
+
+.. autofunction:: get_audio_backend
+
+.. autofunction:: set_audio_backend
+
+Sox Effects Utilities
+~~~~~~~~~~~~~~~~~~~~~
+
+.. autofunction:: initialize_sox
+
+.. autofunction:: shutdown_sox
diff --git a/torchaudio/__init__.py b/torchaudio/__init__.py
index 16419ff3ff..1f507a3526 100644
--- a/torchaudio/__init__.py
+++ b/torchaudio/__init__.py
@@ -34,10 +34,10 @@
 @_mod_utils.deprecated(
     "Please remove the function call to initialize_sox. "
     "Resource initialization is now automatically handled.")
-def initialize_sox() -> int:
+def initialize_sox():
     """Initialize sox effects.
 
-    This function is deprecated. See ``torchaudio.sox_effects.init_sox_effects``
+    This function is deprecated. See :py:func:`torchaudio.sox_effects.init_sox_effects`
     """
     _init_sox_effects()
 
@@ -50,6 +50,6 @@ def initialize_sox() -> int:
 def shutdown_sox():
     """Shutdown sox effects.
 
-    This function is deprecated. See ``torchaudio.sox_effects.shutdown_sox_effects``
+    This function is deprecated. See :py:func:`torchaudio.sox_effects.shutdown_sox_effects`
     """
     _shutdown_sox_effects()
diff --git a/torchaudio/backend/common.py b/torchaudio/backend/common.py
index 0593c34bd5..bc61df4746 100644
--- a/torchaudio/backend/common.py
+++ b/torchaudio/backend/common.py
@@ -2,6 +2,18 @@
 
 
 class SignalInfo:
+    """Data class returned by ``info`` functions.
+
+    Used by :ref:`sox backend<sox_backend>` and :ref:`soundfile backend<soundfile_backend>`
+
+    See https://fossies.org/dox/sox-14.4.2/structsox__signalinfo__t.html
+
+    :ivar Optional[int] channels: The number of channels
+    :ivar Optional[float] rate: Sampling rate
+    :ivar Optional[int] precision: Bit depth
+    :ivar Optional[int] length: For :ref:`sox backend<sox_backend>`, the number of samples.
+        (frames * channels). For :ref:`soundfile backend<soundfile_backend>`, the number of frames.
+    """
     def __init__(self,
                  channels: Optional[int] = None,
                  rate: Optional[float] = None,
@@ -14,6 +26,20 @@ def __init__(self,
 
 
 class EncodingInfo:
+    """Data class returned by ``info`` functions.
+
+    Used by :ref:`sox backend<sox_backend>` and :ref:`soundfile backend<soundfile_backend>`
+
+    See https://fossies.org/dox/sox-14.4.2/structsox__encodinginfo__t.html
+
+    :ivar Optional[int] encoding: sox_encoding_t
+    :ivar Optional[int] bits_per_sample: bit depth
+    :ivar Optional[float] compression: Compression option
+    :ivar Any reverse_bytes:
+    :ivar Any reverse_nibbles:
+    :ivar Any reverse_bits:
+    :ivar Optional[bool] opposite_endian:
+    """
     def __init__(self,
                  encoding: Any = None,
                  bits_per_sample: Optional[int] = None,
diff --git a/torchaudio/backend/sox_io_backend.py b/torchaudio/backend/sox_io_backend.py
index 4664f733c6..35949bf122 100644
--- a/torchaudio/backend/sox_io_backend.py
+++ b/torchaudio/backend/sox_io_backend.py
@@ -7,6 +7,12 @@
 
 
 class AudioMetaData:
+    """Data class to be returned by :py:func:`~torchaudio.backend.sox_io_backend.info`.
+
+    :ivar int sample_rate: Sample rate
+    :ivar int num_frames: The number of frames
+    :ivar int num_channels: The number of channels
+    """
     def __init__(self, sample_rate: int, num_frames: int, num_channels: int):
         self.sample_rate = sample_rate
         self.num_frames = num_frames
@@ -15,7 +21,14 @@ def __init__(self, sample_rate: int, num_frames: int, num_channels: int):
 
 @_mod_utils.requires_module('torchaudio._torchaudio')
 def info(filepath: str) -> AudioMetaData:
-    """Get signal information of an audio file."""
+    """Get signal information of an audio file.
+
+    Args:
+        filepath (str): Path to audio file
+
+    Returns:
+        AudioMetaData: meta data of the given audio.
+    """
     sinfo = torch.ops.torchaudio.sox_io_get_info(filepath)
     return AudioMetaData(sinfo.get_sample_rate(), sinfo.get_num_frames(), sinfo.get_num_channels())
 
@@ -30,21 +43,28 @@ def load(
 ) -> Tuple[torch.Tensor, int]:
     """Load audio data from file.
 
-    This function can handle all the codecs that underlying libsox can handle, however note the
-    followings.
-
     Note:
-        This function is tested on the following formats;
-         - WAV
-            - 32-bit floating-point
-            - 32-bit signed integer
-            - 16-bit signed integer
-            -  8-bit unsigned integer
-         - MP3
-         - FLAC
-         - OGG/VORBIS
-
-    By default, this function returns Tensor with ``float32`` dtype and the shape of ``[channel, time]``.
+        This function can handle all the codecs that underlying libsox can handle,
+        however it is tested on the following formats;
+
+        * WAV
+
+            * 32-bit floating-point
+            * 32-bit signed integer
+            * 16-bit signed integer
+            * 8-bit unsigned integer
+
+        * MP3
+        * FLAC
+        * OGG/VORBIS
+        * OPUS
+
+        To load ``MP3``, ``FLAC``, ``OGG/VORBIS``, ``OPUS`` and other codecs ``libsox`` does not
+        handle natively, your installation of ``torchaudio`` has to be linked to ``libsox``
+        and corresponding codec libraries such as ``libmad`` or ``libmp3lame`` etc.
+
+    By default (``normalize=True``, ``channels_first=True``), this function returns Tensor with
+    ``float32`` dtype and the shape of ``[channel, time]``.
     The samples are normalized to fit in the range of ``[-1.0, 1.0]``.
 
     When the input format is WAV with integer type, such as 32-bit signed integer, 16-bit
@@ -54,24 +74,33 @@ def load(
     for 32-bit signed PCM, ``int16`` for 16-bit signed PCM and ``uint8`` for 8-bit unsigned PCM.
 
     ``normalize`` parameter has no effect on 32-bit floating-point WAV and other formats, such as
-    flac and mp3. For these formats, this function always returns ``float32`` Tensor with values
-    normalized  to ``[-1.0, 1.0]``.
+    ``flac`` and ``mp3``.
+    For these formats, this function always returns ``float32`` Tensor with values normalized to
+    ``[-1.0, 1.0]``.
 
     Args:
-        filepath: Path to audio file
-        frame_offset: Number of frames to skip before start reading data.
-        num_frames: Maximum number of frames to read. -1 reads all the remaining samples, starting
-            from ``frame_offset``. This function may return the less number of frames if there is
-            not enough frames in the given file.
-        normalize: When ``True``, this function always return ``float32``, and sample values are
-            normalized to ``[-1.0, 1.0]``. If input file is integer WAV, giving ``False`` will change
-            the resulting Tensor type to integer type. This argument has no effect for formats other
-            than integer WAV type.
-        channels_first: When True, the returned Tensor has dimension ``[channel, time]``.
+        filepath (str):
+            Path to audio file
+        frame_offset (int):
+            Number of frames to skip before start reading data.
+        num_frames (int):
+            Maximum number of frames to read. ``-1`` reads all the remaining samples,
+            starting from ``frame_offset``.
+            This function may return fewer frames if there are not enough
+            frames in the given file.
+        normalize (bool):
+            When ``True``, this function always returns ``float32``, and sample values are
+            normalized to ``[-1.0, 1.0]``.
+            If input file is integer WAV, giving ``False`` will change the resulting Tensor type to
+            integer type.
+            This argument has no effect for formats other than integer WAV type.
+        channels_first (bool):
+            When True, the returned Tensor has dimension ``[channel, time]``.
             Otherwise, the returned Tensor's dimension is ``[time, channel]``.
 
     Returns:
-        torch.Tensor: If the input file has integer wav format and normalization is off, then it has
+        torch.Tensor:
+            If the input file has integer wav format and normalization is off, then it has
             integer type, else ``float32`` type. If ``channels_first=True``, it has
             ``[channel, time]`` else ``[time, channel]``.
     """
@@ -90,30 +119,42 @@ def save(
 ):
     """Save audio data to file.
 
-    Supported formats are;
-     - WAV
-        - 32-bit floating-point
-        - 32-bit signed integer
-        - 16-bit signed integer
-        -  8-bit unsigned integer
-     - MP3
-     - FLAC
-     - OGG/VORBIS
+    Note:
+        Supported formats are;
+
+        * WAV
+
+            * 32-bit floating-point
+            * 32-bit signed integer
+            * 16-bit signed integer
+            * 8-bit unsigned integer
+
+        * MP3
+        * FLAC
+        * OGG/VORBIS
+
+        To save ``MP3``, ``FLAC``, ``OGG/VORBIS``, and other codecs ``libsox`` does not
+        handle natively, your installation of ``torchaudio`` has to be linked to ``libsox``
+        and corresponding codec libraries such as ``libmad`` or ``libmp3lame`` etc.
 
     Args:
-        filepath: Path to save file.
-        tensor: Audio data to save. must be 2D tensor.
-        sample_rate: sampling rate
-        channels_first: If True, the given tensor is interpreted as ``[channel, time]``.
-        compression: Used for formats other than WAV. This corresponds to ``-C`` option
-            of ``sox`` command.
+        filepath (str): Path to save file.
+        tensor (torch.Tensor): Audio data to save. must be 2D tensor.
+        sample_rate (int): sampling rate
+        channels_first (bool):
+            If ``True``, the given tensor is interpreted as ``[channel, time]``,
+            otherwise ``[time, channel]``.
+        compression (Optional[float]):
+            Used for formats other than WAV. This corresponds to ``-C`` option of ``sox`` command.
+
+                * | ``MP3``: Either bitrate (in ``kbps``) with quality factor, such as ``128.2``, or
+                  | VBR encoding with quality factor such as ``-4.2``. Default: ``-4.5``.
+                * | ``FLAC``: compression level. Whole number from ``0`` to ``8``.
+                  | ``8`` is default and highest compression.
+                * | ``OGG/VORBIS``: number from ``-1`` to ``10``; ``-1`` is the highest compression
+                  | and lowest quality. Default: ``3``.
+
             See the detail at http://sox.sourceforge.net/soxformat.html.
-            - MP3: Either bitrate [kbps] with quality factor, such as ``128.2`` or
-                VBR encoding with quality factor such as ``-4.2``. Default: ``-4.5``
-            - FLAC: compression level. Whole number from ``0`` to ``8``.
-                ``8`` is default and highest compression.
-            - OGG/VORBIS: number from -1 to 10; -1 is the highest compression and lowest
-                quality. Default: ``3``.
     """
     if compression is None:
         ext = str(filepath)[-3:].lower()
diff --git a/torchaudio/backend/utils.py b/torchaudio/backend/utils.py
index cb53b3e02f..84ec2fa3f5 100644
--- a/torchaudio/backend/utils.py
+++ b/torchaudio/backend/utils.py
@@ -19,7 +19,11 @@
 
 
 def list_audio_backends() -> List[str]:
-    """List available backends"""
+    """List available backends
+
+    Returns:
+        List[str]: The list of available backends.
+    """
     backends = []
     if is_module_available('soundfile'):
         backends.append('soundfile')
@@ -29,12 +33,13 @@ def list_audio_backends() -> List[str]:
     return backends
 
 
-def set_audio_backend(backend: Optional[str]) -> None:
+def set_audio_backend(backend: Optional[str]):
     """Set the backend for I/O operation
 
     Args:
-        backend (str): Name of the backend. One of "sox" or "soundfile",
-            based on availability of the system.
+        backend (Optional[str]): Name of the backend.
+            One of ``"sox"``, ``"sox_io"`` or ``"soundfile"`` based on availability
+            of the system. If ``None`` is provided, the current backend is unassigned.
     """
     if backend is not None and backend not in list_audio_backends():
         raise RuntimeError(
@@ -68,7 +73,11 @@ def _init_audio_backend():
 
 
 def get_audio_backend() -> Optional[str]:
-    """Get the name of the current backend"""
+    """Get the name of the current backend
+
+    Returns:
+        Optional[str]: The name of the current backend or ``None`` if no backend is assigned.
+    """
     if torchaudio.load == no_backend.load:
         return None
     if torchaudio.load == sox_backend.load:
diff --git a/torchaudio/functional.py b/torchaudio/functional.py
index 78c8c594c9..713544cc74 100644
--- a/torchaudio/functional.py
+++ b/torchaudio/functional.py
@@ -1646,11 +1646,11 @@ def compute_deltas(
     r"""Compute delta coefficients of a tensor, usually a spectrogram:
 
     .. math::
-        d_t = \frac{\sum_{n=1}^{\text{N}} n (c_{t+n} - c_{t-n})}{2 \sum_{n=1}^{\text{N} n^2}
+       d_t = \frac{\sum_{n=1}^{\text{N}} n (c_{t+n} - c_{t-n})}{2 \sum_{n=1}^{\text{N}} n^2}
 
     where :math:`d_t` is the deltas at time :math:`t`,
     :math:`c_t` is the spectrogram coeffcients at time :math:`t`,
-    :math:`N` is (`win_length`-1)//2.
+    :math:`N` is ``(win_length-1)//2``.
 
     Args:
         specgram (Tensor): Tensor of audio of dimension (..., freq, time)
diff --git a/torchaudio/models/wav2letter.py b/torchaudio/models/wav2letter.py
index 3466e42dd2..acd4dfe4b3 100644
--- a/torchaudio/models/wav2letter.py
+++ b/torchaudio/models/wav2letter.py
@@ -1,5 +1,3 @@
-from typing import Optional
-
 from torch import Tensor
 from torch import nn
 
@@ -7,8 +5,9 @@
 
 
 class Wav2Letter(nn.Module):
-    r"""Wav2Letter model architecture from the `"Wav2Letter: an End-to-End ConvNet-based Speech Recognition System"
-     <https://arxiv.org/abs/1609.03193>`_ paper.
+    r"""Wav2Letter model architecture from the `Wav2Letter an End-to-End ConvNet-based Speech Recognition System`_.
+
+    .. _Wav2Letter an End-to-End ConvNet-based Speech Recognition System: https://arxiv.org/abs/1609.03193
 
      :math:`\text{padding} = \frac{\text{ceil}(\text{kernel} - \text{stride})}{2}`
 
@@ -63,7 +62,7 @@ def __init__(self, num_classes: int = 40,
     def forward(self, x: Tensor) -> Tensor:
         r"""
         Args:
-            x (Tensor): Tensor of dimension (batch_size, num_features, input_length).
+            x (torch.Tensor): Tensor of dimension (batch_size, num_features, input_length).
 
         Returns:
             Tensor: Predictor tensor of dimension (batch_size, number_of_classes, input_length).
diff --git a/torchaudio/sox_effects/sox_effects.py b/torchaudio/sox_effects/sox_effects.py
index 0aee312126..de0bf7f688 100644
--- a/torchaudio/sox_effects/sox_effects.py
+++ b/torchaudio/sox_effects/sox_effects.py
@@ -13,31 +13,29 @@
 
 
 @_mod_utils.requires_module('torchaudio._torchaudio')
-def init_sox_effects() -> None:
-    """Initialize resources required to use ``SoxEffectsChain``
-
-    You do not need to call this function manually. It is called automatically.
-
-    Once initialized, you do not need to call this function again across the multiple call of
-    ``SoxEffectsChain.sox_build_flow_effects``, though it is safe to do so as long as
-    ``shutdown_sox_effects`` is not called yet.
-    Once ``shutdown_sox_effects`` is called, you can no longer use SoX effects and
-    initializing again will result in error.
+def init_sox_effects():
+    """Initialize resources required to use sox effects.
 
     Note:
-        This function is not required for simple loading.
+        You do not need to call this function manually. It is called automatically.
+
+    Once initialized, you do not need to call this function again across the multiple uses of
+    sox effects though it is safe to do so as long as :func:`shutdown_sox_effects` is not called yet.
+    Once :func:`shutdown_sox_effects` is called, you can no longer use SoX effects and initializing
+    again will result in error.
     """
     torch.ops.torchaudio.sox_effects_initialize_sox_effects()
 
 
 @_mod_utils.requires_module("torchaudio._torchaudio")
-def shutdown_sox_effects() -> None:
-    """Clean up resources required to use ``SoxEffectsChain``
+def shutdown_sox_effects():
+    """Clean up resources required to use sox effects.
 
-    You do not need to call this function manually. It is called automatically.
+    Note:
+        You do not need to call this function manually. It is called automatically.
 
     It is safe to call this function multiple times.
-    Once ``shutdown_sox_effects`` is called, you can no longer use SoX effects and
+    Once :py:func:`shutdown_sox_effects` is called, you can no longer use SoX effects and
     initializing again will result in error.
     """
     torch.ops.torchaudio.sox_effects_shutdown_sox_effects()
@@ -47,10 +45,12 @@ def shutdown_sox_effects() -> None:
 def effect_names() -> List[str]:
     """Gets list of valid sox effect names
 
-    Returns: list[str]
+    Returns:
+        List[str]: list of available effect names.
 
     Example
-        >>> EFFECT_NAMES = torchaudio.sox_effects.effect_names()
+        >>> torchaudio.sox_effects.effect_names()
+        ['allpass', 'band', 'bandpass', ... ]
     """
     return torch.ops.torchaudio.sox_effects_list_effects()
 
@@ -70,45 +70,51 @@ class SoxEffectsChain(object):
     r"""SoX effects chain class.
 
     Args:
-        normalization (bool, number, or callable, optional): If boolean `True`, then output is divided by `1 << 31`
-            (assumes signed 32-bit audio), and normalizes to `[-1, 1]`. If `number`, then output is divided by that
-            number. If `callable`, then the output is passed as a parameter to the given function, then the
-            output is divided by the result. (Default: ``True``)
-        channels_first (bool, optional): Set channels first or length first in result.  (Default: ``True``)
-        out_siginfo (sox_signalinfo_t, optional): a sox_signalinfo_t type, which could be helpful if the
-            audio type cannot be automatically determined. (Default: ``None``)
-        out_encinfo (sox_encodinginfo_t, optional): a sox_encodinginfo_t type, which could be set if the
-            audio type cannot be automatically determined. (Default: ``None``)
-        filetype (str, optional): a filetype or extension to be set if sox cannot determine it
-            automatically. . (Default: ``'raw'``)
+        normalization (bool, number, or callable, optional):
+            If boolean ``True``, then output is divided by ``1 << 31``
+            (assumes signed 32-bit audio), and normalizes to ``[-1, 1]``.
+            If ``number``, then output is divided by that number.
+            If ``callable``, then the output is passed as a parameter to the given function, then
+            the output is divided by the result. (Default: ``True``)
+        channels_first (bool, optional):
+            Set channels first or length first in result.  (Default: ``True``)
+        out_siginfo (sox_signalinfo_t, optional):
+            a sox_signalinfo_t type, which could be helpful if the audio type cannot be
+            automatically determined. (Default: ``None``)
+        out_encinfo (sox_encodinginfo_t, optional):
+            a sox_encodinginfo_t type, which could be set if the audio type cannot be
+            automatically determined. (Default: ``None``)
+        filetype (str, optional):
+            a filetype or extension to be set if sox cannot determine it automatically.
+            (Default: ``'raw'``)
 
     Returns:
-        Tuple[Tensor, int]: An output Tensor of size `[C x L]` or `[L x C]` where L is the number
+        Tuple[Tensor, int]:
+        An output Tensor of size ``[C x L]`` or ``[L x C]`` where L is the number
         of audio frames and C is the number of channels. An integer which is the sample rate of the
         audio (as listed in the metadata of the file)
 
     Example
         >>> class MyDataset(Dataset):
-        >>>     def __init__(self, audiodir_path):
-        >>>         self.data = [os.path.join(audiodir_path, fn) for fn in os.listdir(audiodir_path)]
-        >>>         self.E = torchaudio.sox_effects.SoxEffectsChain()
-        >>>         self.E.append_effect_to_chain("rate", [16000])  # resample to 16000hz
-        >>>         self.E.append_effect_to_chain("channels", ["1"])  # mono signal
-        >>>     def __getitem__(self, index):
-        >>>         fn = self.data[index]
-        >>>         self.E.set_input_file(fn)
-        >>>         x, sr = self.E.sox_build_flow_effects()
-        >>>         return x, sr
-        >>>
-        >>>     def __len__(self):
-        >>>         return len(self.data)
-        >>>
-        >>> torchaudio.initialize_sox()
+        ...     def __init__(self, audiodir_path):
+        ...         self.data = [
+        ...             os.path.join(audiodir_path, fn)
+        ...             for fn in os.listdir(audiodir_path)]
+        ...         self.E = torchaudio.sox_effects.SoxEffectsChain()
+        ...         self.E.append_effect_to_chain("rate", [16000])  # resample to 16000hz
+        ...         self.E.append_effect_to_chain("channels", ["1"])  # mono signal
+        ...     def __getitem__(self, index):
+        ...         fn = self.data[index]
+        ...         self.E.set_input_file(fn)
+        ...         x, sr = self.E.sox_build_flow_effects()
+        ...         return x, sr
+        ...
+        ...     def __len__(self):
+        ...         return len(self.data)
+        ...
         >>> ds = MyDataset(path_to_audio_files)
         >>> for sig, sr in ds:
-        >>>   [do something here]
-        >>> torchaudio.shutdown_sox()
-
+        ...    pass
     """
 
     EFFECTS_UNIMPLEMENTED = {"spectrogram", "splice", "noiseprof", "fir"}
@@ -165,9 +171,9 @@ def sox_build_flow_effects(self,
             out (Tensor, optional): Where the output will be written to. (Default: ``None``)
 
         Returns:
-            Tuple[Tensor, int]: An output Tensor of size `[C x L]` or `[L x C]` where L is the number
-            of audio frames and C is the number of channels. An integer which is the sample rate of the
-            audio (as listed in the metadata of the file)
+            Tuple[Tensor, int]: An output Tensor of size ``[C x L]`` or ``[L x C]`` where
+            L is the number of audio frames and C is the number of channels.
+            An integer which is the sample rate of the audio (as listed in the metadata of the file)
         """
         # initialize output tensor
         if out is not None:
diff --git a/torchaudio/transforms.py b/torchaudio/transforms.py
index f34f5c468e..72e55c94c8 100644
--- a/torchaudio/transforms.py
+++ b/torchaudio/transforms.py
@@ -86,20 +86,8 @@ def forward(self, waveform: Tensor) -> Tensor:
 
 class GriffinLim(torch.nn.Module):
     r"""Compute waveform from a linear scale magnitude spectrogram using the Griffin-Lim transformation.
-        Implementation ported from `librosa`.
 
-    .. [1] McFee, Brian, Colin Raffel, Dawen Liang, Daniel PW Ellis, Matt McVicar, Eric Battenberg, and Oriol Nieto.
-        "librosa: Audio and music signal analysis in python."
-        In Proceedings of the 14th python in science conference, pp. 18-25. 2015.
-
-    .. [2] Perraudin, N., Balazs, P., & Søndergaard, P. L.
-        "A fast Griffin-Lim algorithm,"
-        IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (pp. 1-4),
-        Oct. 2013.
-
-    .. [3] D. W. Griffin and J. S. Lim,
-        "Signal estimation from modified short-time Fourier transform,"
-        IEEE Trans. ASSP, vol.32, no.2, pp.236–243, Apr. 1984.
+    Implementation ported from ``librosa`` [1]_, [2]_, [3]_.
 
     Args:
         n_fft (int, optional): Size of FFT, creates ``n_fft // 2 + 1`` bins. (Default: ``400``)
@@ -117,6 +105,24 @@ class GriffinLim(torch.nn.Module):
             Values near 1 can lead to faster convergence, but above 1 may not converge. (Default: ``0.99``)
         length (int, optional): Array length of the expected output. (Default: ``None``)
         rand_init (bool, optional): Initializes phase randomly if True and to zero otherwise. (Default: ``True``)
+
+    References:
+        .. [1]
+           | McFee, Brian, Colin Raffel, Dawen Liang, Daniel PW Ellis, Matt McVicar, Eric Battenberg,
+             and Oriol Nieto.
+           | "librosa: Audio and music signal analysis in python."
+           | In Proceedings of the 14th python in science conference, pp. 18-25. 2015.
+
+        .. [2]
+           | Perraudin, N., Balazs, P., & Søndergaard, P. L.
+           | "A fast Griffin-Lim algorithm,"
+           | IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (pp. 1-4),
+           | Oct. 2013.
+
+        .. [3]
+           | D. W. Griffin and J. S. Lim,
+           | "Signal estimation from modified short-time Fourier transform,"
+           | IEEE Trans. ASSP, vol.32, no.2, pp.236–243, Apr. 1984.
     """
     __constants__ = ['n_fft', 'n_iter', 'win_length', 'hop_length', 'power', 'normalized',
                      'length', 'momentum', 'rand_init']
@@ -153,8 +159,9 @@ def __init__(self,
     def forward(self, specgram: Tensor) -> Tensor:
         r"""
         Args:
-            specgram (Tensor): A magnitude-only STFT spectrogram of dimension (..., freq, frames)
-            where freq is ``n_fft // 2 + 1``.
+            specgram (Tensor):
+                A magnitude-only STFT spectrogram of dimension (..., freq, frames)
+                where freq is ``n_fft // 2 + 1``.
 
         Returns:
             Tensor: waveform of (..., time), where time equals the ``length`` parameter if given.