Update docstrings and add examples

mthrok · mthrok · commit a17fffbc2a8d · 2020-07-15T20:48:35.000Z
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -13,6 +13,7 @@ The :mod:`torchaudio` package consists of I/O, popular datasets and common audio
    kaldi_io
    transforms
    functional
+   utils
 
 .. automodule:: torchaudio
    :members:
diff --git a/docs/source/sox_effects.rst b/docs/source/sox_effects.rst
@@ -4,10 +4,16 @@
 torchaudio.sox_effects
 ======================
 
-Create SoX effects chain for preprocessing audio.
-
 .. currentmodule:: torchaudio.sox_effects
 
+Apply SoX effects chain on torch.Tensor or on file and load as torch.Tensor.
+
+.. autofunction:: apply_effects_tensor
+
+.. autofunction:: apply_effects_file
+
+Create SoX effects chain for preprocessing audio.
+
 :hidden:`SoxEffect`
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/docs/source/utils.rst b/docs/source/utils.rst
@@ -0,0 +1,21 @@
+.. role:: hidden
+    :class: hidden-section
+
+torchaudio.utils.sox_utils
+==========================
+
+Utility module to configure libsox. This affects functionalities in ``sox_io`` backend and ``torchaudio.sox_effects``.
+
+.. currentmodule:: torchaudio.utils.sox_utils
+
+.. autofunction:: set_seed
+
+.. autofunction:: set_verbosity
+
+.. autofunction:: set_buffer_size
+
+.. autofunction:: set_use_threads
+
+.. autofunction:: list_effects
+
+.. autofunction:: list_formats
diff --git a/torchaudio/sox_effects/sox_effects.py b/torchaudio/sox_effects/sox_effects.py
@@ -67,18 +67,53 @@ def apply_effects_tensor(
     """Apply sox effects to given Tensor
 
     Args:
-        tensor: Input 2D Tensor.
-        sample_rate: Sample rate
-        effects: List of effects.
-        channels_first: Indicates if the input Tensor's dimension is
+        tensor (torch.Tensor): Input 2D Tensor.
+        sample_rate (int): Sample rate
+        effects (List[List[str]]): List of effects.
+        channels_first (bool): Indicates if the input Tensor's dimension is
             ``[channels, time]`` or ``[time, channels]``
 
+    Returns:
+        Tuple[torch.Tensor, int]: Resulting Tensor and sample rate.
+        The resulting Tensor has the same ``dtype`` as the input Tensor, and
+        the same channels order. The shape of the Tensor can be different based on the
+        effects applied. Sample rate can also be different based on the effects applied.
+
     Notes:
-        This function works in the way very similar to ```sox``` command, however there are slight
+        This function works in the way very similar to ``sox`` command, however there are slight
         differences. For example, ``sox`` commnad adds certain effects automatically (such as
         ``rate`` effect after ``speed`` and ``pitch`` and other effects), but this function does
         only applies the given effects. (Therefore, to actually apply ``speed`` effect, you also
         need to give ``rate`` effect with desired sampling rate.)
+
+    Examples:
+        >>> # Defines the effects to apply
+        >>> effects = [
+        ...     ['gain', '-n'],  # normalises to 0dB
+        ...     ['pitch', '5'],  # 5 cent pitch shift
+        ...     ['rate', '8000'],  # resample to 8000 Hz
+        ... ]
+        >>> # Generate pseudo wave:
+        >>> # normalized, channels first, 2ch, sampling rate 16000, 1 second
+        >>> sample_rate = 16000
+        >>> waveform = 2 * torch.rand([2, sample_rate * 1]) - 1
+        >>> waveform.shape
+        torch.Size([2, 16000])
+        >>> waveform
+        tensor([[ 0.3138,  0.7620, -0.9019,  ..., -0.7495, -0.4935,  0.5442],
+                [-0.0832,  0.0061,  0.8233,  ..., -0.5176, -0.9140, -0.2434]])
+        >>> # Apply effects
+        >>> waveform, sample_rate = apply_effects_tensor(
+        ...     waveform, sample_rate, effects, channels_first=True)
+        >>> # The new waveform is sampling rate 8000, 1 second.
+        >>> # normalization and channel order are preserved
+        >>> waveform.shape
+        torch.Size([2, 8000])
+        >>> waveform
+        tensor([[ 0.5054, -0.5518, -0.4800,  ..., -0.0076,  0.0096, -0.0110],
+                [ 0.1331,  0.0436, -0.3783,  ..., -0.0035,  0.0012,  0.0008]])
+        >>> sample_rate
+        8000
     """
     in_signal = torch.classes.torchaudio.TensorSignal(tensor, sample_rate, channels_first)
     out_signal = torch.ops.torchaudio.sox_effects_apply_effects_tensor(in_signal, effects)
@@ -92,25 +127,52 @@ def apply_effects_file(
         normalize: bool = True,
         channels_first: bool = True,
 ) -> Tuple[torch.Tensor, int]:
-    """Apply sox effects to the audio file and load Tensor
+    """Apply sox effects to the audio file and load the resulting data as Tensor
 
     Args:
-        path: Path to the audio file.
-        effects: List of effects.
-        normalize: When ``True``, this function always return ``float32``, and sample values are
+        path (str): Path to the audio file.
+        effects (List[List[str]]): List of effects.
+        normalize (bool): When ``True``, this function always returns ``float32``, and sample values are
             normalized to ``[-1.0, 1.0]``. If input file is integer WAV, giving ``False`` will change
             the resulting Tensor type to integer type. This argument has no effect for formats other
             than integer WAV type.
-        channels_first: When True, the returned Tensor has dimension ``[channel, time]``.
+        channels_first (bool): When True, the returned Tensor has dimension ``[channel, time]``.
             Otherwise, the returned Tensor's dimension is ``[time, channel]``.
 
+    Returns:
+        Tuple[torch.Tensor, int]: Resulting Tensor and sample rate.
+        If ``normalize=True``, the resulting Tensor is always ``float32`` type.
+        If ``normalize=False`` and the input audio file is an integer WAV file, then the
+        resulting Tensor has corresponding integer type. (Note that 24-bit integer type is not supported.)
+        If ``channels_first=True``, the resulting Tensor has dimension ``[channel, time]``,
+        otherwise ``[time, channel]``.
+
     Notes:
         This function works in the way very similar to ``sox`` command, however there are slight
         differences. For example, ``sox`` commnad adds certain effects automatically (such as
         ``rate`` effect after ``speed``, ``pitch`` etc), but this function only applies the given
         effects. Therefore, to actually apply ``speed`` effect, you also need to give ``rate``
         effect with desired sampling rate, because internally, ``speed`` effects only alter sampling
         rate and leave samples untouched.
+
+    Examples:
+        >>> # Defines the effects to apply
+        >>> effects = [
+        ...     ['gain', '-n'],  # normalises to 0dB
+        ...     ['pitch', '5'],  # 5 cent pitch shift
+        ...     ['rate', '8000'],  # resample to 8000 Hz
+        ... ]
+        >>> # Apply effects and load data with channels_first=True
+        >>> waveform, sample_rate = apply_effects_file("data.wav", effects, channels_first=True)
+        >>> waveform.shape
+        torch.Size([2, 8000])
+        >>> waveform
+        tensor([[ 5.1151e-03,  1.8073e-02,  2.2188e-02,  ...,  1.0431e-07,
+                 -1.4761e-07,  1.8114e-07],
+                [-2.6924e-03,  2.1860e-03,  1.0650e-02,  ...,  6.4122e-07,
+                 -5.6159e-07,  4.8103e-07]])
+        >>> sample_rate
+        8000
     """
     signal = torch.ops.torchaudio.sox_effects_apply_effects_file(path, effects, normalize, channels_first)
     return signal.get_tensor(), signal.get_sample_rate()
diff --git a/torchaudio/utils/sox_utils.py b/torchaudio/utils/sox_utils.py
@@ -12,7 +12,7 @@ def set_seed(seed: int):
     """Set libsox's PRNG
 
     Args:
-        seed: seed value. valid range is int32.
+        seed (int): seed value. valid range is int32.
 
     See Also:
         http://sox.sourceforge.net/sox.html
@@ -25,7 +25,7 @@ def set_verbosity(verbosity: int):
     """Set libsox's verbosity
 
     Args:
-        verbosity: Set verbosity level of libsox.
+        verbosity (int): Set verbosity level of libsox.
             1: failure messages
             2: warnings
             3: details of processing
@@ -42,7 +42,7 @@ def set_buffer_size(buffer_size: int):
     """Set buffer size for sox effect chain
 
     Args:
-        buffer_size: Set the size in bytes of the buffers used for processing audio.
+        buffer_size (int): Set the size in bytes of the buffers used for processing audio.
 
     See Also:
         http://sox.sourceforge.net/sox.html
@@ -55,7 +55,7 @@ def set_use_threads(use_threads: bool):
     """Set multithread option for sox effect chain
 
     Args:
-        use_threads: When True, enables libsox's parallel effects channels processing.
+        use_threads (bool): When True, enables libsox's parallel effects channels processing.
             To use mutlithread, the underlying libsox has to be compiled with OpenMP support.
 
     See Also:
@@ -69,7 +69,7 @@ def list_effects() -> Dict[str, str]:
     """List the available sox effect names
 
     Returns:
-        Mapping from "effect name" to "usage"
+        Dict[str, str]: Mapping from "effect name" to "usage"
     """
     return dict(torch.ops.torchaudio.sox_utils_list_effects())
 
@@ -78,7 +78,7 @@ def list_effects() -> Dict[str, str]:
 def list_formats() -> List[str]:
     """List the supported audio formats
 
-    Returns: list[str]
-        List of supported audio formats
+    Returns:
+        List[str]: List of supported audio formats
     """
     return torch.ops.torchaudio.sox_utils_list_formats()