Update docstrings and add examples

mthrok · mthrok · commit c4dafdb48097 · 2020-07-15T18:39:21.000Z
diff --git a/docs/source/sox_effects.rst b/docs/source/sox_effects.rst
@@ -8,6 +8,17 @@ Create SoX effects chain for preprocessing audio.
 
 .. currentmodule:: torchaudio.sox_effects
 
+:hidden:`apply_effects_tensor`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autofunction:: apply_effects_tensor
+
+:hidden:`apply_effects_file`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autofunction:: apply_effects_file
+
+
 :hidden:`SoxEffect`
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/torchaudio/sox_effects/sox_effects.py b/torchaudio/sox_effects/sox_effects.py
@@ -67,14 +67,49 @@ def apply_effects_tensor(
     """Apply sox effects to given Tensor
 
     Args:
-        tensor: Input 2D Tensor.
-        sample_rate: Sample rate
-        effects: List of effects.
-        channels_first: Indicates if the input Tensor's dimension is
+        tensor (torch.Tensor): Input 2D Tensor.
+        sample_rate (int): Sample rate
+        effects (List[List[str]]): List of effects.
+        channels_first (bool): Indicates if the input Tensor's dimension is
             ``[channels, time]`` or ``[time, channels]``
 
+    Returns:
+        Tuple[torch.Tensor, int]: Resulting Tensor and sample rate.
+        The resulting Tensor has the same ``dtype`` as the input Tensor, and
+        the same channels order. The shape of the Tensor can be different based on the
+        effects applied. Sample rate can also be different based on the effects applied.
+
+    Examples:
+        >>> # Defines the effects to apply
+        >>> effects = [
+        ...     ['gain', '-n'],  # normalises to 0dB
+        ...     ['pitch', '5'],  # 5 cent pitch shift
+        ...     ['rate', '8000'],  # resample to 8000 Hz
+        ... ]
+        >>> # Generate pseudo wave:
+        >>> # normalized, channels first, 2ch, sampling rate 16000, 1 second
+        >>> sample_rate = 16000
+        >>> waveform = 2 * torch.rand([2, sample_rate * 1]) - 1
+        >>> waveform.shape
+        torch.Size([2, 16000])
+        >>> waveform
+        tensor([[ 0.3138,  0.7620, -0.9019,  ..., -0.7495, -0.4935,  0.5442],
+                [-0.0832,  0.0061,  0.8233,  ..., -0.5176, -0.9140, -0.2434]])
+        >>> # Apply effects
+        >>> waveform, sample_rate = apply_effects_tensor(
+        ...     waveform, sample_rate, effects, channels_first=True)
+        >>> # The new waveform has sampling rate 8000, 1 second.
+        >>> # normalization and channel order are preserved
+        >>> waveform.shape
+        torch.Size([2, 8000])
+        >>> waveform
+        tensor([[ 0.5054, -0.5518, -0.4800,  ..., -0.0076,  0.0096, -0.0110],
+                [ 0.1331,  0.0436, -0.3783,  ..., -0.0035,  0.0012,  0.0008]])
+        >>> sample_rate
+        8000
+
     Notes:
-        This function works in the way very similar to ```sox``` command, however there are slight
+        This function works in the way very similar to ``sox`` command, however there are slight
         differences. For example, ``sox`` commnad adds certain effects automatically (such as
         ``rate`` effect after ``speed`` and ``pitch`` and other effects), but this function does
         only applies the given effects. (Therefore, to actually apply ``speed`` effect, you also
@@ -95,15 +130,42 @@ def apply_effects_file(
     """Apply sox effects to the audio file and load Tensor
 
     Args:
-        path: Path to the audio file.
-        effects: List of effects.
-        normalize: When ``True``, this function always return ``float32``, and sample values are
+        path (str): Path to the audio file.
+        effects (List[List[str]]): List of effects.
+        normalize (bool): When ``True``, this function always returns ``float32``, and sample values are
             normalized to ``[-1.0, 1.0]``. If input file is integer WAV, giving ``False`` will change
             the resulting Tensor type to integer type. This argument has no effect for formats other
             than integer WAV type.
-        channels_first: When True, the returned Tensor has dimension ``[channel, time]``.
+        channels_first (bool): When True, the returned Tensor has dimension ``[channel, time]``.
             Otherwise, the returned Tensor's dimension is ``[time, channel]``.
 
+    Returns:
+        Tuple[torch.Tensor, int]: Resulting Tensor and sample rate.
+        If ``normalize=True``, the resulting Tensor is always ``float32`` type.
+        If ``normalize=False`` and the input audio file is an integer WAV file, then the
+        resulting Tensor has corresponding integer type. (Note that 24-bit integer type is not supported.)
+        If ``channels_first=True``, the resulting Tensor has dimension ``[channel, time]``,
+        otherwise ``[time, channel]``.
+
+    Examples:
+        >>> # Defines the effects to apply
+        >>> effects = [
+        ...     ['gain', '-n'],  # normalises to 0dB
+        ...     ['pitch', '5'],  # 5 cent pitch shift
+        ...     ['rate', '8000'],  # resample to 8000 Hz
+        ... ]
+        >>> # Apply effects and load data with channels_first=True
+        >>> waveform, sample_rate = apply_effects_file("data.wav", effects, channels_first=True)
+        >>> waveform.shape
+        torch.Size([2, 8000])
+        >>> waveform
+        tensor([[ 5.1151e-03,  1.8073e-02,  2.2188e-02,  ...,  1.0431e-07,
+                 -1.4761e-07,  1.8114e-07],
+                [-2.6924e-03,  2.1860e-03,  1.0650e-02,  ...,  6.4122e-07,
+                 -5.6159e-07,  4.8103e-07]])
+        >>> sample_rate
+        8000
+
     Notes:
         This function works in the way very similar to ``sox`` command, however there are slight
         differences. For example, ``sox`` commnad adds certain effects automatically (such as