Skip to content

Commit a17fffb

Browse files
committed
Update docstrings and add examples
1 parent b3e5b5f commit a17fffb

File tree

5 files changed

+109
-19
lines changed

5 files changed

+109
-19
lines changed

docs/source/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ The :mod:`torchaudio` package consists of I/O, popular datasets and common audio
1313
kaldi_io
1414
transforms
1515
functional
16+
utils
1617

1718
.. automodule:: torchaudio
1819
:members:

docs/source/sox_effects.rst

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,16 @@
44
torchaudio.sox_effects
55
======================
66

7-
Create SoX effects chain for preprocessing audio.
8-
97
.. currentmodule:: torchaudio.sox_effects
108

9+
Apply SoX effects chain on torch.Tensor or on file and load as torch.Tensor.
10+
11+
.. autofunction:: apply_effects_tensor
12+
13+
.. autofunction:: apply_effects_file
14+
15+
Create SoX effects chain for preprocessing audio.
16+
1117
:hidden:`SoxEffect`
1218
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1319

docs/source/utils.rst

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
.. role:: hidden
2+
:class: hidden-section
3+
4+
torchaudio.utils.sox_utils
5+
==========================
6+
7+
Utility module to configure libsox. This affects functionalities in ``sox_io`` backend and ``torchaudio.sox_effects``.
8+
9+
.. currentmodule:: torchaudio.utils.sox_utils
10+
11+
.. autofunction:: set_seed
12+
13+
.. autofunction:: set_verbosity
14+
15+
.. autofunction:: set_buffer_size
16+
17+
.. autofunction:: set_use_threads
18+
19+
.. autofunction:: list_effects
20+
21+
.. autofunction:: list_formats

torchaudio/sox_effects/sox_effects.py

Lines changed: 72 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -67,18 +67,53 @@ def apply_effects_tensor(
6767
"""Apply sox effects to given Tensor
6868
6969
Args:
70-
tensor: Input 2D Tensor.
71-
sample_rate: Sample rate
72-
effects: List of effects.
73-
channels_first: Indicates if the input Tensor's dimension is
70+
tensor (torch.Tensor): Input 2D Tensor.
71+
sample_rate (int): Sample rate
72+
effects (List[List[str]]): List of effects.
73+
channels_first (bool): Indicates if the input Tensor's dimension is
7474
``[channels, time]`` or ``[time, channels]``
7575
76+
Returns:
77+
Tuple[torch.Tensor, int]: Resulting Tensor and sample rate.
78+
The resulting Tensor has the same ``dtype`` as the input Tensor, and
79+
the same channels order. The shape of the Tensor can be different based on the
80+
effects applied. Sample rate can also be different based on the effects applied.
81+
7682
Notes:
77-
This function works in the way very similar to ```sox``` command, however there are slight
83+
This function works in a way very similar to ``sox`` command, however there are slight
7884
differences. For example, ``sox`` command adds certain effects automatically (such as
7985
``rate`` effect after ``speed`` and ``pitch`` and other effects), but this function
8086
only applies the given effects. (Therefore, to actually apply ``speed`` effect, you also
8187
need to give ``rate`` effect with desired sampling rate.)
88+
89+
Examples:
90+
>>> # Defines the effects to apply
91+
>>> effects = [
92+
... ['gain', '-n'], # normalises to 0dB
93+
... ['pitch', '5'], # 5 cent pitch shift
94+
... ['rate', '8000'], # resample to 8000 Hz
95+
... ]
96+
>>> # Generate pseudo wave:
97+
>>> # normalized, channels first, 2ch, sampling rate 16000, 1 second
98+
>>> sample_rate = 16000
99+
>>> waveform = 2 * torch.rand([2, sample_rate * 1]) - 1
100+
>>> waveform.shape
101+
torch.Size([2, 16000])
102+
>>> waveform
103+
tensor([[ 0.3138, 0.7620, -0.9019, ..., -0.7495, -0.4935, 0.5442],
104+
[-0.0832, 0.0061, 0.8233, ..., -0.5176, -0.9140, -0.2434]])
105+
>>> # Apply effects
106+
>>> waveform, sample_rate = apply_effects_tensor(
107+
... waveform, sample_rate, effects, channels_first=True)
108+
>>> # The new waveform is sampling rate 8000, 1 second.
109+
>>> # normalization and channel order are preserved
110+
>>> waveform.shape
111+
torch.Size([2, 8000])
112+
>>> waveform
113+
tensor([[ 0.5054, -0.5518, -0.4800, ..., -0.0076, 0.0096, -0.0110],
114+
[ 0.1331, 0.0436, -0.3783, ..., -0.0035, 0.0012, 0.0008]])
115+
>>> sample_rate
116+
8000
82117
"""
83118
in_signal = torch.classes.torchaudio.TensorSignal(tensor, sample_rate, channels_first)
84119
out_signal = torch.ops.torchaudio.sox_effects_apply_effects_tensor(in_signal, effects)
@@ -92,25 +127,52 @@ def apply_effects_file(
92127
normalize: bool = True,
93128
channels_first: bool = True,
94129
) -> Tuple[torch.Tensor, int]:
95-
"""Apply sox effects to the audio file and load Tensor
130+
"""Apply sox effects to the audio file and load the resulting data as Tensor
96131
97132
Args:
98-
path: Path to the audio file.
99-
effects: List of effects.
100-
normalize: When ``True``, this function always return ``float32``, and sample values are
133+
path (str): Path to the audio file.
134+
effects (List[List[str]]): List of effects.
135+
normalize (bool): When ``True``, this function always returns ``float32``, and sample values are
101136
normalized to ``[-1.0, 1.0]``. If input file is integer WAV, giving ``False`` will change
102137
the resulting Tensor type to integer type. This argument has no effect for formats other
103138
than integer WAV type.
104-
channels_first: When True, the returned Tensor has dimension ``[channel, time]``.
139+
channels_first (bool): When True, the returned Tensor has dimension ``[channel, time]``.
105140
Otherwise, the returned Tensor's dimension is ``[time, channel]``.
106141
142+
Returns:
143+
Tuple[torch.Tensor, int]: Resulting Tensor and sample rate.
144+
If ``normalize=True``, the resulting Tensor is always ``float32`` type.
145+
If ``normalize=False`` and the input audio file is an integer WAV file, then the
146+
resulting Tensor has corresponding integer type. (Note 24 bit integer type is not supported)
147+
If ``channels_first=True``, the resulting Tensor has dimension ``[channel, time]``,
148+
otherwise ``[time, channel]``.
149+
107150
Notes:
108151
This function works in a way very similar to ``sox`` command, however there are slight
109152
differences. For example, ``sox`` command adds certain effects automatically (such as
110153
``rate`` effect after ``speed``, ``pitch`` etc), but this function only applies the given
111154
effects. Therefore, to actually apply ``speed`` effect, you also need to give ``rate``
112155
effect with desired sampling rate, because internally, ``speed`` effects only alter sampling
113156
rate and leave samples untouched.
157+
158+
Examples:
159+
>>> # Defines the effects to apply
160+
>>> effects = [
161+
... ['gain', '-n'], # normalises to 0dB
162+
... ['pitch', '5'], # 5 cent pitch shift
163+
... ['rate', '8000'], # resample to 8000 Hz
164+
... ]
165+
>>> # Apply effects and load data with channels_first=True
166+
>>> waveform, sample_rate = apply_effects_file("data.wav", effects, channels_first=True)
167+
>>> waveform.shape
168+
torch.Size([2, 8000])
169+
>>> waveform
170+
tensor([[ 5.1151e-03, 1.8073e-02, 2.2188e-02, ..., 1.0431e-07,
171+
-1.4761e-07, 1.8114e-07],
172+
[-2.6924e-03, 2.1860e-03, 1.0650e-02, ..., 6.4122e-07,
173+
-5.6159e-07, 4.8103e-07]])
174+
>>> sample_rate
175+
8000
114176
"""
115177
signal = torch.ops.torchaudio.sox_effects_apply_effects_file(path, effects, normalize, channels_first)
116178
return signal.get_tensor(), signal.get_sample_rate()

torchaudio/utils/sox_utils.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def set_seed(seed: int):
1212
"""Set libsox's PRNG
1313
1414
Args:
15-
seed: seed value. valid range is int32.
15+
seed (int): seed value. valid range is int32.
1616
1717
See Also:
1818
http://sox.sourceforge.net/sox.html
@@ -25,7 +25,7 @@ def set_verbosity(verbosity: int):
2525
"""Set libsox's verbosity
2626
2727
Args:
28-
verbosity: Set verbosity level of libsox.
28+
verbosity (int): Set verbosity level of libsox.
2929
1: failure messages
3030
2: warnings
3131
3: details of processing
@@ -42,7 +42,7 @@ def set_buffer_size(buffer_size: int):
4242
"""Set buffer size for sox effect chain
4343
4444
Args:
45-
buffer_size: Set the size in bytes of the buffers used for processing audio.
45+
buffer_size (int): Set the size in bytes of the buffers used for processing audio.
4646
4747
See Also:
4848
http://sox.sourceforge.net/sox.html
@@ -55,7 +55,7 @@ def set_use_threads(use_threads: bool):
5555
"""Set multithread option for sox effect chain
5656
5757
Args:
58-
use_threads: When True, enables libsox's parallel effects channels processing.
58+
use_threads (bool): When True, enables libsox's parallel effects channels processing.
5959
To use multithreading, the underlying libsox has to be compiled with OpenMP support.
6060
6161
See Also:
@@ -69,7 +69,7 @@ def list_effects() -> Dict[str, str]:
6969
"""List the available sox effect names
7070
7171
Returns:
72-
Mapping from "effect name" to "usage"
72+
Dict[str, str]: Mapping from "effect name" to "usage"
7373
"""
7474
return dict(torch.ops.torchaudio.sox_utils_list_effects())
7575

@@ -78,7 +78,7 @@ def list_effects() -> Dict[str, str]:
7878
def list_formats() -> List[str]:
7979
"""List the supported audio formats
8080
81-
Returns: list[str]
82-
List of supported audio formats
81+
Returns:
82+
List[str]: List of supported audio formats
8383
"""
8484
return torch.ops.torchaudio.sox_utils_list_formats()

0 commit comments

Comments
 (0)