@@ -67,18 +67,53 @@ def apply_effects_tensor(
6767 """Apply sox effects to given Tensor
6868
6969 Args:
70- tensor: Input 2D Tensor.
71- sample_rate: Sample rate
72- effects: List of effects.
73- channels_first: Indicates if the input Tensor's dimension is
70+ tensor (torch.Tensor) : Input 2D Tensor.
71+ sample_rate (int) : Sample rate
72+ effects (List[List[str]]) : List of effects.
73+ channels_first (bool) : Indicates if the input Tensor's dimension is
7474 ``[channels, time]`` or ``[time, channels]``
7575
76+ Returns:
77+ Tuple[torch.Tensor, int]: Resulting Tensor and sample rate.
78+ The resulting Tensor has the same ``dtype`` as the input Tensor, and
79+ the same channels order. The shape of the Tensor can be different based on the
80+ effects applied. Sample rate can also be different based on the effects applied.
81+
7682 Notes:
77- This function works in the way very similar to ``` sox` `` command, however there are slight
83+ This function works in a way very similar to the ``sox`` command, however there are slight
7884 differences. For example, the ``sox`` command adds certain effects automatically (such as
7985 ``rate`` effect after ``speed`` and ``pitch`` and other effects), but this function
8086 only applies the given effects. (Therefore, to actually apply ``speed`` effect, you also
8187 need to give ``rate`` effect with desired sampling rate.)
88+
89+ Examples:
90+ >>> # Defines the effects to apply
91+ >>> effects = [
92+ ... ['gain', '-n'], # normalises to 0dB
93+ ... ['pitch', '5'], # 5 cent pitch shift
94+ ... ['rate', '8000'], # resample to 8000 Hz
95+ ... ]
96+ >>> # Generate pseudo wave:
97+ >>> # normalized, channels first, 2ch, sampling rate 16000, 1 second
98+ >>> sample_rate = 16000
99+ >>> waveform = 2 * torch.rand([2, sample_rate * 1]) - 1
100+ >>> waveform.shape
101+ torch.Size([2, 16000])
102+ >>> waveform
103+ tensor([[ 0.3138, 0.7620, -0.9019, ..., -0.7495, -0.4935, 0.5442],
104+ [-0.0832, 0.0061, 0.8233, ..., -0.5176, -0.9140, -0.2434]])
105+ >>> # Apply effects
106+ >>> waveform, sample_rate = apply_effects_tensor(
107+ ... waveform, sample_rate, effects, channels_first=True)
108+ >>> # The new waveform is sampling rate 8000, 1 second.
109+ >>> # normalization and channel order are preserved
110+ >>> waveform.shape
111+ torch.Size([2, 8000])
112+ >>> waveform
113+ tensor([[ 0.5054, -0.5518, -0.4800, ..., -0.0076, 0.0096, -0.0110],
114+ [ 0.1331, 0.0436, -0.3783, ..., -0.0035, 0.0012, 0.0008]])
115+ >>> sample_rate
116+ 8000
82117 """
83118 in_signal = torch .classes .torchaudio .TensorSignal (tensor , sample_rate , channels_first )
84119 out_signal = torch .ops .torchaudio .sox_effects_apply_effects_tensor (in_signal , effects )
@@ -92,25 +127,52 @@ def apply_effects_file(
92127 normalize : bool = True ,
93128 channels_first : bool = True ,
94129) -> Tuple [torch .Tensor , int ]:
95- """Apply sox effects to the audio file and load Tensor
130+ """Apply sox effects to the audio file and load the resulting data as Tensor
96131
97132 Args:
98- path: Path to the audio file.
99- effects: List of effects.
100- normalize: When ``True``, this function always return ``float32``, and sample values are
133+ path (str) : Path to the audio file.
134+ effects (List[List[str]]) : List of effects.
135+ normalize (bool) : When ``True``, this function always returns ``float32``, and sample values are
101136 normalized to ``[-1.0, 1.0]``. If input file is integer WAV, giving ``False`` will change
102137 the resulting Tensor type to integer type. This argument has no effect for formats other
103138 than integer WAV type.
104- channels_first: When True, the returned Tensor has dimension ``[channel, time]``.
139+ channels_first (bool) : When True, the returned Tensor has dimension ``[channel, time]``.
105140 Otherwise, the returned Tensor's dimension is ``[time, channel]``.
106141
142+ Returns:
143+ Tuple[torch.Tensor, int]: Resulting Tensor and sample rate.
144+ If ``normalize=True``, the resulting Tensor is always ``float32`` type.
145+ If ``normalize=False`` and the input audio file is an integer WAV file, then the
146+ resulting Tensor has corresponding integer type. (Note 24 bit integer type is not supported)
147+ If ``channels_first=True``, the resulting Tensor has dimension ``[channel, time]``,
148+ otherwise ``[time, channel]``.
149+
107150 Notes:
108151 This function works in a way very similar to the ``sox`` command, however there are slight
109152 differences. For example, the ``sox`` command adds certain effects automatically (such as
110153 ``rate`` effect after ``speed``, ``pitch`` etc), but this function only applies the given
111154 effects. Therefore, to actually apply ``speed`` effect, you also need to give ``rate``
112155 effect with desired sampling rate, because internally, ``speed`` effects only alter sampling
113156 rate and leave samples untouched.
157+
158+ Examples:
159+ >>> # Defines the effects to apply
160+ >>> effects = [
161+ ... ['gain', '-n'], # normalises to 0dB
162+ ... ['pitch', '5'], # 5 cent pitch shift
163+ ... ['rate', '8000'], # resample to 8000 Hz
164+ ... ]
165+ >>> # Apply effects and load data with channels_first=True
166+ >>> waveform, sample_rate = apply_effects_file("data.wav", effects, channels_first=True)
167+ >>> waveform.shape
168+ torch.Size([2, 8000])
169+ >>> waveform
170+ tensor([[ 5.1151e-03, 1.8073e-02, 2.2188e-02, ..., 1.0431e-07,
171+ -1.4761e-07, 1.8114e-07],
172+ [-2.6924e-03, 2.1860e-03, 1.0650e-02, ..., 6.4122e-07,
173+ -5.6159e-07, 4.8103e-07]])
174+ >>> sample_rate
175+ 8000
114176 """
115177 signal = torch .ops .torchaudio .sox_effects_apply_effects_file (path , effects , normalize , channels_first )
116178 return signal .get_tensor (), signal .get_sample_rate ()
0 commit comments