Skip to content

Commit a53314e

Browse files
committed
Add sox effects chain
1 parent 4b583ea commit a53314e

17 files changed

+837
-7
lines changed
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
{"effects": [["allpass", "300", "10"]]}
2+
{"effects": [["band", "300", "10"]]}
3+
{"effects": [["bandpass", "300", "10"]]}
4+
{"effects": [["bandreject", "300", "10"]]}
5+
{"effects": [["bass", "-10"]]}
6+
{"effects": [["bend", ".35,180,.25", ".15,740,.53", "0,-520,.3"]]}
7+
{"effects": [["biquad", "0.4", "0.2", "0.9", "0.7", "0.2", "0.6"]]}
8+
{"effects": [["chorus", "0.7", "0.9", "55", "0.4", "0.25", "2", "-t"]]}
9+
{"effects": [["chorus", "0.6", "0.9", "50", "0.4", "0.25", "2", "-t", "60", "0.32", "0.4", "1.3", "-s"]]}
10+
{"effects": [["chorus", "0.5", "0.9", "50", "0.4", "0.25", "2", "-t", "60", "0.32", "0.4", "2.3", "-t", "40", "0.3", "0.3", "1.3", "-s"]]}
11+
{"effects": [["channels", "1"]]}
12+
{"effects": [["channels", "2"]]}
13+
{"effects": [["channels", "3"]]}
14+
{"effects": [["compand", "0.3,1", "6:-70,-60,-20", "-5", "-90", "0.2"]]}
15+
{"effects": [["compand", ".1,.2", "-inf,-50.1,-inf,-50,-50", "0", "-90", ".1"]]}
16+
{"effects": [["compand", ".1,.1", "-45.1,-45,-inf,0,-inf", "45", "-90", ".1"]]}
17+
{"effects": [["contrast", "0"]]}
18+
{"effects": [["contrast", "25"]]}
19+
{"effects": [["contrast", "50"]]}
20+
{"effects": [["contrast", "75"]]}
21+
{"effects": [["contrast", "100"]]}
22+
{"effects": [["dcshift", "1.0"]]}
23+
{"effects": [["dcshift", "-1.0"]]}
24+
{"effects": [["deemph"]], "input_sample_rate": 44100}
25+
{"effects": [["delay", "1.5", "+1"]]}
26+
{"effects": [["dither", "-s"]]}
27+
{"effects": [["dither", "-S"]]}
28+
{"effects": [["divide"]]}
29+
{"effects": [["downsample", "2"]], "input_sample_rate": 8000, "output_sample_rate": 4000}
30+
{"effects": [["earwax"]], "input_sample_rate": 44100}
31+
{"effects": [["echo", "0.8", "0.88", "60", "0.4"]]}
32+
{"effects": [["echo", "0.8", "0.88", "6", "0.4"]]}
33+
{"effects": [["echo", "0.8", "0.9", "1000", "0.3"]]}
34+
{"effects": [["echo", "0.8", "0.9", "1000", "0.3", "1800", "0.25"]]}
35+
{"effects": [["echos", "0.8", "0.7", "700", "0.25", "700", "0.3"]]}
36+
{"effects": [["echos", "0.8", "0.7", "700", "0.25", "900", "0.3"]]}
37+
{"effects": [["echos", "0.8", "0.7", "40", "0.25", "63", "0.3"]]}
38+
{"effects": [["equalizer", "300", "10", "5"]]}
39+
{"effects": [["fade", "q", "3"]]}
40+
{"effects": [["fade", "h", "3"]]}
41+
{"effects": [["fade", "t", "3"]]}
42+
{"effects": [["fade", "l", "3"]]}
43+
{"effects": [["fade", "p", "3"]]}
44+
{"effects": [["fir", "0.0195", "-0.082", "0.234", "0.891", "-0.145", "0.043"]]}
45+
{"effects": [["fir", "test/assets/sox_effect_test_fir_coeffs.txt"]]}
46+
{"effects": [["flanger"]]}
47+
{"effects": [["gain", "-n"]]}
48+
{"effects": [["gain", "-n", "-3"]]}
49+
{"effects": [["gain", "-l", "-6"]]}
50+
{"effects": [["highpass", "-1", "300"]]}
51+
{"effects": [["highpass", "-2", "300"]]}
52+
{"effects": [["hilbert"]]}
53+
{"effects": [["loudness"]]}
54+
{"effects": [["lowpass", "-1", "300"]]}
55+
{"effects": [["lowpass", "-2", "300"]]}
56+
{"effects": [["mcompand", "0.005,0.1 -47,-40,-34,-34,-17,-33", "100", "0.003,0.05 -47,-40,-34,-34,-17,-33", "400", "0.000625,0.0125 -47,-40,-34,-34,-15,-33", "1600", "0.0001,0.025 -47,-40,-34,-34,-31,-31,-0,-30", "6400", "0,0.025 -38,-31,-28,-28,-0,-25"]], "input_sample_rate": 44100}
57+
{"effects": [["norm"]]}
58+
{"effects": [["oops"]]}
59+
{"effects": [["overdrive"]]}
60+
{"effects": [["pad"]]}
61+
{"effects": [["phaser"]]}
62+
{"effects": [["pitch", "6.48"], ["rate", "8030"]], "output_sample_rate": 8030}
63+
{"effects": [["pitch", "-6.50"], ["rate", "7970"]], "output_sample_rate": 7970}
64+
{"effects": [["rate", "4567"]], "output_sample_rate": 4567}
65+
{"effects": [["remix", "6", "7", "8", "0"]], "num_channels": 8}
66+
{"effects": [["remix", "1-3,7", "3"]], "num_channels": 8}
67+
{"effects": [["repeat"]]}
68+
{"effects": [["reverb"]]}
69+
{"effects": [["reverse"]]}
70+
{"effects": [["riaa"]], "input_sample_rate": 44100}
71+
{"effects": [["silence", "0"]]}
72+
{"effects": [["sinc", "3k"]]}
73+
{"effects": [["speed", "1.3"]], "input_sample_rate": 4000, "output_sample_rate": 5200}
74+
{"effects": [["speed", "0.7"]], "input_sample_rate": 4000, "output_sample_rate": 2800}
75+
{"effects": [["stat"]]}
76+
{"effects": [["stats"]]}
77+
{"effects": [["stretch"]]}
78+
{"effects": [["swap"]]}
79+
{"effects": [["synth"]]}
80+
{"effects": [["tempo", "0.9"]]}
81+
{"effects": [["tempo", "1.1"]]}
82+
{"effects": [["treble", "3"]]}
83+
{"effects": [["tremolo", "300", "40"]]}
84+
{"effects": [["tremolo", "300", "50"]]}
85+
{"effects": [["trim", "0", "0.1"]]}
86+
{"effects": [["upsample", "2"]], "input_sample_rate": 8000, "output_sample_rate": 16000}
87+
{"effects": [["vad"]]}
88+
{"effects": [["vol", "3"]]}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
0.0195 -0.082 0.234 0.891 -0.145 0.043

test/common_utils/data_utils.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ def get_sinusoid(
5656
n_channels: int = 1,
5757
dtype: Union[str, torch.dtype] = "float32",
5858
device: Union[str, torch.device] = "cpu",
59+
channels_first: bool = True,
5960
):
6061
"""Generate pseudo audio data with sine wave.
6162
@@ -75,4 +76,7 @@ def get_sinusoid(
7576
pie2 = 2 * 3.141592653589793
7677
end = pie2 * frequency * duration
7778
theta = torch.linspace(0, end, sample_rate * duration, dtype=dtype, device=device)
78-
return torch.sin(theta, out=None).repeat([n_channels, 1])
79+
sin = torch.sin(theta, out=None).repeat([n_channels, 1])
80+
if not channels_first:
81+
sin = sin.transpose(1, 0)
82+
return sin

test/common_utils/sox_utils.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,3 +77,12 @@ def convert_audio_file(
7777
command += [dst_path]
7878
print(' '.join(command))
7979
subprocess.run(command, check=True)
80+
81+
82+
def run_sox_effect(input_file, output_file, effect, output_sample_rate):
    """Apply ``effect`` to ``input_file`` with the ``sox`` command line tool.

    Args:
        input_file: Path handed to ``sox`` as the input file.
        output_file: Path handed to ``sox`` as the output file.
        effect: Flat list of strings (effect name followed by its arguments)
            appended to the command line after the two file paths.
        output_sample_rate: When truthy, a trailing ``rate <value>`` effect
            is appended to the command.

    The assembled command is printed before it is executed.  Because the
    command runs with ``check=True``, a non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    # An empty list keeps the concatenation below unconditional.
    rate_args = ['rate', str(output_sample_rate)] if output_sample_rate else []
    cmd = ['sox', '-V', input_file, output_file] + effect + rate_args
    print(' '.join(cmd))
    subprocess.run(cmd, check=True)

test/sox_effect/__init__.py

Whitespace-only changes.

test/sox_effect/common.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
def name_func(func, _, params):
    """Build a readable test-case name for ``parameterized.expand``.

    Args:
        func: The test function being parameterized; its ``__name__`` is the
            prefix of the generated name.
        _: Unused (second positional argument supplied by the caller).
        params: Object exposing the positional test arguments as ``.args``.

    Returns:
        ``"<func name>_<arg>_<arg>..."``.  When the first argument is a string
        every positional argument is used; when it is some other iterable
        (e.g. a list of values) its items are used instead.  A non-iterable
        first argument (e.g. an ``int``) falls back to using every positional
        argument, and an empty argument tuple yields just the function name.
    """
    args = params.args
    if not args:
        # Nothing to append; avoids IndexError on ``args[0]``.
        return func.__name__

    first = args[0]
    if isinstance(first, str):
        parts = args
    else:
        try:
            parts = iter(first)
        except TypeError:
            # Scalar first argument: describe the case by all of its arguments
            # instead of failing while trying to iterate over it.
            parts = args

    suffix = "_".join(str(part) for part in parts)
    return f'{func.__name__}_{suffix}'
7+
8+
9+
def flatten_lists(l):
    """Return one flat list holding the items of every sub-sequence in ``l``.

    Only one level of nesting is removed and the original order is kept.
    """
    flat = []
    for sublist in l:
        flat.extend(sublist)
    return flat

test/sox_effect/test_sox_effect.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
import itertools
2+
3+
from torchaudio import sox_effects
4+
from parameterized import parameterized
5+
6+
from ..common_utils import (
7+
TempDirMixin,
8+
PytorchTestCase,
9+
skipIfNoExtension,
10+
get_sinusoid,
11+
get_wav_data,
12+
save_wav,
13+
load_wav,
14+
load_params,
15+
sox_utils,
16+
)
17+
from .common import (
18+
name_func,
19+
flatten_lists,
20+
)
21+
22+
23+
@skipIfNoExtension
class TestSoxEffects(TempDirMixin, PytorchTestCase):
    # Checks `torchaudio.sox_effects`: listing effects, the no-effect identity
    # path, and agreement with the `sox` command for every parameter set in
    # "sox_effect_test_args.json".

    def test_list_effects(self):
        """effect_names returns the list of available effects"""
        available = sox_effects.effect_names()
        # The full set of effects is not known here, so only a few are probed.
        for name in ('highpass', 'phaser', 'gain'):
            assert name in available

    @parameterized.expand(list(itertools.product(
        ['float32', 'int32', 'int16', 'uint8'],
        [8000, 16000],
        [1, 2, 4, 8],
        [True, False]
    )), name_func=name_func)
    def test_apply_no_effect_tensor(self, dtype, sample_rate, num_channels, channels_first):
        """`apply_effects_tensor` without effects should return identical data as input"""
        reference = get_wav_data(dtype, num_channels, channels_first=channels_first)
        tensor_in = reference.clone()
        result = sox_effects.apply_effects_tensor(
            tensor_in, sample_rate, [], channels_first)
        tensor_out, sr_out = result

        assert sr_out == sample_rate
        # The tensor handed in must come back untouched ...
        self.assertEqual(reference, tensor_in)
        # ... the returned tensor must be a distinct object ...
        assert tensor_in is not tensor_out
        # ... and it must hold the same values as the input.
        self.assertEqual(tensor_in, tensor_out)

    @parameterized.expand(list(itertools.product(
        ['float32', 'int32', 'int16', 'uint8'],
        [8000, 16000],
        [1, 2, 4, 8],
        [False, True],
    )), name_func=name_func)
    def test_apply_no_effect_file(self, dtype, sample_rate, num_channels, channels_first):
        """`apply_effects_file` without effects should return identical data as input"""
        wav_path = self.get_temp_path('input.wav')
        waveform = get_wav_data(dtype, num_channels, channels_first=channels_first)
        save_wav(wav_path, waveform, sample_rate, channels_first=channels_first)

        loaded, sr_out = sox_effects.apply_effects_file(wav_path, [], channels_first)

        assert sr_out == sample_rate
        self.assertEqual(waveform, loaded)

    @parameterized.expand(
        load_params("sox_effect_test_args.json"),
        name_func=lambda f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}',
    )
    def test_apply_effect_tensor(self, args):
        """`apply_effects_tensor` should return identical data as sox command"""
        effects = args['effects']
        n_channels = args.get("num_channels", 2)
        sr_in = args.get("input_sample_rate", 8000)
        sr_out = args.get("output_sample_rate")

        src_path = self.get_temp_path('input.wav')
        dst_path = self.get_temp_path('output.wav')

        waveform = get_sinusoid(
            frequency=800, sample_rate=sr_in,
            n_channels=n_channels, dtype='float32')
        save_wav(src_path, waveform, sr_in)
        # Reference result: the same effect chain run through the sox CLI.
        sox_utils.run_sox_effect(src_path, dst_path, flatten_lists(effects), sr_out)

        reference, reference_sr = load_wav(dst_path)
        produced, produced_sr = sox_effects.apply_effects_tensor(waveform, sr_in, effects)

        assert produced_sr == reference_sr
        self.assertEqual(reference, produced)

    @parameterized.expand(
        load_params("sox_effect_test_args.json"),
        name_func=lambda f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}',
    )
    def test_apply_effects_file(self, args):
        """`apply_effects_file` should return identical data as sox command"""
        dtype = 'int32'
        channels_first = True
        effects = args['effects']
        n_channels = args.get("num_channels", 2)
        sr_in = args.get("input_sample_rate", 8000)
        sr_out = args.get("output_sample_rate")

        src_path = self.get_temp_path('input.wav')
        dst_path = self.get_temp_path('output.wav')
        waveform = get_wav_data(dtype, n_channels, channels_first=channels_first)
        save_wav(src_path, waveform, sr_in, channels_first=channels_first)
        # Reference result: the same effect chain run through the sox CLI.
        sox_utils.run_sox_effect(src_path, dst_path, flatten_lists(effects), sr_out)

        reference, reference_sr = load_wav(dst_path)
        produced, produced_sr = sox_effects.apply_effects_file(src_path, effects, channels_first)

        assert produced_sr == reference_sr
        self.assertEqual(produced, reference)
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
from typing import List
2+
3+
import torch
4+
from torchaudio import sox_effects
5+
from parameterized import parameterized
6+
7+
from ..common_utils import (
8+
TempDirMixin,
9+
PytorchTestCase,
10+
skipIfNoExtension,
11+
get_sinusoid,
12+
load_params,
13+
save_wav,
14+
)
15+
16+
17+
class SoxEffectTensorTransform(torch.nn.Module):
    """Module wrapper around ``sox_effects.apply_effects_tensor``.

    The effect chain, sample rate and channel layout are fixed at construction
    time so that ``forward`` only takes the tensor to process.
    """
    # Attribute types are declared explicitly, mirroring SoxEffectFileTransform.
    effects: List[List[str]]
    sample_rate: int
    channels_first: bool

    def __init__(self, effects: List[List[str]], sample_rate: int, channels_first: bool):
        """Store the arguments later forwarded to ``apply_effects_tensor``.

        Args:
            effects: Effect chain; each inner list is one effect given as strings.
            sample_rate: Sample rate passed along with the input tensor.
            channels_first: Layout flag passed through unchanged.
        """
        super().__init__()
        self.effects = effects
        self.sample_rate = sample_rate
        self.channels_first = channels_first

    def forward(self, tensor: torch.Tensor):
        """Return ``sox_effects.apply_effects_tensor`` applied to ``tensor``."""
        return sox_effects.apply_effects_tensor(
            tensor, self.sample_rate, self.effects, self.channels_first)
29+
30+
31+
class SoxEffectFileTransform(torch.nn.Module):
    """Module wrapper around ``sox_effects.apply_effects_file``.

    The effect chain and channel layout are fixed at construction time so
    that ``forward`` only takes the path of the file to process.
    """
    effects: List[List[str]]
    channels_first: bool

    def __init__(self, effects: List[List[str]], channels_first: bool):
        """Remember the effect chain and layout flag used by ``forward``."""
        super().__init__()
        self.effects = effects
        self.channels_first = channels_first

    def forward(self, path: str):
        """Return ``sox_effects.apply_effects_file`` applied to ``path``."""
        processed = sox_effects.apply_effects_file(
            path, self.effects, self.channels_first)
        return processed
42+
43+
44+
@skipIfNoExtension
class TestTorchScript(TempDirMixin, PytorchTestCase):
    # Each test scripts a wrapper module, saves it, loads it back, and compares
    # the loaded module's output with a direct call to `sox_effects`.

    @parameterized.expand(
        load_params("sox_effect_test_args.json"),
        name_func=lambda f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}',
    )
    def test_apply_effects_tensor(self, args):
        effects = args['effects']
        channels_first = True
        n_channels = args.get("num_channels", 2)
        sr_in = args.get("input_sample_rate", 8000)

        module = SoxEffectTensorTransform(effects, sr_in, channels_first)

        # Round-trip through a saved archive.
        archive_path = self.get_temp_path('sox_effect.zip')
        scripted = torch.jit.script(module)
        scripted.save(archive_path)
        module = torch.jit.load(archive_path)

        waveform = get_sinusoid(
            frequency=800, sample_rate=sr_in,
            n_channels=n_channels, dtype='float32', channels_first=channels_first)
        produced, produced_sr = module(waveform)
        reference, reference_sr = sox_effects.apply_effects_tensor(
            waveform, sr_in, effects, channels_first)

        assert produced_sr == reference_sr
        self.assertEqual(reference, produced)

    @parameterized.expand(
        load_params("sox_effect_test_args.json"),
        name_func=lambda f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}',
    )
    def test_apply_effects_file(self, args):
        effects = args['effects']
        channels_first = True
        n_channels = args.get("num_channels", 2)
        sr_in = args.get("input_sample_rate", 8000)

        module = SoxEffectFileTransform(effects, channels_first)

        # Round-trip through a saved archive.
        archive_path = self.get_temp_path('sox_effect.zip')
        scripted = torch.jit.script(module)
        scripted.save(archive_path)
        module = torch.jit.load(archive_path)

        # Input file that both the loaded module and the direct call read.
        wav_path = self.get_temp_path('input.wav')
        waveform = get_sinusoid(
            frequency=800, sample_rate=sr_in,
            n_channels=n_channels, dtype='float32', channels_first=channels_first)
        save_wav(wav_path, waveform, sample_rate=sr_in, channels_first=channels_first)

        produced, produced_sr = module(wav_path)
        reference, reference_sr = sox_effects.apply_effects_file(wav_path, effects, channels_first)

        assert produced_sr == reference_sr
        self.assertEqual(reference, produced)

torchaudio/csrc/register.cpp

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,15 @@ static auto registerTensorSignal =
2929
.def("get_sample_rate", &sox_utils::TensorSignal::getSampleRate)
3030
.def("get_channels_first", &sox_utils::TensorSignal::getChannelsFirst);
3131

32+
// Registers the sox_utils option setters (set_seed, set_verbosity,
// set_use_threads, set_buffer_size) as operators named
// "torchaudio::sox_utils_set_*".
static auto registerSetSoxOptions =
33+
torch::RegisterOperators()
34+
.op("torchaudio::sox_utils_set_seed", &sox_utils::set_seed)
35+
.op("torchaudio::sox_utils_set_verbosity", &sox_utils::set_verbosity)
36+
.op("torchaudio::sox_utils_set_use_threads",
37+
&sox_utils::set_use_threads)
38+
.op("torchaudio::sox_utils_set_buffer_size",
39+
&sox_utils::set_buffer_size);
40+
3241
////////////////////////////////////////////////////////////////////////////////
3342
// sox_io.h
3443
////////////////////////////////////////////////////////////////////////////////
@@ -58,12 +67,24 @@ static auto registerSaveAudioFile = torch::RegisterOperators().op(
5867
// sox_effects.h
5968
////////////////////////////////////////////////////////////////////////////////
6069
static auto registerSoxEffects =
61-
torch::RegisterOperators(
62-
"torchaudio::sox_effects_initialize_sox_effects",
63-
&sox_effects::initialize_sox_effects)
70+
torch::RegisterOperators()
71+
.op("torchaudio::sox_effects_initialize_sox_effects",
72+
&sox_effects::initialize_sox_effects)
6473
.op("torchaudio::sox_effects_shutdown_sox_effects",
6574
&sox_effects::shutdown_sox_effects)
66-
.op("torchaudio::sox_effects_list_effects", &sox_effects::list_effects);
75+
.op("torchaudio::sox_effects_list_effects", &sox_effects::list_effects)
76+
.op(torch::RegisterOperators::options()
77+
.schema(
78+
"torchaudio::sox_effects_apply_effects_tensor(__torch__.torch.classes.torchaudio.TensorSignal input_signal, str[][] effects) -> __torch__.torch.classes.torchaudio.TensorSignal output_signal")
79+
.catchAllKernel<
80+
decltype(sox_effects::apply_effects_tensor),
81+
&sox_effects::apply_effects_tensor>())
82+
.op(torch::RegisterOperators::options()
83+
.schema(
84+
"torchaudio::sox_effects_apply_effects_file(str path, str[][] effects, bool channels_first) -> __torch__.torch.classes.torchaudio.TensorSignal output_signal")
85+
.catchAllKernel<
86+
decltype(sox_effects::apply_effects_file),
87+
&sox_effects::apply_effects_file>());
6788

6889
} // namespace
6990
} // namespace torchaudio

0 commit comments

Comments
 (0)