pytorch
diff --git a/test/assets/sox_effect_test_args.json b/test/assets/sox_effect_test_args.json
Lines changed: 88 additions & 0 deletions
diff --git a/test/assets/sox_effect_test_fir_coeffs.txt b/test/assets/sox_effect_test_fir_coeffs.txt
Lines changed: 1 addition & 0 deletions
diff --git a/test/common_utils/data_utils.py b/test/common_utils/data_utils.py
Lines changed: 5 additions & 1 deletion
diff --git a/test/common_utils/sox_utils.py b/test/common_utils/sox_utils.py
Lines changed: 9 additions & 0 deletions
diff --git a/test/sox_effect/__init__.py b/test/sox_effect/__init__.py
diff --git a/test/sox_effect/common.py b/test/sox_effect/common.py
Lines changed: 10 additions & 0 deletions
diff --git a/test/sox_effect/test_sox_effect.py b/test/sox_effect/test_sox_effect.py
Lines changed: 120 additions & 0 deletions
diff --git a/test/sox_effect/test_torchscript.py b/test/sox_effect/test_torchscript.py
Lines changed: 98 additions & 0 deletions
diff --git a/torchaudio/csrc/register.cpp b/torchaudio/csrc/register.cpp
Lines changed: 25 additions & 4 deletions
@@ -0,0 +1,88 @@
+{"effects": [["allpass", "300", "10"]]}
+{"effects": [["band", "300", "10"]]}
+{"effects": [["bandpass", "300", "10"]]}
+{"effects": [["bandreject", "300", "10"]]}
+{"effects": [["bass", "-10"]]}
+{"effects": [["bend", ".35,180,.25", ".15,740,.53", "0,-520,.3"]]}
+{"effects": [["biquad", "0.4", "0.2", "0.9", "0.7", "0.2", "0.6"]]}
+{"effects": [["chorus", "0.7", "0.9", "55", "0.4", "0.25", "2", "-t"]]}
+{"effects": [["chorus", "0.6", "0.9", "50", "0.4", "0.25", "2", "-t", "60", "0.32", "0.4", "1.3", "-s"]]}
+{"effects": [["chorus", "0.5", "0.9", "50", "0.4", "0.25", "2", "-t", "60", "0.32", "0.4", "2.3", "-t", "40", "0.3", "0.3", "1.3", "-s"]]}
+{"effects": [["channels", "1"]]}
+{"effects": [["channels", "2"]]}
+{"effects": [["channels", "3"]]}
+{"effects": [["compand", "0.3,1", "6:-70,-60,-20", "-5", "-90", "0.2"]]}
+{"effects": [["compand", ".1,.2", "-inf,-50.1,-inf,-50,-50", "0", "-90", ".1"]]}
+{"effects": [["compand", ".1,.1", "-45.1,-45,-inf,0,-inf", "45", "-90", ".1"]]}
+{"effects": [["contrast", "0"]]}
+{"effects": [["contrast", "25"]]}
+{"effects": [["contrast", "50"]]}
+{"effects": [["contrast", "75"]]}
+{"effects": [["contrast", "100"]]}
+{"effects": [["dcshift", "1.0"]]}
+{"effects": [["dcshift", "-1.0"]]}
+{"effects": [["deemph"]], "input_sample_rate": 44100}
+{"effects": [["delay", "1.5", "+1"]]}
+{"effects": [["dither", "-s"]]}
+{"effects": [["dither", "-S"]]}
+{"effects": [["divide"]]}
+{"effects": [["downsample", "2"]], "input_sample_rate": 8000, "output_sample_rate": 4000}
+{"effects": [["earwax"]], "input_sample_rate": 44100}
+{"effects": [["echo", "0.8", "0.88", "60", "0.4"]]}
+{"effects": [["echo", "0.8", "0.88", "6", "0.4"]]}
+{"effects": [["echo", "0.8", "0.9", "1000", "0.3"]]}
+{"effects": [["echo", "0.8", "0.9", "1000", "0.3", "1800", "0.25"]]}
+{"effects": [["echos", "0.8", "0.7", "700", "0.25", "700", "0.3"]]}
+{"effects": [["echos", "0.8", "0.7", "700", "0.25", "900", "0.3"]]}
+{"effects": [["echos", "0.8", "0.7", "40", "0.25", "63", "0.3"]]}
+{"effects": [["equalizer", "300", "10", "5"]]}
+{"effects": [["fade", "q", "3"]]}
+{"effects": [["fade", "h", "3"]]}
+{"effects": [["fade", "t", "3"]]}
+{"effects": [["fade", "l", "3"]]}
+{"effects": [["fade", "p", "3"]]}
+{"effects": [["fir", "0.0195", "-0.082", "0.234", "0.891", "-0.145", "0.043"]]}
+{"effects": [["fir", "test/assets/sox_effect_test_fir_coeffs.txt"]]}
+{"effects": [["flanger"]]}
+{"effects": [["gain", "-n"]]}
+{"effects": [["gain", "-n", "-3"]]}
+{"effects": [["gain", "-l", "-6"]]}
+{"effects": [["highpass", "-1", "300"]]}
+{"effects": [["highpass", "-2", "300"]]}
+{"effects": [["hilbert"]]}
+{"effects": [["loudness"]]}
+{"effects": [["lowpass", "-1", "300"]]}
+{"effects": [["lowpass", "-2", "300"]]}
+{"effects": [["mcompand", "0.005,0.1 -47,-40,-34,-34,-17,-33", "100", "0.003,0.05 -47,-40,-34,-34,-17,-33", "400", "0.000625,0.0125 -47,-40,-34,-34,-15,-33", "1600", "0.0001,0.025 -47,-40,-34,-34,-31,-31,-0,-30", "6400", "0,0.025 -38,-31,-28,-28,-0,-25"]], "input_sample_rate": 44100}
+{"effects": [["norm"]]}
+{"effects": [["oops"]]}
+{"effects": [["overdrive"]]}
+{"effects": [["pad"]]}
+{"effects": [["phaser"]]}
+{"effects": [["pitch", "6.48"], ["rate", "8030"]], "output_sample_rate": 8030}
+{"effects": [["pitch", "-6.50"], ["rate", "7970"]], "output_sample_rate": 7970}
+{"effects": [["rate", "4567"]], "output_sample_rate": 4567}
+{"effects": [["remix", "6", "7", "8", "0"]], "num_channels": 8}
+{"effects": [["remix", "1-3,7", "3"]], "num_channels": 8}
+{"effects": [["repeat"]]}
+{"effects": [["reverb"]]}
+{"effects": [["reverse"]]}
+{"effects": [["riaa"]], "input_sample_rate": 44100}
+{"effects": [["silence", "0"]]}
+{"effects": [["sinc", "3k"]]}
+{"effects": [["speed", "1.3"]], "input_sample_rate": 4000, "output_sample_rate": 5200}
+{"effects": [["speed", "0.7"]], "input_sample_rate": 4000, "output_sample_rate": 2800}
+{"effects": [["stat"]]}
+{"effects": [["stats"]]}
+{"effects": [["stretch"]]}
+{"effects": [["swap"]]}
+{"effects": [["synth"]]}
+{"effects": [["tempo", "0.9"]]}
+{"effects": [["tempo", "1.1"]]}
+{"effects": [["treble", "3"]]}
+{"effects": [["tremolo", "300", "40"]]}
+{"effects": [["tremolo", "300", "50"]]}
+{"effects": [["trim", "0", "0.1"]]}
+{"effects": [["upsample", "2"]], "input_sample_rate": 8000, "output_sample_rate": 16000}
+{"effects": [["vad"]]}
+{"effects": [["vol", "3"]]}
@@ -0,0 +1 @@
+0.0195 -0.082 0.234 0.891 -0.145 0.043
@@ -56,6 +56,7 @@ def get_sinusoid(
     n_channels: int = 1,
     dtype: Union[str, torch.dtype] = "float32",
     device: Union[str, torch.device] = "cpu",
+    channels_first: bool = True,
 ):
     """Generate pseudo audio data with sine wave.
 
@@ -75,4 +76,7 @@ def get_sinusoid(
     pie2 = 2 * 3.141592653589793
     end = pie2 * frequency * duration
     theta = torch.linspace(0, end, sample_rate * duration, dtype=dtype, device=device)
-    return torch.sin(theta, out=None).repeat([n_channels, 1])
+    sin = torch.sin(theta, out=None).repeat([n_channels, 1])
+    if not channels_first:
+        sin = sin.transpose(1, 0)
+    return sin
@@ -77,3 +77,12 @@ def convert_audio_file(
     command += [dst_path]
     print(' '.join(command))
     subprocess.run(command, check=True)
+
+
+def run_sox_effect(input_file, output_file, effect, output_sample_rate):
+    """Run the ``sox`` command line tool to apply effects to an audio file.
+
+    Args:
+        input_file: Path passed to ``sox`` as the input file.
+        output_file: Path passed to ``sox`` as the output file.
+        effect: Flat list of command line tokens (effect name followed by its
+            arguments) appended after the output path. Must be a ``list``
+            because it is concatenated onto the command list.
+        output_sample_rate: When truthy, a trailing ``rate <value>`` effect is
+            appended to the command. When falsy (e.g. ``None``), nothing is added.
+
+    Raises:
+        subprocess.CalledProcessError: If the command exits with a non-zero
+            status (``check=True``).
+    """
+    command = ['sox', '-V', input_file, output_file] + effect
+    if output_sample_rate:
+        command += ['rate', str(output_sample_rate)]
+    # Echo the exact command so it shows up in the test output.
+    print(' '.join(command))
+    subprocess.run(command, check=True)
@@ -0,0 +1,10 @@
+def name_func(func, _, params):
+    """Build a test name from the test function name and its parameters.
+
+    Intended to be passed as ``name_func`` to ``parameterized.expand``.
+    The second argument is ignored.
+
+    If the first positional parameter is a string, all positional parameters
+    are stringified and joined with ``_``. Otherwise the first positional
+    parameter is treated as an iterable and its elements are joined instead.
+
+    Returns:
+        str: ``"<function name>_<joined parameters>"``.
+    """
+    if isinstance(params.args[0], str):
+        args = "_".join([str(arg) for arg in params.args])
+    else:
+        args = "_".join([str(arg) for arg in params.args[0]])
+    return f'{func.__name__}_{args}'
+
+
+def flatten_lists(l):
+    """Flatten one level of nesting, e.g. ``[[a, b], [c]]`` -> ``[a, b, c]``.
+
+    Only the outermost level is flattened; items of the inner iterables are
+    returned as they are, in order.
+    """
+    return [item for sublist in l for item in sublist]
@@ -0,0 +1,120 @@
+import itertools
+
+from torchaudio import sox_effects
+from parameterized import parameterized
+
+from ..common_utils import (
+    TempDirMixin,
+    PytorchTestCase,
+    skipIfNoExtension,
+    get_sinusoid,
+    get_wav_data,
+    save_wav,
+    load_wav,
+    load_params,
+    sox_utils,
+)
+from .common import (
+    name_func,
+    flatten_lists,
+)
+
+
+@skipIfNoExtension
+class TestSoxEffects(TempDirMixin, PytorchTestCase):
+    """Tests for ``torchaudio.sox_effects``.
+
+    Covers ``effect_names``, the behaviour of ``apply_effects_tensor`` and
+    ``apply_effects_file`` when the effect list is empty, and a comparison of
+    both functions against the output of the ``sox`` command for each
+    parameter set loaded from ``sox_effect_test_args.json``.
+    """
+    def test_list_effects(self):
+        """effect_names returns the list of available effects"""
+        effects = sox_effects.effect_names()
+        # We cannot infer what effects are available, so only check some of them.
+        assert 'highpass' in effects
+        assert 'phaser' in effects
+        assert 'gain' in effects
+
+    # Cartesian product of dtype x sample rate x number of channels x channels_first.
+    @parameterized.expand(list(itertools.product(
+        ['float32', 'int32', 'int16', 'uint8'],
+        [8000, 16000],
+        [1, 2, 4, 8],
+        [True, False]
+    )), name_func=name_func)
+    def test_apply_no_effect_tensor(self, dtype, sample_rate, num_channels, channels_first):
+        """`apply_effects_tensor` without effects should return identical data as input"""
+        original = get_wav_data(dtype, num_channels, channels_first=channels_first)
+        # Hand a clone to the function under test so that `original` stays
+        # available as a reference for detecting in-place modification.
+        expected = original.clone()
+        found, output_sample_rate = sox_effects.apply_effects_tensor(
+            expected, sample_rate, [], channels_first)
+
+        assert output_sample_rate == sample_rate
+        # SoxEffect should not alter the input Tensor object
+        self.assertEqual(original, expected)
+        # SoxEffect should not return the same Tensor object
+        assert expected is not found
+        # Returned Tensor should equal to the input Tensor
+        self.assertEqual(expected, found)
+
+    # Cartesian product of dtype x sample rate x number of channels x channels_first.
+    @parameterized.expand(list(itertools.product(
+        ['float32', 'int32', 'int16', 'uint8'],
+        [8000, 16000],
+        [1, 2, 4, 8],
+        [False, True],
+    )), name_func=name_func)
+    def test_apply_no_effect_file(self, dtype, sample_rate, num_channels, channels_first):
+        """`apply_effects_file` without effects should return identical data as input"""
+        path = self.get_temp_path('input.wav')
+        data = get_wav_data(dtype, num_channels, channels_first=channels_first)
+        save_wav(path, data, sample_rate, channels_first=channels_first)
+
+        found, output_sample_rate = sox_effects.apply_effects_file(path, [], channels_first)
+
+        assert output_sample_rate == sample_rate
+        self.assertEqual(data, found)
+
+    # Test names are "<function name>_<index>_<name of the first effect>".
+    @parameterized.expand(
+        load_params("sox_effect_test_args.json"),
+        name_func=lambda f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}',
+    )
+    def test_apply_effect_tensor(self, args):
+        """`apply_effects_tensor` should return identical data as sox command
+
+        ``args`` is one parameter set loaded from ``sox_effect_test_args.json``.
+        ``effects`` is required, ``num_channels`` defaults to 2,
+        ``input_sample_rate`` defaults to 8000 and ``output_sample_rate`` is
+        optional.
+        """
+        effects = args['effects']
+        num_channels = args.get("num_channels", 2)
+        input_sr = args.get("input_sample_rate", 8000)
+        output_sr = args.get("output_sample_rate")
+
+        input_path = self.get_temp_path('input.wav')
+        output_path = self.get_temp_path('output.wav')
+
+        original = get_sinusoid(
+            frequency=800, sample_rate=input_sr,
+            n_channels=num_channels, dtype='float32')
+        save_wav(input_path, original, input_sr)
+        # Reference: run the sox command on the saved file. `output_sr` is
+        # passed to the sox command only, not to `apply_effects_tensor` below.
+        sox_utils.run_sox_effect(input_path, output_path, flatten_lists(effects), output_sr)
+
+        expected, expected_sr = load_wav(output_path)
+        # Under test: apply the same effects to the in-memory Tensor.
+        found, sr = sox_effects.apply_effects_tensor(original, input_sr, effects)
+
+        assert sr == expected_sr
+        self.assertEqual(expected, found)
+
+    # Test names are "<function name>_<index>_<name of the first effect>".
+    @parameterized.expand(
+        load_params("sox_effect_test_args.json"),
+        name_func=lambda f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}',
+    )
+    def test_apply_effects_file(self, args):
+        """`apply_effects_file` should return identical data as sox command
+
+        ``args`` is one parameter set loaded from ``sox_effect_test_args.json``.
+        ``effects`` is required, ``num_channels`` defaults to 2,
+        ``input_sample_rate`` defaults to 8000 and ``output_sample_rate`` is
+        optional. The input file is always written as ``int32`` with
+        ``channels_first=True``.
+        """
+        dtype = 'int32'
+        channels_first = True
+        effects = args['effects']
+        num_channels = args.get("num_channels", 2)
+        input_sr = args.get("input_sample_rate", 8000)
+        output_sr = args.get("output_sample_rate")
+
+        input_path = self.get_temp_path('input.wav')
+        output_path = self.get_temp_path('output.wav')
+        data = get_wav_data(dtype, num_channels, channels_first=channels_first)
+        save_wav(input_path, data, input_sr, channels_first=channels_first)
+        # Reference: run the sox command on the saved file. `output_sr` is
+        # passed to the sox command only, not to `apply_effects_file` below.
+        sox_utils.run_sox_effect(input_path, output_path, flatten_lists(effects), output_sr)
+
+        expected, expected_sr = load_wav(output_path)
+        # Under test: apply the same effects directly to the input file.
+        found, sr = sox_effects.apply_effects_file(input_path, effects, channels_first)
+
+        assert sr == expected_sr
+        self.assertEqual(found, expected)
@@ -0,0 +1,98 @@
+from typing import List
+
+import torch
+from torchaudio import sox_effects
+from parameterized import parameterized
+
+from ..common_utils import (
+    TempDirMixin,
+    PytorchTestCase,
+    skipIfNoExtension,
+    get_sinusoid,
+    load_params,
+    save_wav,
+)
+
+
+class SoxEffectTensorTransform(torch.nn.Module):
+    """Module wrapper around ``sox_effects.apply_effects_tensor``.
+
+    Stores the effect list, sample rate and ``channels_first`` flag at
+    construction time so that ``forward`` only takes the Tensor. The tests in
+    this file compile instances of this class with ``torch.jit.script``.
+    """
+    # Explicit annotation for the nested-list attribute
+    # (presumably required for TorchScript to type it — TODO confirm).
+    effects: List[List[str]]
+
+    def __init__(self, effects: List[List[str]], sample_rate: int, channels_first: bool):
+        super().__init__()
+        self.effects = effects
+        self.sample_rate = sample_rate
+        self.channels_first = channels_first
+
+    def forward(self, tensor: torch.Tensor):
+        # Returns whatever `apply_effects_tensor` returns; callers in this file
+        # unpack it as `(tensor, sample_rate)`.
+        return sox_effects.apply_effects_tensor(
+            tensor, self.sample_rate, self.effects, self.channels_first)
+
+
+class SoxEffectFileTransform(torch.nn.Module):
+    """Module wrapper around ``sox_effects.apply_effects_file``.
+
+    Stores the effect list and ``channels_first`` flag at construction time so
+    that ``forward`` only takes the file path. The tests in this file compile
+    instances of this class with ``torch.jit.script``.
+    """
+    # Explicit attribute annotations
+    # (presumably for the benefit of TorchScript — TODO confirm).
+    effects: List[List[str]]
+    channels_first: bool
+
+    def __init__(self, effects: List[List[str]], channels_first: bool):
+        super().__init__()
+        self.effects = effects
+        self.channels_first = channels_first
+
+    def forward(self, path: str):
+        # Returns whatever `apply_effects_file` returns; callers in this file
+        # unpack it as `(tensor, sample_rate)`.
+        return sox_effects.apply_effects_file(path, self.effects, self.channels_first)
+
+
+@skipIfNoExtension
+class TestTorchScript(TempDirMixin, PytorchTestCase):
+    """Check that the sox effect functions give the same result when scripted.
+
+    Each test scripts a wrapper module, saves it, loads it back, and compares
+    the loaded module's output with a direct call to the corresponding
+    ``sox_effects`` function, for every parameter set loaded from
+    ``sox_effect_test_args.json``.
+    """
+    # Test names are "<function name>_<index>_<name of the first effect>".
+    @parameterized.expand(
+        load_params("sox_effect_test_args.json"),
+        name_func=lambda f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}',
+    )
+    def test_apply_effects_tensor(self, args):
+        """Scripted `apply_effects_tensor` should match the direct function call"""
+        effects = args['effects']
+        channels_first = True
+        num_channels = args.get("num_channels", 2)
+        input_sr = args.get("input_sample_rate", 8000)
+
+        trans = SoxEffectTensorTransform(effects, input_sr, channels_first)
+
+        # Round-trip the scripted module through save/load before using it.
+        path = self.get_temp_path('sox_effect.zip')
+        torch.jit.script(trans).save(path)
+        trans = torch.jit.load(path)
+
+        wav = get_sinusoid(
+            frequency=800, sample_rate=input_sr,
+            n_channels=num_channels, dtype='float32', channels_first=channels_first)
+        found, sr_found = trans(wav)
+        expected, sr_expected = sox_effects.apply_effects_tensor(
+            wav, input_sr, effects, channels_first)
+
+        assert sr_found == sr_expected
+        self.assertEqual(expected, found)
+
+    # Test names are "<function name>_<index>_<name of the first effect>".
+    @parameterized.expand(
+        load_params("sox_effect_test_args.json"),
+        name_func=lambda f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}',
+    )
+    def test_apply_effects_file(self, args):
+        """Scripted `apply_effects_file` should match the direct function call"""
+        effects = args['effects']
+        channels_first = True
+        num_channels = args.get("num_channels", 2)
+        input_sr = args.get("input_sample_rate", 8000)
+
+        trans = SoxEffectFileTransform(effects, channels_first)
+
+        # Round-trip the scripted module through save/load before using it.
+        path = self.get_temp_path('sox_effect.zip')
+        torch.jit.script(trans).save(path)
+        trans = torch.jit.load(path)
+
+        # NOTE: `path` is reused; from here on it is the input wav file,
+        # not the saved module.
+        path = self.get_temp_path('input.wav')
+        wav = get_sinusoid(
+            frequency=800, sample_rate=input_sr,
+            n_channels=num_channels, dtype='float32', channels_first=channels_first)
+        save_wav(path, wav, sample_rate=input_sr, channels_first=channels_first)
+
+        found, sr_found = trans(path)
+        expected, sr_expected = sox_effects.apply_effects_file(path, effects, channels_first)
+
+        assert sr_found == sr_expected
+        self.assertEqual(expected, found)
@@ -29,6 +29,15 @@ static auto registerTensorSignal =
         .def("get_sample_rate", &sox_utils::TensorSignal::getSampleRate)
         .def("get_channels_first", &sox_utils::TensorSignal::getChannelsFirst);
 
+// Registers the sox_utils::set_* functions (seed, verbosity, use_threads,
+// buffer_size) as operators named "torchaudio::sox_utils_set_*".
+static auto registerSetSoxOptions =
+    torch::RegisterOperators()
+        .op("torchaudio::sox_utils_set_seed", &sox_utils::set_seed)
+        .op("torchaudio::sox_utils_set_verbosity", &sox_utils::set_verbosity)
+        .op("torchaudio::sox_utils_set_use_threads",
+            &sox_utils::set_use_threads)
+        .op("torchaudio::sox_utils_set_buffer_size",
+            &sox_utils::set_buffer_size);
+
 ////////////////////////////////////////////////////////////////////////////////
 // sox_io.h
 ////////////////////////////////////////////////////////////////////////////////
@@ -58,12 +67,24 @@ static auto registerSaveAudioFile = torch::RegisterOperators().op(
 // sox_effects.h
 ////////////////////////////////////////////////////////////////////////////////
 static auto registerSoxEffects =
-    torch::RegisterOperators(
-        "torchaudio::sox_effects_initialize_sox_effects",
-        &sox_effects::initialize_sox_effects)
+    torch::RegisterOperators()
+        .op("torchaudio::sox_effects_initialize_sox_effects",
+            &sox_effects::initialize_sox_effects)
         .op("torchaudio::sox_effects_shutdown_sox_effects",
             &sox_effects::shutdown_sox_effects)
-        .op("torchaudio::sox_effects_list_effects", &sox_effects::list_effects);
+        .op("torchaudio::sox_effects_list_effects", &sox_effects::list_effects)
+        // apply_effects_tensor is registered with an explicit schema string:
+        // it takes and returns the custom class torchaudio.TensorSignal.
+        .op(torch::RegisterOperators::options()
+                .schema(
+                    "torchaudio::sox_effects_apply_effects_tensor(__torch__.torch.classes.torchaudio.TensorSignal input_signal, str[][] effects) -> __torch__.torch.classes.torchaudio.TensorSignal output_signal")
+                .catchAllKernel<
+                    decltype(sox_effects::apply_effects_tensor),
+                    &sox_effects::apply_effects_tensor>())
+        // apply_effects_file is registered with an explicit schema string:
+        // it takes a path, an effect list and a flag, and returns a
+        // torchaudio.TensorSignal.
+        .op(torch::RegisterOperators::options()
+                .schema(
+                    "torchaudio::sox_effects_apply_effects_file(str path, str[][] effects, bool channels_first) -> __torch__.torch.classes.torchaudio.TensorSignal output_signal")
+                .catchAllKernel<
+                    decltype(sox_effects::apply_effects_file),
+                    &sox_effects::apply_effects_file>());
 
 } // namespace
 } // namespace torchaudio