Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ The :mod:`torchaudio` package consists of I/O, popular datasets and common audio
kaldi_io
transforms
functional
utils

.. automodule:: torchaudio
:members:
10 changes: 8 additions & 2 deletions docs/source/sox_effects.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,16 @@
torchaudio.sox_effects
======================

Create SoX effects chain for preprocessing audio.

.. currentmodule:: torchaudio.sox_effects

Apply SoX effects chain on torch.Tensor or on file and load as torch.Tensor.

.. autofunction:: apply_effects_tensor

.. autofunction:: apply_effects_file

Create SoX effects chain for preprocessing audio.

:hidden:`SoxEffect`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
21 changes: 21 additions & 0 deletions docs/source/utils.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
.. role:: hidden
:class: hidden-section

torchaudio.utils.sox_utils
==========================

Utility module to configure libsox. This affects functionalities in ``sox_io`` backend and ``torchaudio.sox_effects``.

.. currentmodule:: torchaudio.utils.sox_utils

.. autofunction:: set_seed

.. autofunction:: set_verbosity

.. autofunction:: set_buffer_size

.. autofunction:: set_use_threads

.. autofunction:: list_effects

.. autofunction:: list_formats
88 changes: 88 additions & 0 deletions test/assets/sox_effect_test_args.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
{"effects": [["allpass", "300", "10"]]}
{"effects": [["band", "300", "10"]]}
{"effects": [["bandpass", "300", "10"]]}
{"effects": [["bandreject", "300", "10"]]}
{"effects": [["bass", "-10"]]}
{"effects": [["bend", ".35,180,.25", ".15,740,.53", "0,-520,.3"]]}
{"effects": [["biquad", "0.4", "0.2", "0.9", "0.7", "0.2", "0.6"]]}
{"effects": [["chorus", "0.7", "0.9", "55", "0.4", "0.25", "2", "-t"]]}
{"effects": [["chorus", "0.6", "0.9", "50", "0.4", "0.25", "2", "-t", "60", "0.32", "0.4", "1.3", "-s"]]}
{"effects": [["chorus", "0.5", "0.9", "50", "0.4", "0.25", "2", "-t", "60", "0.32", "0.4", "2.3", "-t", "40", "0.3", "0.3", "1.3", "-s"]]}
{"effects": [["channels", "1"]]}
{"effects": [["channels", "2"]]}
{"effects": [["channels", "3"]]}
{"effects": [["compand", "0.3,1", "6:-70,-60,-20", "-5", "-90", "0.2"]]}
{"effects": [["compand", ".1,.2", "-inf,-50.1,-inf,-50,-50", "0", "-90", ".1"]]}
{"effects": [["compand", ".1,.1", "-45.1,-45,-inf,0,-inf", "45", "-90", ".1"]]}
{"effects": [["contrast", "0"]]}
{"effects": [["contrast", "25"]]}
{"effects": [["contrast", "50"]]}
{"effects": [["contrast", "75"]]}
{"effects": [["contrast", "100"]]}
{"effects": [["dcshift", "1.0"]]}
{"effects": [["dcshift", "-1.0"]]}
{"effects": [["deemph"]], "input_sample_rate": 44100}
{"effects": [["delay", "1.5", "+1"]]}
{"effects": [["dither", "-s"]]}
{"effects": [["dither", "-S"]]}
{"effects": [["divide"]]}
{"effects": [["downsample", "2"]], "input_sample_rate": 8000, "output_sample_rate": 4000}
{"effects": [["earwax"]], "input_sample_rate": 44100}
{"effects": [["echo", "0.8", "0.88", "60", "0.4"]]}
{"effects": [["echo", "0.8", "0.88", "6", "0.4"]]}
{"effects": [["echo", "0.8", "0.9", "1000", "0.3"]]}
{"effects": [["echo", "0.8", "0.9", "1000", "0.3", "1800", "0.25"]]}
{"effects": [["echos", "0.8", "0.7", "700", "0.25", "700", "0.3"]]}
{"effects": [["echos", "0.8", "0.7", "700", "0.25", "900", "0.3"]]}
{"effects": [["echos", "0.8", "0.7", "40", "0.25", "63", "0.3"]]}
{"effects": [["equalizer", "300", "10", "5"]]}
{"effects": [["fade", "q", "3"]]}
{"effects": [["fade", "h", "3"]]}
{"effects": [["fade", "t", "3"]]}
{"effects": [["fade", "l", "3"]]}
{"effects": [["fade", "p", "3"]]}
{"effects": [["fir", "0.0195", "-0.082", "0.234", "0.891", "-0.145", "0.043"]]}
{"effects": [["fir", "test/assets/sox_effect_test_fir_coeffs.txt"]]}
{"effects": [["flanger"]]}
{"effects": [["gain", "-n"]]}
{"effects": [["gain", "-n", "-3"]]}
{"effects": [["gain", "-l", "-6"]]}
{"effects": [["highpass", "-1", "300"]]}
{"effects": [["highpass", "-2", "300"]]}
{"effects": [["hilbert"]]}
{"effects": [["loudness"]]}
{"effects": [["lowpass", "-1", "300"]]}
{"effects": [["lowpass", "-2", "300"]]}
{"effects": [["mcompand", "0.005,0.1 -47,-40,-34,-34,-17,-33", "100", "0.003,0.05 -47,-40,-34,-34,-17,-33", "400", "0.000625,0.0125 -47,-40,-34,-34,-15,-33", "1600", "0.0001,0.025 -47,-40,-34,-34,-31,-31,-0,-30", "6400", "0,0.025 -38,-31,-28,-28,-0,-25"]], "input_sample_rate": 44100}
{"effects": [["norm"]]}
{"effects": [["oops"]]}
{"effects": [["overdrive"]]}
{"effects": [["pad"]]}
{"effects": [["phaser"]]}
{"effects": [["pitch", "6.48"], ["rate", "8030"]], "output_sample_rate": 8030}
{"effects": [["pitch", "-6.50"], ["rate", "7970"]], "output_sample_rate": 7970}
{"effects": [["rate", "4567"]], "output_sample_rate": 4567}
{"effects": [["remix", "6", "7", "8", "0"]], "num_channels": 8}
{"effects": [["remix", "1-3,7", "3"]], "num_channels": 8}
{"effects": [["repeat"]]}
{"effects": [["reverb"]]}
{"effects": [["reverse"]]}
{"effects": [["riaa"]], "input_sample_rate": 44100}
{"effects": [["silence", "0"]]}
{"effects": [["sinc", "3k"]]}
{"effects": [["speed", "1.3"]], "input_sample_rate": 4000, "output_sample_rate": 5200}
{"effects": [["speed", "0.7"]], "input_sample_rate": 4000, "output_sample_rate": 2800}
{"effects": [["stat"]]}
{"effects": [["stats"]]}
{"effects": [["stretch"]]}
{"effects": [["swap"]]}
{"effects": [["synth"]]}
{"effects": [["tempo", "0.9"]]}
{"effects": [["tempo", "1.1"]]}
{"effects": [["treble", "3"]]}
{"effects": [["tremolo", "300", "40"]]}
{"effects": [["tremolo", "300", "50"]]}
{"effects": [["trim", "0", "0.1"]]}
{"effects": [["upsample", "2"]], "input_sample_rate": 8000, "output_sample_rate": 16000}
{"effects": [["vad"]]}
{"effects": [["vol", "3"]]}
1 change: 1 addition & 0 deletions test/assets/sox_effect_test_fir_coeffs.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.0195 -0.082 0.234 0.891 -0.145 0.043
6 changes: 5 additions & 1 deletion test/common_utils/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def get_sinusoid(
n_channels: int = 1,
dtype: Union[str, torch.dtype] = "float32",
device: Union[str, torch.device] = "cpu",
channels_first: bool = True,
):
"""Generate pseudo audio data with sine wave.

Expand All @@ -91,4 +92,7 @@ def get_sinusoid(
pie2 = 2 * 3.141592653589793
end = pie2 * frequency * duration
theta = torch.linspace(0, end, sample_rate * duration, dtype=dtype, device=device)
return torch.sin(theta, out=None).repeat([n_channels, 1])
sin = torch.sin(theta, out=None).repeat([n_channels, 1])
if not channels_first:
sin = sin.t()
return sin
21 changes: 21 additions & 0 deletions test/common_utils/sox_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,24 @@ def convert_audio_file(
command += [dst_path]
print(' '.join(command))
subprocess.run(command, check=True)


def _flattern(effects):
if not effects:
return effects
if isinstance(effects[0], str):
return effects
return [item for sublist in effects for item in sublist]


def run_sox_effect(input_file, output_file, effect, *, output_sample_rate=None, output_bitdepth=None):
"""Run sox effects"""
effect = _flattern(effect)
command = ['sox', '-V', '--no-dither', input_file]
if output_bitdepth:
command += ['--bits', str(output_bitdepth)]
command += [output_file] + effect
if output_sample_rate:
command += ['rate', str(output_sample_rate)]
print(' '.join(command))
subprocess.run(command, check=True)
25 changes: 10 additions & 15 deletions test/common_utils/test_case_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,33 +14,28 @@
class TempDirMixin:
"""Mixin to provide easy access to temp dir"""
temp_dir_ = None
base_temp_dir = None
temp_dir = None

@classmethod
def setUpClass(cls):
super().setUpClass()
@property
def base_temp_dir(self):
# If TORCHAUDIO_TEST_TEMP_DIR is set, use it instead of temporary directory.
# this is handy for debugging.
key = 'TORCHAUDIO_TEST_TEMP_DIR'
if key in os.environ:
cls.base_temp_dir = os.environ[key]
else:
cls.temp_dir_ = tempfile.TemporaryDirectory()
cls.base_temp_dir = cls.temp_dir_.name
return os.environ[key]
if self.__class__.temp_dir_ is None:
self.__class__.temp_dir_ = tempfile.TemporaryDirectory()
return self.__class__.temp_dir_.name

@classmethod
def tearDownClass(cls):
super().tearDownClass()
if isinstance(cls.temp_dir_, tempfile.TemporaryDirectory):
if cls.temp_dir_ is not None:
cls.temp_dir_.cleanup()

def setUp(self):
super().setUp()
self.temp_dir = os.path.join(self.base_temp_dir, self.id())
cls.temp_dir_ = None

def get_temp_path(self, *paths):
path = os.path.join(self.temp_dir, *paths)
temp_dir = os.path.join(self.base_temp_dir, self.id())
path = os.path.join(temp_dir, *paths)
os.makedirs(os.path.dirname(path), exist_ok=True)
return path

Expand Down
Empty file added test/sox_effect/__init__.py
Empty file.
6 changes: 6 additions & 0 deletions test/sox_effect/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
def name_func(func, _, params):
if isinstance(params.args[0], str):
args = "_".join([str(arg) for arg in params.args])
else:
args = "_".join([str(arg) for arg in params.args[0]])
return f'{func.__name__}_{args}'
115 changes: 115 additions & 0 deletions test/sox_effect/test_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
from typing import List, Tuple

import numpy as np
import torch
import torchaudio

from ..common_utils import (
TempDirMixin,
PytorchTestCase,
skipIfNoExtension,
get_whitenoise,
load_wav,
save_wav,
)


class RandomPerturbationFile(torch.utils.data.Dataset):
"""Given flist, apply random speed perturbation"""
def __init__(self, flist: List[str], sample_rate: int):
super().__init__()
self.flist = flist
self.sample_rate = sample_rate
self.rng = None

def __getitem__(self, index):
speed = self.rng.uniform(0.5, 2.0)
effects = [
['gain', '-n', '-10'],
['speed', f'{speed:.5f}'], # duration of data is 0.5 ~ 2.0 seconds.
['rate', f'{self.sample_rate}'],
['pad', '0', '1.5'], # add 1.5 seconds silence at the end
['trim', '0', '2'], # get the first 2 seconds
]
data, _ = torchaudio.sox_effects.apply_effects_file(self.flist[index], effects)
return data

def __len__(self):
return len(self.flist)


class RandomPerturbationTensor(torch.utils.data.Dataset):
"""Apply speed purturbation to (synthetic) Tensor data"""
def __init__(self, signals: List[Tuple[torch.Tensor, int]], sample_rate: int):
super().__init__()
self.signals = signals
self.sample_rate = sample_rate
self.rng = None

def __getitem__(self, index):
speed = self.rng.uniform(0.5, 2.0)
effects = [
['gain', '-n', '-10'],
['speed', f'{speed:.5f}'], # duration of data is 0.5 ~ 2.0 seconds.
['rate', f'{self.sample_rate}'],
['pad', '0', '1.5'], # add 1.5 seconds silence at the end
['trim', '0', '2'], # get the first 2 seconds
]
tensor, sample_rate = self.signals[index]
data, _ = torchaudio.sox_effects.apply_effects_tensor(tensor, sample_rate, effects)
return data

def __len__(self):
return len(self.signals)


def init_random_seed(worker_id):
dataset = torch.utils.data.get_worker_info().dataset
dataset.rng = np.random.RandomState(worker_id)


@skipIfNoExtension
class TestSoxEffectsDataset(TempDirMixin, PytorchTestCase):
"""Test `apply_effects_file` in multi-process dataloader setting"""

def _generate_dataset(self, num_samples=128):
flist = []
for i in range(num_samples):
sample_rate = np.random.choice([8000, 16000, 44100])
dtype = np.random.choice(['float32', 'int32', 'int16', 'uint8'])
data = get_whitenoise(n_channels=2, sample_rate=sample_rate, duration=1, dtype=dtype)
path = self.get_temp_path(f'{i:03d}_{dtype}_{sample_rate}.wav')
save_wav(path, data, sample_rate)
flist.append(path)
return flist

def test_apply_effects_file(self):
sample_rate = 12000
flist = self._generate_dataset()
dataset = RandomPerturbationFile(flist, sample_rate)
loader = torch.utils.data.DataLoader(
dataset, batch_size=32, num_workers=16,
worker_init_fn=init_random_seed,
)
for batch in loader:
assert batch.shape == (32, 2, 2 * sample_rate)

def _generate_signals(self, num_samples=128):
signals = []
for _ in range(num_samples):
sample_rate = np.random.choice([8000, 16000, 44100])
data = get_whitenoise(
n_channels=2, sample_rate=sample_rate, duration=1, dtype='float32')
signals.append((data, sample_rate))
return signals

def test_apply_effects_tensor(self):
sample_rate = 12000
signals = self._generate_signals()
dataset = RandomPerturbationTensor(signals, sample_rate)
loader = torch.utils.data.DataLoader(
dataset, batch_size=32, num_workers=16,
worker_init_fn=init_random_seed,
)
for batch in loader:
assert batch.shape == (32, 2, 2 * sample_rate)
Loading