Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions docs/source/functional.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,6 @@ torchaudio.functional

Functions to perform common audio operations.

:hidden:`istft`
~~~~~~~~~~~~~~~

.. autofunction:: istft

:hidden:`spectrogram`
~~~~~~~~~~~~~~~~~~~~~

Expand Down
261 changes: 0 additions & 261 deletions test/functional_cpu_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,31 +11,6 @@
from .functional_impl import Lfilter


def random_float_tensor(seed, size, a=22695477, c=1, m=2 ** 32):
    """Build a deterministic pseudo-random float tensor from a seed.

    The values come from a linear congruential generator
    (https://en.wikipedia.org/wiki/Linear_congruential_generator)::

        X_{n + 1} = (a * X_n + c) % m

    The default constants are the Borland C/C++ ones. Each generated integer
    is divided by ``m``, so the values are nominally in [0, 1).
    NOTE(review): the integers are cast to float32 before the division, so a
    value very close to ``m`` may round up to exactly 1.0.

    Inputs:
        seed (int): the initial state ``X_0`` (not itself part of the output)
        size (Tuple[int]): the size of the output tensor; may contain zeros,
            in which case an empty tensor of that size is returned
        a (int): the multiplier constant to the generator
        c (int): the additive constant to the generator
        m (int): the modulus constant to the generator

    Returns:
        torch.Tensor: float32 tensor of shape ``size``.
    """
    num_elements = 1
    for dim in size:
        num_elements *= dim

    # Emit exactly ``num_elements`` values. Unconditionally seeding the list
    # with a first value would leave one element too many for a zero-sized
    # request and make the ``view`` below fail.
    values = []
    state = seed
    for _ in range(num_elements):
        state = (a * state + c) % m
        values.append(state)

    return torch.tensor(values).float().view(size) / m


class TestLFilterFloat32(Lfilter, common_utils.PytorchTestCase):
dtype = torch.float32
device = torch.device('cpu')
Expand Down Expand Up @@ -63,242 +38,6 @@ def test_two_channels(self):
torch.testing.assert_allclose(computed, expected)


def _compare_estimate(sound, estimate, atol=1e-6, rtol=1e-8):
# trim sound for case when constructed signal is shorter than original
sound = sound[..., :estimate.size(-1)]
torch.testing.assert_allclose(estimate, sound, atol=atol, rtol=rtol)


def _test_istft_is_inverse_of_stft(kwargs):
    """Check that istft undoes stft for a batch of pseudo-random signals.

    For each of three data sizes, 100 signals are generated (one per seed),
    transformed with ``torch.stft`` and transformed back with
    ``torchaudio.functional.istft`` using the same ``kwargs``; the result is
    then compared against the original signal.

    Inputs:
        kwargs (dict): keyword arguments forwarded to both stft and istft
    """
    for data_size in [(2, 20), (3, 15), (4, 10)]:
        for seed in range(100):
            waveform = random_float_tensor(seed, data_size)

            spectrum = torch.stft(waveform, **kwargs)
            # Ask istft for the original number of samples along dim 1.
            reconstructed = torchaudio.functional.istft(
                spectrum, length=waveform.size(1), **kwargs)

            _compare_estimate(waveform, reconstructed)


class TestIstft(common_utils.TorchaudioTestCase):
    """Correctness checks for ``torchaudio.functional.istft`` on varied inputs."""
    number_of_trials = 100

    def test_istft_is_inverse_of_stft1(self):
        """Round trip with: hann_window, centered, normalized, onesided."""
        stft_kwargs = {
            'n_fft': 12,
            'hop_length': 4,
            'win_length': 12,
            'window': torch.hann_window(12),
            'center': True,
            'pad_mode': 'reflect',
            'normalized': True,
            'onesided': True,
        }
        _test_istft_is_inverse_of_stft(stft_kwargs)

    def test_istft_is_inverse_of_stft2(self):
        """Round trip with: hann_window, centered, not normalized, not onesided."""
        stft_kwargs = {
            'n_fft': 12,
            'hop_length': 2,
            'win_length': 8,
            'window': torch.hann_window(8),
            'center': True,
            'pad_mode': 'reflect',
            'normalized': False,
            'onesided': False,
        }
        _test_istft_is_inverse_of_stft(stft_kwargs)

    def test_istft_is_inverse_of_stft3(self):
        """Round trip with: hamming_window, centered, normalized, not onesided."""
        stft_kwargs = {
            'n_fft': 15,
            'hop_length': 3,
            'win_length': 11,
            'window': torch.hamming_window(11),
            'center': True,
            'pad_mode': 'constant',
            'normalized': True,
            'onesided': False,
        }
        _test_istft_is_inverse_of_stft(stft_kwargs)

    def test_istft_is_inverse_of_stft4(self):
        """Round trip with: hamming_window, not centered, not normalized, onesided.

        The window has the same size as n_fft.
        """
        stft_kwargs = {
            'n_fft': 5,
            'hop_length': 2,
            'win_length': 5,
            'window': torch.hamming_window(5),
            'center': False,
            'pad_mode': 'constant',
            'normalized': False,
            'onesided': True,
        }
        _test_istft_is_inverse_of_stft(stft_kwargs)

    def test_istft_is_inverse_of_stft5(self):
        """Round trip with: hamming_window, not centered, not normalized, not onesided.

        The window has the same size as n_fft.
        """
        stft_kwargs = {
            'n_fft': 3,
            'hop_length': 2,
            'win_length': 3,
            'window': torch.hamming_window(3),
            'center': False,
            'pad_mode': 'reflect',
            'normalized': False,
            'onesided': False,
        }
        _test_istft_is_inverse_of_stft(stft_kwargs)

    def test_istft_of_ones(self):
        """A spectrum with only the first bin set to 4 inverts to a signal of ones."""
        # Hard-coded stand-in for: stft = torch.stft(torch.ones(4), 4)
        first_bin = [[4., 0.]] * 5
        silent_bin = [[0., 0.]] * 5
        spectrum = torch.tensor([first_bin, silent_bin, silent_bin])

        reconstructed = torchaudio.functional.istft(spectrum, n_fft=4, length=4)
        _compare_estimate(torch.ones(4), reconstructed)

    def test_istft_of_zeros(self):
        """An all-zero spectrum inverts to an all-zero signal."""
        # Hard-coded stand-in for: stft = torch.stft(torch.zeros(4), 4)
        spectrum = torch.zeros((3, 5, 2))

        reconstructed = torchaudio.functional.istft(spectrum, n_fft=4, length=4)
        _compare_estimate(torch.zeros(4), reconstructed)

    def test_istft_requires_overlap_windows(self):
        """A hop larger than the window leaves gaps and must raise."""
        # The window has size 1 but the hop is 20, so consecutive windows do
        # not touch; istft is expected to reject this with a RuntimeError.
        spectrum = torch.zeros((3, 5, 2))
        with self.assertRaises(RuntimeError):
            torchaudio.functional.istft(
                spectrum, n_fft=4, hop_length=20, win_length=1, window=torch.ones(1))

    def test_istft_requires_nola(self):
        """A window of ones is accepted; a window of zeros must raise."""
        spectrum = torch.zeros((3, 5, 2))
        accepted_kwargs = {
            'n_fft': 4,
            'win_length': 4,
            'window': torch.ones(4),
        }
        rejected_kwargs = {
            'n_fft': 4,
            'win_length': 4,
            'window': torch.zeros(4),
        }

        # A window of ones meets NOLA, so this call must simply succeed ...
        torchaudio.functional.istft(spectrum, **accepted_kwargs)
        # ... whereas a window of zeros does not and should throw an error.
        with self.assertRaises(RuntimeError):
            torchaudio.functional.istft(spectrum, **rejected_kwargs)

    def test_istft_requires_non_empty(self):
        """Inputs with a zero-sized leading or middle dimension must raise."""
        for empty_shape in [(3, 0, 2), (0, 3, 2)]:
            with self.assertRaises(RuntimeError):
                torchaudio.functional.istft(torch.zeros(empty_shape), 2)

    def _test_istft_of_sine(self, amplitude, L, n):
        """Invert a hand-built spectrum of amplitude*sin(2*pi/L*n*x).

        Both the hop length and the window size equal ``L``.

        Inputs:
            amplitude (number): scale of the sine wave
            L (int): hop length, window length and n_fft
            n (int): frequency index of the sine wave
        """
        x = torch.arange(2 * L + 1, dtype=torch.get_default_dtype())
        sound = amplitude * torch.sin(2 * math.pi / L * x * n)

        # Hand-built stand-in for:
        # stft = torch.stft(sound, L, hop_length=L, win_length=L,
        #                   window=torch.ones(L), center=False, normalized=False)
        num_bins = L // 2 + 1
        spectrum = torch.zeros((num_bins, 2, 2))
        peak = (amplitude * L) / 2.0
        if n < num_bins:
            spectrum[n, :, 1] = -peak

        mirrored_bin = L - n
        if 0 <= mirrored_bin < num_bins:
            # symmetric about L // 2
            spectrum[mirrored_bin, :, 1] = peak

        reconstructed = torchaudio.functional.istft(
            spectrum, L, hop_length=L, win_length=L,
            window=torch.ones(L), center=False, normalized=False)
        # There is a larger error due to the scaling of amplitude
        _compare_estimate(sound, reconstructed, atol=1e-3)

    def test_istft_of_sine(self):
        """Run the sine reconstruction check over several (amplitude, L, n) cases."""
        cases = [
            (123, 5, 1),
            (150, 5, 2),
            (111, 5, 3),
            (160, 7, 4),
            (145, 8, 5),
            (80, 9, 6),
            (99, 10, 7),
        ]
        for amplitude, window_size, frequency in cases:
            self._test_istft_of_sine(amplitude=amplitude, L=window_size, n=frequency)

    def _test_linearity_of_istft(self, data_size, kwargs, atol=1e-6, rtol=1e-8):
        """Check istft(a*x + b*y) against a*istft(x) + b*istft(y).

        Repeated ``number_of_trials`` times with seeded inputs and random
        coefficients ``a`` and ``b`` drawn from ``torch.rand``.

        Inputs:
            data_size (Tuple[int]): size of the generated input tensors
            kwargs (dict): keyword arguments forwarded to istft
            atol (float): absolute tolerance for the comparison
            rtol (float): relative tolerance for the comparison
        """
        for trial in range(self.number_of_trials):
            first = random_float_tensor(trial, data_size)
            second = random_float_tensor(trial * 2, data_size)
            a, b = torch.rand(2)

            first_inverse = torchaudio.functional.istft(first, **kwargs)
            second_inverse = torchaudio.functional.istft(second, **kwargs)
            combined_after = a * first_inverse + b * second_inverse

            combined_before = torchaudio.functional.istft(a * first + b * second, **kwargs)
            _compare_estimate(combined_after, combined_before, atol, rtol)

    def test_linearity_of_istft1(self):
        """Linearity with: hann_window, centered, normalized, onesided."""
        istft_kwargs = {
            'n_fft': 12,
            'window': torch.hann_window(12),
            'center': True,
            'pad_mode': 'reflect',
            'normalized': True,
            'onesided': True,
        }
        self._test_linearity_of_istft((2, 7, 7, 2), istft_kwargs)

    def test_linearity_of_istft2(self):
        """Linearity with: hann_window, centered, not normalized, not onesided."""
        istft_kwargs = {
            'n_fft': 12,
            'window': torch.hann_window(12),
            'center': True,
            'pad_mode': 'reflect',
            'normalized': False,
            'onesided': False,
        }
        self._test_linearity_of_istft((2, 12, 7, 2), istft_kwargs)

    def test_linearity_of_istft3(self):
        """Linearity with: hamming_window, centered, normalized, not onesided."""
        istft_kwargs = {
            'n_fft': 12,
            'window': torch.hamming_window(12),
            'center': True,
            'pad_mode': 'constant',
            'normalized': True,
            'onesided': False,
        }
        self._test_linearity_of_istft((2, 12, 7, 2), istft_kwargs)

    def test_linearity_of_istft4(self):
        """Linearity with: hamming_window, not centered, not normalized, onesided."""
        istft_kwargs = {
            'n_fft': 12,
            'window': torch.hamming_window(12),
            'center': False,
            'pad_mode': 'constant',
            'normalized': False,
            'onesided': True,
        }
        # This configuration is compared with a looser absolute tolerance.
        self._test_linearity_of_istft((2, 7, 3, 2), istft_kwargs, atol=1e-5, rtol=1e-8)


class TestDetectPitchFrequency(common_utils.TorchaudioTestCase):
@parameterized.expand([(100,), (440,)])
def test_pitch(self, frequency):
Expand Down
8 changes: 0 additions & 8 deletions test/test_batch_consistency.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,6 @@ def test_detect_pitch_frequency(self, frequency, sample_rate, n_channels):
n_channels=n_channels, duration=5)
self.assert_batch_consistencies(F.detect_pitch_frequency, waveform, sample_rate)

def test_istft(self):
    """Run the batch-consistency check on ``F.istft`` with a fixed spectrum.

    The input has three rows of five (real, imag) pairs; only the first row
    is non-zero, with a real part of 4 in every pair.
    """
    first_bin = [[4., 0.]] * 5
    silent_bin = [[0., 0.]] * 5
    spectrum = torch.tensor([first_bin, silent_bin, silent_bin])
    self.assert_batch_consistencies(F.istft, spectrum, n_fft=4, length=4)

def test_contrast(self):
waveform = torch.rand(2, 100) - 0.5
self.assert_batch_consistencies(F.contrast, waveform, enhancement_amount=80.)
Expand Down
Loading