Remove istft (#841)

j3remych3n · Jeremy Chen · web-flow · commit dab7f64b2bea · 2020-07-30T15:16:21.000-04:00
* `istft` has been migrated to `pytorch`, and `torchaudio.functional.istft` was deprecated in the 0.6.0 release. This PR removes it.

Co-authored-by: Jeremy Chen &lt;jeremyyy@fb.com&gt;
diff --git a/docs/source/functional.rst b/docs/source/functional.rst
@@ -8,11 +8,6 @@ torchaudio.functional
 
 Functions to perform common audio operations.
 
-:hidden:`istft`
-~~~~~~~~~~~~~~~
-
-.. autofunction:: istft
-
 :hidden:`spectrogram`
 ~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/test/functional_cpu_test.py b/test/functional_cpu_test.py
@@ -11,31 +11,6 @@
 from .functional_impl import Lfilter
 
 
-def random_float_tensor(seed, size, a=22695477, c=1, m=2 ** 32):
-    """ Generates random tensors given a seed and size
-    https://en.wikipedia.org/wiki/Linear_congruential_generator
-    X_{n + 1} = (a * X_n + c) % m
-    Using Borland C/C++ values
-
-    The tensor will have values between [0,1)
-    Inputs:
-        seed (int): an int
-        size (Tuple[int]): the size of the output tensor
-        a (int): the multiplier constant to the generator
-        c (int): the additive constant to the generator
-        m (int): the modulus constant to the generator
-    """
-    num_elements = 1
-    for s in size:
-        num_elements *= s
-
-    arr = [(a * seed + c) % m]
-    for i in range(num_elements - 1):
-        arr.append((a * arr[i] + c) % m)
-
-    return torch.tensor(arr).float().view(size) / m
-
-
 class TestLFilterFloat32(Lfilter, common_utils.PytorchTestCase):
     dtype = torch.float32
     device = torch.device('cpu')
@@ -63,242 +38,6 @@ def test_two_channels(self):
         torch.testing.assert_allclose(computed, expected)
 
 
-def _compare_estimate(sound, estimate, atol=1e-6, rtol=1e-8):
-    # trim sound for case when constructed signal is shorter than original
-    sound = sound[..., :estimate.size(-1)]
-    torch.testing.assert_allclose(estimate, sound, atol=atol, rtol=rtol)
-
-
-def _test_istft_is_inverse_of_stft(kwargs):
-    # generates a random sound signal for each tril and then does the stft/istft
-    # operation to check whether we can reconstruct signal
-    for data_size in [(2, 20), (3, 15), (4, 10)]:
-        for i in range(100):
-
-            sound = random_float_tensor(i, data_size)
-
-            stft = torch.stft(sound, **kwargs)
-            estimate = torchaudio.functional.istft(stft, length=sound.size(1), **kwargs)
-
-            _compare_estimate(sound, estimate)
-
-
-class TestIstft(common_utils.TorchaudioTestCase):
-    """Test suite for correctness of istft with various input"""
-    number_of_trials = 100
-
-    def test_istft_is_inverse_of_stft1(self):
-        # hann_window, centered, normalized, onesided
-        kwargs1 = {
-            'n_fft': 12,
-            'hop_length': 4,
-            'win_length': 12,
-            'window': torch.hann_window(12),
-            'center': True,
-            'pad_mode': 'reflect',
-            'normalized': True,
-            'onesided': True,
-        }
-        _test_istft_is_inverse_of_stft(kwargs1)
-
-    def test_istft_is_inverse_of_stft2(self):
-        # hann_window, centered, not normalized, not onesided
-        kwargs2 = {
-            'n_fft': 12,
-            'hop_length': 2,
-            'win_length': 8,
-            'window': torch.hann_window(8),
-            'center': True,
-            'pad_mode': 'reflect',
-            'normalized': False,
-            'onesided': False,
-        }
-        _test_istft_is_inverse_of_stft(kwargs2)
-
-    def test_istft_is_inverse_of_stft3(self):
-        # hamming_window, centered, normalized, not onesided
-        kwargs3 = {
-            'n_fft': 15,
-            'hop_length': 3,
-            'win_length': 11,
-            'window': torch.hamming_window(11),
-            'center': True,
-            'pad_mode': 'constant',
-            'normalized': True,
-            'onesided': False,
-        }
-        _test_istft_is_inverse_of_stft(kwargs3)
-
-    def test_istft_is_inverse_of_stft4(self):
-        # hamming_window, not centered, not normalized, onesided
-        # window same size as n_fft
-        kwargs4 = {
-            'n_fft': 5,
-            'hop_length': 2,
-            'win_length': 5,
-            'window': torch.hamming_window(5),
-            'center': False,
-            'pad_mode': 'constant',
-            'normalized': False,
-            'onesided': True,
-        }
-        _test_istft_is_inverse_of_stft(kwargs4)
-
-    def test_istft_is_inverse_of_stft5(self):
-        # hamming_window, not centered, not normalized, not onesided
-        # window same size as n_fft
-        kwargs5 = {
-            'n_fft': 3,
-            'hop_length': 2,
-            'win_length': 3,
-            'window': torch.hamming_window(3),
-            'center': False,
-            'pad_mode': 'reflect',
-            'normalized': False,
-            'onesided': False,
-        }
-        _test_istft_is_inverse_of_stft(kwargs5)
-
-    def test_istft_of_ones(self):
-        # stft = torch.stft(torch.ones(4), 4)
-        stft = torch.tensor([
-            [[4., 0.], [4., 0.], [4., 0.], [4., 0.], [4., 0.]],
-            [[0., 0.], [0., 0.], [0., 0.], [0., 0.], [0., 0.]],
-            [[0., 0.], [0., 0.], [0., 0.], [0., 0.], [0., 0.]]
-        ])
-
-        estimate = torchaudio.functional.istft(stft, n_fft=4, length=4)
-        _compare_estimate(torch.ones(4), estimate)
-
-    def test_istft_of_zeros(self):
-        # stft = torch.stft(torch.zeros(4), 4)
-        stft = torch.zeros((3, 5, 2))
-
-        estimate = torchaudio.functional.istft(stft, n_fft=4, length=4)
-        _compare_estimate(torch.zeros(4), estimate)
-
-    def test_istft_requires_overlap_windows(self):
-        # the window is size 1 but it hops 20 so there is a gap which throw an error
-        stft = torch.zeros((3, 5, 2))
-        self.assertRaises(RuntimeError, torchaudio.functional.istft, stft, n_fft=4,
-                          hop_length=20, win_length=1, window=torch.ones(1))
-
-    def test_istft_requires_nola(self):
-        stft = torch.zeros((3, 5, 2))
-        kwargs_ok = {
-            'n_fft': 4,
-            'win_length': 4,
-            'window': torch.ones(4),
-        }
-
-        kwargs_not_ok = {
-            'n_fft': 4,
-            'win_length': 4,
-            'window': torch.zeros(4),
-        }
-
-        # A window of ones meets NOLA but a window of zeros does not. This should
-        # throw an error.
-        torchaudio.functional.istft(stft, **kwargs_ok)
-        self.assertRaises(RuntimeError, torchaudio.functional.istft, stft, **kwargs_not_ok)
-
-    def test_istft_requires_non_empty(self):
-        self.assertRaises(RuntimeError, torchaudio.functional.istft, torch.zeros((3, 0, 2)), 2)
-        self.assertRaises(RuntimeError, torchaudio.functional.istft, torch.zeros((0, 3, 2)), 2)
-
-    def _test_istft_of_sine(self, amplitude, L, n):
-        # stft of amplitude*sin(2*pi/L*n*x) with the hop length and window size equaling L
-        x = torch.arange(2 * L + 1, dtype=torch.get_default_dtype())
-        sound = amplitude * torch.sin(2 * math.pi / L * x * n)
-        # stft = torch.stft(sound, L, hop_length=L, win_length=L,
-        #                   window=torch.ones(L), center=False, normalized=False)
-        stft = torch.zeros((L // 2 + 1, 2, 2))
-        stft_largest_val = (amplitude * L) / 2.0
-        if n < stft.size(0):
-            stft[n, :, 1] = -stft_largest_val
-
-        if 0 <= L - n < stft.size(0):
-            # symmetric about L // 2
-            stft[L - n, :, 1] = stft_largest_val
-
-        estimate = torchaudio.functional.istft(stft, L, hop_length=L, win_length=L,
-                                               window=torch.ones(L), center=False, normalized=False)
-        # There is a larger error due to the scaling of amplitude
-        _compare_estimate(sound, estimate, atol=1e-3)
-
-    def test_istft_of_sine(self):
-        self._test_istft_of_sine(amplitude=123, L=5, n=1)
-        self._test_istft_of_sine(amplitude=150, L=5, n=2)
-        self._test_istft_of_sine(amplitude=111, L=5, n=3)
-        self._test_istft_of_sine(amplitude=160, L=7, n=4)
-        self._test_istft_of_sine(amplitude=145, L=8, n=5)
-        self._test_istft_of_sine(amplitude=80, L=9, n=6)
-        self._test_istft_of_sine(amplitude=99, L=10, n=7)
-
-    def _test_linearity_of_istft(self, data_size, kwargs, atol=1e-6, rtol=1e-8):
-        for i in range(self.number_of_trials):
-            tensor1 = random_float_tensor(i, data_size)
-            tensor2 = random_float_tensor(i * 2, data_size)
-            a, b = torch.rand(2)
-            istft1 = torchaudio.functional.istft(tensor1, **kwargs)
-            istft2 = torchaudio.functional.istft(tensor2, **kwargs)
-            istft = a * istft1 + b * istft2
-            estimate = torchaudio.functional.istft(a * tensor1 + b * tensor2, **kwargs)
-            _compare_estimate(istft, estimate, atol, rtol)
-
-    def test_linearity_of_istft1(self):
-        # hann_window, centered, normalized, onesided
-        kwargs1 = {
-            'n_fft': 12,
-            'window': torch.hann_window(12),
-            'center': True,
-            'pad_mode': 'reflect',
-            'normalized': True,
-            'onesided': True,
-        }
-        data_size = (2, 7, 7, 2)
-        self._test_linearity_of_istft(data_size, kwargs1)
-
-    def test_linearity_of_istft2(self):
-        # hann_window, centered, not normalized, not onesided
-        kwargs2 = {
-            'n_fft': 12,
-            'window': torch.hann_window(12),
-            'center': True,
-            'pad_mode': 'reflect',
-            'normalized': False,
-            'onesided': False,
-        }
-        data_size = (2, 12, 7, 2)
-        self._test_linearity_of_istft(data_size, kwargs2)
-
-    def test_linearity_of_istft3(self):
-        # hamming_window, centered, normalized, not onesided
-        kwargs3 = {
-            'n_fft': 12,
-            'window': torch.hamming_window(12),
-            'center': True,
-            'pad_mode': 'constant',
-            'normalized': True,
-            'onesided': False,
-        }
-        data_size = (2, 12, 7, 2)
-        self._test_linearity_of_istft(data_size, kwargs3)
-
-    def test_linearity_of_istft4(self):
-        # hamming_window, not centered, not normalized, onesided
-        kwargs4 = {
-            'n_fft': 12,
-            'window': torch.hamming_window(12),
-            'center': False,
-            'pad_mode': 'constant',
-            'normalized': False,
-            'onesided': True,
-        }
-        data_size = (2, 7, 3, 2)
-        self._test_linearity_of_istft(data_size, kwargs4, atol=1e-5, rtol=1e-8)
-
-
 class TestDetectPitchFrequency(common_utils.TorchaudioTestCase):
     @parameterized.expand([(100,), (440,)])
     def test_pitch(self, frequency):
diff --git a/test/test_batch_consistency.py b/test/test_batch_consistency.py
@@ -59,14 +59,6 @@ def test_detect_pitch_frequency(self, frequency, sample_rate, n_channels):
                                              n_channels=n_channels, duration=5)
         self.assert_batch_consistencies(F.detect_pitch_frequency, waveform, sample_rate)
 
-    def test_istft(self):
-        stft = torch.tensor([
-            [[4., 0.], [4., 0.], [4., 0.], [4., 0.], [4., 0.]],
-            [[0., 0.], [0., 0.], [0., 0.], [0., 0.], [0., 0.]],
-            [[0., 0.], [0., 0.], [0., 0.], [0., 0.], [0., 0.]]
-        ])
-        self.assert_batch_consistencies(F.istft, stft, n_fft=4, length=4)
-
     def test_contrast(self):
         waveform = torch.rand(2, 100) - 0.5
         self.assert_batch_consistencies(F.contrast, waveform, enhancement_amount=80.)
diff --git a/torchaudio/functional.py b/torchaudio/functional.py