From e4fdcda66245c7eb516e6c7ff0d1e00343a14d6a Mon Sep 17 00:00:00 2001
From: Vincent Quenneville-Belair
Date: Tue, 9 Feb 2021 19:32:52 -0500
Subject: [PATCH 1/2] Typos in deprecation message.

---
 torchaudio/backend/utils.py    | 6 +++---
 torchaudio/compliance/kaldi.py | 2 +-
 torchaudio/kaldi_io.py         | 2 +-
 torchaudio/transforms.py       | 6 +++---
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/torchaudio/backend/utils.py b/torchaudio/backend/utils.py
index b888c04c48..b9f6b13edf 100644
--- a/torchaudio/backend/utils.py
+++ b/torchaudio/backend/utils.py
@@ -44,7 +44,7 @@ def set_audio_backend(backend: Optional[str]):
     """
     if torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE is not None:
         warnings.warn(
-            '"torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE" flag is depredated and will be removed in 0.9.0. '
+            '"torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE" flag is deprecated and will be removed in 0.9.0. '
             'Please remove the use of flag.'
         )
 
@@ -57,7 +57,7 @@ def set_audio_backend(backend: Optional[str]):
         module = no_backend
     elif backend == 'sox':
         warnings.warn(
-            '"sox" backend is depredated and will be removed in 0.9.0. '
+            '"sox" backend is deprecated and will be removed in 0.9.0. '
            'Please use "sox_io" backend.'
         )
         module = sox_backend
@@ -66,7 +66,7 @@
     elif backend == 'soundfile':
         if torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE:
             warnings.warn(
-                'The legacy interface of "soundfile" backend is depredated and will be removed in 0.9.0. '
+                'The legacy interface of "soundfile" backend is deprecated and will be removed in 0.9.0. '
                 'Please migrate to the new interface.'
             )
             module = soundfile_backend

diff --git a/torchaudio/compliance/kaldi.py b/torchaudio/compliance/kaldi.py
index 5b47ef3350..f1e1ddf5b5 100644
--- a/torchaudio/compliance/kaldi.py
+++ b/torchaudio/compliance/kaldi.py
@@ -793,7 +793,7 @@ def _get_sinc_resample_kernel(orig_freq: int, new_freq: int, lowpass_filter_widt
         t = t.clamp_(-lowpass_filter_width, lowpass_filter_width)
         t *= math.pi
         # we do not use torch.hann_window here as we need to evaluate the window
-        # at spectifics positions, not over a regular grid.
+        # at specific positions, not over a regular grid.
         window = torch.cos(t / lowpass_filter_width / 2)**2
         kernel = torch.where(t == 0, torch.tensor(1.).to(t), torch.sin(t) / t)
         kernel.mul_(window)

diff --git a/torchaudio/kaldi_io.py b/torchaudio/kaldi_io.py
index 7f14f48e2c..ba1689da2b 100644
--- a/torchaudio/kaldi_io.py
+++ b/torchaudio/kaldi_io.py
@@ -57,7 +57,7 @@ def read_vec_int_ark(file_or_fd: Any) -> Iterable[Tuple[str, Tensor]]:
         >>> d = { u:d for u,d in torchaudio.kaldi_io.read_vec_int_ark(file) }
     """
     # Requires convert_contiguous to be True because elements from int32 vector are
-    # sored in tuples: (sizeof(int32), value) so strides are (5,) instead of (4,) which will throw an error
+    # sorted in tuples: (sizeof(int32), value) so strides are (5,) instead of (4,) which will throw an error
     # in from_numpy as it expects strides to be a multiple of 4 (int32).
     return _convert_method_output_to_tensor(file_or_fd, kaldi_io.read_vec_int_ark, convert_contiguous=True)

diff --git a/torchaudio/transforms.py b/torchaudio/transforms.py
index af89d5b250..8c9df3321f 100644
--- a/torchaudio/transforms.py
+++ b/torchaudio/transforms.py
@@ -71,7 +71,7 @@ def __init__(self,
         super(Spectrogram, self).__init__()
         self.n_fft = n_fft
         # number of FFT bins. the returned STFT result will have n_fft // 2 + 1
-        # number of frequecies due to onesided=True in torch.stft
+        # number of frequencies due to onesided=True in torch.stft
         self.win_length = win_length if win_length is not None else n_fft
         self.hop_length = hop_length if hop_length is not None else self.win_length // 2
         window = window_fn(self.win_length) if wkwargs is None else window_fn(self.win_length, **wkwargs)
@@ -547,8 +547,8 @@ def forward(self, waveform: Tensor) -> Tensor:
         else:
             mel_specgram = self.amplitude_to_DB(mel_specgram)
 
-        # (..., channel, n_mels, time).tranpose(...) dot (n_mels, n_mfcc)
-        # -> (..., channel, time, n_mfcc).tranpose(...)
+        # (..., channel, n_mels, time).transpose(...) dot (n_mels, n_mfcc)
+        # -> (..., channel, time, n_mfcc).transpose(...)
        mfcc = torch.matmul(mel_specgram.transpose(-2, -1), self.dct_mat).transpose(-2, -1)
         return mfcc
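A quick aside on the window comment fixed in kaldi.py above: on a regular integer
grid the cosine-squared expression is exactly a symmetric Hann window; the resampling
kernel simply needs to evaluate it at fractional positions as well, which the
`torch.hann_window` factory cannot do. A minimal sketch of the grid case (assumes
only `torch`; the hypothetical `W` stands in for `lowpass_filter_width`):

    import math
    import torch

    W = 5  # stands in for lowpass_filter_width
    t = torch.arange(-W, W + 1, dtype=torch.float64)  # integer positions in [-W, W]
    window = torch.cos(t * math.pi / W / 2) ** 2      # same expression as in kaldi.py
    # on this regular grid it coincides with a symmetric Hann window
    assert torch.allclose(window, torch.hann_window(2 * W + 1, periodic=False, dtype=torch.float64))

At fractional positions (e.g. `t = torch.tensor([0.25, 1.5])`) the same closed form
still applies, which is why it is used instead of the window factory.
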
From 2d200e8b3d5ec047763383aaad173affa3090329 Mon Sep 17 00:00:00 2001
From: Vincent Quenneville-Belair
Date: Tue, 9 Feb 2021 20:44:23 -0500
Subject: [PATCH 2/2] address feedback in
 https://github.com/pytorch/audio/pull/1062/files#r543635338

---
 README.md | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 527b1a22da..288df4621d 100644
--- a/README.md
+++ b/README.md
@@ -195,16 +195,16 @@ Conventions
 
 With torchaudio being a machine learning library and built on top of PyTorch,
 torchaudio is standardized around the following naming conventions. Tensors are
-assumed to have channels as the first dimension and time as the last
+assumed to have "channel" as the first dimension and time as the last
 dimension (when applicable). This makes it consistent with PyTorch's dimensions.
 For size names, the prefix `n_` is used (e.g. "a tensor of size (`n_freq`,
 `n_mel`)") whereas dimension names do not have this prefix (e.g. "a tensor of
-dimension (channels, time)")
+dimension (channel, time)")
 
-* `waveform`: a tensor of audio samples with dimensions (channels, time)
+* `waveform`: a tensor of audio samples with dimensions (channel, time)
 * `sample_rate`: the rate of audio dimensions (samples per second)
-* `specgram`: a tensor of spectrogram with dimensions (channels, freq, time)
-* `mel_specgram`: a mel spectrogram with dimensions (channels, mel, time)
+* `specgram`: a tensor of spectrogram with dimensions (channel, freq, time)
+* `mel_specgram`: a mel spectrogram with dimensions (channel, mel, time)
 * `hop_length`: the number of samples between the starts of consecutive frames
 * `n_fft`: the number of Fourier bins
 * `n_mel`, `n_mfcc`: the number of mel and MFCC bins
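As a quick sanity check of these conventions (a minimal sketch, assuming the
0.8-era `torchaudio.transforms.Spectrogram` with default parameters):

    import torch
    import torchaudio

    waveform = torch.randn(2, 16000)  # (channel, time): 2 channels, 1 second at 16 kHz
    specgram = torchaudio.transforms.Spectrogram(n_fft=400)(waveform)
    print(specgram.shape)  # (channel, freq, time), where freq == n_fft // 2 + 1 == 201

The frequency size follows from the `onesided=True` comment fixed in transforms.py
above: a one-sided STFT keeps only the non-negative frequency bins.
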
@@ -216,16 +216,16 @@ Transforms expect and return the following dimensions.
 
-* `Spectrogram`: (channels, time) -> (channels, freq, time)
-* `AmplitudeToDB`: (channels, freq, time) -> (channels, freq, time)
-* `MelScale`: (channels, freq, time) -> (channels, mel, time)
-* `MelSpectrogram`: (channels, time) -> (channels, mel, time)
-* `MFCC`: (channels, time) -> (channel, mfcc, time)
-* `MuLawEncode`: (channels, time) -> (channels, time)
-* `MuLawDecode`: (channels, time) -> (channels, time)
-* `Resample`: (channels, time) -> (channels, time)
-* `Fade`: (channels, time) -> (channels, time)
-* `Vol`: (channels, time) -> (channels, time)
+* `Spectrogram`: (channel, time) -> (channel, freq, time)
+* `AmplitudeToDB`: (channel, freq, time) -> (channel, freq, time)
+* `MelScale`: (channel, freq, time) -> (channel, mel, time)
+* `MelSpectrogram`: (channel, time) -> (channel, mel, time)
+* `MFCC`: (channel, time) -> (channel, mfcc, time)
+* `MuLawEncode`: (channel, time) -> (channel, time)
+* `MuLawDecode`: (channel, time) -> (channel, time)
+* `Resample`: (channel, time) -> (channel, time)
+* `Fade`: (channel, time) -> (channel, time)
+* `Vol`: (channel, time) -> (channel, time)
 
 Complex numbers are supported via tensors of dimension (..., 2), and torchaudio
 provides `complex_norm` and `angle` to convert such a tensor into its magnitude and
 phase. Here, and in the documentation, we use an ellipsis "..." as a placeholder
 for the rest of the dimensions of a tensor, e.g. optional batching and channel dimensions.
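For example, magnitude and phase can be recovered from such a (..., 2) tensor
like this (a sketch against the 0.8-era API, where `Spectrogram(power=None)` is
assumed to keep the raw real/imaginary pair and `complex_norm` and `angle` to
live in `torchaudio.functional`):

    import torch
    import torchaudio
    import torchaudio.functional as F

    waveform = torch.randn(1, 16000)  # (channel, time)
    stft = torchaudio.transforms.Spectrogram(n_fft=400, power=None)(waveform)
    print(stft.shape)  # (channel, freq, time, 2): real and imaginary parts last

    magnitude = F.complex_norm(stft)  # (channel, freq, time)
    phase = F.angle(stft)             # (channel, freq, time)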