Merged

Typos #1256

README.md: 30 changes, 15 additions & 15 deletions

@@ -195,16 +195,16 @@ Conventions

With torchaudio being a machine learning library and built on top of PyTorch,
torchaudio is standardized around the following naming conventions. Tensors are
-assumed to have channels as the first dimension and time as the last
+assumed to have "channel" as the first dimension and time as the last
dimension (when applicable). This makes it consistent with PyTorch's dimensions.
For size names, the prefix `n_` is used (e.g. "a tensor of size (`n_freq`, `n_mel`)")
whereas dimension names do not have this prefix (e.g. "a tensor of
-dimension (channels, time)")
+dimension (channel, time)")

-* `waveform`: a tensor of audio samples with dimensions (channels, time)
+* `waveform`: a tensor of audio samples with dimensions (channel, time)
* `sample_rate`: the rate of audio dimensions (samples per second)
-* `specgram`: a tensor of spectrogram with dimensions (channels, freq, time)
-* `mel_specgram`: a mel spectrogram with dimensions (channels, mel, time)
+* `specgram`: a tensor of spectrogram with dimensions (channel, freq, time)
+* `mel_specgram`: a mel spectrogram with dimensions (channel, mel, time)
* `hop_length`: the number of samples between the starts of consecutive frames
* `n_fft`: the number of Fourier bins
* `n_mel`, `n_mfcc`: the number of mel and MFCC bins
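These names relate to one another numerically. As a rough sketch of those relationships (assuming `torch.stft`'s defaults of `center=True` and `onesided=True`; the helper below is a hypothetical illustration, not a torchaudio function):

```python
# Sketch: how n_fft, hop_length, and the input length determine spectrogram
# sizes, assuming onesided=True and center=True as in torch.stft's defaults.
# This helper is illustrative only; it is not part of torchaudio.

def spectrogram_shape(n_samples: int, n_fft: int, hop_length: int):
    """Return (n_freq, n_frames) for a centered, one-sided STFT."""
    n_freq = n_fft // 2 + 1                 # one-sided: redundant negative-frequency bins dropped
    n_frames = n_samples // hop_length + 1  # input is center-padded by n_fft // 2 on each side
    return n_freq, n_frames

# One second of audio at 16 kHz with a 400-sample FFT and 200-sample hop:
print(spectrogram_shape(16000, n_fft=400, hop_length=200))
```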
@@ -216,16 +216,16 @@ dimension (channels, time)")

Transforms expect and return the following dimensions.

-* `Spectrogram`: (channels, time) -> (channels, freq, time)
-* `AmplitudeToDB`: (channels, freq, time) -> (channels, freq, time)
-* `MelScale`: (channels, freq, time) -> (channels, mel, time)
-* `MelSpectrogram`: (channels, time) -> (channels, mel, time)
-* `MFCC`: (channels, time) -> (channel, mfcc, time)
-* `MuLawEncode`: (channels, time) -> (channels, time)
-* `MuLawDecode`: (channels, time) -> (channels, time)
-* `Resample`: (channels, time) -> (channels, time)
-* `Fade`: (channels, time) -> (channels, time)
-* `Vol`: (channels, time) -> (channels, time)
+* `Spectrogram`: (channel, time) -> (channel, freq, time)
+* `AmplitudeToDB`: (channel, freq, time) -> (channel, freq, time)
+* `MelScale`: (channel, freq, time) -> (channel, mel, time)
+* `MelSpectrogram`: (channel, time) -> (channel, mel, time)
+* `MFCC`: (channel, time) -> (channel, mfcc, time)
+* `MuLawEncode`: (channel, time) -> (channel, time)
+* `MuLawDecode`: (channel, time) -> (channel, time)
+* `Resample`: (channel, time) -> (channel, time)
+* `Fade`: (channel, time) -> (channel, time)
+* `Vol`: (channel, time) -> (channel, time)

Complex numbers are supported via tensors of dimension (..., 2), and torchaudio provides `complex_norm` and `angle` to convert such a tensor into its magnitude and phase. Here, and in the documentation, we use an ellipsis "..." as a placeholder for the rest of the dimensions of a tensor, e.g. optional batching and channel dimensions.
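A minimal plain-Python illustration of that (..., 2) convention for a single value (torchaudio's `complex_norm` and `angle` apply the same elementwise formulas across a whole tensor):

```python
import math

# One complex value stored as a (real, imag) pair, i.e. the last dimension
# of a (..., 2) tensor in torchaudio's real-valued complex convention.
re, im = 3.0, 4.0

magnitude = math.hypot(re, im)   # sqrt(re**2 + im**2), what complex_norm computes with power=1.0
phase = math.atan2(im, re)       # what angle computes

print(magnitude, phase)
```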

torchaudio/backend/utils.py: 6 changes, 3 additions & 3 deletions

@@ -44,7 +44,7 @@ def set_audio_backend(backend: Optional[str]):
"""
if torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE is not None:
warnings.warn(
-'"torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE" flag is depredated and will be removed in 0.9.0. '
+'"torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE" flag is deprecated and will be removed in 0.9.0. '
'Please remove the use of flag.'
)

@@ -57,7 +57,7 @@ def set_audio_backend(backend: Optional[str]):
module = no_backend
elif backend == 'sox':
warnings.warn(
-'"sox" backend is depredated and will be removed in 0.9.0. '
+'"sox" backend is deprecated and will be removed in 0.9.0. '
'Please use "sox_io" backend.'
)
module = sox_backend
@@ -66,7 +66,7 @@ def set_audio_backend(backend: Optional[str]):
elif backend == 'soundfile':
if torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE:
warnings.warn(
-'The legacy interface of "soundfile" backend is depredated and will be removed in 0.9.0. '
+'The legacy interface of "soundfile" backend is deprecated and will be removed in 0.9.0. '
'Please migrate to the new interface.'
)
module = soundfile_backend
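The dispatch-with-deprecation pattern used in this function can be sketched in plain Python (the backend names and module strings below are made up for illustration; this is not torchaudio's actual registry):

```python
import warnings

# Hypothetical sketch of the pattern above: legacy backend names still
# resolve to a module, but emit a DeprecationWarning first.
_BACKENDS = {"new_io": "new_io_module", "legacy": "legacy_module"}
_DEPRECATED = {"legacy": 'The "legacy" backend is deprecated. Please use "new_io".'}

def set_backend(name: str) -> str:
    if name in _DEPRECATED:
        warnings.warn(_DEPRECATED[name], DeprecationWarning)
    return _BACKENDS[name]

# Selecting the deprecated backend still works, but warns:
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    module = set_backend("legacy")

print(module, len(caught))
```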
torchaudio/compliance/kaldi.py: 2 changes, 1 addition & 1 deletion

@@ -793,7 +793,7 @@ def _get_sinc_resample_kernel(orig_freq: int, new_freq: int, lowpass_filter_width
t = t.clamp_(-lowpass_filter_width, lowpass_filter_width)
t *= math.pi
# we do not use torch.hann_window here as we need to evaluate the window
-# at spectifics positions, not over a regular grid.
+# at specific positions, not over a regular grid.
window = torch.cos(t / lowpass_filter_width / 2)**2
kernel = torch.where(t == 0, torch.tensor(1.).to(t), torch.sin(t) / t)
kernel.mul_(window)
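The comment above can be illustrated in plain Python: because the Hann-style window is just a cos² expression, it can be evaluated at arbitrary positions rather than only on the regular grid that `torch.hann_window` produces. A scalar sketch of one kernel tap, mirroring the cos² window and sin(t)/t sinc in the code (not the actual torchaudio kernel):

```python
import math

def windowed_sinc(t: float, lowpass_filter_width: float) -> float:
    """One tap of a windowed-sinc resampling kernel at an arbitrary
    position t, mirroring the clamp, cos**2 window, and sin(t)/t above."""
    t = max(-lowpass_filter_width, min(lowpass_filter_width, t)) * math.pi
    window = math.cos(t / lowpass_filter_width / 2) ** 2   # Hann-shaped, 1 at center
    sinc = 1.0 if t == 0 else math.sin(t) / t              # limit value 1 at t == 0
    return sinc * window

print(windowed_sinc(0.0, 6.0))   # peak of the kernel at the center
print(windowed_sinc(6.0, 6.0))   # window falls to zero at the cutoff
```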
torchaudio/kaldi_io.py: 2 changes, 1 addition & 1 deletion

@@ -57,7 +57,7 @@ def read_vec_int_ark(file_or_fd: Any) -> Iterable[Tuple[str, Tensor]]:
>>> d = { u:d for u,d in torchaudio.kaldi_io.read_vec_int_ark(file) }
"""
# Requires convert_contiguous to be True because elements from int32 vector are
-# sored in tuples: (sizeof(int32), value) so strides are (5,) instead of (4,) which will throw an error
+# sorted in tuples: (sizeof(int32), value) so strides are (5,) instead of (4,) which will throw an error
# in from_numpy as it expects strides to be a multiple of 4 (int32).
return _convert_method_output_to_tensor(file_or_fd, kaldi_io.read_vec_int_ark, convert_contiguous=True)
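The stride mismatch described in that comment can be reproduced with the standard `struct` module: each vector element is a 1-byte size marker followed by a 4-byte int32, giving 5-byte strides. A sketch of the layout (not Kaldi's actual reader):

```python
import struct

# Each element of a Kaldi int32 vector is written as (sizeof(int32), value):
# a 1-byte size marker followed by the 4-byte value, so elements are spaced
# 5 bytes apart, not the 4 bytes from_numpy expects for an int32 array.
ELEMENT_FMT = "<bi"                       # little-endian: int8 marker + int32 value
element_size = struct.calcsize(ELEMENT_FMT)
print(element_size)                       # byte stride per element

# Repacking just the values (what convert_contiguous=True achieves) restores
# a contiguous int32 layout:
raw = struct.pack("<bibibi", 4, 10, 4, 20, 4, 30)
values = [v for _, v in struct.iter_unpack(ELEMENT_FMT, raw)]
print(values)
```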

torchaudio/transforms.py: 6 changes, 3 additions & 3 deletions

@@ -71,7 +71,7 @@ def __init__(self,
super(Spectrogram, self).__init__()
self.n_fft = n_fft
# number of FFT bins. the returned STFT result will have n_fft // 2 + 1
-# number of frequecies due to onesided=True in torch.stft
+# number of frequencies due to onesided=True in torch.stft
self.win_length = win_length if win_length is not None else n_fft
self.hop_length = hop_length if hop_length is not None else self.win_length // 2
window = window_fn(self.win_length) if wkwargs is None else window_fn(self.win_length, **wkwargs)
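The `n_fft // 2 + 1` count in that comment follows from the conjugate symmetry of the Fourier transform of a real signal, which a small pure-Python DFT can illustrate (a sketch independent of `torch.stft`):

```python
import cmath

def dft(x):
    """Naive DFT of a real-valued signal."""
    n = len(x)
    return [sum(x[t] * cmath.exp(-2j * cmath.pi * k * t / n) for t in range(n))
            for k in range(n)]

x = [0.0, 1.0, 2.0, 3.0, 2.0, 1.0, 0.0, -1.0]   # any real signal
X = dft(x)
n_fft = len(x)

# For real input, bin n_fft - k is the conjugate of bin k, so bins
# 0 .. n_fft // 2 carry all the information: n_fft // 2 + 1 unique
# frequencies, which is why onesided=True keeps only that many.
for k in range(1, n_fft // 2):
    assert abs(X[n_fft - k] - X[k].conjugate()) < 1e-9

print(n_fft // 2 + 1)
```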
@@ -547,8 +547,8 @@ def forward(self, waveform: Tensor) -> Tensor:
else:
mel_specgram = self.amplitude_to_DB(mel_specgram)

-# (..., channel, n_mels, time).tranpose(...) dot (n_mels, n_mfcc)
-# -> (..., channel, time, n_mfcc).tranpose(...)
+# (..., channel, n_mels, time).transpose(...) dot (n_mels, n_mfcc)
+# -> (..., channel, time, n_mfcc).transpose(...)
mfcc = torch.matmul(mel_specgram.transpose(-2, -1), self.dct_mat).transpose(-2, -1)
return mfcc
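The transpose/matmul/transpose pattern in that comment contracts over `n_mels` while keeping `time` as the last dimension. A plain-Python 2-D sketch with tiny made-up matrices (the real code uses a DCT matrix and batched tensors):

```python
# Sketch of applying a (n_mels, n_mfcc) matrix along the n_mels axis of a
# (n_mels, time) spectrogram, mirroring transpose -> matmul -> transpose
# in the comment above. The matrices are tiny made-up examples, not a DCT.

def transpose(m):
    return [list(row) for row in zip(*m)]

def matmul(a, b):
    return [[sum(x * y for x, y in zip(row, col)) for col in zip(*b)]
            for row in a]

mel_specgram = [[1.0, 2.0],      # (n_mels=3, time=2)
                [3.0, 4.0],
                [5.0, 6.0]]
dct_mat = [[1.0, 0.0],           # (n_mels=3, n_mfcc=2), made up
           [0.0, 1.0],
           [1.0, 1.0]]

# (time, n_mels) @ (n_mels, n_mfcc) -> (time, n_mfcc) -> (n_mfcc, time)
mfcc = transpose(matmul(transpose(mel_specgram), dct_mat))
print(mfcc)
```

Transposing first and transposing back means the contraction happens over the second-to-last axis without ever moving `time` away from the final position, which is the document's convention throughout.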
