diff --git a/docs/source/_static/img/specaugment-freq.jpg b/docs/source/_static/img/specaugment-freq.jpg new file mode 100644 index 0000000000..6bd15a2c32 Binary files /dev/null and b/docs/source/_static/img/specaugment-freq.jpg differ diff --git a/docs/source/_static/img/specaugment-time.jpg b/docs/source/_static/img/specaugment-time.jpg new file mode 100644 index 0000000000..ba6d25a6d6 Binary files /dev/null and b/docs/source/_static/img/specaugment-time.jpg differ diff --git a/torchaudio/transforms.py b/torchaudio/transforms.py index 2e7e4f37ee..6e515fa666 100644 --- a/torchaudio/transforms.py +++ b/torchaudio/transforms.py @@ -821,6 +821,10 @@ def forward(self, specgram: Tensor, mask_value: float = 0.) -> Tensor: class FrequencyMasking(_AxisMasking): r"""Apply masking to a spectrogram in the frequency domain. + In the image below, a frequency band has been masked on a spectrogram. + + .. image:: _static/img/specaugment-freq.jpg + Args: freq_mask_param (int): maximum possible length of the mask. Indices uniformly sampled from [0, freq_mask_param). @@ -835,6 +839,10 @@ def __init__(self, freq_mask_param: int, iid_masks: bool = False) -> None: class TimeMasking(_AxisMasking): r"""Apply masking to a spectrogram in the time domain. + In the image below, a time band has been masked on a spectrogram. + + .. image:: _static/img/specaugment-time.jpg + Args: time_mask_param (int): maximum possible length of the mask. Indices uniformly sampled from [0, time_mask_param).