@@ -196,10 +196,12 @@ def forward(self, specgram: Tensor) -> Tensor:
 
 
 class _WaveRNN(nn.Module):
-    r"""WaveRNN model based on "Efficient Neural Audio Synthesis".
199+ r"""WaveRNN model based on the implementation from `fatchord <https://github.com/fatchord/WaveRNN>`_ .
 
-    The paper link is `<https://arxiv.org/pdf/1802.08435.pdf>`_. The input channels of waveform
-    and spectrogram have to be 1. The product of `upsample_scales` must equal `hop_length`.
+    The original implementation was introduced in
+    `"Efficient Neural Audio Synthesis" <https://arxiv.org/pdf/1802.08435.pdf>`_.
+    The input channels of waveform and spectrogram have to be 1. The product of
+    `upsample_scales` must equal `hop_length`.
 
     Args:
         upsample_scales: the list of upsample scales
@@ -215,14 +217,13 @@ class _WaveRNN(nn.Module):
         n_output: the number of output dimensions (default=128)
         mode: the mode of waveform in ['waveform', 'mol'] (default='waveform')
 
-    Examples
+    Example
         >>> wavernn = _WaveRNN(upsample_scales=[5,5,8], n_bits=9, sample_rate=24000, hop_length=200)
         >>> waveform, sample_rate = torchaudio.load(file)
         >>> # waveform shape: (n_batch, n_channel, (n_time - kernel_size + 1) * hop_length)
         >>> specgram = MelSpectrogram(sample_rate)(waveform)  # shape: (n_batch, n_channel, n_freq, n_time)
         >>> output = wavernn(waveform.squeeze(1), specgram.squeeze(1))
         >>> # output shape in 'waveform' mode: (n_batch, (n_time - kernel_size + 1) * hop_length, 2 ** n_bits)
-        >>> # output shape in 'mol' mode: (n_batch, (n_time - kernel_size + 1) * hop_length, 30)
     """
 
     def __init__(self,
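
A minimal sketch, outside the diff, of the two relationships the updated docstring states: the product of `upsample_scales` must equal `hop_length`, and the 'waveform'-mode output has `(n_time - kernel_size + 1) * hop_length` time steps with `2 ** n_bits` classes. The values are taken from the docstring's own example; `n_batch`, `n_time`, and `kernel_size=5` are assumptions for illustration and do not appear in this diff.

import math

# Values from the docstring example above.
upsample_scales = [5, 5, 8]
hop_length = 200
n_bits = 9

# Constraint from the docstring: the product of `upsample_scales`
# must equal `hop_length`.
assert math.prod(upsample_scales) == hop_length  # 5 * 5 * 8 == 200

# Expected 'waveform'-mode output shape for a hypothetical input with
# n_batch=1 and n_time=100 spectrogram frames. kernel_size=5 is an
# assumed default, not something shown in this diff.
n_batch, n_time, kernel_size = 1, 100, 5
expected_shape = (n_batch, (n_time - kernel_size + 1) * hop_length, 2 ** n_bits)
print(expected_shape)  # (1, 19200, 512)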