From 4e894234bfd305b4373347d18934a9955efef3c7 Mon Sep 17 00:00:00 2001 From: Ji Chen Date: Mon, 8 Jun 2020 06:44:01 -0700 Subject: [PATCH 01/10] Add MelResNet Block --- test/test_models.py | 16 +++++++- torchaudio/models/__init__.py | 1 + torchaudio/models/wavernn.py | 70 +++++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 torchaudio/models/wavernn.py diff --git a/test/test_models.py b/test/test_models.py index 1f2716d5fc..d05abc2c3a 100644 --- a/test/test_models.py +++ b/test/test_models.py @@ -1,5 +1,5 @@ import torch -from torchaudio.models import Wav2Letter +from torchaudio.models import Wav2Letter, MelResNet class TestWav2Letter: @@ -29,3 +29,17 @@ def test_mfcc(self): out = model(x) assert out.size() == (batch_size, num_classes, 2) + + +class TestMelResNet: + @pytest.mark.parametrize('batch_size', [2]) + @pytest.mark.parametrize('num_features', [200]) + @pytest.mark.parametrize('input_dims', [100]) + @pytest.mark.parametrize('output_dims', [128]) + def test_waveform(self, batch_size, num_features, input_dims, output_dims): + model = MelResNet() + + x = torch.rand(batch_size, input_dims, num_features) + out = model(x) + + assert out.size() == (batch_size, output_dims, num_features - 4) diff --git a/torchaudio/models/__init__.py b/torchaudio/models/__init__.py index 1abdac6271..8e05b8b509 100644 --- a/torchaudio/models/__init__.py +++ b/torchaudio/models/__init__.py @@ -1 +1,2 @@ from .wav2letter import * +from .wavernn import * diff --git a/torchaudio/models/wavernn.py b/torchaudio/models/wavernn.py new file mode 100644 index 0000000000..9e4a7d135b --- /dev/null +++ b/torchaudio/models/wavernn.py @@ -0,0 +1,70 @@ +from typing import Optional + +from torch import Tensor +from torch import nn + +__all__ = ["ResBlock", "MelResNet"] + + +class ResBlock(nn.Module): + r""" + Args: + num_dims (int, optional): Number of compute dimensions in ResBlock. (Default: ``128``) + """ + def __init__(self, num_dims: int) -> None: + super().__init__() + + self.conv1 = nn.Conv1d(num_dims, num_dims, kernel_size=1, bias=False) + self.conv2 = nn.Conv1d(num_dims, num_dims, kernel_size=1, bias=False) + self.batch_norm1 = nn.BatchNorm1d(num_dims) + self.relu = nn.ReLU(inplace=True) + self.batch_norm2 = nn.BatchNorm1d(num_dims) + + def forward(self, x: Tensor) -> Tensor: + residual = x + x = self.conv1(x) + x = self.batch_norm1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.batch_norm2(x) + return x + residual + + +class MelResNet(nn.Module): + r""" + Args: + res_blocks (int, optional): Number of ResBlocks. (Default: ``40``). + input_dims (int, optional): Number of input dimensions (Default: ``100``). + hidden_dims (int, optional): Number of hidden dimensions (Default: ``128``). + output_dims (int, optional): Number of ouput dimensions (Default: ``128``). + """ + def __init__(self, res_blocks: int = 10, + input_dims: int = 100, + hidden_dims: int = 128, + output_dims: int = 128) -> None: + super().__init__() + + self.conv_in = nn.Conv1d(input_dims, hidden_dims, kernel_size=5, bias=False) + self.batch_norm = nn.BatchNorm1d(hidden_dims) + self.layers = nn.ModuleList() + for i in range(res_blocks): + self.layers.append(ResBlock(hidden_dims)) + self.relu = nn.ReLU(inplace=True) + self.conv_out = nn.Conv1d(hidden_dims, output_dims, kernel_size=1) + + def forward(self, x: Tensor) -> Tensor: + r""" + Args: + x (Tensor): Tensor of dimension (batch_size, input_dims, input_length). + + Returns: + Tensor: Predictor tensor of dimension (batch_size, output_dims, input_length-4). + """ + + x = self.conv_in(x) + x = self.batch_norm(x) + x = self.relu(x) + for f in self.layers: + x = f(x) + x = self.conv_out(x) + return x From 0d4263240ce3ebce2f2f1a1c83984788313518d4 Mon Sep 17 00:00:00 2001 From: Ji Chen Date: Wed, 10 Jun 2020 11:20:31 -0700 Subject: [PATCH 02/10] add default value --- torchaudio/models/wavernn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/torchaudio/models/wavernn.py b/torchaudio/models/wavernn.py index 9e4a7d135b..e43478bf60 100644 --- a/torchaudio/models/wavernn.py +++ b/torchaudio/models/wavernn.py @@ -11,7 +11,7 @@ class ResBlock(nn.Module): Args: num_dims (int, optional): Number of compute dimensions in ResBlock. (Default: ``128``) """ - def __init__(self, num_dims: int) -> None: + def __init__(self, num_dims: int = 128) -> None: super().__init__() self.conv1 = nn.Conv1d(num_dims, num_dims, kernel_size=1, bias=False) @@ -33,7 +33,7 @@ def forward(self, x: Tensor) -> Tensor: class MelResNet(nn.Module): r""" Args: - res_blocks (int, optional): Number of ResBlocks. (Default: ``40``). + res_blocks (int, optional): Number of ResBlocks. (Default: ``10``). input_dims (int, optional): Number of input dimensions (Default: ``100``). hidden_dims (int, optional): Number of hidden dimensions (Default: ``128``). output_dims (int, optional): Number of ouput dimensions (Default: ``128``). From 0a6cd7a4b38c744b7e4a01c98b288d0b8267ef3c Mon Sep 17 00:00:00 2001 From: Ji Chen Date: Fri, 12 Jun 2020 06:15:07 -0700 Subject: [PATCH 03/10] update model and test --- test/test_models.py | 17 ++++--- torchaudio/models/wavernn.py | 99 ++++++++++++++++++++++++------------ 2 files changed, 77 insertions(+), 39 deletions(-) diff --git a/test/test_models.py b/test/test_models.py index d05abc2c3a..be0448a95d 100644 --- a/test/test_models.py +++ b/test/test_models.py @@ -32,12 +32,17 @@ def test_mfcc(self): class TestMelResNet: - @pytest.mark.parametrize('batch_size', [2]) - @pytest.mark.parametrize('num_features', [200]) - @pytest.mark.parametrize('input_dims', [100]) - @pytest.mark.parametrize('output_dims', [128]) - def test_waveform(self, batch_size, num_features, input_dims, output_dims): - model = MelResNet() + + def test_waveform(self): + + batch_size = 2 + num_features = 200 + input_dims = 100 + output_dims = 128 + res_blocks = 10 + hidden_dims = 128 + + model = MelResNet(res_blocks, input_dims, hidden_dims, output_dims) x = torch.rand(batch_size, input_dims, num_features) out = model(x) diff --git a/torchaudio/models/wavernn.py b/torchaudio/models/wavernn.py index e43478bf60..29c0811a33 100644 --- a/torchaudio/models/wavernn.py +++ b/torchaudio/models/wavernn.py @@ -7,64 +7,97 @@ class ResBlock(nn.Module): - r""" + r"""This is a ResNet block layer. This layer is based on the paper "Deep Residual Learning + for Image Recognition". Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. CVPR, 2016. + Users may modify or implement in a different way during application. It is a block used in WaveRNN + (https://github.com/G-Wang/WaveRNN-Pytorch) + Args: - num_dims (int, optional): Number of compute dimensions in ResBlock. (Default: ``128``) + num_dims: the number of compute dimensions in the input (default=128). + + Examples:: + >>> resblock = ResBlock(num_dims=128) + >>> input = torch.rand(10, 128, 512) + >>> output = resblock(input) """ + def __init__(self, num_dims: int = 128) -> None: super().__init__() - self.conv1 = nn.Conv1d(num_dims, num_dims, kernel_size=1, bias=False) - self.conv2 = nn.Conv1d(num_dims, num_dims, kernel_size=1, bias=False) - self.batch_norm1 = nn.BatchNorm1d(num_dims) - self.relu = nn.ReLU(inplace=True) - self.batch_norm2 = nn.BatchNorm1d(num_dims) + self.resblock_model = nn.Sequential( + nn.Conv1d(in_channels=num_dims, out_channels=num_dims, kernel_size=1, bias=False), + nn.BatchNorm1d(num_dims), + nn.ReLU(inplace=True), + nn.Conv1d(in_channels=num_dims, out_channels=num_dims, kernel_size=1, bias=False), + nn.BatchNorm1d(num_dims) + ) def forward(self, x: Tensor) -> Tensor: + r"""Pass the input through the ResBlock layer. + + Args: + x: the input sequence to the ResBlock layer (required). + + Shape: + - x: :math:`(N, S, T)`. + - output: :math:`(N, S, T)`. + where N is the batch size, S is the number of input sequence, + T is the length of input sequence. + """ + residual = x - x = self.conv1(x) - x = self.batch_norm1(x) - x = self.relu(x) - x = self.conv2(x) - x = self.batch_norm2(x) + x = self.resblock_model(x) return x + residual class MelResNet(nn.Module): - r""" + r"""This is a MelResNet layer based on a stack of ResBlocks. It is a block used in WaveRNN + (https://github.com/G-Wang/WaveRNN-Pytorch) + Args: - res_blocks (int, optional): Number of ResBlocks. (Default: ``10``). - input_dims (int, optional): Number of input dimensions (Default: ``100``). - hidden_dims (int, optional): Number of hidden dimensions (Default: ``128``). - output_dims (int, optional): Number of ouput dimensions (Default: ``128``). + res_blocks: the number of ResBlock in stack (default=10). + input_dims: the number of input sequence (default=100). + hidden_dims: the number of compute dimensions (default=128). + output_dims: the number of output sequence (default=128). + + Examples:: + >>> melresnet = MelResNet(res_blocks=10, input_dims=100, + hidden_dims=128, output_dims=128) + >>> input = torch.rand(10, 100, 512) + >>> output = melresnet(input) """ + def __init__(self, res_blocks: int = 10, input_dims: int = 100, hidden_dims: int = 128, output_dims: int = 128) -> None: super().__init__() - self.conv_in = nn.Conv1d(input_dims, hidden_dims, kernel_size=5, bias=False) - self.batch_norm = nn.BatchNorm1d(hidden_dims) - self.layers = nn.ModuleList() + ResBlocks = [] + for i in range(res_blocks): - self.layers.append(ResBlock(hidden_dims)) - self.relu = nn.ReLU(inplace=True) - self.conv_out = nn.Conv1d(hidden_dims, output_dims, kernel_size=1) + ResBlocks.append(ResBlock(hidden_dims)) + + self.melresnet_model = nn.Sequential( + nn.Conv1d(in_channels=input_dims, out_channels=hidden_dims, kernel_size=5, bias=False), + nn.BatchNorm1d(hidden_dims), + nn.ReLU(inplace=True), + *ResBlocks, + nn.Conv1d(in_channels=hidden_dims, out_channels=output_dims, kernel_size=1) + ) def forward(self, x: Tensor) -> Tensor: - r""" + r"""Pass the input through the MelResNet layer. + Args: - x (Tensor): Tensor of dimension (batch_size, input_dims, input_length). + x: the input sequence to the MelResNet layer (required). - Returns: - Tensor: Predictor tensor of dimension (batch_size, output_dims, input_length-4). + Shape: + - x: :math:`(N, S, T)`. + - output: :math:`(N, P, T-4)`. + where N is the batch size, S is the number of input sequence, + P is the number of ouput sequence, T is the length of input sequence. """ - x = self.conv_in(x) - x = self.batch_norm(x) - x = self.relu(x) - for f in self.layers: - x = f(x) - x = self.conv_out(x) + x = self.melresnet_model(x) return x From 383550117de673248a215ac1d959f62f37486a33 Mon Sep 17 00:00:00 2001 From: Ji Chen Date: Fri, 12 Jun 2020 14:07:31 -0700 Subject: [PATCH 04/10] rebase and small changes --- torchaudio/models/wavernn.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/torchaudio/models/wavernn.py b/torchaudio/models/wavernn.py index 29c0811a33..6fc0e8f152 100644 --- a/torchaudio/models/wavernn.py +++ b/torchaudio/models/wavernn.py @@ -9,8 +9,7 @@ class ResBlock(nn.Module): r"""This is a ResNet block layer. This layer is based on the paper "Deep Residual Learning for Image Recognition". Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. CVPR, 2016. - Users may modify or implement in a different way during application. It is a block used in WaveRNN - (https://github.com/G-Wang/WaveRNN-Pytorch) + It is a block used in WaveRNN(https://github.com/G-Wang/WaveRNN-Pytorch). Args: num_dims: the number of compute dimensions in the input (default=128). @@ -46,13 +45,12 @@ def forward(self, x: Tensor) -> Tensor: """ residual = x - x = self.resblock_model(x) - return x + residual + return self.resblock_model(x) + residual class MelResNet(nn.Module): r"""This is a MelResNet layer based on a stack of ResBlocks. It is a block used in WaveRNN - (https://github.com/G-Wang/WaveRNN-Pytorch) + (https://github.com/G-Wang/WaveRNN-Pytorch). Args: res_blocks: the number of ResBlock in stack (default=10). @@ -62,7 +60,7 @@ class MelResNet(nn.Module): Examples:: >>> melresnet = MelResNet(res_blocks=10, input_dims=100, - hidden_dims=128, output_dims=128) + hidden_dims=128, output_dims=128) >>> input = torch.rand(10, 100, 512) >>> output = melresnet(input) """ @@ -99,5 +97,4 @@ def forward(self, x: Tensor) -> Tensor: P is the number of ouput sequence, T is the length of input sequence. """ - x = self.melresnet_model(x) - return x + return self.melresnet_model(x) From 89810b9c1edf16052a382a747e1abb67c4cc559d Mon Sep 17 00:00:00 2001 From: Ji Chen Date: Sat, 13 Jun 2020 13:35:27 -0700 Subject: [PATCH 05/10] add pad variable --- test/test_models.py | 5 +++-- torchaudio/models/wavernn.py | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/test/test_models.py b/test/test_models.py index be0448a95d..bcf7a98c7d 100644 --- a/test/test_models.py +++ b/test/test_models.py @@ -41,10 +41,11 @@ def test_waveform(self): output_dims = 128 res_blocks = 10 hidden_dims = 128 + pad = 2 - model = MelResNet(res_blocks, input_dims, hidden_dims, output_dims) + model = MelResNet(res_blocks, input_dims, hidden_dims, output_dims, pad) x = torch.rand(batch_size, input_dims, num_features) out = model(x) - assert out.size() == (batch_size, output_dims, num_features - 4) + assert out.size() == (batch_size, output_dims, num_features - pad * 2) diff --git a/torchaudio/models/wavernn.py b/torchaudio/models/wavernn.py index 6fc0e8f152..217e481b60 100644 --- a/torchaudio/models/wavernn.py +++ b/torchaudio/models/wavernn.py @@ -68,16 +68,18 @@ class MelResNet(nn.Module): def __init__(self, res_blocks: int = 10, input_dims: int = 100, hidden_dims: int = 128, - output_dims: int = 128) -> None: + output_dims: int = 128, + pad: int = 2) -> None: super().__init__() + kernel_size = pad * 2 + 1 ResBlocks = [] for i in range(res_blocks): ResBlocks.append(ResBlock(hidden_dims)) self.melresnet_model = nn.Sequential( - nn.Conv1d(in_channels=input_dims, out_channels=hidden_dims, kernel_size=5, bias=False), + nn.Conv1d(in_channels=input_dims, out_channels=hidden_dims, kernel_size=kernel_size, bias=False), nn.BatchNorm1d(hidden_dims), nn.ReLU(inplace=True), *ResBlocks, From fa3cb00ccce6e92f060541db2a509de675385b83 Mon Sep 17 00:00:00 2001 From: Ji Chen Date: Mon, 15 Jun 2020 06:25:45 -0700 Subject: [PATCH 06/10] update format --- torchaudio/models/wavernn.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/torchaudio/models/wavernn.py b/torchaudio/models/wavernn.py index 217e481b60..ac6bd880b8 100644 --- a/torchaudio/models/wavernn.py +++ b/torchaudio/models/wavernn.py @@ -57,10 +57,11 @@ class MelResNet(nn.Module): input_dims: the number of input sequence (default=100). hidden_dims: the number of compute dimensions (default=128). output_dims: the number of output sequence (default=128). + pad: the number of kernal size (pad * 2 + 1) in the first Conv1d layer (default=2). Examples:: >>> melresnet = MelResNet(res_blocks=10, input_dims=100, - hidden_dims=128, output_dims=128) + hidden_dims=128, output_dims=128, pad=2) >>> input = torch.rand(10, 100, 512) >>> output = melresnet(input) """ @@ -94,7 +95,7 @@ def forward(self, x: Tensor) -> Tensor: Shape: - x: :math:`(N, S, T)`. - - output: :math:`(N, P, T-4)`. + - output: :math:`(N, P, T-2*pad)`. where N is the batch size, S is the number of input sequence, P is the number of ouput sequence, T is the length of input sequence. """ From 2215c6cafac4359a3a83d3d5fcb1ea8544ceb87a Mon Sep 17 00:00:00 2001 From: Ji Chen Date: Mon, 15 Jun 2020 08:44:26 -0700 Subject: [PATCH 07/10] update reference in docstrings --- torchaudio/models/wavernn.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/torchaudio/models/wavernn.py b/torchaudio/models/wavernn.py index ac6bd880b8..fb992b23a6 100644 --- a/torchaudio/models/wavernn.py +++ b/torchaudio/models/wavernn.py @@ -9,7 +9,9 @@ class ResBlock(nn.Module): r"""This is a ResNet block layer. This layer is based on the paper "Deep Residual Learning for Image Recognition". Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. CVPR, 2016. - It is a block used in WaveRNN(https://github.com/G-Wang/WaveRNN-Pytorch). + It is a block used in WaveRNN. WaveRNN is based on the paper "Efficient Neural Audio Synthesis". + Nal Kalchbrenner, Erich Elsen, Karen Simonyan, Seb Noury, Norman Casagrande, Edward Lockhart, + Florian Stimberg, Aaron van den Oord, Sander Dieleman, Koray Kavukcuoglu. arXiv:1802.08435, 2018. Args: num_dims: the number of compute dimensions in the input (default=128). @@ -49,8 +51,10 @@ def forward(self, x: Tensor) -> Tensor: class MelResNet(nn.Module): - r"""This is a MelResNet layer based on a stack of ResBlocks. It is a block used in WaveRNN - (https://github.com/G-Wang/WaveRNN-Pytorch). + r"""This is a MelResNet layer based on a stack of ResBlocks. It is a block used in WaveRNN. + WaveRNN is based on the paper "Efficient Neural Audio Synthesis". Nal Kalchbrenner, Erich Elsen, + Karen Simonyan, Seb Noury, Norman Casagrande, Edward Lockhart, Florian Stimberg, Aaron van den Oord, + Sander Dieleman, Koray Kavukcuoglu. arXiv:1802.08435, 2018. Args: res_blocks: the number of ResBlock in stack (default=10). From b211af5466a9b9df4082cf06e74a49e66fb8a451 Mon Sep 17 00:00:00 2001 From: Ji Chen Date: Tue, 16 Jun 2020 13:34:30 -0700 Subject: [PATCH 08/10] add underscore name --- torchaudio/models/{wavernn.py => _wavernn.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename torchaudio/models/{wavernn.py => _wavernn.py} (100%) diff --git a/torchaudio/models/wavernn.py b/torchaudio/models/_wavernn.py similarity index 100% rename from torchaudio/models/wavernn.py rename to torchaudio/models/_wavernn.py From 4b1e4817185c1d9af5d623f41130d91c2abc123e Mon Sep 17 00:00:00 2001 From: Ji Chen Date: Tue, 16 Jun 2020 13:44:56 -0700 Subject: [PATCH 09/10] add underscore name --- torchaudio/models/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchaudio/models/__init__.py b/torchaudio/models/__init__.py index 8e05b8b509..20c1bdf265 100644 --- a/torchaudio/models/__init__.py +++ b/torchaudio/models/__init__.py @@ -1,2 +1,2 @@ from .wav2letter import * -from .wavernn import * +from ._wavernn import * From 9eaefd528313584a690bfb76d3bd7a3e21f801a0 Mon Sep 17 00:00:00 2001 From: Ji Chen Date: Tue, 16 Jun 2020 14:45:22 -0700 Subject: [PATCH 10/10] add underscore name --- test/test_models.py | 4 ++-- torchaudio/models/_wavernn.py | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/test/test_models.py b/test/test_models.py index bcf7a98c7d..7bd3f3819d 100644 --- a/test/test_models.py +++ b/test/test_models.py @@ -1,5 +1,5 @@ import torch -from torchaudio.models import Wav2Letter, MelResNet +from torchaudio.models import Wav2Letter, _MelResNet class TestWav2Letter: @@ -43,7 +43,7 @@ def test_waveform(self): hidden_dims = 128 pad = 2 - model = MelResNet(res_blocks, input_dims, hidden_dims, output_dims, pad) + model = _MelResNet(res_blocks, input_dims, hidden_dims, output_dims, pad) x = torch.rand(batch_size, input_dims, num_features) out = model(x) diff --git a/torchaudio/models/_wavernn.py b/torchaudio/models/_wavernn.py index fb992b23a6..a08af175f8 100644 --- a/torchaudio/models/_wavernn.py +++ b/torchaudio/models/_wavernn.py @@ -3,10 +3,10 @@ from torch import Tensor from torch import nn -__all__ = ["ResBlock", "MelResNet"] +__all__ = ["_ResBlock", "_MelResNet"] -class ResBlock(nn.Module): +class _ResBlock(nn.Module): r"""This is a ResNet block layer. This layer is based on the paper "Deep Residual Learning for Image Recognition". Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. CVPR, 2016. It is a block used in WaveRNN. WaveRNN is based on the paper "Efficient Neural Audio Synthesis". @@ -17,7 +17,7 @@ class ResBlock(nn.Module): num_dims: the number of compute dimensions in the input (default=128). Examples:: - >>> resblock = ResBlock(num_dims=128) + >>> resblock = _ResBlock(num_dims=128) >>> input = torch.rand(10, 128, 512) >>> output = resblock(input) """ @@ -34,10 +34,10 @@ def __init__(self, num_dims: int = 128) -> None: ) def forward(self, x: Tensor) -> Tensor: - r"""Pass the input through the ResBlock layer. + r"""Pass the input through the _ResBlock layer. Args: - x: the input sequence to the ResBlock layer (required). + x: the input sequence to the _ResBlock layer (required). Shape: - x: :math:`(N, S, T)`. @@ -50,7 +50,7 @@ def forward(self, x: Tensor) -> Tensor: return self.resblock_model(x) + residual -class MelResNet(nn.Module): +class _MelResNet(nn.Module): r"""This is a MelResNet layer based on a stack of ResBlocks. It is a block used in WaveRNN. WaveRNN is based on the paper "Efficient Neural Audio Synthesis". Nal Kalchbrenner, Erich Elsen, Karen Simonyan, Seb Noury, Norman Casagrande, Edward Lockhart, Florian Stimberg, Aaron van den Oord, @@ -64,7 +64,7 @@ class MelResNet(nn.Module): pad: the number of kernal size (pad * 2 + 1) in the first Conv1d layer (default=2). Examples:: - >>> melresnet = MelResNet(res_blocks=10, input_dims=100, + >>> melresnet = _MelResNet(res_blocks=10, input_dims=100, hidden_dims=128, output_dims=128, pad=2) >>> input = torch.rand(10, 100, 512) >>> output = melresnet(input) @@ -81,7 +81,7 @@ def __init__(self, res_blocks: int = 10, ResBlocks = [] for i in range(res_blocks): - ResBlocks.append(ResBlock(hidden_dims)) + ResBlocks.append(_ResBlock(hidden_dims)) self.melresnet_model = nn.Sequential( nn.Conv1d(in_channels=input_dims, out_channels=hidden_dims, kernel_size=kernel_size, bias=False), @@ -92,10 +92,10 @@ def __init__(self, res_blocks: int = 10, ) def forward(self, x: Tensor) -> Tensor: - r"""Pass the input through the MelResNet layer. + r"""Pass the input through the _MelResNet layer. Args: - x: the input sequence to the MelResNet layer (required). + x: the input sequence to the _MelResNet layer (required). Shape: - x: :math:`(N, S, T)`.