diff --git a/docs/source/nn_modules.rst b/docs/source/nn_modules.rst
index 064f51488a..5b979581e0 100644
--- a/docs/source/nn_modules.rst
+++ b/docs/source/nn_modules.rst
@@ -1,11 +1,11 @@
 .. role:: hidden
     :class: hidden-section
 
-torchtext.nn.modules.multiheadattention
+torchtext.nn
 =======================================
 
-.. automodule:: torchtext.nn.modules.multiheadattention
-.. currentmodule:: torchtext.nn.modules.multiheadattention
+.. automodule:: torchtext.nn
+.. currentmodule:: torchtext.nn
 
 :hidden:`MultiheadAttentionContainer`
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/torchtext/nn/modules/multiheadattention.py b/torchtext/nn/modules/multiheadattention.py
index b581d245c4..e0909d70b3 100644
--- a/torchtext/nn/modules/multiheadattention.py
+++ b/torchtext/nn/modules/multiheadattention.py
@@ -20,6 +20,7 @@ def __init__(self, nhead, in_proj_container, attention_layer, out_proj, batch_fi
 
         Examples::
             >>> import torch
+            >>> from torchtext.nn import MultiheadAttentionContainer, InProjContainer, ScaledDotProduct
             >>> embed_dim, num_heads, bsz = 10, 5, 64
             >>> in_proj_container = InProjContainer(torch.nn.Linear(embed_dim, embed_dim),
                                                     torch.nn.Linear(embed_dim, embed_dim),
@@ -122,6 +123,7 @@ def __init__(self, dropout=0.0, batch_first=False):
                 as `(batch, seq, feature)`. Default: ``False``
 
         Examples::
+            >>> import torch, torchtext
             >>> SDP = torchtext.nn.ScaledDotProduct(dropout=0.1)
             >>> q = torch.randn(21, 256, 3)
             >>> k = v = torch.randn(21, 256, 3)
@@ -245,6 +247,7 @@ def forward(self,
             value (Tensor): The values to be projected.
 
         Examples::
+            >>> import torch
             >>> from torchtext.nn import InProjContainer
             >>> embed_dim, bsz = 10, 64
             >>> in_proj_container = InProjContainer(torch.nn.Linear(embed_dim, embed_dim),
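
For reference, a minimal self-contained sketch of the ScaledDotProduct docstring example that the second hunk makes importable; the SDP(q, k, v) call and its two return values come from the surrounding docstring rather than this diff, so treat them as an assumption.

    # Sketch: run the ScaledDotProduct example end to end with the new imports.
    import torch
    import torchtext

    SDP = torchtext.nn.ScaledDotProduct(dropout=0.1)
    q = torch.randn(21, 256, 3)          # (seq_len, batch * heads, head_dim)
    k = v = torch.randn(21, 256, 3)
    attn_output, attn_weights = SDP(q, k, v)
    print(attn_output.shape)             # expected to match q: torch.Size([21, 256, 3])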