diff --git a/docs/requirements.txt b/docs/requirements.txt
index 560a2b3600..d58c576129 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,2 +1,2 @@
-sphinx==2.4.4
+sphinx==3.5.4
 -e git+git://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
diff --git a/docs/source/functional.rst b/docs/source/functional.rst
new file mode 100644
index 0000000000..a40c0941f5
--- /dev/null
+++ b/docs/source/functional.rst
@@ -0,0 +1,25 @@
+.. role:: hidden
+   :class: hidden-section
+
+torchtext.functional
+===========================
+
+.. automodule:: torchtext.functional
+.. currentmodule:: torchtext.functional
+
+to_tensor
+---------
+
+.. autofunction:: to_tensor
+
+
+truncate
+--------
+
+.. autofunction:: truncate
+
+
+add_token
+---------
+
+.. autofunction:: add_token
\ No newline at end of file
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 23b2fb1b52..8a29be9bc3 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -44,6 +44,9 @@ popular datasets for natural language.
    experimental_vectors
    experimental_vocab
    models_utils
+   transforms
+   functional
+   models
 
 .. automodule:: torchtext
    :members:
diff --git a/docs/source/models.rst b/docs/source/models.rst
new file mode 100644
index 0000000000..500b2a6c7d
--- /dev/null
+++ b/docs/source/models.rst
@@ -0,0 +1,34 @@
+.. role:: hidden
+   :class: hidden-section
+
+torchtext.models
+===========================
+
+.. automodule:: torchtext.models
+.. currentmodule:: torchtext.models
+
+RobertaModelBundle
+------------------
+
+.. autoclass:: RobertaModelBundle
+   :members: transform
+
+   .. automethod:: get_model
+
+XLMR_BASE_ENCODER
+-----------------
+
+.. container:: py attribute
+
+   .. autodata:: XLMR_BASE_ENCODER
+      :no-value:
+
+
+XLMR_LARGE_ENCODER
+------------------
+
+.. container:: py attribute
+
+   .. autodata:: XLMR_LARGE_ENCODER
+      :no-value:
+
diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst
new file mode 100644
index 0000000000..220f18bf34
--- /dev/null
+++ b/docs/source/transforms.rst
@@ -0,0 +1,39 @@
+.. role:: hidden
+   :class: hidden-section
+
+torchtext.transforms
+===========================
+
+.. automodule:: torchtext.transforms
+.. currentmodule:: torchtext.transforms
+
+Transforms are common text-processing operations that can be chained together with :class:`torch.nn.Sequential`.
+
+SentencePieceTokenizer
+----------------------
+
+.. autoclass:: SentencePieceTokenizer
+
+   .. automethod:: forward
+
+
+VocabTransform
+--------------
+
+.. autoclass:: VocabTransform
+
+   .. automethod:: forward
+
+ToTensor
+--------
+
+.. autoclass:: ToTensor
+
+   .. automethod:: forward
+
+LabelToIndex
+------------
+
+.. autoclass:: LabelToIndex
+
+   .. automethod:: forward
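The transforms documented above are ``torch.nn.Module`` subclasses, so they can be chained with :class:`torch.nn.Sequential`, while ``torchtext.functional`` supplies the matching helpers for truncation, special tokens, and batching. A minimal sketch of how the pieces fit together (the SentencePiece model path, the special-token ids, and the toy vocab below are illustrative placeholders, not shipped assets):

    from collections import OrderedDict

    import torch
    import torchtext.functional as F
    from torchtext.transforms import SentencePieceTokenizer, VocabTransform
    from torchtext.vocab import vocab as build_vocab

    # Toy vocab; a real pipeline loads one aligned with the tokenizer's pieces.
    v = build_vocab(OrderedDict([("<unk>", 1), ("▁hello", 1), ("▁world", 1)]))
    v.set_default_index(v["<unk>"])

    # Transforms chain with nn.Sequential (and stay TorchScript-able,
    # per the VocabTransform docstring).
    text_transform = torch.nn.Sequential(
        SentencePieceTokenizer("path/to/spm.model"),  # hypothetical path; str -> subword tokens
        VocabTransform(v),                            # subword tokens -> integer ids
    )

    ids = text_transform(["hello world", "hello"])
    ids = F.truncate(ids, max_seq_len=254)          # leave room for special tokens
    ids = F.add_token(ids, token_id=0, begin=True)  # BOS id 0 is illustrative
    batch = F.to_tensor(ids, padding_value=1)       # ragged lists -> padded LongTensor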
diff --git a/torchtext/models/roberta/bundler.py b/torchtext/models/roberta/bundler.py
index bbecc47410..d0ba4b7028 100644
--- a/torchtext/models/roberta/bundler.py
+++ b/torchtext/models/roberta/bundler.py
@@ -23,7 +23,8 @@
 
 @dataclass
 class RobertaModelBundle:
-    """
+    """RobertaModelBundle(_params: torchtext.models.RobertaEncoderParams, _path: Optional[str] = None, _head: Optional[torch.nn.Module] = None, transform: Optional[Callable] = None)
+
     Example - Pretrained encoder
         >>> import torch, torchtext
         >>> xlmr_base = torchtext.models.XLMR_BASE_ENCODER
@@ -66,6 +67,8 @@ class RobertaModelBundle:
     transform: Optional[Callable] = None
 
     def get_model(self, head: Optional[Module] = None, load_weights: bool = True, freeze_encoder: bool = False, *, dl_kwargs=None) -> RobertaModel:
+        r"""get_model(head: Optional[torch.nn.Module] = None, load_weights: bool = True, freeze_encoder: bool = False, *, dl_kwargs=None) -> torchtext.models.RobertaModel
+        """
         if load_weights:
             assert self._path is not None, "load_weights cannot be True. The pre-trained model weights are not available for the current object"
 
@@ -108,6 +111,15 @@ def encoderConf(self) -> RobertaEncoderConf:
         )
     )
 
+XLMR_BASE_ENCODER.__doc__ = (
+    '''
+    XLM-R Encoder with base configuration
+
+    Please refer to :class:`torchtext.models.RobertaModelBundle` for usage.
+    '''
+)
+
+
 XLMR_LARGE_ENCODER = RobertaModelBundle(
     _path=urljoin(_TEXT_BUCKET, "xlmr.large.encoder.pt"),
     _encoder_conf=RobertaEncoderConf(vocab_size=250002, embedding_dim=1024, ffn_dimension=4096, num_attention_heads=16, num_encoder_layers=24),
@@ -116,3 +128,11 @@ def encoderConf(self) -> RobertaEncoderConf:
         spm_model_path=urljoin(_TEXT_BUCKET, "xlmr.sentencepiece.bpe.model"),
     )
 )
+
+XLMR_LARGE_ENCODER.__doc__ = (
+    '''
+    XLM-R Encoder with large configuration
+
+    Please refer to :class:`torchtext.models.RobertaModelBundle` for usage.
+    '''
+)
diff --git a/torchtext/transforms.py b/torchtext/transforms.py
index d13f690bc8..cf43e40f4d 100644
--- a/torchtext/transforms.py
+++ b/torchtext/transforms.py
@@ -63,7 +63,7 @@ class VocabTransform(Module):
         >>> jit_vocab_transform = torch.jit.script(vocab_transform)
     """
 
-    def __init__(self, vocab):
+    def __init__(self, vocab: Vocab):
         super().__init__()
         assert isinstance(vocab, Vocab)
         self.vocab = vocab
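The bundle API documented above exposes two entry points: ``get_model()`` materializes the (optionally weight-loaded, optionally frozen) encoder, and ``transform()`` builds the matching text preprocessing. A condensed sketch of both paths, following the bundler docstring (the ``RobertaClassificationHead`` arguments are illustrative, and pretrained weights are downloaded on first use):

    import torch
    import torchtext
    import torchtext.functional as F

    xlmr_base = torchtext.models.XLMR_BASE_ENCODER

    # Pretrained encoder: load_weights defaults to True.
    model = xlmr_base.get_model()
    transform = xlmr_base.transform()  # factory returning the text -> token-id transform

    model_input = F.to_tensor(transform(["Hello world", "How are you!"]), padding_value=1)
    output = model(model_input)  # (batch, seq_len, 768) for the base configuration

    # Same encoder with a task head; freeze_encoder=True keeps encoder weights fixed.
    head = torchtext.models.RobertaClassificationHead(num_classes=2, input_dim=768)
    classifier = xlmr_base.get_model(head=head, freeze_encoder=True)
    logits = classifier(model_input)  # (batch, num_classes)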