From e4fd8c18e1e193595696a0845005054b29db8e03 Mon Sep 17 00:00:00 2001 From: Parmeet Singh Bhatia Date: Tue, 2 Nov 2021 21:57:58 -0400 Subject: [PATCH 1/4] add doc --- docs/source/functional.rst | 25 ++++++++++++++++++++++ docs/source/index.rst | 3 +++ docs/source/models.rst | 14 +++++++++++++ docs/source/transforms.rst | 32 +++++++++++++++++++++++++++++ torchtext/models/roberta/bundler.py | 11 ++++++++++ 5 files changed, 85 insertions(+) create mode 100644 docs/source/functional.rst create mode 100644 docs/source/models.rst create mode 100644 docs/source/transforms.rst diff --git a/docs/source/functional.rst b/docs/source/functional.rst new file mode 100644 index 0000000000..38c00cf2ee --- /dev/null +++ b/docs/source/functional.rst @@ -0,0 +1,25 @@ +.. role:: hidden + :class: hidden-section + +torchtext.functional +=========================== + +.. automodule:: torchtext.functional +.. currentmodule:: torchtext.functional + +:hidden:`to_tensor` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: to_tensor + + +:hidden:`truncate` +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: truncate + + +:hidden:`add_token` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: add_token \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst index 23b2fb1b52..8a29be9bc3 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -44,6 +44,9 @@ popular datasets for natural language. experimental_vectors experimental_vocab models_utils + transforms + functional + models .. automodule:: torchtext :members: diff --git a/docs/source/models.rst b/docs/source/models.rst new file mode 100644 index 0000000000..2f5e66f5ed --- /dev/null +++ b/docs/source/models.rst @@ -0,0 +1,14 @@ +.. role:: hidden + :class: hidden-section + +torchtext.models +=========================== + +.. automodule:: torchtext.models +.. currentmodule:: torchtext.models + +:hidden:`RobertaModelBundle` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
autofunction:: RobertaModelBundle + diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst new file mode 100644 index 0000000000..cec2b69031 --- /dev/null +++ b/docs/source/transforms.rst @@ -0,0 +1,32 @@ +.. role:: hidden + :class: hidden-section + +torchtext.transforms +=========================== + +.. automodule:: torchtext.transforms +.. currentmodule:: torchtext.transforms + +:hidden:`SentencePieceTokenizer` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: SentencePieceTokenizer + + +:hidden:`VocabTransform` +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: VocabTransform + + +:hidden:`ToTensor` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: ToTensor + + +:hidden:`LabelToIndex` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autofunction:: LabelToIndex + diff --git a/torchtext/models/roberta/bundler.py b/torchtext/models/roberta/bundler.py index 186a5ff688..785d23ba0d 100644 --- a/torchtext/models/roberta/bundler.py +++ b/torchtext/models/roberta/bundler.py @@ -99,6 +99,12 @@ def params(self) -> RobertaEncoderParams: ) ) +XLMR_BASE_ENCODER.__doc__ = ( + ''' + ''' +) + + XLMR_LARGE_ENCODER = RobertaModelBundle( _path=os.path.join(_TEXT_BUCKET, "xlmr.large.encoder.pt"), _params=RobertaEncoderParams(vocab_size=250002, embedding_dim=1024, ffn_dimension=4096, num_attention_heads=16, num_encoder_layers=24), @@ -107,3 +113,8 @@ def params(self) -> RobertaEncoderParams: spm_model_path=os.path.join(_TEXT_BUCKET, "xlmr.sentencepiece.bpe.model"), ) ) + +XLMR_LARGE_ENCODER.__doc__ = ( + ''' + ''' +) \ No newline at end of file From 716fe5aba4d2dc1072b252ee96e116e02525e1b8 Mon Sep 17 00:00:00 2001 From: Parmeet Singh Bhatia Date: Wed, 3 Nov 2021 12:50:21 -0400 Subject: [PATCH 2/4] additional updates --- docs/requirements.txt | 2 +- docs/source/functional.rst | 12 +++++------ docs/source/models.rst | 26 +++++++++++++++++++++--- docs/source/transforms.rst | 31 ++++++++++++++++++----------- torchtext/models/roberta/bundler.py | 13 ++++++++++-- 
torchtext/transforms.py | 2 +- torchtext/vocab/vocab_factory.py | 4 +++- 7 files changed, 64 insertions(+), 26 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 560a2b3600..d58c576129 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,2 +1,2 @@ -sphinx==2.4.4 +sphinx==3.5.4 -e git+git://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme diff --git a/docs/source/functional.rst b/docs/source/functional.rst index 38c00cf2ee..a40c0941f5 100644 --- a/docs/source/functional.rst +++ b/docs/source/functional.rst @@ -7,19 +7,19 @@ torchtext.functional .. automodule:: torchtext.functional .. currentmodule:: torchtext.functional -:hidden:`to_tensor` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +to_tensor +--------- .. autofunction:: to_tensor -:hidden:`truncate` -~~~~~~~~~~~~~~~~~~~~~~~~ +truncate +-------- .. autofunction:: truncate -:hidden:`add_token` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +add_token +--------- .. autofunction:: add_token \ No newline at end of file diff --git a/docs/source/models.rst b/docs/source/models.rst index 2f5e66f5ed..500b2a6c7d 100644 --- a/docs/source/models.rst +++ b/docs/source/models.rst @@ -7,8 +7,28 @@ torchtext.models .. automodule:: torchtext.models .. currentmodule:: torchtext.models -:hidden:`RobertaModelBundle` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +RobertaModelBundle +------------------ -.. autofunction:: RobertaModelBundle +.. autoclass:: RobertaModelBundle + :members: transform + + .. automethod:: get_model + +XLMR_BASE_ENCODER +----------------- + +.. container:: py attribute + + .. autodata:: XLMR_BASE_ENCODER + :no-value: + + +XLMR_LARGE_ENCODER +------------------ + +.. container:: py attribute + + .. autodata:: XLMR_LARGE_ENCODER + :no-value: diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst index cec2b69031..220f18bf34 100644 --- a/docs/source/transforms.rst +++ b/docs/source/transforms.rst @@ -7,26 +7,33 @@ torchtext.transforms .. 
automodule:: torchtext.transforms .. currentmodule:: torchtext.transforms -:hidden:`SentencePieceTokenizer` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Transforms are common text transforms. They can be chained together using :class:`torch.nn.Sequential` -.. autofunction:: SentencePieceTokenizer +SentencePieceTokenizer +---------------------- +.. autoclass:: SentencePieceTokenizer + .. automethod:: forward -:hidden:`VocabTransform` -~~~~~~~~~~~~~~~~~~~~~~~~ + .. automethod:: forward -.. autofunction:: VocabTransform +VocabTransform +-------------- -:hidden:`ToTensor` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autoclass:: VocabTransform -.. autofunction:: ToTensor + .. automethod:: forward +ToTensor +-------- -:hidden:`LabelToIndex` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autoclass:: ToTensor -.. autofunction:: LabelToIndex + .. automethod:: forward + +LabelToIndex +------------ +.. autoclass:: LabelToIndex + + .. automethod:: forward diff --git a/torchtext/models/roberta/bundler.py b/torchtext/models/roberta/bundler.py index 785d23ba0d..6cd67cacc1 100644 --- a/torchtext/models/roberta/bundler.py +++ b/torchtext/models/roberta/bundler.py @@ -23,7 +23,8 @@ @dataclass class RobertaModelBundle: - """ + """RobertaModelBundle(_params: torchtext.models.RobertaEncoderParams, _path: Optional[str] = None, _head: Optional[torch.nn.Module] = None, transform: Optional[Callable] = None) + Example - Pretrained encoder >>> import torch, torchtext >>> xlmr_base = torchtext.models.XLMR_BASE_ENCODER @@ -57,6 +58,8 @@ class RobertaModelBundle: transform: Optional[Callable] = None def get_model(self, head: Optional[Module] = None, load_weights: bool = True, freeze_encoder: bool = False, *, dl_kwargs=None) -> RobertaModel: + r"""get_model(head: Optional[torch.nn.Module] = None, load_weights: bool = True, freeze_encoder: bool = False, *, dl_kwargs=None) -> torchtext.models.RobertaModel + """ if load_weights: assert self._path is not None, "load_weights cannot be True. 
The pre-trained model weights are not available for the current object" @@ -101,6 +104,9 @@ def params(self) -> RobertaEncoderParams: XLMR_BASE_ENCODER.__doc__ = ( ''' + XLMR Encoder with base configuration + + Please refer to :func:`torchtext.models.RobertaModelBundle` for the usage. ''' ) @@ -116,5 +122,8 @@ def params(self) -> RobertaEncoderParams: XLMR_LARGE_ENCODER.__doc__ = ( ''' + XLMR Encoder with Large configuration + + Please refer to :func:`torchtext.models.RobertaModelBundle` for the usage. ''' -) \ No newline at end of file +) diff --git a/torchtext/transforms.py b/torchtext/transforms.py index d13f690bc8..cf43e40f4d 100644 --- a/torchtext/transforms.py +++ b/torchtext/transforms.py @@ -63,7 +63,7 @@ class VocabTransform(Module): >>> jit_vocab_transform = torch.jit.script(vocab_transform) """ - def __init__(self, vocab): + def __init__(self, vocab: Vocab): super().__init__() assert isinstance(vocab, Vocab) self.vocab = vocab diff --git a/torchtext/vocab/vocab_factory.py b/torchtext/vocab/vocab_factory.py index bdea76f0a6..d20adc5708 100644 --- a/torchtext/vocab/vocab_factory.py +++ b/torchtext/vocab/vocab_factory.py @@ -7,7 +7,9 @@ def vocab(ordered_dict: Dict, min_freq: int = 1) -> Vocab: - r"""Factory method for creating a vocab object which maps tokens to indices. + r"""vocab(ordered_dict: Dict, min_freq: int = 1) -> torchtext.vocab.Vocab + + Factory method for creating a vocab object which maps tokens to indices. Note that the ordering in which key value pairs were inserted in the `ordered_dict` will be respected when building the vocab. Therefore if sorting by token frequency is important to the user, the `ordered_dict` should be created in a way to reflect this. 
From d394348af7150c5d3568e51a0191a9b24a79e138 Mon Sep 17 00:00:00 2001 From: Parmeet Singh Bhatia Date: Fri, 5 Nov 2021 19:29:30 -0400 Subject: [PATCH 3/4] fix flake --- torchtext/models/roberta/bundler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchtext/models/roberta/bundler.py b/torchtext/models/roberta/bundler.py index 6cd67cacc1..e7e07e4c1a 100644 --- a/torchtext/models/roberta/bundler.py +++ b/torchtext/models/roberta/bundler.py @@ -124,6 +124,6 @@ def params(self) -> RobertaEncoderParams: ''' XLMR Encoder with Large configuration - Please refer to :func:`torchtext.models.RobertaModelBundle` for the usage. + Please refer to :func:`torchtext.models.RobertaModelBundle` for the usage. ''' ) From 7e5d818e3aa9f7228f116b7d2c5a1f46ec591492 Mon Sep 17 00:00:00 2001 From: Parmeet Singh Bhatia Date: Fri, 12 Nov 2021 12:07:35 -0500 Subject: [PATCH 4/4] fix naming issue --- torchtext/models/roberta/bundler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/torchtext/models/roberta/bundler.py b/torchtext/models/roberta/bundler.py index e5b4bfccb0..d0ba4b7028 100644 --- a/torchtext/models/roberta/bundler.py +++ b/torchtext/models/roberta/bundler.py @@ -113,7 +113,7 @@ def encoderConf(self) -> RobertaEncoderConf: XLMR_BASE_ENCODER.__doc__ = ( ''' - XLMR Encoder with base configuration + XLM-R Encoder with base configuration Please refer to :func:`torchtext.models.RobertaModelBundle` for the usage. ''' @@ -131,7 +131,7 @@ def encoderConf(self) -> RobertaEncoderConf: XLMR_LARGE_ENCODER.__doc__ = ( ''' - XLMR Encoder with Large configuration + XLM-R Encoder with Large configuration Please refer to :func:`torchtext.models.RobertaModelBundle` for the usage. '''