@@ -205,7 +205,7 @@ def encoderConf(self) -> RobertaEncoderConf:
         T.VocabTransform(
             load_state_dict_from_url(urljoin(_TEXT_BUCKET, "roberta.vocab.pt"))
         ),
-        T.Truncate(510),
+        T.Truncate(254),
         T.AddToken(token=0, begin=True),
         T.AddToken(token=2, begin=False),
     ),
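The effect of this change can be checked against the bundled transform itself. The snippet below is an illustrative sketch, not part of this commit; it assumes the pipeline above is exposed as torchtext.models.ROBERTA_BASE_ENCODER. With the new value, the text is capped at 254 BPE tokens before the BOS (0) and EOS (2) tokens are attached.

# Illustrative sketch only: exercising the transform defined above
# (GPT-2 BPE tokenizer -> vocab lookup -> Truncate -> AddToken).
from torchtext.models import ROBERTA_BASE_ENCODER  # assumed export

transform = ROBERTA_BASE_ENCODER.transform()
ids = transform(["Hello world"])[0]

assert ids[0] == 0           # <s> added by T.AddToken(token=0, begin=True)
assert ids[-1] == 2          # </s> added by T.AddToken(token=2, begin=False)
assert len(ids) <= 254 + 2   # body truncated before the special tokens are added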
@@ -215,6 +215,16 @@ def encoderConf(self) -> RobertaEncoderConf:
     '''
     Roberta Encoder with Base configuration
 
+    RoBERTa iterates on BERT's pretraining procedure, including training the model longer,
+    with bigger batches over more data; removing the next sentence prediction objective;
+    training on longer sequences; and dynamically changing the masking pattern applied
+    to the training data.
+
+    Originally published by the authors of RoBERTa under MIT License
+    and redistributed with the same license.
+    [`License <https://github.com/pytorch/fairseq/blob/main/LICENSE>`__,
+    `Source <https://github.com/pytorch/fairseq/tree/main/examples/roberta#pre-trained-models>`__]
+
     Please refer to :func:`torchtext.models.RobertaModelBundle` for the usage.
     '''
 )
@@ -247,6 +257,16 @@ def encoderConf(self) -> RobertaEncoderConf:
     '''
     Roberta Encoder with Large configuration
 
+    RoBERTa iterates on BERT's pretraining procedure, including training the model longer,
+    with bigger batches over more data; removing the next sentence prediction objective;
+    training on longer sequences; and dynamically changing the masking pattern applied
+    to the training data.
+
+    Originally published by the authors of RoBERTa under MIT License
+    and redistributed with the same license.
+    [`License <https://github.com/pytorch/fairseq/blob/main/LICENSE>`__,
+    `Source <https://github.com/pytorch/fairseq/tree/main/examples/roberta#pre-trained-models>`__]
+
     Please refer to :func:`torchtext.models.RobertaModelBundle` for the usage.
     '''
 )