@@ -205,7 +205,7 @@ def encoderConf(self) -> RobertaEncoderConf:
         T.VocabTransform(
             load_state_dict_from_url(urljoin(_TEXT_BUCKET, "roberta.vocab.pt"))
         ),
-        T.Truncate(510),
+        T.Truncate(254),
         T.AddToken(token=0, begin=True),
         T.AddToken(token=2, begin=False),
     ),
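The effect of this change can be checked against the bundled transform itself. The snippet below is an illustrative sketch, not part of this commit; it assumes the pipeline above is exposed as torchtext.models.ROBERTA_BASE_ENCODER. With the new value, the text is capped at 254 BPE tokens before the BOS (0) and EOS (2) tokens are attached.

# Illustrative sketch only: exercising the transform defined above
# (GPT-2 BPE tokenizer -> vocab lookup -> Truncate -> AddToken).
from torchtext.models import ROBERTA_BASE_ENCODER  # assumed export

transform = ROBERTA_BASE_ENCODER.transform()
ids = transform(["Hello world"])[0]

assert ids[0] == 0           # <s> added by T.AddToken(token=0, begin=True)
assert ids[-1] == 2          # </s> added by T.AddToken(token=2, begin=False)
assert len(ids) <= 254 + 2   # body truncated before the special tokens are added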
@@ -215,6 +215,16 @@ def encoderConf(self) -> RobertaEncoderConf:
     '''
     Roberta Encoder with Base configuration
 
+    RoBERTa iterates on BERT's pretraining procedure, including training the model longer,
+    with bigger batches over more data; removing the next sentence prediction objective;
+    training on longer sequences; and dynamically changing the masking pattern applied
+    to the training data.
+
+    Originally published by the authors of RoBERTa under MIT License
+    and redistributed with the same license.
+    [`License <https://github.com/pytorch/fairseq/blob/main/LICENSE>`__,
+    `Source <https://github.com/pytorch/fairseq/tree/main/examples/roberta#pre-trained-models>`__]
+
     Please refer to :func:`torchtext.models.RobertaModelBundle` for the usage.
     '''
 )
@@ -247,6 +257,16 @@ def encoderConf(self) -> RobertaEncoderConf:
     '''
     Roberta Encoder with Large configuration
 
+    RoBERTa iterates on BERT's pretraining procedure, including training the model longer,
+    with bigger batches over more data; removing the next sentence prediction objective;
+    training on longer sequences; and dynamically changing the masking pattern applied
+    to the training data.
+
+    Originally published by the authors of RoBERTa under MIT License
+    and redistributed with the same license.
+    [`License <https://github.com/pytorch/fairseq/blob/main/LICENSE>`__,
+    `Source <https://github.com/pytorch/fairseq/tree/main/examples/roberta#pre-trained-models>`__]
+
     Please refer to :func:`torchtext.models.RobertaModelBundle` for the usage.
     '''
 )