diff --git a/torchtext/models/roberta/bundler.py b/torchtext/models/roberta/bundler.py index 7cd9e2c833..6e9f94f86c 100644 --- a/torchtext/models/roberta/bundler.py +++ b/torchtext/models/roberta/bundler.py @@ -158,7 +158,7 @@ def encoderConf(self) -> RobertaEncoderConf: transform=lambda: T.Sequential( T.SentencePieceTokenizer(urljoin(_TEXT_BUCKET, "xlmr.sentencepiece.bpe.model")), T.VocabTransform(load_state_dict_from_url(urljoin(_TEXT_BUCKET, "xlmr.vocab.pt"))), - T.Truncate(510), + T.Truncate(254), T.AddToken(token=0, begin=True), T.AddToken(token=2, begin=False), )