From cad4a17b40f60a7e89ba51c78e44ecc351d8a4d4 Mon Sep 17 00:00:00 2001 From: Parmeet Singh Bhatia Date: Fri, 7 Jan 2022 17:24:59 -0500 Subject: [PATCH] fix max sequence length for xlmr transform --- torchtext/models/roberta/bundler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchtext/models/roberta/bundler.py b/torchtext/models/roberta/bundler.py index 7cd9e2c833..6e9f94f86c 100644 --- a/torchtext/models/roberta/bundler.py +++ b/torchtext/models/roberta/bundler.py @@ -158,7 +158,7 @@ def encoderConf(self) -> RobertaEncoderConf: transform=lambda: T.Sequential( T.SentencePieceTokenizer(urljoin(_TEXT_BUCKET, "xlmr.sentencepiece.bpe.model")), T.VocabTransform(load_state_dict_from_url(urljoin(_TEXT_BUCKET, "xlmr.vocab.pt"))), - T.Truncate(254), + T.Truncate(510), T.AddToken(token=0, begin=True), T.AddToken(token=2, begin=False), )