@@ -272,7 +272,6 @@ def forward(self, input: Any) -> Any:
272272
273273
274274class GPT2BPETokenizer (Module ):
275- __jit_unused_properties__ = ["is_jitable" ]
276275 """
277276 Transform for GPT-2 BPE Tokenizer.
278277
@@ -286,6 +285,8 @@ class GPT2BPETokenizer(Module):
286285 :param return_tokens: Indicate whether to return split tokens. If False, it will return encoded token IDs as strings (default: False)
287286 :type return_input: bool
288287 """
288+
289+ __jit_unused_properties__ = ["is_jitable" ]
289290 _seperator : torch .jit .Final [str ]
290291
291292 def __init__ (self , encoder_json_path : str , vocab_bpe_path : str , return_tokens : bool = False ):
@@ -382,7 +383,6 @@ def __prepare_scriptable__(self):
382383
383384
384385class CLIPTokenizer (Module ):
385- __jit_unused_properties__ = ["is_jitable" ]
386386 """
387387 Transform for CLIP Tokenizer. Based on Byte-Level BPE.
388388
@@ -414,6 +414,7 @@ class CLIPTokenizer(Module):
414414 :type return_input: bool
415415 """
416416
417+ __jit_unused_properties__ = ["is_jitable" ]
417418 _seperator : torch .jit .Final [str ]
418419
419420 def __init__ (
@@ -534,23 +535,25 @@ def __prepare_scriptable__(self):
534535
535536
536537class BERTTokenizer (Module ):
537- __jit_unused_properties__ = ["is_jitable" ]
538538 """
539539 Transform for BERT Tokenizer.
540540
541541 Based on WordPiece algorithm introduced in paper:
542542 https://static.googleusercontent.com/media/research.google.com/ja//pubs/archive/37842.pdf
543543
544- The backend kernel implementation is the modified form of https://github.com/LieluoboAi/radish.
545- See https://github.com/pytorch/text/pull/1707 summary for more details.
544+ The backend kernel implementation is adapted from https://github.com/LieluoboAi/radish.
545+
546+ See the summary of PR https://github.com/pytorch/text/pull/1707 for more details.
546547
547548 The below code snippet shows how to use the BERT tokenizer using the pre-trained vocab files.
549+
548550 Example
549551 >>> from torchtext.transforms import BERTTokenizer
550552 >>> VOCAB_FILE = "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt"
551553 >>> tokenizer = BERTTokenizer(vocab_path=VOCAB_FILE, do_lower_case=True, return_tokens=True)
552554 >>> tokenizer("Hello World, How are you!") # single sentence input
553555 >>> tokenizer(["Hello World","How are you!"]) # batch input
556+
554557 :param vocab_path: Path to pre-trained vocabulary file. The path can be either local or URL.
555558 :type vocab_path: str
556559 :param do_lower_case: Indicate whether to do lower case. (default: True)
@@ -561,6 +564,8 @@ class BERTTokenizer(Module):
561564 :type return_tokens: bool
562565 """
563566
567+ __jit_unused_properties__ = ["is_jitable" ]
568+
564569 def __init__ (
565570 self , vocab_path : str , do_lower_case : bool = True , strip_accents : Optional [bool ] = None , return_tokens = False
566571 ) -> None :
0 commit comments