diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst
index a7fd85f878..909efaf485 100644
--- a/docs/source/transforms.rst
+++ b/docs/source/transforms.rst
@@ -30,6 +30,14 @@ CLIPTokenizer
 
    .. automethod:: forward
 
+BERTTokenizer
+-------------
+
+.. autoclass:: BERTTokenizer
+
+   .. automethod:: forward
+
+
 VocabTransform
 --------------
 
diff --git a/torchtext/transforms.py b/torchtext/transforms.py
index 555892d690..5879e44a2d 100644
--- a/torchtext/transforms.py
+++ b/torchtext/transforms.py
@@ -272,7 +272,6 @@ def forward(self, input: Any) -> Any:
 
 
 class GPT2BPETokenizer(Module):
-    __jit_unused_properties__ = ["is_jitable"]
     """
     Transform for GPT-2 BPE Tokenizer.
 
@@ -286,6 +285,8 @@ class GPT2BPETokenizer(Module):
     :param return_tokens: Indicate whether to return split tokens. If False, it will return encoded token IDs as strings (default: False)
     :type return_input: bool
     """
+
+    __jit_unused_properties__ = ["is_jitable"]
     _seperator: torch.jit.Final[str]
 
     def __init__(self, encoder_json_path: str, vocab_bpe_path: str, return_tokens: bool = False):
@@ -382,7 +383,6 @@ def __prepare_scriptable__(self):
 
 
 class CLIPTokenizer(Module):
-    __jit_unused_properties__ = ["is_jitable"]
     """
     Transform for CLIP Tokenizer. Based on Byte-Level BPE.
 
@@ -414,6 +414,7 @@ class CLIPTokenizer(Module):
     :type return_input: bool
     """
 
+    __jit_unused_properties__ = ["is_jitable"]
     _seperator: torch.jit.Final[str]
 
     def __init__(
@@ -534,23 +535,25 @@ def __prepare_scriptable__(self):
 
 
 class BERTTokenizer(Module):
-    __jit_unused_properties__ = ["is_jitable"]
     """
     Transform for BERT Tokenizer.
 
     Based on WordPiece algorithm introduced in paper:
     https://static.googleusercontent.com/media/research.google.com/ja//pubs/archive/37842.pdf
 
-    The backend kernel implementation is the modified form of https://github.com/LieluoboAi/radish.
-    See https://github.com/pytorch/text/pull/1707 summary for more details.
+    The backend kernel implementation is taken and modified from https://github.com/LieluoboAi/radish.
+
+    See the summary of PR https://github.com/pytorch/text/pull/1707 for more details.
 
     The below code snippet shows how to use the BERT tokenizer using the pre-trained vocab files.
+
     Example
         >>> from torchtext.transforms import BERTTokenizer
         >>> VOCAB_FILE = "https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt"
         >>> tokenizer = BERTTokenizer(vocab_path=VOCAB_FILE, do_lower_case=True, return_tokens=True)
         >>> tokenizer("Hello World, How are you!") # single sentence input
         >>> tokenizer(["Hello World","How are you!"]) # batch input
+
     :param vocab_path: Path to pre-trained vocabulary file. The path can be either local or URL.
     :type vocab_path: str
     :param do_lower_case: Indicate whether to do lower case. (default: True)
@@ -561,6 +564,8 @@ class BERTTokenizer(Module):
     :type return_tokens: bool
     """
 
+    __jit_unused_properties__ = ["is_jitable"]
+
     def __init__(
         self, vocab_path: str, do_lower_case: bool = True, strip_accents: Optional[bool] = None, return_tokens=False
     ) -> None: