@@ -155,7 +155,9 @@ def _get_labels():
 )
 WAV2VEC2_BASE.__doc__ = """wav2vec 2.0 model with "Base" configuration.

-Trained on 960 hours of *LibriSpeech* [:footcite:`7178964`] dataset. Not fine-tuned.
+Pre-trained on 960 hours of unlabeled audio from *LibriSpeech* dataset [:footcite:`7178964`]
+(the combination of "train-clean-100", "train-clean-360", and "train-other-500").
+Not fine-tuned.

 Originally published by the authors of *wav2vec 2.0* [:footcite:`baevski2020wav2vec`].
 [`Source <https://github.com/pytorch/fairseq/tree/main/examples/wav2vec#pre-trained-models>`__]
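For orientation, here is a minimal sketch of how a pre-trained-only bundle such as WAV2VEC2_BASE is typically consumed. It assumes the bundle is exposed as `torchaudio.pipelines.WAV2VEC2_BASE` and that the returned model provides `extract_features()`, as in recent torchaudio releases; the import path and the file name below are illustrative placeholders, not part of this diff.

```python
import torch
import torchaudio

# Assumption: the bundle is exposed under torchaudio.pipelines (recent releases).
bundle = torchaudio.pipelines.WAV2VEC2_BASE

# The "Base" bundle is pre-trained only, so the model has no ASR head.
model = bundle.get_model()
model.eval()

# "speech.wav" is a placeholder; resample if its rate differs from the bundle's.
waveform, sample_rate = torchaudio.load("speech.wav")
if sample_rate != bundle.sample_rate:
    waveform = torchaudio.functional.resample(waveform, sample_rate, int(bundle.sample_rate))

with torch.inference_mode():
    # One tensor of shape (batch, frames, feature_dim) per transformer layer.
    features, _ = model.extract_features(waveform)
```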
@@ -193,8 +195,10 @@ def _get_labels():
 )
 WAV2VEC2_ASR_BASE_10M.__doc__ = """Build "base" wav2vec2 model with an extra linear module

-Pre-trained on 960 hours of *LibriSpeech* [:footcite:`7178964`] dataset, and
-fine-tuned for ASR on 10 minutes of *Libri-Light* [:footcite:`librilight`] dataset.
+Pre-trained on 960 hours of unlabeled audio from *LibriSpeech* dataset [:footcite:`7178964`]
+(the combination of "train-clean-100", "train-clean-360", and "train-other-500"), and
+fine-tuned for ASR on 10 minutes of transcribed audio from *Libri-Light* dataset
+[:footcite:`librilight`] ("train-10min" subset).

 Originally published by the authors of *wav2vec 2.0*
 [:footcite:`baevski2020wav2vec`].
@@ -234,9 +238,10 @@ def _get_labels():

 WAV2VEC2_ASR_BASE_100H.__doc__ = """Build "base" wav2vec2 model with an extra linear module

-Pre-trained on 960 hours of *LibriSpeech* [:footcite:`7178964`] dataset, and
-fine-tuned for ASR on 100 hours of *LibriSpeech* [:footcite:`librilight`] dataset
-(test-clean-100 subset).
+Pre-trained on 960 hours of unlabeled audio from *LibriSpeech* dataset [:footcite:`7178964`]
+(the combination of "train-clean-100", "train-clean-360", and "train-other-500"), and
+fine-tuned for ASR on 100 hours of transcribed audio from the same dataset
+("train-clean-100" subset).

 Originally published by the authors of *wav2vec 2.0*
 [:footcite:`baevski2020wav2vec`].
@@ -275,8 +280,9 @@ def _get_labels():
 )
 WAV2VEC2_ASR_BASE_960H.__doc__ = """Build "base" wav2vec2 model with an extra linear module

-Pre-trained and fine-tuned for ASR on 960 hours of
-*LibriSpeech* [:footcite:`7178964`] dataset.
+Pre-trained on 960 hours of unlabeled audio from *LibriSpeech* dataset [:footcite:`7178964`]
+(the combination of "train-clean-100", "train-clean-360", and "train-other-500"), and
+fine-tuned for ASR on the same audio with the corresponding transcripts.

 Originally published by the authors of *wav2vec 2.0*
 [:footcite:`baevski2020wav2vec`].
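The fine-tuned bundles additionally carry the character set of the extra linear (CTC) head, so their emissions can be turned into a transcript. Below is a rough sketch of greedy CTC decoding with WAV2VEC2_ASR_BASE_960H, under the same assumptions as the sketch above plus the assumption that the first label returned by `get_labels()` is the CTC blank and `"|"` marks word boundaries.

```python
import torch
import torchaudio

# Assumption: the bundle is exposed under torchaudio.pipelines (recent releases).
bundle = torchaudio.pipelines.WAV2VEC2_ASR_BASE_960H
model = bundle.get_model()
model.eval()
labels = bundle.get_labels()  # characters emitted by the extra linear module

waveform, sample_rate = torchaudio.load("speech.wav")  # placeholder path
if sample_rate != bundle.sample_rate:
    waveform = torchaudio.functional.resample(waveform, sample_rate, int(bundle.sample_rate))

with torch.inference_mode():
    emission, _ = model(waveform)  # (batch, frames, num_labels)

# Greedy CTC decoding: best label per frame, collapse repeats, drop the blank
# (assumed to be labels[0]), and treat "|" as the word delimiter.
indices = emission[0].argmax(dim=-1).tolist()
transcript = []
prev = None
for i in indices:
    if i != prev and i != 0:
        transcript.append(labels[i])
    prev = i
print("".join(transcript).replace("|", " ").strip())
```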
@@ -315,7 +321,9 @@ def _get_labels():
 )
 WAV2VEC2_LARGE.__doc__ = """Build "large" wav2vec2 model.

-Trained on 960 hours of *LibriSpeech* [:footcite:`7178964`] dataset. Not fine-tuned.
+Pre-trained on 960 hours of unlabeled audio from *LibriSpeech* dataset [:footcite:`7178964`]
+(the combination of "train-clean-100", "train-clean-360", and "train-other-500").
+Not fine-tuned.

 Originally published by the authors of *wav2vec 2.0*
 [:footcite:`baevski2020wav2vec`].
@@ -354,8 +362,10 @@ def _get_labels():
 )
 WAV2VEC2_ASR_LARGE_10M.__doc__ = """Build "large" wav2vec2 model with an extra linear module

-Pre-trained on 960 hours of *LibriSpeech* [:footcite:`7178964`] dataset, and
-fine-tuned for ASR on 10 minutes of *Libri-Light* [:footcite:`librilight`] dataset.
+Pre-trained on 960 hours of unlabeled audio from *LibriSpeech* dataset [:footcite:`7178964`]
+(the combination of "train-clean-100", "train-clean-360", and "train-other-500"), and
+fine-tuned for ASR on 10 minutes of transcribed audio from *Libri-Light* dataset
+[:footcite:`librilight`] ("train-10min" subset).

 Originally published by the authors of *wav2vec 2.0*
 [:footcite:`baevski2020wav2vec`].
@@ -394,9 +404,10 @@ def _get_labels():
 )
 WAV2VEC2_ASR_LARGE_100H.__doc__ = """Build "large" wav2vec2 model with an extra linear module

-Pre-trained on 960 hours of *LibriSpeech* [:footcite:`7178964`] dataset, and
-fine-tuned for ASR on 100 hours of *LibriSpeech* [:footcite:`librilight`] dataset
-(test-clean-100 subset).
+Pre-trained on 960 hours of unlabeled audio from *LibriSpeech* dataset [:footcite:`7178964`]
+(the combination of "train-clean-100", "train-clean-360", and "train-other-500"), and
+fine-tuned for ASR on 100 hours of transcribed audio from
+the same dataset ("train-clean-100" subset).

 Originally published by the authors of *wav2vec 2.0*
 [:footcite:`baevski2020wav2vec`].
@@ -435,8 +446,9 @@ def _get_labels():
 )
 WAV2VEC2_ASR_LARGE_960H.__doc__ = """Build "large" wav2vec2 model with an extra linear module

-Pre-trained and fine-tuned for ASR on 960 hours of
-*LibriSpeech* [:footcite:`7178964`] dataset.
+Pre-trained on 960 hours of unlabeled audio from *LibriSpeech* dataset [:footcite:`7178964`]
+(the combination of "train-clean-100", "train-clean-360", and "train-other-500"), and
+fine-tuned for ASR on the same audio with the corresponding transcripts.

 Originally published by the authors of *wav2vec 2.0*
 [:footcite:`baevski2020wav2vec`].
@@ -475,7 +487,9 @@ def _get_labels():
 )
 WAV2VEC2_LARGE_LV60K.__doc__ = """Build "large-lv60k" wav2vec2 model.

-Trained on 60,000 hours of *LibriLight* [:footcite:`librilight`] dataset. Not fine-tuned.
+Pre-trained on 60,000 hours of unlabeled audio from
+*Libri-Light* dataset [:footcite:`librilight`].
+Not fine-tuned.

 Originally published by the authors of *wav2vec 2.0*
 [:footcite:`baevski2020wav2vec`].
@@ -514,8 +528,10 @@ def _get_labels():
 )
 WAV2VEC2_ASR_LARGE_LV60K_10M.__doc__ = """Build "large-lv60k" wav2vec2 model with an extra linear module

-Pre-trained on 60,000 hours of *Libri-Light* [:footcite:`librilight`] dataset, and
-fine-tuned for ASR on 10 minutes of *Libri-Light* [:footcite:`librilight`] dataset.
+Pre-trained on 60,000 hours of unlabeled audio from
+*Libri-Light* dataset [:footcite:`librilight`], and
+fine-tuned for ASR on 10 minutes of transcribed audio from
+the same dataset ("train-10min" subset).

 Originally published by the authors of *wav2vec 2.0*
 [:footcite:`baevski2020wav2vec`].
@@ -554,9 +570,10 @@ def _get_labels():
 )
 WAV2VEC2_ASR_LARGE_LV60K_100H.__doc__ = """Build "large-lv60k" wav2vec2 model with an extra linear module

-Pre-trained on 60,000 hours of *Libri-Light* [:footcite:`librilight`] dataset, and
-fine-tuned for ASR on 100 hours of *LibriSpeech* [:footcite:`librilight`] dataset
-(test-clean-100 subset).
+Pre-trained on 60,000 hours of unlabeled audio from
+*Libri-Light* dataset [:footcite:`librilight`], and
+fine-tuned for ASR on 100 hours of transcribed audio from
+*LibriSpeech* dataset [:footcite:`7178964`] ("train-clean-100" subset).

 Originally published by the authors of *wav2vec 2.0*
 [:footcite:`baevski2020wav2vec`].
@@ -595,8 +612,11 @@ def _get_labels():
 )
 WAV2VEC2_ASR_LARGE_LV60K_960H.__doc__ = """Build "large-lv60k" wav2vec2 model with an extra linear module

-Pre-trained on 60,000 hours of *Libri-Light* [:footcite:`librilight`] dataset, and
-fine-tuned for ASR on 960 hours of *LibriSpeech* [:footcite:`7178964`] dataset.
+Pre-trained on 60,000 hours of unlabeled audio from *Libri-Light*
+[:footcite:`librilight`] dataset, and
+fine-tuned for ASR on 960 hours of transcribed audio from
+*LibriSpeech* dataset [:footcite:`7178964`]
+(the combination of "train-clean-100", "train-clean-360", and "train-other-500").

 Originally published by the authors of *wav2vec 2.0*
 [:footcite:`baevski2020wav2vec`].
@@ -638,7 +658,7 @@ def _get_labels():
 Trained on 56,000 hours of multiple datasets (
 *Multilingual LibriSpeech* [:footcite:`Pratap_2020`],
 *CommonVoice* [:footcite:`ardila2020common`] and
-*BABEL* [:footcite:`Gales2014SpeechRA`])
+*BABEL* [:footcite:`Gales2014SpeechRA`]). Not fine-tuned.

 Originally published by the authors of
 *Unsupervised Cross-lingual Representation Learning for Speech Recognition*
@@ -678,7 +698,8 @@ def _get_labels():
 )
 HUBERT_BASE.__doc__ = """HuBERT model with "Base" configuration.

-Trained on 960 hours of *LibriSpeech* [:footcite:`7178964`] dataset. Not fine-tuned.
+Trained on 960 hours of unlabeled audio from *LibriSpeech* dataset [:footcite:`7178964`].
+Not fine-tuned.

 Originally published by the authors of *HuBERT* [:footcite:`hsu2021hubert`].
 [`Source <https://github.com/pytorch/fairseq/tree/main/examples/hubert#pre-trained-and-fine-tuned-asr-models>`__]
@@ -716,8 +737,11 @@ def _get_labels():
 )
 HUBERT_ASR_LARGE.__doc__ = """HuBERT model with "Large" configuration.

-Pre-trained on 60,000 hours of *Libri-Light* [:footcite:`librilight`] dataset, and
-fine-tuned for ASR on 960 hours of *LibriSpeech* [:footcite:`7178964`] dataset.
+Pre-trained on 60,000 hours of unlabeled audio from
+*Libri-Light* dataset [:footcite:`librilight`], and
+fine-tuned for ASR on 960 hours of transcribed audio from
+*LibriSpeech* dataset [:footcite:`7178964`]
+(the combination of "train-clean-100", "train-clean-360", and "train-other-500").

 Originally published by the authors of *HuBERT* [:footcite:`hsu2021hubert`].
 [`Source <https://github.com/pytorch/fairseq/tree/main/examples/hubert#pre-trained-and-fine-tuned-asr-models>`__]
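All of these constants follow the same bundle interface, so, under the same assumptions as the sketches above, switching architectures, for example to the HuBERT ASR model documented here, is only a matter of picking a different bundle:

```python
import torchaudio

# Same assumptions as above: bundles live in torchaudio.pipelines and expose
# get_model(), get_labels(), and sample_rate.
bundle = torchaudio.pipelines.HUBERT_ASR_LARGE
model = bundle.get_model()
labels = bundle.get_labels()
print(bundle.sample_rate, len(labels))
```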