Skip to content

Commit 5b1cd9a

Browse files
committed
Add pretrained weights from wav2vec2.0 and XLSR papers (#1827)
Add pretrained weights from https://github.com/pytorch/fairseq/tree/main/examples/wav2vec#pre-trained-models

- Wav2Vec 2.0 Base / Large / Large (LV-60)
- XLSR-53
1 parent 384e447 commit 5b1cd9a

File tree

4 files changed

+634
-0
lines changed

4 files changed

+634
-0
lines changed

docs/source/models.rst

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,45 @@ Pre-trained Models
139139

140140
.. autoproperty:: labels
141141

142+
.. autodata:: WAV2VEC2_BASE
143+
:no-value:
144+
145+
.. autodata:: WAV2VEC2_ASR_BASE_10M
146+
:no-value:
147+
148+
.. autodata:: WAV2VEC2_ASR_BASE_100H
149+
:no-value:
150+
151+
.. autodata:: WAV2VEC2_ASR_BASE_960H
152+
:no-value:
153+
154+
.. autodata:: WAV2VEC2_LARGE
155+
:no-value:
156+
157+
.. autodata:: WAV2VEC2_ASR_LARGE_10M
158+
:no-value:
159+
160+
.. autodata:: WAV2VEC2_ASR_LARGE_100H
161+
:no-value:
162+
163+
.. autodata:: WAV2VEC2_ASR_LARGE_960H
164+
:no-value:
165+
166+
.. autodata:: WAV2VEC2_LARGE_LV60K
167+
:no-value:
168+
169+
.. autodata:: WAV2VEC2_ASR_LARGE_LV60K_10M
170+
:no-value:
171+
172+
.. autodata:: WAV2VEC2_ASR_LARGE_LV60K_100H
173+
:no-value:
174+
175+
.. autodata:: WAV2VEC2_ASR_LARGE_LV60K_960H
176+
:no-value:
177+
178+
.. autodata:: WAV2VEC2_XLSR53
179+
:no-value:
180+
142181
.. autodata:: HUBERT_BASE
143182
:no-value:
144183

test/integration_tests/wav2vec2_model_test.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,18 @@
11
import torchaudio
22
from torchaudio.models import (
3+
WAV2VEC2_BASE,
4+
WAV2VEC2_LARGE,
5+
WAV2VEC2_LARGE_LV60K,
6+
WAV2VEC2_ASR_BASE_10M,
7+
WAV2VEC2_ASR_BASE_100H,
8+
WAV2VEC2_ASR_BASE_960H,
9+
WAV2VEC2_ASR_LARGE_10M,
10+
WAV2VEC2_ASR_LARGE_100H,
11+
WAV2VEC2_ASR_LARGE_960H,
12+
WAV2VEC2_ASR_LARGE_LV60K_10M,
13+
WAV2VEC2_ASR_LARGE_LV60K_100H,
14+
WAV2VEC2_ASR_LARGE_LV60K_960H,
15+
WAV2VEC2_XLSR53,
316
HUBERT_BASE,
417
HUBERT_LARGE,
518
HUBERT_XLARGE,
@@ -12,6 +25,10 @@
1225
@pytest.mark.parametrize(
1326
"bundle",
1427
[
28+
WAV2VEC2_BASE,
29+
WAV2VEC2_LARGE,
30+
WAV2VEC2_LARGE_LV60K,
31+
WAV2VEC2_XLSR53,
1532
HUBERT_BASE,
1633
HUBERT_LARGE,
1734
HUBERT_XLARGE,
@@ -25,6 +42,15 @@ def test_pretraining_models(bundle):
2542
@pytest.mark.parametrize(
2643
"bundle,expected",
2744
[
45+
(WAV2VEC2_ASR_BASE_10M, 'I|HAD|THAT|CURIYOSSITY|BESID|ME|AT|THIS|MOMENT|'),
46+
(WAV2VEC2_ASR_BASE_100H, 'I|HAD|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|'),
47+
(WAV2VEC2_ASR_BASE_960H, 'I|HAD|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|'),
48+
(WAV2VEC2_ASR_LARGE_10M, 'I|HAD|THAT|CURIOUSITY|BESIDE|ME|AT|THIS|MOMENT|'),
49+
(WAV2VEC2_ASR_LARGE_100H, 'I|HAD|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|'),
50+
(WAV2VEC2_ASR_LARGE_960H, 'I|HAD|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|'),
51+
(WAV2VEC2_ASR_LARGE_LV60K_10M, 'I|HAD|THAT|CURIOUSSITY|BESID|ME|AT|THISS|MOMENT|'),
52+
(WAV2VEC2_ASR_LARGE_LV60K_100H, 'I|HAVE|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|'),
53+
(WAV2VEC2_ASR_LARGE_LV60K_960H, 'I|HAVE|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|'),
2854
(HUBERT_ASR_LARGE, 'I|HAVE|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|'),
2955
(HUBERT_ASR_XLARGE, 'I|HAVE|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|')
3056
]

torchaudio/models/__init__.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,19 @@
1919
)
2020
from .wav2vec2.pretrained import (
2121
Wav2Vec2PretrainedModelBundle,
22+
WAV2VEC2_BASE,
23+
WAV2VEC2_LARGE,
24+
WAV2VEC2_LARGE_LV60K,
25+
WAV2VEC2_ASR_BASE_10M,
26+
WAV2VEC2_ASR_BASE_100H,
27+
WAV2VEC2_ASR_BASE_960H,
28+
WAV2VEC2_ASR_LARGE_10M,
29+
WAV2VEC2_ASR_LARGE_100H,
30+
WAV2VEC2_ASR_LARGE_960H,
31+
WAV2VEC2_ASR_LARGE_LV60K_10M,
32+
WAV2VEC2_ASR_LARGE_LV60K_100H,
33+
WAV2VEC2_ASR_LARGE_LV60K_960H,
34+
WAV2VEC2_XLSR53,
2235
HUBERT_BASE,
2336
HUBERT_LARGE,
2437
HUBERT_XLARGE,
@@ -45,6 +58,19 @@
4558
'hubert_ft_large',
4659
'hubert_ft_xlarge',
4760
'Wav2Vec2PretrainedModelBundle',
61+
'WAV2VEC2_BASE',
62+
'WAV2VEC2_LARGE',
63+
'WAV2VEC2_LARGE_LV60K',
64+
'WAV2VEC2_ASR_BASE_10M',
65+
'WAV2VEC2_ASR_BASE_100H',
66+
'WAV2VEC2_ASR_BASE_960H',
67+
'WAV2VEC2_ASR_LARGE_10M',
68+
'WAV2VEC2_ASR_LARGE_100H',
69+
'WAV2VEC2_ASR_LARGE_960H',
70+
'WAV2VEC2_ASR_LARGE_LV60K_10M',
71+
'WAV2VEC2_ASR_LARGE_LV60K_100H',
72+
'WAV2VEC2_ASR_LARGE_LV60K_960H',
73+
'WAV2VEC2_XLSR53',
4874
'HUBERT_BASE',
4975
'HUBERT_LARGE',
5076
'HUBERT_XLARGE',

0 commit comments

Comments
 (0)