Skip to content

Commit 82ea389

Browse files
committed
Add ES model
1 parent 19d8f1c commit 82ea389

File tree

4 files changed

+77
-0
lines changed

4 files changed

+77
-0
lines changed

test/integration_tests/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ def ctc_decoder():
3434

3535
_FILES = {
3636
'en': 'Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.flac',
37+
'es': '20130207-0900-PLENARY-7-es_20130207-13_02_05_5.flac',
3738
}
3839

3940

test/integration_tests/wav2vec2_pipeline_test.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
HUBERT_XLARGE,
1919
HUBERT_ASR_LARGE,
2020
HUBERT_ASR_XLARGE,
21+
WAV2VEC2_ASR_VOXPOPULI_ES_BASE_10K,
2122
)
2223
import pytest
2324

@@ -53,6 +54,7 @@ def test_pretraining_models(bundle):
5354
(WAV2VEC2_ASR_LARGE_LV60K_960H, 'en', 'I|HAVE|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|'),
5455
(HUBERT_ASR_LARGE, 'en', 'I|HAVE|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|'),
5556
(HUBERT_ASR_XLARGE, 'en', 'I|HAVE|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|'),
57+
(WAV2VEC2_ASR_VOXPOPULI_ES_BASE_10K, 'es', "la|primera|que|es|imprescindible|pensar|a|pequeña|a|escala|para|implicar|y|complementar|así|la|actuación|global"),
5658
]
5759
)
5860
def test_finetune_asr_model(

torchaudio/pipelines/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
WAV2VEC2_ASR_LARGE_LV60K_100H,
1515
WAV2VEC2_ASR_LARGE_LV60K_960H,
1616
WAV2VEC2_XLSR53,
17+
WAV2VEC2_ASR_VOXPOPULI_ES_BASE_10K,
1718
HUBERT_BASE,
1819
HUBERT_LARGE,
1920
HUBERT_XLARGE,
@@ -44,6 +45,7 @@
4445
'WAV2VEC2_ASR_LARGE_LV60K_100H',
4546
'WAV2VEC2_ASR_LARGE_LV60K_960H',
4647
'WAV2VEC2_XLSR53',
48+
'WAV2VEC2_ASR_VOXPOPULI_ES_BASE_10K',
4749
'HUBERT_BASE',
4850
'HUBERT_LARGE',
4951
'HUBERT_XLARGE',

torchaudio/pipelines/_wav2vec2.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1000,3 +1000,75 @@ def _get_labels():
10001000
10011001
Please refer to :func:`torchaudio.pipelines.Wav2Vec2ASRBundle` for the usage.
10021002
""" # noqa: E501
1003+
1004+
1005+
def _get_es_labels():
1006+
return (
1007+
"|",
1008+
"e",
1009+
"a",
1010+
"o",
1011+
"s",
1012+
"n",
1013+
"r",
1014+
"i",
1015+
"l",
1016+
"d",
1017+
"c",
1018+
"t",
1019+
"u",
1020+
"p",
1021+
"m",
1022+
"b",
1023+
"q",
1024+
"y",
1025+
"g",
1026+
"v",
1027+
"h",
1028+
"ó",
1029+
"f",
1030+
"í",
1031+
"á",
1032+
"j",
1033+
"z",
1034+
"ñ",
1035+
"é",
1036+
"x",
1037+
"ú",
1038+
"k",
1039+
"w",
1040+
"ü",
1041+
"1",
1042+
)
1043+
1044+
# Spanish ASR bundle: checkpoint file name, model construction parameters,
# the label set from ``_get_es_labels()`` and a 16 kHz sample rate.
WAV2VEC2_ASR_VOXPOPULI_ES_BASE_10K = Wav2Vec2ASRBundle(
    'wav2vec2_voxpopuli_base_10k_asr_es.pt',
    {
        # extractor_* settings
        "extractor_mode": "group_norm",
        # Seven conv layers; each tuple is presumably
        # (out_channels, kernel_size, stride) - TODO confirm.
        "extractor_conv_layer_config": (
            [(512, 10, 5)] + [(512, 3, 2)] * 4 + [(512, 2, 2)] * 2
        ),
        "extractor_conv_bias": False,
        # encoder_* settings
        "encoder_embed_dim": 768,
        "encoder_projection_dropout": 0.0,
        "encoder_pos_conv_kernel": 128,
        "encoder_pos_conv_groups": 16,
        "encoder_num_layers": 12,
        "encoder_num_heads": 12,
        "encoder_attention_dropout": 0.0,
        "encoder_ff_interm_features": 3072,
        "encoder_ff_interm_dropout": 0.1,
        "encoder_dropout": 0.0,
        "encoder_layer_norm_first": False,
        "encoder_layer_drop": 0.1,
        # NOTE(review): 36 outputs, while _get_es_labels() returns 35
        # entries - presumably one extra slot for a special token; confirm.
        "aux_num_out": 36,
    },
    _labels=_get_es_labels(),
    _sample_rate=16000,
)

0 commit comments

Comments
 (0)