import csv
import os
from pathlib import Path
from typing import Dict, List, Tuple

from torch import Tensor
from torchaudio.datasets import COMMONVOICE
from torchaudio_unittest.common_utils import (
    TempDirMixin,
    TorchaudioTestCase,
    get_whitenoise,
    save_wav,
    normalize_wav,
)
1416
# Value of ``COMMONVOICE._ext_audio`` at import time; the test classes override
# that attribute and restore it from here in ``tearDownClass``.
_ORIGINAL_EXT_AUDIO = COMMONVOICE._ext_audio

# Sample rate used for every generated white-noise clip.
_SAMPLE_RATE = 48000

# Header row written to the mock ``train.tsv``; also the keys of the expected
# per-sample metadata dictionaries.
_HEADERS = [
    "client_ids",
    "path",
    "sentence",
    "up_votes",
    "down_votes",
    "age",
    "gender",
    "accent",
]
21+
def get_mock_dataset_en(root_dir) -> List[Tuple[Tensor, int, Dict[str, str]]]:
    """Create a mock English CommonVoice dataset inside ``root_dir``.

    Writes ``train.tsv`` (header row plus one row per sample) and, for every
    row, one second of single-channel white noise under ``clips/``.

    Args:
        root_dir: Directory in which the mock dataset is created.

    Returns:
        One ``(waveform, sample_rate, metadata)`` tuple per TSV row, in file
        order. ``waveform`` is ``normalize_wav`` applied to the saved noise and
        ``metadata`` maps each entry of ``_HEADERS`` to the row's value.
    """
    mocked_data = []
    # Note: extension is changed to wav for the sake of test
    # Note: the first content is missing values for `age`, `gender` and `accent` as in the original data.
    _en_train_csv_contents = [
        ["9d16c5d980247861130e0480e2719f448be73d86a496c36d01a477cbdecd8cfd1399403d7a77bf458d211a70711b2da0845c",
         "common_voice_en_18885784.wav",
         "He was accorded a State funeral, and was buried in Drayton and Toowoomba Cemetery.", "2", "0", "", "",
         ""],
        ["c82eb9291328620f06025a1f8112b909099e447e485e99236cb87df008650250e79fea5ca772061fb6a370830847b9c44d20",
         "common_voice_en_556542.wav", "Once more into the breach", "2", "0", "thirties", "male", "us"],
        ["f74d880c5ad4c5917f314a604d3fc4805159d255796fb9f8defca35333ecc002bdf53dc463503c12674ea840b21b4a507b7c",
         "common_voice_en_18607573.wav",
         "Caddy, show Miss Clare and Miss Summerson their rooms.", "2", "0", "twenties", "male", "canada"],
    ]
    # Tsv file name difference does not mean different subset, testing as a whole dataset here
    tsv_filename = os.path.join(root_dir, "train.tsv")
    audio_base_path = os.path.join(root_dir, "clips")
    os.makedirs(audio_base_path, exist_ok=True)
    with open(tsv_filename, "w", newline='') as tsv:
        writer = csv.writer(tsv, delimiter='\t')
        writer.writerow(_HEADERS)
        for i, content in enumerate(_en_train_csv_contents):
            writer.writerow(content)
            # Generate and store audio; the row index doubles as the noise seed
            # so every clip is distinct.
            audio_path = os.path.join(audio_base_path, content[1])
            data = get_whitenoise(sample_rate=_SAMPLE_RATE, duration=1, n_channels=1, seed=i, dtype='float32')
            save_wav(audio_path, data, _SAMPLE_RATE)
            # Append data entry
            mocked_data.append((normalize_wav(data), _SAMPLE_RATE, dict(zip(_HEADERS, content))))
    return mocked_data
53+
54+
def get_mock_dataset_fr(root_dir) -> List[Tuple[Tensor, int, Dict[str, str]]]:
    """Create a mock French CommonVoice dataset inside ``root_dir``.

    Writes ``train.tsv`` (header row plus one row per sample) and, for every
    row, one second of single-channel white noise under ``clips/``. Unlike the
    English mock, the ``path`` column carries no file extension, so the clip is
    stored as ``<path>`` + ``_ORIGINAL_EXT_AUDIO``.

    Args:
        root_dir: Directory in which the mock dataset is created.

    Returns:
        One ``(waveform, sample_rate, metadata)`` tuple per TSV row, in file
        order. ``waveform`` is ``normalize_wav`` applied to the saved noise and
        ``metadata`` maps each entry of ``_HEADERS`` to the row's value (with
        the sentence in its re-encoded form, see below).
    """
    mocked_data = []
    _fr_train_csv_contents = [
        [
            "a2e8e1e1cc74d08c92a53d7b9ff84e077eb90410edd85b8882f16fd037cecfcb6a19413c6c63ce6458cfea9579878fa91cef"
            "18343441c601cae0597a4b0d3144",
            "89e67e7682b36786a0b4b4022c4d42090c86edd96c78c12d30088e62522b8fe466ea4912e6a1055dfb91b296a0743e0a2bbe"
            "16cebac98ee5349e3e8262cb9329",
            "Or sur ce point nous n’avons aucune réponse de votre part.", "2", "0", "twenties", "male", "france"],
        [
            "a2e8e1e1cc74d08c92a53d7b9ff84e077eb90410edd85b8882f16fd037cecfcb6a19413c6c63ce6458cfea9579878fa91cef18"
            "343441c601cae0597a4b0d3144",
            "87d71819a26179e93acfee149d0b21b7bf5e926e367d80b2b3792d45f46e04853a514945783ff764c1fc237b4eb0ee2b0a7a7"
            "cbd395acbdfcfa9d76a6e199bbd",
            "Monsieur de La Verpillière, laissez parler le ministre", "2", "0", "twenties", "male", "france"],
    ]
    # Tsv file name difference does not mean different subset, testing as a whole dataset here
    tsv_filename = os.path.join(root_dir, "train.tsv")
    audio_base_path = os.path.join(root_dir, "clips")
    os.makedirs(audio_base_path, exist_ok=True)
    with open(tsv_filename, "w", newline='') as tsv:
        writer = csv.writer(tsv, delimiter='\t')
        writer.writerow(_HEADERS)
        for i, content in enumerate(_fr_train_csv_contents):
            # Replace the sentence with the string form of its UTF-8 bytes
            # (i.e. "b'...'" with escaped non-ASCII characters). The row is
            # mutated in place, so the TSV and the expected metadata agree.
            # NOTE(review): presumably done to keep the TSV ASCII-only
            # regardless of the platform's default encoding -- confirm.
            content[2] = str(content[2].encode("utf-8"))
            writer.writerow(content)
            # Generate and store audio; the row index doubles as the noise seed
            # so every clip is distinct.
            audio_path = os.path.join(audio_base_path, content[1] + _ORIGINAL_EXT_AUDIO)
            data = get_whitenoise(sample_rate=_SAMPLE_RATE, duration=1, n_channels=1, seed=i, dtype='float32')
            save_wav(audio_path, data, _SAMPLE_RATE)

            # Append data entry
            mocked_data.append((normalize_wav(data), _SAMPLE_RATE, dict(zip(_HEADERS, content))))
    return mocked_data
90+
91+
92+ class TestCommonVoiceEN (TempDirMixin , TorchaudioTestCase ):
93+ backend = 'default'
94+ root_dir = None
3495
3596 @classmethod
3697 def setUpClass (cls ):
3798 cls .root_dir = cls .get_base_temp_dir ()
38- # Tsv file name difference does not mean different subset, testing as a whole dataset here
39- tsv_filename = os .path .join (cls .root_dir , "train.tsv" )
40- audio_base_path = os .path .join (cls .root_dir , "clips" )
41- os .makedirs (audio_base_path , exist_ok = True )
42- with open (tsv_filename , "w" , newline = '' ) as tsv :
43- writer = csv .writer (tsv , delimiter = '\t ' )
44- writer .writerow (cls ._headers )
45- for i , content in enumerate (cls ._train_csv_contents ):
46- writer .writerow (content )
47-
48- # Generate and store audio
49- audio_path = os .path .join (audio_base_path , content [1 ])
50- data = get_whitenoise (sample_rate = cls .sample_rate , duration = 1 , n_channels = 1 , seed = i , dtype = 'float32' )
51- save_wav (audio_path , data , cls .sample_rate )
52-
53- # Append data entry
54- cls .data .append ((normalize_wav (data ), cls .sample_rate , dict (zip (cls ._headers , content ))))
99+ cls .data = get_mock_dataset_en (cls .root_dir )
100+ COMMONVOICE ._ext_audio = ".wav"
101+
102+ @classmethod
103+ def tearDownClass (cls ):
104+ COMMONVOICE ._ext_audio = _ORIGINAL_EXT_AUDIO
55105
56106 def _test_commonvoice (self , dataset ):
57107 n_ite = 0
58108 for i , (waveform , sample_rate , dictionary ) in enumerate (dataset ):
59109 expected_dictionary = self .data [i ][2 ]
60110 expected_data = self .data [i ][0 ]
61111 self .assertEqual (expected_data , waveform , atol = 5e-5 , rtol = 1e-8 )
62- assert sample_rate == TestCommonVoice . sample_rate
112+ assert sample_rate == _SAMPLE_RATE
63113 assert dictionary == expected_dictionary
64114 n_ite += 1
65115 assert n_ite == len (self .data )
@@ -71,3 +121,33 @@ def test_commonvoice_str(self):
71121 def test_commonvoice_path (self ):
72122 dataset = COMMONVOICE (Path (self .root_dir ))
73123 self ._test_commonvoice (dataset )
124+
125+
class TestCommonVoiceFR(TempDirMixin, TorchaudioTestCase):
    """Run COMMONVOICE against the French mock dataset, whose TSV paths omit the extension."""
    backend = 'default'
    # Set in setUpClass to the temporary directory holding the mock dataset.
    root_dir = None

    @classmethod
    def setUpClass(cls):
        """Build the French mock dataset and point COMMONVOICE at ``.mp3`` clips."""
        # unittest does not chain class fixtures automatically; call up so any
        # base-class/mixin setup still runs.
        super().setUpClass()
        cls.root_dir = cls.get_base_temp_dir()
        cls.data = get_mock_dataset_fr(cls.root_dir)
        # Overridden last: if dataset creation raises, tearDownClass is not run,
        # so the global must not have been modified yet.
        COMMONVOICE._ext_audio = ".mp3"

    @classmethod
    def tearDownClass(cls):
        """Restore ``COMMONVOICE._ext_audio`` and run inherited class teardown."""
        COMMONVOICE._ext_audio = _ORIGINAL_EXT_AUDIO
        # Without this call any cleanup defined by the base classes/mixins
        # would be skipped.
        super().tearDownClass()

    def _test_commonvoice(self, dataset):
        """Check every item yielded by ``dataset`` against ``self.data``.

        Each item must match the expected waveform (within tolerance), the
        module sample rate and the expected metadata; the number of items
        yielded must equal ``len(self.data)``.
        """
        n_ite = 0
        for i, (waveform, sample_rate, dictionary) in enumerate(dataset):
            expected_dictionary = self.data[i][2]
            expected_data = self.data[i][0]
            self.assertEqual(expected_data, waveform, atol=5e-5, rtol=1e-8)
            assert sample_rate == _SAMPLE_RATE
            assert dictionary == expected_dictionary
            n_ite += 1
        assert n_ite == len(self.data)

    def test_commonvoice_str(self):
        """The dataset accepts its root directory as a plain string."""
        dataset = COMMONVOICE(self.root_dir)
        self._test_commonvoice(dataset)
0 commit comments