From 06f069f973ac59fcb3065fd836a61c69e68e9841 Mon Sep 17 00:00:00 2001 From: AzizCode92 Date: Sun, 20 Dec 2020 01:39:59 +0100 Subject: [PATCH 01/20] remove walk_files --- .../datasets/utils_test.py | 41 -------------- torchaudio/datasets/utils.py | 55 +++---------------- 2 files changed, 8 insertions(+), 88 deletions(-) diff --git a/test/torchaudio_unittest/datasets/utils_test.py b/test/torchaudio_unittest/datasets/utils_test.py index 5263319e51..7dc18f3573 100644 --- a/test/torchaudio_unittest/datasets/utils_test.py +++ b/test/torchaudio_unittest/datasets/utils_test.py @@ -1,6 +1,3 @@ -import os -from pathlib import Path - from torchaudio.datasets import utils as dataset_utils from torchaudio.datasets.commonvoice import COMMONVOICE @@ -11,44 +8,6 @@ ) -class TestWalkFiles(TempDirMixin, TorchaudioTestCase): - root = None - expected = None - - def _add_file(self, *parts): - path = self.get_temp_path(*parts) - self.expected.append(path) - Path(path).touch() - - def setUp(self): - self.root = self.get_temp_path() - self.expected = [] - - # level 1 - for filename in ['a.txt', 'b.txt', 'c.txt']: - self._add_file(filename) - - # level 2 - for dir1 in ['d1', 'd2', 'd3']: - for filename in ['d.txt', 'e.txt', 'f.txt']: - self._add_file(dir1, filename) - # level 3 - for dir2 in ['d1', 'd2', 'd3']: - for filename in ['g.txt', 'h.txt', 'i.txt']: - self._add_file(dir1, dir2, filename) - - print('\n'.join(self.expected)) - - def test_walk_files(self): - """walk_files should traverse files in alphabetical order""" - n_ites = 0 - for i, path in enumerate(dataset_utils.walk_files(self.root, '.txt', prefix=True)): - found = os.path.join(self.root, path) - assert found == self.expected[i] - n_ites += 1 - assert n_ites == len(self.expected) - - class TestIterator(TorchaudioTestCase): backend = 'default' path = get_asset_path('CommonVoice', 'cv-corpus-4-2019-12-10', 'tt') diff --git a/torchaudio/datasets/utils.py b/torchaudio/datasets/utils.py index c187d0d814..189866374f 100644 --- a/torchaudio/datasets/utils.py +++ b/torchaudio/datasets/utils.py @@ -1,18 +1,15 @@ -import errno import hashlib import logging import os -import sys import tarfile import threading +import urllib +import urllib.request import zipfile -from _io import TextIOWrapper from queue import Queue -from typing import Any, Iterable, List, Optional, Tuple, Union +from typing import Any, Iterable, List, Optional import torch -import urllib -import urllib.request from torch.utils.data import Dataset from torch.utils.model_zoo import tqdm @@ -41,11 +38,11 @@ def stream_url(url: str, req.headers["Range"] = "bytes={}-".format(start_byte) with urllib.request.urlopen(req) as upointer, tqdm( - unit="B", - unit_scale=True, - unit_divisor=1024, - total=url_size, - disable=not progress_bar, + unit="B", + unit_scale=True, + unit_divisor=1024, + total=url_size, + disable=not progress_bar, ) as pbar: num_bytes = 0 @@ -203,42 +200,6 @@ def extract_archive(from_path: str, to_path: Optional[str] = None, overwrite: bo raise NotImplementedError("We currently only support tar.gz, tgz, and zip achives.") -def walk_files(root: str, - suffix: Union[str, Tuple[str]], - prefix: bool = False, - remove_suffix: bool = False) -> Iterable[str]: - """List recursively all files ending with a suffix at a given root - Args: - root (str): Path to directory whose folders need to be listed - suffix (str or tuple): Suffix of the files to match, e.g. '.png' or ('.jpg', '.png'). - It uses the Python "str.endswith" method and is passed directly - prefix (bool, optional): If true, prepends the full path to each result, otherwise - only returns the name of the files found (Default: ``False``) - remove_suffix (bool, optional): If true, removes the suffix to each result defined in suffix, - otherwise will return the result as found (Default: ``False``). - """ - - root = os.path.expanduser(root) - - for dirpath, dirs, files in os.walk(root): - dirs.sort() - # `dirs` is the list used in os.walk function and by sorting it in-place here, we change the - # behavior of os.walk to traverse sub directory alphabetically - # see also - # https://stackoverflow.com/questions/6670029/can-i-force-python3s-os-walk-to-visit-directories-in-alphabetical-order-how#comment71993866_6670926 - files.sort() - for f in files: - if f.endswith(suffix): - - if remove_suffix: - f = f[: -len(suffix)] - - if prefix: - f = os.path.join(dirpath, f) - - yield f - - class _DiskCache(Dataset): """ Wrap a dataset so that, whenever a new item is returned, it is saved to disk. From ca0c03b1271729efeab085278e5435708161c6d1 Mon Sep 17 00:00:00 2001 From: AzizCode92 Date: Tue, 22 Dec 2020 21:42:42 +0100 Subject: [PATCH 02/20] remove deprecated transform from Dataset --- torchaudio/datasets/vctk.py | 31 ++++++++++--------------------- torchaudio/datasets/yesno.py | 20 ++++---------------- 2 files changed, 14 insertions(+), 37 deletions(-) diff --git a/torchaudio/datasets/vctk.py b/torchaudio/datasets/vctk.py index c71962bad8..7cf0679a56 100644 --- a/torchaudio/datasets/vctk.py +++ b/torchaudio/datasets/vctk.py @@ -1,11 +1,12 @@ import os import warnings -from typing import Any, Tuple, Union from pathlib import Path +from typing import Any, Tuple, Union -import torchaudio from torch import Tensor from torch.utils.data import Dataset + +import torchaudio from torchaudio.datasets.utils import ( download_url, extract_archive, @@ -66,7 +67,6 @@ class VCTK(Dataset): Giving ``download=True`` will result in error as the dataset is no longer publicly available. downsample (bool, optional): Not used. - transform (callable, optional): Optional transform applied on waveform. (default: ``None``) target_transform (callable, optional): Optional transform applied on utterance. (default: ``None``) """ @@ -82,7 +82,6 @@ def __init__(self, folder_in_archive: str = FOLDER_IN_ARCHIVE, download: bool = False, downsample: bool = False, - transform: Any = None, target_transform: Any = None) -> None: if downsample: @@ -92,15 +91,7 @@ def __init__(self, "and suppress this warning." ) - if transform is not None or target_transform is not None: - warnings.warn( - "In the next version, transforms will not be part of the dataset. " - "Please remove the option `transform=True` and " - "`target_transform=True` to suppress this warning." - ) - self.downsample = downsample - self.transform = transform self.target_transform = target_transform # Get string representation of 'root' in case Path object is passed @@ -149,8 +140,6 @@ def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, str]: # return item waveform, sample_rate, utterance, speaker_id, utterance_id = item - if self.transform is not None: - waveform = self.transform(waveform) if self.target_transform is not None: utterance = self.target_transform(utterance) return waveform, sample_rate, utterance, speaker_id, utterance_id @@ -182,12 +171,12 @@ class VCTK_092(Dataset): """ def __init__( - self, - root: str, - mic_id: str = "mic2", - download: bool = False, - url: str = URL, - audio_ext=".flac", + self, + root: str, + mic_id: str = "mic2", + download: bool = False, + url: str = URL, + audio_ext=".flac", ): if mic_id not in ["mic1", "mic2"]: raise RuntimeError( @@ -233,7 +222,7 @@ def __init__( continue utterance_dir = os.path.join(self._txt_dir, speaker_id) for utterance_file in sorted( - f for f in os.listdir(utterance_dir) if f.endswith(".txt") + f for f in os.listdir(utterance_dir) if f.endswith(".txt") ): utterance_id = os.path.splitext(utterance_file)[0] audio_path_mic = os.path.join( diff --git a/torchaudio/datasets/yesno.py b/torchaudio/datasets/yesno.py index 21d67f8ecc..1571dcc378 100644 --- a/torchaudio/datasets/yesno.py +++ b/torchaudio/datasets/yesno.py @@ -1,11 +1,11 @@ import os -import warnings -from typing import Any, List, Tuple, Union from pathlib import Path +from typing import Any, List, Tuple, Union -import torchaudio from torch import Tensor from torch.utils.data import Dataset + +import torchaudio from torchaudio.datasets.utils import ( download_url, extract_archive, @@ -15,7 +15,7 @@ FOLDER_IN_ARCHIVE = "waves_yesno" _CHECKSUMS = { "http://www.openslr.org/resources/1/waves_yesno.tar.gz": - "962ff6e904d2df1126132ecec6978786" + "962ff6e904d2df1126132ecec6978786" } @@ -41,7 +41,6 @@ class YESNO(Dataset): The top-level directory of the dataset. (default: ``"waves_yesno"``) download (bool, optional): Whether to download the dataset if it is not found at root path. (default: ``False``). - transform (callable, optional): Optional transform applied on waveform. (default: ``None``) target_transform (callable, optional): Optional transform applied on utterance. (default: ``None``) """ @@ -52,17 +51,8 @@ def __init__(self, url: str = URL, folder_in_archive: str = FOLDER_IN_ARCHIVE, download: bool = False, - transform: Any = None, target_transform: Any = None) -> None: - if transform is not None or target_transform is not None: - warnings.warn( - "In the next version, transforms will not be part of the dataset. " - "Please remove the option `transform=True` and " - "`target_transform=True` to suppress this warning." - ) - - self.transform = transform self.target_transform = target_transform # Get string representation of 'root' in case Path object is passed @@ -102,8 +92,6 @@ def __getitem__(self, n: int) -> Tuple[Tensor, int, List[int]]: # return item waveform, sample_rate, labels = item - if self.transform is not None: - waveform = self.transform(waveform) if self.target_transform is not None: labels = self.target_transform(labels) return waveform, sample_rate, labels From 6d1868738ee30cd78cac527d22a4a0ea73cb462d Mon Sep 17 00:00:00 2001 From: AzizCode92 Date: Tue, 22 Dec 2020 21:49:30 +0100 Subject: [PATCH 03/20] remove target_transform from dataset --- torchaudio/datasets/vctk.py | 10 ++-------- torchaudio/datasets/yesno.py | 10 ++-------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/torchaudio/datasets/vctk.py b/torchaudio/datasets/vctk.py index 7cf0679a56..4e845502d6 100644 --- a/torchaudio/datasets/vctk.py +++ b/torchaudio/datasets/vctk.py @@ -1,7 +1,7 @@ import os import warnings from pathlib import Path -from typing import Any, Tuple, Union +from typing import Tuple, Union from torch import Tensor from torch.utils.data import Dataset @@ -67,7 +67,6 @@ class VCTK(Dataset): Giving ``download=True`` will result in error as the dataset is no longer publicly available. downsample (bool, optional): Not used. - target_transform (callable, optional): Optional transform applied on utterance. (default: ``None``) """ _folder_txt = "txt" @@ -81,8 +80,7 @@ def __init__(self, url: str = URL, folder_in_archive: str = FOLDER_IN_ARCHIVE, download: bool = False, - downsample: bool = False, - target_transform: Any = None) -> None: + downsample: bool = False) -> None: if downsample: warnings.warn( @@ -92,8 +90,6 @@ def __init__(self, ) self.downsample = downsample - self.target_transform = target_transform - # Get string representation of 'root' in case Path object is passed root = os.fspath(root) @@ -140,8 +136,6 @@ def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, str]: # return item waveform, sample_rate, utterance, speaker_id, utterance_id = item - if self.target_transform is not None: - utterance = self.target_transform(utterance) return waveform, sample_rate, utterance, speaker_id, utterance_id def __len__(self) -> int: diff --git a/torchaudio/datasets/yesno.py b/torchaudio/datasets/yesno.py index 1571dcc378..7178b8332c 100644 --- a/torchaudio/datasets/yesno.py +++ b/torchaudio/datasets/yesno.py @@ -1,6 +1,6 @@ import os from pathlib import Path -from typing import Any, List, Tuple, Union +from typing import List, Tuple, Union from torch import Tensor from torch.utils.data import Dataset @@ -41,7 +41,6 @@ class YESNO(Dataset): The top-level directory of the dataset. (default: ``"waves_yesno"``) download (bool, optional): Whether to download the dataset if it is not found at root path. (default: ``False``). - target_transform (callable, optional): Optional transform applied on utterance. (default: ``None``) """ _ext_audio = ".wav" @@ -50,10 +49,7 @@ def __init__(self, root: Union[str, Path], url: str = URL, folder_in_archive: str = FOLDER_IN_ARCHIVE, - download: bool = False, - target_transform: Any = None) -> None: - - self.target_transform = target_transform + download: bool = False) -> None: # Get string representation of 'root' in case Path object is passed root = os.fspath(root) @@ -92,8 +88,6 @@ def __getitem__(self, n: int) -> Tuple[Tensor, int, List[int]]: # return item waveform, sample_rate, labels = item - if self.target_transform is not None: - labels = self.target_transform(labels) return waveform, sample_rate, labels def __len__(self) -> int: From 292796c445b11b5f582d89ca7ac2c44b2ba9b618 Mon Sep 17 00:00:00 2001 From: AzizCode92 Date: Thu, 24 Dec 2020 14:14:39 +0100 Subject: [PATCH 04/20] add file extension when loading file --- torchaudio/datasets/commonvoice.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/torchaudio/datasets/commonvoice.py b/torchaudio/datasets/commonvoice.py index d387a8ca2a..86fddb5c0a 100644 --- a/torchaudio/datasets/commonvoice.py +++ b/torchaudio/datasets/commonvoice.py @@ -12,14 +12,15 @@ def load_commonvoice_item(line: List[str], header: List[str], path: str, - folder_audio: str) -> Tuple[Tensor, int, Dict[str, str]]: + folder_audio: str, + ext_audio: str) -> Tuple[Tensor, int, Dict[str, str]]: # Each line as the following data: # client_id, path, sentence, up_votes, down_votes, age, gender, accent assert header[1] == "path" fileid = line[1] - filename = os.path.join(path, folder_audio, fileid) + filename = os.path.join(path, folder_audio, fileid, ext_audio) waveform, sample_rate = torchaudio.load(filename) @@ -95,7 +96,7 @@ def __getitem__(self, n: int) -> Tuple[Tensor, int, Dict[str, str]]: ``up_votes``, ``down_votes``, ``age``, ``gender`` and ``accent``. """ line = self._walker[n] - return load_commonvoice_item(line, self._header, self._path, self._folder_audio) + return load_commonvoice_item(line, self._header, self._path, self._folder_audio, self._ext_audio) def __len__(self) -> int: return len(self._walker) From 20b1e0409d05fd39d7c5abe40ef64563b7ee8e98 Mon Sep 17 00:00:00 2001 From: AzizCode92 Date: Thu, 24 Dec 2020 14:56:00 +0100 Subject: [PATCH 05/20] fix filename path --- torchaudio/datasets/commonvoice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchaudio/datasets/commonvoice.py b/torchaudio/datasets/commonvoice.py index 86fddb5c0a..76793ee525 100644 --- a/torchaudio/datasets/commonvoice.py +++ b/torchaudio/datasets/commonvoice.py @@ -20,7 +20,7 @@ def load_commonvoice_item(line: List[str], assert header[1] == "path" fileid = line[1] - filename = os.path.join(path, folder_audio, fileid, ext_audio) + filename = os.path.join(path, folder_audio, fileid + ext_audio) waveform, sample_rate = torchaudio.load(filename) From 792c6ef8127632a953fba471957b0332e4a5f698 Mon Sep 17 00:00:00 2001 From: AzizCode92 Date: Thu, 24 Dec 2020 15:43:00 +0100 Subject: [PATCH 06/20] fix audio_path inside unittest --- test/torchaudio_unittest/datasets/commonvoice_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/torchaudio_unittest/datasets/commonvoice_test.py b/test/torchaudio_unittest/datasets/commonvoice_test.py index 22c811a67b..f95351e327 100644 --- a/test/torchaudio_unittest/datasets/commonvoice_test.py +++ b/test/torchaudio_unittest/datasets/commonvoice_test.py @@ -22,12 +22,12 @@ class TestCommonVoice(TempDirMixin, TorchaudioTestCase): # Note: the first content is missing values for `age`, `gender` and `accent` as in the original data. _train_csv_contents = [ ["9d16c5d980247861130e0480e2719f448be73d86a496c36d01a477cbdecd8cfd1399403d7a77bf458d211a70711b2da0845c", - "common_voice_en_18885784.wav", + "common_voice_en_18885784", "He was accorded a State funeral, and was buried in Drayton and Toowoomba Cemetery.", "2", "0", "", "", ""], ["c82eb9291328620f06025a1f8112b909099e447e485e99236cb87df008650250e79fea5ca772061fb6a370830847b9c44d20", - "common_voice_en_556542.wav", "Once more into the breach", "2", "0", "thirties", "male", "us"], + "common_voice_en_556542", "Once more into the breach", "2", "0", "thirties", "male", "us"], ["f74d880c5ad4c5917f314a604d3fc4805159d255796fb9f8defca35333ecc002bdf53dc463503c12674ea840b21b4a507b7c", - "common_voice_en_18607573.wav", + "common_voice_en_18607573", "Caddy, show Miss Clare and Miss Summerson their rooms.", "2", "0", "twenties", "male", "canada"], ] sample_rate = 48000 @@ -46,7 +46,7 @@ def setUpClass(cls): writer.writerow(content) # Generate and store audio - audio_path = os.path.join(audio_base_path, content[1]) + audio_path = os.path.join(audio_base_path, content[1], COMMONVOICE._ext_audio) data = get_whitenoise(sample_rate=cls.sample_rate, duration=1, n_channels=1, seed=i, dtype='float32') save_wav(audio_path, data, cls.sample_rate) From 19d772440acce36b5d69d43ab260cbca35df3ff7 Mon Sep 17 00:00:00 2001 From: AzizCode92 Date: Thu, 24 Dec 2020 15:47:36 +0100 Subject: [PATCH 07/20] update audio_path inside unittest --- test/torchaudio_unittest/datasets/commonvoice_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/torchaudio_unittest/datasets/commonvoice_test.py b/test/torchaudio_unittest/datasets/commonvoice_test.py index f95351e327..979cd2ec39 100644 --- a/test/torchaudio_unittest/datasets/commonvoice_test.py +++ b/test/torchaudio_unittest/datasets/commonvoice_test.py @@ -46,7 +46,7 @@ def setUpClass(cls): writer.writerow(content) # Generate and store audio - audio_path = os.path.join(audio_base_path, content[1], COMMONVOICE._ext_audio) + audio_path = os.path.join(audio_base_path, content[1] + COMMONVOICE._ext_audio) data = get_whitenoise(sample_rate=cls.sample_rate, duration=1, n_channels=1, seed=i, dtype='float32') save_wav(audio_path, data, cls.sample_rate) From 15bb775a5f9ba0c1bb0e11c253b0daea12ecb228 Mon Sep 17 00:00:00 2001 From: AzizCode92 Date: Thu, 24 Dec 2020 16:17:41 +0100 Subject: [PATCH 08/20] reformat test file --- .../datasets/commonvoice_test.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/test/torchaudio_unittest/datasets/commonvoice_test.py b/test/torchaudio_unittest/datasets/commonvoice_test.py index 979cd2ec39..5ec583d6a5 100644 --- a/test/torchaudio_unittest/datasets/commonvoice_test.py +++ b/test/torchaudio_unittest/datasets/commonvoice_test.py @@ -1,8 +1,7 @@ -import os import csv +import os from pathlib import Path -from torchaudio.datasets import COMMONVOICE from torchaudio_unittest.common_utils import ( TempDirMixin, TorchaudioTestCase, @@ -11,6 +10,8 @@ normalize_wav, ) +from torchaudio.datasets import COMMONVOICE + class TestCommonVoice(TempDirMixin, TorchaudioTestCase): backend = 'default' @@ -22,13 +23,13 @@ class TestCommonVoice(TempDirMixin, TorchaudioTestCase): # Note: the first content is missing values for `age`, `gender` and `accent` as in the original data. _train_csv_contents = [ ["9d16c5d980247861130e0480e2719f448be73d86a496c36d01a477cbdecd8cfd1399403d7a77bf458d211a70711b2da0845c", - "common_voice_en_18885784", - "He was accorded a State funeral, and was buried in Drayton and Toowoomba Cemetery.", "2", "0", "", "", ""], + "common_voice_en_18885784", + "He was accorded a State funeral, and was buried in Drayton and Toowoomba Cemetery.", "2", "0", "", "", ""], ["c82eb9291328620f06025a1f8112b909099e447e485e99236cb87df008650250e79fea5ca772061fb6a370830847b9c44d20", - "common_voice_en_556542", "Once more into the breach", "2", "0", "thirties", "male", "us"], + "common_voice_en_556542", "Once more into the breach", "2", "0", "thirties", "male", "us"], ["f74d880c5ad4c5917f314a604d3fc4805159d255796fb9f8defca35333ecc002bdf53dc463503c12674ea840b21b4a507b7c", - "common_voice_en_18607573", - "Caddy, show Miss Clare and Miss Summerson their rooms.", "2", "0", "twenties", "male", "canada"], + "common_voice_en_18607573", + "Caddy, show Miss Clare and Miss Summerson their rooms.", "2", "0", "twenties", "male", "canada"], ] sample_rate = 48000 From 43cd2ba31deca1590bdd30840b54377ef81d8048 Mon Sep 17 00:00:00 2001 From: AzizCode92 Date: Thu, 24 Dec 2020 21:59:40 +0100 Subject: [PATCH 09/20] add french case --- .../datasets/commonvoice_test.py | 59 +++++++++++++++---- torchaudio/datasets/commonvoice.py | 6 +- 2 files changed, 50 insertions(+), 15 deletions(-) diff --git a/test/torchaudio_unittest/datasets/commonvoice_test.py b/test/torchaudio_unittest/datasets/commonvoice_test.py index 5ec583d6a5..a75c73381c 100644 --- a/test/torchaudio_unittest/datasets/commonvoice_test.py +++ b/test/torchaudio_unittest/datasets/commonvoice_test.py @@ -18,10 +18,12 @@ class TestCommonVoice(TempDirMixin, TorchaudioTestCase): root_dir = None data = [] + en_data = [] + fr_data = [] _headers = [u"client_ids", u"path", u"sentence", u"up_votes", u"down_votes", u"age", u"gender", u"accent"] # Note: extension is changed to wav for the sake of test # Note: the first content is missing values for `age`, `gender` and `accent` as in the original data. - _train_csv_contents = [ + _en_train_csv_contents = [ ["9d16c5d980247861130e0480e2719f448be73d86a496c36d01a477cbdecd8cfd1399403d7a77bf458d211a70711b2da0845c", "common_voice_en_18885784", "He was accorded a State funeral, and was buried in Drayton and Toowoomba Cemetery.", "2", "0", "", "", ""], @@ -31,10 +33,21 @@ class TestCommonVoice(TempDirMixin, TorchaudioTestCase): "common_voice_en_18607573", "Caddy, show Miss Clare and Miss Summerson their rooms.", "2", "0", "twenties", "male", "canada"], ] + _fr_train_csv_contents = [ + [ + "a2e8e1e1cc74d08c92a53d7b9ff84e077eb90410edd85b8882f16fd037cecfcb6a19413c6c63ce6458cfea9579878fa91cef18343441c601cae0597a4b0d3144", + "89e67e7682b36786a0b4b4022c4d42090c86edd96c78c12d30088e62522b8fe466ea4912e6a1055dfb91b296a0743e0a2bbe16cebac98ee5349e3e8262cb9329", + "Or sur ce point nous n’avons aucune réponse de votre part.", "2", "0", "twenties", "male", "france"], + [ + "a2e8e1e1cc74d08c92a53d7b9ff84e077eb90410edd85b8882f16fd037cecfcb6a19413c6c63ce6458cfea9579878fa91cef18343441c601cae0597a4b0d3144", + "87d71819a26179e93acfee149d0b21b7bf5e926e367d80b2b3792d45f46e04853a514945783ff764c1fc237b4eb0ee2b0a7a7cbd395acbdfcfa9d76a6e199bbd", + "Monsieur de La Verpillière, laissez parler le ministre", "2", "0", "twenties", "male", "france"], + + ] + sample_rate = 48000 - @classmethod - def setUpClass(cls): + def fill_data(cls, has_extension: bool, train_csv_contents): cls.root_dir = cls.get_base_temp_dir() # Tsv file name difference does not mean different subset, testing as a whole dataset here tsv_filename = os.path.join(cls.root_dir, "train.tsv") @@ -43,32 +56,52 @@ def setUpClass(cls): with open(tsv_filename, "w", newline='') as tsv: writer = csv.writer(tsv, delimiter='\t') writer.writerow(cls._headers) - for i, content in enumerate(cls._train_csv_contents): + for i, content in enumerate(train_csv_contents): writer.writerow(content) - # Generate and store audio - audio_path = os.path.join(audio_base_path, content[1] + COMMONVOICE._ext_audio) + if content[1].endswith("mp3"): + audio_path = os.path.join(audio_base_path, content[1]) + else: + audio_path = os.path.join(audio_base_path, content[1] + COMMONVOICE._ext_audio) data = get_whitenoise(sample_rate=cls.sample_rate, duration=1, n_channels=1, seed=i, dtype='float32') save_wav(audio_path, data, cls.sample_rate) - # Append data entry cls.data.append((normalize_wav(data), cls.sample_rate, dict(zip(cls._headers, content)))) + return cls.data + + @classmethod + def setUpClass(cls): + cls.en_data = cls.fill_data(train_csv_contents=cls._en_train_csv_contents) + cls.fr_data = cls.fill_data(train_csv_contents=cls._fr_train_csv_contents) + + def _en_test_commonvoice(self, dataset): + n_ite = 0 + for i, (waveform, sample_rate, dictionary) in enumerate(dataset): + expected_dictionary = self.en_data[i][2] + expected_data = self.en_data[i][0] + self.assertEqual(expected_data, waveform, atol=5e-5, rtol=1e-8) + assert sample_rate == TestCommonVoice.sample_rate + assert dictionary == expected_dictionary + n_ite += 1 + assert n_ite == len(self.en_data) - def _test_commonvoice(self, dataset): + def _fr_test_commonvoice(self, dataset): n_ite = 0 for i, (waveform, sample_rate, dictionary) in enumerate(dataset): - expected_dictionary = self.data[i][2] - expected_data = self.data[i][0] + expected_dictionary = self.fr_data[i][2] + expected_data = self.fr_data[i][0] self.assertEqual(expected_data, waveform, atol=5e-5, rtol=1e-8) assert sample_rate == TestCommonVoice.sample_rate assert dictionary == expected_dictionary n_ite += 1 - assert n_ite == len(self.data) + assert n_ite == len(self.fr_data) def test_commonvoice_str(self): dataset = COMMONVOICE(self.root_dir) - self._test_commonvoice(dataset) + self._en_test_commonvoice(dataset) + self._fr_test_commonvoice(dataset) def test_commonvoice_path(self): dataset = COMMONVOICE(Path(self.root_dir)) - self._test_commonvoice(dataset) + self._en_test_commonvoice(dataset) + self._fr_test_commonvoice(dataset) diff --git a/torchaudio/datasets/commonvoice.py b/torchaudio/datasets/commonvoice.py index 76793ee525..9440b6e01f 100644 --- a/torchaudio/datasets/commonvoice.py +++ b/torchaudio/datasets/commonvoice.py @@ -19,8 +19,10 @@ def load_commonvoice_item(line: List[str], assert header[1] == "path" fileid = line[1] - - filename = os.path.join(path, folder_audio, fileid + ext_audio) + if fileid.endswith("mp3"): + filename = os.path.join(path, folder_audio, fileid) + else: + filename = os.path.join(path, folder_audio, fileid + ext_audio) waveform, sample_rate = torchaudio.load(filename) From e942883fe1314c6f2828aea570e161d4a53bed80 Mon Sep 17 00:00:00 2001 From: AzizCode92 Date: Thu, 24 Dec 2020 22:52:54 +0100 Subject: [PATCH 10/20] fix typo and split tests --- .../datasets/commonvoice_test.py | 34 ++++++++++++------- torchaudio/datasets/commonvoice.py | 7 ++-- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/test/torchaudio_unittest/datasets/commonvoice_test.py b/test/torchaudio_unittest/datasets/commonvoice_test.py index a75c73381c..0280c2280f 100644 --- a/test/torchaudio_unittest/datasets/commonvoice_test.py +++ b/test/torchaudio_unittest/datasets/commonvoice_test.py @@ -25,29 +25,33 @@ class TestCommonVoice(TempDirMixin, TorchaudioTestCase): # Note: the first content is missing values for `age`, `gender` and `accent` as in the original data. _en_train_csv_contents = [ ["9d16c5d980247861130e0480e2719f448be73d86a496c36d01a477cbdecd8cfd1399403d7a77bf458d211a70711b2da0845c", - "common_voice_en_18885784", + "common_voice_en_18885784.wav", "He was accorded a State funeral, and was buried in Drayton and Toowoomba Cemetery.", "2", "0", "", "", ""], ["c82eb9291328620f06025a1f8112b909099e447e485e99236cb87df008650250e79fea5ca772061fb6a370830847b9c44d20", - "common_voice_en_556542", "Once more into the breach", "2", "0", "thirties", "male", "us"], + "common_voice_en_556542.wav", "Once more into the breach", "2", "0", "thirties", "male", "us"], ["f74d880c5ad4c5917f314a604d3fc4805159d255796fb9f8defca35333ecc002bdf53dc463503c12674ea840b21b4a507b7c", - "common_voice_en_18607573", + "common_voice_en_18607573.wav", "Caddy, show Miss Clare and Miss Summerson their rooms.", "2", "0", "twenties", "male", "canada"], ] _fr_train_csv_contents = [ [ - "a2e8e1e1cc74d08c92a53d7b9ff84e077eb90410edd85b8882f16fd037cecfcb6a19413c6c63ce6458cfea9579878fa91cef18343441c601cae0597a4b0d3144", - "89e67e7682b36786a0b4b4022c4d42090c86edd96c78c12d30088e62522b8fe466ea4912e6a1055dfb91b296a0743e0a2bbe16cebac98ee5349e3e8262cb9329", + "a2e8e1e1cc74d08c92a53d7b9ff84e077eb90410edd85b8882f16fd037cecfcb6a19413c6c63ce6458cfea9579878fa91cef" + "18343441c601cae0597a4b0d3144", + "89e67e7682b36786a0b4b4022c4d42090c86edd96c78c12d30088e62522b8fe466ea4912e6a1055dfb91b296a0743e0a2bbe" + "16cebac98ee5349e3e8262cb9329", "Or sur ce point nous n’avons aucune réponse de votre part.", "2", "0", "twenties", "male", "france"], [ - "a2e8e1e1cc74d08c92a53d7b9ff84e077eb90410edd85b8882f16fd037cecfcb6a19413c6c63ce6458cfea9579878fa91cef18343441c601cae0597a4b0d3144", - "87d71819a26179e93acfee149d0b21b7bf5e926e367d80b2b3792d45f46e04853a514945783ff764c1fc237b4eb0ee2b0a7a7cbd395acbdfcfa9d76a6e199bbd", + "a2e8e1e1cc74d08c92a53d7b9ff84e077eb90410edd85b8882f16fd037cecfcb6a19413c6c63ce6458cfea9579878fa91cef18" + "343441c601cae0597a4b0d3144", + "87d71819a26179e93acfee149d0b21b7bf5e926e367d80b2b3792d45f46e04853a514945783ff764c1fc237b4eb0ee2b0a7a7" + "cbd395acbdfcfa9d76a6e199bbd", "Monsieur de La Verpillière, laissez parler le ministre", "2", "0", "twenties", "male", "france"], ] sample_rate = 48000 - def fill_data(cls, has_extension: bool, train_csv_contents): + def fill_data(cls, train_csv_contents): cls.root_dir = cls.get_base_temp_dir() # Tsv file name difference does not mean different subset, testing as a whole dataset here tsv_filename = os.path.join(cls.root_dir, "train.tsv") @@ -59,7 +63,7 @@ def fill_data(cls, has_extension: bool, train_csv_contents): for i, content in enumerate(train_csv_contents): writer.writerow(content) # Generate and store audio - if content[1].endswith("mp3"): + if content[1].endswith(".wav"): audio_path = os.path.join(audio_base_path, content[1]) else: audio_path = os.path.join(audio_base_path, content[1] + COMMONVOICE._ext_audio) @@ -96,12 +100,18 @@ def _fr_test_commonvoice(self, dataset): n_ite += 1 assert n_ite == len(self.fr_data) - def test_commonvoice_str(self): + def test_en_commonvoice_str(self): dataset = COMMONVOICE(self.root_dir) self._en_test_commonvoice(dataset) - self._fr_test_commonvoice(dataset) - def test_commonvoice_path(self): + def test_en_commonvoice_path(self): dataset = COMMONVOICE(Path(self.root_dir)) self._en_test_commonvoice(dataset) + + def test_fr_commonvoice_str(self): + dataset = COMMONVOICE(self.root_dir) + self._fr_test_commonvoice(dataset) + + def test_fr_commonvoice_path(self): + dataset = COMMONVOICE(Path(self.root_dir)) self._fr_test_commonvoice(dataset) diff --git a/torchaudio/datasets/commonvoice.py b/torchaudio/datasets/commonvoice.py index 9440b6e01f..8d3264155e 100644 --- a/torchaudio/datasets/commonvoice.py +++ b/torchaudio/datasets/commonvoice.py @@ -1,13 +1,14 @@ -import os import csv +import os import warnings from pathlib import Path from typing import List, Dict, Tuple, Union, Optional -import torchaudio from torch import Tensor from torch.utils.data import Dataset +import torchaudio + def load_commonvoice_item(line: List[str], header: List[str], @@ -19,7 +20,7 @@ def load_commonvoice_item(line: List[str], assert header[1] == "path" fileid = line[1] - if fileid.endswith("mp3"): + if fileid.endswith(".wav"): filename = os.path.join(path, folder_audio, fileid) else: filename = os.path.join(path, folder_audio, fileid + ext_audio) From 1da329ea8999483c92a3a24f2d9cf79e17530bb4 Mon Sep 17 00:00:00 2001 From: AzizCode92 Date: Thu, 24 Dec 2020 23:10:26 +0100 Subject: [PATCH 11/20] add class method decorator --- test/torchaudio_unittest/datasets/commonvoice_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/torchaudio_unittest/datasets/commonvoice_test.py b/test/torchaudio_unittest/datasets/commonvoice_test.py index 0280c2280f..d93365e379 100644 --- a/test/torchaudio_unittest/datasets/commonvoice_test.py +++ b/test/torchaudio_unittest/datasets/commonvoice_test.py @@ -51,6 +51,7 @@ class TestCommonVoice(TempDirMixin, TorchaudioTestCase): sample_rate = 48000 + @classmethod def fill_data(cls, train_csv_contents): cls.root_dir = cls.get_base_temp_dir() # Tsv file name difference does not mean different subset, testing as a whole dataset here From 6a7d9701f9dbbffa25ecfada8e298c18750eba3e Mon Sep 17 00:00:00 2001 From: AzizCode92 Date: Fri, 25 Dec 2020 18:47:29 +0100 Subject: [PATCH 12/20] refactor test --- .../datasets/commonvoice_test.py | 162 +++++++++++------- torchaudio/datasets/commonvoice.py | 8 +- 2 files changed, 106 insertions(+), 64 deletions(-) diff --git a/test/torchaudio_unittest/datasets/commonvoice_test.py b/test/torchaudio_unittest/datasets/commonvoice_test.py index d93365e379..03ad1dff7d 100644 --- a/test/torchaudio_unittest/datasets/commonvoice_test.py +++ b/test/torchaudio_unittest/datasets/commonvoice_test.py @@ -1,7 +1,8 @@ -import csv import os +import csv from pathlib import Path +from torchaudio.datasets import COMMONVOICE from torchaudio_unittest.common_utils import ( TempDirMixin, TorchaudioTestCase, @@ -10,29 +11,56 @@ normalize_wav, ) -from torchaudio.datasets import COMMONVOICE - +original_ext_audio = COMMONVOICE._ext_audio +sample_rate = 48000 +_headers = [u"client_ids", u"path", u"sentence", u"up_votes", u"down_votes", u"age", u"gender", u"accent"] -class TestCommonVoice(TempDirMixin, TorchaudioTestCase): - backend = 'default' - root_dir = None - data = [] - en_data = [] - fr_data = [] - _headers = [u"client_ids", u"path", u"sentence", u"up_votes", u"down_votes", u"age", u"gender", u"accent"] +def get_mock_dataset_en(root_dir): + """ + root_dir: path + """ + mocked_data = [] # Note: extension is changed to wav for the sake of test # Note: the first content is missing values for `age`, `gender` and `accent` as in the original data. _en_train_csv_contents = [ ["9d16c5d980247861130e0480e2719f448be73d86a496c36d01a477cbdecd8cfd1399403d7a77bf458d211a70711b2da0845c", "common_voice_en_18885784.wav", - "He was accorded a State funeral, and was buried in Drayton and Toowoomba Cemetery.", "2", "0", "", "", ""], + "He was accorded a State funeral, and was buried in Drayton and Toowoomba Cemetery.", "2", "0", "", "", + ""], ["c82eb9291328620f06025a1f8112b909099e447e485e99236cb87df008650250e79fea5ca772061fb6a370830847b9c44d20", "common_voice_en_556542.wav", "Once more into the breach", "2", "0", "thirties", "male", "us"], ["f74d880c5ad4c5917f314a604d3fc4805159d255796fb9f8defca35333ecc002bdf53dc463503c12674ea840b21b4a507b7c", "common_voice_en_18607573.wav", "Caddy, show Miss Clare and Miss Summerson their rooms.", "2", "0", "twenties", "male", "canada"], ] + # Tsv file name difference does not mean different subset, testing as a whole dataset here + tsv_filename = os.path.join(root_dir, "train.tsv") + audio_base_path = os.path.join(root_dir, "clips") + os.makedirs(audio_base_path, exist_ok=True) + with open(tsv_filename, "w", newline='') as tsv: + writer = csv.writer(tsv, delimiter='\t') + writer.writerow(_headers) + for i, content in enumerate(_en_train_csv_contents): + writer.writerow(content) + # Generate and store audio + audio_path = os.path.join(audio_base_path, content[1]) + print(audio_path) + data = get_whitenoise(sample_rate=sample_rate, duration=1, n_channels=1, seed=i, dtype='float32') + save_wav(audio_path, data, sample_rate) + + # Append data entry + mocked_data.append((normalize_wav(data), sample_rate, dict(zip(_headers, content)))) + return mocked_data + + +def get_mock_dataset_fr(root_dir): + """ + root_dir: path + """ + mocked_data = [] + # Note: extension is changed to wav for the sake of test + # Note: the first content is missing values for `age`, `gender` and `accent` as in the original data. _fr_train_csv_contents = [ [ "a2e8e1e1cc74d08c92a53d7b9ff84e077eb90410edd85b8882f16fd037cecfcb6a19413c6c63ce6458cfea9579878fa91cef" @@ -48,71 +76,87 @@ class TestCommonVoice(TempDirMixin, TorchaudioTestCase): "Monsieur de La Verpillière, laissez parler le ministre", "2", "0", "twenties", "male", "france"], ] + # Tsv file name difference does not mean different subset, testing as a whole dataset here + tsv_filename = os.path.join(root_dir, "train.tsv") + audio_base_path = os.path.join(root_dir, "clips") + os.makedirs(audio_base_path, exist_ok=True) + with open(tsv_filename, "w", newline='') as tsv: + writer = csv.writer(tsv, delimiter='\t') + writer.writerow(_headers) + for i, content in enumerate(_fr_train_csv_contents): + writer.writerow(content) + # Generate and store audio + audio_path = os.path.join(audio_base_path, content[1] + COMMONVOICE._ext_audio) + print(audio_path) + data = get_whitenoise(sample_rate=sample_rate, duration=1, n_channels=1, seed=i, dtype='float32') + save_wav(audio_path, data, sample_rate) + + # Append data entry + mocked_data.append((normalize_wav(data), sample_rate, dict(zip(_headers, content)))) + return mocked_data - sample_rate = 48000 +class TestCommonVoiceEN(TempDirMixin, TorchaudioTestCase): + backend = 'default' + root_dir = None + sample_rate = 48000 @classmethod - def fill_data(cls, train_csv_contents): + def setUpClass(cls): cls.root_dir = cls.get_base_temp_dir() - # Tsv file name difference does not mean different subset, testing as a whole dataset here - tsv_filename = os.path.join(cls.root_dir, "train.tsv") - audio_base_path = os.path.join(cls.root_dir, "clips") - os.makedirs(audio_base_path, exist_ok=True) - with open(tsv_filename, "w", newline='') as tsv: - writer = csv.writer(tsv, delimiter='\t') - writer.writerow(cls._headers) - for i, content in enumerate(train_csv_contents): - writer.writerow(content) - # Generate and store audio - if content[1].endswith(".wav"): - audio_path = os.path.join(audio_base_path, content[1]) - else: - audio_path = os.path.join(audio_base_path, content[1] + COMMONVOICE._ext_audio) - data = get_whitenoise(sample_rate=cls.sample_rate, duration=1, n_channels=1, seed=i, dtype='float32') - save_wav(audio_path, data, cls.sample_rate) - # Append data entry - cls.data.append((normalize_wav(data), cls.sample_rate, dict(zip(cls._headers, content)))) - return cls.data + cls.data = get_mock_dataset_en(cls.root_dir) + COMMONVOICE._ext_audio = ".wav" @classmethod - def setUpClass(cls): - cls.en_data = cls.fill_data(train_csv_contents=cls._en_train_csv_contents) - cls.fr_data = cls.fill_data(train_csv_contents=cls._fr_train_csv_contents) + def tearDownClass(cls): + COMMONVOICE._ext_audio = original_ext_audio - def _en_test_commonvoice(self, dataset): + def _test_commonvoice(self, dataset): n_ite = 0 for i, (waveform, sample_rate, dictionary) in enumerate(dataset): - expected_dictionary = self.en_data[i][2] - expected_data = self.en_data[i][0] + expected_dictionary = self.data[i][2] + expected_data = self.data[i][0] self.assertEqual(expected_data, waveform, atol=5e-5, rtol=1e-8) - assert sample_rate == TestCommonVoice.sample_rate + assert sample_rate == TestCommonVoiceEN.sample_rate assert dictionary == expected_dictionary n_ite += 1 - assert n_ite == len(self.en_data) + assert n_ite == len(self.data) + + def test_commonvoice_str(self): + dataset = COMMONVOICE(self.root_dir) + self._test_commonvoice(dataset) - def _fr_test_commonvoice(self, dataset): + def test_commonvoice_path(self): + dataset = COMMONVOICE(Path(self.root_dir)) + self._test_commonvoice(dataset) + + +class TestCommonVoiceFR(TempDirMixin, TorchaudioTestCase): + backend = 'default' + root_dir = None + sample_rate = 48000 + + + @classmethod + def setUpClass(cls): + cls.root_dir = cls.get_base_temp_dir() + cls.data = get_mock_dataset_fr(cls.root_dir) + COMMONVOICE._ext_audio = ".mp3" + + @classmethod + def tearDownClass(cls): + COMMONVOICE._ext_audio = original_ext_audio + + def _test_commonvoice(self, dataset): n_ite = 0 for i, (waveform, sample_rate, dictionary) in enumerate(dataset): - expected_dictionary = self.fr_data[i][2] - expected_data = self.fr_data[i][0] + expected_dictionary = self.data[i][2] + expected_data = self.data[i][0] self.assertEqual(expected_data, waveform, atol=5e-5, rtol=1e-8) - assert sample_rate == TestCommonVoice.sample_rate + assert sample_rate == TestCommonVoiceFR.sample_rate assert dictionary == expected_dictionary n_ite += 1 - assert n_ite == len(self.fr_data) - - def test_en_commonvoice_str(self): - dataset = COMMONVOICE(self.root_dir) - self._en_test_commonvoice(dataset) - - def test_en_commonvoice_path(self): - dataset = COMMONVOICE(Path(self.root_dir)) - self._en_test_commonvoice(dataset) + assert n_ite == len(self.data) - def test_fr_commonvoice_str(self): + def test_commonvoice_str(self): dataset = COMMONVOICE(self.root_dir) - self._fr_test_commonvoice(dataset) - - def test_fr_commonvoice_path(self): - dataset = COMMONVOICE(Path(self.root_dir)) - self._fr_test_commonvoice(dataset) + self._test_commonvoice(dataset) \ No newline at end of file diff --git a/torchaudio/datasets/commonvoice.py b/torchaudio/datasets/commonvoice.py index 8d3264155e..75fd84843f 100644 --- a/torchaudio/datasets/commonvoice.py +++ b/torchaudio/datasets/commonvoice.py @@ -20,11 +20,9 @@ def load_commonvoice_item(line: List[str], assert header[1] == "path" fileid = line[1] - if fileid.endswith(".wav"): - filename = os.path.join(path, folder_audio, fileid) - else: - filename = os.path.join(path, folder_audio, fileid + ext_audio) - + filename = os.path.join(path, folder_audio, fileid) + if not filename.endswith(ext_audio): + filename += ext_audio waveform, sample_rate = torchaudio.load(filename) dic = dict(zip(header, line)) From a4267c2c7c0361efdce0bd317f6a1fac57428318 Mon Sep 17 00:00:00 2001 From: AzizCode92 Date: Fri, 25 Dec 2020 19:10:04 +0100 Subject: [PATCH 13/20] remove print statement --- test/torchaudio_unittest/datasets/commonvoice_test.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/test/torchaudio_unittest/datasets/commonvoice_test.py b/test/torchaudio_unittest/datasets/commonvoice_test.py index 03ad1dff7d..b9e497c071 100644 --- a/test/torchaudio_unittest/datasets/commonvoice_test.py +++ b/test/torchaudio_unittest/datasets/commonvoice_test.py @@ -1,8 +1,7 @@ -import os import csv +import os from pathlib import Path -from torchaudio.datasets import COMMONVOICE from torchaudio_unittest.common_utils import ( TempDirMixin, TorchaudioTestCase, @@ -11,6 +10,8 @@ normalize_wav, ) +from torchaudio.datasets import COMMONVOICE + original_ext_audio = COMMONVOICE._ext_audio sample_rate = 48000 _headers = [u"client_ids", u"path", u"sentence", u"up_votes", u"down_votes", u"age", u"gender", u"accent"] @@ -45,7 +46,6 @@ def get_mock_dataset_en(root_dir): writer.writerow(content) # Generate and store audio audio_path = os.path.join(audio_base_path, content[1]) - print(audio_path) data = get_whitenoise(sample_rate=sample_rate, duration=1, n_channels=1, seed=i, dtype='float32') save_wav(audio_path, data, sample_rate) @@ -87,7 +87,6 @@ def get_mock_dataset_fr(root_dir): writer.writerow(content) # Generate and store audio audio_path = os.path.join(audio_base_path, content[1] + COMMONVOICE._ext_audio) - print(audio_path) data = get_whitenoise(sample_rate=sample_rate, duration=1, n_channels=1, seed=i, dtype='float32') save_wav(audio_path, data, sample_rate) @@ -100,6 +99,7 @@ class TestCommonVoiceEN(TempDirMixin, TorchaudioTestCase): backend = 'default' root_dir = None sample_rate = 48000 + @classmethod def setUpClass(cls): cls.root_dir = cls.get_base_temp_dir() @@ -135,7 +135,6 @@ class TestCommonVoiceFR(TempDirMixin, TorchaudioTestCase): root_dir = None sample_rate = 48000 - @classmethod def setUpClass(cls): cls.root_dir = cls.get_base_temp_dir() @@ -159,4 +158,4 @@ def _test_commonvoice(self, dataset): def test_commonvoice_str(self): dataset = COMMONVOICE(self.root_dir) - self._test_commonvoice(dataset) \ No newline at end of file + self._test_commonvoice(dataset) From 2cf76a6da3c9590ec4fdfc9444b5ad2ec31a2427 Mon Sep 17 00:00:00 2001 From: AzizCode92 Date: Fri, 25 Dec 2020 21:08:46 +0100 Subject: [PATCH 14/20] fix timeout --- test/torchaudio_unittest/datasets/commonvoice_test.py | 5 ----- test/torchaudio_unittest/datasets/utils_test.py | 9 +++++++++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/test/torchaudio_unittest/datasets/commonvoice_test.py b/test/torchaudio_unittest/datasets/commonvoice_test.py index b9e497c071..ac238c9d3f 100644 --- a/test/torchaudio_unittest/datasets/commonvoice_test.py +++ b/test/torchaudio_unittest/datasets/commonvoice_test.py @@ -18,9 +18,6 @@ def get_mock_dataset_en(root_dir): - """ - root_dir: path - """ mocked_data = [] # Note: extension is changed to wav for the sake of test # Note: the first content is missing values for `age`, `gender` and `accent` as in the original data. @@ -59,8 +56,6 @@ def get_mock_dataset_fr(root_dir): root_dir: path """ mocked_data = [] - # Note: extension is changed to wav for the sake of test - # Note: the first content is missing values for `age`, `gender` and `accent` as in the original data. _fr_train_csv_contents = [ [ "a2e8e1e1cc74d08c92a53d7b9ff84e077eb90410edd85b8882f16fd037cecfcb6a19413c6c63ce6458cfea9579878fa91cef" diff --git a/test/torchaudio_unittest/datasets/utils_test.py b/test/torchaudio_unittest/datasets/utils_test.py index 7dc18f3573..bc0fc8efe2 100644 --- a/test/torchaudio_unittest/datasets/utils_test.py +++ b/test/torchaudio_unittest/datasets/utils_test.py @@ -7,8 +7,17 @@ get_asset_path, ) +original_ext_audio = COMMONVOICE._ext_audio class TestIterator(TorchaudioTestCase): + @classmethod + def setUpClass(cls): + COMMONVOICE._ext_audio = ".wav" + + @classmethod + def tearDownClass(cls): + COMMONVOICE._ext_audio = original_ext_audio + backend = 'default' path = get_asset_path('CommonVoice', 'cv-corpus-4-2019-12-10', 'tt') From 7d5f07783f4e6626b231c44db680034d828a392a Mon Sep 17 00:00:00 2001 From: AzizCode92 Date: Fri, 25 Dec 2020 22:40:52 +0100 Subject: [PATCH 15/20] fix code stye --- test/torchaudio_unittest/datasets/commonvoice_test.py | 2 +- test/torchaudio_unittest/datasets/utils_test.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/test/torchaudio_unittest/datasets/commonvoice_test.py b/test/torchaudio_unittest/datasets/commonvoice_test.py index ac238c9d3f..d2b4aef556 100644 --- a/test/torchaudio_unittest/datasets/commonvoice_test.py +++ b/test/torchaudio_unittest/datasets/commonvoice_test.py @@ -75,7 +75,7 @@ def get_mock_dataset_fr(root_dir): tsv_filename = os.path.join(root_dir, "train.tsv") audio_base_path = os.path.join(root_dir, "clips") os.makedirs(audio_base_path, exist_ok=True) - with open(tsv_filename, "w", newline='') as tsv: + with open(tsv_filename, "w", newline='', encoding="utf-8") as tsv: writer = csv.writer(tsv, delimiter='\t') writer.writerow(_headers) for i, content in enumerate(_fr_train_csv_contents): diff --git a/test/torchaudio_unittest/datasets/utils_test.py b/test/torchaudio_unittest/datasets/utils_test.py index bc0fc8efe2..dd7f40b3e9 100644 --- a/test/torchaudio_unittest/datasets/utils_test.py +++ b/test/torchaudio_unittest/datasets/utils_test.py @@ -1,14 +1,14 @@ -from torchaudio.datasets import utils as dataset_utils -from torchaudio.datasets.commonvoice import COMMONVOICE - from torchaudio_unittest.common_utils import ( - TempDirMixin, TorchaudioTestCase, get_asset_path, ) +from torchaudio.datasets import utils as dataset_utils +from torchaudio.datasets.commonvoice import COMMONVOICE + original_ext_audio = COMMONVOICE._ext_audio + class TestIterator(TorchaudioTestCase): @classmethod def setUpClass(cls): From b119999091d668b02476359e04e5635008e52e96 Mon Sep 17 00:00:00 2001 From: AzizCode92 Date: Sat, 26 Dec 2020 01:46:00 +0100 Subject: [PATCH 16/20] remove encoding --- test/torchaudio_unittest/datasets/commonvoice_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/torchaudio_unittest/datasets/commonvoice_test.py b/test/torchaudio_unittest/datasets/commonvoice_test.py index d2b4aef556..ac238c9d3f 100644 --- a/test/torchaudio_unittest/datasets/commonvoice_test.py +++ b/test/torchaudio_unittest/datasets/commonvoice_test.py @@ -75,7 +75,7 @@ def get_mock_dataset_fr(root_dir): tsv_filename = os.path.join(root_dir, "train.tsv") audio_base_path = os.path.join(root_dir, "clips") os.makedirs(audio_base_path, exist_ok=True) - with open(tsv_filename, "w", newline='', encoding="utf-8") as tsv: + with open(tsv_filename, "w", newline='') as tsv: writer = csv.writer(tsv, delimiter='\t') writer.writerow(_headers) for i, content in enumerate(_fr_train_csv_contents): From 5e1f8dd4824c735eeb33ab48860c09cbf21f6971 Mon Sep 17 00:00:00 2001 From: AzizCode92 Date: Sat, 26 Dec 2020 17:43:31 +0100 Subject: [PATCH 17/20] add return type to helper functions --- test/torchaudio_unittest/datasets/commonvoice_test.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/test/torchaudio_unittest/datasets/commonvoice_test.py b/test/torchaudio_unittest/datasets/commonvoice_test.py index ac238c9d3f..aed92a027a 100644 --- a/test/torchaudio_unittest/datasets/commonvoice_test.py +++ b/test/torchaudio_unittest/datasets/commonvoice_test.py @@ -1,7 +1,8 @@ import csv import os from pathlib import Path - +from typing import Tuple, Dict +from torch import Tensor from torchaudio_unittest.common_utils import ( TempDirMixin, TorchaudioTestCase, @@ -17,7 +18,7 @@ _headers = [u"client_ids", u"path", u"sentence", u"up_votes", u"down_votes", u"age", u"gender", u"accent"] -def get_mock_dataset_en(root_dir): +def get_mock_dataset_en(root_dir) -> Tuple[Tensor, int, Dict[str, str]]: mocked_data = [] # Note: extension is changed to wav for the sake of test # Note: the first content is missing values for `age`, `gender` and `accent` as in the original data. @@ -51,10 +52,7 @@ def get_mock_dataset_en(root_dir): return mocked_data -def get_mock_dataset_fr(root_dir): - """ - root_dir: path - """ +def get_mock_dataset_fr(root_dir) -> Tuple[Tensor, int, Dict[str, str]]: mocked_data = [] _fr_train_csv_contents = [ [ From 6eb9400ee6a946ade6893b317f5d087ed9544c89 Mon Sep 17 00:00:00 2001 From: AzizCode92 Date: Sat, 26 Dec 2020 18:38:22 +0100 Subject: [PATCH 18/20] encode french characters --- test/torchaudio_unittest/datasets/commonvoice_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/torchaudio_unittest/datasets/commonvoice_test.py b/test/torchaudio_unittest/datasets/commonvoice_test.py index aed92a027a..35d8f441f3 100644 --- a/test/torchaudio_unittest/datasets/commonvoice_test.py +++ b/test/torchaudio_unittest/datasets/commonvoice_test.py @@ -2,6 +2,7 @@ import os from pathlib import Path from typing import Tuple, Dict + from torch import Tensor from torchaudio_unittest.common_utils import ( TempDirMixin, @@ -77,6 +78,7 @@ def get_mock_dataset_fr(root_dir) -> Tuple[Tensor, int, Dict[str, str]]: writer = csv.writer(tsv, delimiter='\t') writer.writerow(_headers) for i, content in enumerate(_fr_train_csv_contents): + content[2] = str(content[2].encode("utf-8")) writer.writerow(content) # Generate and store audio audio_path = os.path.join(audio_base_path, content[1] + COMMONVOICE._ext_audio) From 7ff28d778949fab59febaf0a0f87b76f33696ccc Mon Sep 17 00:00:00 2001 From: AzizCode92 Date: Sun, 27 Dec 2020 13:43:10 +0100 Subject: [PATCH 19/20] improve the code --- .../datasets/commonvoice_test.py | 34 +++++++++---------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/test/torchaudio_unittest/datasets/commonvoice_test.py b/test/torchaudio_unittest/datasets/commonvoice_test.py index 35d8f441f3..023ab561db 100644 --- a/test/torchaudio_unittest/datasets/commonvoice_test.py +++ b/test/torchaudio_unittest/datasets/commonvoice_test.py @@ -14,9 +14,9 @@ from torchaudio.datasets import COMMONVOICE -original_ext_audio = COMMONVOICE._ext_audio -sample_rate = 48000 -_headers = [u"client_ids", u"path", u"sentence", u"up_votes", u"down_votes", u"age", u"gender", u"accent"] +_ORIGINAL_EXT_AUDIO = COMMONVOICE._ext_audio +_SAMPLE_RATE = 48000 +_HEADERS = [u"client_ids", u"path", u"sentence", u"up_votes", u"down_votes", u"age", u"gender", u"accent"] def get_mock_dataset_en(root_dir) -> Tuple[Tensor, int, Dict[str, str]]: @@ -40,16 +40,16 @@ def get_mock_dataset_en(root_dir) -> Tuple[Tensor, int, Dict[str, str]]: os.makedirs(audio_base_path, exist_ok=True) with open(tsv_filename, "w", newline='') as tsv: writer = csv.writer(tsv, delimiter='\t') - writer.writerow(_headers) + writer.writerow(_HEADERS) for i, content in enumerate(_en_train_csv_contents): writer.writerow(content) # Generate and store audio audio_path = os.path.join(audio_base_path, content[1]) - data = get_whitenoise(sample_rate=sample_rate, duration=1, n_channels=1, seed=i, dtype='float32') - save_wav(audio_path, data, sample_rate) + data = get_whitenoise(sample_rate=_SAMPLE_RATE, duration=1, n_channels=1, seed=i, dtype='float32') + save_wav(audio_path, data, _SAMPLE_RATE) # Append data entry - mocked_data.append((normalize_wav(data), sample_rate, dict(zip(_headers, content)))) + mocked_data.append((normalize_wav(data), _SAMPLE_RATE, dict(zip(_HEADERS, content)))) return mocked_data @@ -76,24 +76,23 @@ def get_mock_dataset_fr(root_dir) -> Tuple[Tensor, int, Dict[str, str]]: os.makedirs(audio_base_path, exist_ok=True) with open(tsv_filename, "w", newline='') as tsv: writer = csv.writer(tsv, delimiter='\t') - writer.writerow(_headers) + writer.writerow(_HEADERS) for i, content in enumerate(_fr_train_csv_contents): content[2] = str(content[2].encode("utf-8")) writer.writerow(content) # Generate and store audio - audio_path = os.path.join(audio_base_path, content[1] + COMMONVOICE._ext_audio) - data = get_whitenoise(sample_rate=sample_rate, duration=1, n_channels=1, seed=i, dtype='float32') - save_wav(audio_path, data, sample_rate) + audio_path = os.path.join(audio_base_path, content[1] + _ORIGINAL_EXT_AUDIO) + data = get_whitenoise(sample_rate=_SAMPLE_RATE, duration=1, n_channels=1, seed=i, dtype='float32') + save_wav(audio_path, data, _SAMPLE_RATE) # Append data entry - mocked_data.append((normalize_wav(data), sample_rate, dict(zip(_headers, content)))) + mocked_data.append((normalize_wav(data), _SAMPLE_RATE, dict(zip(_HEADERS, content)))) return mocked_data class TestCommonVoiceEN(TempDirMixin, TorchaudioTestCase): backend = 'default' root_dir = None - sample_rate = 48000 @classmethod def setUpClass(cls): @@ -103,7 +102,7 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): - COMMONVOICE._ext_audio = original_ext_audio + COMMONVOICE._ext_audio = _ORIGINAL_EXT_AUDIO def _test_commonvoice(self, dataset): n_ite = 0 @@ -111,7 +110,7 @@ def _test_commonvoice(self, dataset): expected_dictionary = self.data[i][2] expected_data = self.data[i][0] self.assertEqual(expected_data, waveform, atol=5e-5, rtol=1e-8) - assert sample_rate == TestCommonVoiceEN.sample_rate + assert sample_rate == _SAMPLE_RATE assert dictionary == expected_dictionary n_ite += 1 assert n_ite == len(self.data) @@ -128,7 +127,6 @@ def test_commonvoice_path(self): class TestCommonVoiceFR(TempDirMixin, TorchaudioTestCase): backend = 'default' root_dir = None - sample_rate = 48000 @classmethod def setUpClass(cls): @@ -138,7 +136,7 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): - COMMONVOICE._ext_audio = original_ext_audio + COMMONVOICE._ext_audio = _ORIGINAL_EXT_AUDIO def _test_commonvoice(self, dataset): n_ite = 0 @@ -146,7 +144,7 @@ def _test_commonvoice(self, dataset): expected_dictionary = self.data[i][2] expected_data = self.data[i][0] self.assertEqual(expected_data, waveform, atol=5e-5, rtol=1e-8) - assert sample_rate == TestCommonVoiceFR.sample_rate + assert sample_rate == _SAMPLE_RATE assert dictionary == expected_dictionary n_ite += 1 assert n_ite == len(self.data) From b8c09e2f218ad527c5a5c0b2f15ce7d10890629d Mon Sep 17 00:00:00 2001 From: AzizCode92 Date: Sun, 27 Dec 2020 15:15:54 +0100 Subject: [PATCH 20/20] restart pipeline --- test/torchaudio_unittest/datasets/commonvoice_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/torchaudio_unittest/datasets/commonvoice_test.py b/test/torchaudio_unittest/datasets/commonvoice_test.py index 023ab561db..db64214b55 100644 --- a/test/torchaudio_unittest/datasets/commonvoice_test.py +++ b/test/torchaudio_unittest/datasets/commonvoice_test.py @@ -47,7 +47,6 @@ def get_mock_dataset_en(root_dir) -> Tuple[Tensor, int, Dict[str, str]]: audio_path = os.path.join(audio_base_path, content[1]) data = get_whitenoise(sample_rate=_SAMPLE_RATE, duration=1, n_channels=1, seed=i, dtype='float32') save_wav(audio_path, data, _SAMPLE_RATE) - # Append data entry mocked_data.append((normalize_wav(data), _SAMPLE_RATE, dict(zip(_HEADERS, content)))) return mocked_data