Skip to content

Commit 55b5c80

Browse files
jimchen90Ji Chen
andauthored
Add cmu_arctic dataset (#710)
* Add cmu_arctic dataset * add dataset name * update audio test file with whitenoise.wav file * add test text file * update text method and file name * update comment * change datasets order in doc * add line length Co-authored-by: Ji Chen <[email protected]>
1 parent c82a7f9 commit 55b5c80

File tree

6 files changed

+165
-0
lines changed

6 files changed

+165
-0
lines changed

docs/source/datasets.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,14 @@ All the datasets have almost similar API. They all have two common arguments:
2525
.. currentmodule:: torchaudio.datasets
2626

2727

28+
CMUARCTIC
29+
~~~~~~~~~
30+
31+
.. autoclass:: CMUARCTIC
32+
:members: __getitem__
33+
:special-members:
34+
35+
2836
COMMONVOICE
2937
~~~~~~~~~~~
3038

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
( arctic_a0024 "This is the test text." )
431 KB
Binary file not shown.

test/test_datasets.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from torchaudio.datasets.yesno import YESNO
99
from torchaudio.datasets.ljspeech import LJSPEECH
1010
from torchaudio.datasets.gtzan import GTZAN
11+
from torchaudio.datasets.cmuarctic import CMUARCTIC
1112

1213
from . import common_utils
1314

@@ -60,6 +61,9 @@ def test_gtzan(self):
6061
data = GTZAN(self.path)
6162
data[0]
6263

64+
def test_cmuarctic(self):
    """Smoke test: build CMUARCTIC from the test asset directory and load item 0."""
    dataset = CMUARCTIC(self.path)
    # Indexing exercises transcript parsing and audio loading; the value is unused.
    dataset[0]
6367

6468
if __name__ == "__main__":
6569
unittest.main()

torchaudio/datasets/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from .gtzan import GTZAN
77
from .yesno import YESNO
88
from .ljspeech import LJSPEECH
9+
from .cmuarctic import CMUARCTIC
910

1011
__all__ = (
1112
"COMMONVOICE",
@@ -15,6 +16,7 @@
1516
"YESNO",
1617
"LJSPEECH",
1718
"GTZAN",
19+
"CMUARCTIC",
1820
"diskcache_iterator",
1921
"bg_iterator",
2022
)

torchaudio/datasets/cmuarctic.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
import os
from typing import List, Tuple

import torchaudio
from torch import Tensor
from torch.utils.data import Dataset
from torchaudio.datasets.utils import (
    download_url,
    extract_archive,
    unicode_csv_reader,
)
12+
13+
# Default value of the ``url`` argument of CMUARCTIC: a bare speaker code.
URL = "aew"
# Default sub-directory (joined onto ``root``) where archives are stored and extracted.
FOLDER_IN_ARCHIVE = "ARCTIC"
# MD5 digest of each per-speaker archive, keyed by the archive URL.
_CHECKSUMS = {
    "http://festvox.org/cmu_arctic/packed/cmu_us_" + speaker + "_arctic.tar.bz2": md5
    for speaker, md5 in (
        ("aew", "4382b116efcc8339c37e01253cb56295"),
        ("ahw", "b072d6e961e3f36a2473042d097d6da9"),
        ("aup", "5301c7aee8919d2abd632e2667adfa7f"),
        ("awb", "280fdff1e9857119d9a2c57b50e12db7"),
        ("axb", "5e21cb26c6529c533df1d02ccde5a186"),
        ("bdl", "b2c3e558f656af2e0a65da0ac0c3377a"),
        ("clb", "3957c503748e3ce17a3b73c1b9861fb0"),
        ("eey", "59708e932d27664f9eda3e8e6859969b"),
        ("fem", "dba4f992ff023347c07c304bf72f4c73"),
        ("gka", "24a876ea7335c1b0ff21460e1241340f"),
        ("jmk", "afb69d95f02350537e8a28df5ab6004b"),
        ("ksp", "4ce5b3b91a0a54b6b685b1b05aa0b3be"),
        ("ljm", "6f45a3b2c86a4ed0465b353be291f77d"),
        ("lnh", "c6a15abad5c14d27f4ee856502f0232f"),
        ("rms", "71072c983df1e590d9e9519e2a621f6e"),
        ("rxr", "3771ff03a2f5b5c3b53aa0a68b9ad0d5"),
        ("slp", "9cbf984a832ea01b5058ba9a96862850"),
        ("slt", "959eecb2cbbc4ac304c6b92269380c81"),
    )
}
53+
54+
55+
def _parse_cmuarctic_line(record: str) -> Tuple[str, str]:
    """Split one transcript record into ``(utterance_id, utterance)``.

    A record looks like ``( arctic_a0024 "This is the test text." )``.

    Raises:
        ValueError: If the record does not contain both an id and a transcript.
    """
    entry = record.strip()

    # Peel off the enclosing parentheses structurally instead of slicing at
    # fixed offsets, so missing padding spaces cannot eat transcript characters.
    if entry.startswith("("):
        entry = entry[1:]
    if entry.endswith(")"):
        entry = entry[:-1]

    utterance_id, _, utterance = entry.strip().partition(" ")
    utterance = utterance.strip()
    if not utterance_id or not utterance:
        raise ValueError("Malformed CMU ARCTIC transcript line: {!r}".format(record))

    # Remove the surrounding double quotes only when both are present.
    if len(utterance) >= 2 and utterance[0] == '"' and utterance[-1] == '"':
        utterance = utterance[1:-1]

    return utterance_id, utterance


def load_cmuarctic_item(line: List[str],
                        path: str,
                        folder_audio: str,
                        ext_audio: str) -> Tuple[Tensor, int, str, str]:
    """Load the audio and transcript described by one transcript row.

    Args:
        line: One row of the transcript file; only its first field is read,
            e.g. ``['( arctic_a0024 "This is the test text." )']``.
        path: Directory of the extracted speaker archive.
        folder_audio: Sub-directory of ``path`` that holds the audio files.
        ext_audio: Audio file extension, including the leading dot.

    Returns:
        ``(waveform, sample_rate, utterance, utterance_id)`` where the first two
        values come from ``torchaudio.load`` and ``utterance_id`` is the second
        underscore-separated field of the id in the record (``"a0024"`` for
        ``"arctic_a0024"``).

    Raises:
        ValueError: If the record is malformed.
    """
    utterance_id, utterance = _parse_cmuarctic_line(line[0])

    # The audio file is named after the full id found in the transcript.
    file_audio = os.path.join(path, folder_audio, utterance_id + ext_audio)

    # Load audio
    waveform, sample_rate = torchaudio.load(file_audio)

    return (
        waveform,
        sample_rate,
        utterance,
        utterance_id.split("_")[1]
    )
76+
77+
78+
class CMUARCTIC(Dataset):
    """
    Create a Dataset for CMU_arctic. Each item is a tuple of the form:
    waveform, sample_rate, utterance, utterance_id

    Args:
        root (str): Directory under which ``folder_in_archive`` is located
            (it is created when missing).
        url (str, optional): Either the full URL of a speaker archive or one of
            the speaker codes listed in ``_speakers``, which is expanded to the
            corresponding archive URL.
        folder_in_archive (str, optional): Sub-directory of ``root`` that holds
            the archive and its extracted content.
        download (bool, optional): When ``True``, download and extract the
            archive if the extracted directory is not already present.
    """

    _file_text = "txt.done.data"
    _folder_text = "etc"
    _ext_audio = ".wav"
    _folder_audio = "wav"

    # Speaker codes accepted as a shorthand for the full archive URL.
    _speakers = (
        "aew",
        "ahw",
        "aup",
        "awb",
        "axb",
        "bdl",
        "clb",
        "eey",
        "fem",
        "gka",
        "jmk",
        "ksp",
        "ljm",
        "lnh",
        "rms",
        "rxr",
        "slp",
        "slt",
    )

    def __init__(self,
                 root: str,
                 url: str = URL,
                 folder_in_archive: str = FOLDER_IN_ARCHIVE,
                 download: bool = False) -> None:

        if url in self._speakers:
            ext_archive = ".tar.bz2"
            base_url = "http://www.festvox.org/cmu_arctic/packed/"
            # Plain concatenation: this is a URL, not a filesystem path, so
            # os.path.join (backslash separator on Windows) must not be used.
            url = base_url + "cmu_us_" + url + "_arctic" + ext_archive

        basename = os.path.basename(url)
        root = os.path.join(root, folder_in_archive)
        # Also creates missing parent directories; a no-op when root exists.
        os.makedirs(root, exist_ok=True)
        archive = os.path.join(root, basename)

        # The extracted directory is named after the archive without extensions.
        self._path = os.path.join(root, basename.split(".")[0])

        if download and not os.path.isdir(self._path):
            if not os.path.isfile(archive):
                checksum = _CHECKSUMS.get(url)
                if checksum is None:
                    # The checksum table may key the archive by the host
                    # without its "www." prefix; retry with that spelling so
                    # the download is actually verified.
                    checksum = _CHECKSUMS.get(url.replace("://www.", "://", 1))
                download_url(url, root, hash_value=checksum, hash_type="md5")
            extract_archive(archive)

        self._text = os.path.join(self._path, self._folder_text, self._file_text)

        with open(self._text, "r") as text:
            walker = unicode_csv_reader(text, delimiter="\n")
            # Blank lines come back as empty rows; drop them so every index
            # maps to a real transcript record.
            self._walker = [row for row in walker if row]

    def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str]:
        """Load the n-th sample as ``(waveform, sample_rate, utterance, utterance_id)``."""
        line = self._walker[n]
        return load_cmuarctic_item(line, self._path, self._folder_audio, self._ext_audio)

    def __len__(self) -> int:
        """Return the number of transcript records in the dataset."""
        return len(self._walker)

0 commit comments

Comments
 (0)