Skip to content

Commit d25a4dd

Browse files
krishnakalyan3 and vincentqb authored
Using Path and glob instead of walk_files (#1069)
- yesno
- librispeech
- libritts
- speechcommands

Co-authored-by: krishnakalyan3 <[email protected]>
Co-authored-by: Vincent Quenneville-Belair <[email protected]>
1 parent 79c97fb commit d25a4dd

File tree

4 files changed: 5 additions and 18 deletions.

torchaudio/datasets/librispeech.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from torchaudio.datasets.utils import (
99
download_url,
1010
extract_archive,
11-
walk_files,
1211
)
1312

1413
URL = "train-clean-100"
@@ -125,10 +124,7 @@ def __init__(self,
125124
download_url(url, root, hash_value=checksum)
126125
extract_archive(archive)
127126

128-
walker = walk_files(
129-
self._path, suffix=self._ext_audio, prefix=False, remove_suffix=True
130-
)
131-
self._walker = list(walker)
127+
self._walker = sorted(str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio))
132128

133129
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]:
134130
"""Load the n-th sample from the dataset.

torchaudio/datasets/libritts.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from torchaudio.datasets.utils import (
99
download_url,
1010
extract_archive,
11-
walk_files,
1211
)
1312

1413
URL = "train-clean-100"
@@ -126,10 +125,7 @@ def __init__(
126125
download_url(url, root, hash_value=checksum)
127126
extract_archive(archive)
128127

129-
walker = walk_files(
130-
self._path, suffix=self._ext_audio, prefix=False, remove_suffix=True
131-
)
132-
self._walker = list(walker)
128+
self._walker = sorted(str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio))
133129

134130
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int, int, str]:
135131
"""Load the n-th sample from the dataset.

torchaudio/datasets/speechcommands.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from torchaudio.datasets.utils import (
99
download_url,
1010
extract_archive,
11-
walk_files
1211
)
1312

1413
FOLDER_IN_ARCHIVE = "SpeechCommands"
@@ -110,15 +109,15 @@ def __init__(self,
110109
self._walker = _load_list(self._path, "testing_list.txt")
111110
elif subset == "training":
112111
excludes = set(_load_list(self._path, "validation_list.txt", "testing_list.txt"))
113-
walker = walk_files(self._path, suffix=".wav", prefix=True)
112+
walker = sorted(str(p) for p in Path(self._path).glob('*/*.wav'))
114113
self._walker = [
115114
w for w in walker
116115
if HASH_DIVIDER in w
117116
and EXCEPT_FOLDER not in w
118117
and os.path.normpath(w) not in excludes
119118
]
120119
else:
121-
walker = walk_files(self._path, suffix=".wav", prefix=True)
120+
walker = sorted(str(p) for p in Path(self._path).glob('*/*.wav'))
122121
self._walker = [w for w in walker if HASH_DIVIDER in w and EXCEPT_FOLDER not in w]
123122

124123
def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int]:

torchaudio/datasets/yesno.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
from torchaudio.datasets.utils import (
1010
download_url,
1111
extract_archive,
12-
walk_files
1312
)
1413

1514
URL = "http://www.openslr.org/resources/1/waves_yesno.tar.gz"
@@ -85,10 +84,7 @@ def __init__(self,
8584
"Dataset not found. Please use `download=True` to download it."
8685
)
8786

88-
walker = walk_files(
89-
self._path, suffix=self._ext_audio, prefix=False, remove_suffix=True
90-
)
91-
self._walker = list(walker)
87+
self._walker = sorted(str(p.stem) for p in Path(self._path).glob('*' + self._ext_audio))
9288

9389
def __getitem__(self, n: int) -> Tuple[Tensor, int, List[int]]:
9490
"""Load the n-th sample from the dataset.

0 commit comments

Comments
 (0)