diff --git a/torchaudio/datasets/librispeech.py b/torchaudio/datasets/librispeech.py index c13fb312b8..8447c4c8a2 100644 --- a/torchaudio/datasets/librispeech.py +++ b/torchaudio/datasets/librispeech.py @@ -8,7 +8,6 @@ from torchaudio.datasets.utils import ( download_url, extract_archive, - walk_files, ) URL = "train-clean-100" @@ -125,10 +124,7 @@ def __init__(self, download_url(url, root, hash_value=checksum) extract_archive(archive) - walker = walk_files( - self._path, suffix=self._ext_audio, prefix=False, remove_suffix=True - ) - self._walker = list(walker) + self._walker = sorted(str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio)) def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]: """Load the n-th sample from the dataset. diff --git a/torchaudio/datasets/libritts.py b/torchaudio/datasets/libritts.py index 8ed57d9b52..9f0c38a751 100644 --- a/torchaudio/datasets/libritts.py +++ b/torchaudio/datasets/libritts.py @@ -8,7 +8,6 @@ from torchaudio.datasets.utils import ( download_url, extract_archive, - walk_files, ) URL = "train-clean-100" @@ -126,10 +125,7 @@ def __init__( download_url(url, root, hash_value=checksum) extract_archive(archive) - walker = walk_files( - self._path, suffix=self._ext_audio, prefix=False, remove_suffix=True - ) - self._walker = list(walker) + self._walker = sorted(str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio)) def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int, int, str]: """Load the n-th sample from the dataset. diff --git a/torchaudio/datasets/speechcommands.py b/torchaudio/datasets/speechcommands.py index 7d774da943..5264ea24de 100644 --- a/torchaudio/datasets/speechcommands.py +++ b/torchaudio/datasets/speechcommands.py @@ -8,7 +8,6 @@ from torchaudio.datasets.utils import ( download_url, extract_archive, - walk_files ) FOLDER_IN_ARCHIVE = "SpeechCommands" @@ -110,7 +109,7 @@ def __init__(self, self._walker = _load_list(self._path, "testing_list.txt") elif subset == "training": excludes = set(_load_list(self._path, "validation_list.txt", "testing_list.txt")) - walker = walk_files(self._path, suffix=".wav", prefix=True) + walker = sorted(str(p) for p in Path(self._path).glob('*/*.wav')) self._walker = [ w for w in walker if HASH_DIVIDER in w @@ -118,7 +117,7 @@ def __init__(self, and os.path.normpath(w) not in excludes ] else: - walker = walk_files(self._path, suffix=".wav", prefix=True) + walker = sorted(str(p) for p in Path(self._path).glob('*/*.wav')) self._walker = [w for w in walker if HASH_DIVIDER in w and EXCEPT_FOLDER not in w] def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int]: diff --git a/torchaudio/datasets/yesno.py b/torchaudio/datasets/yesno.py index 182d224ba4..21d67f8ecc 100644 --- a/torchaudio/datasets/yesno.py +++ b/torchaudio/datasets/yesno.py @@ -9,7 +9,6 @@ from torchaudio.datasets.utils import ( download_url, extract_archive, - walk_files ) URL = "http://www.openslr.org/resources/1/waves_yesno.tar.gz" @@ -85,10 +84,7 @@ def __init__(self, "Dataset not found. Please use `download=True` to download it." ) - walker = walk_files( - self._path, suffix=self._ext_audio, prefix=False, remove_suffix=True - ) - self._walker = list(walker) + self._walker = sorted(str(p.stem) for p in Path(self._path).glob('*' + self._ext_audio)) def __getitem__(self, n: int) -> Tuple[Tensor, int, List[int]]: """Load the n-th sample from the dataset.