Skip to content

Commit 5461862

Browse files
committed
Make walk_files return sorted
1 parent 58300a6 commit 5461862

File tree

3 files changed

+45
-2
lines changed

3 files changed

+45
-2
lines changed

test/test_datasets.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import os
2-
import unittest
2+
from pathlib import Path
33

4+
from torchaudio.datasets import utils as dataset_utils
45
from torchaudio.datasets.commonvoice import COMMONVOICE
56
from torchaudio.datasets.librispeech import LIBRISPEECH
67
from torchaudio.datasets.speechcommands import SPEECHCOMMANDS
@@ -22,6 +23,41 @@
2223
)
2324

2425

26+
class TestWalkFiles(TempDirMixin, TorchaudioTestCase):
    """Check that ``walk_files`` yields files in sorted, deterministic order.

    ``setUp`` creates a three-level tree of empty ``.txt`` files below the
    temporary directory and records every created path in ``self.expected``,
    in the order the walk is expected to produce them.
    """

    # Root directory handed to ``walk_files``; assigned in ``setUp``.
    root = None
    # Paths of the created files, in expected traversal order; reset in ``setUp``.
    expected = None

    def _add_file(self, *parts):
        """Create an empty file at ``parts`` below the temp dir and record its path."""
        path = self.get_temp_path(*parts)
        self.expected.append(path)
        Path(path).touch()

    def setUp(self):
        """Build the fixture tree and the matching list of expected paths."""
        # Cooperate with any setUp defined by the mixin / base test case.
        super().setUp()
        self.root = self.get_temp_path()
        self.expected = []

        # level 1
        for filename in ['a.txt', 'b.txt', 'c.txt']:
            self._add_file(filename)

        # level 2
        for dir1 in ['d1', 'd2', 'd3']:
            for filename in ['d.txt', 'e.txt', 'f.txt']:
                self._add_file(dir1, filename)
            # level 3
            for dir2 in ['d1', 'd2', 'd3']:
                for filename in ['g.txt', 'h.txt', 'i.txt']:
                    self._add_file(dir1, dir2, filename)

    def test_walk_files(self):
        """walk_files should traverse files in alphabetical order"""
        found = [
            os.path.join(self.root, path)
            for path in dataset_utils.walk_files(self.root, '.txt', prefix=True)
        ]
        # Compare the whole lists rather than item by item, so that missing or
        # extra files fail the test instead of being silently skipped.
        assert found == self.expected
59+
60+
2561
class TestDatasets(TorchaudioTestCase):
2662
backend = 'default'
2763
path = get_asset_path()

torchaudio/datasets/gtzan.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,6 +1064,7 @@ def __init__(
10641064
continue
10651065

10661066
songs_in_genre = os.listdir(fulldir)
1067+
songs_in_genre.sort()
10671068
for fname in songs_in_genre:
10681069
name, ext = os.path.splitext(fname)
10691070
if ext.lower() == ".wav" and "." in name:

torchaudio/datasets/utils.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,13 @@ def walk_files(root: str,
264264

265265
root = os.path.expanduser(root)
266266

267-
for dirpath, _, files in os.walk(root):
267+
for dirpath, dirs, files in os.walk(root):
268+
dirs.sort()
269+
# os.walk keeps using this same `dirs` list to decide where to descend, so sorting it in place
270+
# makes the walk visit subdirectories in alphabetical order.
271+
# see also
272+
# https://stackoverflow.com/questions/6670029/can-i-force-python3s-os-walk-to-visit-directories-in-alphabetical-order-how#comment71993866_6670926
273+
files.sort()
268274
for f in files:
269275
if f.endswith(suffix):
270276

0 commit comments

Comments
 (0)