Skip to content

Commit a50899e

Browse files
authored
Copy/get/put empty directories correctly (#1199)
* Copy/get/put empty directories correctly * Trailing slash is enough to indicate directory in cp/get/put * Performance improvements
1 parent 95d90c7 commit a50899e

File tree

6 files changed

+148
-25
lines changed

6 files changed

+148
-25
lines changed

fsspec/asyn.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,9 @@ async def _copy(
337337
on_error = "raise"
338338

339339
paths = await self._expand_path(path1, maxdepth=maxdepth, recursive=recursive)
340-
isdir = isinstance(path2, str) and await self._isdir(path2)
340+
isdir = isinstance(path2, str) and (
341+
path2.endswith("/") or await self._isdir(path2)
342+
)
341343
path2 = other_paths(
342344
paths,
343345
path2,
@@ -486,7 +488,9 @@ async def _put(
486488
lpath = make_path_posix(lpath)
487489
fs = LocalFileSystem()
488490
lpaths = fs.expand_path(lpath, recursive=recursive)
489-
isdir = isinstance(rpath, str) and await self._isdir(rpath)
491+
isdir = isinstance(rpath, str) and (
492+
rpath.endswith("/") or await self._isdir(rpath)
493+
)
490494
rpaths = other_paths(
491495
lpaths,
492496
rpath,
@@ -538,7 +542,9 @@ async def _get(
538542
rpath = self._strip_protocol(rpath)
539543
lpath = make_path_posix(lpath)
540544
rpaths = await self._expand_path(rpath, recursive=recursive)
541-
isdir = isinstance(lpath, str) and LocalFileSystem().isdir(lpath)
545+
isdir = isinstance(lpath, str) and (
546+
lpath.endswith("/") or LocalFileSystem().isdir(lpath)
547+
)
542548
lpaths = other_paths(
543549
rpaths,
544550
lpath,

fsspec/implementations/tests/test_local.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -910,6 +910,55 @@ def test_cp_get_put_directory_recursive(tmpdir, funcname):
910910
assert fs.find(target) == [make_path_posix(os.path.join(target, "file"))]
911911

912912

913+
@pytest.mark.parametrize("funcname", ["cp", "get", "put"])
914+
def test_cp_get_put_empty_directory(tmpdir, funcname):
915+
# https://github.com/fsspec/filesystem_spec/issues/1198
916+
# cp/get/put of empty directory.
917+
fs = LocalFileSystem(auto_mkdir=True)
918+
empty = os.path.join(str(tmpdir), "empty")
919+
fs.mkdir(empty)
920+
921+
target = os.path.join(str(tmpdir), "target")
922+
fs.mkdir(target)
923+
924+
if funcname == "cp":
925+
func = fs.cp
926+
elif funcname == "get":
927+
func = fs.get
928+
elif funcname == "put":
929+
func = fs.put
930+
931+
# cp/get/put without slash, target directory exists
932+
assert fs.isdir(target)
933+
func(empty, target)
934+
assert fs.find(target, withdirs=True) == [
935+
make_path_posix(os.path.join(target, "empty"))
936+
]
937+
938+
fs.rm(target + "/empty", recursive=True)
939+
940+
# cp/get/put with slash, target directory exists
941+
assert fs.isdir(target)
942+
func(empty + "/", target)
943+
assert fs.find(target, withdirs=True) == []
944+
945+
fs.rm(target, recursive=True)
946+
947+
# cp/get/put without slash, target directory doesn't exist
948+
assert not fs.isdir(target)
949+
func(empty, target)
950+
assert fs.isdir(target)
951+
assert fs.find(target, withdirs=True) == []
952+
953+
fs.rm(target, recursive=True)
954+
955+
# cp/get/put with slash, target directory doesn't exist
956+
assert not fs.isdir(target)
957+
func(empty + "/", target)
958+
assert fs.isdir(target)
959+
assert fs.find(target, withdirs=True) == []
960+
961+
913962
def test_cp_two_files(tmpdir):
914963
fs = LocalFileSystem(auto_mkdir=True)
915964
src = os.path.join(str(tmpdir), "src")

fsspec/implementations/tests/test_memory.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,44 @@ def test_put_directory_recursive(m, tmpdir):
272272
assert m.find(target) == correct
273273

274274

275+
def test_cp_empty_directory(m):
276+
# https://github.com/fsspec/filesystem_spec/issues/1198
277+
# cp/get/put of empty directory.
278+
empty = "/src/empty"
279+
m.mkdir(empty)
280+
281+
target = "/target"
282+
m.mkdir(target)
283+
284+
# cp without slash, target directory exists
285+
assert m.isdir(target)
286+
m.cp(empty, target)
287+
assert m.find(target, withdirs=True) == [target + "/empty"]
288+
289+
m.rm(target + "/empty", recursive=True)
290+
291+
# cp with slash, target directory exists
292+
assert m.isdir(target)
293+
m.cp(empty + "/", target)
294+
assert m.find(target, withdirs=True) == []
295+
296+
m.rm(target, recursive=True)
297+
298+
# cp without slash, target directory doesn't exist
299+
assert not m.isdir(target)
300+
m.cp(empty, target)
301+
assert m.isdir(target)
302+
assert m.find(target, withdirs=True) == []
303+
304+
m.rm(target, recursive=True)
305+
306+
# cp with slash, target directory doesn't exist
307+
assert not m.isdir(target)
308+
m.cp(empty + "/", target)
309+
assert m.isdir(target)
310+
assert m.find(target, withdirs=True) == []
311+
312+
275313
def test_cp_two_files(m):
276314
src = "/src"
277315
file0 = src + "/file0"

fsspec/spec.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -884,7 +884,9 @@ def get(self, rpath, lpath, recursive=False, callback=_DEFAULT_CALLBACK, **kwarg
884884
if isinstance(lpath, str):
885885
lpath = make_path_posix(lpath)
886886
rpaths = self.expand_path(rpath, recursive=recursive)
887-
isdir = isinstance(lpath, str) and LocalFileSystem().isdir(lpath)
887+
isdir = isinstance(lpath, str) and (
888+
lpath.endswith("/") or LocalFileSystem().isdir(lpath)
889+
)
888890
lpaths = other_paths(
889891
rpaths,
890892
lpath,
@@ -937,7 +939,7 @@ def put(self, lpath, rpath, recursive=False, callback=_DEFAULT_CALLBACK, **kwarg
937939
lpath = make_path_posix(lpath)
938940
fs = LocalFileSystem()
939941
lpaths = fs.expand_path(lpath, recursive=recursive)
940-
isdir = isinstance(rpath, str) and self.isdir(rpath)
942+
isdir = isinstance(rpath, str) and (rpath.endswith("/") or self.isdir(rpath))
941943
rpaths = other_paths(
942944
lpaths,
943945
rpath,
@@ -978,7 +980,7 @@ def copy(self, path1, path2, recursive=False, on_error=None, **kwargs):
978980
on_error = "raise"
979981

980982
paths = self.expand_path(path1, recursive=recursive)
981-
isdir = isinstance(path2, str) and self.isdir(path2)
983+
isdir = isinstance(path2, str) and (path2.endswith("/") or self.isdir(path2))
982984
path2 = other_paths(
983985
paths,
984986
path2,

fsspec/tests/test_utils.py

Lines changed: 43 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -255,42 +255,76 @@ def test_common_prefix(paths, out):
255255

256256

257257
@pytest.mark.parametrize(
258-
"paths, other, is_dir, expected",
258+
"paths, other, is_dir, exists, expected",
259259
(
260-
(["/path1"], "/path2", False, ["/path2"]),
261-
(["/path1"], "/path2", True, ["/path2/path1"]),
262-
(["/path1"], "/path2", None, ["/path2"]),
263-
(["/path1"], "/path2/", True, ["/path2/path1"]),
264-
(["/path1"], ["/path2"], True, ["/path2"]),
265-
(["/path1", "/path2"], "/path2", True, ["/path2/path1", "/path2/path2"]),
260+
(["/path1"], "/path2", False, False, ["/path2"]),
261+
(["/path1"], "/path2", True, True, ["/path2/path1"]),
262+
(["/path1"], "/path2", None, False, ["/path2"]),
263+
(["/path1"], "/path2/", True, True, ["/path2/path1"]),
264+
(["/path1"], ["/path2"], True, False, ["/path2"]),
265+
(["/path1"], ["/path2"], True, True, ["/path2"]),
266+
(["/path1", "/path2"], "/path2", True, False, ["/path2/path1", "/path2/path2"]),
267+
(["/path1", "/path2"], "/path2", True, True, ["/path2/path1", "/path2/path2"]),
266268
(
267269
["/more/path1", "/more/path2"],
268270
"/path2",
269271
True,
272+
False,
270273
["/path2/path1", "/path2/path2"],
271274
),
272275
(
273276
["/more/path1", "/more/path2"],
274277
"/path2",
278+
True,
279+
True,
280+
["/path2/more/path1", "/path2/more/path2"],
281+
),
282+
(
283+
["/more/path1", "/more/path2"],
284+
"/path2",
285+
False,
275286
False,
276287
["/path2/path1", "/path2/path2"],
277288
),
289+
(
290+
["/more/path1", "/more/path2"],
291+
"/path2",
292+
False,
293+
True,
294+
["/path2/more/path1", "/path2/more/path2"],
295+
),
278296
(
279297
["/more/path1", "/more/path2"],
280298
"/path2/",
281299
None,
300+
False,
282301
["/path2/path1", "/path2/path2"],
283302
),
303+
(
304+
["/more/path1", "/more/path2"],
305+
"/path2/",
306+
None,
307+
True,
308+
["/path2/more/path1", "/path2/more/path2"],
309+
),
310+
(
311+
["/more/path1", "/diff/path2"],
312+
"/path2/",
313+
None,
314+
False,
315+
["/path2/more/path1", "/path2/diff/path2"],
316+
),
284317
(
285318
["/more/path1", "/diff/path2"],
286319
"/path2/",
287320
None,
321+
True,
288322
["/path2/more/path1", "/path2/diff/path2"],
289323
),
290324
),
291325
)
292-
def test_other_paths(paths, other, is_dir, expected):
293-
assert other_paths(paths, other, is_dir) == expected
326+
def test_other_paths(paths, other, is_dir, exists, expected):
327+
assert other_paths(paths, other, is_dir, exists) == expected
294328

295329

296330
def test_log():

fsspec/utils.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -366,16 +366,10 @@ def other_paths(paths, path2, is_dir=None, exists=False):
366366
if isinstance(path2, str):
367367
is_dir = is_dir or path2.endswith("/")
368368
path2 = path2.rstrip("/")
369-
if len(paths) > 1:
370-
cp = common_prefix(paths)
371-
if exists:
372-
cp = cp.rsplit("/", 1)[0]
373-
path2 = [p.replace(cp, path2, 1) for p in paths]
374-
else:
375-
if is_dir:
376-
path2 = [path2.rstrip("/") + "/" + paths[0].rsplit("/")[-1]]
377-
else:
378-
path2 = [path2]
369+
cp = common_prefix(paths)
370+
if exists:
371+
cp = cp.rsplit("/", 1)[0]
372+
path2 = [p.replace(cp, path2, 1) for p in paths]
379373
else:
380374
assert len(paths) == len(path2)
381375
return path2

0 commit comments

Comments
 (0)