Skip to content

Commit 8a84bd6

Browse files
martindurant and eode authored
Memfs no bytes copy (#999)
* memory: Avoid bytes copies where possible
* Pass 'data' kwarg into MemoryFile from open
* Implement pipe_file that initializes rather than copies
* Follow CI linter recommendations
* remove assert and unused kwargs
* Don't copy bytes in MemoryFS
* remove test

Co-authored-by: Brian Ealdwine <[email protected]>
1 parent 42f82a9 commit 8a84bd6

File tree

3 files changed

+37
-13
lines changed

3 files changed

+37
-13
lines changed

fsspec/implementations/memory.py

Lines changed: 21 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ def ls(self, path, detail=False, **kwargs):
3838
return [
3939
{
4040
"name": path,
41-
"size": self.store[path].getbuffer().nbytes,
41+
"size": self.store[path].size,
4242
"type": "file",
4343
"created": self.store[path].created,
4444
}
@@ -53,7 +53,7 @@ def ls(self, path, detail=False, **kwargs):
5353
out.append(
5454
{
5555
"name": p2,
56-
"size": self.store[p2].getbuffer().nbytes,
56+
"size": self.store[p2].size,
5757
"type": "file",
5858
"created": self.store[p2].created,
5959
}
@@ -114,6 +114,13 @@ def makedirs(self, path, exist_ok=False):
114114
if not exist_ok:
115115
raise
116116

117+
def pipe_file(self, path, value, **kwargs):
118+
"""Set the bytes of given file
119+
120+
Avoids copies of the data if possible
121+
"""
122+
self.open(path, "wb", data=value)
123+
117124
def rmdir(self, path):
118125
path = self._strip_protocol(path)
119126
if path == "":
@@ -145,9 +152,7 @@ def info(self, path, **kwargs):
145152
filelike = self.store[path]
146153
return {
147154
"name": path,
148-
"size": filelike.size
149-
if hasattr(filelike, "size")
150-
else filelike.getbuffer().nbytes,
155+
"size": filelike.size,
151156
"type": "file",
152157
"created": getattr(filelike, "created", None),
153158
}
@@ -184,7 +189,7 @@ def _open(
184189
else:
185190
raise FileNotFoundError(path)
186191
if mode == "wb":
187-
m = MemoryFile(self, path)
192+
m = MemoryFile(self, path, kwargs.get("data"))
188193
if not self._intrans:
189194
m.commit()
190195
return m
@@ -193,7 +198,9 @@ def cp_file(self, path1, path2, **kwargs):
193198
path1 = self._strip_protocol(path1)
194199
path2 = self._strip_protocol(path2)
195200
if self.isfile(path1):
196-
self.store[path2] = MemoryFile(self, path2, self.store[path1].getbuffer())
201+
self.store[path2] = MemoryFile(
202+
self, path2, self.store[path1].getvalue()
203+
) # implicit copy
197204
elif self.isdir(path1):
198205
if path2 not in self.pseudo_dirs:
199206
self.pseudo_dirs.append(path2)
@@ -203,7 +210,7 @@ def cp_file(self, path1, path2, **kwargs):
203210
def cat_file(self, path, start=None, end=None, **kwargs):
204211
path = self._strip_protocol(path)
205212
try:
206-
return self.store[path].getvalue()[start:end]
213+
return bytes(self.store[path].getbuffer()[start:end])
207214
except KeyError:
208215
raise FileNotFoundError(path)
209216

@@ -246,17 +253,18 @@ def __init__(self, fs=None, path=None, data=None):
246253
self.path = path
247254
self.created = datetime.utcnow().timestamp()
248255
if data:
249-
self.write(data)
250-
self.size = len(data)
256+
super().__init__(data)
251257
self.seek(0)
252258

259+
@property
260+
def size(self):
261+
return self.getbuffer().nbytes
262+
253263
def __enter__(self):
254264
return self
255265

256266
def close(self):
257-
position = self.tell()
258-
self.size = self.seek(0, 2)
259-
self.seek(position)
267+
pass
260268

261269
def discard(self):
262270
pass

fsspec/tests/test_utils.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
import pytest
66

7+
import fsspec.utils
78
from fsspec.utils import (
89
can_be_local,
910
common_prefix,
@@ -377,3 +378,9 @@ def test_merge_offset_ranges(max_gap, max_block):
377378
assert expect_paths == result_paths
378379
assert expect_starts == result_starts
379380
assert expect_ends == result_ends
381+
382+
383+
def test_size():
384+
f = io.BytesIO(b"hello")
385+
assert fsspec.utils.file_size(f) == 5
386+
assert f.tell() == 0

fsspec/utils.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -619,3 +619,12 @@ def __exit__(
619619
# forward anything else too
620620
def __getattr__(self, name):
621621
return getattr(self.fp, name)
622+
623+
624+
def file_size(filelike):
625+
"""Find length of any open read-mode file-like"""
626+
pos = filelike.tell()
627+
try:
628+
return filelike.seek(0, 2)
629+
finally:
630+
filelike.seek(pos)

0 commit comments

Comments
 (0)