Skip to content

Commit 39efd7f

Browse files
authored
cache: added clear_expired_cache (#1055 - #1061) (#1092)
1 parent b5ce591 commit 39efd7f

File tree

2 files changed

+142
-0
lines changed

2 files changed

+142
-0
lines changed

fsspec/implementations/cached.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,44 @@ def clear_cache(self):
232232
rmtree(self.storage[-1])
233233
self.load_cache()
234234

235+
def clear_expired_cache(self, expiry_time=None):
    """Remove all expired files and metadata from the cache

    In the case of multiple cache locations, this clears only the last one,
    which is assumed to be the read/write one.

    Parameters
    ----------
    expiry_time: int
        The time in seconds after which a local copy is considered useless.
        If not defined (``None`` or any other falsy value) the default is
        equivalent to the attribute from the file caching instantiation.
        If that attribute is falsy as well there is no threshold to compare
        against, and nothing is removed.
    """
    if not expiry_time:
        expiry_time = self.expiry

    self._check_cache()

    if not expiry_time:
        # Still no usable threshold: comparing an age against None raises
        # TypeError, and comparing against 0/False would expire every entry
        # and wipe the whole writable cache. Treat it as "nothing expires".
        return

    # Take one timestamp so every entry is judged against the same instant.
    now = time.time()
    storage = self.storage[-1]
    writable = self.cached_files[-1]

    # Iterate over a snapshot: entries are popped from the live dict below.
    for path, detail in list(writable.items()):
        if now - detail["time"] <= expiry_time:
            continue

        if self.same_names:
            local_name = os.path.basename(detail["original"])
        else:
            local_name = detail["fn"]

        try:
            os.remove(os.path.join(storage, local_name))
        except FileNotFoundError:
            # The local copy is already gone; only the metadata is stale.
            pass
        writable.pop(path)

    if writable:
        # Persist the trimmed metadata next to the cached files.
        cache_path = os.path.join(storage, "cache")
        with open(cache_path, "wb") as fc:
            pickle.dump(writable, fc)
    else:
        # Nothing left in the writable cache: drop the directory and reload.
        rmtree(storage)
        self.load_cache()
272+
235273
def pop_from_cache(self, path):
236274
"""Remove cached version of given file
237275
@@ -389,6 +427,7 @@ def __getattribute__(self, item):
389427
"_check_cache",
390428
"_mkcache",
391429
"clear_cache",
430+
"clear_expired_cache",
392431
"pop_from_cache",
393432
"_mkcache",
394433
"local_file",

fsspec/implementations/tests/test_cached.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,109 @@ def test_clear():
153153
assert len(os.listdir(cache1)) < 2
154154

155155

156+
def test_clear_expired(tmp_path):
    """Exercise ``clear_expired_cache`` on a multi-location file cache.

    Only entries of the last cache location whose recorded time has been
    pushed into the past must be removed; earlier locations stay untouched.
    """

    def _age_cache_entry(cache_fn, fn):
        """
        Modify the cache file to virtually add time lag to selected files.

        Parameters
        ----------
        cache_fn: str
            cache path
        fn: str
            file name to be modified
        """
        import pathlib
        import time

        # 8 days expressed in seconds.
        # NOTE(review): presumably chosen to exceed the filesystem's default
        # expiry time -- confirm against the caching implementation.
        lag_seconds = 691200

        # Fail loudly here rather than letting a missing metadata file turn
        # into a confusing assertion failure further down the test.
        assert os.path.exists(cache_fn), "Cache metadata file is missing"
        with open(cache_fn, "rb") as f:
            cached_files = pickle.load(f)
        fn_posix = pathlib.Path(fn).as_posix()
        cached_files[fn_posix]["time"] -= lag_seconds
        assert os.access(cache_fn, os.W_OK), "Cache is not writable"
        with open(cache_fn, "wb") as f:
            pickle.dump(cached_files, f)
        # NOTE(review): presumably lets the ``cache_check=1`` interval elapse
        # so the filesystem re-reads the rewritten metadata -- confirm.
        time.sleep(1)

    origin = tmp_path.joinpath("origin")
    cache1 = tmp_path.joinpath("cache1")
    cache2 = tmp_path.joinpath("cache2")
    cache3 = tmp_path.joinpath("cache3")

    for directory in (origin, cache1, cache2, cache3):
        directory.mkdir()

    data = b"test data"
    f1 = origin.joinpath("afile")
    f2 = origin.joinpath("bfile")
    f3 = origin.joinpath("cfile")
    f4 = origin.joinpath("dfile")

    for source in (f1, f2, f3, f4):
        with open(source, "wb") as f:
            f.write(data)

    # populates first cache
    fs = fsspec.filesystem(
        "filecache", target_protocol="file", cache_storage=str(cache1), cache_check=1
    )
    assert fs.cat(str(f1)) == data

    # populates "last" cache if file not found in first one
    fs = fsspec.filesystem(
        "filecache",
        target_protocol="file",
        cache_storage=[str(cache1), str(cache2)],
        cache_check=1,
    )
    assert fs.cat(str(f2)) == data
    assert fs.cat(str(f3)) == data
    assert len(os.listdir(cache2)) == 3

    # force the expiration
    cache_fn = os.path.join(fs.storage[-1], "cache")
    _age_cache_entry(cache_fn, f2)

    # remove from cache2 the expired files
    fs.clear_expired_cache()
    assert len(os.listdir(cache2)) == 2

    # check complete cleanup
    _age_cache_entry(cache_fn, f3)

    fs.clear_expired_cache()
    # Paths are passed as str, like every other filesystem call in this test.
    assert not fs._check_file(str(f2))
    assert not fs._check_file(str(f3))
    assert len(os.listdir(cache2)) < 2

    # check cache1 to be untouched after cleaning
    assert len(os.listdir(cache1)) == 2

    # check cleaning with 'same_name' option enabled
    fs = fsspec.filesystem(
        "filecache",
        target_protocol="file",
        cache_storage=[str(cache1), str(cache2), str(cache3)],
        same_names=True,
        cache_check=1,
    )
    assert fs.cat(str(f4)) == data

    cache_fn = os.path.join(fs.storage[-1], "cache")
    _age_cache_entry(cache_fn, f4)

    fs.clear_expired_cache()
    assert not fs._check_file(str(f4))
258+
156259
def test_pop():
157260
import tempfile
158261

0 commit comments

Comments
 (0)