11from __future__ import annotations
22
33import contextlib
4- import hashlib
54import inspect
65import logging
76import os
87import pickle
98import tempfile
109import time
1110from shutil import rmtree
12- from typing import ClassVar
11+ from typing import Any , ClassVar
1312
1413from fsspec import AbstractFileSystem , filesystem
1514from fsspec .callbacks import _DEFAULT_CALLBACK
1615from fsspec .compression import compr
1716from fsspec .core import BaseCache , MMapCache
1817from fsspec .exceptions import BlocksizeMismatchError
18+ from fsspec .implementations .cache_mapper import create_cache_mapper
1919from fsspec .spec import AbstractBufferedFile
2020from fsspec .utils import infer_compression
2121
@@ -115,9 +115,7 @@ def __init__(
115115 self .check_files = check_files
116116 self .expiry = expiry_time
117117 self .compression = compression
118- # TODO: same_names should allow for variable prefix, not only
119- # to keep the basename
120- self .same_names = same_names
118+ self ._mapper = create_cache_mapper (same_names )
121119 self .target_protocol = (
122120 target_protocol
123121 if isinstance (target_protocol , str )
@@ -255,11 +253,12 @@ def clear_expired_cache(self, expiry_time=None):
255253
256254 for path , detail in self .cached_files [- 1 ].copy ().items ():
257255 if time .time () - detail ["time" ] > expiry_time :
258- if self .same_names :
259- basename = os .path .basename (detail ["original" ])
260- fn = os .path .join (self .storage [- 1 ], basename )
261- else :
262- fn = os .path .join (self .storage [- 1 ], detail ["fn" ])
256+ fn = detail .get ("fn" , "" )
257+ if not fn :
258+ raise RuntimeError (
259+ f"Cache metadata does not contain 'fn' for { path } "
260+ )
261+ fn = os .path .join (self .storage [- 1 ], fn )
263262 if os .path .exists (fn ):
264263 os .remove (fn )
265264 self .cached_files [- 1 ].pop (path )
@@ -339,7 +338,7 @@ def _open(
339338 # TODO: action where partial file exists in read-only cache
340339 logger .debug ("Opening partially cached copy of %s" % path )
341340 else :
342- hash = self .hash_name (path , self . same_names )
341+ hash = self ._mapper (path )
343342 fn = os .path .join (self .storage [- 1 ], hash )
344343 blocks = set ()
345344 detail = {
@@ -385,8 +384,10 @@ def _open(
385384 self .save_cache ()
386385 return f
387386
388- def hash_name (self , path , same_name ):
389- return hash_name (path , same_name = same_name )
387+ def hash_name (self , path : str , * args : Any ) -> str :
388+ # Kept for backward compatibility with downstream libraries.
389+ # Extra positional arguments (previously the same_name boolean) are accepted but ignored.
390+ return self ._mapper (path )
390391
391392 def close_and_update (self , f , close ):
392393 """Called when a file is closing, so store the set of blocks"""
@@ -488,7 +489,7 @@ def __eq__(self, other):
488489 and self .check_files == other .check_files
489490 and self .expiry == other .expiry
490491 and self .compression == other .compression
491- and self .same_names == other .same_names
492+ and self ._mapper == other ._mapper
492493 and self .target_protocol == other .target_protocol
493494 )
494495
@@ -501,7 +502,7 @@ def __hash__(self):
501502 ^ hash (self .check_files )
502503 ^ hash (self .expiry )
503504 ^ hash (self .compression )
504- ^ hash (self .same_names )
505+ ^ hash (self ._mapper )
505506 ^ hash (self .target_protocol )
506507 )
507508
@@ -546,7 +547,7 @@ def open_many(self, open_files):
546547 details = [self ._check_file (sp ) for sp in paths ]
547548 downpath = [p for p , d in zip (paths , details ) if not d ]
548549 downfn0 = [
549- os .path .join (self .storage [- 1 ], self .hash_name ( p , self . same_names ))
550+ os .path .join (self .storage [- 1 ], self ._mapper ( p ))
550551 for p , d in zip (paths , details )
551552 ] # keep these path names for opening later
552553 downfn = [fn for fn , d in zip (downfn0 , details ) if not d ]
@@ -558,7 +559,7 @@ def open_many(self, open_files):
558559 newdetail = [
559560 {
560561 "original" : path ,
561- "fn" : self .hash_name (path , self . same_names ),
562+ "fn" : self ._mapper (path ),
562563 "blocks" : True ,
563564 "time" : time .time (),
564565 "uid" : self .fs .ukey (path ),
@@ -590,7 +591,7 @@ def commit_many(self, open_files):
590591 pass
591592
592593 def _make_local_details (self , path ):
593- hash = self .hash_name (path , self . same_names )
594+ hash = self ._mapper (path )
594595 fn = os .path .join (self .storage [- 1 ], hash )
595596 detail = {
596597 "original" : path ,
@@ -731,7 +732,7 @@ def __init__(self, **kwargs):
731732
732733 def _check_file (self , path ):
733734 self ._check_cache ()
734- sha = self .hash_name (path , self . same_names )
735+ sha = self ._mapper (path )
735736 for storage in self .storage :
736737 fn = os .path .join (storage , sha )
737738 if os .path .exists (fn ):
@@ -752,7 +753,7 @@ def _open(self, path, mode="rb", **kwargs):
752753 if fn :
753754 return open (fn , mode )
754755
755- sha = self .hash_name (path , self . same_names )
756+ sha = self ._mapper (path )
756757 fn = os .path .join (self .storage [- 1 ], sha )
757758 logger .debug ("Copying %s to local cache" % path )
758759 kwargs ["mode" ] = mode
@@ -838,14 +839,6 @@ def __getattr__(self, item):
838839 return getattr (self .fh , item )
839840
840841
841- def hash_name (path , same_name ):
842- if same_name :
843- hash = os .path .basename (path )
844- else :
845- hash = hashlib .sha256 (path .encode ()).hexdigest ()
846- return hash
847-
848-
849842@contextlib .contextmanager
850843def atomic_write (path , mode = "wb" ):
851844 """
0 commit comments