@@ -202,6 +202,7 @@ def _optimum_chunksize_internals(
         dim = working[0]
         working = working[1:]
         result.append(dim)
+    result = tuple(result)
 
     return result
 
@@ -227,6 +228,33 @@ def _optimum_chunksize(
     )
 
 
+class LRUCache:
+    def __init__(self, maxsize: int) -> None:
+        self._cache: dict = {}
+        self.maxsize = maxsize
+
+    def __getitem__(self, key):
+        value = self._cache.pop(key)
+        self._cache[key] = value
+        return value
+
+    def __setitem__(self, key, value):
+        self._cache[key] = value
+        if len(self._cache) > self.maxsize:
+            self._cache.pop(next(iter(self._cache)))
+
+    def __contains__(self, key):
+        return key in self._cache
+
+    def __repr__(self):
+        return (
+            f"<{self.__class__.__name__} maxsize={self.maxsize} cache={self._cache!r}>"
+        )
+
+
+CACHE = LRUCache(100)
+
+
 def as_lazy_data(data, chunks=None, asarray=False, meta=None, dims_fixed=None):
     """Convert the input array `data` to a :class:`dask.array.Array`.
 
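The LRUCache added above is a small wrapper around an insertion-ordered dict: reading a key pops and re-inserts it (marking it most recently used), and writing evicts the oldest entry once maxsize is exceeded. A minimal sketch of that behaviour, assuming the class ends up importable from iris._lazy_data (the module modified here):

from iris._lazy_data import LRUCache  # assumed import path for this sketch

cache = LRUCache(maxsize=2)
cache["a"] = 1
cache["b"] = 2

_ = cache["a"]        # reading "a" re-inserts it, so "b" is now the oldest entry
cache["c"] = 3        # exceeds maxsize=2, evicting the oldest entry ("b")

print("b" in cache)   # False: evicted
print("a" in cache)   # True: kept because it was read recently
print(cache)          # <LRUCache maxsize=2 cache={'a': 1, 'c': 3}>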
@@ -264,6 +292,8 @@ def as_lazy_data(data, chunks=None, asarray=False, meta=None, dims_fixed=None):
     but reduced by a factor if that exceeds the dask default chunksize.
 
     """
+    from iris.fileformats.netcdf._thread_safe_nc import NetCDFDataProxy
+
     if isinstance(data, ma.core.MaskedConstant):
         data = ma.masked_array(data.data, mask=data.mask)
 
@@ -277,7 +307,7 @@ def as_lazy_data(data, chunks=None, asarray=False, meta=None, dims_fixed=None):
     if chunks is None:
         # No existing chunks : Make a chunk the shape of the entire input array
         # (but we will subdivide it if too big).
-        chunks = list(data.shape)
+        chunks = tuple(data.shape)
 
     # Adjust chunk size for better dask performance,
     # NOTE: but only if no shape dimension is zero, so that we can handle the
@@ -291,9 +321,26 @@ def as_lazy_data(data, chunks=None, asarray=False, meta=None, dims_fixed=None):
             dims_fixed=dims_fixed,
         )
 
-    if not is_lazy_data(data):
-        data = da.from_array(data, chunks=chunks, asarray=asarray, meta=meta)
-    return data
+    # Define a cache key for caching arrays created from NetCDFDataProxy objects.
+    # Creating new Dask arrays is relatively slow, so caching is beneficial when
+    # many cubes in the same file share coordinate arrays.
+    if isinstance(data, NetCDFDataProxy):
+        key = (repr(data), chunks, asarray, meta.dtype, type(meta))
+    else:
+        key = None
+
+    if is_lazy_data(data):
+        result = data
+    elif key is None:
+        # No usable cache key (the data is not a NetCDFDataProxy): create the
+        # array directly, without caching.
+        result = da.from_array(data, chunks=chunks, asarray=asarray, meta=meta)
+    else:
+        if key not in CACHE:
+            CACHE[key] = da.from_array(data, chunks=chunks, asarray=asarray, meta=meta)
+        result = CACHE[key].copy()
+
+    return result
 
 
 def _co_realise_lazy_arrays(arrays):
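Cached entries are handed out via CACHE[key].copy(). On a Dask array, copy() is cheap: it returns a new Array wrapper over the same task graph rather than materialising any data, so repeated as_lazy_data calls that hit the cache each get their own array object. A short illustrative sketch (not part of the change itself):

import numpy as np
import dask.array as da

# Build a small lazy array, much as da.from_array is used in as_lazy_data.
lazy = da.from_array(np.arange(12).reshape(3, 4), chunks=(1, 4))

# copy() gives a distinct Array object backed by the same graph and chunking.
view = lazy.copy()
print(view is lazy)                           # False: separate wrapper objects
print(view.chunks == lazy.chunks)             # True: chunking preserved
print(bool((view == lazy).all().compute()))   # True: identical values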