From 443cf93825ef8ce18ececf855478684234b7cb2f Mon Sep 17 00:00:00 2001 From: Richard Hattersley Date: Thu, 22 Nov 2012 15:00:45 +0000 Subject: [PATCH 1/3] Faster masked array creation in DataManager. --- lib/iris/fileformats/manager.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/lib/iris/fileformats/manager.py b/lib/iris/fileformats/manager.py index d9bd1dfb2e..3aa3c44b05 100644 --- a/lib/iris/fileformats/manager.py +++ b/lib/iris/fileformats/manager.py @@ -249,13 +249,16 @@ def load(self, proxy_array): # Create fully masked data (all missing) try: - data = numpy.ma.zeros(array_shape, dtype=self.data_type.newbyteorder('='), fill_value=self.mdi) - data.mask = True + raw_data = numpy.empty(array_shape, + dtype=self.data_type.newbyteorder('=')) + mask = numpy.ones(array_shape, dtype=numpy.bool) + data = numpy.ma.MaskedArray(raw_data, mask=mask, + fill_value=self.mdi) except ValueError: raise DataManager.ArrayTooBigForAddressSpace( - 'Cannot create an array of shape %r as it will not fit in memory. Try reducing the shape ' - 'of the proxy array by using indexing.' % (array_shape, ) - ) + 'Cannot create an array of shape %r as it will not' + ' fit in memory. Try reducing the shape of the' + ' proxy array by using indexing.'.format(array_shape)) for index, proxy in numpy.ndenumerate(proxy_array): if proxy not in [None, 0]: # 0 can come from slicing masked proxy; numpy.array(masked_constant). From fb093b29c3b4d2136ad37e3698661cad56401fec Mon Sep 17 00:00:00 2001 From: Richard Hattersley Date: Fri, 23 Nov 2012 12:22:23 +0000 Subject: [PATCH 2/3] Faster masked array creation in merge --- lib/iris/_merge.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/iris/_merge.py b/lib/iris/_merge.py index c6d67c1b78..fa68768cf1 100644 --- a/lib/iris/_merge.py +++ b/lib/iris/_merge.py @@ -1144,13 +1144,14 @@ def _get_cube(self): # Create fully masked data i.e. all missing. 
if signature.data_manager is None: # Must zero the data in order to avoid random checksums. - data = numpy.ma.zeros(self._shape, dtype=signature.data_type) - data.fill_value = signature.mdi + data = numpy.ma.MaskedArray(numpy.zeros(self._shape, + signature.data_type), + mask=numpy.ones(self._shape, 'bool'), + fill_value=signature.mdi) else: - # With dtype=object, ma.empty DOES initialise the memory (all None). - data = numpy.ma.empty(self._shape, dtype=object) + data = numpy.ma.MaskedArray(numpy.zeros(self._shape, 'object'), + mask=numpy.ones(self._shape, 'bool')) - data.mask = True cube = iris.cube.Cube(data, dim_coords_and_dims=dim_coords_and_dims, aux_coords_and_dims=aux_coords_and_dims, From f4bdfb48fbbfe27e68264e3d9a74e18016c45cc8 Mon Sep 17 00:00:00 2001 From: Richard Hattersley Date: Tue, 27 Nov 2012 16:05:04 +0000 Subject: [PATCH 3/3] Tweak comment and error text --- lib/iris/_merge.py | 5 +++-- lib/iris/fileformats/manager.py | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/iris/_merge.py b/lib/iris/_merge.py index fa68768cf1..ef11235d93 100644 --- a/lib/iris/_merge.py +++ b/lib/iris/_merge.py @@ -1141,9 +1141,10 @@ def _get_cube(self): aux_coords_and_dims = [(deepcopy(coord), dims) for coord, dims in self._aux_coords_and_dims] kwargs = dict(zip(iris.cube.CubeMetadata._fields, signature.defn)) - # Create fully masked data i.e. all missing. + # Create fully masked data, i.e. all missing. + # (The CubeML checksum doesn't respect the mask, so we zero the + # underlying data to ensure repeatable checksums.) if signature.data_manager is None: - # Must zero the data in order to avoid random checksums. 
data = numpy.ma.MaskedArray(numpy.zeros(self._shape, signature.data_type), mask=numpy.ones(self._shape, 'bool'), diff --git a/lib/iris/fileformats/manager.py b/lib/iris/fileformats/manager.py index 3aa3c44b05..0a906cf9d9 100644 --- a/lib/iris/fileformats/manager.py +++ b/lib/iris/fileformats/manager.py @@ -256,9 +256,9 @@ def load(self, proxy_array): fill_value=self.mdi) except ValueError: raise DataManager.ArrayTooBigForAddressSpace( - 'Cannot create an array of shape %r as it will not' - ' fit in memory. Try reducing the shape of the' - ' proxy array by using indexing.'.format(array_shape)) + 'Cannot create an array of shape {0!r} as it will not fit in' + ' memory. Consider using indexing to select a subset of' + ' the Cube.'.format(array_shape)) for index, proxy in numpy.ndenumerate(proxy_array): if proxy not in [None, 0]: # 0 can come from slicing masked proxy; numpy.array(masked_constant).