diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index 14d5d0c924..1f9d4d35b4 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -108,6 +108,9 @@ This document explains the changes made to Iris for this release #. `@wjbenfold`_ fixed plotting of circular coordinates to extend kwarg arrays as well as the data. (:issue:`466`, :pull:`4649`) +#. `@wjbenfold`_ corrected the axis on which masking is applied when an + aggregator adds a trailing dimension. (:pull:`4755`) + 💣 Incompatible Changes ======================= @@ -121,6 +124,7 @@ This document explains the changes made to Iris for this release #. `@wjbenfold`_ added caching to the calculation of the points array in a :class:`~iris.coords.DimCoord` created using :meth:`~iris.coords.DimCoord.from_regular`. (:pull:`4698`) + #. `@wjbenfold`_ introduced caching in :func:`_lazy_data._optimum_chunksize` and :func:`iris.fileformats.pp_load_rules._epoch_date_hours` to reduce time spent repeating calculations. (:pull:`4716`) @@ -128,6 +132,9 @@ This document explains the changes made to Iris for this release #. `@pp-mo`_ made :meth:`~iris.cube.Cube.add_aux_factory` faster. (:pull:`4718`) +#. `@wjbenfold`_ permitted the fast percentile aggregation method to be used on + masked data when the missing data tolerance is set to 0. (:issue:`4735`, + :pull:`4755`) 🔥 Deprecations diff --git a/lib/iris/analysis/__init__.py b/lib/iris/analysis/__init__.py index 60994fb6c2..6d2ee599c8 100644 --- a/lib/iris/analysis/__init__.py +++ b/lib/iris/analysis/__init__.py @@ -39,6 +39,7 @@ from collections.abc import Iterable import functools from functools import wraps +import warnings import dask.array as da import numpy as np @@ -591,7 +592,13 @@ def aggregate(self, data, axis, **kwargs): and result is not ma.masked ): fraction_not_missing = data.count(axis=axis) / data.shape[axis] - mask_update = 1 - mdtol > fraction_not_missing + mask_update = np.array(1 - mdtol > fraction_not_missing) + if np.array(result).ndim > mask_update.ndim: + # call_func created trailing dimension. + mask_update = np.broadcast_to( + mask_update.reshape(mask_update.shape + (1,)), + np.array(result).shape, + ) if ma.isMaskedArray(result): result.mask = result.mask | mask_update else: @@ -720,6 +727,25 @@ def __init__(self, units_func=None, **kwargs): **kwargs, ) + def _base_aggregate(self, data, axis, lazy, **kwargs): + """ + Method to avoid duplication of checks in aggregate and lazy_aggregate. + """ + msg = "{} aggregator requires the mandatory keyword argument {!r}." + for arg in self._args: + if arg not in kwargs: + raise ValueError(msg.format(self.name(), arg)) + + if kwargs.get("fast_percentile_method", False) and ( + kwargs.get("mdtol", 1) != 0 + ): + kwargs["error_on_masked"] = True + + if lazy: + return _Aggregator.lazy_aggregate(self, data, axis, **kwargs) + else: + return _Aggregator.aggregate(self, data, axis, **kwargs) + def aggregate(self, data, axis, **kwargs): """ Perform the percentile aggregation over the given data. @@ -755,12 +781,7 @@ def aggregate(self, data, axis, **kwargs): """ - msg = "{} aggregator requires the mandatory keyword argument {!r}." - for arg in self._args: - if arg not in kwargs: - raise ValueError(msg.format(self.name(), arg)) - - return _Aggregator.aggregate(self, data, axis, **kwargs) + return self._base_aggregate(data, axis, lazy=False, **kwargs) def lazy_aggregate(self, data, axis, **kwargs): """ @@ -794,12 +815,7 @@ def lazy_aggregate(self, data, axis, **kwargs): """ - msg = "{} aggregator requires the mandatory keyword argument {!r}." - for arg in self._args: - if arg not in kwargs: - raise ValueError(msg.format(self.name(), arg)) - - return _Aggregator.lazy_aggregate(self, data, axis, **kwargs) + return self._base_aggregate(data, axis, lazy=True, **kwargs) def post_process(self, collapsed_cube, data_result, coords, **kwargs): """ @@ -1281,10 +1297,19 @@ def _calc_percentile(data, percent, fast_percentile_method=False, **kwargs): """ if fast_percentile_method: - msg = "Cannot use fast np.percentile method with masked array." - if ma.is_masked(data): - raise TypeError(msg) - result = np.percentile(data, percent, axis=-1) + if kwargs.pop("error_on_masked", False): + msg = ( + "Cannot use fast np.percentile method with masked array unless" + " mdtol is 0." + ) + if ma.is_masked(data): + raise TypeError(msg) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "Warning: 'partition' will ignore the 'mask' of the MaskedArray.", + ) + result = np.percentile(data, percent, axis=-1) result = result.T else: quantiles = percent / 100.0 @@ -1965,7 +1990,8 @@ def interp_order(length): * fast_percentile_method (boolean): When set to True, uses :func:`numpy.percentile` method as a faster alternative to the :func:`scipy.stats.mstats.mquantiles` method. alphap and - betap are ignored. An exception is raised if the data are masked. + betap are ignored. An exception is raised if the data are masked and the + missing data tolerance is not 0. Defaults to False. **For example**: diff --git a/lib/iris/tests/unit/analysis/test_PERCENTILE.py b/lib/iris/tests/unit/analysis/test_PERCENTILE.py index d56d1ffb1f..a74dd891ba 100644 --- a/lib/iris/tests/unit/analysis/test_PERCENTILE.py +++ b/lib/iris/tests/unit/analysis/test_PERCENTILE.py @@ -32,7 +32,7 @@ def check_percentile_calc( if self.lazy: data = as_lazy_data(data) - expected = np.array(expected) + expected = ma.array(expected) actual = self.agg_method( data, @@ -52,9 +52,9 @@ def check_percentile_calc( self.assertFalse(is_lazy) if approx: - self.assertArrayAlmostEqual(actual, expected) + self.assertMaskedArrayAlmostEqual(actual, expected) else: - self.assertArrayEqual(actual, expected) + self.assertMaskedArrayEqual(actual, expected) def test_1d_single(self): data = np.arange(11) @@ -131,7 +131,7 @@ def test_masked_2d_single(self): def test_masked_2d_multi(self): shape = (3, 10) data = ma.arange(np.prod(shape)).reshape(shape) - data[1] = ma.masked + data[1, ::2] = ma.masked percent = np.array([10, 50, 70, 80]) axis = 0 mdtol = 0.1 @@ -140,10 +140,11 @@ def test_masked_2d_multi(self): # linear interpolation. expected = percent / 100 * 20 # Other columns are first column plus column number. - expected = ( + expected = ma.array( np.broadcast_to(expected, (shape[-1], percent.size)) + np.arange(shape[-1])[:, np.newaxis] ) + expected[::2] = ma.masked self.check_percentile_calc( data, axis, percent, expected, mdtol=mdtol, approx=True @@ -205,15 +206,32 @@ def setUp(self): self.agg_method = PERCENTILE.aggregate def test_masked(self): - shape = (2, 11) + # Using (3,11) because np.percentile returns a masked array anyway with + # (2, 11) + shape = (3, 11) data = ma.arange(np.prod(shape)).reshape(shape) data[0, ::2] = ma.masked - emsg = "Cannot use fast np.percentile method with masked array." + emsg = ( + "Cannot use fast np.percentile method with masked array unless " + "mdtol is 0." + ) with self.assertRaisesRegex(TypeError, emsg): PERCENTILE.aggregate( data, axis=0, percent=50, fast_percentile_method=True ) + def test_masked_mdtol_0(self): + # Using (3,11) because np.percentile returns a masked array anyway with + # (2, 11) + shape = (3, 11) + axis = 0 + percent = 50 + data = ma.arange(np.prod(shape)).reshape(shape) + data[0, ::2] = ma.masked + expected = ma.arange(shape[-1]) + 11 + expected[::2] = ma.masked + self.check_percentile_calc(data, axis, percent, expected, mdtol=0) + @mock.patch("numpy.percentile") def test_numpy_percentile_called(self, mocked_percentile): # Basic check that numpy.percentile is called. @@ -286,10 +304,26 @@ def test_masked(self): actual = PERCENTILE.lazy_aggregate( data, axis=0, percent=50, fast_percentile_method=True ) - emsg = "Cannot use fast np.percentile method with masked array." + emsg = ( + "Cannot use fast np.percentile method with masked array unless " + "mdtol is 0." + ) with self.assertRaisesRegex(TypeError, emsg): as_concrete_data(actual) + def test_masked_mdtol_0(self): + # Using (3,11) because np.percentile returns a masked array anyway with + # (2, 11) + shape = (3, 11) + axis = 0 + percent = 50 + data = ma.arange(np.prod(shape)).reshape(shape) + data[0, ::2] = ma.masked + data = as_lazy_data(data) + expected = ma.arange(shape[-1]) + 11 + expected[::2] = ma.masked + self.check_percentile_calc(data, axis, percent, expected, mdtol=0) + @mock.patch("numpy.percentile", return_value=np.array([2, 4])) def test_numpy_percentile_called(self, mocked_percentile): # Basic check that numpy.percentile is called.