SciTools · rcomer · May 23, 2022 · May 19, 2022 · May 19, 2022 · May 20, 2022
diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst
@@ -108,6 +108,9 @@ This document explains the changes made to Iris for this release
 #. `@wjbenfold`_ fixed plotting of circular coordinates to extend kwarg arrays
    as well as the data. (:issue:`466`, :pull:`4649`)
 
+#. `@wjbenfold`_ corrected the axis on which masking is applied when an
+   aggregator adds a trailing dimension. (:pull:`4755`)
+
 
 💣 Incompatible Changes
 =======================
@@ -121,13 +124,17 @@ This document explains the changes made to Iris for this release
 #. `@wjbenfold`_ added caching to the calculation of the points array in a
    :class:`~iris.coords.DimCoord` created using
    :meth:`~iris.coords.DimCoord.from_regular`. (:pull:`4698`)
+
 #. `@wjbenfold`_ introduced caching in :func:`_lazy_data._optimum_chunksize` and
    :func:`iris.fileformats.pp_load_rules._epoch_date_hours` to reduce time spent
    repeating calculations. (:pull:`4716`)
 
 #. `@pp-mo`_ made :meth:`~iris.cube.Cube.add_aux_factory` faster.
    (:pull:`4718`)
 
+#. `@wjbenfold`_ permitted the fast percentile aggregation method to be used on
+   masked data when the missing data tolerance is set to 0. (:issue:`4735`,
+   :pull:`4755`)
 
 
 🔥 Deprecations

diff --git a/lib/iris/analysis/__init__.py b/lib/iris/analysis/__init__.py
@@ -39,6 +39,7 @@
 from collections.abc import Iterable
 import functools
 from functools import wraps
+import warnings
 
 import dask.array as da
 import numpy as np
@@ -591,7 +592,13 @@ def aggregate(self, data, axis, **kwargs):
             and result is not ma.masked
         ):
             fraction_not_missing = data.count(axis=axis) / data.shape[axis]
-            mask_update = 1 - mdtol > fraction_not_missing
+            mask_update = np.array(1 - mdtol > fraction_not_missing)
+            if np.array(result).ndim > mask_update.ndim:
+                # call_func created trailing dimension.
+                mask_update = np.broadcast_to(
+                    mask_update.reshape(mask_update.shape + (1,)),
+                    np.array(result).shape,
+                )
             if ma.isMaskedArray(result):
                 result.mask = result.mask | mask_update
             else:
@@ -720,6 +727,25 @@ def __init__(self, units_func=None, **kwargs):
             **kwargs,
         )
 
+    def _base_aggregate(self, data, axis, lazy, **kwargs):
+        """
+        Method to avoid duplication of checks in aggregate and lazy_aggregate.
+        """
+        msg = "{} aggregator requires the mandatory keyword argument {!r}."
+        for arg in self._args:
+            if arg not in kwargs:
+                raise ValueError(msg.format(self.name(), arg))
+
+        if kwargs.get("fast_percentile_method", False) and (
+            kwargs.get("mdtol", 1) != 0
+        ):
+            kwargs["error_on_masked"] = True
+
+        if lazy:
+            return _Aggregator.lazy_aggregate(self, data, axis, **kwargs)
+        else:
+            return _Aggregator.aggregate(self, data, axis, **kwargs)
+
     def aggregate(self, data, axis, **kwargs):
         """
         Perform the percentile aggregation over the given data.
@@ -755,12 +781,7 @@ def aggregate(self, data, axis, **kwargs):
 
         """
 
-        msg = "{} aggregator requires the mandatory keyword argument {!r}."
-        for arg in self._args:
-            if arg not in kwargs:
-                raise ValueError(msg.format(self.name(), arg))
-
-        return _Aggregator.aggregate(self, data, axis, **kwargs)
+        return self._base_aggregate(data, axis, lazy=False, **kwargs)
 
     def lazy_aggregate(self, data, axis, **kwargs):
         """
@@ -794,12 +815,7 @@ def lazy_aggregate(self, data, axis, **kwargs):
 
         """
 
-        msg = "{} aggregator requires the mandatory keyword argument {!r}."
-        for arg in self._args:
-            if arg not in kwargs:
-                raise ValueError(msg.format(self.name(), arg))
-
-        return _Aggregator.lazy_aggregate(self, data, axis, **kwargs)
+        return self._base_aggregate(data, axis, lazy=True, **kwargs)
 
     def post_process(self, collapsed_cube, data_result, coords, **kwargs):
         """
@@ -1281,10 +1297,19 @@ def _calc_percentile(data, percent, fast_percentile_method=False, **kwargs):
 
     """
     if fast_percentile_method:
-        msg = "Cannot use fast np.percentile method with masked array."
-        if ma.is_masked(data):
-            raise TypeError(msg)
-        result = np.percentile(data, percent, axis=-1)
+        if kwargs.pop("error_on_masked", False):
+            msg = (
+                "Cannot use fast np.percentile method with masked array unless"
+                " mdtol is 0."
+            )
+            if ma.is_masked(data):
+                raise TypeError(msg)
+        with warnings.catch_warnings():
+            warnings.filterwarnings(
+                "ignore",
+                "Warning: 'partition' will ignore the 'mask' of the MaskedArray.",
+            )
+            result = np.percentile(data, percent, axis=-1)
         result = result.T
     else:
         quantiles = percent / 100.0
@@ -1965,7 +1990,8 @@ def interp_order(length):
 * fast_percentile_method (boolean):
     When set to True, uses :func:`numpy.percentile` method as a faster
     alternative to the :func:`scipy.stats.mstats.mquantiles` method.  alphap and
-    betap are ignored. An exception is raised if the data are masked.
+    betap are ignored. An exception is raised if the data are masked and the
+    missing data tolerance is not 0.
     Defaults to False.
 
 **For example**:

diff --git a/lib/iris/tests/unit/analysis/test_PERCENTILE.py b/lib/iris/tests/unit/analysis/test_PERCENTILE.py
@@ -32,7 +32,7 @@ def check_percentile_calc(
         if self.lazy:
             data = as_lazy_data(data)
 
-        expected = np.array(expected)
+        expected = ma.array(expected)
 
         actual = self.agg_method(
             data,
@@ -52,9 +52,9 @@ def check_percentile_calc(
             self.assertFalse(is_lazy)
 
         if approx:
-            self.assertArrayAlmostEqual(actual, expected)
+            self.assertMaskedArrayAlmostEqual(actual, expected)
         else:
-            self.assertArrayEqual(actual, expected)
+            self.assertMaskedArrayEqual(actual, expected)
 
     def test_1d_single(self):
         data = np.arange(11)
@@ -131,7 +131,7 @@ def test_masked_2d_single(self):
     def test_masked_2d_multi(self):
         shape = (3, 10)
         data = ma.arange(np.prod(shape)).reshape(shape)
-        data[1] = ma.masked
+        data[1, ::2] = ma.masked
         percent = np.array([10, 50, 70, 80])
         axis = 0
         mdtol = 0.1
@@ -140,10 +140,11 @@ def test_masked_2d_multi(self):
         # linear interpolation.
         expected = percent / 100 * 20
         # Other columns are first column plus column number.
-        expected = (
+        expected = ma.array(
             np.broadcast_to(expected, (shape[-1], percent.size))
             + np.arange(shape[-1])[:, np.newaxis]
         )
+        expected[::2] = ma.masked
 
         self.check_percentile_calc(
             data, axis, percent, expected, mdtol=mdtol, approx=True
@@ -205,15 +206,32 @@ def setUp(self):
         self.agg_method = PERCENTILE.aggregate
 
     def test_masked(self):
-        shape = (2, 11)
+        # Using (3,11) because np.percentile returns a masked array anyway with
+        # (2, 11)
+        shape = (3, 11)
         data = ma.arange(np.prod(shape)).reshape(shape)
         data[0, ::2] = ma.masked
-        emsg = "Cannot use fast np.percentile method with masked array."
+        emsg = (
+            "Cannot use fast np.percentile method with masked array unless "
+            "mdtol is 0."
+        )
         with self.assertRaisesRegex(TypeError, emsg):
             PERCENTILE.aggregate(
                 data, axis=0, percent=50, fast_percentile_method=True
             )
 
+    def test_masked_mdtol_0(self):
+        # Using (3,11) because np.percentile returns a masked array anyway with
+        # (2, 11)
+        shape = (3, 11)
+        axis = 0
+        percent = 50
+        data = ma.arange(np.prod(shape)).reshape(shape)
+        data[0, ::2] = ma.masked
+        expected = ma.arange(shape[-1]) + 11
+        expected[::2] = ma.masked
+        self.check_percentile_calc(data, axis, percent, expected, mdtol=0)
+
     @mock.patch("numpy.percentile")
     def test_numpy_percentile_called(self, mocked_percentile):
         # Basic check that numpy.percentile is called.
@@ -286,10 +304,26 @@ def test_masked(self):
         actual = PERCENTILE.lazy_aggregate(
             data, axis=0, percent=50, fast_percentile_method=True
         )
-        emsg = "Cannot use fast np.percentile method with masked array."
+        emsg = (
+            "Cannot use fast np.percentile method with masked array unless "
+            "mdtol is 0."
+        )
         with self.assertRaisesRegex(TypeError, emsg):
             as_concrete_data(actual)
 
+    def test_masked_mdtol_0(self):
+        # Using (3,11) because np.percentile returns a masked array anyway with
+        # (2, 11)
+        shape = (3, 11)
+        axis = 0
+        percent = 50
+        data = ma.arange(np.prod(shape)).reshape(shape)
+        data[0, ::2] = ma.masked
+        data = as_lazy_data(data)
+        expected = ma.arange(shape[-1]) + 11
+        expected[::2] = ma.masked
+        self.check_percentile_calc(data, axis, percent, expected, mdtol=0)
+
     @mock.patch("numpy.percentile", return_value=np.array([2, 4]))
     def test_numpy_percentile_called(self, mocked_percentile):
         # Basic check that numpy.percentile is called.