Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/src/whatsnew/latest.rst
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ This document explains the changes made to Iris for this release
#. `@wjbenfold`_ fixed plotting of circular coordinates to extend kwarg arrays
as well as the data. (:issue:`466`, :pull:`4649`)

#. `@wjbenfold`_ corrected the axis on which masking is applied when an
aggregator adds a trailing dimension. (:pull:`4755`)


💣 Incompatible Changes
=======================
Expand All @@ -121,13 +124,17 @@ This document explains the changes made to Iris for this release
#. `@wjbenfold`_ added caching to the calculation of the points array in a
:class:`~iris.coords.DimCoord` created using
:meth:`~iris.coords.DimCoord.from_regular`. (:pull:`4698`)

#. `@wjbenfold`_ introduced caching in :func:`_lazy_data._optimum_chunksize` and
:func:`iris.fileformats.pp_load_rules._epoch_date_hours` to reduce time spent
repeating calculations. (:pull:`4716`)

#. `@pp-mo`_ made :meth:`~iris.cube.Cube.add_aux_factory` faster.
(:pull:`4718`)

#. `@wjbenfold`_ permitted the fast percentile aggregation method to be used on
masked data when the missing data tolerance is set to 0. (:issue:`4735`,
:pull:`4755`)


🔥 Deprecations
Expand Down
62 changes: 44 additions & 18 deletions lib/iris/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from collections.abc import Iterable
import functools
from functools import wraps
import warnings

import dask.array as da
import numpy as np
Expand Down Expand Up @@ -591,7 +592,13 @@ def aggregate(self, data, axis, **kwargs):
and result is not ma.masked
):
fraction_not_missing = data.count(axis=axis) / data.shape[axis]
mask_update = 1 - mdtol > fraction_not_missing
mask_update = np.array(1 - mdtol > fraction_not_missing)
if np.array(result).ndim > mask_update.ndim:
# call_func created trailing dimension.
mask_update = np.broadcast_to(
mask_update.reshape(mask_update.shape + (1,)),
np.array(result).shape,
)
if ma.isMaskedArray(result):
result.mask = result.mask | mask_update
else:
Expand Down Expand Up @@ -720,6 +727,25 @@ def __init__(self, units_func=None, **kwargs):
**kwargs,
)

def _base_aggregate(self, data, axis, lazy, **kwargs):
    """
    Shared implementation behind ``aggregate`` and ``lazy_aggregate``.

    Verifies that every mandatory keyword named in ``self._args`` has been
    supplied, flags masked input as an error when the fast percentile
    method is requested with a non-zero missing data tolerance, and then
    hands over to the corresponding ``_Aggregator`` method.

    Args:

    * data:
        The data to aggregate; passed through unchanged.

    * axis:
        The axis to aggregate over; passed through unchanged.

    * lazy (boolean):
        When true, delegate to ``_Aggregator.lazy_aggregate``, otherwise
        to ``_Aggregator.aggregate``.

    Kwargs:

    * kwargs:
        Aggregation keywords. ``error_on_masked=True`` is added when
        ``fast_percentile_method`` is truthy and ``mdtol`` (taken as 1
        when absent) is not 0.

    Returns:
        Whatever the delegated ``_Aggregator`` method returns.

    Raises:
        ValueError: if a keyword listed in ``self._args`` is missing.

    """
    absent = [name for name in self._args if name not in kwargs]
    if absent:
        # Report the first missing keyword, in ``self._args`` order.
        template = (
            "{} aggregator requires the mandatory keyword argument {!r}."
        )
        raise ValueError(template.format(self.name(), absent[0]))

    wants_fast = kwargs.get("fast_percentile_method", False)
    if wants_fast and kwargs.get("mdtol", 1) != 0:
        # Masked points are only acceptable to the fast method when the
        # tolerance is exactly 0, so request an error in every other case.
        kwargs["error_on_masked"] = True

    delegate = _Aggregator.lazy_aggregate if lazy else _Aggregator.aggregate
    return delegate(self, data, axis, **kwargs)

def aggregate(self, data, axis, **kwargs):
"""
Perform the percentile aggregation over the given data.
Expand Down Expand Up @@ -755,12 +781,7 @@ def aggregate(self, data, axis, **kwargs):

"""

msg = "{} aggregator requires the mandatory keyword argument {!r}."
for arg in self._args:
if arg not in kwargs:
raise ValueError(msg.format(self.name(), arg))

return _Aggregator.aggregate(self, data, axis, **kwargs)
return self._base_aggregate(data, axis, lazy=False, **kwargs)

def lazy_aggregate(self, data, axis, **kwargs):
"""
Expand Down Expand Up @@ -794,12 +815,7 @@ def lazy_aggregate(self, data, axis, **kwargs):

"""

msg = "{} aggregator requires the mandatory keyword argument {!r}."
for arg in self._args:
if arg not in kwargs:
raise ValueError(msg.format(self.name(), arg))

return _Aggregator.lazy_aggregate(self, data, axis, **kwargs)
return self._base_aggregate(data, axis, lazy=True, **kwargs)

def post_process(self, collapsed_cube, data_result, coords, **kwargs):
"""
Expand Down Expand Up @@ -1281,10 +1297,19 @@ def _calc_percentile(data, percent, fast_percentile_method=False, **kwargs):

"""
if fast_percentile_method:
msg = "Cannot use fast np.percentile method with masked array."
if ma.is_masked(data):
raise TypeError(msg)
result = np.percentile(data, percent, axis=-1)
if kwargs.pop("error_on_masked", False):
msg = (
"Cannot use fast np.percentile method with masked array unless"
" mdtol is 0."
)
if ma.is_masked(data):
raise TypeError(msg)
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
"Warning: 'partition' will ignore the 'mask' of the MaskedArray.",
)
result = np.percentile(data, percent, axis=-1)
result = result.T
else:
quantiles = percent / 100.0
Expand Down Expand Up @@ -1965,7 +1990,8 @@ def interp_order(length):
* fast_percentile_method (boolean):
When set to True, uses :func:`numpy.percentile` method as a faster
alternative to the :func:`scipy.stats.mstats.mquantiles` method. alphap and
betap are ignored. An exception is raised if the data are masked.
betap are ignored. An exception is raised if the data are masked and the
missing data tolerance is not 0.
Defaults to False.

**For example**:
Expand Down
50 changes: 42 additions & 8 deletions lib/iris/tests/unit/analysis/test_PERCENTILE.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def check_percentile_calc(
if self.lazy:
data = as_lazy_data(data)

expected = np.array(expected)
expected = ma.array(expected)

actual = self.agg_method(
data,
Expand All @@ -52,9 +52,9 @@ def check_percentile_calc(
self.assertFalse(is_lazy)

if approx:
self.assertArrayAlmostEqual(actual, expected)
self.assertMaskedArrayAlmostEqual(actual, expected)
else:
self.assertArrayEqual(actual, expected)
self.assertMaskedArrayEqual(actual, expected)

def test_1d_single(self):
data = np.arange(11)
Expand Down Expand Up @@ -131,7 +131,7 @@ def test_masked_2d_single(self):
def test_masked_2d_multi(self):
shape = (3, 10)
data = ma.arange(np.prod(shape)).reshape(shape)
data[1] = ma.masked
data[1, ::2] = ma.masked
percent = np.array([10, 50, 70, 80])
axis = 0
mdtol = 0.1
Expand All @@ -140,10 +140,11 @@ def test_masked_2d_multi(self):
# linear interpolation.
expected = percent / 100 * 20
# Other columns are first column plus column number.
expected = (
expected = ma.array(
np.broadcast_to(expected, (shape[-1], percent.size))
+ np.arange(shape[-1])[:, np.newaxis]
)
expected[::2] = ma.masked

self.check_percentile_calc(
data, axis, percent, expected, mdtol=mdtol, approx=True
Expand Down Expand Up @@ -205,15 +206,32 @@ def setUp(self):
self.agg_method = PERCENTILE.aggregate

def test_masked(self):
shape = (2, 11)
# Using (3,11) because np.percentile returns a masked array anyway with
# (2, 11)
shape = (3, 11)
data = ma.arange(np.prod(shape)).reshape(shape)
data[0, ::2] = ma.masked
emsg = "Cannot use fast np.percentile method with masked array."
emsg = (
"Cannot use fast np.percentile method with masked array unless "
"mdtol is 0."
)
with self.assertRaisesRegex(TypeError, emsg):
PERCENTILE.aggregate(
data, axis=0, percent=50, fast_percentile_method=True
)

def test_masked_mdtol_0(self):
    """Check masked input with ``mdtol=0`` against a masked expected result."""
    # A (3, 11) array is used because np.percentile returns a masked
    # array anyway when given a (2, 11) one.
    shape = (3, 11)
    n_columns = shape[-1]

    data = ma.arange(np.prod(shape)).reshape(shape)
    # Mask alternate points of the first row only.
    data[0, ::2] = ma.masked

    # The 50th percentile over axis 0 is the middle row (11..21); columns
    # holding a masked input point are expected to be masked.
    expected = ma.arange(n_columns) + 11
    expected[::2] = ma.masked

    self.check_percentile_calc(data, 0, 50, expected, mdtol=0)

@mock.patch("numpy.percentile")
def test_numpy_percentile_called(self, mocked_percentile):
# Basic check that numpy.percentile is called.
Expand Down Expand Up @@ -286,10 +304,26 @@ def test_masked(self):
actual = PERCENTILE.lazy_aggregate(
data, axis=0, percent=50, fast_percentile_method=True
)
emsg = "Cannot use fast np.percentile method with masked array."
emsg = (
"Cannot use fast np.percentile method with masked array unless "
"mdtol is 0."
)
with self.assertRaisesRegex(TypeError, emsg):
as_concrete_data(actual)

def test_masked_mdtol_0(self):
    """Check lazy masked input with ``mdtol=0`` against a masked result."""
    # A (3, 11) array is used because np.percentile returns a masked
    # array anyway when given a (2, 11) one.
    shape = (3, 11)
    n_columns = shape[-1]

    real_data = ma.arange(np.prod(shape)).reshape(shape)
    # Mask alternate points of the first row only.
    real_data[0, ::2] = ma.masked
    lazy_data = as_lazy_data(real_data)

    # The 50th percentile over axis 0 is the middle row (11..21); columns
    # holding a masked input point are expected to be masked.
    expected = ma.arange(n_columns) + 11
    expected[::2] = ma.masked

    self.check_percentile_calc(lazy_data, 0, 50, expected, mdtol=0)

@mock.patch("numpy.percentile", return_value=np.array([2, 4]))
def test_numpy_percentile_called(self, mocked_percentile):
# Basic check that numpy.percentile is called.
Expand Down