diff --git a/lib/iris/common/_split_attribute_dicts.py b/lib/iris/common/_split_attribute_dicts.py index 41ce321a8f..92fd883dd7 100644 --- a/lib/iris/common/_split_attribute_dicts.py +++ b/lib/iris/common/_split_attribute_dicts.py @@ -18,7 +18,6 @@ So, we simply treat "global" and "local" attributes of the same name as entirely independent. Which happily is also the easiest to code, and to explain. """ - from collections.abc import Mapping, Sequence from functools import wraps @@ -30,7 +29,16 @@ def _convert_splitattrs_to_pairedkeys_dict(dic): Transform a :class:`~iris.cube.CubeAttributesDict` "split" attributes dictionary into a 'normal' :class:`dict`, with paired keys of the form ('global', name) or ('local', name). + + If the input is *not* a split-attrs dict, it is converted to one before + transforming it. This will assign its keys to global/local depending on a standard + set of choices (see :class:`~iris.cube.CubeAttributesDict`). """ + from iris.cube import CubeAttrsDict + + # Convert input to CubeAttrsDict + if not hasattr(dic, "globals") or not hasattr(dic, "locals"): + dic = CubeAttrsDict(dic) def _global_then_local_items(dic): # Routine to produce global, then local 'items' in order, and with all keys @@ -93,13 +101,6 @@ def adjust_for_split_attribute_dictionaries(operation): @wraps(operation) def _inner_function(*args, **kwargs): - from iris.cube import CubeAttrsDict - - # First make all inputs into CubeAttrsDict, if not already. - args = [ - arg if isinstance(arg, CubeAttrsDict) else CubeAttrsDict(arg) - for arg in args - ] # Convert all inputs into 'pairedkeys' type dicts args = [_convert_splitattrs_to_pairedkeys_dict(arg) for arg in args] diff --git a/lib/iris/tests/unit/util/test_equalise_attributes.py b/lib/iris/tests/unit/util/test_equalise_attributes.py index 13aa1e2af4..b410b5522c 100644 --- a/lib/iris/tests/unit/util/test_equalise_attributes.py +++ b/lib/iris/tests/unit/util/test_equalise_attributes.py @@ -14,8 +14,13 @@ import numpy as np -from iris.cube import Cube +from iris.coords import AuxCoord +from iris.cube import Cube, CubeAttrsDict import iris.tests.stock +from iris.tests.unit.common.metadata.test_CubeMetadata import ( + _TEST_ATTRNAME, + make_attrsdict, +) from iris.util import equalise_attributes @@ -153,5 +158,111 @@ def test_complex_somecommon(self): ) +class TestSplitattributes: + """ + Extra testing for cases where attributes differ specifically by type + + That is, where there is a new possibility of 'mismatch' due to the newer "typing" + of attributes as global or local. + + Specifically, it is now possible that although + "cube1.attributes.keys() == cube2.attributes.keys()", + AND "cube1.attributes[k] == cube2.attributes[k]" for all keys, + YET STILL (possibly) "cube1.attributes != cube2.attributes" + """ + + @staticmethod + def _sample_splitattrs_cube(attr_global_local): + attrs = CubeAttrsDict( + globals=make_attrsdict(attr_global_local[0]), + locals=make_attrsdict(attr_global_local[1]), + ) + return Cube([0], attributes=attrs) + + @staticmethod + def check_equalised_result(cube1, cube2): + equalise_attributes([cube1, cube2]) + # Note: "X" represents a missing attribute, as in test_CubeMetadata + return [ + ( + cube1.attributes.globals.get(_TEST_ATTRNAME, "X") + + cube1.attributes.locals.get(_TEST_ATTRNAME, "X") + ), + ( + cube2.attributes.globals.get(_TEST_ATTRNAME, "X") + + cube2.attributes.locals.get(_TEST_ATTRNAME, "X") + ), + ] + + def test__global_and_local__bothsame(self): + # A trivial case showing that the original globals+locals are both preserved. + cube1 = self._sample_splitattrs_cube("AB") + cube2 = self._sample_splitattrs_cube("AB") + result = self.check_equalised_result(cube1, cube2) + assert result == ["AB", "AB"] + + def test__globals_different(self): + cube1 = self._sample_splitattrs_cube("AX") + cube2 = self._sample_splitattrs_cube("BX") + result = self.check_equalised_result(cube1, cube2) + assert result == ["XX", "XX"] + + def test__locals_different(self): + cube1 = self._sample_splitattrs_cube("XA") + cube2 = self._sample_splitattrs_cube("XB") + result = self.check_equalised_result(cube1, cube2) + assert result == ["XX", "XX"] + + def test__oneglobal_onelocal__different(self): + cube1 = self._sample_splitattrs_cube("AX") + cube2 = self._sample_splitattrs_cube("XB") + result = self.check_equalised_result(cube1, cube2) + assert result == ["XX", "XX"] + + # This case fails without the split-attributes fix. + def test__oneglobal_onelocal__same(self): + cube1 = self._sample_splitattrs_cube("AX") + cube2 = self._sample_splitattrs_cube("XA") + result = self.check_equalised_result(cube1, cube2) + assert result == ["XX", "XX"] + + def test__sameglobals_onelocal__different(self): + cube1 = self._sample_splitattrs_cube("AB") + cube2 = self._sample_splitattrs_cube("AX") + result = self.check_equalised_result(cube1, cube2) + assert result == ["XX", "XX"] + + # This case fails without the split-attributes fix. + def test__sameglobals_onelocal__same(self): + cube1 = self._sample_splitattrs_cube("AA") + cube2 = self._sample_splitattrs_cube("AX") + result = self.check_equalised_result(cube1, cube2) + assert result == ["XX", "XX"] + + # This case fails without the split-attributes fix. + def test__differentglobals_samelocals(self): + cube1 = self._sample_splitattrs_cube("AC") + cube2 = self._sample_splitattrs_cube("BC") + result = self.check_equalised_result(cube1, cube2) + assert result == ["XX", "XX"] + + +class TestNonCube: + # Just to assert that we can do operations on non-cube components (like Coords), + # in fact effectively, anything with a ".attributes". + # Even though the docstring does not admit this, we test it because we put in + # special code to preserve it when adding the split-attribute handling. + def test(self): + attrs = [1, 1, 2] + coords = [ + AuxCoord([0], attributes={"a": attr, "b": "all_the_same"}) + for attr in attrs + ] + equalise_attributes(coords) + assert all( + coord.attributes == {"b": "all_the_same"} for coord in coords + ) + + if __name__ == "__main__": tests.main() diff --git a/lib/iris/util.py b/lib/iris/util.py index 0b31ebdafc..258be7d318 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -2059,24 +2059,50 @@ def equalise_attributes(cubes): See more at :doc:`/userguide/real_and_lazy_data`. """ - removed = [] + # deferred import to avoid circularity problem + from iris.common._split_attribute_dicts import ( + _convert_splitattrs_to_pairedkeys_dict, + ) + + cube_attrs = [cube.attributes for cube in cubes] + + # Convert all the input dictionaries to ones with 'paired' keys, so each key + # becomes a pair, ('local'/'global', attribute-name), making them specific to each + # "type", i.e. global or local. + # This is needed to ensure that afterwards all cubes will have identical + # attributes, E.G. it treats an attribute which is global on one cube and local + # on another as *not* the same. This is essential to its use in making merges work. + # + # This approach does also still function with "ordinary" dictionaries, or + # :class:`iris.common.mixin.LimitedAttributeDict`, though somewhat inefficiently, + # so the routine works on *other* objects bearing attributes, i.e. not just Cubes. + # That is also important since the original code allows that (though the docstring + # does not admit it). + cube_attrs = [ + _convert_splitattrs_to_pairedkeys_dict(dic) for dic in cube_attrs + ] + # Work out which attributes are identical across all the cubes. - common_keys = list(cubes[0].attributes.keys()) + common_keys = list(cube_attrs[0].keys()) keys_to_remove = set(common_keys) - for cube in cubes[1:]: - cube_keys = list(cube.attributes.keys()) + for attrs in cube_attrs[1:]: + cube_keys = list(attrs.keys()) keys_to_remove.update(cube_keys) common_keys = [ key for key in common_keys - if ( - key in cube_keys - and np.all(cube.attributes[key] == cubes[0].attributes[key]) - ) + if (key in cube_keys and np.all(attrs[key] == cube_attrs[0][key])) ] keys_to_remove.difference_update(common_keys) - # Remove all the other attributes. + # Convert back from the resulting 'paired' keys set, extracting just the + # attribute-name parts, as a set of names to be discarded. + # Note: we don't care any more what type (global/local) these were : we will + # simply remove *all* attributes with those names. + keys_to_remove = set(key_pair[1] for key_pair in keys_to_remove) + + # Remove all the non-matching attributes. + removed = [] for cube in cubes: deleted_attributes = { key: cube.attributes.pop(key) @@ -2084,6 +2110,7 @@ def equalise_attributes(cubes): if key in cube.attributes } removed.append(deleted_attributes) + return removed