diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index 4abf1e1192..6fcdcfb7bc 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -42,6 +42,12 @@ This document explains the changes made to Iris for this release ``iris.plot.plot(z_cube)`` will produce a z-vs-phenomenon plot, where before it would have produced a phenomenon-vs-z plot. (:pull:`3906`) +#. `@bjlittle`_ introduced :func:`iris.common.metadata.hexdigest` to the + public API. Previously it was a private function introduced in ``v3.0.0``. + Given any object, :func:`~iris.common.metadata.hexdigest` returns a string + representation of the 64-bit non-cryptographic hash of the object using the + extremely fast `xxhash`_ hashing algorithm. (:pull:`4020`) + 🐛 Bugs Fixed ============= @@ -150,3 +156,4 @@ This document explains the changes made to Iris for this release .. _PyPI: https://pypi.org/project/scitools-iris/ .. _Python 3.8: https://www.python.org/downloads/release/python-380/ .. _README.md: https://github.com/SciTools/iris#----- +.. _xxhash: http://cyan4973.github.io/xxHash/ diff --git a/lib/iris/_representation.py b/lib/iris/_representation.py index ee1e1a0d55..63974d1e50 100644 --- a/lib/iris/_representation.py +++ b/lib/iris/_representation.py @@ -9,7 +9,7 @@ import re import iris.util -from iris.common.metadata import _hexdigest as quickhash +from iris.common.metadata import hexdigest class DimensionHeader: @@ -101,7 +101,7 @@ def _summary_coord_extra(self, cube, coord): # ..except setdefault fails if values are numpy arrays. if key not in attributes: attributes[key] = value - elif quickhash(attributes[key]) != quickhash(value): + elif hexdigest(attributes[key]) != hexdigest(value): # NOTE: fast and array-safe comparison, as used in # :mod:`iris.common.metadata`. 
vary.add(key) diff --git a/lib/iris/common/metadata.py b/lib/iris/common/metadata.py index 9b1d3278f3..e0546b3c0b 100644 --- a/lib/iris/common/metadata.py +++ b/lib/iris/common/metadata.py @@ -37,6 +37,7 @@ "CoordMetadata", "CubeMetadata", "DimCoordMetadata", + "hexdigest", "metadata_manager_factory", ] @@ -48,34 +49,46 @@ logger = get_logger(__name__, fmt="[%(cls)s.%(funcName)s]") -def _hexdigest(value): +def hexdigest(item): """ - Return a hexidecimal string hash representation of the provided value. + Calculate a hexadecimal string hash representation of the provided item. - Calculates a 64-bit non-cryptographic hash of the provided value, - and returns the hexdigest string representation of the calculated hash. + Calculates a 64-bit non-cryptographic hash of the provided item, using + the extremely fast ``xxhash`` hashing algorithm, and returns the hexdigest + string representation of the hash. + + This provides a means to compare large and/or complex objects through + simple string hexdigest comparison. + + Args: + + * item (object): + The item whose hexdigest is to be calculated. + + Returns: + The string hexadecimal representation of the item's 64-bit hash. """ # Special case: deal with numpy arrays. 
- if ma.isMaskedArray(value): + if ma.isMaskedArray(item): parts = ( - value.shape, - xxh64_hexdigest(value.data), - xxh64_hexdigest(value.mask), + item.shape, + xxh64_hexdigest(item.data), + xxh64_hexdigest(item.mask), ) - value = str(parts) - elif isinstance(value, np.ndarray): - parts = (value.shape, xxh64_hexdigest(value)) - value = str(parts) + item = str(parts) + elif isinstance(item, np.ndarray): + parts = (item.shape, xxh64_hexdigest(item)) + item = str(parts) try: # Calculate single-shot hash to avoid allocating state on the heap - result = xxh64_hexdigest(value) + result = xxh64_hexdigest(item) except TypeError: # xxhash expects a bytes-like object, so try hashing the - # string representation of the provided value instead, but + # string representation of the provided item instead, but # also fold in the object type... - parts = (type(value), value) + parts = (type(item), item) result = xxh64_hexdigest(str(parts)) return result @@ -338,8 +351,8 @@ def _combine_lenient_attributes(left, right): # Use xxhash to perform an extremely fast non-cryptographic hash of # each dictionary key rvalue, thus ensuring that the dictionary is # completely hashable, as required by a set. - sleft = {(k, _hexdigest(v)) for k, v in left.items()} - sright = {(k, _hexdigest(v)) for k, v in right.items()} + sleft = {(k, hexdigest(v)) for k, v in left.items()} + sright = {(k, hexdigest(v)) for k, v in right.items()} # Intersection of common items. common = sleft & sright # Items in sleft different from sright. @@ -367,8 +380,8 @@ def _combine_strict_attributes(left, right): # Use xxhash to perform an extremely fast non-cryptographic hash of # each dictionary key rvalue, thus ensuring that the dictionary is # completely hashable, as required by a set. 
- sleft = {(k, _hexdigest(v)) for k, v in left.items()} - sright = {(k, _hexdigest(v)) for k, v in right.items()} + sleft = {(k, hexdigest(v)) for k, v in left.items()} + sright = {(k, hexdigest(v)) for k, v in right.items()} # Intersection of common items. common = sleft & sright # Now bring the result together. @@ -426,8 +439,8 @@ def _compare_lenient_attributes(left, right): # Use xxhash to perform an extremely fast non-cryptographic hash of # each dictionary key rvalue, thus ensuring that the dictionary is # completely hashable, as required by a set. - sleft = {(k, _hexdigest(v)) for k, v in left.items()} - sright = {(k, _hexdigest(v)) for k, v in right.items()} + sleft = {(k, hexdigest(v)) for k, v in left.items()} + sright = {(k, hexdigest(v)) for k, v in right.items()} # Items in sleft different from sright. dsleft = dict(sleft - sright) # Items in sright different from sleft. @@ -443,8 +456,8 @@ def _compare_strict_attributes(left, right): # Use xxhash to perform an extremely fast non-cryptographic hash of # each dictionary key rvalue, thus ensuring that the dictionary is # completely hashable, as required by a set. - sleft = {(k, _hexdigest(v)) for k, v in left.items()} - sright = {(k, _hexdigest(v)) for k, v in right.items()} + sleft = {(k, hexdigest(v)) for k, v in left.items()} + sright = {(k, hexdigest(v)) for k, v in right.items()} return sleft == sright @@ -512,8 +525,8 @@ def _difference_lenient_attributes(left, right): # Use xxhash to perform an extremely fast non-cryptographic hash of # each dictionary key rvalue, thus ensuring that the dictionary is # completely hashable, as required by a set. - sleft = {(k, _hexdigest(v)) for k, v in left.items()} - sright = {(k, _hexdigest(v)) for k, v in right.items()} + sleft = {(k, hexdigest(v)) for k, v in left.items()} + sright = {(k, hexdigest(v)) for k, v in right.items()} # Items in sleft different from sright. dsleft = dict(sleft - sright) # Items in sright different from sleft. 
@@ -540,8 +553,8 @@ def _difference_strict_attributes(left, right): # Use xxhash to perform an extremely fast non-cryptographic hash of # each dictionary key rvalue, thus ensuring that the dictionary is # completely hashable, as required by a set. - sleft = {(k, _hexdigest(v)) for k, v in left.items()} - sright = {(k, _hexdigest(v)) for k, v in right.items()} + sleft = {(k, hexdigest(v)) for k, v in left.items()} + sright = {(k, hexdigest(v)) for k, v in right.items()} # Items in sleft different from sright. dsleft = dict(sleft - sright) # Items in sright different from sleft. diff --git a/lib/iris/tests/unit/common/metadata/test__hexdigest.py b/lib/iris/tests/unit/common/metadata/test_hexdigest.py similarity index 97% rename from lib/iris/tests/unit/common/metadata/test__hexdigest.py rename to lib/iris/tests/unit/common/metadata/test_hexdigest.py index 546327a21b..55c697ea6d 100644 --- a/lib/iris/tests/unit/common/metadata/test__hexdigest.py +++ b/lib/iris/tests/unit/common/metadata/test_hexdigest.py @@ -4,7 +4,7 @@ # See COPYING and COPYING.LESSER in the root of the repository for full # licensing details. """ -Unit tests for the :func:`iris.common.metadata._hexdigest`. +Unit tests for the :func:`iris.common.metadata.hexdigest`. """ @@ -18,7 +18,7 @@ import numpy as np from xxhash import xxh64, xxh64_hexdigest -from iris.common.metadata import _hexdigest as hexdigest +from iris.common.metadata import hexdigest class TestBytesLikeObject(tests.IrisTest):