Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/src/whatsnew/latest.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ This document explains the changes made to Iris for this release
``iris.plot.plot(z_cube)`` will produce a z-vs-phenomenon plot, where before
it would have produced a phenomenon-vs-z plot. (:pull:`3906`)

#. `@bjlittle`_ introduced :func:`iris.common.metadata.hexdigest` to the
public API. Previously it was a private function introduced in ``v3.0.0``.
Given any object, :func:`~iris.common.metadata.hexdigest` returns a string
representation of the 64-bit non-cryptographic hash of the object using the
extremely fast `xxhash`_ hashing algorithm. (:pull:`4020`)


🐛 Bugs Fixed
=============
Expand Down Expand Up @@ -150,3 +156,4 @@ This document explains the changes made to Iris for this release
.. _PyPI: https://pypi.org/project/scitools-iris/
.. _Python 3.8: https://www.python.org/downloads/release/python-380/
.. _README.md: https://github.com/SciTools/iris#-----
.. _xxhash: http://cyan4973.github.io/xxHash/
4 changes: 2 additions & 2 deletions lib/iris/_representation.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import re

import iris.util
from iris.common.metadata import _hexdigest as quickhash
from iris.common.metadata import hexdigest


class DimensionHeader:
Expand Down Expand Up @@ -101,7 +101,7 @@ def _summary_coord_extra(self, cube, coord):
# ..except setdefault fails if values are numpy arrays.
if key not in attributes:
attributes[key] = value
elif quickhash(attributes[key]) != quickhash(value):
elif hexdigest(attributes[key]) != hexdigest(value):
# NOTE: fast and array-safe comparison, as used in
# :mod:`iris.common.metadata`.
vary.add(key)
Expand Down
67 changes: 40 additions & 27 deletions lib/iris/common/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"CoordMetadata",
"CubeMetadata",
"DimCoordMetadata",
"hexdigest",
"metadata_manager_factory",
]

Expand All @@ -48,34 +49,46 @@
logger = get_logger(__name__, fmt="[%(cls)s.%(funcName)s]")


def _hexdigest(value):
def hexdigest(item):
"""
Return a hexadecimal string hash representation of the provided value.
Calculate a hexadecimal string hash representation of the provided item.

Calculates a 64-bit non-cryptographic hash of the provided value,
and returns the hexdigest string representation of the calculated hash.
Calculates a 64-bit non-cryptographic hash of the provided item, using
the extremely fast ``xxhash`` hashing algorithm, and returns the hexdigest
string representation of the hash.

This provides a means to compare large and/or complex objects through
simple string hexdigest comparison.

Args:

* item (object):
The item whose hexdigest is to be calculated.

Returns:
The string hexadecimal representation of the item's 64-bit hash.

"""
# Special case: deal with numpy arrays.
if ma.isMaskedArray(value):
if ma.isMaskedArray(item):
parts = (
value.shape,
xxh64_hexdigest(value.data),
xxh64_hexdigest(value.mask),
item.shape,
xxh64_hexdigest(item.data),
xxh64_hexdigest(item.mask),
)
value = str(parts)
elif isinstance(value, np.ndarray):
parts = (value.shape, xxh64_hexdigest(value))
value = str(parts)
item = str(parts)
elif isinstance(item, np.ndarray):
parts = (item.shape, xxh64_hexdigest(item))
item = str(parts)

try:
# Calculate single-shot hash to avoid allocating state on the heap
result = xxh64_hexdigest(value)
result = xxh64_hexdigest(item)
except TypeError:
# xxhash expects a bytes-like object, so try hashing the
# string representation of the provided value instead, but
# string representation of the provided item instead, but
# also fold in the object type...
parts = (type(value), value)
parts = (type(item), item)
result = xxh64_hexdigest(str(parts))

return result
Expand Down Expand Up @@ -338,8 +351,8 @@ def _combine_lenient_attributes(left, right):
# Use xxhash to perform an extremely fast non-cryptographic hash of
# each dictionary key rvalue, thus ensuring that the dictionary is
# completely hashable, as required by a set.
sleft = {(k, _hexdigest(v)) for k, v in left.items()}
sright = {(k, _hexdigest(v)) for k, v in right.items()}
sleft = {(k, hexdigest(v)) for k, v in left.items()}
sright = {(k, hexdigest(v)) for k, v in right.items()}
# Intersection of common items.
common = sleft & sright
# Items in sleft different from sright.
Expand Down Expand Up @@ -367,8 +380,8 @@ def _combine_strict_attributes(left, right):
# Use xxhash to perform an extremely fast non-cryptographic hash of
# each dictionary key rvalue, thus ensuring that the dictionary is
# completely hashable, as required by a set.
sleft = {(k, _hexdigest(v)) for k, v in left.items()}
sright = {(k, _hexdigest(v)) for k, v in right.items()}
sleft = {(k, hexdigest(v)) for k, v in left.items()}
sright = {(k, hexdigest(v)) for k, v in right.items()}
# Intersection of common items.
common = sleft & sright
# Now bring the result together.
Expand Down Expand Up @@ -426,8 +439,8 @@ def _compare_lenient_attributes(left, right):
# Use xxhash to perform an extremely fast non-cryptographic hash of
# each dictionary key rvalue, thus ensuring that the dictionary is
# completely hashable, as required by a set.
sleft = {(k, _hexdigest(v)) for k, v in left.items()}
sright = {(k, _hexdigest(v)) for k, v in right.items()}
sleft = {(k, hexdigest(v)) for k, v in left.items()}
sright = {(k, hexdigest(v)) for k, v in right.items()}
# Items in sleft different from sright.
dsleft = dict(sleft - sright)
# Items in sright different from sleft.
Expand All @@ -443,8 +456,8 @@ def _compare_strict_attributes(left, right):
# Use xxhash to perform an extremely fast non-cryptographic hash of
# each dictionary key rvalue, thus ensuring that the dictionary is
# completely hashable, as required by a set.
sleft = {(k, _hexdigest(v)) for k, v in left.items()}
sright = {(k, _hexdigest(v)) for k, v in right.items()}
sleft = {(k, hexdigest(v)) for k, v in left.items()}
sright = {(k, hexdigest(v)) for k, v in right.items()}

return sleft == sright

Expand Down Expand Up @@ -512,8 +525,8 @@ def _difference_lenient_attributes(left, right):
# Use xxhash to perform an extremely fast non-cryptographic hash of
# each dictionary key rvalue, thus ensuring that the dictionary is
# completely hashable, as required by a set.
sleft = {(k, _hexdigest(v)) for k, v in left.items()}
sright = {(k, _hexdigest(v)) for k, v in right.items()}
sleft = {(k, hexdigest(v)) for k, v in left.items()}
sright = {(k, hexdigest(v)) for k, v in right.items()}
# Items in sleft different from sright.
dsleft = dict(sleft - sright)
# Items in sright different from sleft.
Expand All @@ -540,8 +553,8 @@ def _difference_strict_attributes(left, right):
# Use xxhash to perform an extremely fast non-cryptographic hash of
# each dictionary key rvalue, thus ensuring that the dictionary is
# completely hashable, as required by a set.
sleft = {(k, _hexdigest(v)) for k, v in left.items()}
sright = {(k, _hexdigest(v)) for k, v in right.items()}
sleft = {(k, hexdigest(v)) for k, v in left.items()}
sright = {(k, hexdigest(v)) for k, v in right.items()}
# Items in sleft different from sright.
dsleft = dict(sleft - sright)
# Items in sright different from sleft.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.
"""
Unit tests for the :func:`iris.common.metadata._hexdigest`.
Unit tests for the :func:`iris.common.metadata.hexdigest`.

"""

Expand All @@ -18,7 +18,7 @@
import numpy as np
from xxhash import xxh64, xxh64_hexdigest

from iris.common.metadata import _hexdigest as hexdigest
from iris.common.metadata import hexdigest


class TestBytesLikeObject(tests.IrisTest):
Expand Down