"""
Wavelet-based approximate Earthmover's distance (EMD) for n-dimensional signals.

This code is based on the following paper:
    Sameer Shirdhonkar and David W. Jacobs.
    "Approximate earth mover’s distance in linear time."
    2008 IEEE Conference on Computer Vision and Pattern Recognition (CVPR).

More details are available in their technical report:
    CAR-TR-1025 CS-TR-4908 UMIACS-TR-2008-06.
"""
| 12 | + |
| 13 | +import warnings |
| 14 | + |
| 15 | +import numpy as np |
| 16 | +import pywt |
| 17 | + |
| 18 | + |
def wemd_embed(arr, wavelet="coif3", level=None):
    """
    Embed an array so that L1 distances approximate the Earthmover's distance.

    For non-negative arrays that sum to one, the L1 distance between the
    resulting embeddings is strongly equivalent to the Earthmover distance
    of the arrays.

    :param arr: Numpy array (or array-like convertible with `np.asarray`)
        with at least one dimension.
    :param wavelet: Either the name of a wavelet supported by PyWavelets
        (e.g. 'coif3', 'sym3', 'sym5', etc.) or a pywt.Wavelet object
        See https://pywavelets.readthedocs.io/en/latest/ref/wavelets.html#built-in-wavelets-wavelist
        The default is 'coif3', because it seems to work well empirically.
    :param level: Decomposition level of the wavelets.
        Larger levels yield more coefficients and more accurate results.
        If no level is given, we use ceil(log2(n)) + 1, where n is the
        largest side-length of the domain.
    :returns: One-dimensional numpy array containing weighted detail coefficients.
    :raises ValueError: If `arr` is zero-dimensional.
    """
    # Accept lists/tuples as well as ndarrays; `.ndim` and `.shape` are needed below.
    arr = np.asarray(arr)
    dimension = arr.ndim

    if dimension == 0:
        # Without this guard the default-level computation fails with an
        # unhelpful "max() arg is an empty sequence" ValueError.
        raise ValueError("wemd_embed requires an array with at least one dimension.")

    if level is None:
        level = int(np.ceil(np.log2(max(arr.shape)))) + 1

    # Using wavedecn with such a high level triggers a boundary-effects warning.
    # However, this doesn't seem to be a cause for concern, so it is silenced
    # only for the duration of the decomposition.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            message="Level value of .* is too high:"
            " all coefficients will experience boundary effects.",
        )
        arrdwt = pywt.wavedecn(arr, wavelet, mode="zero", level=level)

    # The first entry holds the approximation coefficients; only the detail
    # coefficients enter the embedding.
    detail_coefs = arrdwt[1:]
    # Internal sanity check: one dictionary of detail coefficients per level.
    assert len(detail_coefs) == level

    weighted_coefs = []
    for j, details_level_j in enumerate(detail_coefs):
        # The weight shrinks by a factor of 2 ** (1 + dimension / 2) per entry,
        # reaching 1 for the last entry of `detail_coefs`.
        multiplier = 2 ** ((level - 1 - j) * (1 + (dimension / 2.0)))
        for coefs in details_level_j.values():
            weighted_coefs.append(multiplier * coefs.flatten())

    return np.concatenate(weighted_coefs)
| 61 | + |
| 62 | + |
def wemd_norm(arr, wavelet="coif3", level=None):
    """
    Wavelet-based norm that approximates the Earthmover's distance between
    two mass distributions given as Numpy arrays (typically images or volumes).

    The norm is the L1 norm of the embedding returned by `wemd_embed`.

    :param arr: Numpy array of the difference between the two mass distributions.
    :param wavelet: Either the name of a wavelet supported by PyWavelets
        (e.g. 'coif3', 'sym3', 'sym5', etc.) or a pywt.Wavelet object
        See https://pywavelets.readthedocs.io/en/latest/ref/wavelets.html#built-in-wavelets-wavelist
        The default is 'coif3', because it seems to work well empirically.
    :param level: Decomposition level of the wavelets.
        Larger levels yield more coefficients and more accurate results.
        If no level is given, `wemd_embed` derives one from the largest
        side-length of the domain.
    :return: Approximated Earthmover's Distance
    """
    embedding = wemd_embed(arr, wavelet=wavelet, level=level)
    # Sum of absolute values of the weighted detail coefficients.
    return np.linalg.norm(embedding, ord=1)
0 commit comments