From 19eb2b5214991c8ade083685807e8edfbb2955f3 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 20 May 2021 10:55:14 -0400 Subject: [PATCH 01/21] Add PyWavelets dependency --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 018c188885..3f7afe1468 100644 --- a/setup.py +++ b/setup.py @@ -32,6 +32,7 @@ def read(fname): "numpy==1.16", "pandas==0.25.3", "pyfftw", + "PyWavelets", "pillow", "scipy==1.4.0", "scikit-learn", From 64882e4a0edeb9854ae2dde2cdf7eec582bcbe15 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 20 May 2021 10:55:27 -0400 Subject: [PATCH 02/21] initial add of Amit M code --- src/aspire/operators/wemd.py | 43 ++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 src/aspire/operators/wemd.py diff --git a/src/aspire/operators/wemd.py b/src/aspire/operators/wemd.py new file mode 100644 index 0000000000..888e512700 --- /dev/null +++ b/src/aspire/operators/wemd.py @@ -0,0 +1,43 @@ +""" +Wavelet-based approximate Earthmover's distance (EMD) for 2D/3D signals. + +This code is based on the following paper: + Sameer Shirdhonkar and David W. Jacobs. "Approximate earth mover’s distance in linear time." 2008 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). + +More details are available in their technical report: CAR-TR-1025 CS-TR-4908 UMIACS-TR-2008-06. +""" + +import numpy as np +import pywt + + +def embed(arr, wavelet, level): + """ + This function computes an embedding of Numpy arrays such that the L1 distance + between the resulting embeddings is approximately equal to the Earthmover distance of the arrays. + + Input: + arr - numpy array. + level - Decomposition level of the wavelets. Larger levels yield more coefficients and more accurate results. + wavelet - either the name of a wavelet supported by PyWavelets (e.g. 'coif3', 'sym3') or a pywt.Wavelet object. + See https://pywavelets.readthedocs.io/en/latest/ref/wavelets.html#built-in-wavelets-wavelist + Output: + One-dimensional numpy array containing weighted details coefficients. + Approximate Earthmover's distances are then given by the l_1 distances of the results, e.g. + wemd(arr1, arr2) := numpy.linalg.norm(embed(arr1, wavelet, level)-embed(arr2, wavelet, level), ord=1) + """ + + arrdwt = pywt.wavedecn(arr/arr.sum(), wavelet, mode='zero', level=level) + + dimension = len(arr.shape) + assert dimension in [2,3] + + n_levels = len(arrdwt[1:]) + + weighted_coefs = [] + for (j, details_level_j) in enumerate(arrdwt[1:]): + for coefs in details_level_j.values(): + multiplier = 2**((n_levels-1-j)*(1+(dimension/2.0))) + weighted_coefs.append(coefs.flatten()*multiplier) + + return np.concatenate(weighted_coefs) From edb5224926b6f21b241813b41ace740f9fefc373 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 20 May 2021 10:59:01 -0400 Subject: [PATCH 03/21] add wemd function and change docstring format --- src/aspire/operators/wemd.py | 44 +++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/src/aspire/operators/wemd.py b/src/aspire/operators/wemd.py index 888e512700..b6b629a83c 100644 --- a/src/aspire/operators/wemd.py +++ b/src/aspire/operators/wemd.py @@ -13,24 +13,23 @@ def embed(arr, wavelet, level): """ - This function computes an embedding of Numpy arrays such that the L1 distance - between the resulting embeddings is approximately equal to the Earthmover distance of the arrays. - - Input: - arr - numpy array. - level - Decomposition level of the wavelets. Larger levels yield more coefficients and more accurate results. - wavelet - either the name of a wavelet supported by PyWavelets (e.g. 'coif3', 'sym3') or a pywt.Wavelet object. - See https://pywavelets.readthedocs.io/en/latest/ref/wavelets.html#built-in-wavelets-wavelist - Output: - One-dimensional numpy array containing weighted details coefficients. - Approximate Earthmover's distances are then given by the l_1 distances of the results, e.g. - wemd(arr1, arr2) := numpy.linalg.norm(embed(arr1, wavelet, level)-embed(arr2, wavelet, level), ord=1) + This function computes an embedding of Numpy arrays such that + the L1 distance between the resulting embeddings is approximately + equal to the Earthmover distance of the arrays. + + :param arr: Numpy array + :param level: Decomposition level of the wavelets + Larger levels yield more coefficients and more accurate results + :param wavelet: Either the name of a wavelet supported by PyWavelets + (e.g. 'coif3', 'sym3') or a pywt.Wavelet object + See https://pywavelets.readthedocs.io/en/latest/ref/wavelets.html#built-in-wavelets-wavelist + :returns: One-dimensional numpy array containing weighted details coefficients. """ arrdwt = pywt.wavedecn(arr/arr.sum(), wavelet, mode='zero', level=level) dimension = len(arr.shape) - assert dimension in [2,3] + assert dimension in [2,3], f'Dimension {dimension} should be 2 or 3' n_levels = len(arrdwt[1:]) @@ -41,3 +40,22 @@ def embed(arr, wavelet, level): weighted_coefs.append(coefs.flatten()*multiplier) return np.concatenate(weighted_coefs) + +def wemd(arr1, arr2, wavelet, level): + """ + Approximate Earthmover's distance between using `embed`. + + :param arr1: Numpy array + :param arr2: Numpy array + :param level: Decomposition level of the wavelets + Larger levels yield more coefficients and more accurate results + :param wavelet: Either the name of a wavelet supported by PyWavelets + (e.g. 'coif3', 'sym3') or a pywt.Wavelet object + See https://pywavelets.readthedocs.io/en/latest/ref/wavelets.html#built-in-wavelets-wavelist + :return: Approximated Earthmover Distance + """ + + coefs1 = embed(arr1, wavelet, level) + coefs2 = embed(arr2, wavelet, level) + + return np.linalg.norm(coefs1 - coefs2, ord=1) From d94d7029cac866e02f62947bb49bf45d65460690 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 20 May 2021 10:59:46 -0400 Subject: [PATCH 04/21] formatting wemd code --- src/aspire/operators/wemd.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/aspire/operators/wemd.py b/src/aspire/operators/wemd.py index b6b629a83c..ac6c733194 100644 --- a/src/aspire/operators/wemd.py +++ b/src/aspire/operators/wemd.py @@ -26,21 +26,22 @@ def embed(arr, wavelet, level): :returns: One-dimensional numpy array containing weighted details coefficients. """ - arrdwt = pywt.wavedecn(arr/arr.sum(), wavelet, mode='zero', level=level) + arrdwt = pywt.wavedecn(arr / arr.sum(), wavelet, mode="zero", level=level) dimension = len(arr.shape) - assert dimension in [2,3], f'Dimension {dimension} should be 2 or 3' + assert dimension in [2, 3], f"Dimension {dimension} should be 2 or 3" n_levels = len(arrdwt[1:]) weighted_coefs = [] for (j, details_level_j) in enumerate(arrdwt[1:]): for coefs in details_level_j.values(): - multiplier = 2**((n_levels-1-j)*(1+(dimension/2.0))) - weighted_coefs.append(coefs.flatten()*multiplier) + multiplier = 2 ** ((n_levels - 1 - j) * (1 + (dimension / 2.0))) + weighted_coefs.append(coefs.flatten() * multiplier) return np.concatenate(weighted_coefs) + def wemd(arr1, arr2, wavelet, level): """ Approximate Earthmover's distance between using `embed`. From 96a2c74849f755c80298137c857fa0a81c8a75ad Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 20 May 2021 11:02:16 -0400 Subject: [PATCH 05/21] add wemd to operators __init__ and change name embed~>wembed --- src/aspire/operators/__init__.py | 1 + src/aspire/operators/wemd.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/aspire/operators/__init__.py b/src/aspire/operators/__init__.py index 036864245d..31b35dfabd 100644 --- a/src/aspire/operators/__init__.py +++ b/src/aspire/operators/__init__.py @@ -14,3 +14,4 @@ ScaledFilter, ZeroFilter, ) +from .wemd import wembed, wemd diff --git a/src/aspire/operators/wemd.py b/src/aspire/operators/wemd.py index ac6c733194..dc4ecb1bc0 100644 --- a/src/aspire/operators/wemd.py +++ b/src/aspire/operators/wemd.py @@ -11,7 +11,7 @@ import pywt -def embed(arr, wavelet, level): +def wembed(arr, wavelet, level): """ This function computes an embedding of Numpy arrays such that the L1 distance between the resulting embeddings is approximately @@ -56,7 +56,7 @@ def wemd(arr1, arr2, wavelet, level): :return: Approximated Earthmover Distance """ - coefs1 = embed(arr1, wavelet, level) - coefs2 = embed(arr2, wavelet, level) + coefs1 = wembed(arr1, wavelet, level) + coefs2 = wembed(arr2, wavelet, level) return np.linalg.norm(coefs1 - coefs2, ord=1) From 324dd046260c83e768172041131e628f73aa2a78 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 20 May 2021 11:30:29 -0400 Subject: [PATCH 06/21] change assert to ValueError --- src/aspire/operators/wemd.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/aspire/operators/wemd.py b/src/aspire/operators/wemd.py index dc4ecb1bc0..f72d8cfdf8 100644 --- a/src/aspire/operators/wemd.py +++ b/src/aspire/operators/wemd.py @@ -29,7 +29,8 @@ def wembed(arr, wavelet, level): arrdwt = pywt.wavedecn(arr / arr.sum(), wavelet, mode="zero", level=level) dimension = len(arr.shape) - assert dimension in [2, 3], f"Dimension {dimension} should be 2 or 3" + if dimension not in (2, 3): + raise ValueError(f"wembed input dimension {dimension} should be 2 or 3.") n_levels = len(arrdwt[1:]) From ca6d1e867fcb7abe72357a9d4094e1478546d08c Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 20 May 2021 11:30:52 -0400 Subject: [PATCH 07/21] add wemd unit test stub --- tests/test_wemd.py | 47 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 tests/test_wemd.py diff --git a/tests/test_wemd.py b/tests/test_wemd.py new file mode 100644 index 0000000000..9567c4fc70 --- /dev/null +++ b/tests/test_wemd.py @@ -0,0 +1,47 @@ +import logging +from unittest import TestCase + +import numpy as np +from pytest import raises + +from aspire.operators import wembed, wemd + +logger = logging.getLogger(__name__) + + +class WEMDTestCase(TestCase): + def setUp(self): + self.x = np.random.random((100, 100)) + self.y = np.random.random((100, 100)) + + def tearDown(self): + pass + + def testSmokeTest(self): + """ + Remove me later after adding real tests. + """ + + _ = wembed(self.x, "coif3", 5) + + d = wemd(self.x, self.y, "coif3", 5) + logger.info(f"wemd {d}") + + def testDimMistmatch(self): + """ + Intentionally pass something not 2d or 3d. + """ + + # 1d + with raises(ValueError): + _ = wembed(self.x.flatten(), "coif3", 5) + + # 4d + with raises(ValueError): + _ = wembed(self.x.reshape((2, 5, 5, 2)), "coif3", 5) + + def test_wembed(self): + pass + + def test_wemd(self): + pass From 016a0bbb6c8107deae781784ab097164eb24fba0 Mon Sep 17 00:00:00 2001 From: Amit Moscovich Date: Mon, 7 Jun 2021 15:46:17 -0400 Subject: [PATCH 08/21] First version of wavelet-based approximate Earthmover's distance, with unit test. --- src/aspire/operators/__init__.py | 2 +- src/aspire/operators/wemd.py | 28 ++++++--------- tests/test_wemd.py | 61 +++++++++++++++----------------- 3 files changed, 41 insertions(+), 50 deletions(-) diff --git a/src/aspire/operators/__init__.py b/src/aspire/operators/__init__.py index 31b35dfabd..5557fe9df6 100644 --- a/src/aspire/operators/__init__.py +++ b/src/aspire/operators/__init__.py @@ -14,4 +14,4 @@ ScaledFilter, ZeroFilter, ) -from .wemd import wembed, wemd +from .wemd import wemd_embed, wemd_norm diff --git a/src/aspire/operators/wemd.py b/src/aspire/operators/wemd.py index f72d8cfdf8..d6b4c92bc0 100644 --- a/src/aspire/operators/wemd.py +++ b/src/aspire/operators/wemd.py @@ -1,5 +1,5 @@ """ -Wavelet-based approximate Earthmover's distance (EMD) for 2D/3D signals. +Wavelet-based approximate Earthmover's distance (EMD) for n-dimensional signals. This code is based on the following paper: Sameer Shirdhonkar and David W. Jacobs. "Approximate earth mover’s distance in linear time." 2008 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). @@ -11,11 +11,11 @@ import pywt -def wembed(arr, wavelet, level): +def wemd_embed(arr, wavelet, level): """ This function computes an embedding of Numpy arrays such that - the L1 distance between the resulting embeddings is approximately - equal to the Earthmover distance of the arrays. + for non-negative arrays that sum to one, the L1 distance between the resulting embeddings + is strongly equivalent to the Earthmover distance of the arrays. :param arr: Numpy array :param level: Decomposition level of the wavelets @@ -26,11 +26,9 @@ def wembed(arr, wavelet, level): :returns: One-dimensional numpy array containing weighted details coefficients. """ - arrdwt = pywt.wavedecn(arr / arr.sum(), wavelet, mode="zero", level=level) + arrdwt = pywt.wavedecn(arr, wavelet, mode="zero", level=level) dimension = len(arr.shape) - if dimension not in (2, 3): - raise ValueError(f"wembed input dimension {dimension} should be 2 or 3.") n_levels = len(arrdwt[1:]) @@ -43,21 +41,17 @@ def wembed(arr, wavelet, level): return np.concatenate(weighted_coefs) -def wemd(arr1, arr2, wavelet, level): +def wemd_norm(arr, wavelet, level): """ - Approximate Earthmover's distance between using `embed`. + Wavelet-based norm used to approximate the Earthmover's distance between mass distributions specified as Numpy arrays (typically images or volumes). - :param arr1: Numpy array - :param arr2: Numpy array + :param arr: Numpy array of the difference between the two mass distributions. :param level: Decomposition level of the wavelets Larger levels yield more coefficients and more accurate results :param wavelet: Either the name of a wavelet supported by PyWavelets (e.g. 'coif3', 'sym3') or a pywt.Wavelet object See https://pywavelets.readthedocs.io/en/latest/ref/wavelets.html#built-in-wavelets-wavelist - :return: Approximated Earthmover Distance + :return: Approximated Earthmover's Distance """ - - coefs1 = wembed(arr1, wavelet, level) - coefs2 = wembed(arr2, wavelet, level) - - return np.linalg.norm(coefs1 - coefs2, ord=1) + coefs = wemd_embed(arr, wavelet, level) + return np.linalg.norm(coefs, ord=1) diff --git a/tests/test_wemd.py b/tests/test_wemd.py index 9567c4fc70..6d1eb3d0f7 100644 --- a/tests/test_wemd.py +++ b/tests/test_wemd.py @@ -2,46 +2,43 @@ from unittest import TestCase import numpy as np +from numpy import mgrid, sin, cos, all, asarray, ceil, log2 from pytest import raises -from aspire.operators import wembed, wemd +import warnings -logger = logging.getLogger(__name__) - - -class WEMDTestCase(TestCase): - def setUp(self): - self.x = np.random.random((100, 100)) - self.y = np.random.random((100, 100)) +from aspire.operators import wemd_norm - def tearDown(self): - pass - - def testSmokeTest(self): - """ - Remove me later after adding real tests. - """ +logger = logging.getLogger(__name__) - _ = wembed(self.x, "coif3", 5) - d = wemd(self.x, self.y, "coif3", 5) - logger.info(f"wemd {d}") +def _smoothed_disk_image(x, y, radius, width, height): + (Y, X) = mgrid[:height,:width] + ratio = ((X-x)**2 + (Y-y)**2)/(radius**2) + return 2.0 - 2/(1 + np.exp(-ratio)) # Sigmoid funciton - def testDimMistmatch(self): - """ - Intentionally pass something not 2d or 3d. - """ - # 1d - with raises(ValueError): - _ = wembed(self.x.flatten(), "coif3", 5) +def _is_monotone(seq): + arr = asarray(seq) + assert arr.ndim == 1 + return all(arr[1:] >= arr[:-1]) - # 4d - with raises(ValueError): - _ = wembed(self.x.reshape((2, 5, 5, 2)), "coif3", 5) - def test_wembed(self): - pass +class WEMDTestCase(TestCase): + '''Test that the WEMD distance between smoothed disks of various radii, angles and distances is monotone in the Euclidean distance of their centers. + Note that this monotonicity isn't strictly required by the theory, but holds empirically.''' + def test_wemd_norm(self): + WIDTH = 64 + HEIGHT = 64 + CENTER_X = WIDTH/2 + CENTER_Y = HEIGHT/2 + LEVEL = int(ceil(log2(max(WIDTH, HEIGHT)))) + 1 # A rule of thumb for the wavelet level is to take the log2 of the side length. + WAVELET = 'coif3' + warnings.filterwarnings("ignore", message="Level value of .* is too high: all coefficients will experience boundary effects.") + for radius in [1, 2, 3, 4, 5, 6, 7]: # Just some random radii + for angle in [0.0, 0.4755, 0.6538, 1.9818, 3.0991, 4.4689, 4.9859, 5.5752]: # Just some random angles + disks = [_smoothed_disk_image(CENTER_X + int(k*cos(angle)), CENTER_Y + int(k*sin(angle)), radius, WIDTH, HEIGHT) for k in range(0,16,2)] + wemd_distances_along_ray = [wemd_norm(disks[0] - disk, WAVELET, LEVEL) for disk in disks] + print(wemd_distances_along_ray) + self.assertTrue(_is_monotone(wemd_distances_along_ray)) - def test_wemd(self): - pass From 723fb8ee4e5e764591a85bfed64c21279048dea7 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 7 Jun 2021 15:58:50 -0400 Subject: [PATCH 09/21] apply auto style/linter --- tests/test_wemd.py | 60 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 18 deletions(-) diff --git a/tests/test_wemd.py b/tests/test_wemd.py index 6d1eb3d0f7..c4fdb5c40e 100644 --- a/tests/test_wemd.py +++ b/tests/test_wemd.py @@ -1,21 +1,20 @@ import logging +import warnings from unittest import TestCase import numpy as np -from numpy import mgrid, sin, cos, all, asarray, ceil, log2 +from numpy import all, asarray, ceil, cos, log2, mgrid, sin from pytest import raises -import warnings - from aspire.operators import wemd_norm logger = logging.getLogger(__name__) def _smoothed_disk_image(x, y, radius, width, height): - (Y, X) = mgrid[:height,:width] - ratio = ((X-x)**2 + (Y-y)**2)/(radius**2) - return 2.0 - 2/(1 + np.exp(-ratio)) # Sigmoid funciton + (Y, X) = mgrid[:height, :width] + ratio = ((X - x) ** 2 + (Y - y) ** 2) / (radius ** 2) + return 2.0 - 2 / (1 + np.exp(-ratio)) # Sigmoid funciton def _is_monotone(seq): @@ -25,20 +24,45 @@ def _is_monotone(seq): class WEMDTestCase(TestCase): - '''Test that the WEMD distance between smoothed disks of various radii, angles and distances is monotone in the Euclidean distance of their centers. - Note that this monotonicity isn't strictly required by the theory, but holds empirically.''' + """Test that the WEMD distance between smoothed disks of various radii, angles and distances is monotone in the Euclidean distance of their centers. + Note that this monotonicity isn't strictly required by the theory, but holds empirically.""" + def test_wemd_norm(self): WIDTH = 64 HEIGHT = 64 - CENTER_X = WIDTH/2 - CENTER_Y = HEIGHT/2 - LEVEL = int(ceil(log2(max(WIDTH, HEIGHT)))) + 1 # A rule of thumb for the wavelet level is to take the log2 of the side length. - WAVELET = 'coif3' - warnings.filterwarnings("ignore", message="Level value of .* is too high: all coefficients will experience boundary effects.") - for radius in [1, 2, 3, 4, 5, 6, 7]: # Just some random radii - for angle in [0.0, 0.4755, 0.6538, 1.9818, 3.0991, 4.4689, 4.9859, 5.5752]: # Just some random angles - disks = [_smoothed_disk_image(CENTER_X + int(k*cos(angle)), CENTER_Y + int(k*sin(angle)), radius, WIDTH, HEIGHT) for k in range(0,16,2)] - wemd_distances_along_ray = [wemd_norm(disks[0] - disk, WAVELET, LEVEL) for disk in disks] + CENTER_X = WIDTH / 2 + CENTER_Y = HEIGHT / 2 + LEVEL = ( + int(ceil(log2(max(WIDTH, HEIGHT)))) + 1 + ) # A rule of thumb for the wavelet level is to take the log2 of the side length. + WAVELET = "coif3" + warnings.filterwarnings( + "ignore", + message="Level value of .* is too high: all coefficients will experience boundary effects.", + ) + for radius in [1, 2, 3, 4, 5, 6, 7]: # Just some random radii + for angle in [ + 0.0, + 0.4755, + 0.6538, + 1.9818, + 3.0991, + 4.4689, + 4.9859, + 5.5752, + ]: # Just some random angles + disks = [ + _smoothed_disk_image( + CENTER_X + int(k * cos(angle)), + CENTER_Y + int(k * sin(angle)), + radius, + WIDTH, + HEIGHT, + ) + for k in range(0, 16, 2) + ] + wemd_distances_along_ray = [ + wemd_norm(disks[0] - disk, WAVELET, LEVEL) for disk in disks + ] print(wemd_distances_along_ray) self.assertTrue(_is_monotone(wemd_distances_along_ray)) - From e3fe6559653daced1dfa20353b4e9c45755ca6c6 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 7 Jun 2021 16:04:00 -0400 Subject: [PATCH 10/21] minor tweaks, unused import, int div, conver print to logger --- tests/test_wemd.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/test_wemd.py b/tests/test_wemd.py index c4fdb5c40e..57fe55222e 100644 --- a/tests/test_wemd.py +++ b/tests/test_wemd.py @@ -4,7 +4,6 @@ import numpy as np from numpy import all, asarray, ceil, cos, log2, mgrid, sin -from pytest import raises from aspire.operators import wemd_norm @@ -30,8 +29,8 @@ class WEMDTestCase(TestCase): def test_wemd_norm(self): WIDTH = 64 HEIGHT = 64 - CENTER_X = WIDTH / 2 - CENTER_Y = HEIGHT / 2 + CENTER_X = WIDTH // 2 + CENTER_Y = HEIGHT // 2 LEVEL = ( int(ceil(log2(max(WIDTH, HEIGHT)))) + 1 ) # A rule of thumb for the wavelet level is to take the log2 of the side length. @@ -64,5 +63,5 @@ def test_wemd_norm(self): wemd_distances_along_ray = [ wemd_norm(disks[0] - disk, WAVELET, LEVEL) for disk in disks ] - print(wemd_distances_along_ray) + logger.info(f"wemd distances along ray: {wemd_distances_along_ray}") self.assertTrue(_is_monotone(wemd_distances_along_ray)) From 05bcc10c143c0093d2b01fbff1cb84196e85db45 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 7 Jun 2021 16:14:33 -0400 Subject: [PATCH 11/21] line length tweaks --- src/aspire/operators/wemd.py | 16 +++++++++++----- tests/test_wemd.py | 17 +++++++++++------ 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/aspire/operators/wemd.py b/src/aspire/operators/wemd.py index d6b4c92bc0..8c139f6236 100644 --- a/src/aspire/operators/wemd.py +++ b/src/aspire/operators/wemd.py @@ -2,9 +2,12 @@ Wavelet-based approximate Earthmover's distance (EMD) for n-dimensional signals. This code is based on the following paper: - Sameer Shirdhonkar and David W. Jacobs. "Approximate earth mover’s distance in linear time." 2008 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). + Sameer Shirdhonkar and David W. Jacobs. + "Approximate earth mover’s distance in linear time." + 2008 IEEE Conference on Computer Vision and Pattern Recognition (CVPR). -More details are available in their technical report: CAR-TR-1025 CS-TR-4908 UMIACS-TR-2008-06. +More details are available in their technical report: + CAR-TR-1025 CS-TR-4908 UMIACS-TR-2008-06. """ import numpy as np @@ -14,8 +17,9 @@ def wemd_embed(arr, wavelet, level): """ This function computes an embedding of Numpy arrays such that - for non-negative arrays that sum to one, the L1 distance between the resulting embeddings - is strongly equivalent to the Earthmover distance of the arrays. + for non-negative arrays that sum to one, the L1 distance between the + resulting embeddings is strongly equivalent to the Earthmover distance + of the arrays. :param arr: Numpy array :param level: Decomposition level of the wavelets @@ -43,7 +47,8 @@ def wemd_embed(arr, wavelet, level): def wemd_norm(arr, wavelet, level): """ - Wavelet-based norm used to approximate the Earthmover's distance between mass distributions specified as Numpy arrays (typically images or volumes). + Wavelet-based norm used to approximate the Earthmover's distance between + mass distributions specified as Numpy arrays (typically images or volumes). :param arr: Numpy array of the difference between the two mass distributions. :param level: Decomposition level of the wavelets @@ -53,5 +58,6 @@ def wemd_norm(arr, wavelet, level): See https://pywavelets.readthedocs.io/en/latest/ref/wavelets.html#built-in-wavelets-wavelist :return: Approximated Earthmover's Distance """ + coefs = wemd_embed(arr, wavelet, level) return np.linalg.norm(coefs, ord=1) diff --git a/tests/test_wemd.py b/tests/test_wemd.py index 57fe55222e..50a2beadf9 100644 --- a/tests/test_wemd.py +++ b/tests/test_wemd.py @@ -23,21 +23,26 @@ def _is_monotone(seq): class WEMDTestCase(TestCase): - """Test that the WEMD distance between smoothed disks of various radii, angles and distances is monotone in the Euclidean distance of their centers. - Note that this monotonicity isn't strictly required by the theory, but holds empirically.""" + """ + Test that the WEMD distance between smoothed disks of various radii, + angles and distances is monotone in the Euclidean distance of their centers. + Note that this monotonicity isn't strictly required by the theory, + but holds empirically. + """ def test_wemd_norm(self): WIDTH = 64 HEIGHT = 64 CENTER_X = WIDTH // 2 CENTER_Y = HEIGHT // 2 - LEVEL = ( - int(ceil(log2(max(WIDTH, HEIGHT)))) + 1 - ) # A rule of thumb for the wavelet level is to take the log2 of the side length. + # A rule of thumb for the wavelet level is to take + # the log2 of the side length. + LEVEL = int(ceil(log2(max(WIDTH, HEIGHT)))) + 1 WAVELET = "coif3" warnings.filterwarnings( "ignore", - message="Level value of .* is too high: all coefficients will experience boundary effects.", + message="Level value of .* is too high:" + " all coefficients will experience boundary effects.", ) for radius in [1, 2, 3, 4, 5, 6, 7]: # Just some random radii for angle in [ From e279c3dcf3e2372a9e85f9c56bbdb505b671faef Mon Sep 17 00:00:00 2001 From: Amit Moscovich Date: Mon, 7 Jun 2021 17:54:02 -0400 Subject: [PATCH 12/21] Reponse to janden's review. --- src/aspire/operators/wemd.py | 13 ++++++------- tests/test_wemd.py | 28 ++++++++++++---------------- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/src/aspire/operators/wemd.py b/src/aspire/operators/wemd.py index 8c139f6236..5f101659fe 100644 --- a/src/aspire/operators/wemd.py +++ b/src/aspire/operators/wemd.py @@ -25,21 +25,20 @@ def wemd_embed(arr, wavelet, level): :param level: Decomposition level of the wavelets Larger levels yield more coefficients and more accurate results :param wavelet: Either the name of a wavelet supported by PyWavelets - (e.g. 'coif3', 'sym3') or a pywt.Wavelet object + (e.g. 'coif3', 'sym3', 'sym5', etc.) or a pywt.Wavelet object See https://pywavelets.readthedocs.io/en/latest/ref/wavelets.html#built-in-wavelets-wavelist :returns: One-dimensional numpy array containing weighted details coefficients. """ + dimension = arr.ndim arrdwt = pywt.wavedecn(arr, wavelet, mode="zero", level=level) - - dimension = len(arr.shape) - - n_levels = len(arrdwt[1:]) + detail_coefs = arrdwt[1:] + assert len(detail_coefs) == level weighted_coefs = [] - for (j, details_level_j) in enumerate(arrdwt[1:]): + for (j, details_level_j) in enumerate(detail_coefs): for coefs in details_level_j.values(): - multiplier = 2 ** ((n_levels - 1 - j) * (1 + (dimension / 2.0))) + multiplier = 2 ** ((level-1-j) * (1+(dimension/2.0))) weighted_coefs.append(coefs.flatten() * multiplier) return np.concatenate(weighted_coefs) diff --git a/tests/test_wemd.py b/tests/test_wemd.py index 50a2beadf9..827d2bb80f 100644 --- a/tests/test_wemd.py +++ b/tests/test_wemd.py @@ -12,8 +12,8 @@ def _smoothed_disk_image(x, y, radius, width, height): (Y, X) = mgrid[:height, :width] - ratio = ((X - x) ** 2 + (Y - y) ** 2) / (radius ** 2) - return 2.0 - 2 / (1 + np.exp(-ratio)) # Sigmoid funciton + ratio = ((X-x)**2 + (Y-y)**2) / (radius**2) + return 2.0 - 2 / (1 + np.exp(-ratio)) # Scaled sigmoid funciton def _is_monotone(seq): @@ -29,14 +29,13 @@ class WEMDTestCase(TestCase): Note that this monotonicity isn't strictly required by the theory, but holds empirically. """ - def test_wemd_norm(self): WIDTH = 64 HEIGHT = 64 CENTER_X = WIDTH // 2 CENTER_Y = HEIGHT // 2 - # A rule of thumb for the wavelet level is to take - # the log2 of the side length. + + # A rule of thumb for the wavelet level is to take the log2 of the side length. LEVEL = int(ceil(log2(max(WIDTH, HEIGHT)))) + 1 WAVELET = "coif3" warnings.filterwarnings( @@ -44,17 +43,13 @@ def test_wemd_norm(self): message="Level value of .* is too high:" " all coefficients will experience boundary effects.", ) - for radius in [1, 2, 3, 4, 5, 6, 7]: # Just some random radii - for angle in [ - 0.0, - 0.4755, - 0.6538, - 1.9818, - 3.0991, - 4.4689, - 4.9859, - 5.5752, - ]: # Just some random angles + + # A few disk radii and ray angles to test + RADII = [1, 2, 3, 4, 5, 6, 7] + ANGLES = [0.0, 0.4755, 0.6538, 1.9818, 3.0991, 4.4689, 4.9859, 5.5752] + + for radius in RADII: + for angle in ANGLES: disks = [ _smoothed_disk_image( CENTER_X + int(k * cos(angle)), @@ -70,3 +65,4 @@ def test_wemd_norm(self): ] logger.info(f"wemd distances along ray: {wemd_distances_along_ray}") self.assertTrue(_is_monotone(wemd_distances_along_ray)) + From 0c02981e32dbe51d7ec4e4bbb7687bb44704baf8 Mon Sep 17 00:00:00 2001 From: Amit Moscovich Date: Mon, 7 Jun 2021 18:10:10 -0400 Subject: [PATCH 13/21] Minor comment --- src/aspire/operators/wemd.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/aspire/operators/wemd.py b/src/aspire/operators/wemd.py index 5f101659fe..59d6a22811 100644 --- a/src/aspire/operators/wemd.py +++ b/src/aspire/operators/wemd.py @@ -22,8 +22,9 @@ def wemd_embed(arr, wavelet, level): of the arrays. :param arr: Numpy array - :param level: Decomposition level of the wavelets - Larger levels yield more coefficients and more accurate results + :param level: Decomposition level of the wavelets. + Larger levels yield more coefficients and more accurate results. + As a rule of thumb, you can take level to be the log2 of the side-length of the domain. :param wavelet: Either the name of a wavelet supported by PyWavelets (e.g. 'coif3', 'sym3', 'sym5', etc.) or a pywt.Wavelet object See https://pywavelets.readthedocs.io/en/latest/ref/wavelets.html#built-in-wavelets-wavelist From aea4d7bf5243223cf479e23a78b65a1ed4ff5745 Mon Sep 17 00:00:00 2001 From: Amit Moscovich Date: Mon, 7 Jun 2021 19:29:55 -0400 Subject: [PATCH 14/21] Added default values for the wavelet and level parameters of WEMD. --- src/aspire/operators/wemd.py | 17 ++++++++++++----- tests/test_wemd.py | 5 +---- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/aspire/operators/wemd.py b/src/aspire/operators/wemd.py index 59d6a22811..0ec4ab261b 100644 --- a/src/aspire/operators/wemd.py +++ b/src/aspire/operators/wemd.py @@ -14,7 +14,7 @@ import pywt -def wemd_embed(arr, wavelet, level): +def wemd_embed(arr, wavelet='coif3', level=None): """ This function computes an embedding of Numpy arrays such that for non-negative arrays that sum to one, the L1 distance between the @@ -24,14 +24,18 @@ def wemd_embed(arr, wavelet, level): :param arr: Numpy array :param level: Decomposition level of the wavelets. Larger levels yield more coefficients and more accurate results. - As a rule of thumb, you can take level to be the log2 of the side-length of the domain. + If no level is given, we take the the log2 of the side-length of the domain. :param wavelet: Either the name of a wavelet supported by PyWavelets (e.g. 'coif3', 'sym3', 'sym5', etc.) or a pywt.Wavelet object See https://pywavelets.readthedocs.io/en/latest/ref/wavelets.html#built-in-wavelets-wavelist + The default is 'coif3', because it seems to work well empirically. :returns: One-dimensional numpy array containing weighted details coefficients. """ dimension = arr.ndim + if level is None: + level = int(np.ceil(np.log2(max(arr.shape))))+1 + arrdwt = pywt.wavedecn(arr, wavelet, mode="zero", level=level) detail_coefs = arrdwt[1:] assert len(detail_coefs) == level @@ -45,17 +49,20 @@ def wemd_embed(arr, wavelet, level): return np.concatenate(weighted_coefs) -def wemd_norm(arr, wavelet, level): +def wemd_norm(arr, wavelet='coif3', level=None): """ Wavelet-based norm used to approximate the Earthmover's distance between mass distributions specified as Numpy arrays (typically images or volumes). :param arr: Numpy array of the difference between the two mass distributions. - :param level: Decomposition level of the wavelets + :param level: Decomposition level of the wavelets. + Larger levels yield more coefficients and more accurate results. + If no level is given, we take the the log2 of the side-length of the domain. Larger levels yield more coefficients and more accurate results :param wavelet: Either the name of a wavelet supported by PyWavelets - (e.g. 'coif3', 'sym3') or a pywt.Wavelet object + (e.g. 'coif3', 'sym3', 'sym5', etc.) or a pywt.Wavelet object See https://pywavelets.readthedocs.io/en/latest/ref/wavelets.html#built-in-wavelets-wavelist + The default is 'coif3', because it seems to work well empirically. :return: Approximated Earthmover's Distance """ diff --git a/tests/test_wemd.py b/tests/test_wemd.py index 827d2bb80f..75ea905138 100644 --- a/tests/test_wemd.py +++ b/tests/test_wemd.py @@ -35,9 +35,6 @@ def test_wemd_norm(self): CENTER_X = WIDTH // 2 CENTER_Y = HEIGHT // 2 - # A rule of thumb for the wavelet level is to take the log2 of the side length. - LEVEL = int(ceil(log2(max(WIDTH, HEIGHT)))) + 1 - WAVELET = "coif3" warnings.filterwarnings( "ignore", message="Level value of .* is too high:" @@ -61,7 +58,7 @@ def test_wemd_norm(self): for k in range(0, 16, 2) ] wemd_distances_along_ray = [ - wemd_norm(disks[0] - disk, WAVELET, LEVEL) for disk in disks + wemd_norm(disks[0] - disk) for disk in disks ] logger.info(f"wemd distances along ray: {wemd_distances_along_ray}") self.assertTrue(_is_monotone(wemd_distances_along_ray)) From 29982cf65878e5302f50709f11c7ac20276dc307 Mon Sep 17 00:00:00 2001 From: Amit Moscovich Date: Mon, 7 Jun 2021 19:32:19 -0400 Subject: [PATCH 15/21] Minor performance improvement of wemd_embed --- src/aspire/operators/wemd.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/aspire/operators/wemd.py b/src/aspire/operators/wemd.py index 0ec4ab261b..42cacd049f 100644 --- a/src/aspire/operators/wemd.py +++ b/src/aspire/operators/wemd.py @@ -42,9 +42,9 @@ def wemd_embed(arr, wavelet='coif3', level=None): weighted_coefs = [] for (j, details_level_j) in enumerate(detail_coefs): + multiplier = 2 ** ((level-1-j) * (1+(dimension/2.0))) for coefs in details_level_j.values(): - multiplier = 2 ** ((level-1-j) * (1+(dimension/2.0))) - weighted_coefs.append(coefs.flatten() * multiplier) + weighted_coefs.append(multiplier * coefs.flatten()) return np.concatenate(weighted_coefs) From 5ef2679b0a0074e148a09af10335a9187c5c399b Mon Sep 17 00:00:00 2001 From: Amit Moscovich Date: Mon, 7 Jun 2021 19:36:13 -0400 Subject: [PATCH 16/21] wemd_embed: suppress boundary effect warning when calling pywt.wavedecn() with default level. --- src/aspire/operators/wemd.py | 9 +++++++++ tests/test_wemd.py | 6 ------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/aspire/operators/wemd.py b/src/aspire/operators/wemd.py index 42cacd049f..b410000bc7 100644 --- a/src/aspire/operators/wemd.py +++ b/src/aspire/operators/wemd.py @@ -10,6 +10,7 @@ CAR-TR-1025 CS-TR-4908 UMIACS-TR-2008-06. """ +import warnings import numpy as np import pywt @@ -36,7 +37,15 @@ def wemd_embed(arr, wavelet='coif3', level=None): if level is None: level = int(np.ceil(np.log2(max(arr.shape))))+1 + # Using wavedecn with the default level creates this boundary effects warning. + # However, this doesn't seem to be a cause for concern. + warnings.filterwarnings( + "ignore", + message="Level value of .* is too high:" + " all coefficients will experience boundary effects.", + ) arrdwt = pywt.wavedecn(arr, wavelet, mode="zero", level=level) + detail_coefs = arrdwt[1:] assert len(detail_coefs) == level diff --git a/tests/test_wemd.py b/tests/test_wemd.py index 75ea905138..637a73ed99 100644 --- a/tests/test_wemd.py +++ b/tests/test_wemd.py @@ -35,12 +35,6 @@ def test_wemd_norm(self): CENTER_X = WIDTH // 2 CENTER_Y = HEIGHT // 2 - warnings.filterwarnings( - "ignore", - message="Level value of .* is too high:" - " all coefficients will experience boundary effects.", - ) - # A few disk radii and ray angles to test RADII = [1, 2, 3, 4, 5, 6, 7] ANGLES = [0.0, 0.4755, 0.6538, 1.9818, 3.0991, 4.4689, 4.9859, 5.5752] From 6cec52afb440bbf2044124040415b9b1d6198164 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 8 Jun 2021 07:08:52 -0400 Subject: [PATCH 17/21] isort/black formatting --- src/aspire/operators/wemd.py | 9 +++++---- tests/test_wemd.py | 6 +++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/aspire/operators/wemd.py b/src/aspire/operators/wemd.py index b410000bc7..52cbd7546f 100644 --- a/src/aspire/operators/wemd.py +++ b/src/aspire/operators/wemd.py @@ -11,11 +11,12 @@ """ import warnings + import numpy as np import pywt -def wemd_embed(arr, wavelet='coif3', level=None): +def wemd_embed(arr, wavelet="coif3", level=None): """ This function computes an embedding of Numpy arrays such that for non-negative arrays that sum to one, the L1 distance between the @@ -35,7 +36,7 @@ def wemd_embed(arr, wavelet='coif3', level=None): dimension = arr.ndim if level is None: - level = int(np.ceil(np.log2(max(arr.shape))))+1 + level = int(np.ceil(np.log2(max(arr.shape)))) + 1 # Using wavedecn with the default level creates this boundary effects warning. # However, this doesn't seem to be a cause for concern. @@ -51,14 +52,14 @@ def wemd_embed(arr, wavelet='coif3', level=None): weighted_coefs = [] for (j, details_level_j) in enumerate(detail_coefs): - multiplier = 2 ** ((level-1-j) * (1+(dimension/2.0))) + multiplier = 2 ** ((level - 1 - j) * (1 + (dimension / 2.0))) for coefs in details_level_j.values(): weighted_coefs.append(multiplier * coefs.flatten()) return np.concatenate(weighted_coefs) -def wemd_norm(arr, wavelet='coif3', level=None): +def wemd_norm(arr, wavelet="coif3", level=None): """ Wavelet-based norm used to approximate the Earthmover's distance between mass distributions specified as Numpy arrays (typically images or volumes). diff --git a/tests/test_wemd.py b/tests/test_wemd.py index 637a73ed99..c460c2b73c 100644 --- a/tests/test_wemd.py +++ b/tests/test_wemd.py @@ -12,7 +12,7 @@ def _smoothed_disk_image(x, y, radius, width, height): (Y, X) = mgrid[:height, :width] - ratio = ((X-x)**2 + (Y-y)**2) / (radius**2) + ratio = ((X - x) ** 2 + (Y - y) ** 2) / (radius ** 2) return 2.0 - 2 / (1 + np.exp(-ratio)) # Scaled sigmoid funciton @@ -29,12 +29,13 @@ class WEMDTestCase(TestCase): Note that this monotonicity isn't strictly required by the theory, but holds empirically. """ + def test_wemd_norm(self): WIDTH = 64 HEIGHT = 64 CENTER_X = WIDTH // 2 CENTER_Y = HEIGHT // 2 - + # A few disk radii and ray angles to test RADII = [1, 2, 3, 4, 5, 6, 7] ANGLES = [0.0, 0.4755, 0.6538, 1.9818, 3.0991, 4.4689, 4.9859, 5.5752] @@ -56,4 +57,3 @@ def test_wemd_norm(self): ] logger.info(f"wemd distances along ray: {wemd_distances_along_ray}") self.assertTrue(_is_monotone(wemd_distances_along_ray)) - From 8e5de521654a5587503c97bc7aedeff0b8d7564c Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 8 Jun 2021 07:09:41 -0400 Subject: [PATCH 18/21] remove unused imports --- tests/test_wemd.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_wemd.py b/tests/test_wemd.py index c460c2b73c..fd5a3daac6 100644 --- a/tests/test_wemd.py +++ b/tests/test_wemd.py @@ -1,9 +1,8 @@ import logging -import warnings from unittest import TestCase import numpy as np -from numpy import all, asarray, ceil, cos, log2, mgrid, sin +from numpy import all, asarray, cos, mgrid, sin from aspire.operators import wemd_norm From 37d39dd2a4637942d9643c38888fa7e2ac42ec31 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 8 Jun 2021 07:10:32 -0400 Subject: [PATCH 19/21] Disambiguate built-in `all` with `np.all` --- tests/test_wemd.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_wemd.py b/tests/test_wemd.py index fd5a3daac6..897180c160 100644 --- a/tests/test_wemd.py +++ b/tests/test_wemd.py @@ -2,7 +2,7 @@ from unittest import TestCase import numpy as np -from numpy import all, asarray, cos, mgrid, sin +from numpy import asarray, cos, mgrid, sin from aspire.operators import wemd_norm @@ -18,7 +18,7 @@ def _smoothed_disk_image(x, y, radius, width, height): def _is_monotone(seq): arr = asarray(seq) assert arr.ndim == 1 - return all(arr[1:] >= arr[:-1]) + return np.all(arr[1:] >= arr[:-1]) class WEMDTestCase(TestCase): From 57512e757a91ca4260c191638716c7e053cc308c Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 8 Jun 2021 12:11:42 -0400 Subject: [PATCH 20/21] use context block for warnings filter --- src/aspire/operators/wemd.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/aspire/operators/wemd.py b/src/aspire/operators/wemd.py index 52cbd7546f..06680fb68f 100644 --- a/src/aspire/operators/wemd.py +++ b/src/aspire/operators/wemd.py @@ -40,12 +40,13 @@ def wemd_embed(arr, wavelet="coif3", level=None): # Using wavedecn with the default level creates this boundary effects warning. # However, this doesn't seem to be a cause for concern. - warnings.filterwarnings( - "ignore", - message="Level value of .* is too high:" - " all coefficients will experience boundary effects.", - ) - arrdwt = pywt.wavedecn(arr, wavelet, mode="zero", level=level) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message="Level value of .* is too high:" + " all coefficients will experience boundary effects.", + ) + arrdwt = pywt.wavedecn(arr, wavelet, mode="zero", level=level) detail_coefs = arrdwt[1:] assert len(detail_coefs) == level From 5d4ac3267f755e92dec150164a0099051378c111 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 8 Jun 2021 12:12:35 -0400 Subject: [PATCH 21/21] rename testfile to match methods --- tests/{test_wemd.py => test_wbemd.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{test_wemd.py => test_wbemd.py} (100%) diff --git a/tests/test_wemd.py b/tests/test_wbemd.py similarity index 100% rename from tests/test_wemd.py rename to tests/test_wbemd.py