From 58c97a1f91c25ddcf5b784c538e9019b1300bc7a Mon Sep 17 00:00:00 2001 From: Will Benfold Date: Mon, 11 Oct 2021 17:12:48 +0100 Subject: [PATCH 1/9] Interpolate all points at once rather than sequentially --- lib/iris/analysis/trajectory.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/iris/analysis/trajectory.py b/lib/iris/analysis/trajectory.py index d5fac9d108..5d2b9a1322 100644 --- a/lib/iris/analysis/trajectory.py +++ b/lib/iris/analysis/trajectory.py @@ -320,9 +320,13 @@ def interpolate(cube, sample_points, method=None): break if method in ["linear", None]: + columns = cube.interpolate(sample_points, Linear()) for i in range(trajectory_size): - point = [(coord, values[i]) for coord, values in sample_points] - column = cube.interpolate(point, Linear()) + column_dims = [ + i if dim in squish_my_dims else slice(None) + for dim in range(columns.ndim) + ] + column = columns[tuple(column_dims)] new_cube.data[..., i] = column.data # Fill in the empty squashed (non derived) coords. for column_coord in column.dim_coords + column.aux_coords: From d42471dcd5bf982275987ccd7ee254c1cfabf4bd Mon Sep 17 00:00:00 2001 From: Will Benfold Date: Thu, 21 Oct 2021 13:28:25 +0100 Subject: [PATCH 2/9] This splits the internals of cube.interpolate in an effort not to make so much superfluous data, but is slow --- lib/iris/analysis/trajectory.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/lib/iris/analysis/trajectory.py b/lib/iris/analysis/trajectory.py index 5d2b9a1322..f0c30d1274 100644 --- a/lib/iris/analysis/trajectory.py +++ b/lib/iris/analysis/trajectory.py @@ -320,14 +320,20 @@ def interpolate(cube, sample_points, method=None): break if method in ["linear", None]: - columns = cube.interpolate(sample_points, Linear()) + # Rearrange sample_points to extract the coordinates, which we need + # immediately, and the points, which we'll use shortly + coords, points = zip(*sample_points) + # Create our interpolator once, which we can then use repeatedly + interp = Linear().interpolator(cube, coords) for i in range(trajectory_size): - column_dims = [ - i if dim in squish_my_dims else slice(None) - for dim in range(columns.ndim) - ] - column = columns[tuple(column_dims)] - new_cube.data[..., i] = column.data + # interp is callable with a seqence of arrays. We're using it in + # such a way that each array is length 1 + column = interp([[coord_vals[i]] for coord_vals in points]) + # Now we need to cut down some length 1 coordinates and pop it in + # the cube data + new_cube.data[..., i] = np.squeeze( + column.data, tuple(squish_my_dims) + ) # Fill in the empty squashed (non derived) coords. for column_coord in column.dim_coords + column.aux_coords: src_dims = cube.coord_dims(column_coord) From 6d8af62f079b24f12bb718270e02695a049ceb0b Mon Sep 17 00:00:00 2001 From: Will Benfold Date: Thu, 21 Oct 2021 17:36:27 +0100 Subject: [PATCH 3/9] Generate the extra values then discard them, but with minimal looping --- lib/iris/analysis/trajectory.py | 73 ++++++++++++++++++++++----------- 1 file changed, 49 insertions(+), 24 deletions(-) diff --git a/lib/iris/analysis/trajectory.py b/lib/iris/analysis/trajectory.py index f0c30d1274..8b461401b8 100644 --- a/lib/iris/analysis/trajectory.py +++ b/lib/iris/analysis/trajectory.py @@ -320,30 +320,55 @@ def interpolate(cube, sample_points, method=None): break if method in ["linear", None]: - # Rearrange sample_points to extract the coordinates, which we need - # immediately, and the points, which we'll use shortly - coords, points = zip(*sample_points) - # Create our interpolator once, which we can then use repeatedly - interp = Linear().interpolator(cube, coords) - for i in range(trajectory_size): - # interp is callable with a seqence of arrays. We're using it in - # such a way that each array is length 1 - column = interp([[coord_vals[i]] for coord_vals in points]) - # Now we need to cut down some length 1 coordinates and pop it in - # the cube data - new_cube.data[..., i] = np.squeeze( - column.data, tuple(squish_my_dims) - ) - # Fill in the empty squashed (non derived) coords. - for column_coord in column.dim_coords + column.aux_coords: - src_dims = cube.coord_dims(column_coord) - if not squish_my_dims.isdisjoint(src_dims): - if len(column_coord.points) != 1: - msg = "Expected to find exactly one point. Found {}." - raise Exception(msg.format(column_coord.points)) - new_cube.coord(column_coord.name()).points[ - i - ] = column_coord.points[0] + # Using cube.interpolate will generate extra values that we don't need + # as it makes a grid from the provided coordinates (like a meshgrid) + # and then does interpolation for all of them. This is memory + # inefficient, but significantly more time efficient than calling + # cube.interpolate (or the underlying method on the interpolator) + # repeatedly, so using this approach for now. In future, it would be + # ideal if we only interpolated at the points we care about + columns = cube.interpolate(sample_points, Linear()) + # np.einsum(a, [0, 0], [0]) is like np.diag(a) + # We're using einsum here to do an n-dimensional diagonal, leaving the + # other dimensions unaffected and putting the diagonal's direction on + # the final axis + initial_inds = list(range(1, columns.ndim + 1)) + for ind in squish_my_dims: + initial_inds[ind] = 0 + final_inds = list(filter(lambda x: x != 0, initial_inds)) + [0] + new_cube.data = np.einsum(columns.data, initial_inds, final_inds) + + # Fill in the empty squashed (non derived) coords. + # We're using the same einstein summation plan as for the cube, but + # redoing those indices to match the indices in the coordinates + for columns_coord in columns.dim_coords + columns.aux_coords: + src_dims = cube.coord_dims(columns_coord) + if not squish_my_dims.isdisjoint(src_dims): + # Mapping the cube indicies onto the coord + initial_coord_inds = [initial_inds[ind] for ind in src_dims] + # Making the final ones the same way as for the cube + # 0 will always appear in the initial ones because we know this + # coord overlaps the squish dims + final_coord_inds = list( + filter(lambda x: x != 0, initial_coord_inds) + ) + [0] + new_coord_points = np.einsum( + columns_coord.points, initial_coord_inds, final_coord_inds + ) + # Check we're not overwriting coord.points with the wrong shape + if ( + not new_cube.coord(columns_coord.name()).points.shape + == new_coord_points.shape + ): + msg = "Expected to have new points of shape {}. Found shape of {}." + raise Exception( + msg.format( + new_cube.coord(columns_coord.name()).points.shape, + new_coord_points.shape, + ) + ) + # Replace the points + new_cube.coord(columns_coord.name()).points = new_coord_points elif method == "nearest": # Use a cache with _nearest_neighbour_indices_ndcoords() From 1d8fe8e9dfbb826d6c48a8bf737330a5afc009f9 Mon Sep 17 00:00:00 2001 From: Will Benfold Date: Mon, 22 Nov 2021 14:09:03 +0000 Subject: [PATCH 4/9] Updated docstrings to clarify behaviour --- lib/iris/analysis/__init__.py | 4 ++++ lib/iris/cube.py | 2 ++ 2 files changed, 6 insertions(+) diff --git a/lib/iris/analysis/__init__.py b/lib/iris/analysis/__init__.py index 6d2ee599c8..41b6ec389c 100644 --- a/lib/iris/analysis/__init__.py +++ b/lib/iris/analysis/__init__.py @@ -2696,6 +2696,10 @@ def interpolator(self, cube, coords): dimensions in the result cube caused by scalar values in `sample_points`. + The N arrays of values within `sample_points` will be used to + create an N-d grid of points that will then be sampled (rather than + just N points) + The values for coordinates that correspond to date/times may optionally be supplied as datetime.datetime or cftime.datetime instances. diff --git a/lib/iris/cube.py b/lib/iris/cube.py index 6211f0a727..f0a79d0965 100644 --- a/lib/iris/cube.py +++ b/lib/iris/cube.py @@ -4359,6 +4359,8 @@ def interpolate(self, sample_points, scheme, collapse_scalar=True): interpolate. The values for coordinates that correspond to dates or times may optionally be supplied as datetime.datetime or cftime.datetime instances. + The N pairs supplied will be used to create an N-d grid of points + that will then be sampled (rather than just N points). * scheme: An instance of the type of interpolation to use to interpolate from this :class:`~iris.cube.Cube` to the given sample points. The From 177796131ec73808619e08147911e14397624c08 Mon Sep 17 00:00:00 2001 From: Will Benfold Date: Mon, 22 Nov 2021 14:46:36 +0000 Subject: [PATCH 5/9] Benchmarks for trajectory (linear and nearest neighbour) --- benchmarks/benchmarks/trajectory.py | 48 +++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 benchmarks/benchmarks/trajectory.py diff --git a/benchmarks/benchmarks/trajectory.py b/benchmarks/benchmarks/trajectory.py new file mode 100644 index 0000000000..5c1d10d218 --- /dev/null +++ b/benchmarks/benchmarks/trajectory.py @@ -0,0 +1,48 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the LGPL license. +# See COPYING and COPYING.LESSER in the root of the repository for full +# licensing details. +""" +Trajectory benchmark test + +""" + +# import iris tests first so that some things can be initialised before +# importing anything else +from iris import tests # isort:skip + +import numpy as np + +import iris +from iris.analysis.trajectory import interpolate + + +class TrajectoryInterpolation: + def setup(self) -> None: + # Prepare a cube and a template + + cube_file_path = tests.get_data_path( + ["NetCDF", "regrid", "regrid_xyt.nc"] + ) + self.cube = iris.load_cube(cube_file_path) + + trajectory = np.array( + [np.array((-50 + i, -50 + i)) for i in range(100)] + ) + self.sample_points = [ + ("longitude", trajectory[:, 0]), + ("latitude", trajectory[:, 1]), + ] + + def time_trajectory_linear(self) -> None: + # Regrid the cube onto the template. + out_cube = interpolate(self.cube, self.sample_points, method="linear") + # Realise the data + out_cube.data + + def time_trajectory_nearest(self) -> None: + # Regrid the cube onto the template. + out_cube = interpolate(self.cube, self.sample_points, method="nearest") + # Realise the data + out_cube.data From dc5c9fef95f3d6309fd5ff3b149ad154f72aa86b Mon Sep 17 00:00:00 2001 From: Will Benfold Date: Mon, 22 Nov 2021 17:17:47 +0000 Subject: [PATCH 6/9] Update error thrown for aux coord over interpolated and uninterpolated dimensions, and add unit tests --- lib/iris/analysis/trajectory.py | 8 +- .../analysis/trajectory/test_interpolate.py | 127 ++++++++++++++++++ 2 files changed, 133 insertions(+), 2 deletions(-) diff --git a/lib/iris/analysis/trajectory.py b/lib/iris/analysis/trajectory.py index 8b461401b8..946ae1cb2c 100644 --- a/lib/iris/analysis/trajectory.py +++ b/lib/iris/analysis/trajectory.py @@ -360,9 +360,13 @@ def interpolate(cube, sample_points, method=None): not new_cube.coord(columns_coord.name()).points.shape == new_coord_points.shape ): - msg = "Expected to have new points of shape {}. Found shape of {}." - raise Exception( + msg = ( + "Coord {} was expected to have new points of shape {}. " + "Found shape of {}." + ) + raise ValueError( msg.format( + columns_coord.name(), new_cube.coord(columns_coord.name()).points.shape, new_coord_points.shape, ) diff --git a/lib/iris/tests/unit/analysis/trajectory/test_interpolate.py b/lib/iris/tests/unit/analysis/trajectory/test_interpolate.py index 038019611c..dad781ed74 100644 --- a/lib/iris/tests/unit/analysis/trajectory/test_interpolate.py +++ b/lib/iris/tests/unit/analysis/trajectory/test_interpolate.py @@ -181,5 +181,132 @@ def test_metadata(self): self.assertEqual(result, expected) +class TestLinear(tests.IrisTest): + # Test interpolation with 'linear' method. + # This is basically a wrapper to 'analysis._scipy_interpolate''s + # _RegulardGridInterpolator. That has its own test, so we don't test the + # basic calculation exhaustively here. Instead we check the way it + # handles the source and result cubes (especially coordinates). + + def setUp(self): + cube = iris.tests.stock.simple_3d() + # Actually, this cube *isn't* terribly realistic, as the lat+lon coords + # have integer type, which in this case produces some peculiar results. + # Let's fix that (and not bother to test the peculiar behaviour). + for coord_name in ("longitude", "latitude"): + coord = cube.coord(coord_name) + coord.points = coord.points.astype(float) + self.test_cube = cube + # Set sample point to test single-point linear interpolation operation. + self.single_sample_point = [ + ("latitude", [9]), + ("longitude", [-120]), + ] + # Set expected results of single-point linear interpolation operation. + self.single_sample_result = np.array( + [ + 64 / 15, + 244 / 15, + ] + )[:, np.newaxis] + + def test_single_point_same_cube(self): + # Check exact result matching for a single point. + cube = self.test_cube + result = interpolate(cube, self.single_sample_point, method="linear") + # Check that the result is a single trajectory point, exactly equal to + # the expected part of the original data. + self.assertEqual(result.shape[-1], 1) + self.assertArrayAllClose(result.data, self.single_sample_result) + + def test_multi_point_same_cube(self): + # Check an exact result for multiple points. + cube = self.test_cube + # Use latitude selection to recreate a whole row of the original cube. + sample_points = [ + ("longitude", [-180, -90, 0, 90]), + ("latitude", [0, 0, 0, 0]), + ] + result = interpolate(cube, sample_points, method="linear") + + # The result should be identical to a single latitude section of the + # original, but with modified coords (latitude has 4 repeated zeros). + expected = cube[:, 1, :] + # Result 'longitude' is now an aux coord. + co_x = expected.coord("longitude") + expected.remove_coord(co_x) + expected.add_aux_coord(co_x, 1) + # Result 'latitude' is now an aux coord containing 4*[0]. + expected.remove_coord("latitude") + co_y = AuxCoord( + [0, 0, 0, 0], standard_name="latitude", units="degrees" + ) + expected.add_aux_coord(co_y, 1) + self.assertEqual(result, expected) + + def test_aux_coord_noninterpolation_dim(self): + # Check exact result with an aux-coord mapped to an uninterpolated dim. + cube = self.test_cube + cube.add_aux_coord(DimCoord([17, 19], long_name="aux0"), 0) + + # The result cube should exactly equal a single source point. + result = interpolate(cube, self.single_sample_point, method="linear") + self.assertEqual(result.shape[-1], 1) + self.assertArrayAllClose(result.data, self.single_sample_result) + + def test_aux_coord_one_interp_dim(self): + # Check exact result with an aux-coord over one interpolation dims. + cube = self.test_cube + cube.add_aux_coord(AuxCoord([11, 12, 13, 14], long_name="aux_x"), 2) + + # The result cube should exactly equal a single source point. + result = interpolate(cube, self.single_sample_point, method="linear") + self.assertEqual(result.shape[-1], 1) + self.assertArrayAllClose(result.data, self.single_sample_result) + + def test_aux_coord_both_interp_dims(self): + # Check exact result with an aux-coord over both interpolation dims. + cube = self.test_cube + cube.add_aux_coord( + AuxCoord( + [[11, 12, 13, 14], [21, 22, 23, 24], [31, 32, 33, 34]], + long_name="aux_xy", + ), + (1, 2), + ) + + # The result cube should exactly equal a single source point. + result = interpolate(cube, self.single_sample_point, method="linear") + self.assertEqual(result.shape[-1], 1) + self.assertArrayAllClose(result.data, self.single_sample_result) + + def test_aux_coord_fail_mixed_dims(self): + # Check behaviour with an aux-coord mapped over both interpolation and + # non-interpolation dims : not supported. + cube = self.test_cube + cube.add_aux_coord( + AuxCoord( + [[111, 112, 113, 114], [211, 212, 213, 214]], + long_name="aux_0x", + ), + (0, 2), + ) + msg = "Coord aux_0x was expected to have new points of shape .*\\. Found shape of .*\\." + with self.assertRaisesRegex(ValueError, msg): + interpolate(cube, self.single_sample_point, method="linear") + + def test_metadata(self): + # Check exact result matching for a single point, with additional + # attributes and cell-methods. + cube = self.test_cube + cube.attributes["ODD_ATTR"] = "string-value-example" + cube.add_cell_method(iris.coords.CellMethod("mean", "area")) + result = interpolate(cube, self.single_sample_point, method="linear") + # Check that the result is a single trajectory point, exactly equal to + # the expected part of the original data. + self.assertEqual(result.shape[-1], 1) + self.assertArrayAllClose(result.data, self.single_sample_result) + + if __name__ == "__main__": tests.main() From 354552c846256d6568fee1891ab1a013a8e17d55 Mon Sep 17 00:00:00 2001 From: Will Benfold Date: Fri, 4 Mar 2022 15:53:03 +0000 Subject: [PATCH 7/9] Update cml --- .../tests/results/netcdf/save_load_traj.cml | 4 +-- .../results/trajectory/constant_latitude.cml | 10 +++---- .../tests/results/trajectory/single_point.cml | 8 ++--- lib/iris/tests/results/trajectory/zigzag.cml | 29 ++++++++++++------- 4 files changed, 29 insertions(+), 22 deletions(-) diff --git a/lib/iris/tests/results/netcdf/save_load_traj.cml b/lib/iris/tests/results/netcdf/save_load_traj.cml index 7f8b3d7e99..9b225d127f 100644 --- a/lib/iris/tests/results/netcdf/save_load_traj.cml +++ b/lib/iris/tests/results/netcdf/save_load_traj.cml @@ -1,6 +1,6 @@ - + @@ -36,6 +36,6 @@ - + diff --git a/lib/iris/tests/results/trajectory/constant_latitude.cml b/lib/iris/tests/results/trajectory/constant_latitude.cml index 7990edada5..f07f1a131f 100644 --- a/lib/iris/tests/results/trajectory/constant_latitude.cml +++ b/lib/iris/tests/results/trajectory/constant_latitude.cml @@ -1,6 +1,6 @@ - + @@ -16,13 +16,13 @@ + -0.1188, -0.1188]" shape="(100,)" standard_name="grid_latitude" units="Unit('degrees')" value_type="float64"> - + @@ -94,6 +94,6 @@ - + diff --git a/lib/iris/tests/results/trajectory/single_point.cml b/lib/iris/tests/results/trajectory/single_point.cml index 393ad5e335..a1cf83a03b 100644 --- a/lib/iris/tests/results/trajectory/single_point.cml +++ b/lib/iris/tests/results/trajectory/single_point.cml @@ -1,6 +1,6 @@ - + @@ -15,12 +15,12 @@ 347921.666667, 347921.833333, 347922.0]" shape="(6,)" standard_name="forecast_reference_time" units="Unit('hours since 1970-01-01 00:00:00', calendar='gregorian')" value_type="float64"/> - + - + @@ -92,6 +92,6 @@ - + diff --git a/lib/iris/tests/results/trajectory/zigzag.cml b/lib/iris/tests/results/trajectory/zigzag.cml index 250500786c..3bdb3e9013 100644 --- a/lib/iris/tests/results/trajectory/zigzag.cml +++ b/lib/iris/tests/results/trajectory/zigzag.cml @@ -1,6 +1,6 @@ - + @@ -14,19 +14,26 @@ - + - + @@ -52,6 +59,6 @@ - + From 3fd867d435860395185522975767f51ee22e8447 Mon Sep 17 00:00:00 2001 From: Will Benfold Date: Fri, 4 Mar 2022 15:57:13 +0000 Subject: [PATCH 8/9] What's new has moved --- docs/src/whatsnew/latest.rst.template | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/src/whatsnew/latest.rst.template b/docs/src/whatsnew/latest.rst.template index 1b36d3f0b0..3696991f5f 100644 --- a/docs/src/whatsnew/latest.rst.template +++ b/docs/src/whatsnew/latest.rst.template @@ -72,7 +72,8 @@ NOTE: section above is a template for bugfix patches 🚀 Performance Enhancements =========================== -#. N/A +#. `@wjbenfold`_ improved the speed of linear interpolation using + :meth:`iris.analysis.trajectory.interpolate` (:pull:`4366`) 🔥 Deprecations From 9cf67115c297b8860cafb506d2203d92499af146 Mon Sep 17 00:00:00 2001 From: Will Benfold Date: Mon, 9 May 2022 09:40:24 +0100 Subject: [PATCH 9/9] What's new fix for rebase --- docs/src/whatsnew/latest.rst | 4 ++++ docs/src/whatsnew/latest.rst.template | 3 +-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index 05d1e3fcdd..75d24697f5 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -136,6 +136,10 @@ This document explains the changes made to Iris for this release aggregation method to be used on masked data when the missing data tolerance is set to 0. (:issue:`4735`, :pull:`4755`) +#. `@wjbenfold`_ improved the speed of linear interpolation using + :meth:`iris.analysis.trajectory.interpolate` (:pull:`4366`) + + 🔥 Deprecations =============== diff --git a/docs/src/whatsnew/latest.rst.template b/docs/src/whatsnew/latest.rst.template index 3696991f5f..1b36d3f0b0 100644 --- a/docs/src/whatsnew/latest.rst.template +++ b/docs/src/whatsnew/latest.rst.template @@ -72,8 +72,7 @@ NOTE: section above is a template for bugfix patches 🚀 Performance Enhancements =========================== -#. `@wjbenfold`_ improved the speed of linear interpolation using - :meth:`iris.analysis.trajectory.interpolate` (:pull:`4366`) +#. N/A 🔥 Deprecations