diff --git a/benchmarks/benchmarks/__init__.py b/benchmarks/benchmarks/__init__.py
index 4a964a648d..38502c9306 100644
--- a/benchmarks/benchmarks/__init__.py
+++ b/benchmarks/benchmarks/__init__.py
@@ -4,5 +4,69 @@
 # See COPYING and COPYING.LESSER in the root of the repository for full
 # licensing details.
 """Common code for benchmarks."""
+import resource
+
+from .generate_data import BENCHMARK_DATA, run_function_elsewhere
 
 ARTIFICIAL_DIM_SIZE = int(10e3)  # For all artificial cubes, coords etc.
+
+
+def disable_repeat_between_setup(benchmark_object):
+    """
+    Decorator for benchmarks where object persistence would be inappropriate.
+
+    E.g.:
+    * Benchmarking data realisation
+    * Benchmarking Cube coord addition
+
+    Can be applied to benchmark classes/methods/functions.
+
+    https://asv.readthedocs.io/en/stable/benchmarks.html#timing-benchmarks
+
+    """
+    # Prevent repeat runs between setup() runs - object(s) will persist after 1st.
+    benchmark_object.number = 1
+    # Compensate for reduced certainty by increasing number of repeats.
+    #  (setup() is run between each repeat).
+    # Minimum 5 repeats, run up to 30 repeats / 20 secs whichever comes first.
+    benchmark_object.repeat = (5, 30, 20.0)
+    # ASV uses warmup to estimate benchmark time before planning the real run.
+    # Prevent this, since object(s) will persist after the first warmup run,
+    #  which would give ASV misleading info (warmups ignore ``number``).
+    benchmark_object.warmup_time = 0.0
+
+    return benchmark_object
+
+
+class TrackAddedMemoryAllocation:
+    """
+    Context manager which measures how much process resident memory grows
+    during execution of its enclosed code block.
+
+    Obviously limited as to what it actually measures: it relies on the
+    current process not having significant unused (de-allocated) memory when
+    the tested code block runs, and it is only reliable when the code
+    allocates a significant amount of new memory.
+
+    Example:
+        with TrackAddedMemoryAllocation() as mb:
+            initial_call()
+            other_call()
+        result = mb.addedmem_mb()
+
+    """
+
+    @staticmethod
+    def process_resident_memory_mb():
+        return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.0
+
+    def __enter__(self):
+        self.mb_before = self.process_resident_memory_mb()
+        return self
+
+    def __exit__(self, *_):
+        self.mb_after = self.process_resident_memory_mb()
+
+    def addedmem_mb(self):
+        """Return measured memory growth, in Mb."""
+        return self.mb_after - self.mb_before
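+
+
+# A minimal usage sketch for the two helpers above (illustrative only -
+#  ``make_lazy_cube`` is a hypothetical stand-in for real setup code):
+#
+#     @disable_repeat_between_setup
+#     class Realisation:
+#         def setup(self):
+#             self.cube = make_lazy_cube()
+#
+#         def track_addedmem_realise(self):
+#             with TrackAddedMemoryAllocation() as mb:
+#                 self.cube.data  # realise the lazy data
+#             return mb.addedmem_mb()
+#
+#     Realisation.track_addedmem_realise.unit = "Mb"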
diff --git a/benchmarks/benchmarks/aux_factory.py b/benchmarks/benchmarks/aux_factory.py
index 270119da71..45bfa1b515 100644
--- a/benchmarks/benchmarks/aux_factory.py
+++ b/benchmarks/benchmarks/aux_factory.py
@@ -10,9 +10,10 @@
 
 import numpy as np
 
-from benchmarks import ARTIFICIAL_DIM_SIZE
 from iris import aux_factory, coords
 
+from . import ARTIFICIAL_DIM_SIZE
+
 
 class FactoryCommon:
     # TODO: once https://github.com/airspeed-velocity/asv/pull/828 is released:
diff --git a/benchmarks/benchmarks/coords.py b/benchmarks/benchmarks/coords.py
index fce7318d49..5cea1e1e2e 100644
--- a/benchmarks/benchmarks/coords.py
+++ b/benchmarks/benchmarks/coords.py
@@ -10,9 +10,10 @@
 
 import numpy as np
 
-from benchmarks import ARTIFICIAL_DIM_SIZE
 from iris import coords
 
+from . import ARTIFICIAL_DIM_SIZE, disable_repeat_between_setup
+
 
 def setup():
     """General variables needed by multiple benchmark classes."""
@@ -92,6 +93,23 @@ def setup(self):
     def create(self):
         return coords.AuxCoord(**self.create_kwargs)
 
+    def time_points(self):
+        _ = self.component.points
+
+    def time_bounds(self):
+        _ = self.component.bounds
+
+
+@disable_repeat_between_setup
+class AuxCoordLazy(AuxCoord):
+    """Lazy equivalent of :class:`AuxCoord`."""
+
+    def setup(self):
+        super().setup()
+        self.create_kwargs["points"] = self.component.lazy_points()
+        self.create_kwargs["bounds"] = self.component.lazy_bounds()
+        self.setup_common()
+
 
 class CellMeasure(CoordCommon):
     def setup(self):
diff --git a/benchmarks/benchmarks/cube.py b/benchmarks/benchmarks/cube.py
index 3cfa6b248b..8a12391684 100644
--- a/benchmarks/benchmarks/cube.py
+++ b/benchmarks/benchmarks/cube.py
@@ -10,11 +10,13 @@
 
 import numpy as np
 
-from benchmarks import ARTIFICIAL_DIM_SIZE
 from iris import analysis, aux_factory, coords, cube
 
+from . import ARTIFICIAL_DIM_SIZE, disable_repeat_between_setup
+from .generate_data.stock import sample_meshcoord
 
-def setup():
+
+def setup(*params):
     """General variables needed by multiple benchmark classes."""
     global data_1d
     global data_2d
@@ -170,6 +172,44 @@ def setup(self):
         self.setup_common()
 
 
+class MeshCoord:
+    params = [
+        6,  # minimal cube-sphere
+        int(1e6),  # realistic cube-sphere size
+        ARTIFICIAL_DIM_SIZE,  # To match size in :class:`AuxCoord`
+    ]
+    param_names = ["number of faces"]
+
+    def setup(self, n_faces):
+        mesh_kwargs = dict(
+            n_nodes=n_faces + 2, n_edges=n_faces * 2, n_faces=n_faces
+        )
+
+        self.mesh_coord = sample_meshcoord(sample_mesh_kwargs=mesh_kwargs)
+        self.data = np.zeros(n_faces)
+        self.cube_blank = cube.Cube(data=self.data)
+        self.cube = self.create()
+
+    def create(self):
+        return cube.Cube(
+            data=self.data, aux_coords_and_dims=[(self.mesh_coord, 0)]
+        )
+
+    def time_create(self, n_faces):
+        _ = self.create()
+
+    @disable_repeat_between_setup
+    def time_add(self, n_faces):
+        self.cube_blank.add_aux_coord(self.mesh_coord, 0)
+
+    @disable_repeat_between_setup
+    def time_remove(self, n_faces):
+        self.cube.remove_coord(self.mesh_coord)
+
+    def time_return(self, n_faces):
+        _ = self.cube
+
+
 class Merge:
     def setup(self):
         self.cube_list = cube.CubeList()
diff --git a/benchmarks/benchmarks/experimental/__init__.py b/benchmarks/benchmarks/experimental/__init__.py
new file mode 100644
index 0000000000..f16e400bce
--- /dev/null
+++ b/benchmarks/benchmarks/experimental/__init__.py
@@ -0,0 +1,9 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the LGPL license.
+# See COPYING and COPYING.LESSER in the root of the repository for full
+# licensing details.
+"""
+Benchmark tests for the experimental module.
+
+"""
diff --git a/benchmarks/benchmarks/experimental/ugrid.py b/benchmarks/benchmarks/experimental/ugrid.py
new file mode 100644
index 0000000000..609abbe77c
--- /dev/null
+++ b/benchmarks/benchmarks/experimental/ugrid.py
@@ -0,0 +1,195 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the LGPL license.
+# See COPYING and COPYING.LESSER in the root of the repository for full
+# licensing details.
+"""
+Benchmark tests for the experimental.ugrid module.
+
+"""
+
+from copy import deepcopy
+
+import numpy as np
+
+from iris.experimental import ugrid
+
+from .. import ARTIFICIAL_DIM_SIZE, disable_repeat_between_setup
+from ..generate_data.stock import sample_mesh
+
+
+class UGridCommon:
+    """
+    A base class running a generalised suite of benchmarks for any ugrid
+    object. The target object is specified by each subclass.
+
+    ASV will run the benchmarks within this class for any subclasses.
+
+    ASV will not benchmark this class, as setup() triggers a
+    NotImplementedError. (ASV has not yet released ABC/abstractmethod
+    support - asv#838).
+
+    """
+
+    params = [
+        6,  # minimal cube-sphere
+        int(1e6),  # realistic cube-sphere size
+    ]
+    param_names = ["number of faces"]
+
+    def setup(self, *params):
+        self.object = self.create()
+
+    def create(self):
+        raise NotImplementedError
+
+    def time_create(self, *params):
+        """Create an instance of the benchmarked object. The create() method
+        is specified in the subclass."""
+        self.create()
+
+    def time_return(self, *params):
+        """Return an instance of the benchmarked object."""
+        _ = self.object
+
+
+class Connectivity(UGridCommon):
+    def setup(self, n_faces):
+        self.array = np.zeros([n_faces, 3], dtype=int)
+        super().setup(n_faces)
+
+    def create(self):
+        return ugrid.Connectivity(
+            indices=self.array, cf_role="face_node_connectivity"
+        )
+
+    def time_indices(self, n_faces):
+        _ = self.object.indices
+
+    def time_location_lengths(self, n_faces):
+        # Proofed against the Connectivity name change (633ed17).
+        if getattr(self.object, "src_lengths", False):
+            meth = self.object.src_lengths
+        else:
+            meth = self.object.location_lengths
+        _ = meth()
+
+    def time_validate_indices(self, n_faces):
+        self.object.validate_indices()
+
+
+@disable_repeat_between_setup
+class ConnectivityLazy(Connectivity):
+    """Lazy equivalent of :class:`Connectivity`."""
+
+    def setup(self, n_faces):
+        super().setup(n_faces)
+        self.array = self.object.lazy_indices()
+        self.object = self.create()
+
+
+class Mesh(UGridCommon):
+    def setup(self, n_faces, lazy=False):
+        ####
+        # Steal everything from the sample mesh for benchmarking creation of a
+        #  brand new mesh.
+        source_mesh = sample_mesh(
+            n_nodes=n_faces + 2,
+            n_edges=n_faces * 2,
+            n_faces=n_faces,
+            lazy_values=lazy,
+        )
+
+        def get_coords_and_axes(location):
+            search_kwargs = {f"include_{location}s": True}
+            return [
+                (source_mesh.coord(axis=axis, **search_kwargs), axis)
+                for axis in ("x", "y")
+            ]
+
+        self.mesh_kwargs = dict(
+            topology_dimension=source_mesh.topology_dimension,
+            node_coords_and_axes=get_coords_and_axes("node"),
+            connectivities=source_mesh.connectivities(),
+            edge_coords_and_axes=get_coords_and_axes("edge"),
+            face_coords_and_axes=get_coords_and_axes("face"),
+        )
+        ####
+
+        super().setup(n_faces)
+
+        self.face_node = self.object.face_node_connectivity
+        self.node_x = self.object.node_coords.node_x
+        # Kwargs for reuse in search and remove methods.
+        self.connectivities_kwarg = dict(cf_role="edge_node_connectivity")
+        self.coords_kwarg = dict(include_faces=True)
+
+        # TODO: an opportunity for speeding up runtime if needed, since
+        #  eq_object is not needed for all benchmarks. Just don't generate it
+        #  within a benchmark - the execution time is large enough that it
+        #  could be a significant portion of the benchmark - makes regressions
+        #  smaller and could even pick up regressions in copying instead!
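+        # A deepcopy is used (rather than re-using self.object) so that the
+        #  ``==`` benchmark below compares equal-but-distinct objects -
+        #  equality presumably cannot then short-circuit on object identity.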
+        self.eq_object = deepcopy(self.object)
+
+    def create(self):
+        return ugrid.Mesh(**self.mesh_kwargs)
+
+    def time_add_connectivities(self, n_faces):
+        self.object.add_connectivities(self.face_node)
+
+    def time_add_coords(self, n_faces):
+        self.object.add_coords(node_x=self.node_x)
+
+    def time_connectivities(self, n_faces):
+        _ = self.object.connectivities(**self.connectivities_kwarg)
+
+    def time_coords(self, n_faces):
+        _ = self.object.coords(**self.coords_kwarg)
+
+    def time_eq(self, n_faces):
+        _ = self.object == self.eq_object
+
+    def time_remove_connectivities(self, n_faces):
+        self.object.remove_connectivities(**self.connectivities_kwarg)
+
+    def time_remove_coords(self, n_faces):
+        self.object.remove_coords(**self.coords_kwarg)
+
+
+@disable_repeat_between_setup
+class MeshLazy(Mesh):
+    """Lazy equivalent of :class:`Mesh`."""
+
+    def setup(self, n_faces, lazy=True):
+        super().setup(n_faces, lazy=lazy)
+
+
+class MeshCoord(UGridCommon):
+    # Add extra parameter value to match AuxCoord benchmarking.
+    params = UGridCommon.params + [ARTIFICIAL_DIM_SIZE]
+
+    def setup(self, n_faces, lazy=False):
+        self.mesh = sample_mesh(
+            n_nodes=n_faces + 2,
+            n_edges=n_faces * 2,
+            n_faces=n_faces,
+            lazy_values=lazy,
+        )
+
+        super().setup(n_faces)
+
+    def create(self):
+        return ugrid.MeshCoord(mesh=self.mesh, location="face", axis="x")
+
+    def time_points(self, n_faces):
+        _ = self.object.points
+
+    def time_bounds(self, n_faces):
+        _ = self.object.bounds
+
+
+@disable_repeat_between_setup
+class MeshCoordLazy(MeshCoord):
+    """Lazy equivalent of :class:`MeshCoord`."""
+
+    def setup(self, n_faces, lazy=True):
+        super().setup(n_faces, lazy=lazy)
diff --git a/benchmarks/benchmarks/generate_data/__init__.py b/benchmarks/benchmarks/generate_data/__init__.py
index a56f2e4623..125e2e1b53 100644
--- a/benchmarks/benchmarks/generate_data/__init__.py
+++ b/benchmarks/benchmarks/generate_data/__init__.py
@@ -16,11 +16,18 @@
 benchmark sequence runs over two different Python versions.
 
 """
+from contextlib import contextmanager
 from inspect import getsource
 from os import environ
 from pathlib import Path
 from subprocess import CalledProcessError, check_output, run
 from textwrap import dedent
+from typing import Iterable
+
+from iris import load_cube as iris_loadcube
+from iris._lazy_data import as_concrete_data
+from iris.experimental.ugrid import PARSE_UGRID_ON_LOAD
+from iris.fileformats import netcdf
 
 #: Python executable used by :func:`run_function_elsewhere`, set via env
 #: variable of same name. Must be path of Python within an environment that
@@ -92,3 +99,99 @@ def run_function_elsewhere(func_to_run, *args, **kwargs):
         [DATA_GEN_PYTHON, "-c", python_string], capture_output=True, check=True
     )
     return result.stdout
+
+
+def generate_cube_like_2d_cubesphere(
+    n_cube: int, with_mesh: bool, output_path: str
+):
+    """
+    Construct and save to file an LFRic cubesphere-like cube for a given
+    cubesphere size, *or* a simpler structured (UM-like) cube of equivalent
+    size.
+
+    NOTE: this function is *NEVER* called from within this actual package.
+    Instead, it is to be called via benchmarks.remote_data_generation,
+    so that it can use up-to-date facilities, independent of the ASV controlled
+    environment which contains the "Iris commit under test".
+    This means:
+    * it must be completely self-contained: i.e. it includes all its
+      own imports, and saves results to an output file.
+
+    """
+    from iris import save
+    from iris.tests.stock.mesh import sample_mesh, sample_mesh_cube
+
+    n_face_nodes = n_cube * n_cube
+    n_faces = 6 * n_face_nodes
+
+    # Set n_nodes=n_faces and n_edges=2*n_faces
+    #  : Not exact, but similar to a 'real' cubesphere.
+    n_nodes = n_faces
+    n_edges = 2 * n_faces
+    if with_mesh:
+        mesh = sample_mesh(
+            n_nodes=n_nodes, n_faces=n_faces, n_edges=n_edges, lazy_values=True
+        )
+        cube = sample_mesh_cube(mesh=mesh, n_z=1)
+    else:
+        cube = sample_mesh_cube(nomesh_faces=n_faces, n_z=1)
+
+    # Strip off the 'extra' aux-coord mapping the mesh, which sample-cube adds
+    #  but which we don't want.
+    cube.remove_coord("mesh_face_aux")
+
+    # Save the result to a named file.
+    save(cube, output_path)
+
+
+def make_cube_like_2d_cubesphere(n_cube: int, with_mesh: bool):
+    """
+    Generate an LFRic cubesphere-like cube for a given cubesphere size,
+    *or* a simpler structured (UM-like) cube of equivalent size.
+
+    All the cube data, coords and mesh content are LAZY, and produced without
+    allocating large real arrays (to allow peak-memory testing).
+
+    NOTE: the actual cube generation is done in a stable Iris environment via
+    benchmarks.remote_data_generation, so it is all channelled via cached
+    netcdf files in our common testdata directory.
+
+    """
+    identifying_filename = (
+        f"cube_like_2d_cubesphere_C{n_cube}_Mesh={with_mesh}.nc"
+    )
+    filepath = BENCHMARK_DATA / identifying_filename
+    if not filepath.exists():
+        # Create the required testfile, by running the generation code
+        #  remotely in a 'fixed' python environment.
+        run_function_elsewhere(
+            generate_cube_like_2d_cubesphere,
+            n_cube,
+            with_mesh=with_mesh,
+            output_path=str(filepath),
+        )
+
+    # File now *should* definitely exist: content is simply the desired cube.
+    with PARSE_UGRID_ON_LOAD.context():
+        with load_realised():
+            cube = iris_loadcube(str(filepath))
+    return cube
+
+
+@contextmanager
+def load_realised():
+    """
+    Force NetCDF loading with realised arrays.
+
+    Data is passed between the data generation and benchmarking environments
+    via file loading, but some benchmarks are only meaningful if they start
+    with real (non-lazy) arrays.
+    """
+    from iris.fileformats.netcdf import _get_cf_var_data as pre_patched
+
+    def patched(cf_var, filename):
+        return as_concrete_data(pre_patched(cf_var, filename))
+
+    netcdf._get_cf_var_data = patched
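+    # NOTE: the patch is process-global for the duration of the context -
+    #  presumably fine for serial benchmarking, but not concurrency-safe.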
+    try:
+        yield netcdf
+    finally:
+        # Always un-patch, even if the enclosed block raises.
+        netcdf._get_cf_var_data = pre_patched
diff --git a/benchmarks/benchmarks/generate_data/stock.py b/benchmarks/benchmarks/generate_data/stock.py
new file mode 100644
index 0000000000..e352147fc8
--- /dev/null
+++ b/benchmarks/benchmarks/generate_data/stock.py
@@ -0,0 +1,126 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the LGPL license.
+# See COPYING and COPYING.LESSER in the root of the repository for full
+# licensing details.
+"""
+Wrappers for using :mod:`iris.tests.stock` methods for benchmarking.
+
+See :mod:`benchmarks.generate_data` for an explanation of this structure.
+"""
+import hashlib
+from pathlib import Path
+
+from iris.experimental.ugrid import PARSE_UGRID_ON_LOAD, load_mesh
+
+from . import BENCHMARK_DATA, REUSE_DATA, load_realised, run_function_elsewhere
+
+
+def _hash_args(args):
+    # Deterministic alternative to the builtin hash() - builtin str hashes
+    #  are salted per-process, which would generate different cache filenames
+    #  on every run, defeating REUSE_DATA.
+    return hashlib.sha256(str(args).encode()).hexdigest()[:16]
+
+
+def create_file__xios_2d_face_half_levels(
+    temp_file_dir, dataset_name, n_faces=866, n_times=1
+):
+    """
+    Wrapper for :func:`iris.tests.stock.netcdf.create_file__xios_2d_face_half_levels`.
+
+    Takes control of temp_file_dir, so that file locations are managed by
+    :mod:`benchmarks.generate_data`.
+
+    todo: is create_file__xios_2d_face_half_levels still appropriate now we
+     can properly save Mesh Cubes?
+
+    """
+
+    def _external(*args, **kwargs):
+        from iris.tests.stock.netcdf import (
+            create_file__xios_2d_face_half_levels,
+        )
+
+        print(create_file__xios_2d_face_half_levels(*args, **kwargs), end="")
+
+    args_list = [dataset_name, n_faces, n_times]
+    args_hash = _hash_args(args_list)
+    save_path = (
+        BENCHMARK_DATA / f"create_file__xios_2d_face_half_levels_{args_hash}"
+    ).with_suffix(".nc")
+    if not REUSE_DATA or not save_path.is_file():
+        # create_file__xios_2d_face_half_levels takes control of save location
+        #  so need to move to a more specific name that allows re-use.
+        actual_path = run_function_elsewhere(
+            _external, str(BENCHMARK_DATA), *args_list
+        )
+        Path(actual_path.decode()).replace(save_path)
+    return save_path
+
+
+def sample_mesh(n_nodes=None, n_faces=None, n_edges=None, lazy_values=False):
+    """Wrapper for :func:`iris.tests.stock.mesh.sample_mesh`."""
+
+    def _external(*args, **kwargs):
+        from iris.experimental.ugrid import save_mesh
+        from iris.tests.stock.mesh import sample_mesh
+
+        save_path_ = kwargs.pop("save_path")
+        # Always saving, so laziness is irrelevant. Use lazy to save time.
+        kwargs["lazy_values"] = True
+        new_mesh = sample_mesh(*args, **kwargs)
+        save_mesh(new_mesh, save_path_)
+
+    arg_list = [n_nodes, n_faces, n_edges]
+    args_hash = _hash_args(arg_list)
+    save_path = (BENCHMARK_DATA / f"sample_mesh_{args_hash}").with_suffix(
+        ".nc"
+    )
+    if not REUSE_DATA or not save_path.is_file():
+        _ = run_function_elsewhere(
+            _external, *arg_list, save_path=str(save_path)
+        )
+    with PARSE_UGRID_ON_LOAD.context():
+        if not lazy_values:
+            # Realise everything.
+            with load_realised():
+                mesh = load_mesh(str(save_path))
+        else:
+            mesh = load_mesh(str(save_path))
+    return mesh
+
+
+def sample_meshcoord(sample_mesh_kwargs=None, location="face", axis="x"):
+    """
+    Wrapper for :func:`iris.tests.stock.mesh.sample_meshcoord`.
+
+    Parameters deviate from the original, as a
+    :class:`iris.experimental.ugrid.Mesh` cannot be passed to the separate
+    Python instance - the Mesh must be generated there as well.
+
+    MeshCoords cannot be saved to file, so the _external method saves the
+    MeshCoord's Mesh, then the original Python instance loads in that Mesh and
+    regenerates the MeshCoord from there.
+    """
+
+    def _external(sample_mesh_kwargs_, save_path_):
+        from iris.experimental.ugrid import save_mesh
+        from iris.tests.stock.mesh import sample_mesh, sample_meshcoord
+
+        if sample_mesh_kwargs_:
+            input_mesh = sample_mesh(**sample_mesh_kwargs_)
+        else:
+            input_mesh = None
+        # Don't parse the location or axis arguments - only saving the Mesh
+        #  at this stage.
+        new_meshcoord = sample_meshcoord(mesh=input_mesh)
+        save_mesh(new_meshcoord.mesh, save_path_)
+
+    args_hash = _hash_args(sample_mesh_kwargs)
+    save_path = (
+        BENCHMARK_DATA / f"sample_mesh_coord_{args_hash}"
+    ).with_suffix(".nc")
+    if not REUSE_DATA or not save_path.is_file():
+        _ = run_function_elsewhere(
+            _external,
+            sample_mesh_kwargs_=sample_mesh_kwargs,
+            save_path_=str(save_path),
+        )
+    with PARSE_UGRID_ON_LOAD.context():
+        with load_realised():
+            source_mesh = load_mesh(str(save_path))
+    # Regenerate MeshCoord from its Mesh, which we saved.
+    return source_mesh.to_MeshCoord(location=location, axis=axis)
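+
+
+# Example use (illustrative only - the sizes are arbitrary):
+#
+#     mesh = sample_mesh(n_nodes=8, n_faces=6, n_edges=12)
+#     mesh_coord = sample_meshcoord(
+#         sample_mesh_kwargs=dict(n_nodes=8, n_faces=6, n_edges=12)
+#     )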
diff --git a/benchmarks/benchmarks/iterate.py b/benchmarks/benchmarks/iterate.py
index 20422750ef..0a5415ac2b 100644
--- a/benchmarks/benchmarks/iterate.py
+++ b/benchmarks/benchmarks/iterate.py
@@ -9,9 +9,10 @@
 """
 import numpy as np
 
-from benchmarks import ARTIFICIAL_DIM_SIZE
 from iris import coords, cube, iterate
 
+from . import ARTIFICIAL_DIM_SIZE
+
 
 def setup():
     """General variables needed by multiple benchmark classes."""
diff --git a/benchmarks/benchmarks/mixin.py b/benchmarks/benchmarks/mixin.py
index e78b150438..bec5518eee 100644
--- a/benchmarks/benchmarks/mixin.py
+++ b/benchmarks/benchmarks/mixin.py
@@ -10,10 +10,11 @@
 
 import numpy as np
 
-from benchmarks import ARTIFICIAL_DIM_SIZE
 from iris import coords
 from iris.common.metadata import AncillaryVariableMetadata
 
+from . import ARTIFICIAL_DIM_SIZE
+
 LONG_NAME = "air temperature"
 STANDARD_NAME = "air_temperature"
 VAR_NAME = "air_temp"
diff --git a/benchmarks/benchmarks/netcdf_save.py b/benchmarks/benchmarks/netcdf_save.py
new file mode 100644
index 0000000000..c7580b3c63
--- /dev/null
+++ b/benchmarks/benchmarks/netcdf_save.py
@@ -0,0 +1,61 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the LGPL license.
+# See COPYING and COPYING.LESSER in the root of the repository for full
+# licensing details.
+"""
+Cubesphere-like netcdf saving benchmarks.
+
+Where possible benchmarks should be parameterised for two sizes of input data:
+  * minimal: enables detection of regressions in parts of the run-time that do
+             NOT scale with data size.
+  * large: large enough to exclusively detect regressions in parts of the
+           run-time that scale with data size. Aim for benchmark time ~20x
+           that of the minimal benchmark.
+
+"""
+from iris import save
+from iris.experimental.ugrid import save_mesh
+
+from . import TrackAddedMemoryAllocation
+from .generate_data import make_cube_like_2d_cubesphere
+
+
+class NetcdfSave:
+    params = [[1, 600], [False, True]]
+    param_names = ["cubesphere-N", "is_unstructured"]
+
+    def setup(self, n_cubesphere, is_unstructured):
+        self.cube = make_cube_like_2d_cubesphere(
+            n_cube=n_cubesphere, with_mesh=is_unstructured
+        )
+
+    def _save_data(self, cube, do_copy=True):
+        if do_copy:
+            # Copy the cube to avoid distorting the results by modifying it,
+            #  because we know that older Iris code realises lazy coords.
+            cube = cube.copy()
+        save(cube, "tmp.nc")
+
+    def _save_mesh(self, cube):
+        # In this case, we are happy that the mesh is *not* modified.
+        save_mesh(cube.mesh, "mesh.nc")
+
+    def time_netcdf_save_cube(self, n_cubesphere, is_unstructured):
+        self._save_data(self.cube)
+
+    def time_netcdf_save_mesh(self, n_cubesphere, is_unstructured):
+        if is_unstructured:
+            self._save_mesh(self.cube)
+
+    def track_addedmem_netcdf_save(self, n_cubesphere, is_unstructured):
+        cube = self.cube.copy()  # Do this outside the tracked block.
+        with TrackAddedMemoryAllocation() as mb:
+            self._save_data(cube, do_copy=False)
+        return mb.addedmem_mb()
+
+
+# Declare a 'Mb' unit for all 'track_addedmem_..' type benchmarks
+for attr in dir(NetcdfSave):
+    if attr.startswith("track_addedmem_"):
+        getattr(NetcdfSave, attr).unit = "Mb"
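+# ('track_*' benchmarks are ASV's generic "tracking" type, which has no
+#  implicit unit - ASV reads the ``unit`` attribute when reporting results.)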
diff --git a/benchmarks/benchmarks/plot.py b/benchmarks/benchmarks/plot.py
index 24899776dc..75195c86e9 100644
--- a/benchmarks/benchmarks/plot.py
+++ b/benchmarks/benchmarks/plot.py
@@ -10,9 +10,10 @@
 import matplotlib
 import numpy as np
 
-from benchmarks import ARTIFICIAL_DIM_SIZE
 from iris import coords, cube, plot
 
+from . import ARTIFICIAL_DIM_SIZE
+
 matplotlib.use("agg")
diff --git a/benchmarks/benchmarks/regions_combine.py b/benchmarks/benchmarks/regions_combine.py
new file mode 100644
index 0000000000..a99dc57263
--- /dev/null
+++ b/benchmarks/benchmarks/regions_combine.py
@@ -0,0 +1,268 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the LGPL license.
+# See COPYING and COPYING.LESSER in the root of the repository for full
+# licensing details.
+"""
+Benchmarks stages of operation of the function
+:func:`iris.experimental.ugrid.utils.recombine_submeshes`.
+
+Where possible benchmarks should be parameterised for two sizes of input data:
+  * minimal: enables detection of regressions in parts of the run-time that do
+             NOT scale with data size.
+  * large: large enough to exclusively detect regressions in parts of the
+           run-time that scale with data size. Aim for benchmark time ~20x
+           that of the minimal benchmark.
+
+"""
+import os
+
+import dask.array as da
+import numpy as np
+
+from iris import load, load_cube, save
+from iris.experimental.ugrid import PARSE_UGRID_ON_LOAD
+from iris.experimental.ugrid.utils import recombine_submeshes
+
+from . import TrackAddedMemoryAllocation
+from .generate_data import make_cube_like_2d_cubesphere
+
+
+class MixinCombineRegions:
+    # Characterise time taken + memory-allocated, for various stages of
+    #  combine operations on cubesphere-like test data.
+    params = [4, 500]
+    param_names = ["cubesphere-N"]
+
+    def _parametrised_cache_filename(self, n_cubesphere, content_name):
+        return f"cube_C{n_cubesphere}_{content_name}.nc"
+
+    def _make_region_cubes(self, full_mesh_cube):
+        """Make a fixed number of region cubes from a full meshcube."""
+        # Divide the cube into regions.
+        n_faces = full_mesh_cube.shape[-1]
+        # Start with a simple list of face indices
+        # first round up to a multiple of 5
+        n_faces_5s = 5 * ((n_faces + 4) // 5)
+        i_faces = np.arange(n_faces_5s, dtype=int)
+        # reshape (5N,) to (N, 5)
+        i_faces = i_faces.reshape((n_faces_5s // 5, 5))
+        # reorder [2, 3, 4, 0, 1] within each block of 5
+        i_faces = np.concatenate([i_faces[:, 2:], i_faces[:, :2]], axis=1)
+        # flatten to get [2 3 4 0 1 (-) 8 9 10 6 7 (-) 13 14 15 11 12 ...]
+        i_faces = i_faces.flatten()
+        # reduce back to original length, wrap any overflows into valid range
+        i_faces = i_faces[:n_faces] % n_faces
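+        # For example, with n_faces=10 the steps above give:
+        #   [0 1 2 3 4 5 6 7 8 9]
+        #   -> blocks [[0 1 2 3 4], [5 6 7 8 9]]
+        #   -> reordered [[2 3 4 0 1], [7 8 9 5 6]]
+        #   -> flattened + trimmed [2 3 4 0 1 7 8 9 5 6]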
+
+        # Divide into regions - always slightly uneven, since 7 does not
+        #  divide the face count evenly.
+        n_regions = 7
+        n_facesperregion = n_faces // n_regions
+        i_face_regions = (i_faces // n_facesperregion) % n_regions
+        region_inds = [
+            np.where(i_face_regions == i_region)[0]
+            for i_region in range(n_regions)
+        ]
+        # NOTE: this produces 7 regions, with near-adjacent value ranges but
+        #  with some points "moved" to an adjacent region.
+        # Also, region-0 is bigger (because n_faces does not divide evenly
+        #  by 7).
+
+        # Finally, make region cubes with these indices.
+        region_cubes = [full_mesh_cube[..., inds] for inds in region_inds]
+        return region_cubes
+
+    def setup_cache(self):
+        """Cache all the necessary source data on disk."""
+
+        # Control dask, to minimise memory usage + allow largest data.
+        self.fix_dask_settings()
+
+        for n_cubesphere in self.params:
+            # Do for each parameter, since "setup_cache" is NOT parametrised
+            mesh_cube = make_cube_like_2d_cubesphere(
+                n_cube=n_cubesphere, with_mesh=True
+            )
+            # Save to files which include the parameter in the names.
+            save(
+                mesh_cube,
+                self._parametrised_cache_filename(n_cubesphere, "meshcube"),
+            )
+            region_cubes = self._make_region_cubes(mesh_cube)
+            save(
+                region_cubes,
+                self._parametrised_cache_filename(n_cubesphere, "regioncubes"),
+            )
+
+    def setup(
+        self, n_cubesphere, imaginary_data=True, create_result_cube=True
+    ):
+        """
+        The combine-tests "standard" setup operation.
+
+        Load the source cubes (full-mesh + region) from disk.
+        These are specific to the cubesphere-N parameter.
+        The data is cached on disk rather than calculated, to avoid
+        pre-allocating process memory before the benchmark runs.
+
+        If 'imaginary_data' is set (default), the region cubes' data is
+        replaced with lazy data in the form of a da.zeros().  Otherwise, the
+        region data is lazy data from the files.
+
+        If 'create_result_cube' is set, create "self.recombined_cube"
+        containing the (still lazy) result.
+
+        NOTE: various test classes override + extend this.
+
+        """
+
+        # Load source cubes (full-mesh and regions)
+        with PARSE_UGRID_ON_LOAD.context():
+            self.full_mesh_cube = load_cube(
+                self._parametrised_cache_filename(n_cubesphere, "meshcube")
+            )
+            self.region_cubes = load(
+                self._parametrised_cache_filename(n_cubesphere, "regioncubes")
+            )
+
+        # Remove all var-names from loaded cubes, which can otherwise cause
+        #  problems.  Also implement 'imaginary' data.
+        for cube in self.region_cubes + [self.full_mesh_cube]:
+            cube.var_name = None
+            for coord in cube.coords():
+                coord.var_name = None
+            if imaginary_data:
+                # Replace cube data (lazy file data) with 'imaginary' data.
+                #  This has the same lazy-array attributes, but is allocated
+                #  by creating chunks on demand instead of loading from file.
+                data = cube.lazy_data()
+                data = da.zeros(
+                    data.shape, dtype=data.dtype, chunks=data.chunksize
+                )
+                cube.data = data
+
+        if create_result_cube:
+            self.recombined_cube = self.recombine()
+
+        # Fix dask usage mode for all the subsequent performance tests.
+        self.fix_dask_settings()
+
+    def fix_dask_settings(self):
+        """
+        Fix "standard" dask behaviour for time+space testing.
+
+        Currently this is single-threaded mode, with a known chunksize,
+        which is optimised to minimise memory usage so we can test the
+        largest data.
+
+        """
+
+        import dask.config as dcfg
+
+        # Use single-threaded, to avoid process-switching costs and minimise
+        #  memory usage.
+        # N.B. generally may be slower, but use less memory ?
+        dcfg.set(scheduler="single-threaded")
+        # Configure iris._lazy_data.as_lazy_data to aim for 128 MiB chunks.
+        dcfg.set({"array.chunk-size": "128MiB"})
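+        # NOTE: dask.config.set is deliberately used directly, not as a
+        #  context manager, so these settings persist for the whole process -
+        #  i.e. for all the subsequent benchmark timings.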
+
+    def recombine(self):
+        # A handy general shorthand for the main "combine" operation.
+        result = recombine_submeshes(
+            self.full_mesh_cube,
+            self.region_cubes,
+            index_coord_name="i_mesh_face",
+        )
+        return result
+
+
+class CombineRegionsCreateCube(MixinCombineRegions):
+    """
+    Time+memory costs of creating a combined-regions cube.
+
+    The result is lazy, and we don't do the actual calculation.
+
+    """
+
+    def setup(self, n_cubesphere):
+        # In this case only, do *not* create the result cube.
+        #  That is the operation we want to test.
+        super().setup(n_cubesphere, create_result_cube=False)
+
+    def time_create_combined_cube(self, n_cubesphere):
+        self.recombine()
+
+    def track_addedmem_create_combined_cube(self, n_cubesphere):
+        with TrackAddedMemoryAllocation() as mb:
+            self.recombine()
+        return mb.addedmem_mb()
+
+
+CombineRegionsCreateCube.track_addedmem_create_combined_cube.unit = "Mb"
+
+
+class CombineRegionsComputeRealData(MixinCombineRegions):
+    """
+    Time+memory costs of computing combined-regions data.
+
+    """
+
+    def time_compute_data(self, n_cubesphere):
+        self.recombined_cube.data
+
+    def track_addedmem_compute_data(self, n_cubesphere):
+        with TrackAddedMemoryAllocation() as mb:
+            self.recombined_cube.data
+        return mb.addedmem_mb()
+
+
+CombineRegionsComputeRealData.track_addedmem_compute_data.unit = "Mb"
+
+
+class CombineRegionsSaveData(MixinCombineRegions):
+    """
+    Test saving *only*, having replaced the input cube data with 'imaginary'
+    array data, so that input data is not loaded from disk during the save
+    operation.
+
+    """
+
+    def time_save(self, n_cubesphere):
+        # Save to disk, which must compute data + stream it to file.
+        save(self.recombined_cube, "tmp.nc")
+
+    def track_addedmem_save(self, n_cubesphere):
+        with TrackAddedMemoryAllocation() as mb:
+            save(self.recombined_cube, "tmp.nc")
+        return mb.addedmem_mb()
+
+    def track_filesize_saved(self, n_cubesphere):
+        save(self.recombined_cube, "tmp.nc")
+        return os.path.getsize("tmp.nc") * 1.0e-6
+
+
+CombineRegionsSaveData.track_addedmem_save.unit = "Mb"
+CombineRegionsSaveData.track_filesize_saved.unit = "Mb"
+
+
+class CombineRegionsFileStreamedCalc(MixinCombineRegions):
+    """
+    Test the whole cost of file-to-file streaming.
+
+    Uses the combined cube which is based on lazy data loading from the
+    region cubes on disk.
+    """
+
+    def setup(self, n_cubesphere):
+        # In this case only, do *not* replace the loaded regions data with
+        #  'imaginary' data, as we want to test file-to-file calculation+save.
+        super().setup(n_cubesphere, imaginary_data=False)
+
+    def time_stream_file2file(self, n_cubesphere):
+        # Save to disk, which must compute data + stream it to file.
+        save(self.recombined_cube, "tmp.nc")
+
+    def track_addedmem_stream_file2file(self, n_cubesphere):
+        with TrackAddedMemoryAllocation() as mb:
+            save(self.recombined_cube, "tmp.nc")
+        return mb.addedmem_mb()
+
+
+CombineRegionsFileStreamedCalc.track_addedmem_stream_file2file.unit = "Mb"
diff --git a/benchmarks/benchmarks/regridding.py b/benchmarks/benchmarks/regridding.py
index 6db33aa192..c315119c11 100644
--- a/benchmarks/benchmarks/regridding.py
+++ b/benchmarks/benchmarks/regridding.py
@@ -25,16 +25,31 @@ def setup(self) -> None:
         )
         self.cube = iris.load_cube(cube_file_path)
 
+        # Prepare a tougher cube and chunk it
+        chunked_cube_file_path = tests.get_data_path(
+            ["NetCDF", "regrid", "regrid_xyt.nc"]
+        )
+        self.chunked_cube = iris.load_cube(chunked_cube_file_path)
+        # (Assuming the same (t, y, x) layout as self.cube below.)
+        self.chunked_cube.data = self.chunked_cube.lazy_data().rechunk(
+            (1, -1, -1)
+        )
+
+        # Chunked data makes the regridder run repeatedly
+        self.cube.data = self.cube.lazy_data().rechunk((1, -1, -1))
+
         template_file_path = tests.get_data_path(
             ["NetCDF", "regrid", "regrid_template_global_latlon.nc"]
         )
         self.template_cube = iris.load_cube(template_file_path)
 
-        # Chunked data makes the regridder run repeatedly
-        self.cube.data = self.cube.lazy_data().rechunk((1, -1, -1))
+        # Prepare a regridding scheme for re-use across benchmarks.
+        self.scheme_area_w = AreaWeighted()
 
     def time_regrid_area_w(self) -> None:
         # Regrid the cube onto the template.
-        out = self.cube.regrid(self.template_cube, AreaWeighted())
+        out = self.cube.regrid(self.template_cube, self.scheme_area_w)
         # Realise the data
         out.data
+
+    def time_regrid_area_w_new_grid(self) -> None:
+        # Regrid the chunked cube
+        out = self.chunked_cube.regrid(self.template_cube, self.scheme_area_w)
+        # Realise the data
+        out.data
diff --git a/benchmarks/benchmarks/ugrid_load.py b/benchmarks/benchmarks/ugrid_load.py
new file mode 100644
index 0000000000..352450dcec
--- /dev/null
+++ b/benchmarks/benchmarks/ugrid_load.py
@@ -0,0 +1,128 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the LGPL license.
+# See COPYING and COPYING.LESSER in the root of the repository for full
+# licensing details.
+"""
+Mesh data loading benchmark tests.
+
+Where possible benchmarks should be parameterised for two sizes of input data:
+  * minimal: enables detection of regressions in parts of the run-time that do
+             NOT scale with data size.
+  * large: large enough to exclusively detect regressions in parts of the
+           run-time that scale with data size. Aim for benchmark time ~20x
+           that of the minimal benchmark.
+
+"""
+
+from iris import load_cube as iris_load_cube
+from iris.experimental.ugrid import PARSE_UGRID_ON_LOAD
+from iris.experimental.ugrid import load_mesh as iris_load_mesh
+
+from .generate_data.stock import create_file__xios_2d_face_half_levels
+
+
+def synthetic_data(**kwargs):
+    # Ensure all uses of the synthetic data function use the common directory.
+    # File location is controlled by :mod:`generate_data`, hence
+    #  temp_file_dir=None.
+    return create_file__xios_2d_face_half_levels(temp_file_dir=None, **kwargs)
+
+
+def load_cube(*args, **kwargs):
+    with PARSE_UGRID_ON_LOAD.context():
+        return iris_load_cube(*args, **kwargs)
+
+
+def load_mesh(*args, **kwargs):
+    with PARSE_UGRID_ON_LOAD.context():
+        return iris_load_mesh(*args, **kwargs)
+
+
+class BasicLoading:
+    params = [1, int(4.1e6)]
+    param_names = ["number of faces"]
+
+    def setup_common(self, **kwargs):
+        self.data_path = synthetic_data(**kwargs)
+
+    def setup(self, *args):
+        self.setup_common(dataset_name="Loading", n_faces=args[0])
+
+    def time_load_file(self, *args):
+        _ = load_cube(str(self.data_path))
+
+    def time_load_mesh(self, *args):
+        _ = load_mesh(str(self.data_path))
+
+
+class BasicLoadingTime(BasicLoading):
+    """Same as BasicLoading, but scaling over a time series - an unlimited dimension."""
+
+    param_names = ["number of time steps"]
+
+    def setup(self, *args):
+        self.setup_common(dataset_name="Loading", n_faces=1, n_times=args[0])
+
+
+class DataRealisation:
+    # Prevent repeat runs between setup() runs - data won't be lazy after 1st.
+    number = 1
+    # Compensate for reduced certainty by increasing number of repeats.
+    repeat = (10, 10, 10.0)
+    # Prevent ASV running its warmup, which ignores `number` and would
+    #  therefore get a false idea of typical run time since the data would
+    #  stop being lazy.
+    warmup_time = 0.0
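+    # (The three attributes above mirror what the
+    #  benchmarks.disable_repeat_between_setup decorator applies, just with a
+    #  higher repeat count.)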
+    timeout = 300.0
+
+    params = [1, int(4e6)]
+    param_names = ["number of faces"]
+
+    def setup_common(self, **kwargs):
+        data_path = synthetic_data(**kwargs)
+        self.cube = load_cube(str(data_path))
+
+    def setup(self, *args):
+        self.setup_common(dataset_name="Realisation", n_faces=args[0])
+
+    def time_realise_data(self, *args):
+        assert self.cube.has_lazy_data()
+        _ = self.cube.data[0]
+
+
+class DataRealisationTime(DataRealisation):
+    """Same as DataRealisation, but scaling over a time series - an unlimited dimension."""
+
+    param_names = ["number of time steps"]
+
+    def setup(self, *args):
+        self.setup_common(
+            dataset_name="Realisation", n_faces=1, n_times=args[0]
+        )
+
+
+class Callback:
+    params = [1, int(4.5e6)]
+    param_names = ["number of faces"]
+
+    def setup_common(self, **kwargs):
+        def callback(cube, field, filename):
+            return cube[::2]
+
+        self.data_path = synthetic_data(**kwargs)
+        self.callback = callback
+
+    def setup(self, *args):
+        self.setup_common(dataset_name="Loading", n_faces=args[0])
+
+    def time_load_file_callback(self, *args):
+        _ = load_cube(str(self.data_path), callback=self.callback)
+
+
+class CallbackTime(Callback):
+    """Same as Callback, but scaling over a time series - an unlimited dimension."""
+
+    param_names = ["number of time steps"]
+
+    def setup(self, *args):
+        self.setup_common(dataset_name="Loading", n_faces=1, n_times=args[0])