Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ git:

install:
- >
export IRIS_TEST_DATA_REF="1696ac3a823a06b95f430670f285ee97671d2cf2";
export IRIS_TEST_DATA_REF="672dbb46c986038fa5d06a3d8aad691fd1951e07";
export IRIS_TEST_DATA_SUFFIX=$(echo "${IRIS_TEST_DATA_REF}" | sed "s/^v//");

# Install miniconda
Expand Down
47 changes: 24 additions & 23 deletions lib/iris/fileformats/cf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,9 @@
from abc import ABCMeta, abstractmethod

from collections.abc import Iterable, MutableMapping
import os
import re
import warnings

import netCDF4
import numpy as np
import numpy.ma as ma

Expand Down Expand Up @@ -1008,8 +1006,12 @@ class CFReader:

"""

def __init__(self, filename, warn=False, monotonic=False):
self._filename = os.path.expanduser(filename)
def __init__(
self, dataset, warn=False, monotonic=False, exclude_var_names=None
):
self._dataset = dataset
self._filename = dataset.filepath()

# All CF variable types EXCEPT for the "special cases" of
# CFDataVariable, CFCoordinateVariable and _CFFormulaTermsVariable.
self._variable_types = (
Expand All @@ -1025,8 +1027,6 @@ def __init__(self, filename, warn=False, monotonic=False):
#: Collection of CF-netCDF variables associated with this netCDF file
self.cf_group = CFGroup()

self._dataset = netCDF4.Dataset(self._filename, mode="r")

# Issue load optimisation warning.
if warn and self._dataset.file_format in [
"NETCDF3_CLASSIC",
Expand All @@ -1039,6 +1039,7 @@ def __init__(self, filename, warn=False, monotonic=False):

self._check_monotonic = monotonic

self.exclude_var_names = exclude_var_names or []
self._translate()
self._build_cf_groups()
self._reset()
Expand All @@ -1049,26 +1050,30 @@ def __repr__(self):
def _translate(self):
"""Classify the netCDF variables into CF-netCDF variables."""

netcdf_variable_names = list(self._dataset.variables.keys())
netcdf_variable_names = [
var_name
for var_name in self._dataset.variables.keys()
if var_name not in self.exclude_var_names
]

# Identify all CF coordinate variables first. This must be done
# first as, by CF convention, the definition of a CF auxiliary
# coordinate variable may include a scalar CF coordinate variable,
# whereas we want these two types of variables to be mutually exclusive.
coords = CFCoordinateVariable.identify(
self._dataset.variables, monotonic=self._check_monotonic
self._dataset.variables,
ignore=self.exclude_var_names,
monotonic=self._check_monotonic,
)
self.cf_group.update(coords)
coordinate_names = list(self.cf_group.coordinates.keys())

# Identify all CF variables EXCEPT for the "special cases".
for variable_type in self._variable_types:
# Prevent grid mapping variables being mis-identified as CF coordinate variables.
ignore = (
None
if issubclass(variable_type, CFGridMappingVariable)
else coordinate_names
)
ignore = self.exclude_var_names
if not issubclass(variable_type, CFGridMappingVariable):
ignore += coordinate_names
self.cf_group.update(
variable_type.identify(self._dataset.variables, ignore=ignore)
)
Expand All @@ -1082,7 +1087,7 @@ def _translate(self):

# Identify and register all CF formula terms.
formula_terms = _CFFormulaTermsVariable.identify(
self._dataset.variables
self._dataset.variables, ignore=self.exclude_var_names
)

for cf_var in formula_terms.values():
Expand Down Expand Up @@ -1125,10 +1130,9 @@ def _build(cf_variable):
for variable_type in self._variable_types:
# Prevent grid mapping variables being mis-identified as
# CF coordinate variables.
if issubclass(variable_type, CFGridMappingVariable):
ignore = None
else:
ignore = coordinate_names
ignore = self.exclude_var_names
if not issubclass(variable_type, CFGridMappingVariable):
ignore += coordinate_names
match = variable_type.identify(
self._dataset.variables,
ignore=ignore,
Expand Down Expand Up @@ -1258,11 +1262,8 @@ def _build(cf_variable):
def _reset(self):
"""Reset the attribute touch history of each variable."""
for nc_var_name in self._dataset.variables.keys():
self.cf_group[nc_var_name].cf_attrs_reset()

def __del__(self):
# Explicitly close dataset to prevent file remaining open.
self._dataset.close()
if nc_var_name not in self.exclude_var_names:
self.cf_group[nc_var_name].cf_attrs_reset()


def _getncattr(dataset, attr, default=None):
Expand Down
20 changes: 13 additions & 7 deletions lib/iris/fileformats/netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import iris.exceptions
import iris.fileformats.cf
import iris.fileformats._pyke_rules
from iris.fileformats.ugrid_cf_reader import UGridCFReader
import iris.io
import iris.util
from iris._lazy_data import as_lazy_data
Expand Down Expand Up @@ -752,7 +753,7 @@ def coord_from_term(term):
cube.add_aux_factory(factory)


def load_cubes(filenames, callback=None):
def load_cubes(filenames, callback=None, *args, **kwargs):
"""
Loads cubes from a list of NetCDF filenames/URLs.

Expand All @@ -777,15 +778,20 @@ def load_cubes(filenames, callback=None):
filenames = [filenames]

for filename in filenames:
# Ingest the netCDF file.
cf = iris.fileformats.cf.CFReader(filename)
# Ingest the netCDF file, creating a reader which also checks for UGRID
# content.
reader = UGridCFReader(filename, *args, **kwargs)

# Process each CF data variable.
data_variables = list(cf.cf_group.data_variables.values()) + list(
cf.cf_group.promoted.values()
)
data_variables = list(
reader.cfreader.cf_group.data_variables.values()
) + list(reader.cfreader.cf_group.promoted.values())
for cf_var in data_variables:
cube = _load_cube(engine, cf, cf_var, filename)
cube = _load_cube(engine, reader.cfreader, cf_var, filename)

# Post-process each cube to attach information describing the
# unstructured mesh dimension, if any.
reader.complete_ugrid_cube(cube)

# Process any associated formula terms and attach
# the corresponding AuxCoordFactory.
Expand Down
201 changes: 201 additions & 0 deletions lib/iris/fileformats/ugrid_cf_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the LGPL license.
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.
"""
Adds a UGRID extension layer to netCDF file loading.

"""
from collections import namedtuple
import os

import netCDF4

from gridded.pyugrid.ugrid import UGrid
from gridded.pyugrid.read_netcdf import (
find_mesh_names,
load_grid_from_nc_dataset,
)
from iris.fileformats.cf import CFReader


_UGRID_ELEMENT_TYPE_NAMES = ("node", "edge", "face", "volume")

# Generate all possible UGRID structural property names.
# These are the UGRID mesh properties that contain variable names for linkage,
# which may appear as recognised properties of the main mesh variable.

# Start with coordinate variables for each element type (aka "mesh_location").
_UGRID_LINK_PROPERTIES = [
"{}_coordinates".format(elem) for elem in _UGRID_ELEMENT_TYPE_NAMES
]

# Add in all possible type-to-type_connectivity elements.
# NOTE: this actually generates extra unused names, such as
# "node_face_connectivity", because we are not bothering to distinguish
# between lower- and higher-order elements.
# For now just don't worry about that, as long as we get all the ones which
# *are* needed.
_UGRID_LINK_PROPERTIES += [
"{}_{}_connectivity".format(e1, e2)
for e1 in _UGRID_ELEMENT_TYPE_NAMES
for e2 in _UGRID_ELEMENT_TYPE_NAMES
]

# Also allow for boundary information.
_UGRID_LINK_PROPERTIES += ["boundary_node_connectivity"]


class CubeUgrid(
    namedtuple("CubeUgrid", ["cube_dim", "grid", "mesh_location"])
):
    """
    A record of the unstructured-grid dimension of a cube.

    * cube_dim (int):
        The (single) cube dimension which maps the unstructured grid.

    * grid (`gridded.pyugrid.UGrid`):
        A 'gridded' description of a UGRID mesh.

    * mesh_location (str):
        The mesh element the cube is mapped to: 'face', 'edge' or 'node'.
        A 'volume' is not supported.

    """

    def __str__(self):
        # Render the mesh description, falling back to a placeholder if the
        # 'gridded' info object cannot be stringified.
        try:
            mesh_text = str(self.grid.info)
        except TypeError:
            mesh_text = "<unprintable mesh>"
        indented_mesh = "\n".join(
            " " + line for line in mesh_text.split("\n")
        )
        parts = [
            "Cube unstructured-grid dimension:",
            "\n cube dimension = {}".format(self.cube_dim),
            '\n mesh_location = "{}"'.format(self.mesh_location),
            '\n mesh "{}" :\n'.format(self.grid.mesh_name),
            indented_mesh,
            "\n",
        ]
        return "".join(parts)


class UGridCFReader:
    """
    A CFReader extension to add UGRID information to netcdf cube loading.

    Identifies UGRID-specific parts of a netcdf file, providing:

    * `self.cfreader` : a CFReader object to interpret the CF data from the
      file for cube creation, while ignoring the UGRID mesh data.

    * `self.complete_ugrid_cube(cube)` a call to add the relevant UGRID
      information to a cube created from the cfreader data.

    This allows us to decouple UGRID from CF support with minimal changes to
    the existing `iris.fileformats.netcdf` code, which is intimately coupled to
    both the CFReader class and the netCDF4 file interface.

    """

    def __init__(self, filename, *args, **kwargs):
        """
        Open a netCDF file, read its UGRID meshes, and build a CFReader
        which excludes all the mesh-related variables.

        Args:

        * filename (str):
            Path of the netCDF file to read ("~" is expanded).

        Kwargs:

        * temp_xios_fix (bool):
            When True, identify mesh-related variables by name prefix
            instead of by following the mesh linkage attributes — a crude
            workaround for certain XIOS-produced files.

        All other args and kwargs are passed through to the CFReader.

        """
        self.filename = os.path.expanduser(filename)
        self.dataset = netCDF4.Dataset(self.filename, mode="r")

        # Build a 'gridded' mesh description for each UGRID mesh in the file.
        meshes = {}
        for meshname in find_mesh_names(self.dataset):
            mesh = UGrid()
            load_grid_from_nc_dataset(self.dataset, mesh, mesh_name=meshname)
            meshes[meshname] = mesh
        self.meshes = meshes

        # Generate the list of variable names the CFReader must skip.
        temp_xios_fix = kwargs.pop("temp_xios_fix", False)
        exclude_vars = self._mesh_variable_names(temp_xios_fix)

        # Identify possible mesh dimensions and make a map of them.
        self.meshdims_map = self._identify_mesh_dimensions()

        # Create a CFReader object which skips the UGRID-related variables.
        kwargs["exclude_var_names"] = exclude_vars
        self.cfreader = CFReader(self.dataset, *args, **kwargs)

    def _mesh_variable_names(self, temp_xios_fix):
        """
        Return the names of all netCDF variables which form part of a UGRID
        mesh definition (the mesh variables themselves plus every variable
        they link to).

        """
        exclude_vars = list(self.meshes.keys())
        if not temp_xios_fix:
            # This way *ought* to work, but maybe problems with the test
            # file ?  Collect names from the recognised linkage attributes
            # of each mesh variable.
            for mesh in self.meshes.values():
                mesh_var = self.dataset.variables[mesh.mesh_name]
                for attr in mesh_var.ncattrs():
                    if attr in _UGRID_LINK_PROPERTIES:
                        exclude_vars.extend(mesh_var.getncattr(attr).split())
        else:
            # A crude and XIOS-specific alternative : treat any variable
            # whose name starts with a mesh name as part of that mesh.
            exclude_vars += [
                name
                for name in self.dataset.variables.keys()
                if any(
                    name.startswith(meshname)
                    for meshname in self.meshes.keys()
                )
            ]
        return exclude_vars

    def _identify_mesh_dimensions(self):
        """
        Return a dict mapping {dimension-name: (mesh, mesh-location)} for
        every mesh element dimension found in the file.

        """
        meshdims_map = {}
        for mesh in self.meshes.values():
            mesh_var = self.dataset.variables[mesh.mesh_name]
            # Faces and edges share the same identification logic.
            for location in ("face", "edge"):
                # mesh.faces / mesh.edges is None when that element type is
                # absent from the mesh.
                if getattr(mesh, location + "s") is None:
                    continue
                dim_attr = location + "_dimension"
                if dim_attr in mesh_var.ncattrs():
                    # The mesh variable states the dimension explicitly.
                    dim_name = mesh_var.getncattr(dim_attr)
                else:
                    # Assume default dimension ordering, and get the dim name
                    # from dims of a non-optional connectivity variable.
                    conn_name = getattr(
                        mesh_var, location + "_node_connectivity"
                    )
                    conn_var = self.dataset.variables[conn_name]
                    dim_name = conn_var.dimensions[0]
                meshdims_map[dim_name] = (mesh, location)
            if mesh.nodes is not None:
                # Work out name of nodes dimension and record it.
                # Get it from a non-optional coordinate variable.
                nodes_varname = mesh_var.node_coordinates.split()[0]
                nodes_var = self.dataset.variables[nodes_varname]
                meshdims_map[nodes_var.dimensions[0]] = (mesh, "node")
        return meshdims_map

    def complete_ugrid_cube(self, cube):
        """
        Add the ".ugrid" property to a cube loaded with the `self.cfreader`.

        We identify the unstructured-grid dimension of the cube (if any), and
        attach a suitable CubeUgrid object, linking the cube mesh dimension to
        an element-type (aka "mesh_location") of a mesh.

        Raises ValueError if the cube maps more than one mesh dimension.

        """
        # Find which cube dimensions (if any) map a known mesh dimension.
        data_var = self.dataset.variables[cube.var_name]
        meshes_info = [
            (i_dim, self.meshdims_map[dim_name])
            for i_dim, dim_name in enumerate(data_var.dimensions)
            if dim_name in self.meshdims_map
        ]
        if len(meshes_info) > 1:
            msg = "Cube maps more than one mesh dimension: {}"
            raise ValueError(msg.format(meshes_info))
        if meshes_info:
            i_dim, (mesh, mesh_location) = meshes_info[0]
            cube.ugrid = CubeUgrid(
                cube_dim=i_dim, grid=mesh, mesh_location=mesh_location
            )
        else:
            # Add an empty 'cube.ugrid' to all cubes otherwise.
            cube.ugrid = None

    def __del__(self):
        # Explicitly close the dataset to prevent the file remaining open.
        # Guard against a partially-constructed instance : if __init__ failed
        # before the Dataset was opened, 'self.dataset' does not exist and an
        # unguarded close would raise AttributeError during garbage collection.
        dataset = getattr(self, "dataset", None)
        if dataset is not None:
            try:
                dataset.close()
            except RuntimeError:
                # The dataset was already closed elsewhere : nothing to do.
                pass
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
class TestProjectedUnstructured(tests.IrisTest):
def setUp(self):
path = tests.get_data_path(
("NetCDF", "unstructured_grid", "theta_nodal_xios.nc")
("NetCDF", "unstructured_grid", "theta_nodal_not_ugrid.nc")
)
self.src = iris.load_cube(path, "Potential Temperature")

Expand Down
2 changes: 1 addition & 1 deletion lib/iris/tests/integration/test_regridding.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def test_nearest(self):
class TestUnstructured(tests.IrisTest):
def setUp(self):
path = tests.get_data_path(
("NetCDF", "unstructured_grid", "theta_nodal_xios.nc")
("NetCDF", "unstructured_grid", "theta_nodal_not_ugrid.nc")
)
self.src = iris.load_cube(path, "Potential Temperature")
self.grid = simple_3d()[0, :, :]
Expand Down
Loading