Skip to content
4 changes: 3 additions & 1 deletion benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ used to generate benchmark test objects/files; see
but will defer to any value already set in the shell.
* `BENCHMARK_DATA` - optional - path to a directory for benchmark synthetic
test data, which the benchmark scripts will create if it doesn't already
exist. Defaults to `<root>/benchmarks/.data/` if not set.
exist. Defaults to `<root>/benchmarks/.data/` if not set. Note that some of
the generated files, especially in the 'SPerf' suite, are many GB in size, so
plan accordingly.
* `ON_DEMAND_BENCHMARKS` - optional - when set (to any value): benchmarks
decorated with `@on_demand_benchmark` are included in the ASV run. Usually
coupled with the ASV `--bench` argument to only run the benchmark(s) of
Expand Down
6 changes: 6 additions & 0 deletions benchmarks/benchmarks/generate_data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from pathlib import Path
from subprocess import CalledProcessError, check_output, run
from textwrap import dedent
from warnings import warn

from iris._lazy_data import as_concrete_data
from iris.fileformats import netcdf
Expand All @@ -47,6 +48,11 @@
BENCHMARK_DATA = Path(environ.get("BENCHMARK_DATA", default_data_dir))
if BENCHMARK_DATA == default_data_dir:
BENCHMARK_DATA.mkdir(exist_ok=True)
message = (
f"No BENCHMARK_DATA env var, defaulting to {BENCHMARK_DATA}. "
"Note that some benchmark files are GB in size."
)
warn(message)
elif not BENCHMARK_DATA.is_dir():
message = f"Not a directory: {BENCHMARK_DATA} ."
raise ValueError(message)
Expand Down
16 changes: 13 additions & 3 deletions benchmarks/benchmarks/generate_data/stock.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,31 @@
See :mod:`benchmarks.generate_data` for an explanation of this structure.
"""

from hashlib import sha256
import json
from pathlib import Path

from iris.experimental.ugrid import PARSE_UGRID_ON_LOAD, load_mesh

from . import BENCHMARK_DATA, REUSE_DATA, load_realised, run_function_elsewhere


def hash_args(*args, **kwargs):
    """
    Convert arguments into a short, deterministic hash - for preserving
    args in filenames.

    Keyword arguments are serialised with ``sort_keys=True`` so the same
    kwargs produce the same hash regardless of the order they are passed;
    otherwise identical calls could generate different cache filenames.
    """
    arg_string = str(args)
    # sort_keys=True -> stable serialisation independent of kwarg order.
    kwarg_string = json.dumps(kwargs, sort_keys=True)
    full_string = arg_string + kwarg_string
    # 10 hex chars is plenty to distinguish benchmark parameter sets.
    return sha256(full_string.encode()).hexdigest()[:10]


def _create_file__xios_common(func_name, **kwargs):
def _external(func_name_, temp_file_dir, **kwargs_):
from iris.tests.stock import netcdf

func = getattr(netcdf, func_name_)
print(func(temp_file_dir, **kwargs_), end="")

args_hash = hash(str(kwargs))
args_hash = hash_args(**kwargs)
save_path = (BENCHMARK_DATA / f"{func_name}_{args_hash}").with_suffix(
".nc"
)
Expand Down Expand Up @@ -95,7 +105,7 @@ def _external(*args, **kwargs):
save_mesh(new_mesh, save_path_)

arg_list = [n_nodes, n_faces, n_edges]
args_hash = hash(str(arg_list))
args_hash = hash_args(*arg_list)
save_path = (BENCHMARK_DATA / f"sample_mesh_{args_hash}").with_suffix(
".nc"
)
Expand Down Expand Up @@ -139,7 +149,7 @@ def _external(sample_mesh_kwargs_, save_path_):
new_meshcoord = sample_meshcoord(mesh=input_mesh)
save_mesh(new_meshcoord.mesh, save_path_)

args_hash = hash(str(sample_mesh_kwargs))
args_hash = hash_args(**sample_mesh_kwargs)
save_path = (
BENCHMARK_DATA / f"sample_mesh_coord_{args_hash}"
).with_suffix(".nc")
Expand Down
6 changes: 5 additions & 1 deletion benchmarks/benchmarks/sperf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,14 @@
class FileMixin:
"""For use in any benchmark classes that work on a file."""

# Allows time for large file generation.
timeout = 3600.0
# Largest file with these params: ~90GB.
# Total disk space: ~410GB.
params = [
[12, 384, 640, 960, 1280, 1668],
[1, 36, 72],
[1, 3, 36, 72],
[1, 3, 10],
]
param_names = ["cubesphere_C<N>", "N levels", "N time steps"]
# cubesphere_C<N>: notation refers to faces per panel.
Expand Down
25 changes: 16 additions & 9 deletions benchmarks/benchmarks/sperf/combine_regions.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,21 @@
from iris.experimental.ugrid.utils import recombine_submeshes

from .. import TrackAddedMemoryAllocation, on_demand_benchmark
from ..generate_data.ugrid import make_cube_like_2d_cubesphere
from ..generate_data.ugrid import BENCHMARK_DATA, make_cube_like_2d_cubesphere


class Mixin:
# Characterise time taken + memory-allocated, for various stages of combine
# operations on cubesphere-like test data.
timeout = 180.0
timeout = 300.0
params = [100, 200, 300, 500, 1000, 1668]
param_names = ["cubesphere_C<N>"]
# Fix result units for the tracking benchmarks.
unit = "Mb"
temp_save_path = BENCHMARK_DATA / "tmp.nc"

def _parametrised_cache_filename(self, n_cubesphere, content_name):
    """
    Return the cache-file Path for the given cubesphere size and content name.

    The file lives under BENCHMARK_DATA (not the working directory) so that
    generated data persists between benchmark runs.
    """
    # NOTE: an earlier version returned a bare relative filename; the
    # superseded duplicate `return` line has been removed here.
    return BENCHMARK_DATA / f"cube_C{n_cubesphere}_{content_name}.nc"

def _make_region_cubes(self, full_mesh_cube):
"""Make a fixed number of region cubes from a full meshcube."""
Expand Down Expand Up @@ -139,6 +140,9 @@ def setup(
# Fix dask usage mode for all the subsequent performance tests.
self.fix_dask_settings()

def teardown(self, _):
    # ASV teardown hook (the unused arg is the benchmark parameter).
    # Remove the temporary save file so repeated runs of the save/stream
    # benchmarks do not accumulate large files on disk.
    # missing_ok=True (Python 3.8+) tolerates benchmarks that never saved.
    self.temp_save_path.unlink(missing_ok=True)

def fix_dask_settings(self):
"""
Fix "standard" dask behaviour for time+space testing.
Expand All @@ -165,6 +169,9 @@ def recombine(self):
)
return result

def save_recombined_cube(self):
    # Write the recombined cube to the shared temp path via the module-level
    # `save` function, so all save/stream benchmarks target the same
    # on-disk location (cleaned up again in `teardown`).
    save(self.recombined_cube, self.temp_save_path)


@on_demand_benchmark
class CreateCube(Mixin):
Expand Down Expand Up @@ -215,15 +222,15 @@ class SaveData(Mixin):

def time_save(self, n_cubesphere):
    """Time saving the recombined cube to disk."""
    # Save to disk, which must compute data + stream it to file.
    # (Superseded duplicate `save(..., "tmp.nc")` line removed - the save
    # now goes through the shared helper / temp path.)
    self.save_recombined_cube()

@TrackAddedMemoryAllocation.decorator()
def track_addedmem_save(self, n_cubesphere):
    """Track memory allocated while saving the recombined cube to disk."""
    # Superseded duplicate `save(..., "tmp.nc")` line removed - use the
    # shared helper so the file lands on the managed temp path.
    self.save_recombined_cube()

def track_filesize_saved(self, n_cubesphere):
    """Return the size of the saved file, in MB (matches class `unit`)."""
    # Superseded `save(..., "tmp.nc")` / os.path.getsize lines removed -
    # use the shared helper and the managed temp path instead.
    self.save_recombined_cube()
    # Bytes -> MB (decimal).
    return self.temp_save_path.stat().st_size * 1.0e-6


@on_demand_benchmark
Expand All @@ -243,8 +250,8 @@ def setup(

def time_stream_file2file(self, n_cubesphere):
    """Time saving, where source data is itself streamed from file."""
    # Save to disk, which must compute data + stream it to file.
    # (Superseded duplicate `save(..., "tmp.nc")` line removed.)
    self.save_recombined_cube()

@TrackAddedMemoryAllocation.decorator()
def track_addedmem_stream_file2file(self, n_cubesphere):
    """Track memory allocated during the file-to-file streamed save."""
    # Superseded duplicate `save(..., "tmp.nc")` line removed - use the
    # shared helper so the file lands on the managed temp path.
    self.save_recombined_cube()
3 changes: 0 additions & 3 deletions benchmarks/benchmarks/sperf/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,6 @@ def time_load_cube(self, _, __, ___):

@on_demand_benchmark
class Realise(FileMixin):
# The larger files take a long time to realise.
timeout = 600.0

def setup(self, c_size, n_levels, n_times):
super().setup(c_size, n_levels, n_times)
self.loaded_cube = self.load_cube()
Expand Down
8 changes: 6 additions & 2 deletions lib/iris/tests/stock/netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from string import Template
import subprocess

import dask
from dask import array as da
import netCDF4
import numpy as np

Expand Down Expand Up @@ -79,11 +81,13 @@ def _add_standard_data(nc_path, unlimited_dim_size=0):
# so it can be a dim-coord.
data_size = np.prod(shape)
data = np.arange(1, data_size + 1, dtype=var.dtype).reshape(shape)
var[:] = data
else:
# Fill with a plain value. But avoid zeros, so we can simulate
# valid ugrid connectivities even when start_index=1.
data = np.ones(shape, dtype=var.dtype) # Do not use zero
var[:] = data
with dask.config.set({"array.chunk-size": "2048MiB"}):
data = da.ones(shape, dtype=var.dtype) # Do not use zero
da.store(data, var)

ds.close()

Expand Down
9 changes: 7 additions & 2 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import hashlib
import os
from pathlib import Path
import re
from tempfile import NamedTemporaryFile
from typing import Literal

Expand Down Expand Up @@ -314,7 +315,7 @@ def benchmarks(
----------
session: object
A `nox.sessions.Session` object.
run_type: {"overnight", "branch", "custom"}
run_type: {"overnight", "branch", "cperf", "sperf", "custom"}
* ``overnight``: benchmarks all commits between the input **first
commit** to ``HEAD``, comparing each to its parent for performance
shifts. If a commit causes shifts, the output is saved to a file:
Expand Down Expand Up @@ -501,6 +502,11 @@ def asv_compare(*commits):
asv_command = (
asv_harness.format(posargs=commit_range) + f" --bench={run_type}"
)
# C/SPerf benchmarks are much bigger than the CI ones:
# Don't fail the whole run if memory blows on 1 benchmark.
asv_command = asv_command.replace(" --strict", "")
# Only do a single round.
asv_command = re.sub(r"rounds=\d", "rounds=1", asv_command)
session.run(*asv_command.split(" "), *asv_args)

asv_command = f"asv publish {commit_range} --html-dir={publish_subdir}"
Expand All @@ -511,7 +517,6 @@ def asv_compare(*commits):
print(
f'New ASV results for "{run_type}".\n'
f'See "{publish_subdir}",'
f'\n html in "{location / "html"}".'
f'\n or JSON files under "{location / "results"}".'
)

Expand Down