diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index a8247a247b..d4c01af48a 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -1,10 +1,11 @@
-# This is a basic workflow to help you get started with Actions
+# Use ASV to check for performance regressions in the last 24 hours' commits.
name: benchmark-check
on:
- # Triggers the workflow on push or pull request events but only for the master branch
- pull_request:
+ schedule:
+ # Runs every day at 23:00.
+ - cron: "0 23 * * *"
jobs:
benchmark:
@@ -23,12 +24,8 @@ jobs:
steps:
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
- uses: actions/checkout@v2
-
- - name: Fetch the PR base branch too
- run: |
- git fetch --depth=1 origin ${{ github.event.pull_request.base.ref }}
- git branch _base FETCH_HEAD
- echo PR_BASE_SHA=$(git rev-parse _base) >> $GITHUB_ENV
+ with:
+ fetch-depth: 0
- name: Install Nox
run: |
@@ -65,11 +62,46 @@ jobs:
run: |
echo "OVERRIDE_TEST_DATA_REPOSITORY=${GITHUB_WORKSPACE}/${IRIS_TEST_DATA_PATH}/test_data" >> $GITHUB_ENV
- - name: Run CI benchmarks
+ - name: Run overnight benchmarks
+ run: |
+ first_commit=$(git log --after="$(date -d "1 day ago" +"%Y-%m-%d") 23:00:00" --pretty=format:"%h" | tail -n 1)
+ if [ "$first_commit" != "" ]
+ then
+ nox --session="benchmarks(overnight)" -- $first_commit
+ fi
+
+ - name: Create issues for performance shifts
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
- mkdir --parents benchmarks/.asv
- set -o pipefail
- nox --session="benchmarks(ci compare)" | tee benchmarks/.asv/ci_compare.txt
+ if [ -d benchmarks/.asv/performance-shifts ]
+ then
+ cd benchmarks/.asv/performance-shifts
+ for commit_file in *
+ do
+ pr_number=$(git log "$commit_file"^! --oneline | grep -o "#[0-9]*" | tail -1 | cut -c 2-)
+ assignee=$(gh pr view $pr_number --json author -q '.["author"]["login"]' --repo $GITHUB_REPOSITORY)
+ title="Performance Shift(s): \`$commit_file\`"
+ body="
+ Benchmark comparison has identified performance shifts at commit \
+ $commit_file (#$pr_number). Please review the report below and \
+ take corrective/congratulatory action as appropriate \
+ :slightly_smiling_face:
+
+
+ Performance shift report
+
+ \`\`\`
+ $(cat $commit_file)
+ \`\`\`
+
+
+
+ Generated by GHA run [\`${{github.run_id}}\`](https://github.com/${{github.repository}}/actions/runs/${{github.run_id}})
+ "
+ gh issue create --title "$title" --body "$body" --assignee $assignee --label "Bot" --label "Type: Performance" --repo $GITHUB_REPOSITORY
+ done
+ fi
- name: Archive asv results
if: ${{ always() }}
@@ -78,4 +110,3 @@ jobs:
name: asv-report
path: |
benchmarks/.asv/results
- benchmarks/.asv/ci_compare.txt
diff --git a/benchmarks/README.md b/benchmarks/README.md
new file mode 100644
index 0000000000..baa1afe700
--- /dev/null
+++ b/benchmarks/README.md
@@ -0,0 +1,80 @@
+# Iris Performance Benchmarking
+
+Iris uses an [Airspeed Velocity](https://github.com/airspeed-velocity/asv)
+(ASV) setup to benchmark performance. This is primarily designed to check for
+performance shifts between commits using statistical analysis, but can also
+be easily repurposed for manual comparative and scalability analyses.
+
+The benchmarks are automatically run overnight
+[by a GitHub Action](../.github/workflows/benchmark.yml), with any notable
+shifts in performance being flagged in a new GitHub issue.
+
+## Running benchmarks
+
+`asv ...` commands must be run from this directory. You will need to have ASV
+installed, as well as Nox (see
+[Benchmark environments](#benchmark-environments)).
+
+[Iris' noxfile](../noxfile.py) includes a `benchmarks` session that provides
+conveniences for setting up before benchmarking, and can also replicate the
+automated overnight run locally. See the session docstring for detail.
+
+### Environment variables
+
+* ``DATA_GEN_PYTHON`` - required - path to a Python executable that can be
+used to generate benchmark test objects/files; see
+[Data generation](#data-generation). The Nox session sets this automatically,
+but will defer to any value already set in the shell.
+* ``BENCHMARK_DATA`` - optional - path to a directory for benchmark synthetic
+test data, which the benchmark scripts will create if it doesn't already
+exist. Defaults to ``/benchmarks/.data/`` if not set.
+
+## Writing benchmarks
+
+[See the ASV docs](https://asv.readthedocs.io/) for full detail.
+
+### Data generation
+**Important:** be sure not to use the benchmarking environment to generate any
+test objects/files, as this environment changes with each commit being
+benchmarked, creating inconsistent benchmark 'conditions'. The
+[generate_data](./benchmarks/generate_data/__init__.py) module offers a
+solution; read more detail there.
+
+### ASV re-run behaviour
+
+Note that ASV re-runs a benchmark multiple times between calls to its `setup()` routine.
+This is a problem for benchmarking certain Iris operations such as data
+realisation, since the data will no longer be lazy after the first run.
+Consider writing extra steps to restore objects' original state _within_ the
+benchmark itself.
+
+If adding steps to the benchmark will skew the result too much then re-running
+can be disabled by setting an attribute on the benchmark: `number = 1`. To
+maintain result accuracy this should be accompanied by increasing the number of
+repeats _between_ `setup()` calls using the `repeat` attribute.
+`warmup_time = 0` is also advisable since ASV performs independent re-runs to
+estimate run-time, and these will still be subject to the original problem.
+
+### Scaling / non-Scaling Performance Differences
+
+When comparing performance between commits/file-type/whatever it can be helpful
+to know if the differences exist in scaling or non-scaling parts of the Iris
+functionality in question. This can be done using a size parameter, setting
+one value to be as small as possible (e.g. a scalar `Cube`), and the other to
+be significantly larger (e.g. a 1000x1000 `Cube`). Performance differences
+might only be seen for the larger value, or the smaller, or both, getting you
+closer to the root cause.
+
+## Benchmark environments
+
+We have disabled ASV's standard environment management, instead using an
+environment built using the same Nox scripts as Iris' test environments. This
+is done using ASV's plugin architecture - see
+[asv_delegated_conda.py](asv_delegated_conda.py) and the extra config items in
+[asv.conf.json](asv.conf.json).
+
+(ASV is written to control the environment(s) that benchmarks are run in -
+minimising external factors and also allowing it to compare between a matrix
+of dependencies (each in a separate environment). We have chosen to sacrifice
+these features in favour of testing each commit with its intended dependencies,
+controlled by Nox + lock-files).
diff --git a/noxfile.py b/noxfile.py
index 0600540c5b..e4d91c6bab 100755
--- a/noxfile.py
+++ b/noxfile.py
@@ -8,6 +8,8 @@
import hashlib
import os
from pathlib import Path
+from tempfile import NamedTemporaryFile
+from typing import Literal
import nox
from nox.logger import logger
@@ -289,31 +291,60 @@ def linkcheck(session: nox.sessions.Session):
)
-@nox.session(python=PY_VER, venv_backend="conda")
+@nox.session
@nox.parametrize(
- ["ci_mode"],
- [True, False],
- ids=["ci compare", "full"],
+ "run_type",
+ ["overnight", "branch", "custom"],
+ ids=["overnight", "branch", "custom"],
)
-def benchmarks(session: nox.sessions.Session, ci_mode: bool):
+def benchmarks(
+ session: nox.sessions.Session,
+ run_type: Literal["overnight", "branch", "custom"],
+):
"""
Perform Iris performance benchmarks (using Airspeed Velocity).
+ All run types require a single Nox positional argument (e.g.
+ ``nox --session="foo" -- my_pos_arg``) - detailed in the parameters
+ section - and can optionally accept a series of further arguments that will
+ be added to session's ASV command.
+
Parameters
----------
session: object
A `nox.sessions.Session` object.
- ci_mode: bool
- Run a cut-down selection of benchmarks, comparing the current commit to
- the last commit for performance regressions.
-
- Notes
- -----
- ASV is set up to use ``nox --session=tests --install-only`` to prepare
- the benchmarking environment. This session environment must use a Python
- version that is also available for ``--session=tests``.
+ run_type: {"overnight", "branch", "custom"}
+ * ``overnight``: benchmarks all commits between the input **first
+ commit** to ``HEAD``, comparing each to its parent for performance
+ shifts. If a commit causes shifts, the output is saved to a file:
+ ``.asv/performance-shifts/``. Designed for checking the
+ previous 24 hours' commits, typically in a scheduled script.
+ * ``branch``: Performs the same operations as ``overnight``, but always
+ on two commits only - ``HEAD``, and ``HEAD``'s merge-base with the
+ input **base branch**. Output from this run is never saved to a file.
+ Designed for testing if the active branch's changes cause performance
+ shifts - anticipating what would be caught by ``overnight`` once
+ merged.
+ **For maximum accuracy, avoid using the machine that is running this
+ session. Run time could be >1 hour for the full benchmark suite.**
+ * ``custom``: run ASV with the input **ASV sub-command**, without any
+ preset arguments - must all be supplied by the user. So just like
+ running ASV manually, with the convenience of re-using the session's
+ scripted setup steps.
+
+ Examples
+ --------
+ * ``nox --session="benchmarks(overnight)" -- a1b23d4``
+ * ``nox --session="benchmarks(branch)" -- upstream/main``
+ * ``nox --session="benchmarks(branch)" -- upstream/mesh-data-model``
+ * ``nox --session="benchmarks(branch)" -- upstream/main --bench=regridding``
+ * ``nox --session="benchmarks(custom)" -- continuous a1b23d4 HEAD --quick``
"""
+    # The threshold beyond which shifts are 'notable'. See `asv compare` docs
+ # for more.
+ COMPARE_FACTOR = 1.2
+
session.install("asv", "nox")
data_gen_var = "DATA_GEN_PYTHON"
@@ -327,12 +358,12 @@ def benchmarks(session: nox.sessions.Session, ci_mode: bool):
"nox",
"--session=tests",
"--install-only",
- f"--python={session.python}",
+ f"--python={_PY_VERSION_LATEST}",
)
# Find the environment built above, set it to be the data generation
# environment.
data_gen_python = next(
- Path(".nox").rglob(f"tests*/bin/python{session.python}")
+ Path(".nox").rglob(f"tests*/bin/python{_PY_VERSION_LATEST}")
).resolve()
session.env[data_gen_var] = data_gen_python
@@ -360,25 +391,85 @@ def benchmarks(session: nox.sessions.Session, ci_mode: bool):
# Skip over setup questions for a new machine.
session.run("asv", "machine", "--yes")
- def asv_exec(*sub_args: str) -> None:
- run_args = ["asv", *sub_args]
- session.run(*run_args)
-
- if ci_mode:
- # If on a PR: compare to the base (target) branch.
- # Else: compare to previous commit.
- previous_commit = os.environ.get("PR_BASE_SHA", "HEAD^1")
- try:
- asv_exec(
- "continuous",
- "--factor=1.2",
- previous_commit,
- "HEAD",
- "--attribute",
- "rounds=4",
- )
- finally:
- asv_exec("compare", previous_commit, "HEAD")
+ # All run types require one Nox posarg.
+ run_type_arg = {
+ "overnight": "first commit",
+ "branch": "base branch",
+ "custom": "ASV sub-command",
+ }
+ if run_type not in run_type_arg.keys():
+ message = f"Unsupported run-type: {run_type}"
+ raise NotImplementedError(message)
+ if not session.posargs:
+ message = (
+ f"Missing mandatory first Nox session posarg: "
+ f"{run_type_arg[run_type]}"
+ )
+ raise ValueError(message)
+ first_arg = session.posargs[0]
+ # Optional extra arguments to be passed down to ASV.
+ asv_args = session.posargs[1:]
+
+ def asv_compare(*commits):
+ """Run through a list of commits comparing each one to the next."""
+ commits = [commit[:8] for commit in commits]
+ shifts_dir = Path(".asv") / "performance-shifts"
+ for i in range(len(commits) - 1):
+ before = commits[i]
+ after = commits[i + 1]
+ asv_command_ = f"asv compare {before} {after} --factor={COMPARE_FACTOR} --split"
+ session.run(*asv_command_.split(" "))
+
+ if run_type == "overnight":
+ # Record performance shifts.
+ # Run the command again but limited to only showing performance
+ # shifts.
+ shifts = session.run(
+ *asv_command_.split(" "), "--only-changed", silent=True
+ )
+ if shifts:
+ # Write the shifts report to a file.
+                    # Dir is used by .github/workflows/benchmark.yml,
+ # but not cached - intended to be discarded after run.
+ shifts_dir.mkdir(exist_ok=True, parents=True)
+ shifts_path = shifts_dir / after
+ with shifts_path.open("w") as shifts_file:
+ shifts_file.write(shifts)
+
+    # Common ASV arguments used for both `overnight` and `branch` run_types.
+ asv_harness = "asv run {posargs} --attribute rounds=4 --interleave-rounds --strict --show-stderr"
+
+ if run_type == "overnight":
+ first_commit = first_arg
+ commit_range = f"{first_commit}^^.."
+ asv_command = asv_harness.format(posargs=commit_range)
+ session.run(*asv_command.split(" "), *asv_args)
+
+ # git rev-list --first-parent is the command ASV uses.
+ git_command = f"git rev-list --first-parent {commit_range}"
+ commit_string = session.run(
+ *git_command.split(" "), silent=True, external=True
+ )
+ commit_list = commit_string.rstrip().split("\n")
+ asv_compare(*reversed(commit_list))
+
+ elif run_type == "branch":
+ base_branch = first_arg
+ git_command = f"git merge-base HEAD {base_branch}"
+ merge_base = session.run(
+ *git_command.split(" "), silent=True, external=True
+ )[:8]
+
+ with NamedTemporaryFile("w") as hashfile:
+ hashfile.writelines([merge_base, "\n", "HEAD"])
+ hashfile.flush()
+ commit_range = f"HASHFILE:{hashfile.name}"
+ asv_command = asv_harness.format(posargs=commit_range)
+ session.run(*asv_command.split(" "), *asv_args)
+
+ asv_compare(merge_base, "HEAD")
+
else:
- # f5ceb808 = first commit supporting nox --install-only .
- asv_exec("run", "f5ceb808..HEAD")
+ asv_subcommand = first_arg
+ assert run_type == "custom"
+ session.run("asv", asv_subcommand, *asv_args)