SciTools · pp-mo · Feb 16, 2022 · Feb 10, 2022 · Feb 10, 2022 · Feb 10, 2022
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
@@ -1,10 +1,11 @@
-# This is a basic workflow to help you get started with Actions
+# Use ASV to check for performance regressions in the last 24 hours' commits.
 
 name: benchmark-check
 
 on:
-  # Triggers the workflow on push or pull request events but only for the master branch
-  pull_request:
+  schedule:
+    # Runs every day at 23:00 UTC.
+    - cron: "0 23 * * *"
 
 jobs:
   benchmark:
@@ -23,12 +24,8 @@ jobs:
     steps:
       # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
       - uses: actions/checkout@v2
-
-      - name: Fetch the PR base branch too
-        run: |
-          git fetch --depth=1 origin ${{ github.event.pull_request.base.ref }}
-          git branch _base FETCH_HEAD
-          echo PR_BASE_SHA=$(git rev-parse _base) >> $GITHUB_ENV
+        with:
+          fetch-depth: 0
 
       - name: Install Nox
         run: |
@@ -65,11 +62,46 @@ jobs:
         run: |
           echo "OVERRIDE_TEST_DATA_REPOSITORY=${GITHUB_WORKSPACE}/${IRIS_TEST_DATA_PATH}/test_data" >> $GITHUB_ENV
 
-      - name: Run CI benchmarks
+      - name: Run overnight benchmarks
+        run: |
+          first_commit=$(git log --after="$(date -d "1 day ago" +"%Y-%m-%d") 23:00:00" --pretty=format:"%h" | tail -n 1)
+          if [ "$first_commit" != "" ]
+          then
+            nox --session="benchmarks(overnight)" -- $first_commit
+          fi
+
+      - name: Create issues for performance shifts
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
-          mkdir --parents benchmarks/.asv
-          set -o pipefail
-          nox --session="benchmarks(ci compare)" | tee benchmarks/.asv/ci_compare.txt
+          if [ -d benchmarks/.asv/performance-shifts ]
+          then
+            cd benchmarks/.asv/performance-shifts
+            for commit_file in *
+            do
+              pr_number=$(git log "$commit_file"^! --oneline | grep -o "#[0-9]*" | tail -1 | cut -c 2-)
+              assignee=$(gh pr view $pr_number --json author -q '.["author"]["login"]' --repo $GITHUB_REPOSITORY)
+              title="Performance Shift(s): \`$commit_file\`"
+              body="
+          Benchmark comparison has identified performance shifts at commit \
+          $commit_file (#$pr_number). Please review the report below and \
+          take corrective/congratulatory action as appropriate \
+          :slightly_smiling_face:
+
+          <details>
+          <summary>Performance shift report</summary>
+
+          \`\`\`
+          $(cat $commit_file)
+          \`\`\`
+
+          </details>
+
+          Generated by GHA run [\`${{github.run_id}}\`](https://github.com/${{github.repository}}/actions/runs/${{github.run_id}})
+              "
+              gh issue create --title "$title" --body "$body" --assignee $assignee --label "Bot" --label "Type: Performance" --repo $GITHUB_REPOSITORY
+            done
+          fi
 
       - name: Archive asv results
         if: ${{ always() }}
@@ -78,4 +110,3 @@ jobs:
           name: asv-report
           path: |
             benchmarks/.asv/results
-            benchmarks/.asv/ci_compare.txt
diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -0,0 +1,80 @@
+# Iris Performance Benchmarking
+
+Iris uses an [Airspeed Velocity](https://github.com/airspeed-velocity/asv)
+(ASV) setup to benchmark performance. This is primarily designed to check for
+performance shifts between commits using statistical analysis, but can also
+be easily repurposed for manual comparative and scalability analyses.
+
+The benchmarks are automatically run overnight
+[by a GitHub Action](../.github/workflows/benchmark.yml), with any notable
+shifts in performance being flagged in a new GitHub issue.
+
+## Running benchmarks
+
+`asv ...` commands must be run from this directory. You will need to have ASV
+installed, as well as Nox (see
+[Benchmark environments](#benchmark-environments)).
+
+[Iris' noxfile](../noxfile.py) includes a `benchmarks` session that provides
+conveniences for setting up before benchmarking, and can also replicate the
+automated overnight run locally. See the session docstring for detail.
+
+### Environment variables
+
+* ``DATA_GEN_PYTHON`` - required - path to a Python executable that can be
+used to generate benchmark test objects/files; see
+[Data generation](#data-generation). The Nox session sets this automatically,
+but will defer to any value already set in the shell.
+* ``BENCHMARK_DATA`` - optional - path to a directory for benchmark synthetic
+test data, which the benchmark scripts will create if it doesn't already
+exist. Defaults to ``<root>/benchmarks/.data/`` if not set.
+
+## Writing benchmarks
+
+[See the ASV docs](https://asv.readthedocs.io/) for full detail.
+
+### Data generation
+**Important:** be sure not to use the benchmarking environment to generate any
+test objects/files, as this environment changes with each commit being
+benchmarked, creating inconsistent benchmark 'conditions'. The
+[generate_data](./benchmarks/generate_data/__init__.py) module offers a
+solution; read more detail there.
+
+### ASV re-run behaviour
+
+Note that ASV re-runs a benchmark multiple times between `setup()` calls.
+This is a problem for benchmarking certain Iris operations such as data
+realisation, since the data will no longer be lazy after the first run.
+Consider writing extra steps to restore objects' original state _within_ the
+benchmark itself.
+
+If adding steps to the benchmark will skew the result too much then re-running
+can be disabled by setting an attribute on the benchmark: `number = 1`. To
+maintain result accuracy this should be accompanied by increasing the number of
+repeats _between_ `setup()` calls using the `repeat` attribute.
+`warmup_time = 0` is also advisable since ASV performs independent re-runs to
+estimate run-time, and these will still be subject to the original problem.
+
+### Scaling / non-Scaling Performance Differences
+
+When comparing performance between commits, file types, etc. it can be helpful
+to know if the differences exist in scaling or non-scaling parts of the Iris
+functionality in question. This can be done using a size parameter, setting
+one value to be as small as possible (e.g. a scalar `Cube`), and the other to
+be significantly larger (e.g. a 1000x1000 `Cube`). Performance differences
+might only be seen for the larger value, or the smaller, or both, getting you
+closer to the root cause.
+
+## Benchmark environments
+
+We have disabled ASV's standard environment management, instead using an
+environment built using the same Nox scripts as Iris' test environments. This
+is done using ASV's plugin architecture - see
+[asv_delegated_conda.py](asv_delegated_conda.py) and the extra config items in
+[asv.conf.json](asv.conf.json).
+
+(ASV is written to control the environment(s) that benchmarks are run in -
+minimising external factors and also allowing it to compare between a matrix
+of dependencies (each in a separate environment). We have chosen to sacrifice
+these features in favour of testing each commit with its intended dependencies,
+controlled by Nox + lock-files).
diff --git a/noxfile.py b/noxfile.py
@@ -8,6 +8,8 @@
 import hashlib
 import os
 from pathlib import Path
+from tempfile import NamedTemporaryFile
+from typing import Literal
 
 import nox
 from nox.logger import logger
@@ -289,31 +291,60 @@ def linkcheck(session: nox.sessions.Session):
     )
 
 
-@nox.session(python=PY_VER, venv_backend="conda")
+@nox.session
 @nox.parametrize(
-    ["ci_mode"],
-    [True, False],
-    ids=["ci compare", "full"],
+    "run_type",
+    ["overnight", "branch", "custom"],
+    ids=["overnight", "branch", "custom"],
 )
-def benchmarks(session: nox.sessions.Session, ci_mode: bool):
+def benchmarks(
+    session: nox.sessions.Session,
+    run_type: Literal["overnight", "branch", "custom"],
+):
     """
     Perform Iris performance benchmarks (using Airspeed Velocity).
 
+    All run types require a single Nox positional argument (e.g.
+    ``nox --session="foo" -- my_pos_arg``) - detailed in the parameters
+    section - and can optionally accept a series of further arguments that will
+    be added to the session's ASV command.
+
     Parameters
     ----------
     session: object
         A `nox.sessions.Session` object.
-    ci_mode: bool
-        Run a cut-down selection of benchmarks, comparing the current commit to
-        the last commit for performance regressions.
-
-    Notes
-    -----
-    ASV is set up to use ``nox --session=tests --install-only`` to prepare
-    the benchmarking environment. This session environment must use a Python
-    version that is also available for ``--session=tests``.
+    run_type: {"overnight", "branch", "custom"}
+        * ``overnight``: benchmarks all commits from the input **first
+          commit** to ``HEAD``, comparing each to its parent for performance
+          shifts. If a commit causes shifts, the output is saved to a file:
+          ``.asv/performance-shifts/<commit-sha>``. Designed for checking the
+          previous 24 hours' commits, typically in a scheduled script.
+        * ``branch``: Performs the same operations as ``overnight``, but always
+          on two commits only - ``HEAD``, and ``HEAD``'s merge-base with the
+          input **base branch**. Output from this run is never saved to a file.
+          Designed for testing if the active branch's changes cause performance
+          shifts - anticipating what would be caught by ``overnight`` once
+          merged.
+          **For maximum accuracy, avoid using the machine that is running this
+          session. Run time could be >1 hour for the full benchmark suite.**
+        * ``custom``: run ASV with the input **ASV sub-command**, without any
+          preset arguments - must all be supplied by the user. So just like
+          running ASV manually, with the convenience of re-using the session's
+          scripted setup steps.
+
+    Examples
+    --------
+    * ``nox --session="benchmarks(overnight)" -- a1b23d4``
+    * ``nox --session="benchmarks(branch)" -- upstream/main``
+    * ``nox --session="benchmarks(branch)" -- upstream/mesh-data-model``
+    * ``nox --session="benchmarks(branch)" -- upstream/main --bench=regridding``
+    * ``nox --session="benchmarks(custom)" -- continuous a1b23d4 HEAD --quick``
 
     """
+    # The threshold beyond which shifts are 'notable'. See ``asv compare`` docs
+    #  for more.
+    COMPARE_FACTOR = 1.2
+
     session.install("asv", "nox")
 
     data_gen_var = "DATA_GEN_PYTHON"
@@ -327,12 +358,12 @@ def benchmarks(session: nox.sessions.Session, ci_mode: bool):
             "nox",
             "--session=tests",
             "--install-only",
-            f"--python={session.python}",
+            f"--python={_PY_VERSION_LATEST}",
         )
         # Find the environment built above, set it to be the data generation
         #  environment.
         data_gen_python = next(
-            Path(".nox").rglob(f"tests*/bin/python{session.python}")
+            Path(".nox").rglob(f"tests*/bin/python{_PY_VERSION_LATEST}")
         ).resolve()
         session.env[data_gen_var] = data_gen_python
 
@@ -360,25 +391,85 @@ def benchmarks(session: nox.sessions.Session, ci_mode: bool):
     # Skip over setup questions for a new machine.
     session.run("asv", "machine", "--yes")
 
-    def asv_exec(*sub_args: str) -> None:
-        run_args = ["asv", *sub_args]
-        session.run(*run_args)
-
-    if ci_mode:
-        # If on a PR: compare to the base (target) branch.
-        #  Else: compare to previous commit.
-        previous_commit = os.environ.get("PR_BASE_SHA", "HEAD^1")
-        try:
-            asv_exec(
-                "continuous",
-                "--factor=1.2",
-                previous_commit,
-                "HEAD",
-                "--attribute",
-                "rounds=4",
-            )
-        finally:
-            asv_exec("compare", previous_commit, "HEAD")
+    # All run types require one Nox posarg.
+    run_type_arg = {
+        "overnight": "first commit",
+        "branch": "base branch",
+        "custom": "ASV sub-command",
+    }
+    if run_type not in run_type_arg.keys():
+        message = f"Unsupported run-type: {run_type}"
+        raise NotImplementedError(message)
+    if not session.posargs:
+        message = (
+            f"Missing mandatory first Nox session posarg: "
+            f"{run_type_arg[run_type]}"
+        )
+        raise ValueError(message)
+    first_arg = session.posargs[0]
+    # Optional extra arguments to be passed down to ASV.
+    asv_args = session.posargs[1:]
+
+    def asv_compare(*commits):
+        """Run through a list of commits comparing each one to the next."""
+        commits = [commit[:8] for commit in commits]
+        shifts_dir = Path(".asv") / "performance-shifts"
+        for i in range(len(commits) - 1):
+            before = commits[i]
+            after = commits[i + 1]
+            asv_command_ = f"asv compare {before} {after} --factor={COMPARE_FACTOR} --split"
+            session.run(*asv_command_.split(" "))
+
+            if run_type == "overnight":
+                # Record performance shifts.
+                # Run the command again but limited to only showing performance
+                #  shifts.
+                shifts = session.run(
+                    *asv_command_.split(" "), "--only-changed", silent=True
+                )
+                if shifts:
+                    # Write the shifts report to a file.
+                    # Dir is used by .github/workflows/benchmark.yml,
+                    #  but not cached - intended to be discarded after run.
+                    shifts_dir.mkdir(exist_ok=True, parents=True)
+                    shifts_path = shifts_dir / after
+                    with shifts_path.open("w") as shifts_file:
+                        shifts_file.write(shifts)
+
+    # Common ASV arguments used for both `overnight` and `branch` run_types.
+    asv_harness = "asv run {posargs} --attribute rounds=4 --interleave-rounds --strict --show-stderr"
+
+    if run_type == "overnight":
+        first_commit = first_arg
+        commit_range = f"{first_commit}^^.."
+        asv_command = asv_harness.format(posargs=commit_range)
+        session.run(*asv_command.split(" "), *asv_args)
+
+        # git rev-list --first-parent is the command ASV uses.
+        git_command = f"git rev-list --first-parent {commit_range}"
+        commit_string = session.run(
+            *git_command.split(" "), silent=True, external=True
+        )
+        commit_list = commit_string.rstrip().split("\n")
+        asv_compare(*reversed(commit_list))
+
+    elif run_type == "branch":
+        base_branch = first_arg
+        git_command = f"git merge-base HEAD {base_branch}"
+        merge_base = session.run(
+            *git_command.split(" "), silent=True, external=True
+        )[:8]
+
+        with NamedTemporaryFile("w") as hashfile:
+            hashfile.writelines([merge_base, "\n", "HEAD"])
+            hashfile.flush()
+            commit_range = f"HASHFILE:{hashfile.name}"
+            asv_command = asv_harness.format(posargs=commit_range)
+            session.run(*asv_command.split(" "), *asv_args)
+
+        asv_compare(merge_base, "HEAD")
+
     else:
-        # f5ceb808 = first commit supporting nox --install-only .
-        asv_exec("run", "f5ceb808..HEAD")
+        asv_subcommand = first_arg
+        assert run_type == "custom"
+        session.run("asv", asv_subcommand, *asv_args)