diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index a8247a247b..d4c01af48a 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -1,10 +1,11 @@ -# This is a basic workflow to help you get started with Actions +# Use ASV to check for performance regressions in the last 24 hours' commits. name: benchmark-check on: - # Triggers the workflow on push or pull request events but only for the master branch - pull_request: + schedule: + # Runs every day at 23:00. + - cron: "0 23 * * *" jobs: benchmark: @@ -23,12 +24,8 @@ jobs: steps: # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - uses: actions/checkout@v2 - - - name: Fetch the PR base branch too - run: | - git fetch --depth=1 origin ${{ github.event.pull_request.base.ref }} - git branch _base FETCH_HEAD - echo PR_BASE_SHA=$(git rev-parse _base) >> $GITHUB_ENV + with: + fetch-depth: 0 - name: Install Nox run: | @@ -65,11 +62,46 @@ jobs: run: | echo "OVERRIDE_TEST_DATA_REPOSITORY=${GITHUB_WORKSPACE}/${IRIS_TEST_DATA_PATH}/test_data" >> $GITHUB_ENV - - name: Run CI benchmarks + - name: Run overnight benchmarks + run: | + first_commit=$(git log --after="$(date -d "1 day ago" +"%Y-%m-%d") 23:00:00" --pretty=format:"%h" | tail -n 1) + if [ "$first_commit" != "" ] + then + nox --session="benchmarks(overnight)" -- $first_commit + fi + + - name: Create issues for performance shifts + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - mkdir --parents benchmarks/.asv - set -o pipefail - nox --session="benchmarks(ci compare)" | tee benchmarks/.asv/ci_compare.txt + if [ -d benchmarks/.asv/performance-shifts ] + then + cd benchmarks/.asv/performance-shifts + for commit_file in * + do + pr_number=$(git log "$commit_file"^! 
--oneline | grep -o "#[0-9]*" | tail -1 | cut -c 2-)
+              assignee=$(gh pr view $pr_number --json author -q '.["author"]["login"]' --repo $GITHUB_REPOSITORY)
+              title="Performance Shift(s): \`$commit_file\`"
+              body="
+              Benchmark comparison has identified performance shifts at commit \
+              $commit_file (#$pr_number). Please review the report below and \
+              take corrective/congratulatory action as appropriate \
+              :slightly_smiling_face:
+
+              <details>
+              <summary>Performance shift report</summary>
+
+              \`\`\`
+              $(cat $commit_file)
+              \`\`\`
+
+              </details>
+ + Generated by GHA run [\`${{github.run_id}}\`](https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}) + " + gh issue create --title "$title" --body "$body" --assignee $assignee --label "Bot" --label "Type: Performance" --repo $GITHUB_REPOSITORY + done + fi - name: Archive asv results if: ${{ always() }} @@ -78,4 +110,3 @@ jobs: name: asv-report path: | benchmarks/.asv/results - benchmarks/.asv/ci_compare.txt diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 0000000000..baa1afe700 --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,80 @@ +# Iris Performance Benchmarking + +Iris uses an [Airspeed Velocity](https://github.com/airspeed-velocity/asv) +(ASV) setup to benchmark performance. This is primarily designed to check for +performance shifts between commits using statistical analysis, but can also +be easily repurposed for manual comparative and scalability analyses. + +The benchmarks are automatically run overnight +[by a GitHub Action](../.github/workflows/benchmark.yml), with any notable +shifts in performance being flagged in a new GitHub issue. + +## Running benchmarks + +`asv ...` commands must be run from this directory. You will need to have ASV +installed, as well as Nox (see +[Benchmark environments](#benchmark-environments)). + +[Iris' noxfile](../noxfile.py) includes a `benchmarks` session that provides +conveniences for setting up before benchmarking, and can also replicate the +automated overnight run locally. See the session docstring for detail. + +### Environment variables + +* ``DATA_GEN_PYTHON`` - required - path to a Python executable that can be +used to generate benchmark test objects/files; see +[Data generation](#data-generation). The Nox session sets this automatically, +but will defer to any value already set in the shell. +* ``BENCHMARK_DATA`` - optional - path to a directory for benchmark synthetic +test data, which the benchmark scripts will create if it doesn't already +exist. 
Defaults to ``/benchmarks/.data/`` if not set. + +## Writing benchmarks + +[See the ASV docs](https://asv.readthedocs.io/) for full detail. + +### Data generation +**Important:** be sure not to use the benchmarking environment to generate any +test objects/files, as this environment changes with each commit being +benchmarked, creating inconsistent benchmark 'conditions'. The +[generate_data](./benchmarks/generate_data/__init__.py) module offers a +solution; read more detail there. + +### ASV re-run behaviour + +Note that ASV re-runs a benchmark multiple times between its `setup()` routine. +This is a problem for benchmarking certain Iris operations such as data +realisation, since the data will no longer be lazy after the first run. +Consider writing extra steps to restore objects' original state _within_ the +benchmark itself. + +If adding steps to the benchmark will skew the result too much then re-running +can be disabled by setting an attribute on the benchmark: `number = 1`. To +maintain result accuracy this should be accompanied by increasing the number of +repeats _between_ `setup()` calls using the `repeat` attribute. +`warmup_time = 0` is also advisable since ASV performs independent re-runs to +estimate run-time, and these will still be subject to the original problem. + +### Scaling / non-Scaling Performance Differences + +When comparing performance between commits/file-type/whatever it can be helpful +to know if the differences exist in scaling or non-scaling parts of the Iris +functionality in question. This can be done using a size parameter, setting +one value to be as small as possible (e.g. a scalar `Cube`), and the other to +be significantly larger (e.g. a 1000x1000 `Cube`). Performance differences +might only be seen for the larger value, or the smaller, or both, getting you +closer to the root cause. 
+ +## Benchmark environments + +We have disabled ASV's standard environment management, instead using an +environment built using the same Nox scripts as Iris' test environments. This +is done using ASV's plugin architecture - see +[asv_delegated_conda.py](asv_delegated_conda.py) and the extra config items in +[asv.conf.json](asv.conf.json). + +(ASV is written to control the environment(s) that benchmarks are run in - +minimising external factors and also allowing it to compare between a matrix +of dependencies (each in a separate environment). We have chosen to sacrifice +these features in favour of testing each commit with its intended dependencies, +controlled by Nox + lock-files). diff --git a/noxfile.py b/noxfile.py index 0600540c5b..e4d91c6bab 100755 --- a/noxfile.py +++ b/noxfile.py @@ -8,6 +8,8 @@ import hashlib import os from pathlib import Path +from tempfile import NamedTemporaryFile +from typing import Literal import nox from nox.logger import logger @@ -289,31 +291,60 @@ def linkcheck(session: nox.sessions.Session): ) -@nox.session(python=PY_VER, venv_backend="conda") +@nox.session @nox.parametrize( - ["ci_mode"], - [True, False], - ids=["ci compare", "full"], + "run_type", + ["overnight", "branch", "custom"], + ids=["overnight", "branch", "custom"], ) -def benchmarks(session: nox.sessions.Session, ci_mode: bool): +def benchmarks( + session: nox.sessions.Session, + run_type: Literal["overnight", "branch", "custom"], +): """ Perform Iris performance benchmarks (using Airspeed Velocity). + All run types require a single Nox positional argument (e.g. + ``nox --session="foo" -- my_pos_arg``) - detailed in the parameters + section - and can optionally accept a series of further arguments that will + be added to session's ASV command. + Parameters ---------- session: object A `nox.sessions.Session` object. - ci_mode: bool - Run a cut-down selection of benchmarks, comparing the current commit to - the last commit for performance regressions. 
-
-    Notes
-    -----
-    ASV is set up to use ``nox --session=tests --install-only`` to prepare
-    the benchmarking environment. This session environment must use a Python
-    version that is also available for ``--session=tests``.
+    run_type: {"overnight", "branch", "custom"}
+        * ``overnight``: benchmarks all commits between the input **first
+          commit** to ``HEAD``, comparing each to its parent for performance
+          shifts. If a commit causes shifts, the output is saved to a file:
+          ``.asv/performance-shifts/<commit-sha>``. Designed for checking the
+          previous 24 hours' commits, typically in a scheduled script.
+        * ``branch``: Performs the same operations as ``overnight``, but always
+          on two commits only - ``HEAD``, and ``HEAD``'s merge-base with the
+          input **base branch**. Output from this run is never saved to a file.
+          Designed for testing if the active branch's changes cause performance
+          shifts - anticipating what would be caught by ``overnight`` once
+          merged.
+          **For maximum accuracy, avoid using the machine that is running this
+          session. Run time could be >1 hour for the full benchmark suite.**
+        * ``custom``: run ASV with the input **ASV sub-command**, without any
+          preset arguments - must all be supplied by the user. So just like
+          running ASV manually, with the convenience of re-using the session's
+          scripted setup steps.
+
+    Examples
+    --------
+    * ``nox --session="benchmarks(overnight)" -- a1b23d4``
+    * ``nox --session="benchmarks(branch)" -- upstream/main``
+    * ``nox --session="benchmarks(branch)" -- upstream/mesh-data-model``
+    * ``nox --session="benchmarks(branch)" -- upstream/main --bench=regridding``
+    * ``nox --session="benchmarks(custom)" -- continuous a1b23d4 HEAD --quick``
     """
+    # The threshold beyond which shifts are 'notable'. See `asv compare` docs
+    # for more.
+ COMPARE_FACTOR = 1.2 + session.install("asv", "nox") data_gen_var = "DATA_GEN_PYTHON" @@ -327,12 +358,12 @@ def benchmarks(session: nox.sessions.Session, ci_mode: bool): "nox", "--session=tests", "--install-only", - f"--python={session.python}", + f"--python={_PY_VERSION_LATEST}", ) # Find the environment built above, set it to be the data generation # environment. data_gen_python = next( - Path(".nox").rglob(f"tests*/bin/python{session.python}") + Path(".nox").rglob(f"tests*/bin/python{_PY_VERSION_LATEST}") ).resolve() session.env[data_gen_var] = data_gen_python @@ -360,25 +391,85 @@ def benchmarks(session: nox.sessions.Session, ci_mode: bool): # Skip over setup questions for a new machine. session.run("asv", "machine", "--yes") - def asv_exec(*sub_args: str) -> None: - run_args = ["asv", *sub_args] - session.run(*run_args) - - if ci_mode: - # If on a PR: compare to the base (target) branch. - # Else: compare to previous commit. - previous_commit = os.environ.get("PR_BASE_SHA", "HEAD^1") - try: - asv_exec( - "continuous", - "--factor=1.2", - previous_commit, - "HEAD", - "--attribute", - "rounds=4", - ) - finally: - asv_exec("compare", previous_commit, "HEAD") + # All run types require one Nox posarg. + run_type_arg = { + "overnight": "first commit", + "branch": "base branch", + "custom": "ASV sub-command", + } + if run_type not in run_type_arg.keys(): + message = f"Unsupported run-type: {run_type}" + raise NotImplementedError(message) + if not session.posargs: + message = ( + f"Missing mandatory first Nox session posarg: " + f"{run_type_arg[run_type]}" + ) + raise ValueError(message) + first_arg = session.posargs[0] + # Optional extra arguments to be passed down to ASV. 
+    asv_args = session.posargs[1:]
+
+    def asv_compare(*commits):
+        """Run through a list of commits comparing each one to the next."""
+        commits = [commit[:8] for commit in commits]
+        shifts_dir = Path(".asv") / "performance-shifts"
+        for i in range(len(commits) - 1):
+            before = commits[i]
+            after = commits[i + 1]
+            asv_command_ = f"asv compare {before} {after} --factor={COMPARE_FACTOR} --split"
+            session.run(*asv_command_.split(" "))
+
+            if run_type == "overnight":
+                # Record performance shifts.
+                # Run the command again but limited to only showing performance
+                # shifts.
+                shifts = session.run(
+                    *asv_command_.split(" "), "--only-changed", silent=True
+                )
+                if shifts:
+                    # Write the shifts report to a file.
+                    # Dir is used by .github/workflows/benchmark.yml,
+                    # but not cached - intended to be discarded after run.
+                    shifts_dir.mkdir(exist_ok=True, parents=True)
+                    shifts_path = shifts_dir / after
+                    with shifts_path.open("w") as shifts_file:
+                        shifts_file.write(shifts)
+
+    # Common ASV arguments used for both `overnight` and `branch` run_types.
+    asv_harness = "asv run {posargs} --attribute rounds=4 --interleave-rounds --strict --show-stderr"
+
+    if run_type == "overnight":
+        first_commit = first_arg
+        commit_range = f"{first_commit}^^.."
+        asv_command = asv_harness.format(posargs=commit_range)
+        session.run(*asv_command.split(" "), *asv_args)
+
+        # git rev-list --first-parent is the command ASV uses.
+        git_command = f"git rev-list --first-parent {commit_range}"
+        commit_string = session.run(
+            *git_command.split(" "), silent=True, external=True
+        )
+        commit_list = commit_string.rstrip().split("\n")
+        asv_compare(*reversed(commit_list))
+
+    elif run_type == "branch":
+        base_branch = first_arg
+        git_command = f"git merge-base HEAD {base_branch}"
+        merge_base = session.run(
+            *git_command.split(" "), silent=True, external=True
+        )[:8]
+
+        with NamedTemporaryFile("w") as hashfile:
+            hashfile.writelines([merge_base, "\n", "HEAD"])
+            hashfile.flush()
+            commit_range = f"HASHFILE:{hashfile.name}"
+            asv_command = asv_harness.format(posargs=commit_range)
+            session.run(*asv_command.split(" "), *asv_args)
+
+        asv_compare(merge_base, "HEAD")
+
     else:
-        # f5ceb808 = first commit supporting nox --install-only .
-        asv_exec("run", "f5ceb808..HEAD")
+        asv_subcommand = first_arg
+        assert run_type == "custom"
+        session.run("asv", asv_subcommand, *asv_args)