diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 000000000..2dc99856f
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,39 @@
+exclude: ^contrib/
+repos:
+- repo: https://github.com/pycqa/isort
+  rev: 5.13.2
+  hooks:
+  - id: isort
+    name: isort (python)
+- repo: https://github.com/psf/black
+  rev: 24.8.0
+  hooks:
+  - id: black
+    exclude: ^docs/exts/details\.py$
+- repo: https://github.com/pycqa/flake8
+  rev: 7.1.2
+  hooks:
+  - id: flake8
+#- repo: https://github.com/pre-commit/mirrors-mypy
+#  rev: v1.14.1
+#  hooks:
+#  - id: mypy
+#    args: [--show-error-codes, --strict, --no-warn-return-any]
+#    files: ^(drgn/.*\.py|_drgn.pyi|_drgn_util/.*\.py|tools/.*\.py|vmtest/.*\.py)$
+#    additional_dependencies: [aiohttp, uritemplate]
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v5.0.0
+  hooks:
+  - id: trailing-whitespace
+    exclude_types: [diff]
+  - id: end-of-file-fixer
+    exclude_types: [diff]
+  - id: check-yaml
+  - id: check-added-large-files
+  - id: debug-statements
+  - id: check-merge-conflict
+- repo: https://github.com/netromdk/vermin
+  rev: v1.6.0
+  hooks:
+  - id: vermin
+    args: ['-t=3.8-', '--violations', '--eval-annotations']
diff --git a/drgn/commands/_builtin/__init__.py b/drgn/commands/_builtin/__init__.py
index 21bb2ce98..bfb36fe85 100644
--- a/drgn/commands/_builtin/__init__.py
+++ b/drgn/commands/_builtin/__init__.py
@@ -9,11 +9,14 @@
 import argparse
 import importlib
 import pkgutil
+import re
 import subprocess
+import sys
+import traceback
 from typing import Any, Dict
 
 from drgn import Program, execscript
-from drgn.commands import argument, command, custom_command
+from drgn.commands import _shell_command, argument, command, custom_command
 
 # Import all submodules, recursively.
 for _module_info in pkgutil.walk_packages(__path__, __name__ + "."):
@@ -37,6 +40,82 @@ def _cmd_sh(prog: Program, name: str, args: str, **kwargs: Any) -> int:
     return subprocess.call(["sh", "-i"])
 
 
+@custom_command(
+    description="execute a python statement and allow shell redirection",
+    usage="**py** [*command*]",
+    long_description="""
+    Execute the given code, up to the first shell redirection or pipeline
+    statement, as Python code.
+
+    For each occurrence of a pipeline operator (``|``) or any redirection
+    operator (``<``, ``>``, ``<<``, ``>>``), attempt to parse the preceding text
+    as Python code. If the preceding text is syntactically valid code, then
+    interpret the remainder of the command as shell redirections or pipelines,
+    and execute the Python code with those redirections and pipelines applied.
+
+    The operators above can also appear in syntactically valid Python. This
+    means that you need to be careful when using this command and wrap such
+    uses in parentheses.
+
+    For example, consider the command: ``%py field | MY_FLAG | grep foo``. While
+    the intent here may be to execute the Python code ``field | MY_FLAG`` and
+    pass its result to ``grep``, that is not what will happen. The portion of
+    text prior to the first ``|`` is valid Python, so it will be executed, and
+    its output piped to the shell pipeline ``MY_FLAG | grep foo``. Instead,
+    running ``%py (field | MY_FLAG) | grep foo`` ensures that the result of
+    ``field | MY_FLAG`` gets piped to ``grep foo``, because ``(field`` on its
+    own is not valid Python syntax.
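+
+    As a quick, hypothetical example: ``%py print("hi") > out.txt`` writes
+    ``hi`` to the file ``out.txt``, because ``print("hi")`` compiles on its
+    own, while ``%py (1 > 2)`` simply prints ``False``, because ``(1`` does
+    not.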
+    """,
+)
+def _cmd_py(
+    prog: Program,
+    name: str,
+    args: str,
+    *,
+    globals: Dict[str, Any],
+    **kwargs: Any,
+) -> None:
+
+    def print_exc() -> None:
+        # When printing a traceback, we should not print our own stack frame,
+        # as that would confuse the user. Unfortunately, traceback objects are
+        # linked lists with no built-in way to skip frames while printing, so
+        # count the frames and use a negative limit to print all but the first
+        # (our own).
+        _, _, tb = sys.exc_info()
+        count = 0
+        while tb:
+            count += 1
+            tb = tb.tb_next
+        traceback.print_exc(limit=1 - count)
+
+    for match in re.finditer(r"[|<>]", args):
+        try:
+            pos = match.start()
+            code = compile(args[:pos], "", "single")
+            break
+        except SyntaxError:
+            pass
+    else:
+        # Fallback for no match: compile all the code as a "single" statement
+        # so exec() still prints out the result. At this point, a syntax error
+        # should be formatted just like a standard Python exception.
+        try:
+            pos = len(args)
+            code = compile(args, "", "single")
+        except SyntaxError:
+            print_exc()
+            return
+
+    with _shell_command(args[pos:]):
+        try:
+            exec(code, globals)
+        except (Exception, KeyboardInterrupt):
+            # Any exception should be formatted just as the interpreter would.
+            # This includes keyboard interrupts, but not things like
+            # SystemExit or GeneratorExit.
+            print_exc()
+
+
 @command(
     description="run a drgn script",
     long_description="""
diff --git a/drgn/commands/_builtin/crash/ptov.py b/drgn/commands/_builtin/crash/ptov.py
new file mode 100644
index 000000000..ac559d26d
--- /dev/null
+++ b/drgn/commands/_builtin/crash/ptov.py
@@ -0,0 +1,114 @@
+# Copyright (c) 2025, Kylin Software, Inc. and affiliates.
+# SPDX-License-Identifier: LGPL-2.1-or-later
+
+import argparse
+from typing import Any
+
+from drgn import Object, Program
+from drgn.commands import argument, drgn_argument
+from drgn.commands.crash import CrashDrgnCodeBuilder, crash_command, parse_cpuspec
+from drgn.helpers.common.format import print_table
+from drgn.helpers.linux.mm import phys_to_virt
+from drgn.helpers.linux.percpu import per_cpu_ptr
+
+
+@crash_command(
+    description="physical or per-CPU to virtual",
+    long_description="""This command translates a hexadecimal physical address into a
+    kernel virtual address.
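+    For example, ``ptov 1000`` prints the kernel virtual address in the
+    direct mapping that corresponds to physical address ``0x1000``.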
+    Alternatively, a hexadecimal per-CPU offset and CPU specifier will be
+    translated into kernel virtual addresses for each CPU specified.""",
+    arguments=(
+        argument(
+            "address",
+            metavar="address|offset:cpuspec",
+            nargs="+",
+            help="hexadecimal physical address or hexadecimal per-CPU offset and CPU specifier",
+        ),
+        drgn_argument,
+    ),
+)
+def _crash_cmd_ptov(
+    prog: Program, name: str, args: argparse.Namespace, **kwargs: Any
+) -> None:
+    if args.drgn:
+        # Create a single builder for all addresses.
+        builder = CrashDrgnCodeBuilder(prog)
+        physical_addresses = []
+        per_cpu_offsets = []
+
+        for address in args.address:
+            if ":" in address:
+                # Imports for the per-CPU case.
+                builder.add_from_import("drgn", "Object")
+                builder.add_from_import("drgn.helpers.linux.percpu", "per_cpu_ptr")
+                builder.add_from_import(
+                    "drgn.helpers.linux.cpumask", "for_each_possible_cpu"
+                )
+                # Parse the cpuspec in the actual command code.
+                offset_str, cpu_spec = address.split(":", 1)
+                offset = int(offset_str, 16)
+                per_cpu_offsets.append((offset, parse_cpuspec(cpu_spec)))
+            else:
+                # Import for the physical address case.
+                builder.add_from_import("drgn.helpers.linux.mm", "phys_to_virt")
+                physical_addresses.append(int(address, 16))
+
+        # Generate code for physical addresses.
+        if physical_addresses:
+            builder.append("addresses = [")
+            builder.append(", ".join(f"0x{addr:x}" for addr in physical_addresses))
+            builder.append("]\n")
+            builder.append("for address in addresses:\n")
+            builder.append("    virt = phys_to_virt(address)\n")
+
+        # Generate code for per-CPU offsets.
+        for offset, cpuspec in per_cpu_offsets:
+            builder.append(f"\noffset = {offset:#x}\n")
+            builder.append_cpuspec(
+                cpuspec,
+                """
+                virt = per_cpu_ptr(Object(prog, 'void *', offset), cpu)
+                """,
+            )
+
+        # Print the generated code once at the end.
+        builder.print()
+        return
+
+    # Handle direct execution without --drgn.
+    for i, address in enumerate(args.address):
+        if i > 0:
+            print()  # Blank line between outputs for multiple addresses.
+
+        if ":" in address:
+            # Handle the per-CPU offset case.
+            offset_str, cpu_spec = address.split(":", 1)
+            offset = int(offset_str, 16)
+
+            # Parse the CPU specifier.
+            cpus = parse_cpuspec(cpu_spec)
+
+            print(f"PER-CPU OFFSET: {offset:x}")
+
+            # Prepare data for print_table(), starting with the header row.
+            rows = [(" CPU", " VIRTUAL")]
+            # Treat the offset as a void * so that per_cpu_ptr() can adjust it.
+            ptr = Object(prog, "void *", offset)
+            for cpu in cpus.cpus(prog):
+                virt = per_cpu_ptr(ptr, cpu)
+                rows.append((f" [{cpu}]", f"{virt.value_():016x}"))
+
+            print_table(rows)
+        else:
+            # Handle the physical address case.
+            phys = int(address, 16)
+            virt = phys_to_virt(prog, phys)
+            virt_int = virt.value_()
+
+            # Prepare data for print_table().
+            rows = [("VIRTUAL", "PHYSICAL"), (f"{virt_int:016x}", f"{phys:x}")]
+
+            print_table(rows)
diff --git a/drgn/helpers/linux/sched.py b/drgn/helpers/linux/sched.py
index e082f07d4..cf03aaa27 100644
--- a/drgn/helpers/linux/sched.py
+++ b/drgn/helpers/linux/sched.py
@@ -24,11 +24,14 @@
 __all__ = (
     "cpu_curr",
+    "cpu_rq",
     "get_task_state",
     "idle_task",
     "loadavg",
     "task_cpu",
     "task_on_cpu",
+    "task_rq",
+    "task_since_last_arrival_ns",
     "task_state_to_char",
     "task_thread_info",
 )
@@ -189,10 +192,12 @@ def task_rq(task: Object) -> Object:
 
 def task_since_last_arrival_ns(task: Object) -> int:
     """
-    Get the number of nanoseconds since a task last started running.
+    Get the difference between the runqueue timestamp when a task last started
+    running and the current runqueue timestamp.
 
-    Assuming that time slices are short, this is approximately the time that
-    the task has been in its current status (running, queued, or blocked).
+    This is approximately the time that the task has been in its current status
+    (running, queued, or blocked). However, if a CPU is either idle or running
+    the same task for a long time, then the timestamps will not be accurate.
 
     This is only supported if the kernel was compiled with
     ``CONFIG_SCHEDSTATS`` or ``CONFIG_TASK_DELAY_ACCT``.
diff --git a/tests/commands/__init__.py b/tests/commands/__init__.py
new file mode 100644
index 000000000..c4b21ac64
--- /dev/null
+++ b/tests/commands/__init__.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python3
+
+from drgn.commands import DEFAULT_COMMAND_NAMESPACE
+from tests import TestCase
+
+
+class CommandTestCase(TestCase):
+
+    @staticmethod
+    def run_command(source, **kwargs):
+        return DEFAULT_COMMAND_NAMESPACE.run(None, source, globals={}, **kwargs)
diff --git a/tests/commands/test_builtin.py b/tests/commands/test_builtin.py
new file mode 100644
index 000000000..3cfa1c54f
--- /dev/null
+++ b/tests/commands/test_builtin.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+import contextlib
+import os
+from pathlib import Path
+import tempfile
+
+import drgn.commands._builtin  # noqa: F401
+from tests.commands import CommandTestCase
+
+
+@contextlib.contextmanager
+def temporary_working_directory():
+    old_working_directory = os.getcwd()
+    with tempfile.TemporaryDirectory() as f:
+        try:
+            os.chdir(f)
+            yield f
+        finally:
+            os.chdir(old_working_directory)
+
+
+class RedirectedFile:
+    def __init__(self, f):
+        self.tempfile = f
+        self.value = None
+
+
+@contextlib.contextmanager
+def redirect(stdout=False, stderr=False):
+    # To redirect stdout for commands, we need a real file descriptor, not
+    # just a StringIO.
+    with contextlib.ExitStack() as stack:
+        f = stack.enter_context(tempfile.TemporaryFile("w+t"))
+        if stdout:
+            stack.enter_context(contextlib.redirect_stdout(f))
+        if stderr:
+            stack.enter_context(contextlib.redirect_stderr(f))
+        redir = RedirectedFile(f)
+        try:
+            yield redir
+        finally:
+            f.seek(0)
+            redir.value = f.read()
+
+
+class TestPyCommand(CommandTestCase):
+
+    def test_py_redirect(self):
+        with temporary_working_directory() as temp_dir:
+            path = Path(temp_dir) / "6"
+            self.run_command("py var = 5; var > 6")
+            self.assertEqual(path.read_text(), "5\n")
+
+    def test_py_paren_avoid_redirect(self):
+        with redirect(stdout=True) as f:
+            self.run_command("py var = 5; (var > 6)")
+        self.assertEqual(f.value, "False\n")
+
+    def test_py_pipe(self):
+        with redirect(stdout=True) as f:
+            self.run_command("py echo = 5; 2 | echo + 5")
+        self.assertEqual(f.value, "+ 5\n")
+
+    def test_py_avoid_pipe(self):
+        with redirect(stdout=True) as f:
+            self.run_command("py echo = 5; (2 | (echo + 5))")
+        self.assertEqual(f.value, "10\n")
+
+    def test_py_chooses_first_pipe(self):
+        with redirect(stdout=True) as f:
+            # If the first | is used to separate the Python from the pipeline
+            # (the expected behavior), then we'll get the value 5 written into
+            # the "echo" command, which will ignore that and write "+ 6" through
+            # the cat process to stdout. If the second | is used to separate the
+            # Python from the pipeline, then we'll get "15" written into the cat
+            # process. If none of the | were interpreted as a pipeline operator,
+            # then the statement would output 31.
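+            # (5 | (5 + 6) | 16 == (5 | 11) | 16 == 15 | 16 == 31.)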
+ self.run_command("py echo = 5; cat = 16; 5 | echo + 6 | cat") + self.assertEqual("+ 6\n", f.value) + + def test_py_traceback_on_syntax_error(self): + with redirect(stderr=True) as f: + self.run_command("py a +") + # SyntaxError does not print the "Traceback" header. Rather than trying + # to assert too much about the format of the traceback, just assert that + # the incorrect code is shown, as it would be for a traceback. + self.assertTrue("a +" in f.value) + self.assertTrue("SyntaxError" in f.value) + + def test_py_traceback_on_exception(self): + with redirect(stderr=True) as f: + self.run_command("py raise Exception('text')") + self.assertTrue(f.value.startswith("Traceback")) + self.assertTrue("Exception" in f.value) diff --git a/tests/linux_kernel/__init__.py b/tests/linux_kernel/__init__.py index 2b69b29ac..444002ebf 100644 --- a/tests/linux_kernel/__init__.py +++ b/tests/linux_kernel/__init__.py @@ -171,7 +171,7 @@ def proc_state(pid): # Context manager that: # 1. Forks a process which optionally calls a function and then stops with # SIGSTOP. -# 2. Waits for the child process to stop. +# 2. Waits for the child process to stop and unschedule. # 3. Returns the PID of the child process, and return value of the function if # provided, from __enter__(). # 4. Kills the child process in __exit__(). @@ -197,12 +197,32 @@ def fork_and_stop(fn=None, *args, **kwds): traceback.print_exc() sys.stderr.flush() os._exit(1) + if fn: pipe_w.close() ret = pickle.load(pipe_r) + _, status = os.waitpid(pid, os.WUNTRACED) if not os.WIFSTOPPED(status): raise Exception("child process exited") + # waitpid() can return as soon as the stopped flag is set on the + # process; see wait_task_stopped() in the Linux kernel source code: + # https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/exit.c?h=v6.17-rc5#n1313 + # However, the process may still be on the CPU for a short window; + # see do_signal_stop(): + # https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/signal.c?h=v6.17-rc5#n2617 + # So, we need to wait for it to fully unschedule. /proc/pid/syscall + # contains "running" unless the process is unscheduled; see + # proc_pid_syscall(): + # https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/proc/base.c?h=v6.17-rc5#n675 + # task_current_syscall(): + # https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/syscall.c?h=v6.17-rc5#n69 + # and wait_task_inactive(): + # https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/sched/core.c?h=v6.17-rc5#n2257 + syscall_path = Path(f"/proc/{pid}/syscall") + while syscall_path.read_text() == "running\n": + os.sched_yield() + if fn: yield pid, ret else: diff --git a/tests/linux_kernel/crash_commands/test_ptov.py b/tests/linux_kernel/crash_commands/test_ptov.py new file mode 100644 index 000000000..92ea13f36 --- /dev/null +++ b/tests/linux_kernel/crash_commands/test_ptov.py @@ -0,0 +1,80 @@ +# Copyright (c) 2025, Kylin Software, Inc. and affiliates. 
+# SPDX-License-Identifier: LGPL-2.1-or-later + +import os + +from drgn import Object +from drgn.helpers.linux.cpumask import for_each_online_cpu +from drgn.helpers.linux.mm import phys_to_virt +from drgn.helpers.linux.percpu import per_cpu_ptr +from tests.linux_kernel.crash_commands import CrashCommandTestCase + + +class TestPtov(CrashCommandTestCase): + def test_phy_to_virt(self): + """Test physical address to virtual address conversion.""" + phys_addr = 0x123 + virt_addr = phys_to_virt(self.prog, phys_addr) + virt_addr_int = virt_addr.value_() + + cmd = self.check_crash_command(f"ptov {hex(phys_addr)}") + self.assertRegex(cmd.stdout, r"(?m)^\s*VIRTUAL\s+PHYSICAL") + self.assertRegex(cmd.stdout, rf"(?m)^\s*{virt_addr_int:016x}\s+{phys_addr:x}") + + def test_per_cpu_offset_single_cpu(self): + """Test per-CPU offset conversion for a single CPU.""" + offset = 0x100 + cpu = 0 + ptr = Object(self.prog, "unsigned long", offset) + virt_ptr = per_cpu_ptr(ptr, cpu) + virt_int = virt_ptr.value_() + + cmd = self.check_crash_command(f"ptov {hex(offset)}:{cpu}") + self.assertRegex(cmd.stdout, rf"(?m)^\s*PER-CPU OFFSET:\s+{offset:x}") + self.assertRegex(cmd.stdout, r"(?m)^\s*CPU\s+VIRTUAL") + self.assertRegex(cmd.stdout, rf"(?m)^\s*\[{cpu}\]\s+{virt_int:016x}") + + def test_per_cpu_offset_all_cpus(self): + """Test per-CPU offset conversion for all CPUs.""" + offset = 0x200 + cmd = self.check_crash_command(f"ptov {hex(offset)}:a") + + self.assertRegex(cmd.stdout, rf"(?m)^\s*PER-CPU OFFSET:\s+{offset:x}") + self.assertRegex(cmd.stdout, r"(?m)^\s*CPU\s+VIRTUAL") + + ptr = Object(self.prog, "unsigned long", offset) + for cpu in for_each_online_cpu(self.prog): + virt = per_cpu_ptr(ptr, cpu) + self.assertRegex(cmd.stdout, rf"(?m)^\s*\[{cpu}\]\s+{virt.value_():016x}") + + def test_per_cpu_offset_cpu_list(self): + """Test per-CPU offset conversion for a CPU list.""" + offset = 0x300 + cpus = sorted(os.sched_getaffinity(0)) + cmd = self.check_crash_command(f"ptov {hex(offset)}:{','.join(map(str, cpus))}") + + self.assertRegex(cmd.stdout, rf"(?m)^\s*PER-CPU OFFSET:\s+{offset:x}") + self.assertRegex(cmd.stdout, r"(?m)^\s*CPU\s+VIRTUAL") + + ptr = Object(self.prog, "unsigned long", offset) + for cpu in cpus: + virt = per_cpu_ptr(ptr, cpu) + self.assertRegex(cmd.stdout, rf"(?m)^\s*\[{cpu}\]\s+{virt.value_():016x}") + + def test_invalid_address(self): + """Test invalid physical address input.""" + with self.assertRaises(Exception) as cm: + self.check_crash_command("ptov invalid_address") + msg = str(cm.exception).lower() + self.assertTrue( + "invalid literal" in msg or "base 16" in msg, + f"Unexpected error message: {msg}", + ) + + def test_invalid_cpu_spec(self): + """Test invalid per-CPU specifier.""" + offset = 0x400 + with self.assertRaises(Exception) as cm: + self.check_crash_command(f"ptov {hex(offset)}:invalid") + msg = str(cm.exception).lower() + self.assertIn("invalid cpuspec", msg, f"Unexpected error message: {msg}") diff --git a/tests/linux_kernel/helpers/test_mm.py b/tests/linux_kernel/helpers/test_mm.py index ea6b6cf47..d04e46fe0 100644 --- a/tests/linux_kernel/helpers/test_mm.py +++ b/tests/linux_kernel/helpers/test_mm.py @@ -472,14 +472,32 @@ def test_get_task_rss_info(self): ) } - # The kernel code uses percpu_counter_read_positive(), but the - # helper uses percpu_counter_sum() for better accuracy. We need to - # account for the deviation. 
- try: - percpu_counter_batch = self.prog["percpu_counter_batch"].value_() - except ObjectNotFoundError: - percpu_counter_batch = 32 - delta = percpu_counter_batch * os.cpu_count() + # Before Linux kernel commit 82241a83cd15 ("mm: fix the inaccurate + # memory statistics issue for users") (in v6.16), the RSS counters + # in /proc/pid/meminfo are approximate due to batching, but the + # helpers are exact. + if hasattr(task, "rss_stat"): + # Before Linux kernel commit f1a7941243c10 ("mm: convert mm's + # rss stats into percpu_counter") (in v6.2), there is a + # per-thread counter that only gets synced to the main counter + # every TASK_RSS_EVENTS_THRESH (64) page faults. Each fault can + # map in multiple pages based on fault_around_bytes. So, the + # maximum error is nr_threads * 64 * (fault_around_bytes / PAGE_SIZE). + delta = ( + len(os.listdir(f"/proc/{pid}/task")) + * 64 + * (self.prog["fault_around_bytes"].value_() // page_size) + ) + else: + # Between that and Linux kernel commit 82241a83cd15 ("mm: fix + # the inaccurate memory statistics issue for users") (in + # v6.16), the kernel code uses percpu_counter_read_positive(), + # so the maximum error is nr_cpus * percpu_counter_batch. + try: + percpu_counter_batch = self.prog["percpu_counter_batch"].value_() + except ObjectNotFoundError: + percpu_counter_batch = 32 + delta = percpu_counter_batch * os.cpu_count() self.assertAlmostEqual(rss_info.file, stats["RssFile"], delta=delta) self.assertAlmostEqual(rss_info.anon, stats["RssAnon"], delta=delta) @@ -487,4 +505,6 @@ def test_get_task_rss_info(self): rss_info.shmem, stats.get("RssShmem", 0), delta=delta ) self.assertAlmostEqual(rss_info.swap, stats["VmSwap"], delta=delta) - self.assertAlmostEqual(rss_info.total, stats["VmRSS"], delta=delta) + # VmRSS is the sum of three counters, so it has triple the error + # margin. + self.assertAlmostEqual(rss_info.total, stats["VmRSS"], delta=delta * 3) diff --git a/tests/linux_kernel/helpers/test_sched.py b/tests/linux_kernel/helpers/test_sched.py index cb3008b20..2196297a7 100644 --- a/tests/linux_kernel/helpers/test_sched.py +++ b/tests/linux_kernel/helpers/test_sched.py @@ -105,5 +105,12 @@ def test_loadavg(self): def test_task_since_last_arrival_ns(self): with fork_and_stop() as pid: time.sleep(0.01) + # Forcing the process to migrate also forces the rq clock to update + # so we can get a reliable reading. + affinity = os.sched_getaffinity(pid) + if len(affinity) > 1: + other_affinity = {affinity.pop()} + os.sched_setaffinity(pid, affinity) + os.sched_setaffinity(pid, other_affinity) task = find_task(self.prog, pid) self.assertGreaterEqual(task_since_last_arrival_ns(task), 10000000) diff --git a/tests/linux_kernel/kmod/drgn_test.c b/tests/linux_kernel/kmod/drgn_test.c index a10f144ac..20593b245 100644 --- a/tests/linux_kernel/kmod/drgn_test.c +++ b/tests/linux_kernel/kmod/drgn_test.c @@ -1055,11 +1055,11 @@ static inline void drgn_test_get_pt_regs(struct pt_regs *regs) #endif } - __attribute__((__optimize__("O0"))) +__attribute__((__noipa__)) static void drgn_test_kthread_fn3(void) { // Create some local variables for the test cases to use. Use volatile - // to make doubly sure that they aren't optimized out. + // to prevent them from being optimized out. 
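+	// (volatile forces every access to go through memory, so the
+	// variables and their values stay visible in this function's stack
+	// frame for the test cases to find.)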
volatile int a, b, c; volatile struct drgn_test_small_slab_object *slab_object; @@ -1105,14 +1105,15 @@ static void drgn_test_kthread_fn3(void) __asm__ __volatile__ ("" : : "r" (&slab_object) : "memory"); } - __attribute__((__optimize__("O0"))) +__attribute__((__noipa__)) static void drgn_test_kthread_fn2(void) { drgn_test_kthread_fn3(); + barrier(); // Prevent tail call. } - __attribute__((__optimize__("O0"))) -static int drgn_test_kthread_fn(void *arg) +__attribute__((__noipa__)) +static noinline int drgn_test_kthread_fn(void *arg) { drgn_test_kthread_fn2(); return 0; diff --git a/vmtest/__main__.py b/vmtest/__main__.py index bc97a1a3e..58920c0d9 100644 --- a/vmtest/__main__.py +++ b/vmtest/__main__.py @@ -1,6 +1,9 @@ #!/usr/bin/env python3 -from collections import OrderedDict +from collections import deque +import concurrent.futures +import contextlib +import functools import logging import os from pathlib import Path @@ -8,91 +11,48 @@ import shutil import subprocess import sys -from typing import Dict, List, TextIO +import time +import traceback +from typing import ( + TYPE_CHECKING, + Callable, + Deque, + Dict, + List, + Optional, + Protocol, + Set, + TextIO, + Tuple, + Union, +) -from util import KernelVersion +from util import KernelVersion, nproc from vmtest.config import ( ARCHITECTURES, HOST_ARCHITECTURE, KERNEL_FLAVORS, SUPPORTED_KERNEL_VERSIONS, Architecture, + Compiler, Kernel, ) -from vmtest.download import ( - Download, - DownloadCompiler, - DownloadKernel, - download_in_thread, -) +from vmtest.download import Downloader from vmtest.rootfsbuild import build_drgn_in_rootfs from vmtest.vm import LostVMError, TestKmodMode, run_in_vm logger = logging.getLogger(__name__) +if TYPE_CHECKING: + if sys.version_info < (3, 10): + from typing_extensions import ParamSpec + else: + from typing import ParamSpec # novermin + _P = ParamSpec("_P") -class _ProgressPrinter: - def __init__(self, file: TextIO) -> None: - self._file = file - if hasattr(file, "fileno"): - try: - columns = os.get_terminal_size(file.fileno())[0] - self._color = True - except OSError: - columns = 80 - self._color = False - self._header = "#" * columns - self._passed: Dict[str, List[str]] = {} - self._failed: Dict[str, List[str]] = {} - - def succeeded(self) -> bool: - return not self._failed - - def _green(self, s: str) -> str: - if self._color: - return "\033[32m" + s + "\033[0m" - else: - return s - - def _red(self, s: str) -> str: - if self._color: - return "\033[31m" + s + "\033[0m" - else: - return s - - def update(self, category: str, name: str, passed: bool) -> None: - d = self._passed if passed else self._failed - d.setdefault(category, []).append(name) - if self._failed: - header = self._red(self._header) - else: - header = self._green(self._header) - - print(header, file=self._file) - print(file=self._file) - - if self._passed: - first = True - for category, names in self._passed.items(): - if first: - first = False - print(self._green("Passed:"), end=" ", file=self._file) - else: - print(" ", end=" ", file=self._file) - print(f"{category}: {', '.join(names)}", file=self._file) - if self._failed: - first = True - for category, names in self._failed.items(): - if first: - first = False - print(self._red("Failed:"), end=" ", file=self._file) - else: - print(" ", end=" ", file=self._file) - print(f"{category}: {', '.join(names)}", file=self._file) - - print(file=self._file) - print(header, file=self._file, flush=True) +class _TestFunction(Protocol): + def __call__(self, *, outfile: Optional[TextIO] = None) -> 
bool: ... def _kernel_version_is_supported(version: str, arch: Architecture) -> bool: @@ -131,12 +91,418 @@ def _kdump_works(kernel: Kernel) -> bool: assert False, kernel.arch.name +def _default_parallelism(mem_gb: float = 2, cpu: float = 1.75) -> int: + for line in open("/proc/meminfo"): + fields = line.split() + if fields[0] == "MemAvailable:": + mem_available_gb = int(fields[1]) / (1024 * 1024) + break + else: + return 1 + + limit_mem = mem_available_gb // mem_gb + limit_cpu = nproc() // cpu + return int(max(1, min(limit_mem, limit_cpu))) + + +class _TestRunner: + def __init__( + self, *, directory: Path, jobs: Optional[int], use_host_rootfs: bool + ) -> None: + self._directory = directory + if jobs is None: + self._jobs = 1 + elif jobs == 0: + self._jobs = _default_parallelism() + logger.info("using default parallelism %d", self._jobs) + else: + self._jobs = jobs + logger.info("using parallelism %d", self._jobs) + self._foreground = jobs is None + self._use_host_rootfs = use_host_rootfs + + self._compilers_to_resolve: Dict[Architecture, None] = {} + self._kernels_to_resolve: Dict[Tuple[Architecture, str], None] = {} + self._drgn_builds: Dict[Architecture, None] = {} + + # + 1 for download tasks. + self._pool = concurrent.futures.ThreadPoolExecutor(max_workers=self._jobs + 1) + self._futures: Set["concurrent.futures.Future[Callable[[], bool]]"] = set() + + self._downloader = Downloader(directory) + self._download_queue: Deque[Union[Compiler, Kernel]] = deque() + + self._test_queue: Deque[Tuple[str, str, _TestFunction]] = deque() + self._tests_running: Dict[Tuple[str, str], float] = {} + self._tests_passed: Dict[str, List[str]] = {} + self._tests_failed: Dict[str, List[str]] = {} + + try: + self._color = os.isatty(sys.stderr.fileno()) + except (AttributeError, OSError): + self._color = False + + def add_kernel(self, arch: Architecture, pattern: str) -> None: + self._compilers_to_resolve[arch] = None + self._kernels_to_resolve[(arch, pattern)] = None + self._drgn_builds[arch] = None + + def add_local(self, arch: Architecture) -> None: + self._drgn_builds[arch] = None + self._queue_local_test(arch) + + def _submit( + self, + fn: Callable["_P", Callable[[], bool]], + *args: "_P.args", + **kwargs: "_P.kwargs", + ) -> None: + self._futures.add(self._pool.submit(fn, *args, **kwargs)) + + def run(self) -> bool: + try: + self._submit(self._resolve_downloads) + + self._submit_next_drgn_build() + + self._print_progress() + while self._futures: + done, self._futures = concurrent.futures.wait( + self._futures, + timeout=None if self._foreground else 1, + return_when=concurrent.futures.FIRST_COMPLETED, + ) + update_progress = not self._foreground + for future in done: + callback = future.result() + update_progress |= callback() + if update_progress: + self._print_progress() + except Exception: + traceback.print_exc() + return False + finally: + for future in self._futures: + future.cancel() + self._pool.shutdown() + return not self._tests_failed + + def _green(self, s: str) -> str: + if self._color: + return "\033[32m" + s + "\033[m" + else: + return s + + def _red(self, s: str) -> str: + if self._color: + return "\033[31m" + s + "\033[m" + else: + return s + + def _yellow(self, s: str) -> str: + if self._color: + return "\033[33m" + s + "\033[m" + else: + return s + + def _cyan(self, s: str) -> str: + if self._color: + return "\033[36m" + s + "\033[m" + else: + return s + + def _print_progress(self) -> None: + parts = [] + if self._foreground: + endl = "\n" + else: + # To minimize flicker, we 
overwrite the output instead of clearing. + parts.append("\033[H") # Move cursor to top left corner. + endl = "\033[K\n" # Clear to the end of line on each newline. + if self._compilers_to_resolve or self._kernels_to_resolve: + parts.append(self._cyan("Queueing downloads...")) + parts.append(endl) + elif self._download_queue: + num_compilers = sum( + isinstance(download, Compiler) for download in self._download_queue + ) + num_kernels = len(self._download_queue) - num_compilers + + downloading_parts = [] + if num_compilers == 1: + downloading_parts.append("1 compiler") + elif num_compilers > 1: + downloading_parts.append(f"{num_compilers} compilers") + if num_kernels == 1: + downloading_parts.append("1 kernel") + elif num_kernels > 1: + downloading_parts.append(f"{num_kernels} kernels") + + parts.append( + self._cyan(f"Downloading {' and '.join(downloading_parts)}...") + ) + parts.append(endl) + + if self._test_queue: + parts.append(self._cyan(f"{len(self._test_queue)} tests waiting...")) + parts.append(endl) + + if self._drgn_builds: + parts.append(self._yellow("Building: ")) + parts.append(", ".join([arch.name for arch in self._drgn_builds])) + parts.append(endl) + + now = time.monotonic() + first = True + for (category_name, test_name), start_time in reversed( + self._tests_running.items() + ): + if first: + parts.append(self._yellow("Running: ")) + first = False + else: + parts.append(" ") + parts.append(f"{category_name}: {test_name} ({int(now - start_time)}s)") + parts.append(endl) + + for title, results, color in ( + ("Passed", self._tests_passed, self._green), + ("Failed", self._tests_failed, self._red), + ): + first = True + for category_name, test_names in sorted(results.items()): + if first: + parts.append(color(title + ":")) + parts.append(" ") + first = False + else: + parts.append(" " * (len(title) + 2)) + parts.append(f"{category_name}: {', '.join(test_names)}") + parts.append(endl) + + if not self._foreground: + parts.append("\033[J") # Clear the rest of the screen. 
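+        # (These are standard ANSI escape sequences: ESC[H homes the cursor,
+        # ESC[K erases to the end of the line, and ESC[J erases the rest of
+        # the screen, so the previous progress output is overwritten in place
+        # without flicker.)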
+ sys.stderr.write("".join(parts)) + + def _submit_next_drgn_build(self) -> None: + if self._drgn_builds: + self._submit(self._build_drgn, next(iter(self._drgn_builds))) + else: + self._submit_tests() + + def _rootfs(self, arch: Architecture) -> Path: + if self._use_host_rootfs and arch is HOST_ARCHITECTURE: + return Path("/") + else: + return self._directory / arch.name / "rootfs" + + def _build_drgn(self, arch: Architecture) -> Callable[[], bool]: + with contextlib.ExitStack() as exit_stack: + if self._foreground: + outfile = None + else: + outfile = exit_stack.enter_context( + (self._directory / "log" / f"{arch.name}-build.log").open("w") + ) + rootfs = self._rootfs(arch) + if rootfs == Path("/"): + subprocess.check_call( + [sys.executable, "setup.py", "build_ext", "-i"], + stdout=outfile, + stderr=outfile, + ) + else: + build_drgn_in_rootfs(rootfs, outfile=outfile) + return functools.partial(self._drgn_build_done, arch) + + def _drgn_build_done(self, arch: Architecture) -> bool: + del self._drgn_builds[arch] + self._submit_next_drgn_build() + return not self._foreground + + def _resolve_downloads(self) -> Callable[[], bool]: + for target in self._compilers_to_resolve: + compiler = self._downloader.resolve_compiler(target) + self._download_queue.append(compiler) + + for arch, pattern in self._kernels_to_resolve: + kernel = self._downloader.resolve_kernel(arch, pattern) + self._download_queue.append(kernel) + + return self._resolved_downloads + + def _resolved_downloads(self) -> bool: + self._compilers_to_resolve.clear() + self._kernels_to_resolve.clear() + return self._submit_next_download() + + def _submit_next_download(self) -> bool: + if self._download_queue: + self._submit(self._download, self._download_queue[0]) + return not self._foreground + + def _download(self, download: Union[Compiler, Kernel]) -> Callable[[], bool]: + if isinstance(download, Compiler): + self._downloader.download_compiler(download) + else: + self._downloader.download_kernel(download) + return functools.partial(self._download_done, download) + + def _download_done(self, download: Union[Compiler, Kernel]) -> bool: + popped = self._download_queue.popleft() + assert popped is download + self._submit_next_download() + if isinstance(download, Kernel): + self._queue_kernel_test(download) + return not self._foreground + + def _queue_local_test(self, arch: Architecture) -> None: + self._queue_test(arch.name, "local", functools.partial(self._test_local, arch)) + + def _queue_kernel_test(self, kernel: Kernel) -> None: + self._queue_test( + kernel.arch.name, + kernel.release, + functools.partial(self._test_kernel, kernel), + ) + + def _queue_test( + self, category_name: str, test_name: str, fn: _TestFunction + ) -> None: + self._test_queue.append((category_name, test_name, fn)) + logger.info("%s %s test queued", category_name, test_name) + if not self._drgn_builds: + self._submit_tests() + + def _submit_tests(self) -> None: + assert not self._drgn_builds + while self._test_queue and len(self._tests_running) < self._jobs: + category_name, test_name, fn = self._test_queue.popleft() + self._tests_running[(category_name, test_name)] = time.monotonic() + logger.info("%s %s test started", category_name, test_name) + self._submit(self._test_wrapper, category_name, test_name, fn) + + def _test_wrapper( + self, category_name: str, test_name: str, fn: _TestFunction + ) -> Callable[[], bool]: + with contextlib.ExitStack() as exit_stack: + if self._foreground: + outfile = None + else: + outfile = exit_stack.enter_context( + 
(self._directory / "log" / f"{category_name}-{test_name}.log").open( + "w" + ) + ) + success = fn(outfile=outfile) + return functools.partial(self._test_done, category_name, test_name, success) + + def _test_done(self, category_name: str, test_name: str, success: bool) -> bool: + start_time = self._tests_running.pop((category_name, test_name)) + logger.info( + "%s %s test %s (%ds)", + category_name, + test_name, + "passed" if success else "failed", + time.monotonic() - start_time, + ) + (self._tests_passed if success else self._tests_failed).setdefault( + category_name, [] + ).append(test_name) + self._submit_tests() + return True + + def _test_local( + self, arch: Architecture, *, outfile: Optional[TextIO] = None + ) -> bool: + rootfs = self._rootfs(arch) + if rootfs == Path("/"): + args = [ + sys.executable, + "-m", + "pytest", + "-v", + "--ignore=tests/linux_kernel", + ] + else: + args = [ + "unshare", + "--map-root-user", + "--map-users=auto", + "--map-groups=auto", + "--fork", + "--pid", + "--mount-proc=" + str(rootfs / "proc"), + "sh", + "-c", + """\ +set -e + +mount --bind . "$1/mnt" +chroot "$1" sh -c 'cd /mnt && pytest -v --ignore=tests/linux_kernel' +""", + "sh", + str(rootfs), + ] + return subprocess.call(args, stdout=outfile, stderr=outfile) == 0 + + def _test_kernel(self, kernel: Kernel, *, outfile: Optional[TextIO] = None) -> bool: + rootfs = self._rootfs(kernel.arch) + if rootfs == Path("/"): + python_executable = sys.executable + else: + python_executable = "/usr/bin/python3" + + if kernel.arch is HOST_ARCHITECTURE: + tests_expression = "" + else: + # Skip excessively slow tests when emulating. + tests_expression = "-k 'not test_slab_cache_for_each_allocated_object and not test_mtree_load_three_levels'" + + if _kdump_works(kernel): + kdump_command = """\ + "$PYTHON" -Bm vmtest.enter_kdump + # We should crash and not reach this. + exit 1 +""" + else: + kdump_command = "" + + test_command = rf""" +set -e + +export PYTHON={shlex.quote(python_executable)} +export DRGN_RUN_LINUX_KERNEL_TESTS=1 +if [ -e /proc/vmcore ]; then + "$PYTHON" -Bm pytest -v tests/linux_kernel/vmcore +else + insmod "$DRGN_TEST_KMOD" + "$PYTHON" -Bm pytest -v tests/linux_kernel --ignore=tests/linux_kernel/vmcore {tests_expression} +{kdump_command} +fi +""" + + try: + status = run_in_vm( + test_command, + kernel, + rootfs, + self._directory, + test_kmod=TestKmodMode.BUILD, + outfile=outfile, + ) + return status == 0 + except ( + LostVMError, + subprocess.CalledProcessError, # For kmod build errors. + ) as e: + print(e, file=sys.stderr if outfile is None else outfile) + return False + + if __name__ == "__main__": import argparse - logging.basicConfig( - format="%(asctime)s:%(levelname)s:%(name)s:%(message)s", level=logging.INFO - ) parser = argparse.ArgumentParser( description="test drgn in a virtual machine", formatter_class=argparse.ArgumentDefaultsHelpFormatter, @@ -183,6 +549,16 @@ def _kdump_works(kernel: Kernel) -> bool: action="store_true", help="run local tests", ) + parser.add_argument( + "-j", + "--jobs", + type=int, + nargs="?", + default=argparse.SUPPRESS, + help="number of tests to run in parallel (default: 1). 
" + "If the argument is omitted or 0, " + "an appropriate number is chosen automatically", + ) parser.add_argument( "--use-host-rootfs", choices=["never", "auto"], @@ -195,51 +571,54 @@ def _kdump_works(kernel: Kernel) -> bool: if not hasattr(args, "kernels") and not args.local: parser.error("at least one of -k/--kernel or -l/--local is required") - if args.use_host_rootfs == "auto": + if hasattr(args, "jobs"): + if args.jobs is None: + args.jobs = 0 - def use_host_rootfs(arch: Architecture) -> bool: - return arch is HOST_ARCHITECTURE + log_directory = args.directory / "log" + log_old_directory = args.directory / "log.old" - else: + try: + shutil.rmtree(log_old_directory) + except FileNotFoundError: + pass + try: + log_directory.rename(log_old_directory) + except FileNotFoundError: + pass + log_directory.mkdir(parents=True) - def use_host_rootfs(arch: Architecture) -> bool: - return False + main_log_path = log_directory / "main.log" + else: + args.jobs = None + main_log_path = None + logging.basicConfig( + format="%(asctime)s:%(levelname)s:%(name)s:%(message)s", + level=logging.INFO, + filename=main_log_path, + ) - architecture_names: List[str] = [] + architectures: Dict[Architecture, None] = {} if hasattr(args, "architectures"): for name in args.architectures: if name == "all": - architecture_names.extend(ARCHITECTURES) + for arch in ARCHITECTURES.values(): + architectures[arch] = None elif name == "foreign": - architecture_names.extend( - [ - arch.name - for arch in ARCHITECTURES.values() - if arch is not HOST_ARCHITECTURE - ] - ) + for arch in ARCHITECTURES.values(): + if arch is not HOST_ARCHITECTURE: + architectures[arch] = None else: - architecture_names.append(name) - architectures = [ - ARCHITECTURES[name] for name in OrderedDict.fromkeys(architecture_names) - ] + architectures[ARCHITECTURES[name]] = None else: assert HOST_ARCHITECTURE is not None - architectures = [HOST_ARCHITECTURE] - - seen_arches = set() - seen_kernels = set() - to_download: List[Download] = [] - kernels = [] - - def add_kernel(arch: Architecture, pattern: str) -> None: - key = (arch.name, pattern) - if key not in seen_kernels: - seen_kernels.add(key) - if arch.name not in seen_arches: - seen_arches.add(arch.name) - to_download.append(DownloadCompiler(arch)) - kernels.append(DownloadKernel(arch, pattern)) + architectures = {HOST_ARCHITECTURE: None} + + runner = _TestRunner( + directory=args.directory, + jobs=args.jobs, + use_host_rootfs=args.use_host_rootfs == "auto", + ) if hasattr(args, "kernels"): for pattern in args.kernels: @@ -248,133 +627,20 @@ def add_kernel(arch: Architecture, pattern: str) -> None: for arch in architectures: if _kernel_version_is_supported(version, arch): for flavor in KERNEL_FLAVORS.values(): - add_kernel(arch, version + ".*" + flavor.name) + runner.add_kernel(arch, version + ".*" + flavor.name) elif pattern in KERNEL_FLAVORS: flavor = KERNEL_FLAVORS[pattern] for version in SUPPORTED_KERNEL_VERSIONS: for arch in architectures: if _kernel_version_is_supported(version, arch): - add_kernel(arch, version + ".*" + flavor.name) + runner.add_kernel(arch, version + ".*" + flavor.name) else: for arch in architectures: - add_kernel(arch, pattern) + runner.add_kernel(arch, pattern) - to_download.extend(kernels) - - progress = _ProgressPrinter(sys.stderr) - - in_github_actions = os.getenv("GITHUB_ACTIONS") == "true" - - # Downloading too many files before they can be used for testing runs the - # risk of filling up the limited disk space is Github Actions. 
Set a limit - # of no more than 5 files which can be downloaded ahead of time. This is a - # magic number which is inexact, but works well enough. - # Note that Github Actions does not run vmtest via this script currently, - # but may in the future. - max_pending_kernels = 5 if in_github_actions else 0 - - with download_in_thread( - args.directory, to_download, max_pending_kernels - ) as downloads: + if args.local: for arch in architectures: - if use_host_rootfs(arch): - subprocess.check_call( - [sys.executable, "setup.py", "build_ext", "-i"], - env={ - **os.environ, - "CONFIGURE_FLAGS": "--enable-compiler-warnings=error", - }, - ) - if args.local: - logger.info("running local tests on %s", arch.name) - status = subprocess.call( - [ - sys.executable, - "-m", - "pytest", - "-v", - "--ignore=tests/linux_kernel", - ] - ) - progress.update(arch.name, "local", status == 0) - else: - rootfs = args.directory / arch.name / "rootfs" - build_drgn_in_rootfs(rootfs) - if args.local: - logger.info("running local tests on %s", arch.name) - status = subprocess.call( - [ - "unshare", - "--map-root-user", - "--map-users=auto", - "--map-groups=auto", - "--fork", - "--pid", - "--mount-proc=" + str(rootfs / "proc"), - "sh", - "-c", - r""" -set -e - -mount --bind . "$1/mnt" -chroot "$1" sh -c 'cd /mnt && pytest -v --ignore=tests/linux_kernel' -""", - "sh", - rootfs, - ] - ) - progress.update(arch.name, "local", status == 0) - for kernel in downloads: - if not isinstance(kernel, Kernel): - continue - - if use_host_rootfs(kernel.arch): - python_executable = sys.executable - tests_expression = "" - else: - python_executable = "/usr/bin/python3" - # Skip excessively slow tests when emulating. - tests_expression = "-k 'not test_slab_cache_for_each_allocated_object and not test_mtree_load_three_levels'" - - if _kdump_works(kernel): - kdump_command = """\ - "$PYTHON" -Bm vmtest.enter_kdump - # We should crash and not reach this. 
- exit 1 -""" - else: - kdump_command = "" + runner.add_local(arch) - test_command = rf""" -set -e - -export PYTHON={shlex.quote(python_executable)} -export DRGN_RUN_LINUX_KERNEL_TESTS=1 -if [ -e /proc/vmcore ]; then - "$PYTHON" -Bm pytest -v tests/linux_kernel/vmcore -else - insmod "$DRGN_TEST_KMOD" - "$PYTHON" -Bm pytest -v tests/linux_kernel --ignore=tests/linux_kernel/vmcore {tests_expression} -{kdump_command} -fi -""" - try: - status = run_in_vm( - test_command, - kernel, - ( - Path("/") - if use_host_rootfs(kernel.arch) - else args.directory / kernel.arch.name / "rootfs" - ), - args.directory, - test_kmod=TestKmodMode.BUILD, - ) - except LostVMError as e: - print("error:", e, file=sys.stderr) - status = -1 - - if in_github_actions: - shutil.rmtree(kernel.path) - progress.update(kernel.arch.name, kernel.release, status == 0) - sys.exit(0 if progress.succeeded() else 1) + success = runner.run() + sys.exit(0 if success else 1) diff --git a/vmtest/chroot.py b/vmtest/chroot.py new file mode 100644 index 000000000..617fb9cc5 --- /dev/null +++ b/vmtest/chroot.py @@ -0,0 +1,64 @@ +# Copyright (c) 2025 Oracle and/or its affiliates +# SPDX-License-Identifier: LGPL-2.1-or-later +import argparse +import os +from pathlib import Path +import subprocess +import sys + +from vmtest.config import ARCHITECTURES, HOST_ARCHITECTURE + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="run commands in the root filesystems for vmtest", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "-d", + "--directory", + metavar="DIR", + type=Path, + default="build/vmtest", + help="directory for vmtest artifacts", + ) + parser.add_argument( + "-a", + "--architecture", + type=str, + choices=sorted(ARCHITECTURES), + default=None if HOST_ARCHITECTURE is None else HOST_ARCHITECTURE.name, + required=HOST_ARCHITECTURE is None, + help="architecture to run in", + ) + parser.add_argument( + "command", + type=str, + nargs=argparse.REMAINDER, + help="command to run in rootfs (default: bash -i)", + ) + args = parser.parse_args() + arch = ARCHITECTURES[args.architecture] + dir = args.directory / arch.name / "rootfs" + command = args.command or ["bash", "-i"] + env_passthrough = { + "TERM", + "COLORTERM", + } + filtered_env = {k: v for k, v in os.environ.items() if k in env_passthrough} + sys.exit( + subprocess.run( + [ + "unshare", + "--map-root-user", + "--map-users=auto", + "--map-groups=auto", + "--fork", + "--pid", + f"--mount-proc={dir / 'proc'}", + "chroot", + dir, + *command, + ], + env=filtered_env, + ).returncode + ) diff --git a/vmtest/config.py b/vmtest/config.py index 5382a302b..12d603122 100644 --- a/vmtest/config.py +++ b/vmtest/config.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: LGPL-2.1-or-later -from collections import OrderedDict +import dataclasses import inspect import os from pathlib import Path @@ -187,14 +187,15 @@ """ -class KernelFlavor(NamedTuple): +@dataclasses.dataclass(frozen=True, eq=False) +class KernelFlavor: name: str description: str config: str -KERNEL_FLAVORS = OrderedDict( - (flavor.name, flavor) +KERNEL_FLAVORS = { + flavor.name: flavor for flavor in ( KernelFlavor( name="default", @@ -246,10 +247,11 @@ class KernelFlavor(NamedTuple): """, ), ) -) +} -class Architecture(NamedTuple): +@dataclasses.dataclass(frozen=True, eq=False) +class Architecture: # Architecture name. This matches the names used by # _drgn_util.platform.NORMALIZED_MACHINE_NAME and qemu-system-$arch_name. 
name: str @@ -450,6 +452,8 @@ def kconfig_localversion(arch: Architecture, flavor: KernelFlavor, version: str) # rebuilt, conditionally increment the patch level here. if flavor.name == "alternative" and KernelVersion(version) >= KernelVersion("6.8"): patch_level += 1 + if KernelVersion(version) < KernelVersion("5.10"): + patch_level += 1 if patch_level: vmtest_kernel_version.append(patch_level) diff --git a/vmtest/download.py b/vmtest/download.py index 11327afe0..b8faff89b 100644 --- a/vmtest/download.py +++ b/vmtest/download.py @@ -79,52 +79,8 @@ class DownloadCompiler(NamedTuple): Downloaded = Union[Kernel, Compiler] -def _download_kernel( - gh: GitHubApi, arch: Architecture, release: str, url: Optional[str], dir: Path -) -> Kernel: - if url is None: - logger.info( - "kernel release %s for %s already downloaded to %s", release, arch.name, dir - ) - else: - logger.info( - "downloading kernel release %s for %s to %s from %s", - release, - arch.name, - dir, - url, - ) - dir.parent.mkdir(parents=True, exist_ok=True) - tmp_dir = Path(tempfile.mkdtemp(dir=dir.parent)) - try: - # Don't assume that the available version of tar has zstd support or - # the non-standard -I/--use-compress-program option. - with subprocess.Popen( - ["zstd", "-d", "-", "--stdout"], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - ) as zstd_proc, subprocess.Popen( - ["tar", "-C", str(tmp_dir), "-x"], - stdin=zstd_proc.stdout, - ) as tar_proc: - assert zstd_proc.stdin is not None - try: - with gh.download(url) as resp: - shutil.copyfileobj(resp, zstd_proc.stdin) - finally: - zstd_proc.stdin.close() - if zstd_proc.returncode != 0: - raise subprocess.CalledProcessError( - zstd_proc.returncode, zstd_proc.args - ) - if tar_proc.returncode != 0: - raise subprocess.CalledProcessError(tar_proc.returncode, tar_proc.args) - except BaseException: - shutil.rmtree(tmp_dir, ignore_errors=True) - raise - else: - tmp_dir.rename(dir) - return Kernel(arch, release, dir) +class DownloadNotFoundError(Exception): + pass _KERNEL_ORG_COMPILER_HOST_NAME = { @@ -136,7 +92,7 @@ def _download_kernel( def downloaded_compiler(download_dir: Path, target: Architecture) -> Compiler: if _KERNEL_ORG_COMPILER_HOST_NAME is None: - raise FileNotFoundError( + raise DownloadNotFoundError( f"kernel.org compilers are not available for {NORMALIZED_MACHINE_NAME} hosts" ) return Compiler( @@ -148,126 +104,163 @@ def downloaded_compiler(download_dir: Path, target: Architecture) -> Compiler: ) -def _download_compiler(compiler: Compiler) -> Compiler: - dir = compiler.bin.parent - if dir.exists(): - logger.info( - "compiler for %s already downloaded to %s", compiler.target.name, dir - ) - else: - url = f"{COMPILER_URL}files/bin/{_KERNEL_ORG_COMPILER_HOST_NAME}/{KERNEL_ORG_COMPILER_VERSION}/{dir.name}.tar.xz" +class Downloader: + def __init__(self, directory: Path) -> None: + self._directory = directory + self._gh = GitHubApi(os.getenv("GITHUB_TOKEN")) + self._cached_kernel_releases: Optional[Dict[str, Dict[str, GitHubAsset]]] = None + + def _available_kernel_releases(self) -> Dict[str, Dict[str, GitHubAsset]]: + if self._cached_kernel_releases is None: + logger.info("getting available kernel releases") + self._directory.mkdir(parents=True, exist_ok=True) + self._cached_kernel_releases = available_kernel_releases( + self._gh.get_release_by_tag( + *VMTEST_GITHUB_RELEASE, + cache=self._directory / "github_release.json", + ), + ) + return self._cached_kernel_releases + + def resolve_kernel(self, arch: Architecture, pattern: str) -> Kernel: + if pattern == 
glob.escape(pattern): + release = pattern + else: + try: + release = max( + ( + available + for available in self._available_kernel_releases()[arch.name] + if fnmatch.fnmatch(available, pattern) + ), + key=KernelVersion, + ) + except ValueError: + raise DownloadNotFoundError( + f"no available kernel release matches {pattern!r} on {arch.name}" + ) + else: + logger.info( + "kernel release pattern %s matches %s on %s", + pattern, + release, + arch.name, + ) + kernel_dir = self._directory / arch.name / ("kernel-" + release) + if ( + not kernel_dir.exists() + and release not in self._available_kernel_releases()[arch.name] + ): + raise DownloadNotFoundError( + f"kernel release {release} not found on {arch.name}" + ) + return Kernel(arch, release, kernel_dir) + + def download_kernel(self, kernel: Kernel) -> Kernel: + if kernel.path.exists(): + # As a policy, vmtest assets will never be updated with the same + # name. Therefore, if the kernel was previously downloaded, we + # don't need to download it again. + logger.info( + "kernel release %s for %s already downloaded to %s", + kernel.release, + kernel.arch.name, + kernel.path, + ) + return kernel + + url = self._available_kernel_releases()[kernel.arch.name][kernel.release]["url"] logger.info( - "downloading compiler for %s from %s to %s", compiler.target.name, url, dir + "downloading kernel release %s for %s to %s from %s", + kernel.release, + kernel.arch.name, + kernel.path, + url, ) - dir.parent.mkdir(parents=True, exist_ok=True) - with tempfile.TemporaryDirectory(dir=dir.parent) as tmp_name: - tmp_dir = Path(tmp_name) + kernel.path.parent.mkdir(parents=True, exist_ok=True) + tmp_dir = Path(tempfile.mkdtemp(dir=kernel.path.parent)) + try: + # Don't assume that the available version of tar has zstd support or + # the non-standard -I/--use-compress-program option. with subprocess.Popen( - ["xz", "--decompress"], + ["zstd", "-d", "-", "--stdout"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, - ) as xz_proc, subprocess.Popen( + ) as zstd_proc, subprocess.Popen( ["tar", "-C", str(tmp_dir), "-x"], - stdin=xz_proc.stdout, + stdin=zstd_proc.stdout, ) as tar_proc: - assert xz_proc.stdin is not None + assert zstd_proc.stdin is not None try: - with urllib.request.urlopen(url) as resp: - shutil.copyfileobj(resp, xz_proc.stdin) + with self._gh.download(url) as resp: + shutil.copyfileobj(resp, zstd_proc.stdin) finally: - xz_proc.stdin.close() - if xz_proc.returncode != 0: - raise subprocess.CalledProcessError(xz_proc.returncode, xz_proc.args) + zstd_proc.stdin.close() + if zstd_proc.returncode != 0: + raise subprocess.CalledProcessError( + zstd_proc.returncode, zstd_proc.args + ) if tar_proc.returncode != 0: raise subprocess.CalledProcessError(tar_proc.returncode, tar_proc.args) - archive_subdir = Path( - f"gcc-{KERNEL_ORG_COMPILER_VERSION}-nolibc/{compiler.target.kernel_org_compiler_name}" - ) - archive_bin_subdir = archive_subdir / "bin" - if not (tmp_dir / archive_bin_subdir).exists(): - raise FileNotFoundError( - f"downloaded archive does not contain {archive_bin_subdir}" - ) - (tmp_dir / archive_subdir).rename(dir) - return compiler - - -def download(download_dir: Path, downloads: Iterable[Download]) -> Iterator[Downloaded]: - gh = GitHubApi(os.getenv("GITHUB_TOKEN")) + except BaseException: + shutil.rmtree(tmp_dir, ignore_errors=True) + raise + else: + tmp_dir.rename(kernel.path) + return kernel - # We don't want to make any API requests if we don't have to, so we don't - # fetch this until we need it. 
- cached_kernel_releases = None + def resolve_compiler(self, target: Architecture) -> Compiler: + return downloaded_compiler(self._directory, target) - def get_available_kernel_releases() -> Dict[str, Dict[str, GitHubAsset]]: - nonlocal cached_kernel_releases - if cached_kernel_releases is None: - logger.info("getting available kernel releases") - download_dir.mkdir(parents=True, exist_ok=True) - cached_kernel_releases = available_kernel_releases( - gh.get_release_by_tag( - *VMTEST_GITHUB_RELEASE, cache=download_dir / "github_release.json" - ), + def download_compiler(self, compiler: Compiler) -> Compiler: + dir = compiler.bin.parent + if dir.exists(): + logger.info( + "compiler for %s already downloaded to %s", compiler.target.name, dir ) - return cached_kernel_releases - - download_calls: List[Callable[[], Downloaded]] = [] - for download in downloads: - if isinstance(download, DownloadKernel): - if download.pattern == glob.escape(download.pattern): - release = download.pattern - else: - try: - release = max( - ( - available - for available in get_available_kernel_releases()[ - download.arch.name - ] - if fnmatch.fnmatch(available, download.pattern) - ), - key=KernelVersion, - ) - except ValueError: - raise Exception( - f"no available kernel release matches {download.pattern!r} on {download.arch.name}" + else: + url = f"{COMPILER_URL}files/bin/{_KERNEL_ORG_COMPILER_HOST_NAME}/{KERNEL_ORG_COMPILER_VERSION}/{dir.name}.tar.xz" + logger.info( + "downloading compiler for %s from %s to %s", + compiler.target.name, + url, + dir, + ) + dir.parent.mkdir(parents=True, exist_ok=True) + with tempfile.TemporaryDirectory(dir=dir.parent) as tmp_name: + tmp_dir = Path(tmp_name) + with subprocess.Popen( + ["xz", "--decompress"], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + ) as xz_proc, subprocess.Popen( + ["tar", "-C", str(tmp_dir), "-x"], + stdin=xz_proc.stdout, + ) as tar_proc: + assert xz_proc.stdin is not None + try: + with urllib.request.urlopen(url) as resp: + shutil.copyfileobj(resp, xz_proc.stdin) + finally: + xz_proc.stdin.close() + if xz_proc.returncode != 0: + raise subprocess.CalledProcessError( + xz_proc.returncode, xz_proc.args ) - else: - logger.info( - "kernel release pattern %s matches %s on %s", - download.pattern, - release, - download.arch.name, + if tar_proc.returncode != 0: + raise subprocess.CalledProcessError( + tar_proc.returncode, tar_proc.args ) - kernel_dir = download_dir / download.arch.name / ("kernel-" + release) - if kernel_dir.exists(): - # As a policy, vmtest assets will never be updated with the - # same name. Therefore, if the kernel was previously - # downloaded, we don't need to download it again. 
- url = None - else: - try: - asset = get_available_kernel_releases()[download.arch.name][release] - except KeyError: - raise Exception(f"kernel release {release} not found") - url = asset["url"] - download_calls.append( - functools.partial( - _download_kernel, gh, download.arch, release, url, kernel_dir - ) - ) - elif isinstance(download, DownloadCompiler): - download_calls.append( - functools.partial( - _download_compiler, - downloaded_compiler(download_dir, download.target), + archive_subdir = Path( + f"gcc-{KERNEL_ORG_COMPILER_VERSION}-nolibc/{compiler.target.kernel_org_compiler_name}" ) - ) - else: - assert False - - for call in download_calls: - yield call() + archive_bin_subdir = archive_subdir / "bin" + if not (tmp_dir / archive_bin_subdir).exists(): + raise FileNotFoundError( + f"downloaded archive does not contain {archive_bin_subdir}" + ) + (tmp_dir / archive_subdir).rename(dir) + return compiler def _download_thread( @@ -276,9 +269,25 @@ def _download_thread( q: "queue.Queue[Union[Downloaded, Exception]]", ) -> None: try: - it = download(download_dir, downloads) - while True: - q.put(next(it)) + downloader = Downloader(download_dir) + download_calls: List[Callable[[], Downloaded]] = [] + + for download in downloads: + if isinstance(download, DownloadKernel): + kernel = downloader.resolve_kernel(download.arch, download.pattern) + download_calls.append( + functools.partial(downloader.download_kernel, kernel) + ) + elif isinstance(download, DownloadCompiler): + compiler = downloader.resolve_compiler(download.target) + download_calls.append( + functools.partial(downloader.download_compiler, compiler) + ) + else: + assert False + + for call in download_calls: + q.put(call()) except Exception as e: q.put(e) @@ -390,14 +399,21 @@ def main() -> None: assert HOST_ARCHITECTURE is not None args.downloads[i] = DownloadCompiler(HOST_ARCHITECTURE) - for downloaded in download(args.download_directory, args.downloads): - if isinstance(downloaded, Kernel): + downloader = Downloader(args.download_directory) + for download in args.downloads: + if isinstance(download, DownloadKernel): + kernel = downloader.download_kernel( + downloader.resolve_kernel(download.arch, download.pattern) + ) print( - f"kernel: arch={downloaded.arch.name} release={downloaded.release} path={downloaded.path}" + f"kernel: arch={kernel.arch.name} release={kernel.release} path={kernel.path}" + ) + elif isinstance(download, DownloadCompiler): + compiler = downloader.download_compiler( + downloader.resolve_compiler(download.target) ) - elif isinstance(downloaded, Compiler): print( - f"compiler: target={downloaded.target.name} bin={downloaded.bin} prefix={downloaded.prefix}" + f"compiler: target={compiler.target.name} bin={compiler.bin} prefix={compiler.prefix}" ) else: assert False diff --git a/vmtest/githubapi.py b/vmtest/githubapi.py index af0a7aceb..ddd6de3b0 100644 --- a/vmtest/githubapi.py +++ b/vmtest/githubapi.py @@ -55,7 +55,7 @@ def _read_cache(self, cache: _CACHE) -> Optional[Mapping[str, Any]]: return None try: with open(cache, "r") as f: - return json.load(f) # type: ignore[no-any-return] + return json.load(f) except FileNotFoundError: return None @@ -69,15 +69,6 @@ def _cached_get_headers( return {**self._headers, "If-Modified-Since": cached["last_modified"]} return self._headers - @staticmethod - def _trust_cache(cached: Any) -> bool: - # If the request was cached and the VMTEST_TRUST_CACHE environment - # variable is non-zero, assume the cache is still valid. 
- try: - return cached is not None and int(os.getenv("VMTEST_TRUST_CACHE", "0")) != 0 - except ValueError: - return False - def _write_cache( self, cache: _CACHE, body: Any, headers: Mapping[str, str] ) -> None: @@ -128,23 +119,25 @@ def _request( method=method, ) # Work around python/cpython#77842. - if req.has_header("Authorization"): - authorization = req.get_header("Authorization") + authorization = req.get_header("Authorization") + if authorization is not None: req.remove_header("Authorization") req.add_unredirected_header("Authorization", authorization) return urllib.request.urlopen(req) def _cached_get_json(self, endpoint: str, cache: _CACHE) -> Any: cached = self._read_cache(cache) - if self._trust_cache(cached): + # If the request was cached and the VMTEST_TRUST_CACHE environment + # variable is set, assume the cache is still valid. + if cached is not None and "VMTEST_TRUST_CACHE" in os.environ: return cached["body"] req = urllib.request.Request( self._HOST + "/" + endpoint, headers=self._cached_get_headers(cached), ) # Work around python/cpython#77842. - if req.has_header("Authorization"): - authorization = req.get_header("Authorization") + authorization = req.get_header("Authorization") + if authorization is not None: req.remove_header("Authorization") req.add_unredirected_header("Authorization", authorization) try: @@ -184,7 +177,7 @@ def _request( async def _cached_get_json(self, endpoint: str, cache: _CACHE) -> Any: cached = self._read_cache(cache) - if self._trust_cache(cached): + if cached is not None and "VMTEST_TRUST_CACHE" in os.environ: return cached["body"] async with self._session.get( self._HOST + "/" + endpoint, diff --git a/vmtest/kbuild.py b/vmtest/kbuild.py index 1fd494cfc..812e50170 100644 --- a/vmtest/kbuild.py +++ b/vmtest/kbuild.py @@ -26,12 +26,11 @@ HOST_ARCHITECTURE, KERNEL_FLAVORS, Architecture, - Compiler, KernelFlavor, kconfig, kconfig_localversion, ) -from vmtest.download import COMPILER_URL, DownloadCompiler, download +from vmtest.download import COMPILER_URL, Downloader logger = logging.getLogger(__name__) @@ -180,6 +179,14 @@ class _Patch(NamedTuple): (None, KernelVersion("5.4.262")), ), ), + _Patch( + name="kbuild-Only-add-fno-var-tracking-assignments-for-old.patch", + versions=((KernelVersion("5.1"), KernelVersion("5.10")),), + ), + _Patch( + name="4.19-kbuild-Only-add-fno-var-tracking-assignments-for-old.patch", + versions=((None, KernelVersion("5.1")),), + ), ) @@ -209,6 +216,7 @@ async def apply_patches(kernel_dir: Path) -> None: cwd=kernel_dir, stderr=asyncio.subprocess.PIPE, ) + assert proc.stderr is not None # for mypy stderr = await proc.stderr.read() if await proc.wait() != 0: try: @@ -514,9 +522,15 @@ async def _test_external_module_build(self, modules_build_dir: Path) -> None: stderr=asyncio.subprocess.PIPE, env=self._env, ) + assert proc.stdout is not None # for mypy + assert proc.stderr is not None # for mypy try: - stdout_task = asyncio.create_task(proc.stdout.readline()) - stderr_task = asyncio.create_task(proc.stderr.readline()) + stdout_task: Optional[asyncio.Task[bytes]] = asyncio.create_task( + proc.stdout.readline() + ) + stderr_task: Optional[asyncio.Task[bytes]] = asyncio.create_task( + proc.stderr.readline() + ) error = False while stdout_task is not None or stderr_task is not None: aws = [] @@ -771,8 +785,8 @@ async def main() -> None: if hasattr(args, "download_compiler"): if args.download_compiler is None: args.download_compiler = default_download_compiler_directory - downloaded = next(download(args.download_compiler, 
[DownloadCompiler(arch)])) - assert isinstance(downloaded, Compiler) + downloader = Downloader(args.download_compiler) + downloaded = downloader.download_compiler(downloader.resolve_compiler(arch)) env = {**os.environ, **downloaded.env()} else: env = None diff --git a/vmtest/kmod.py b/vmtest/kmod.py index 9e686917d..a2906537c 100644 --- a/vmtest/kmod.py +++ b/vmtest/kmod.py @@ -7,6 +7,7 @@ import shutil import subprocess import tempfile +from typing import Optional, TextIO from util import nproc, out_of_date from vmtest.config import Kernel, local_kernel @@ -15,7 +16,9 @@ logger = logging.getLogger(__name__) -def build_kmod(download_dir: Path, kernel: Kernel) -> Path: +def build_kmod( + download_dir: Path, kernel: Kernel, outfile: Optional[TextIO] = None +) -> Path: kmod = kernel.path.parent / f"drgn_test-{kernel.release}.ko" # External modules can't do out-of-tree builds for some reason, so copy the # source files to a temporary directory and build the module there, then @@ -52,6 +55,8 @@ def build_kmod(download_dir: Path, kernel: Kernel) -> Path: str(nproc()), ], env={**os.environ, **compiler.env()}, + stdout=outfile, + stderr=outfile, ) (tmp_dir / "drgn_test.ko").rename(kmod) else: diff --git a/vmtest/manage.py b/vmtest/manage.py index 6d5626ab6..1a4036528 100644 --- a/vmtest/manage.py +++ b/vmtest/manage.py @@ -19,7 +19,6 @@ Sequence, Tuple, Union, - cast, ) import aiohttp @@ -35,12 +34,7 @@ KernelFlavor, kconfig_localversion, ) -from vmtest.download import ( - VMTEST_GITHUB_RELEASE, - DownloadCompiler, - available_kernel_releases, - download, -) +from vmtest.download import VMTEST_GITHUB_RELEASE, Downloader, available_kernel_releases from vmtest.githubapi import AioGitHubApi from vmtest.kbuild import KBuild, apply_patches @@ -366,16 +360,16 @@ async def main() -> None: ) if args.build: + downloader = Downloader(args.download_directory) compilers = { - cast(Compiler, downloaded).target.name: cast(Compiler, downloaded) - for downloaded in download( - args.download_directory, - { - arch.name: DownloadCompiler(arch) - for _, tag_arches_to_build in to_build - for arch, _ in tag_arches_to_build - }.values(), + arch.name: downloader.download_compiler( + downloader.resolve_compiler(arch) ) + for arch in { + arch.name: arch + for _, tag_arches_to_build in to_build + for arch, _ in tag_arches_to_build + }.values() } if args.upload: diff --git a/vmtest/patches/4.19-kbuild-Only-add-fno-var-tracking-assignments-for-old.patch b/vmtest/patches/4.19-kbuild-Only-add-fno-var-tracking-assignments-for-old.patch new file mode 100644 index 000000000..12f1af29b --- /dev/null +++ b/vmtest/patches/4.19-kbuild-Only-add-fno-var-tracking-assignments-for-old.patch @@ -0,0 +1,44 @@ +From 41f159de9ca697880f9dc84b7f4e1bc87043a774 Mon Sep 17 00:00:00 2001 +Message-ID: <41f159de9ca697880f9dc84b7f4e1bc87043a774.1757532466.git.osandov@osandov.com> +From: Mark Wielaard +Date: Sat, 17 Oct 2020 14:01:35 +0200 +Subject: [PATCH] kbuild: Only add -fno-var-tracking-assignments for old GCC + versions + +Some old GCC versions between 4.5.0 and 4.9.1 might miscompile code +with -fvar-tracking-assingments (which is enabled by default with -g -O2). +Commit 2062afb4f804 ("Fix gcc-4.9.0 miscompilation of load_balance() +in scheduler") added -fno-var-tracking-assignments unconditionally to +work around this. But newer versions of GCC no longer have this bug, so +only add it for versions of GCC before 5.0. 
This allows various tools +such as a perf probe or gdb debuggers or systemtap to resolve variable +locations using dwarf locations in more code. + +Signed-off-by: Mark Wielaard +Acked-by: Ian Rogers +Reviewed-by: Andi Kleen +Signed-off-by: Masahiro Yamada +(cherry picked from commit 121c5d08d53cb1f95d9881838523b0305c3f3bef) +Signed-off-by: Omar Sandoval +--- + Makefile | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/Makefile b/Makefile +index 8df76f9b0712..06ae74da97fc 100644 +--- a/Makefile ++++ b/Makefile +@@ -748,5 +748,9 @@ endif + endif + +-KBUILD_CFLAGS += $(call cc-option, -fno-var-tracking-assignments) ++# Workaround for GCC versions < 5.0 ++# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61801 ++ifdef CONFIG_CC_IS_GCC ++KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0500, $(call cc-option, -fno-var-tracking-assignments)) ++endif + + ifdef CONFIG_DEBUG_INFO +-- +2.51.0 + diff --git a/vmtest/patches/kbuild-Only-add-fno-var-tracking-assignments-for-old.patch b/vmtest/patches/kbuild-Only-add-fno-var-tracking-assignments-for-old.patch new file mode 100644 index 000000000..56f33b517 --- /dev/null +++ b/vmtest/patches/kbuild-Only-add-fno-var-tracking-assignments-for-old.patch @@ -0,0 +1,42 @@ +From 121c5d08d53cb1f95d9881838523b0305c3f3bef Mon Sep 17 00:00:00 2001 +Message-ID: <121c5d08d53cb1f95d9881838523b0305c3f3bef.1757526866.git.osandov@osandov.com> +From: Mark Wielaard +Date: Sat, 17 Oct 2020 14:01:35 +0200 +Subject: [PATCH] kbuild: Only add -fno-var-tracking-assignments for old GCC + versions + +Some old GCC versions between 4.5.0 and 4.9.1 might miscompile code +with -fvar-tracking-assingments (which is enabled by default with -g -O2). +Commit 2062afb4f804 ("Fix gcc-4.9.0 miscompilation of load_balance() +in scheduler") added -fno-var-tracking-assignments unconditionally to +work around this. But newer versions of GCC no longer have this bug, so +only add it for versions of GCC before 5.0. This allows various tools +such as a perf probe or gdb debuggers or systemtap to resolve variable +locations using dwarf locations in more code. + +Signed-off-by: Mark Wielaard +Acked-by: Ian Rogers +Reviewed-by: Andi Kleen +Signed-off-by: Masahiro Yamada +--- + Makefile | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/Makefile b/Makefile +index b76c4122c967..17a62e365a38 100644 +--- a/Makefile ++++ b/Makefile +@@ -815,5 +815,9 @@ KBUILD_CFLAGS += -enable-trivial-auto-var-init-zero-knowing-it-will-be-removed-f + endif + +-DEBUG_CFLAGS := $(call cc-option, -fno-var-tracking-assignments) ++# Workaround for GCC versions < 5.0 ++# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61801 ++ifdef CONFIG_CC_IS_GCC ++DEBUG_CFLAGS := $(call cc-ifversion, -lt, 0500, $(call cc-option, -fno-var-tracking-assignments)) ++endif + + ifdef CONFIG_DEBUG_INFO +-- +2.51.0 + diff --git a/vmtest/rootfsbuild.py b/vmtest/rootfsbuild.py index da62ce1c5..c4ade8bf9 100644 --- a/vmtest/rootfsbuild.py +++ b/vmtest/rootfsbuild.py @@ -5,7 +5,7 @@ from pathlib import Path import subprocess import tempfile -from typing import Literal +from typing import Literal, Optional, TextIO from vmtest.config import ARCHITECTURES, HOST_ARCHITECTURE, Architecture @@ -59,7 +59,7 @@ def build_rootfs( if btrfs != "never": try: - import btrfsutil + import btrfsutil # type: ignore # No type hints available. 
                btrfsutil.create_subvolume(tmp_dir / path.name)
                snapshot = True
@@ -104,7 +104,7 @@ def build_rootfs(
     logger.info("created snapshot %s", snapshot_dir)
 
 
-def build_drgn_in_rootfs(rootfs: Path) -> None:
+def build_drgn_in_rootfs(rootfs: Path, outfile: Optional[TextIO] = None) -> None:
     logger.info("building drgn using %s", rootfs)
     subprocess.check_call(
         [
@@ -123,7 +123,9 @@ def build_rootfs(
             """,
             "sh",
             rootfs,
-        ]
+        ],
+        stdout=outfile,
+        stderr=outfile,
     )
 
 
diff --git a/vmtest/vm.py b/vmtest/vm.py
index 8362a1737..01fc32283 100644
--- a/vmtest/vm.py
+++ b/vmtest/vm.py
@@ -12,14 +12,14 @@
 import subprocess
 import sys
 import tempfile
-from typing import Any, Optional, Sequence
+from typing import Any, Optional, Sequence, TextIO
 
 from util import nproc, out_of_date
 from vmtest.config import HOST_ARCHITECTURE, Kernel, local_kernel
 from vmtest.download import (
     DOWNLOAD_KERNEL_ARGPARSE_METAVAR,
+    Downloader,
     DownloadKernel,
-    download,
     download_kernel_argparse_type,
 )
 from vmtest.kmod import build_kmod
@@ -226,6 +226,7 @@ def run_in_vm(
     extra_qemu_options: Sequence[str] = (),
     test_kmod: TestKmodMode = TestKmodMode.NONE,
     interactive: bool = False,
+    outfile: Optional[TextIO] = None,
 ) -> int:
     if root_dir is None:
         if kernel.arch is HOST_ARCHITECTURE:
@@ -234,7 +235,7 @@ def run_in_vm(
             root_dir = build_dir / kernel.arch.name / "rootfs"
 
     if test_kmod != TestKmodMode.NONE:
-        kmod = build_kmod(build_dir, kernel)
+        kmod = build_kmod(build_dir, kernel, outfile=outfile)
 
     qemu_exe = "qemu-system-" + kernel.arch.name
     match = re.search(
@@ -360,8 +361,9 @@ def run_in_vm(
             qemu_exe,
             *kvm_args,
-            # Limit the number of cores to 8, otherwise we can reach an OOM troubles.
-            "-smp", str(min(nproc(), 8)), "-m", "2G",
+            # Limit the number of cores to 2. We want to test SMP, but each additional
+            # virtualized CPU costs memory and CPU time, so 2 is enough.
+            "-smp", str(min(nproc(), 2)), "-m", "2G",
             "-display", "none",
             *serial_args,
@@ -392,6 +394,8 @@ def run_in_vm(
             # fmt: on
         ],
         env=env,
+        stdout=outfile,
+        stderr=outfile,
         stdin=infile,
     )
     try:
@@ -527,10 +531,16 @@ def __call__(
         if not hasattr(args, "test_kmod"):
             args.test_kmod = TestKmodMode.NONE
 
+        downloader = Downloader(args.directory)
+        if args.test_kmod != TestKmodMode.NONE:
+            downloader.download_compiler(downloader.resolve_compiler(args.kernel.arch))
+
         if args.kernel.pattern.startswith(".") or args.kernel.pattern.startswith("/"):
             kernel = local_kernel(args.kernel.arch, Path(args.kernel.pattern))
         else:
-            kernel = next(download(args.directory, [args.kernel]))  # type: ignore[assignment]
+            kernel = downloader.download_kernel(
+                downloader.resolve_kernel(args.kernel.arch, args.kernel.pattern)
+            )
 
         try:
             command = (