diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9eb2fb625..041ab32a2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,14 +22,14 @@ jobs: - name: Install dependencies run: | sudo apt-get update - sudo apt-get install busybox-static libelf-dev libdw-dev qemu-kvm zstd ${{ matrix.cc == 'clang' && 'libomp-dev' || '' }} + sudo apt-get install busybox-static libelf-dev libdw-dev qemu-kvm zstd ${{ matrix.cc == 'clang' && 'libomp-$(clang --version | sed -rn "s/.*clang version ([0-9]+).*/\\1/p")-dev' || '' }} pip install mypy - name: Generate version.py run: python setup.py --version - name: Check with mypy run: mypy --strict --no-warn-return-any drgn _drgn.pyi - name: Build and test with ${{ matrix.cc }} - run: python setup.py test + run: python setup.py test -K lint: runs-on: ubuntu-latest diff --git a/.github/workflows/vmtest-build.yml b/.github/workflows/vmtest-build.yml new file mode 100644 index 000000000..449f3f6fd --- /dev/null +++ b/.github/workflows/vmtest-build.yml @@ -0,0 +1,28 @@ +name: vmtest Build + +on: + schedule: + - cron: '16 6 * * MON' + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + env: + GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} + steps: + - uses: actions/checkout@v2 + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install libelf-dev + pip install aiohttp uritemplate + - name: Build and upload assets + run: python3 -m vmtest.manage --kernel-directory build/vmtest/linux.git --build-directory build/vmtest/kbuild -K + - name: Upload kernel build logs + if: always() + uses: actions/upload-artifact@v2 + with: + name: kernel-build-logs + path: build/vmtest/kbuild/*.log + if-no-files-found: ignore diff --git a/MANIFEST.in b/MANIFEST.in index f4ea67285..ef05b5490 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,4 +3,4 @@ recursive-include examples *.py recursive-include tests *.py recursive-include tools *.py recursive-include vmtest *.c *.py *.rst -include util.py 
vmtest/config +include COPYING util.py vmtest/config diff --git a/README.rst b/README.rst index 087950b1c..8e8bd4951 100644 --- a/README.rst +++ b/README.rst @@ -40,7 +40,7 @@ scripting support, drgn aims to make scripting as natural as possible so that debugging feels like coding. This makes it well-suited for introspecting the complex, inter-connected state in large programs. It is also designed as a library that can be used to build debugging and introspection tools; see the -official `tools `_. +official `tools `_. drgn was developed for debugging the Linux kernel (as an alternative to the `crash `_ utility), but it can also debug diff --git a/_drgn.pyi b/_drgn.pyi index 0fcc55cb1..7774ab84b 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -19,6 +19,7 @@ from typing import ( Mapping, Optional, Sequence, + Tuple, Union, overload, ) @@ -105,7 +106,15 @@ class Program: >>> prog['jiffies'] Object(prog, 'volatile unsigned long', address=0xffffffff94c05000) - :param name: The object name. + :param name: Object name. + """ + ... + def __contains__(self, name: str) -> bool: + """ + Implement ``name in self``. Return whether an object (variable, + constant, or function) with the given name exists in the program. + + :param name: Object name. """ ... def variable(self, name: str, filename: Optional[str] = None) -> Object: @@ -258,16 +267,16 @@ class Program: """ ... def read_u8(self, address: IntegerLike, physical: bool = False) -> int: - "" + """ """ ... def read_u16(self, address: IntegerLike, physical: bool = False) -> int: - "" + """ """ ... def read_u32(self, address: IntegerLike, physical: bool = False) -> int: - "" + """ """ ... def read_u64(self, address: IntegerLike, physical: bool = False) -> int: - "" + """ """ ... def read_word(self, address: IntegerLike, physical: bool = False) -> int: """ @@ -1374,23 +1383,24 @@ class StackTrace: .. 
code-block:: python3 for frame in trace: - if frame.symbol().name == 'io_schedule': + if frame.name == 'io_schedule': print('Thread is doing I/O') :class:`str() <str>` returns a pretty-printed stack trace: - >>> print(prog.stack_trace(1)) - #0 __schedule+0x25c/0x8ba - #1 schedule+0x3c/0x7e - #2 schedule_hrtimeout_range_clock+0x10c/0x118 - #3 ep_poll+0x3ca/0x40a - #4 do_epoll_wait+0xb0/0xc6 - #5 __x64_sys_epoll_wait+0x1a/0x1d - #6 do_syscall_64+0x55/0x17c - #7 entry_SYSCALL_64+0x7c/0x156 + >>> prog.stack_trace(1) + #0 context_switch (kernel/sched/core.c:4339:2) + #1 __schedule (kernel/sched/core.c:5147:8) + #2 schedule (kernel/sched/core.c:5226:3) + #3 do_wait (kernel/exit.c:1534:4) + #4 kernel_wait4 (kernel/exit.c:1678:8) + #5 __do_sys_wait4 (kernel/exit.c:1706:13) + #6 do_syscall_64 (arch/x86/entry/common.c:47:14) + #7 entry_SYSCALL_64+0x7c/0x15b (arch/x86/entry/entry_64.S:112) + #8 0x4d49dd - The drgn CLI is set up so that stack traces are displayed with ``str()`` by - default. + The format is subject to change. The drgn CLI is set up so that stack + traces are displayed with ``str()`` by default. """ def __getitem__(self, idx: IntegerLike) -> StackFrame: ... @@ -1398,6 +1408,38 @@ class StackTrace: class StackFrame: """ A ``StackFrame`` represents a single *frame* in a thread's call stack. + + :class:`str() <str>` returns a pretty-printed stack frame: + + >>> prog.stack_trace(1)[0] + #0 at 0xffffffffb64ac287 (__schedule+0x227/0x606) in context_switch at kernel/sched/core.c:4339:2 (inlined) + + This includes more information than when printing the full stack trace. The + format is subject to change. The drgn CLI is set up so that stack frames + are displayed with ``str()`` by default. 
+ + The :meth:`[] <.__getitem__>` operator can look up function parameters, + local variables, and global variables in the scope of the stack frame: + + >>> prog.stack_trace(1)[0]['prev'].pid + (pid_t)1 + >>> prog.stack_trace(1)[0]['scheduler_running'] + (int)1 + """ + + name: Optional[str] + """ + Name of the function at this frame, or ``None`` if it could not be + determined. + """ + + is_inline: bool + """ + Whether this frame is for an inlined call. + + An inline frame shares the same stack frame in memory as its caller. + Therefore, it has the same registers (including program counter and thus + symbol). """ interrupted: bool @@ -1413,8 +1455,36 @@ class StackFrame: particular, the program counter is the return address, which is typically the instruction after the call instruction. """ + pc: int """Program counter at this stack frame.""" + def __getitem__(self, name: str) -> Object: + """ + Implement ``self[name]``. Get the object (variable, function parameter, + constant, or function) with the given name in the scope of this frame. + + If the object exists but has been optimized out, this returns an + :ref:`absent object <absent-objects>`. + + :param name: Object name. + """ + ... + def __contains__(self, name: str) -> bool: + """ + Implement ``name in self``. Return whether an object with the given + name exists in the scope of this frame. + + :param name: Object name. + """ + ... + def source(self) -> Tuple[str, int, int]: + """ + Get the source code location of this frame. + + :return: Location as a ``(filename, line, column)`` triple. + :raises LookupError: if the source code location is not available + """ + ... def symbol(self) -> Symbol: """ Get the function symbol at this stack frame. @@ -1952,11 +2022,14 @@ class FaultError(Exception): accessing a memory address which is not valid in a program). 
""" - def __init__(self, address: int) -> None: + def __init__(self, message: str, address: int) -> None: """ + :param message: :attr:`FaultError.message` :param address: :attr:`FaultError.address` """ ... + message: str + """Error message.""" address: int """Address that couldn't be accessed.""" @@ -1981,6 +2054,7 @@ class OutOfBoundsError(Exception): ... +_elfutils_version: str _with_libkdumpfile: bool def _linux_helper_read_vm( diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index 8252801cc..f951d3056 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -35,7 +35,7 @@ The core functionality of drgn is implemented in C and is available as a C library, ``libdrgn``. See |drgn.h|_. .. |drgn.h| replace:: ``drgn.h`` -.. _drgn.h: https://github.com/osandov/drgn/blob/master/libdrgn/drgn.h.in +.. _drgn.h: https://github.com/osandov/drgn/blob/main/libdrgn/drgn.h.in Full documentation can be generated by running ``doxygen`` in the ``libdrgn`` directory of the source code. Note that the API and ABI are not yet stable. diff --git a/docs/user_guide.rst b/docs/user_guide.rst index 97efe150d..c56eccff7 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -139,6 +139,8 @@ address it points to):: >>> print(hex(jiffiesp.value_())) 0xffffffffbe405000 +.. _absent-objects: + Absent Objects """""""""""""" @@ -317,5 +319,5 @@ Next Steps Refer to the :doc:`api_reference`. Look through the :doc:`helpers`. Browse through the official `examples -`_ and `tools -`_. +`_ and `tools +`_. diff --git a/drgn/__init__.py b/drgn/__init__.py index f3998cc0c..893c36dce 100644 --- a/drgn/__init__.py +++ b/drgn/__init__.py @@ -73,6 +73,7 @@ TypeMember, TypeParameter, TypeTemplateParameter, + _elfutils_version as _elfutils_version, _with_libkdumpfile as _with_libkdumpfile, cast, container_of, @@ -195,7 +196,7 @@ def task_exe_path(task): :param args: Zero or more additional arguments to pass to the script. This is a :ref:`variable argument list `. 
""" - # This is based on runpy.run_code, which we can't use because we want to + # This is based on runpy.run_path(), which we can't use because we want to # update globals even if the script throws an exception. saved_module = [] try: diff --git a/drgn/internal/cli.py b/drgn/internal/cli.py index ac2f9acd6..77a25d1a2 100644 --- a/drgn/internal/cli.py +++ b/drgn/internal/cli.py @@ -23,7 +23,7 @@ def displayhook(value: Any) -> None: setattr(builtins, "_", None) if isinstance(value, drgn.Object): text = value.format_(columns=shutil.get_terminal_size((0, 0)).columns) - elif isinstance(value, (drgn.StackTrace, drgn.Type)): + elif isinstance(value, (drgn.StackFrame, drgn.StackTrace, drgn.Type)): text = str(value) else: text = repr(value) @@ -43,7 +43,7 @@ def displayhook(value: Any) -> None: def main() -> None: python_version = ".".join(str(v) for v in sys.version_info[:3]) libkdumpfile = f'with{"" if drgn._with_libkdumpfile else "out"} libkdumpfile' - version = f"drgn {drgn.__version__} (using Python {python_version}, {libkdumpfile})" + version = f"drgn {drgn.__version__} (using Python {python_version}, elfutils {drgn._elfutils_version}, {libkdumpfile})" parser = argparse.ArgumentParser(prog="drgn", description="Scriptable debugger") program_group = parser.add_argument_group( diff --git a/examples/linux/cgroup.py b/examples/linux/cgroup.py index eab17d4f4..e215136a2 100755 --- a/examples/linux/cgroup.py +++ b/examples/linux/cgroup.py @@ -1,3 +1,4 @@ +#!/usr/bin/env drgn # Copyright (c) Facebook, Inc. and its affiliates. # SPDX-License-Identifier: GPL-3.0-or-later diff --git a/examples/linux/fs_inodes.py b/examples/linux/fs_inodes.py index 6bc32de23..856d13f76 100755 --- a/examples/linux/fs_inodes.py +++ b/examples/linux/fs_inodes.py @@ -1,3 +1,4 @@ +#!/usr/bin/env drgn # Copyright (c) Facebook, Inc. and its affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later diff --git a/examples/linux/lsmod.py b/examples/linux/lsmod.py index a60917eb6..1585e2c22 100755 --- a/examples/linux/lsmod.py +++ b/examples/linux/lsmod.py @@ -1,3 +1,4 @@ +#!/usr/bin/env drgn # Copyright (c) Facebook, Inc. and its affiliates. # SPDX-License-Identifier: GPL-3.0-or-later diff --git a/examples/linux/ps.py b/examples/linux/ps.py index 3f1b8d230..0eda3be28 100755 --- a/examples/linux/ps.py +++ b/examples/linux/ps.py @@ -1,3 +1,4 @@ +#!/usr/bin/env drgn # Copyright (c) Facebook, Inc. and its affiliates. # SPDX-License-Identifier: GPL-3.0-or-later diff --git a/examples/linux/tcp_sock.py b/examples/linux/tcp_sock.py index 1df33cf6d..ad628974c 100755 --- a/examples/linux/tcp_sock.py +++ b/examples/linux/tcp_sock.py @@ -1,3 +1,4 @@ +#!/usr/bin/env drgn # Copyright (c) Facebook, Inc. and its affiliates. # SPDX-License-Identifier: GPL-3.0-or-later diff --git a/libdrgn/.gitignore b/libdrgn/.gitignore index cf9d307a9..88373b6a1 100644 --- a/libdrgn/.gitignore +++ b/libdrgn/.gitignore @@ -11,6 +11,7 @@ /config.log /config.status /configure +/configure~ /html /libtool /python/constants.c diff --git a/libdrgn/configure.ac b/libdrgn/configure.ac index 35fee53cb..88378aefe 100644 --- a/libdrgn/configure.ac +++ b/libdrgn/configure.ac @@ -1,7 +1,7 @@ dnl Copyright (c) Facebook, Inc. and its affiliates. dnl SPDX-License-Identifier: GPL-3.0-or-later -AC_INIT([libdrgn], [0.0.11], +AC_INIT([libdrgn], [0.0.13], [https://github.com/osandov/drgn/issues],, [https://github.com/osandov/drgn]) @@ -15,6 +15,8 @@ AM_PROG_AR LT_INIT +AC_SYS_LARGEFILE + AC_ARG_ENABLE([openmp], [AS_HELP_STRING([--enable-openmp@<:@=ARG@:>@], [use OpenMP. 
ARG may be yes, no, or the name of diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 051b44c09..f7095280d 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -25,6 +25,7 @@ #include "language.h" #include "lazy_object.h" #include "linux_kernel.h" +#include "minmax.h" #include "object.h" #include "orc.h" #include "path.h" @@ -34,6 +35,12 @@ #include "type.h" #include "util.h" +/** + * Arbitrary limit for number of operations to execute in a DWARF expression to + * avoid infinite loops. + */ +static const int MAX_DWARF_EXPR_OPS = 10000; + struct drgn_dwarf_cie { /* Whether this CIE is from .eh_frame. */ bool is_eh; @@ -68,6 +75,8 @@ DEFINE_HASH_MAP(drgn_dwarf_cie_map, size_t, size_t, int_key_hash_pair, DEFINE_VECTOR(drgn_cfi_row_vector, struct drgn_cfi_row *) DEFINE_VECTOR(uint64_vector, uint64_t) +DEFINE_VECTOR(dwarf_die_vector, Dwarf_Die) + #define DW_TAG_UNKNOWN_FORMAT "unknown DWARF tag 0x%02x" #define DW_TAG_BUF_LEN (sizeof(DW_TAG_UNKNOWN_FORMAT) - 4 + 2 * sizeof(int)) @@ -97,6 +106,7 @@ static const char *dwarf_tag_str(Dwarf_Die *die, char buf[DW_TAG_BUF_LEN]) static const char * const drgn_debug_scn_names[] = { [DRGN_SCN_DEBUG_INFO] = ".debug_info", + [DRGN_SCN_DEBUG_TYPES] = ".debug_types", [DRGN_SCN_DEBUG_ABBREV] = ".debug_abbrev", [DRGN_SCN_DEBUG_STR] = ".debug_str", [DRGN_SCN_DEBUG_LINE] = ".debug_line", @@ -104,6 +114,7 @@ static const char * const drgn_debug_scn_names[] = { [DRGN_SCN_EH_FRAME] = ".eh_frame", [DRGN_SCN_ORC_UNWIND_IP] = ".orc_unwind_ip", [DRGN_SCN_ORC_UNWIND] = ".orc_unwind", + [DRGN_SCN_DEBUG_LOC] = ".debug_loc", [DRGN_SCN_TEXT] = ".text", [DRGN_SCN_GOT] = ".got", }; @@ -131,6 +142,344 @@ struct drgn_error *drgn_debug_info_buffer_error(struct binary_buffer *bb, message); } + +/** Iterator over DWARF DIEs in a @ref drgn_debug_info_module. */ +struct drgn_dwarf_die_iterator { + /** Stack of current DIE and its ancestors. */ + struct dwarf_die_vector dies; + Dwarf *dwarf; + /** End of current CU (for bounds checking). 
*/ + const char *cu_end; + /** Offset of next CU. */ + Dwarf_Off next_cu_off; + /** Whether current CU is from .debug_types. */ + bool debug_types; +}; + +static void drgn_dwarf_die_iterator_init(struct drgn_dwarf_die_iterator *it, + Dwarf *dwarf) +{ + dwarf_die_vector_init(&it->dies); + it->dwarf = dwarf; + it->next_cu_off = 0; + it->debug_types = false; +} + +static void drgn_dwarf_die_iterator_deinit(struct drgn_dwarf_die_iterator *it) +{ + dwarf_die_vector_deinit(&it->dies); +} + +/** + * Return the next DWARF DIE in a @ref drgn_dwarf_die_iterator. + * + * The first call returns the top-level DIE for the first unit in the module. + * Subsequent calls return children, siblings, and unit DIEs. + * + * This includes the .debug_types section. + * + * @param[in] children If @c true and the last returned DIE has children, return + * its first child (this is a pre-order traversal). Otherwise, return the next + * DIE at the level less than or equal to the last returned DIE, i.e., the last + * returned DIE's sibling, or its ancestor's sibling, or the next top-level unit + * DIE. + * @param[in] subtree If zero, iterate over all DIEs in all units. If non-zero, + * stop after returning all DIEs in the subtree rooted at the DIE that was + * returned in the last call as `(*dies_ret)[subtree - 1]`. + * @param[out] dies_ret Returned array containing DIE and its ancestors. + * `(*dies_ret)[*length_ret - 1]` is the DIE itself, + * `(*dies_ret)[*length_ret - 2]` is its parent, `(*dies_ret)[*length_ret - 3]` + * is its grandparent, etc., and `(*dies_ret)[0]` is the top-level unit DIE. + * This is valid until the next call to @ref drgn_dwarf_die_iterator_next() or + * @ref drgn_dwarf_die_iterator_deinit(). + * @param[out] length_ret Returned length of @p dies_ret. 
+ * @return @c NULL on success, `&drgn_stop` if there are no more DIEs, in which + * case `*length_ret` equals @p subtree and @p dies_ret refers to the root of + * the iterated subtree, non-@c NULL on error, in which case this should not be + * called again. + */ +static struct drgn_error * +drgn_dwarf_die_iterator_next(struct drgn_dwarf_die_iterator *it, bool children, + size_t subtree, Dwarf_Die **dies_ret, + size_t *length_ret) +{ +#define TOP() (&it->dies.data[it->dies.size - 1]) + struct drgn_error *err = NULL; + int r; + Dwarf_Die die; + assert(subtree <= it->dies.size); + if (it->dies.size == 0) { + /* This is the first call. Get the first unit DIE. */ + if (!dwarf_die_vector_append_entry(&it->dies)) { + err = &drgn_enomem; + goto out; + } + } else { + if (children) { + r = dwarf_child(TOP(), &die); + if (r == 0) { + /* The previous DIE has a child. Return it. */ + if (!dwarf_die_vector_append(&it->dies, &die)) + err = &drgn_enomem; + goto out; + } else if (r < 0) { + err = drgn_error_libdw(); + goto out; + } + /* The previous DIE has no children. */ + } + + if (it->dies.size == subtree) { + /* + * The previous DIE is the root of the subtree. We're + * done. + */ + err = &drgn_stop; + goto out; + } + + if (it->dies.size > 1) { + r = dwarf_siblingof(TOP(), &die); + if (r == 0) { + /* The previous DIE has a sibling. Return it. */ + *TOP() = die; + goto out; + } else if (r > 0) { + if (!die.addr) + goto next_unit; + /* + * The previous DIE is the last child of its + * parent. + */ + char *addr = die.addr; + do { + /* + * addr points to the null terminator + * for the list of siblings. Go back up + * to its parent. The next byte is + * either the parent's sibling or + * another null terminator. + */ + it->dies.size--; + addr++; + if (it->dies.size == subtree) { + /* + * We're back to the root of the + * subtree. We're done. 
+ */ + err = &drgn_stop; + goto out; + } + if (it->dies.size == 1 || + addr >= it->cu_end) + goto next_unit; + } while (*addr == '\0'); + /* + * addr now points to the next DIE. Return it. + */ + *TOP() = (Dwarf_Die){ + .cu = it->dies.data[0].cu, + .addr = addr, + }; + goto out; + } else { + err = drgn_error_libdw(); + goto out; + } + } + } + +next_unit:; + /* There are no more DIEs in the current unit. */ + Dwarf_Off cu_off = it->next_cu_off; + size_t cu_header_size; + uint64_t type_signature; + r = dwarf_next_unit(it->dwarf, cu_off, &it->next_cu_off, + &cu_header_size, NULL, NULL, NULL, NULL, + it->debug_types ? &type_signature : NULL, NULL); + if (r == 0) { + /* Got the next unit. Return the unit DIE. */ + if (it->debug_types) { + r = !dwarf_offdie_types(it->dwarf, + cu_off + cu_header_size, TOP()); + } else { + r = !dwarf_offdie(it->dwarf, cu_off + cu_header_size, + TOP()); + } + if (r) { + err = drgn_error_libdw(); + goto out; + } + it->cu_end = ((const char *)TOP()->addr + - dwarf_dieoffset(TOP()) + + it->next_cu_off); + } else if (r > 0) { + if (!it->debug_types) { + it->next_cu_off = 0; + it->debug_types = true; + goto next_unit; + } + /* There are no more units. */ + err = &drgn_stop; + } else { + err = drgn_error_libdw(); + } + +out: + /* + * Return these even in the error case to avoid maybe uninitialized + * warnings in the caller. + */ + *dies_ret = it->dies.data; + *length_ret = it->dies.size; + return err; +#undef TOP +} + +struct drgn_error * +drgn_debug_info_module_find_dwarf_scopes(struct drgn_debug_info_module *module, + uint64_t pc, uint64_t *bias_ret, + Dwarf_Die **dies_ret, + size_t *length_ret) +{ + struct drgn_error *err; + + Dwarf_Addr bias; + Dwarf *dwarf = dwfl_module_getdwarf(module->dwfl_module, &bias); + if (!dwarf) + return drgn_error_libdw(); + *bias_ret = bias; + pc -= bias; + + /* First, try to get the CU containing the PC. 
*/ + Dwarf_Aranges *aranges; + size_t naranges; + if (dwarf_getaranges(dwarf, &aranges, &naranges) < 0) + return drgn_error_libdw(); + + struct drgn_dwarf_die_iterator it; + bool children; + size_t subtree; + if (naranges > 0) { + Dwarf_Off offset; + if (dwarf_getarangeinfo(dwarf_getarange_addr(aranges, pc), NULL, + NULL, &offset) < 0) { + /* No ranges match the PC. */ + *dies_ret = NULL; + *length_ret = 0; + return NULL; + } + + drgn_dwarf_die_iterator_init(&it, dwarf); + Dwarf_Die *cu_die = dwarf_die_vector_append_entry(&it.dies); + if (!cu_die) { + err = &drgn_enomem; + goto err; + } + if (!dwarf_offdie(dwarf, offset, cu_die)) { + err = drgn_error_libdw(); + goto err; + } + if (dwarf_next_unit(dwarf, offset - dwarf_cuoffset(cu_die), + &it.next_cu_off, NULL, NULL, NULL, NULL, + NULL, NULL, NULL)) { + err = drgn_error_libdw(); + goto err; + } + it.cu_end = ((const char *)cu_die->addr + - dwarf_dieoffset(cu_die) + + it.next_cu_off); + children = true; + subtree = 1; + } else { + /* + * .debug_aranges is empty or missing. Fall back to checking + * each CU. + */ + drgn_dwarf_die_iterator_init(&it, dwarf); + children = false; + subtree = 0; + } + + /* Now find DIEs containing the PC. 
*/ + Dwarf_Die *dies; + size_t length; + while (!(err = drgn_dwarf_die_iterator_next(&it, children, subtree, + &dies, &length))) { + int r = dwarf_haspc(&dies[length - 1], pc); + if (r > 0) { + children = true; + subtree = length; + } else if (r < 0) { + err = drgn_error_libdw(); + goto err; + } + } + if (err != &drgn_stop) + goto err; + + *dies_ret = dies; + *length_ret = length; + return NULL; + +err: + drgn_dwarf_die_iterator_deinit(&it); + return err; +} + +struct drgn_error *drgn_find_die_ancestors(Dwarf_Die *die, Dwarf_Die **dies_ret, + size_t *length_ret) +{ + struct drgn_error *err; + + Dwarf *dwarf = dwarf_cu_getdwarf(die->cu); + if (!dwarf) + return drgn_error_libdw(); + + struct drgn_dwarf_die_iterator it; + drgn_dwarf_die_iterator_init(&it, dwarf); + Dwarf_Die *cu_die = dwarf_die_vector_append_entry(&it.dies); + if (!cu_die) { + err = &drgn_enomem; + goto err; + } + Dwarf_Half cu_version; + Dwarf_Off type_offset; + if (!dwarf_cu_die(die->cu, cu_die, &cu_version, NULL, NULL, NULL, NULL, + &type_offset)) { + err = drgn_error_libdw(); + goto err; + } + it.debug_types = cu_version == 4 && type_offset != 0; + uint64_t type_signature; + Dwarf_Off cu_die_offset = dwarf_dieoffset(cu_die); + if (dwarf_next_unit(dwarf, cu_die_offset - dwarf_cuoffset(cu_die), + &it.next_cu_off, NULL, NULL, NULL, NULL, NULL, + it.debug_types ? 
&type_signature : NULL, NULL)) { + err = drgn_error_libdw(); + goto err; + } + it.cu_end = (const char *)cu_die->addr - cu_die_offset + it.next_cu_off; + + Dwarf_Die *dies; + size_t length; + while (!(err = drgn_dwarf_die_iterator_next(&it, true, 1, &dies, + &length))) { + if (dies[length - 1].addr == die->addr) { + *dies_ret = dies; + *length_ret = length - 1; + return NULL; + } + } + if (err == &drgn_stop) { + err = drgn_error_create(DRGN_ERROR_OTHER, + "could not find DWARF DIE ancestors"); + } +err: + drgn_dwarf_die_iterator_deinit(&it); + return err; +} + DEFINE_VECTOR_FUNCTIONS(drgn_debug_info_module_vector) static inline struct hash_pair @@ -959,6 +1308,15 @@ drgn_debug_info_precache_sections(struct drgn_debug_info_module *module) return NULL; } +static struct drgn_error * +drgn_debug_info_module_cache_section(struct drgn_debug_info_module *module, + enum drgn_debug_info_scn scn) +{ + if (module->scn_data[scn]) + return NULL; + return read_elf_section(module->scns[scn], &module->scn_data[scn]); +} + static struct drgn_error * drgn_debug_info_read_module(struct drgn_debug_info_load_state *load, struct drgn_dwarf_index_update_state *dindex_state, @@ -980,8 +1338,8 @@ drgn_debug_info_read_module(struct drgn_debug_info_load_state *load, continue; } module->state = DRGN_DEBUG_INFO_MODULE_INDEXING; - drgn_dwarf_index_read_module(dindex_state, module); - return NULL; + return drgn_dwarf_index_read_module(dindex_state, + module); } } /* @@ -1022,29 +1380,32 @@ drgn_debug_info_update_index(struct drgn_debug_info_load_state *load) c_string_set_size(&dbinfo->module_names) + load->new_modules.size)) return &drgn_enomem; + struct drgn_dwarf_index_update_state dindex_state; - drgn_dwarf_index_update_begin(&dindex_state, &dbinfo->dindex); - /* - * In OpenMP 5.0, this could be "#pragma omp parallel master taskloop" - * (added in GCC 9 and Clang 10). 
- */ - #pragma omp parallel - #pragma omp master - #pragma omp taskloop + if (!drgn_dwarf_index_update_state_init(&dindex_state, &dbinfo->dindex)) + return &drgn_enomem; + struct drgn_error *err = NULL; + #pragma omp parallel for schedule(dynamic) for (size_t i = 0; i < load->new_modules.size; i++) { - if (drgn_dwarf_index_update_cancelled(&dindex_state)) + if (err) continue; struct drgn_error *module_err = drgn_debug_info_read_module(load, &dindex_state, load->new_modules.data[i]); - if (module_err) - drgn_dwarf_index_update_cancel(&dindex_state, module_err); + if (module_err) { + #pragma omp critical(drgn_debug_info_update_index_error) + if (err) + drgn_error_destroy(module_err); + else + err = module_err; + } } - struct drgn_error *err = drgn_dwarf_index_update_end(&dindex_state); - if (err) - return err; - drgn_debug_info_free_modules(dbinfo, true, false); - return NULL; + if (!err) + err = drgn_dwarf_index_update(&dindex_state); + drgn_dwarf_index_update_state_deinit(&dindex_state); + if (!err) + drgn_debug_info_free_modules(dbinfo, true, false); + return err; } struct drgn_error * @@ -1143,6 +1504,114 @@ bool drgn_debug_info_is_indexed(struct drgn_debug_info *dbinfo, return c_string_set_search(&dbinfo->module_names, &name).entry != NULL; } +static struct drgn_error * +drgn_dwarf_location(struct drgn_debug_info_module *module, + Dwarf_Attribute *attr, + const struct drgn_register_state *regs, + const char **expr_ret, size_t *expr_size_ret) +{ + struct drgn_error *err; + switch (attr->form) { + case DW_FORM_sec_offset: { + if (!module->scns[DRGN_SCN_DEBUG_LOC]) { + return drgn_error_create(DRGN_ERROR_OTHER, + "loclistptr without .debug_loc section"); + } + + struct optional_uint64 pc; + if (!regs || + !(pc = drgn_register_state_get_pc(regs)).has_value) { + *expr_ret = NULL; + *expr_size_ret = 0; + return NULL; + } + + err = drgn_debug_info_module_cache_section(module, + DRGN_SCN_DEBUG_LOC); + if (err) + return err; + + Dwarf_Addr bias; + 
dwfl_module_info(module->dwfl_module, NULL, NULL, NULL, &bias, + NULL, NULL, NULL); + pc.value = pc.value - !regs->interrupted - bias; + + Dwarf_Word offset; + if (dwarf_formudata(attr, &offset)) + return drgn_error_libdw(); + + struct drgn_debug_info_buffer buffer; + drgn_debug_info_buffer_init(&buffer, module, + DRGN_SCN_DEBUG_LOC); + if (offset > buffer.bb.end - buffer.bb.pos) { + return drgn_error_create(DRGN_ERROR_OTHER, + "loclistptr is out of bounds"); + } + buffer.bb.pos += offset; + + uint8_t address_size = + drgn_platform_address_size(&module->platform); + uint64_t address_max = uint_max(address_size); + uint64_t base; + bool base_valid = false; + for (;;) { + uint64_t start, end; + if ((err = binary_buffer_next_uint(&buffer.bb, + address_size, + &start)) || + (err = binary_buffer_next_uint(&buffer.bb, + address_size, &end))) + return err; + if (start == 0 && end == 0) { + break; + } else if (start == address_max) { + base = end; + base_valid = true; + } else { + if (!base_valid) { + Dwarf_Die cu_die; + if (!dwarf_cu_die(attr->cu, &cu_die, + NULL, NULL, NULL, + NULL, NULL, NULL)) + return drgn_error_libdw(); + Dwarf_Addr low_pc; + if (dwarf_lowpc(&cu_die, &low_pc)) + return drgn_error_libdw(); + base = low_pc; + base_valid = true; + } + uint16_t expr_size; + if ((err = binary_buffer_next_u16(&buffer.bb, + &expr_size))) + return err; + if (expr_size > buffer.bb.end - buffer.bb.pos) { + return binary_buffer_error(&buffer.bb, + "location description size is out of bounds"); + } + if (base + start <= pc.value && + pc.value < base + end) { + *expr_ret = buffer.bb.pos; + *expr_size_ret = expr_size; + return NULL; + } + buffer.bb.pos += expr_size; + } + } + *expr_ret = NULL; + *expr_size_ret = 0; + return NULL; + } + default: { + Dwarf_Block block; + if (dwarf_formblock(attr, &block)) + return drgn_error_libdw(); + *expr_ret = (char *)block.data; + *expr_size_ret = block.length; + return NULL; + } + } +} + struct drgn_dwarf_expression_buffer { struct 
binary_buffer bb; const char *start; @@ -1204,11 +1673,23 @@ drgn_dwarf_expression_buffer_init(struct drgn_dwarf_expression_buffer *buffer, buffer->module = module; } -/* Returns &drgn_not_found if it tried to use an unknown register value. */ +static struct drgn_error * +drgn_dwarf_frame_base(struct drgn_program *prog, + struct drgn_debug_info_module *module, Dwarf_Die *die, + int *remaining_ops, + const struct drgn_register_state *regs, uint64_t *ret); + +/* + * Evaluate a DWARF expression up to the next location description operation. + * + * Returns &drgn_not_found if it tried to use an unknown register value. + */ static struct drgn_error * drgn_eval_dwarf_expression(struct drgn_program *prog, struct drgn_dwarf_expression_buffer *expr, struct uint64_vector *stack, + int *remaining_ops, + Dwarf_Die *function_die, const struct drgn_register_state *regs) { struct drgn_error *err; @@ -1231,7 +1712,7 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, #define ELEM(i) stack->data[stack->size - 1 - (i)] #define PUSH(x) do { \ - uint64_t push = x; \ + uint64_t push = (x); \ if (!uint64_vector_append(stack, &push)) \ return &drgn_enomem; \ } while (0) @@ -1239,6 +1720,11 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, #define PUSH_MASK(x) PUSH((x) & address_mask) while (binary_buffer_has_next(&expr->bb)) { + if (*remaining_ops <= 0) { + return binary_buffer_error(&expr->bb, + "DWARF expression executed too many operations"); + } + (*remaining_ops)--; uint8_t opcode; if ((err = binary_buffer_next_u8(&expr->bb, &opcode))) return err; @@ -1317,6 +1803,19 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, PUSH_MASK(uvalue); break; /* Register values. */ + case DW_OP_fbreg: { + err = drgn_dwarf_frame_base(prog, expr->module, + function_die, remaining_ops, + regs, &uvalue); + if (err) + return err; + int64_t svalue; + if ((err = binary_buffer_next_sleb128(&expr->bb, + &svalue))) + return err; + PUSH_MASK(uvalue + svalue); + break; + } case DW_OP_breg0 ... 
DW_OP_breg31: dwarf_regno = opcode - DW_OP_breg0; goto breg; @@ -1383,7 +1882,6 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, deref_size = address_size; goto deref; case DW_OP_deref_size: - CHECK(1); if ((err = binary_buffer_next_u8(&expr->bb, &deref_size))) return err; @@ -1393,6 +1891,7 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, } deref: { + CHECK(1); char deref_buf[8]; err = drgn_program_read_memory(prog, deref_buf, ELEM(0), deref_size, false); @@ -1580,17 +2079,24 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, /* Special operations. */ case DW_OP_nop: break; + /* Location description operations. */ + case DW_OP_reg0 ... DW_OP_reg31: + case DW_OP_regx: + case DW_OP_implicit_value: + case DW_OP_stack_value: + case DW_OP_piece: + case DW_OP_bit_piece: + /* The caller must handle it. */ + expr->bb.pos = expr->bb.prev; + return NULL; /* * We don't yet support: * - * - DW_OP_fbreg * - DW_OP_push_object_address * - DW_OP_form_tls_address * - DW_OP_entry_value + * DW_OP_implicit_pointer * - Procedure calls: DW_OP_call2, DW_OP_call4, DW_OP_call_ref. - * - Location description operations: DW_OP_reg0-DW_OP_reg31, - * DW_OP_regx, DW_OP_implicit_value, DW_OP_stack_value, - * DW_OP_implicit_pointer, DW_OP_piece, DW_OP_bit_piece. * - Operations that use .debug_addr: DW_OP_addrx, * DW_OP_constx. 
* - Typed operations: DW_OP_const_type, DW_OP_regval_type, @@ -1600,7 +2106,7 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, */ default: return binary_buffer_error(&expr->bb, - "unknown DWARF expression opcode %#x", + "unknown DWARF expression opcode %#" PRIx8, opcode); } } @@ -1613,6 +2119,101 @@ drgn_eval_dwarf_expression(struct drgn_program *prog, return NULL; } +static struct drgn_error * +drgn_dwarf_frame_base(struct drgn_program *prog, + struct drgn_debug_info_module *module, Dwarf_Die *die, + int *remaining_ops, + const struct drgn_register_state *regs, uint64_t *ret) +{ + struct drgn_error *err; + bool little_endian = drgn_platform_is_little_endian(&module->platform); + drgn_register_number (*dwarf_regno_to_internal)(uint64_t) = + module->platform.arch->dwarf_regno_to_internal; + + if (!die) + return &drgn_not_found; + Dwarf_Attribute attr_mem, *attr; + if (!(attr = dwarf_attr_integrate(die, DW_AT_frame_base, &attr_mem))) + return &drgn_not_found; + const char *expr; + size_t expr_size; + err = drgn_dwarf_location(module, attr, regs, &expr, &expr_size); + if (err) + return err; + + struct uint64_vector stack = VECTOR_INIT; + struct drgn_dwarf_expression_buffer buffer; + drgn_dwarf_expression_buffer_init(&buffer, module, expr, expr_size); + for (;;) { + err = drgn_eval_dwarf_expression(prog, &buffer, &stack, + remaining_ops, NULL, regs); + if (err) + goto out; + if (binary_buffer_has_next(&buffer.bb)) { + uint8_t opcode; + if ((err = binary_buffer_next_u8(&buffer.bb, &opcode))) + goto out; + + uint64_t dwarf_regno; + switch (opcode) { + case DW_OP_reg0 ... 
DW_OP_reg31: + dwarf_regno = opcode - DW_OP_reg0; + goto reg; + case DW_OP_regx: + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno))) + goto out; +reg: + { + if (!regs) { + err = &drgn_not_found; + goto out; + } + drgn_register_number regno = + dwarf_regno_to_internal(dwarf_regno); + if (!drgn_register_state_has_register(regs, + regno)) { + err = &drgn_not_found; + goto out; + } + const struct drgn_register_layout *layout = + &prog->platform.arch->register_layout[regno]; + /* + * Note that this doesn't mask the address since + * the caller does that. + */ + copy_lsbytes(ret, sizeof(*ret), + HOST_LITTLE_ENDIAN, + &regs->buf[layout->offset], + layout->size, little_endian); + if (binary_buffer_has_next(&buffer.bb)) { + err = binary_buffer_error(&buffer.bb, + "stray operations in DW_AT_frame_base expression"); + } else { + err = NULL; + } + goto out; + } + default: + err = binary_buffer_error(&buffer.bb, + "invalid opcode %#" PRIx8 " for DW_AT_frame_base expression", + opcode); + goto out; + } + } else if (stack.size) { + *ret = stack.data[stack.size - 1]; + err = NULL; + break; + } else { + err = &drgn_not_found; + break; + } + } +out: + uint64_vector_deinit(&stack); + return err; +} + DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_type_map, ptr_key_hash_pair, scalar_key_eq) @@ -1871,6 +2472,380 @@ drgn_object_from_dwarf_subprogram(struct drgn_debug_info *dbinfo, 0); } +static struct drgn_error *read_bits(struct drgn_program *prog, void *dst, + unsigned int dst_bit_offset, uint64_t src, + unsigned int src_bit_offset, + uint64_t bit_size, bool lsb0) +{ + struct drgn_error *err; + + assert(dst_bit_offset < 8); + assert(src_bit_offset < 8); + + if (bit_size == 0) + return NULL; + + if (dst_bit_offset == src_bit_offset) { + /* + * We can read directly into the destination buffer, but we + * may have to preserve some bits at the start and/or end.
+ */ + uint8_t *d = dst; + uint64_t last_bit = dst_bit_offset + bit_size - 1; + uint8_t first_byte = d[0]; + uint8_t last_byte = d[last_bit / 8]; + err = drgn_program_read_memory(prog, d, src, last_bit / 8 + 1, + false); + if (err) + return err; + if (dst_bit_offset != 0) { + uint8_t mask = + copy_bits_first_mask(dst_bit_offset, lsb0); + d[0] = (first_byte & ~mask) | (d[0] & mask); + } + if (last_bit % 8 != 7) { + uint8_t mask = copy_bits_last_mask(last_bit, lsb0); + d[last_bit / 8] = ((last_byte & ~mask) + | (d[last_bit / 8] & mask)); + } + return NULL; + } else { + /* + * If the source and destination have different offsets, then + * depending on the size and source offset, we may have to read + * one more byte than is available in the destination. To keep + * things simple, we always read into a temporary buffer (rather + * than adding a special case for reading directly into the + * destination and shifting bits around). + */ + uint64_t src_bytes = (src_bit_offset + bit_size - 1) / 8 + 1; + char stack_tmp[16], *tmp; + if (src_bytes <= sizeof(stack_tmp)) { + tmp = stack_tmp; + } else { + tmp = malloc64(src_bytes); + if (!tmp) + return &drgn_enomem; + } + err = drgn_program_read_memory(prog, tmp, src, src_bytes, + false); + if (!err) { + copy_bits(dst, dst_bit_offset, tmp, src_bit_offset, + bit_size, lsb0); + } + if (src_bytes > sizeof(stack_tmp)) + free(tmp); + return err; + } +} + +static struct drgn_error * +drgn_object_from_dwarf_location(struct drgn_program *prog, + struct drgn_debug_info_module *module, + Dwarf_Die *die, + struct drgn_qualified_type qualified_type, + const char *expr, size_t expr_size, + Dwarf_Die *function_die, + const struct drgn_register_state *regs, + struct drgn_object *ret) +{ + struct drgn_error *err; + bool little_endian = drgn_platform_is_little_endian(&module->platform); + uint64_t address_mask = drgn_platform_address_mask(&module->platform); + drgn_register_number (*dwarf_regno_to_internal)(uint64_t) = + 
module->platform.arch->dwarf_regno_to_internal; + + struct drgn_object_type type; + err = drgn_object_type(qualified_type, 0, &type); + if (err) + return err; + + union drgn_value value; + char *value_buf = NULL; + + uint64_t address = 0; /* GCC thinks this may be used uninitialized. */ + int bit_offset = -1; /* -1 means that we don't have an address. */ + + uint64_t bit_pos = 0; + + struct uint64_vector stack = VECTOR_INIT; + int remaining_ops = MAX_DWARF_EXPR_OPS; + struct drgn_dwarf_expression_buffer buffer; + drgn_dwarf_expression_buffer_init(&buffer, module, expr, expr_size); + do { + stack.size = 0; + err = drgn_eval_dwarf_expression(prog, &buffer, &stack, + &remaining_ops, function_die, + regs); + if (err == &drgn_not_found) + goto absent; + else if (err) + goto out; + + const void *src = NULL; + size_t src_size; + + if (binary_buffer_has_next(&buffer.bb)) { + uint8_t opcode; + if ((err = binary_buffer_next_u8(&buffer.bb, &opcode))) + goto out; + + uint64_t uvalue; + uint64_t dwarf_regno; + drgn_register_number regno; + switch (opcode) { + case DW_OP_reg0 ... 
DW_OP_reg31: + dwarf_regno = opcode - DW_OP_reg0; + goto reg; + case DW_OP_regx: + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno))) + goto out; +reg: + if (!regs) + goto absent; + regno = dwarf_regno_to_internal(dwarf_regno); + if (!drgn_register_state_has_register(regs, + regno)) + goto absent; + const struct drgn_register_layout *layout = + &prog->platform.arch->register_layout[regno]; + src = &regs->buf[layout->offset]; + src_size = layout->size; + break; + case DW_OP_implicit_value: + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &uvalue))) + goto out; + if (uvalue > buffer.bb.end - buffer.bb.pos) { + err = binary_buffer_error(&buffer.bb, + "DW_OP_implicit_value size is out of bounds"); + goto out; + } + src = buffer.bb.pos; + src_size = uvalue; + buffer.bb.pos += uvalue; + break; + case DW_OP_stack_value: + if (!stack.size) + goto absent; + if (little_endian != HOST_LITTLE_ENDIAN) { + stack.data[stack.size - 1] = + bswap_64(stack.data[stack.size - 1]); + } + src = &stack.data[stack.size - 1]; + src_size = sizeof(stack.data[0]); + break; + default: + buffer.bb.pos = buffer.bb.prev; + break; + } + } + + uint64_t piece_bit_size; + uint64_t piece_bit_offset; + if (binary_buffer_has_next(&buffer.bb)) { + uint8_t opcode; + if ((err = binary_buffer_next_u8(&buffer.bb, &opcode))) + goto out; + + switch (opcode) { + case DW_OP_piece: + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &piece_bit_size))) + goto out; + /* + * It's probably bogus for the piece size to be + * larger than the remaining value size, but + * that's not explicitly stated in the DWARF 5 + * specification, so clamp it instead.
+ */ + if (__builtin_mul_overflow(piece_bit_size, 8U, + &piece_bit_size) || + piece_bit_size > type.bit_size - bit_pos) + piece_bit_size = type.bit_size - bit_pos; + piece_bit_offset = 0; + break; + case DW_OP_bit_piece: + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &piece_bit_size)) || + (err = binary_buffer_next_uleb128(&buffer.bb, + &piece_bit_offset))) + goto out; + if (piece_bit_size > type.bit_size - bit_pos) + piece_bit_size = type.bit_size - bit_pos; + break; + default: + err = binary_buffer_error(&buffer.bb, + "unknown DWARF expression opcode %#" PRIx8 " after simple location description", + opcode); + goto out; + } + } else { + piece_bit_size = type.bit_size - bit_pos; + piece_bit_offset = 0; + } + + /* + * TODO: there are a few cases that a DWARF location can + * describe that can't be represented in drgn's object model: + * + * 1. An object that is partially known and partially unknown. + * 2. An object that is partially in memory and partially a + * value. + * 3. An object that is in memory at non-contiguous addresses. + * 4. A pointer object whose pointer value is not known but + * whose referenced value is known (DW_OP_implicit_pointer). + * + * For case 1, we consider the whole object as absent. For cases + * 2 and 3, we convert the whole object to a value. Case 4 is + * not supported at all. We should add a way to represent all of + * these situations precisely. + */ + if (src && piece_bit_size == 0) { + /* Ignore empty value. */ + } else if (src) { + if (!value_buf && + !drgn_value_zalloc(drgn_value_size(type.bit_size), + &value, &value_buf)) { + err = &drgn_enomem; + goto out; + } + if (bit_offset >= 0) { + /* + * We previously had an address. Read it into + * the value. + */ + err = read_bits(prog, value_buf, 0, address, + bit_offset, bit_pos, + little_endian); + if (err) + goto out; + bit_offset = -1; + } + /* + * It's probably safe to assume that we don't have an + * implicit value larger than 2 exabytes. 
+ */ + assert(src_size <= UINT64_MAX / 8); + uint64_t src_bit_size = UINT64_C(8) * src_size; + if (piece_bit_offset > src_bit_size) + piece_bit_offset = src_bit_size; + uint64_t copy_bit_size = + min(piece_bit_size, + src_bit_size - piece_bit_offset); + uint64_t copy_bit_offset = bit_pos; + if (!little_endian) { + copy_bit_offset += piece_bit_size - copy_bit_size; + piece_bit_offset = (src_bit_size + - copy_bit_size + - piece_bit_offset); + } + copy_bits(&value_buf[copy_bit_offset / 8], + copy_bit_offset % 8, + (const char *)src + (piece_bit_offset / 8), + piece_bit_offset % 8, copy_bit_size, + little_endian); + } else if (stack.size) { + uint64_t piece_address = + ((stack.data[stack.size - 1] + piece_bit_offset / 8) + & address_mask); + piece_bit_offset %= 8; + if (bit_pos > 0 && bit_offset >= 0) { + /* + * We already had an address. Merge the pieces + * if the addresses are contiguous, otherwise + * convert to a value. + * + * The obvious way to write this is + * (address + (bit_pos + bit_offset) / 8), but + * (bit_pos + bit_offset) can overflow uint64_t. + */ + uint64_t end_address = + ((address + + bit_pos / 8 + + (bit_pos % 8 + bit_offset) / 8) + & address_mask); + unsigned int end_bit_offset = + (bit_offset + bit_pos) % 8; + if (piece_bit_size == 0 || + (piece_address == end_address && + piece_bit_offset == end_bit_offset)) { + /* Piece is contiguous. */ + piece_address = address; + piece_bit_offset = bit_offset; + } else { + if (!drgn_value_zalloc(drgn_value_size(type.bit_size), + &value, + &value_buf)) { + err = &drgn_enomem; + goto out; + } + err = read_bits(prog, value_buf, 0, + address, bit_offset, + bit_pos, little_endian); + if (err) + goto out; + bit_offset = -1; + } + } + if (value_buf) { + /* We already have a value. Read into it. 
*/ + err = read_bits(prog, &value_buf[bit_pos / 8], + bit_pos % 8, piece_address, + piece_bit_offset, + piece_bit_size, little_endian); + if (err) + goto out; + } else { + address = piece_address; + bit_offset = piece_bit_offset; + } + } else if (piece_bit_size > 0) { + goto absent; + } + bit_pos += piece_bit_size; + } while (binary_buffer_has_next(&buffer.bb)); + + if (bit_pos < type.bit_size || (bit_offset < 0 && !value_buf)) { +absent: + if (dwarf_tag(die) == DW_TAG_template_value_parameter) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_AT_template_value_parameter is missing value"); + } + drgn_object_reinit(ret, &type, DRGN_OBJECT_ABSENT); + err = NULL; + } else if (bit_offset >= 0) { + Dwarf_Addr start, end, bias; + dwfl_module_info(module->dwfl_module, NULL, &start, &end, &bias, + NULL, NULL, NULL); + /* + * If the address is not in the module's address range, then + * it's probably something special like a Linux per-CPU variable + * (which isn't actually a variable address but an offset). + * Don't apply the bias in that case. 
+ */ + if (start <= address + bias && address + bias < end) + address += bias; + err = drgn_object_set_reference_internal(ret, &type, address, + bit_offset); + } else if (type.encoding == DRGN_OBJECT_ENCODING_BUFFER) { + drgn_object_reinit(ret, &type, DRGN_OBJECT_VALUE); + ret->value = value; + value_buf = NULL; + err = NULL; + } else { + err = drgn_object_set_from_buffer_internal(ret, &type, + value_buf, 0); + } + +out: + if (value_buf != value.ibuf) + free(value_buf); + uint64_vector_deinit(&stack); + return err; +} + static struct drgn_error * drgn_object_from_dwarf_constant(struct drgn_debug_info *dbinfo, Dwarf_Die *die, struct drgn_qualified_type qualified_type, @@ -1910,59 +2885,130 @@ drgn_object_from_dwarf_constant(struct drgn_debug_info *dbinfo, Dwarf_Die *die, } } -static struct drgn_error * -drgn_object_from_dwarf_variable(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, struct drgn_object *ret) +struct drgn_error * +drgn_object_from_dwarf(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, Dwarf_Die *type_die, + Dwarf_Die *function_die, + const struct drgn_register_state *regs, + struct drgn_object *ret) { + struct drgn_error *err; + if (dwarf_tag(die) == DW_TAG_subprogram) { + return drgn_object_from_dwarf_subprogram(dbinfo, module, die, + ret); + } /* * The DWARF 5 specifications mentions that data object entries can have * DW_AT_endianity, but that doesn't seem to be used in practice. It * would be inconvenient to support, so ignore it for now. 
*/ struct drgn_qualified_type qualified_type; - struct drgn_error *err = drgn_type_from_dwarf_attr(dbinfo, module, - die, NULL, true, - true, NULL, - &qualified_type); + if (type_die) { + err = drgn_type_from_dwarf(dbinfo, module, type_die, + &qualified_type); + } else { + err = drgn_type_from_dwarf_attr(dbinfo, module, die, NULL, true, + true, NULL, &qualified_type); + } if (err) return err; Dwarf_Attribute attr_mem, *attr; + const char *expr; + size_t expr_size; if ((attr = dwarf_attr_integrate(die, DW_AT_location, &attr_mem))) { - Dwarf_Op *loc; - size_t nloc; - if (dwarf_getlocation(attr, &loc, &nloc)) - return drgn_error_libdw(); - if (nloc != 1 || loc[0].atom != DW_OP_addr) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_AT_location has unimplemented operation"); - } - uint64_t address = loc[0].number; - Dwarf_Addr start, end, bias; - dwfl_module_info(module->dwfl_module, NULL, &start, &end, &bias, - NULL, NULL, NULL); - /* - * If the address is not in the module's address range, then - * it's probably something special like a Linux per-CPU variable - * (which isn't actually a variable address but an offset). - * Don't apply the bias in that case. 
- */ - if (start <= address + bias && address + bias < end) - address += bias; - return drgn_object_set_reference(ret, qualified_type, address, - 0, 0); + err = drgn_dwarf_location(module, attr, regs, &expr, + &expr_size); + if (err) + return err; } else if ((attr = dwarf_attr_integrate(die, DW_AT_const_value, &attr_mem))) { return drgn_object_from_dwarf_constant(dbinfo, die, qualified_type, attr, ret); } else { - if (dwarf_tag(die) == DW_TAG_template_value_parameter) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_AT_template_value_parameter is missing value"); - } - return drgn_object_set_absent(ret, qualified_type, 0); + expr = NULL; + expr_size = 0; } + return drgn_object_from_dwarf_location(dbinfo->prog, module, die, + qualified_type, expr, expr_size, + function_die, regs, ret); +} + +static struct drgn_error *find_dwarf_enumerator(Dwarf_Die *enumeration_type, + const char *name, + Dwarf_Die *ret) +{ + int r = dwarf_child(enumeration_type, ret); + while (r == 0) { + if (dwarf_tag(ret) == DW_TAG_enumerator && + strcmp(dwarf_diename(ret), name) == 0) + return NULL; + r = dwarf_siblingof(ret, ret); + } + if (r < 0) + return drgn_error_libdw(); + ret->addr = NULL; + return NULL; +} + +struct drgn_error *drgn_find_in_dwarf_scopes(Dwarf_Die *scopes, + size_t num_scopes, + const char *name, + Dwarf_Die *die_ret, + Dwarf_Die *type_ret) +{ + struct drgn_error *err; + Dwarf_Die die; + for (size_t scope = num_scopes; scope--;) { + bool have_declaration = false; + if (dwarf_child(&scopes[scope], &die) != 0) + continue; + do { + switch (dwarf_tag(&die)) { + case DW_TAG_variable: + case DW_TAG_formal_parameter: + case DW_TAG_subprogram: + if (strcmp(dwarf_diename(&die), name) == 0) { + *die_ret = die; + bool declaration; + if (dwarf_flag(&die, DW_AT_declaration, + &declaration)) + return drgn_error_libdw(); + if (declaration) + have_declaration = true; + else + return NULL; + } + break; + case DW_TAG_enumeration_type: { + bool enum_class; + if 
(dwarf_flag_integrate(&die, DW_AT_enum_class, + &enum_class)) + return drgn_error_libdw(); + if (!enum_class) { + Dwarf_Die enumerator; + err = find_dwarf_enumerator(&die, name, + &enumerator); + if (err) + return err; + if (enumerator.addr) { + *die_ret = enumerator; + *type_ret = die; + return NULL; + } + } + break; + } + default: + continue; + } + } while (dwarf_siblingof(&die, &die) == 0); + if (have_declaration) + return NULL; + } + die_ret->addr = NULL; + return NULL; } static struct drgn_error * @@ -2297,9 +3343,9 @@ drgn_dwarf_template_value_parameter_thunk_fn(struct drgn_object *res, struct drgn_error *err; struct drgn_dwarf_die_thunk_arg *arg = arg_; if (res) { - err = drgn_object_from_dwarf_variable(drgn_object_program(res)->_dbinfo, - arg->module, &arg->die, - res); + err = drgn_object_from_dwarf(drgn_object_program(res)->_dbinfo, + arg->module, &arg->die, NULL, NULL, + NULL, res); if (err) return err; } @@ -2985,6 +4031,18 @@ drgn_type_from_dwarf_internal(struct drgn_debug_info *dbinfo, "maximum DWARF type parsing depth exceeded"); } + /* If the DIE has a type unit signature, follow it. */ + Dwarf_Die definition_die; + { + Dwarf_Attribute attr_mem, *attr; + if ((attr = dwarf_attr_integrate(die, DW_AT_signature, + &attr_mem))) { + if (!dwarf_formref_die(attr, &definition_die)) + return drgn_error_libdw(); + die = &definition_die; + } + } + /* If we got a declaration, try to find the definition. 
*/ bool declaration; if (dwarf_flag(die, DW_AT_declaration, &declaration)) @@ -3001,8 +4059,20 @@ drgn_type_from_dwarf_internal(struct drgn_debug_info *dbinfo, return drgn_error_libdwfl(); uintptr_t start = (uintptr_t)module->scn_data[DRGN_SCN_DEBUG_INFO]->d_buf; - if (!dwarf_offdie(dwarf, die_addr - start, die)) - return drgn_error_libdw(); + size_t size = + module->scn_data[DRGN_SCN_DEBUG_INFO]->d_size; + if (die_addr >= start && die_addr < start + size) { + if (!dwarf_offdie(dwarf, die_addr - start, + &definition_die)) + return drgn_error_libdw(); + } else { + start = (uintptr_t)module->scn_data[DRGN_SCN_DEBUG_TYPES]->d_buf; + /* Assume .debug_types */ + if (!dwarf_offdie_types(dwarf, die_addr - start, + &definition_die)) + return drgn_error_libdw(); + } + die = &definition_die; } } @@ -3251,22 +4321,15 @@ drgn_debug_info_find_object(const char *name, size_t name_len, return err; if (!die_matches_filename(&die, filename)) continue; - switch (dwarf_tag(&die)) { - case DW_TAG_enumeration_type: + if (dwarf_tag(&die) == DW_TAG_enumeration_type) { return drgn_object_from_dwarf_enumerator(dbinfo, index_die->module, &die, name, ret); - case DW_TAG_subprogram: - return drgn_object_from_dwarf_subprogram(dbinfo, - index_die->module, - &die, ret); - case DW_TAG_variable: - return drgn_object_from_dwarf_variable(dbinfo, - index_die->module, - &die, ret); - default: - UNREACHABLE(); + } else { + return drgn_object_from_dwarf(dbinfo, index_die->module, + &die, NULL, NULL, NULL, + ret); } } return &drgn_not_found; @@ -3327,7 +4390,8 @@ drgn_dwarf_cfi_next_encoded(struct drgn_debug_info_buffer *buffer, if (encoding & DW_EH_PE_indirect) { unknown_fde_encoding: return binary_buffer_error(&buffer->bb, - "unknown EH encoding %#x", encoding); + "unknown EH encoding %#" PRIx8, + encoding); } size_t pos = (buffer->bb.pos - @@ -3604,7 +4668,7 @@ drgn_parse_dwarf_frames(struct drgn_debug_info_module *module, if (!module->scns[scn]) return NULL; - err = 
read_elf_section(module->scns[scn], &module->scn_data[scn]); + err = drgn_debug_info_module_cache_section(module, scn); if (err) return err; Elf_Data *data = module->scn_data[scn]; @@ -4173,7 +5237,7 @@ drgn_eval_dwarf_cfi(struct drgn_debug_info_module *module, if (!initial_row) { invalid_for_initial: err = binary_buffer_error(&buffer.bb, - "invalid initial DWARF CFI opcode %#x", + "invalid initial DWARF CFI opcode %#" PRIx8, opcode); goto out; } @@ -4218,7 +5282,7 @@ drgn_eval_dwarf_cfi(struct drgn_debug_info_module *module, break; default: err = binary_buffer_error(&buffer.bb, - "unknown DWARF CFI opcode %#x", + "unknown DWARF CFI opcode %#" PRIx8, opcode); goto out; } @@ -4444,20 +5508,14 @@ drgn_debug_info_parse_orc(struct drgn_debug_info_module *module) return drgn_error_libelf(); module->orc_pc_base = shdr->sh_addr; - if (!module->scn_data[DRGN_SCN_ORC_UNWIND_IP]) { - err = read_elf_section(module->scns[DRGN_SCN_ORC_UNWIND_IP], - &module->scn_data[DRGN_SCN_ORC_UNWIND_IP]); - if (err) - return err; - } + err = drgn_debug_info_module_cache_section(module, + DRGN_SCN_ORC_UNWIND_IP); + if (err) + return err; + err = drgn_debug_info_module_cache_section(module, DRGN_SCN_ORC_UNWIND); + if (err) + return err; Elf_Data *orc_unwind_ip = module->scn_data[DRGN_SCN_ORC_UNWIND_IP]; - - if (!module->scn_data[DRGN_SCN_ORC_UNWIND]) { - err = read_elf_section(module->scns[DRGN_SCN_ORC_UNWIND], - &module->scn_data[DRGN_SCN_ORC_UNWIND]); - if (err) - return err; - } Elf_Data *orc_unwind = module->scn_data[DRGN_SCN_ORC_UNWIND]; size_t num_entries = orc_unwind_ip->d_size / sizeof(int32_t); @@ -4634,12 +5692,24 @@ drgn_eval_cfi_dwarf_expression(struct drgn_program *prog, } } + int remaining_ops = MAX_DWARF_EXPR_OPS; struct drgn_dwarf_expression_buffer buffer; drgn_dwarf_expression_buffer_init(&buffer, regs->module, rule->expr, rule->expr_size); - err = drgn_eval_dwarf_expression(prog, &buffer, &stack, regs); + err = drgn_eval_dwarf_expression(prog, &buffer, &stack, &remaining_ops, 
+ NULL, regs); if (err) goto out; + if (binary_buffer_has_next(&buffer.bb)) { + uint8_t opcode; + err = binary_buffer_next_u8(&buffer.bb, &opcode); + if (!err) { + err = binary_buffer_error(&buffer.bb, + "invalid opcode %#" PRIx8 " for CFI expression", + opcode); + } + goto out; + } if (stack.size == 0) { err = &drgn_not_found; } else if (rule->kind == DRGN_CFI_RULE_AT_DWARF_EXPRESSION) { diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index ef56c9739..b7f313ea5 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -12,6 +12,7 @@ #ifndef DRGN_DEBUG_INFO_H #define DRGN_DEBUG_INFO_H +#include #include #include @@ -52,6 +53,7 @@ enum drgn_debug_info_module_state { enum drgn_debug_info_scn { /* Sections whose data we should cache when loading the module. */ DRGN_SCN_DEBUG_INFO, + DRGN_SCN_DEBUG_TYPES, DRGN_SCN_DEBUG_ABBREV, DRGN_SCN_DEBUG_STR, DRGN_SCN_DEBUG_LINE, @@ -63,6 +65,7 @@ enum drgn_debug_info_scn { DRGN_SCN_EH_FRAME, DRGN_SCN_ORC_UNWIND_IP, DRGN_SCN_ORC_UNWIND, + DRGN_SCN_DEBUG_LOC, DRGN_NUM_DEBUG_SCN_DATA, @@ -200,6 +203,43 @@ drgn_debug_info_buffer_init(struct drgn_debug_info_buffer *buffer, buffer->scn = scn; } +/** + * Find the DWARF DIEs in a @ref drgn_debug_info_module for the scope containing + * a given program counter. + * + * @param[in] module Module containing @p pc. + * @param[in] pc Program counter. + * @param[out] bias_ret Returned difference between addresses in the loaded + * module and addresses in the returned DIEs. + * @param[out] dies_ret Returned DIEs. `(*dies_ret)[*length_ret - 1]` is the + * innermost DIE containing @p pc, `(*dies_ret)[*length_ret - 2]` is its parent + * (which may not contain @p pc itself), `(*dies_ret)[*length_ret - 3]` is its + * grandparent, etc. Must be freed with @c free(). + * @param[out] length_ret Returned length of @p dies_ret. 
+ */ +struct drgn_error * +drgn_debug_info_module_find_dwarf_scopes(struct drgn_debug_info_module *module, + uint64_t pc, uint64_t *bias_ret, + Dwarf_Die **dies_ret, + size_t *length_ret) + __attribute__((__nonnull__(1, 3, 4, 5))); + +/** + * Find the ancestors of a DWARF DIE. + * + * This finds the parent, grandparent, etc., of a DWARF DIE in the tree of DIEs. + * + * @param[in] module Module containing @p die. + * @param[in] die DIE to find. + * @param[out] dies_ret Returned DIEs. `(*dies_ret)[*length_ret]` is the DIE, + * `(*dies_ret)[*length_ret - 1]` is its parent, `(*dies_ret)[*length_ret - 2]` + * is its grandparent, etc., and `(*dies_ret)[0]` is the top-level unit DIE. + * @param[out] length_ret Returned number of ancestors in @p dies_ret. + */ +struct drgn_error *drgn_find_die_ancestors(Dwarf_Die *die, Dwarf_Die **dies_ret, + size_t *length_ret) + __attribute__((__nonnull__(2, 3))); + struct drgn_debug_info_module_key { const void *build_id; size_t build_id_len; @@ -378,6 +418,44 @@ drgn_debug_info_find_object(const char *name, size_t name_len, enum drgn_find_object_flags flags, void *arg, struct drgn_object *ret); +/** + * Find an object DIE in an array of DWARF scopes. + * + * @param[in] scopes Array of scopes, from outermost to innermost. + * @param[in] num_scopes Number of scopes in @p scopes. + * @param[out] die_ret Returned object DIE. + * @param[out] type_ret If @p die_ret is a `DW_TAG_enumerator` DIE, its parent. + * Otherwise, undefined. + */ +struct drgn_error *drgn_find_in_dwarf_scopes(Dwarf_Die *scopes, + size_t num_scopes, + const char *name, + Dwarf_Die *die_ret, + Dwarf_Die *type_ret); + +/** + * Create a @ref drgn_object from a `Dwarf_Die`. + * + * @param[in] die Object DIE (e.g., `DW_TAG_subprogram`, `DW_TAG_variable`, + * `DW_TAG_formal_parameter`, `DW_TAG_enumerator`, + * `DW_TAG_template_value_parameter`). + * @param[in] type_die DIE of object's type. If @c NULL, use the `DW_AT_type` + * attribute of @p die. 
If @p die is a `DW_TAG_enumerator` DIE, this should be + * its parent. + * @param[in] function_die DIE of current function. @c NULL if not in function + * context. + * @param[in] regs Registers of current stack frame. @c NULL if not in stack + * frame context. + * @param[out] ret Returned object. + */ +struct drgn_error * +drgn_object_from_dwarf(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, Dwarf_Die *type_die, + Dwarf_Die *function_die, + const struct drgn_register_state *regs, + struct drgn_object *ret); + /** * Get the Call Frame Information in a @ref drgn_debug_info_module at a given * program counter. diff --git a/libdrgn/drgn.h.in b/libdrgn/drgn.h.in index 89f642051..b6b0827a5 100644 --- a/libdrgn/drgn.h.in +++ b/libdrgn/drgn.h.in @@ -497,7 +497,8 @@ typedef struct drgn_error *(*drgn_memory_read_fn)(void *buf, uint64_t address, * Register a segment of memory in a @ref drgn_program. * * If the segment overlaps a previously registered segment, the new segment - * takes precedence. + * takes precedence. If any part of the segment is beyond the maximum address, + * that part is ignored. * * @param[in] address Address of the segment. * @param[in] size Size of the segment in bytes. @@ -2607,6 +2608,42 @@ struct drgn_error *drgn_format_stack_trace(struct drgn_stack_trace *trace, /** Return whether a stack frame was interrupted (e.g., by a signal). */ bool drgn_stack_frame_interrupted(struct drgn_stack_trace *trace, size_t frame); +/** + * Format a stack frame as a string. + * + * @param[out] ret Returned string. On success, it must be freed with @c free(). + * On error, its contents are undefined. + * @return @c NULL on success, non-@c NULL on error. + */ +struct drgn_error *drgn_format_stack_frame(struct drgn_stack_trace *trace, + size_t frame, char **ret); + +/** + * Get the name of the function at a stack frame. + * + * @return Function name. This is valid until the stack trace is destroyed; it + * should not be freed. 
@c NULL if the name could not be determined. + */ +const char *drgn_stack_frame_name(struct drgn_stack_trace *trace, size_t frame); + +/** Return whether a stack frame is for an inlined call. */ +bool drgn_stack_frame_is_inline(struct drgn_stack_trace *trace, size_t frame); + +/** + * Get the source code location of a stack frame. + * + * @param[out] line_ret Returned line number. Returned as 0 if unknown. May be + * @c NULL if not needed. + * @param[out] column_ret Returned column number. Returned as 0 if unknown. May + * be @c NULL if not needed. + * @return Filename. This is valid until the stack trace is destroyed; it should + * not be freed. @c NULL if the location could not be determined (in which case + * `*line_ret` and `*column_ret` are undefined). + */ +const char *drgn_stack_frame_source(struct drgn_stack_trace *trace, + size_t frame, int *line_ret, + int *column_ret); + /** * Get the program counter at a stack frame. * @@ -2626,6 +2663,19 @@ bool drgn_stack_frame_pc(struct drgn_stack_trace *trace, size_t frame, struct drgn_error *drgn_stack_frame_symbol(struct drgn_stack_trace *trace, size_t frame, struct drgn_symbol **ret); + +/** + * Find an object in the scope of a stack frame. + * + * @param[in] name Object name. + * @param[out] ret Returned object. This must have already been initialized with + * @ref drgn_object_init(). + * @return @c NULL on success, non-@c NULL on error. + */ +struct drgn_error * +drgn_stack_frame_find_object(struct drgn_stack_trace *trace, size_t frame, + const char *name, struct drgn_object *ret); + /** * Get the value of a register in a stack frame. 
* diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c index 8575fe9e1..5308f6ca6 100644 --- a/libdrgn/dwarf_index.c +++ b/libdrgn/dwarf_index.c @@ -19,6 +19,16 @@ #include "siphash.h" #include "util.h" +struct drgn_dwarf_index_pending_cu { + struct drgn_debug_info_module *module; + const char *buf; + size_t len; + bool is_64_bit; + enum drgn_debug_info_scn scn; +}; + +DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_pending_cu_vector) + /* * The DWARF abbreviation table gets translated into a series of instructions. * An instruction <= INSN_MAX_SKIP indicates a number of bytes to be skipped @@ -30,7 +40,7 @@ * set to zero if the tag is not of interest); see DIE_FLAG_*. */ enum { - INSN_MAX_SKIP = 226, + INSN_MAX_SKIP = 219, ATTRIB_BLOCK1, ATTRIB_BLOCK2, ATTRIB_BLOCK4, @@ -60,7 +70,14 @@ enum { ATTRIB_SPECIFICATION_REF_UDATA, ATTRIB_SPECIFICATION_REF_ADDR4, ATTRIB_SPECIFICATION_REF_ADDR8, - ATTRIB_MAX_INSN = ATTRIB_SPECIFICATION_REF_ADDR8, + ATTRIB_INDIRECT, + ATTRIB_SIBLING_INDIRECT, + ATTRIB_NAME_INDIRECT, + ATTRIB_STMT_LIST_INDIRECT, + ATTRIB_DECL_FILE_INDIRECT, + ATTRIB_DECLARATION_INDIRECT, + ATTRIB_SPECIFICATION_INDIRECT, + ATTRIB_MAX_INSN = ATTRIB_SPECIFICATION_INDIRECT, }; enum { @@ -82,6 +99,7 @@ struct drgn_dwarf_index_cu { uint8_t version; uint8_t address_size; bool is_64_bit; + bool is_type_unit; /* * This is indexed on the DWARF abbreviation code minus one. 
It maps the * abbreviation code to an index in abbrev_insns where the instruction @@ -212,22 +230,285 @@ void drgn_dwarf_index_deinit(struct drgn_dwarf_index *dindex) drgn_dwarf_index_namespace_deinit(&dindex->global); } -void drgn_dwarf_index_update_begin(struct drgn_dwarf_index_update_state *state, +bool +drgn_dwarf_index_update_state_init(struct drgn_dwarf_index_update_state *state, struct drgn_dwarf_index *dindex) { state->dindex = dindex; - state->old_cus_size = dindex->cus.size; - state->err = NULL; + state->max_threads = omp_get_max_threads(); + state->cus = malloc_array(state->max_threads, sizeof(*state->cus)); + if (!state->cus) + return false; + for (size_t i = 0; i < state->max_threads; i++) + drgn_dwarf_index_pending_cu_vector_init(&state->cus[i]); + return true; +} + +void +drgn_dwarf_index_update_state_deinit(struct drgn_dwarf_index_update_state *state) +{ + for (size_t i = 0; i < state->max_threads; i++) + drgn_dwarf_index_pending_cu_vector_deinit(&state->cus[i]); + free(state->cus); +} + +static struct drgn_error *dw_form_to_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, + uint64_t form, uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_addr: + *insn_ret = cu->address_size; + return NULL; + case DW_FORM_data1: + case DW_FORM_ref1: + case DW_FORM_flag: + *insn_ret = 1; + return NULL; + case DW_FORM_data2: + case DW_FORM_ref2: + *insn_ret = 2; + return NULL; + case DW_FORM_data4: + case DW_FORM_ref4: + *insn_ret = 4; + return NULL; + case DW_FORM_data8: + case DW_FORM_ref8: + case DW_FORM_ref_sig8: + *insn_ret = 8; + return NULL; + case DW_FORM_block1: + *insn_ret = ATTRIB_BLOCK1; + return NULL; + case DW_FORM_block2: + *insn_ret = ATTRIB_BLOCK2; + return NULL; + case DW_FORM_block4: + *insn_ret = ATTRIB_BLOCK4; + return NULL; + case DW_FORM_exprloc: + *insn_ret = ATTRIB_EXPRLOC; + return NULL; + case DW_FORM_sdata: + case DW_FORM_udata: + case DW_FORM_ref_udata: + *insn_ret = ATTRIB_LEB128; + return NULL; + case 
DW_FORM_ref_addr: + case DW_FORM_sec_offset: + case DW_FORM_strp: + *insn_ret = cu->is_64_bit ? 8 : 4; + return NULL; + case DW_FORM_string: + *insn_ret = ATTRIB_STRING; + return NULL; + case DW_FORM_flag_present: + *insn_ret = 0; + return NULL; + case DW_FORM_indirect: + *insn_ret = ATTRIB_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %" PRIu64, + form); + } +} + +static struct drgn_error *dw_at_sibling_to_insn(struct binary_buffer *bb, + uint64_t form, + uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_ref1: + *insn_ret = ATTRIB_SIBLING_REF1; + return NULL; + case DW_FORM_ref2: + *insn_ret = ATTRIB_SIBLING_REF2; + return NULL; + case DW_FORM_ref4: + *insn_ret = ATTRIB_SIBLING_REF4; + return NULL; + case DW_FORM_ref8: + *insn_ret = ATTRIB_SIBLING_REF8; + return NULL; + case DW_FORM_ref_udata: + *insn_ret = ATTRIB_SIBLING_REF_UDATA; + return NULL; + case DW_FORM_indirect: + *insn_ret = ATTRIB_SIBLING_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %" PRIu64 " for DW_AT_sibling", + form); + } +} + +static struct drgn_error *dw_at_name_to_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, + uint64_t form, uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_strp: + if (!cu->module->scn_data[DRGN_SCN_DEBUG_STR]) { + return binary_buffer_error(bb, + "DW_FORM_strp without .debug_str section"); + } + if (cu->is_64_bit) + *insn_ret = ATTRIB_NAME_STRP8; + else + *insn_ret = ATTRIB_NAME_STRP4; + return NULL; + case DW_FORM_string: + *insn_ret = ATTRIB_NAME_STRING; + return NULL; + case DW_FORM_indirect: + *insn_ret = ATTRIB_NAME_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %" PRIu64 " for DW_AT_name", + form); + } +} + +static struct drgn_error * +dw_at_stmt_list_to_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, uint64_t form, + uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_data4: + 
*insn_ret = ATTRIB_STMT_LIST_LINEPTR4; + return NULL; + case DW_FORM_data8: + *insn_ret = ATTRIB_STMT_LIST_LINEPTR8; + return NULL; + case DW_FORM_sec_offset: + if (cu->is_64_bit) + *insn_ret = ATTRIB_STMT_LIST_LINEPTR8; + else + *insn_ret = ATTRIB_STMT_LIST_LINEPTR4; + return NULL; + case DW_FORM_indirect: + *insn_ret = ATTRIB_STMT_LIST_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %" PRIu64 " for DW_AT_stmt_list", + form); + } +} + +static struct drgn_error *dw_at_decl_file_to_insn(struct binary_buffer *bb, + uint64_t form, + uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_data1: + *insn_ret = ATTRIB_DECL_FILE_DATA1; + return NULL; + case DW_FORM_data2: + *insn_ret = ATTRIB_DECL_FILE_DATA2; + return NULL; + case DW_FORM_data4: + *insn_ret = ATTRIB_DECL_FILE_DATA4; + return NULL; + case DW_FORM_data8: + *insn_ret = ATTRIB_DECL_FILE_DATA8; + return NULL; + /* + * decl_file must be positive, so if the compiler uses + * DW_FORM_sdata for some reason, just treat it as udata. + */ + case DW_FORM_sdata: + case DW_FORM_udata: + *insn_ret = ATTRIB_DECL_FILE_UDATA; + return NULL; + case DW_FORM_indirect: + *insn_ret = ATTRIB_DECL_FILE_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %" PRIu64 " for DW_AT_decl_file", + form); + } } -void drgn_dwarf_index_update_cancel(struct drgn_dwarf_index_update_state *state, - struct drgn_error *err) +static struct drgn_error * +dw_at_declaration_to_insn(struct binary_buffer *bb, uint64_t form, + uint8_t *insn_ret, uint8_t *die_flags) { - #pragma omp critical(drgn_dwarf_index_update_cancel) - if (state->err) - drgn_error_destroy(err); - else - state->err = err; + switch (form) { + case DW_FORM_flag: + *insn_ret = ATTRIB_DECLARATION_FLAG; + return NULL; + case DW_FORM_flag_present: + /* + * This could be an instruction, but as long as we have a free + * DIE flag bit, we might as well use it. 
+ */ + *insn_ret = 0; + *die_flags |= DIE_FLAG_DECLARATION; + return NULL; + case DW_FORM_indirect: + *insn_ret = ATTRIB_DECLARATION_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %" PRIu64 " for DW_AT_declaration", + form); + } +} + +static struct drgn_error * +dw_at_specification_to_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, uint64_t form, + uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_ref1: + *insn_ret = ATTRIB_SPECIFICATION_REF1; + return NULL; + case DW_FORM_ref2: + *insn_ret = ATTRIB_SPECIFICATION_REF2; + return NULL; + case DW_FORM_ref4: + *insn_ret = ATTRIB_SPECIFICATION_REF4; + return NULL; + case DW_FORM_ref8: + *insn_ret = ATTRIB_SPECIFICATION_REF8; + return NULL; + case DW_FORM_ref_udata: + *insn_ret = ATTRIB_SPECIFICATION_REF_UDATA; + return NULL; + case DW_FORM_ref_addr: + if (cu->version >= 3) { + if (cu->is_64_bit) + *insn_ret = ATTRIB_SPECIFICATION_REF_ADDR8; + else + *insn_ret = ATTRIB_SPECIFICATION_REF_ADDR4; + } else { + if (cu->address_size == 8) + *insn_ret = ATTRIB_SPECIFICATION_REF_ADDR8; + else if (cu->address_size == 4) + *insn_ret = ATTRIB_SPECIFICATION_REF_ADDR4; + else + return binary_buffer_error(bb, + "unsupported address size %" PRIu8 " for DW_FORM_ref_addr", + cu->address_size); + } + return NULL; + case DW_FORM_indirect: + *insn_ret = ATTRIB_SPECIFICATION_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %" PRIu64 " for DW_AT_specification", + form); + } } static struct drgn_error * @@ -302,221 +583,48 @@ read_abbrev_decl(struct drgn_debug_info_buffer *buffer, break; if (name == DW_AT_sibling) { - switch (form) { - case DW_FORM_ref1: - insn = ATTRIB_SIBLING_REF1; - goto append_insn; - case DW_FORM_ref2: - insn = ATTRIB_SIBLING_REF2; - goto append_insn; - case DW_FORM_ref4: - insn = ATTRIB_SIBLING_REF4; - goto append_insn; - case DW_FORM_ref8: - insn = ATTRIB_SIBLING_REF8; - goto append_insn; - case 
DW_FORM_ref_udata: - insn = ATTRIB_SIBLING_REF_UDATA; - goto append_insn; - default: - break; - } + err = dw_at_sibling_to_insn(&buffer->bb, form, &insn); } else if (name == DW_AT_name && should_index) { - switch (form) { - case DW_FORM_strp: - if (!cu->module->scn_data[DRGN_SCN_DEBUG_STR]) { - return binary_buffer_error(&buffer->bb, - "DW_FORM_strp without .debug_str section"); - } - if (cu->is_64_bit) - insn = ATTRIB_NAME_STRP8; - else - insn = ATTRIB_NAME_STRP4; - goto append_insn; - case DW_FORM_string: - insn = ATTRIB_NAME_STRING; - goto append_insn; - default: - break; - } - } else if (name == DW_AT_stmt_list && - cu->module->scn_data[DRGN_SCN_DEBUG_LINE]) { - switch (form) { - case DW_FORM_data4: - insn = ATTRIB_STMT_LIST_LINEPTR4; - goto append_insn; - case DW_FORM_data8: - insn = ATTRIB_STMT_LIST_LINEPTR8; - goto append_insn; - case DW_FORM_sec_offset: - if (cu->is_64_bit) - insn = ATTRIB_STMT_LIST_LINEPTR8; - else - insn = ATTRIB_STMT_LIST_LINEPTR4; - goto append_insn; - default: - break; + err = dw_at_name_to_insn(cu, &buffer->bb, form, &insn); + } else if (name == DW_AT_stmt_list) { + if (!cu->module->scn_data[DRGN_SCN_DEBUG_LINE]) { + return binary_buffer_error(&buffer->bb, + "DW_AT_stmt_list without .debug_line section"); } + err = dw_at_stmt_list_to_insn(cu, &buffer->bb, form, + &insn); } else if (name == DW_AT_decl_file && should_index && /* Namespaces are merged, so we ignore their file. */ tag != DW_TAG_namespace) { - switch (form) { - case DW_FORM_data1: - insn = ATTRIB_DECL_FILE_DATA1; - goto append_insn; - case DW_FORM_data2: - insn = ATTRIB_DECL_FILE_DATA2; - goto append_insn; - case DW_FORM_data4: - insn = ATTRIB_DECL_FILE_DATA4; - goto append_insn; - case DW_FORM_data8: - insn = ATTRIB_DECL_FILE_DATA8; - goto append_insn; - /* - * decl_file must be positive, so if the compiler uses - * DW_FORM_sdata for some reason, just treat it as - * udata. 
- */ - case DW_FORM_sdata: - case DW_FORM_udata: - insn = ATTRIB_DECL_FILE_UDATA; - goto append_insn; - default: - break; - } + err = dw_at_decl_file_to_insn(&buffer->bb, form, &insn); } else if (name == DW_AT_declaration && should_index) { - switch (form) { - case DW_FORM_flag: - insn = ATTRIB_DECLARATION_FLAG; - goto append_insn; - case DW_FORM_flag_present: - /* - * This could be an instruction, but as long as - * we have a free DIE flag bit, we might as well - * use it. - */ - die_flags |= DIE_FLAG_DECLARATION; - break; - default: - return binary_buffer_error(&buffer->bb, - "unknown attribute form %" PRIu64 " for DW_AT_declaration", - form); - } + err = dw_at_declaration_to_insn(&buffer->bb, form, + &insn, &die_flags); } else if (name == DW_AT_specification && should_index) { - switch (form) { - case DW_FORM_ref1: - insn = ATTRIB_SPECIFICATION_REF1; - goto append_insn; - case DW_FORM_ref2: - insn = ATTRIB_SPECIFICATION_REF2; - goto append_insn; - case DW_FORM_ref4: - insn = ATTRIB_SPECIFICATION_REF4; - goto append_insn; - case DW_FORM_ref8: - insn = ATTRIB_SPECIFICATION_REF8; - goto append_insn; - case DW_FORM_ref_udata: - insn = ATTRIB_SPECIFICATION_REF_UDATA; - goto append_insn; - case DW_FORM_ref_addr: - if (cu->version >= 3) { - if (cu->is_64_bit) - insn = ATTRIB_SPECIFICATION_REF_ADDR8; - else - insn = ATTRIB_SPECIFICATION_REF_ADDR4; - } else { - if (cu->address_size == 8) - insn = ATTRIB_SPECIFICATION_REF_ADDR8; - else if (cu->address_size == 4) - insn = ATTRIB_SPECIFICATION_REF_ADDR4; - else - return binary_buffer_error(&buffer->bb, - "unsupported address size %" PRIu8 " for DW_FORM_ref_addr", - cu->address_size); - } - goto append_insn; - default: - return binary_buffer_error(&buffer->bb, - "unknown attribute form %" PRIu64 " for DW_AT_specification", - form); - } - } - - switch (form) { - case DW_FORM_addr: - insn = cu->address_size; - break; - case DW_FORM_data1: - case DW_FORM_ref1: - case DW_FORM_flag: - insn = 1; - break; - case DW_FORM_data2: - 
case DW_FORM_ref2: - insn = 2; - break; - case DW_FORM_data4: - case DW_FORM_ref4: - insn = 4; - break; - case DW_FORM_data8: - case DW_FORM_ref8: - case DW_FORM_ref_sig8: - insn = 8; - break; - case DW_FORM_block1: - insn = ATTRIB_BLOCK1; - goto append_insn; - case DW_FORM_block2: - insn = ATTRIB_BLOCK2; - goto append_insn; - case DW_FORM_block4: - insn = ATTRIB_BLOCK4; - goto append_insn; - case DW_FORM_exprloc: - insn = ATTRIB_EXPRLOC; - goto append_insn; - case DW_FORM_sdata: - case DW_FORM_udata: - case DW_FORM_ref_udata: - insn = ATTRIB_LEB128; - goto append_insn; - case DW_FORM_ref_addr: - case DW_FORM_sec_offset: - case DW_FORM_strp: - insn = cu->is_64_bit ? 8 : 4; - break; - case DW_FORM_string: - insn = ATTRIB_STRING; - goto append_insn; - case DW_FORM_flag_present: - continue; - case DW_FORM_indirect: - return binary_buffer_error(&buffer->bb, - "DW_FORM_indirect is not implemented"); - default: - return binary_buffer_error(&buffer->bb, - "unknown attribute form %" PRIu64, - form); + err = dw_at_specification_to_insn(cu, &buffer->bb, form, + &insn); + } else { + err = dw_form_to_insn(cu, &buffer->bb, form, &insn); } + if (err) + return err; - if (!first) { - uint8_t last_insn = insns->data[insns->size - 1]; - if (last_insn + insn <= INSN_MAX_SKIP) { - insns->data[insns->size - 1] += insn; - continue; - } else if (last_insn < INSN_MAX_SKIP) { - insn = last_insn + insn - INSN_MAX_SKIP; - insns->data[insns->size - 1] = INSN_MAX_SKIP; + if (insn != 0) { + if (!first && insn <= INSN_MAX_SKIP) { + uint8_t last_insn = insns->data[insns->size - 1]; + if (last_insn + insn <= INSN_MAX_SKIP) { + insns->data[insns->size - 1] += insn; + continue; + } else if (last_insn < INSN_MAX_SKIP) { + insn = last_insn + insn - INSN_MAX_SKIP; + insns->data[insns->size - 1] = INSN_MAX_SKIP; + } } - } -append_insn: - first = false; - if (!uint8_vector_append(insns, &insn)) - return &drgn_enomem; + if (!uint8_vector_append(insns, &insn)) + return &drgn_enomem; + first = false; + } } 
insn = 0; if (!uint8_vector_append(insns, &insn) || @@ -585,6 +693,12 @@ static struct drgn_error *read_cu(struct drgn_dwarf_index_cu_buffer *buffer) &buffer->cu->address_size))) return err; + /* Skip type_signature and type_offset for type units. */ + if (buffer->cu->is_type_unit && + (err = binary_buffer_skip(&buffer->bb, + buffer->cu->is_64_bit ? 16 : 12))) + return err; + return read_abbrev_table(buffer->cu, debug_abbrev_offset); } @@ -766,6 +880,35 @@ index_specification(struct drgn_dwarf_index *dindex, uintptr_t declaration, return ret == -1 ? &drgn_enomem : NULL; } +static struct drgn_error *read_indirect_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, + uint8_t insn, uint8_t *insn_ret, + uint8_t *die_flags) +{ + struct drgn_error *err; + uint64_t form; + if ((err = binary_buffer_next_uleb128(bb, &form))) + return err; + switch (insn) { + case ATTRIB_INDIRECT: + return dw_form_to_insn(cu, bb, form, insn_ret); + case ATTRIB_SIBLING_INDIRECT: + return dw_at_sibling_to_insn(bb, form, insn_ret); + case ATTRIB_NAME_INDIRECT: + return dw_at_name_to_insn(cu, bb, form, insn_ret); + case ATTRIB_STMT_LIST_INDIRECT: + return dw_at_stmt_list_to_insn(cu, bb, form, insn_ret); + case ATTRIB_DECL_FILE_INDIRECT: + return dw_at_decl_file_to_insn(bb, form, insn_ret); + case ATTRIB_DECLARATION_INDIRECT: + return dw_at_declaration_to_insn(bb, form, insn_ret, die_flags); + case ATTRIB_SPECIFICATION_INDIRECT: + return dw_at_specification_to_insn(cu, bb, form, insn_ret); + default: + UNREACHABLE(); + } +} + /* * First pass: read the file name tables and index DIEs with * DW_AT_specification. This recurses into namespaces. @@ -776,7 +919,8 @@ index_cu_first_pass(struct drgn_dwarf_index *dindex, { struct drgn_error *err; struct drgn_dwarf_index_cu *cu = buffer->cu; - Elf_Data *debug_info = cu->module->scn_data[DRGN_SCN_DEBUG_INFO]; + Elf_Data *debug_info = cu->module->scn_data[ + cu->is_type_unit ? 
DRGN_SCN_DEBUG_TYPES : DRGN_SCN_DEBUG_INFO]; const char *debug_info_buffer = debug_info->d_buf; unsigned int depth = 0; for (;;) { @@ -803,7 +947,9 @@ index_cu_first_pass(struct drgn_dwarf_index *dindex, uint64_t stmt_list; const char *sibling = NULL; uint8_t insn; + uint8_t extra_die_flags = 0; while ((insn = *insnp++)) { +indirect_insn:; uint64_t skip, tmp; switch (insn) { case ATTRIB_BLOCK1: @@ -946,6 +1092,21 @@ index_cu_first_pass(struct drgn_dwarf_index *dindex, specification_ref_addr: specification = (uintptr_t)debug_info_buffer + tmp; break; + case ATTRIB_INDIRECT: + case ATTRIB_SIBLING_INDIRECT: + case ATTRIB_NAME_INDIRECT: + case ATTRIB_STMT_LIST_INDIRECT: + case ATTRIB_DECL_FILE_INDIRECT: + case ATTRIB_DECLARATION_INDIRECT: + case ATTRIB_SPECIFICATION_INDIRECT: + if ((err = read_indirect_insn(cu, &buffer->bb, + insn, &insn, + &extra_die_flags))) + return err; + if (insn) + goto indirect_insn; + else + continue; default: skip = insn; skip: @@ -955,7 +1116,7 @@ index_cu_first_pass(struct drgn_dwarf_index *dindex, break; } } - insn = *insnp; + insn = *insnp | extra_die_flags; if (depth == 0) { if (stmt_list_ptr) { @@ -997,71 +1158,62 @@ index_cu_first_pass(struct drgn_dwarf_index *dindex, return NULL; } -void drgn_dwarf_index_read_module(struct drgn_dwarf_index_update_state *state, - struct drgn_debug_info_module *module) +static struct drgn_error * +drgn_dwarf_index_read_cus(struct drgn_dwarf_index_update_state *state, + struct drgn_debug_info_module *module, + enum drgn_debug_info_scn scn) { + struct drgn_dwarf_index_pending_cu_vector *cus = + &state->cus[omp_get_thread_num()]; + struct drgn_error *err; struct drgn_debug_info_buffer buffer; - drgn_debug_info_buffer_init(&buffer, module, DRGN_SCN_DEBUG_INFO); + drgn_debug_info_buffer_init(&buffer, module, scn); while (binary_buffer_has_next(&buffer.bb)) { - const char *cu_buf = buffer.bb.pos; + struct drgn_dwarf_index_pending_cu *cu = + drgn_dwarf_index_pending_cu_vector_append_entry(cus); + if (!cu) + 
return &drgn_enomem; + cu->module = module; + cu->buf = buffer.bb.pos; uint32_t unit_length32; if ((err = binary_buffer_next_u32(&buffer.bb, &unit_length32))) - goto err; - bool is_64_bit = unit_length32 == UINT32_C(0xffffffff); - if (is_64_bit) { + return err; + cu->is_64_bit = unit_length32 == UINT32_C(0xffffffff); + if (cu->is_64_bit) { uint64_t unit_length64; if ((err = binary_buffer_next_u64(&buffer.bb, &unit_length64))) - goto err; + return err; if (unit_length64 > SIZE_MAX) { - err = binary_buffer_error(&buffer.bb, - "unit length is too large"); - goto err; + return binary_buffer_error(&buffer.bb, + "unit length is too large"); } if ((err = binary_buffer_skip(&buffer.bb, unit_length64))) - goto err; + return err; } else { if ((err = binary_buffer_skip(&buffer.bb, unit_length32))) - goto err; - } - size_t cu_len = buffer.bb.pos - cu_buf; - - #pragma omp task - { - struct drgn_dwarf_index_cu cu = { - .module = module, - .buf = cu_buf, - .len = cu_len, - .is_64_bit = is_64_bit, - }; - struct drgn_dwarf_index_cu_buffer cu_buffer; - drgn_dwarf_index_cu_buffer_init(&cu_buffer, &cu); - struct drgn_error *cu_err = read_cu(&cu_buffer); - if (cu_err) - goto cu_err; - - cu_err = index_cu_first_pass(state->dindex, &cu_buffer); - if (cu_err) - goto cu_err; - - #pragma omp critical(drgn_dwarf_index_cus) - if (!drgn_dwarf_index_cu_vector_append(&state->dindex->cus, - &cu)) - cu_err = &drgn_enomem; - if (cu_err) { -cu_err: - drgn_dwarf_index_cu_deinit(&cu); - drgn_dwarf_index_update_cancel(state, cu_err); - } + return err; } + cu->len = buffer.bb.pos - cu->buf; + cu->scn = scn; } - return; + return NULL; +} -err: - drgn_dwarf_index_update_cancel(state, err); +struct drgn_error * +drgn_dwarf_index_read_module(struct drgn_dwarf_index_update_state *state, + struct drgn_debug_info_module *module) +{ + struct drgn_error *err; + err = drgn_dwarf_index_read_cus(state, module, DRGN_SCN_DEBUG_INFO); + if (!err && module->scn_data[DRGN_SCN_DEBUG_TYPES]) { + err = 
drgn_dwarf_index_read_cus(state, module, + DRGN_SCN_DEBUG_TYPES); + } + return err; } bool @@ -1225,7 +1377,9 @@ index_cu_second_pass(struct drgn_dwarf_index_namespace *ns, bool specification = false; const char *sibling = NULL; uint8_t insn; + uint8_t extra_die_flags = 0; while ((insn = *insnp++)) { +indirect_insn:; uint64_t skip, tmp; switch (insn) { case ATTRIB_BLOCK1: @@ -1377,6 +1531,21 @@ index_cu_second_pass(struct drgn_dwarf_index_namespace *ns, specification = true; skip = 8; goto skip; + case ATTRIB_INDIRECT: + case ATTRIB_SIBLING_INDIRECT: + case ATTRIB_NAME_INDIRECT: + case ATTRIB_STMT_LIST_INDIRECT: + case ATTRIB_DECL_FILE_INDIRECT: + case ATTRIB_DECLARATION_INDIRECT: + case ATTRIB_SPECIFICATION_INDIRECT: + if ((err = read_indirect_insn(cu, &buffer->bb, + insn, &insn, + &extra_die_flags))) + return err; + if (insn) + goto indirect_insn; + else + continue; default: skip = insn; skip: @@ -1386,7 +1555,7 @@ index_cu_second_pass(struct drgn_dwarf_index_namespace *ns, break; } } - insn = *insnp; + insn = *insnp | extra_die_flags; uint8_t tag = insn & DIE_FLAG_TAG_MASK; if (depth == 1) { @@ -1517,37 +1686,81 @@ static void drgn_dwarf_index_rollback(struct drgn_dwarf_index *dindex) } struct drgn_error * -drgn_dwarf_index_update_end(struct drgn_dwarf_index_update_state *state) +drgn_dwarf_index_update(struct drgn_dwarf_index_update_state *state) { struct drgn_dwarf_index *dindex = state->dindex; - if (state->err) + size_t old_cus_size = dindex->cus.size; + size_t new_cus_size = old_cus_size; + for (size_t i = 0; i < state->max_threads; i++) + new_cus_size += state->cus[i].size; + if (!drgn_dwarf_index_cu_vector_reserve(&dindex->cus, new_cus_size)) + return &drgn_enomem; + for (size_t i = 0; i < state->max_threads; i++) { + for (size_t j = 0; j < state->cus[i].size; j++) { + struct drgn_dwarf_index_pending_cu *pending_cu = + &state->cus[i].data[j]; + dindex->cus.data[dindex->cus.size++] = (struct drgn_dwarf_index_cu){ + .module = pending_cu->module, + .buf = 
pending_cu->buf, + .len = pending_cu->len, + .is_64_bit = pending_cu->is_64_bit, + .is_type_unit = + pending_cu->scn == DRGN_SCN_DEBUG_TYPES, + }; + } + } + + struct drgn_error *err = NULL; + #pragma omp parallel for schedule(dynamic) + for (size_t i = old_cus_size; i < dindex->cus.size; i++) { + if (err) + continue; + struct drgn_dwarf_index_cu *cu = &dindex->cus.data[i]; + struct drgn_dwarf_index_cu_buffer cu_buffer; + drgn_dwarf_index_cu_buffer_init(&cu_buffer, cu); + struct drgn_error *cu_err = read_cu(&cu_buffer); + if (!cu_err) + cu_err = index_cu_first_pass(state->dindex, &cu_buffer); + if (cu_err) { + #pragma omp critical(drgn_dwarf_index_update_end_error) + if (err) + drgn_error_destroy(cu_err); + else + err = cu_err; + } + } + if (err) goto err; #pragma omp parallel for schedule(dynamic) - for (size_t i = state->old_cus_size; i < dindex->cus.size; i++) { - if (drgn_dwarf_index_update_cancelled(state)) + for (size_t i = old_cus_size; i < dindex->cus.size; i++) { + if (err) continue; struct drgn_dwarf_index_cu *cu = &dindex->cus.data[i]; struct drgn_dwarf_index_cu_buffer buffer; drgn_dwarf_index_cu_buffer_init(&buffer, cu); buffer.bb.pos += cu->is_64_bit ? 23 : 11; + if (cu->is_type_unit) + buffer.bb.pos += cu->is_64_bit ? 
16 : 12; struct drgn_error *cu_err = index_cu_second_pass(&dindex->global, &buffer); - if (cu_err) - drgn_dwarf_index_update_cancel(state, cu_err); - } - if (state->err) { - drgn_dwarf_index_rollback(state->dindex); - goto err; + if (cu_err) { + #pragma omp critical(drgn_dwarf_index_update_end_error) + if (err) + drgn_error_destroy(cu_err); + else + err = cu_err; + } } - return NULL; - + if (err) { + drgn_dwarf_index_rollback(dindex); err: - for (size_t i = state->old_cus_size; i < dindex->cus.size; i++) - drgn_dwarf_index_cu_deinit(&dindex->cus.data[i]); - dindex->cus.size = state->old_cus_size; - return state->err; + for (size_t i = old_cus_size; i < dindex->cus.size; i++) + drgn_dwarf_index_cu_deinit(&dindex->cus.data[i]); + dindex->cus.size = old_cus_size; + } + return err; } static struct drgn_error *index_namespace(struct drgn_dwarf_index_namespace *ns) @@ -1692,7 +1905,14 @@ struct drgn_error *drgn_dwarf_index_get_die(struct drgn_dwarf_index_die *die, return drgn_error_libdwfl(); uintptr_t start = (uintptr_t)die->module->scn_data[DRGN_SCN_DEBUG_INFO]->d_buf; - if (!dwarf_offdie(dwarf, die->addr - start, die_ret)) - return drgn_error_libdw(); + size_t size = die->module->scn_data[DRGN_SCN_DEBUG_INFO]->d_size; + if (die->addr >= start && die->addr < start + size) { + if (!dwarf_offdie(dwarf, die->addr - start, die_ret)) + return drgn_error_libdw(); + } else { + start = (uintptr_t)die->module->scn_data[DRGN_SCN_DEBUG_TYPES]->d_buf; + if (!dwarf_offdie_types(dwarf, die->addr - start, die_ret)) + return drgn_error_libdw(); + } return NULL; } diff --git a/libdrgn/dwarf_index.h b/libdrgn/dwarf_index.h index 116c8273a..ce3f65193 100644 --- a/libdrgn/dwarf_index.h +++ b/libdrgn/dwarf_index.h @@ -25,6 +25,14 @@ typedef struct {} omp_lock_t; #define omp_destroy_lock(lock) do {} while (0) #define omp_set_lock(lock) do {} while (0) #define omp_unset_lock(lock) do {} while (0) +static inline int omp_get_thread_num(void) +{ + return 0; +} +static inline int 
omp_get_max_threads(void) +{ + return 1; +} #endif #include "hash_table.h" @@ -183,78 +191,43 @@ void drgn_dwarf_index_init(struct drgn_dwarf_index *dindex); */ void drgn_dwarf_index_deinit(struct drgn_dwarf_index *dindex); +DEFINE_VECTOR_TYPE(drgn_dwarf_index_pending_cu_vector, + struct drgn_dwarf_index_pending_cu) + /** State tracked while updating a @ref drgn_dwarf_index. */ struct drgn_dwarf_index_update_state { struct drgn_dwarf_index *dindex; - size_t old_cus_size; - struct drgn_error *err; + /** Per-thread arrays of CUs to be indexed. */ + struct drgn_dwarf_index_pending_cu_vector *cus; + size_t max_threads; }; /** - * Prepare to update a @ref drgn_dwarf_index. + * Initialize state for updating a @ref drgn_dwarf_index. * - * @param[out] state Initialized update state. Must be passed to @ref - * drgn_dwarf_index_update_end(). + * @return @c true on success, @c false on failure to allocate memory. */ -void drgn_dwarf_index_update_begin(struct drgn_dwarf_index_update_state *state, +bool +drgn_dwarf_index_update_state_init(struct drgn_dwarf_index_update_state *state, struct drgn_dwarf_index *dindex); -/** - * Finish updating a @ref drgn_dwarf_index. - * - * This should be called once all of the tasks created by @ref - * drgn_dwarf_index_read_module() have completed (even if the update was - * cancelled). - * - * If the update was not cancelled, this finishes indexing all modules reported - * by @ref drgn_dwarf_index_read_module(). If it was cancelled or there is an - * error while indexing, this rolls back the index and removes the newly - * reported modules. - * - * @return @c NULL on success, non-@c NULL if the update was cancelled or there - * was another error. - */ -struct drgn_error * -drgn_dwarf_index_update_end(struct drgn_dwarf_index_update_state *state); - -/** - * Cancel an update of a @ref drgn_dwarf_index. - * - * This should be called if there is a fatal error and the update must be - * aborted. - * - * @param[in] err Error to report. 
This will be returned from @ref - * drgn_dwarf_index_update_end(). If an error has already been reported, this - * error is destroyed. - */ -void drgn_dwarf_index_update_cancel(struct drgn_dwarf_index_update_state *state, - struct drgn_error *err); +/** Deinitialize state for updating a @ref drgn_dwarf_index. */ +void +drgn_dwarf_index_update_state_deinit(struct drgn_dwarf_index_update_state *state); -/** - * Return whether an update of a @ref drgn_dwarf_index has been cancelled by - * @ref drgn_dwarf_index_update_cancel(). - * - * Because updating is parallelized, this allows tasks other than the one that - * encountered the error to "fail fast". - */ -static inline bool -drgn_dwarf_index_update_cancelled(struct drgn_dwarf_index_update_state *state) -{ - /* - * No need for omp critical/omp atomic since this is a best-effort - * optimization. - */ - return state->err != NULL; -} +/** Read a module for updating a @ref drgn_dwarf_index. */ +struct drgn_error * +drgn_dwarf_index_read_module(struct drgn_dwarf_index_update_state *state, + struct drgn_debug_info_module *module); /** - * Read a module for updating a @ref drgn_dwarf_index. + * Update a @ref drgn_dwarf_index. * - * This creates OpenMP tasks to begin indexing the module. It may cancel the - * update. + * This should be called once all modules have been read with @ref + * drgn_dwarf_index_read_module() to finish indexing those modules. */ -void drgn_dwarf_index_read_module(struct drgn_dwarf_index_update_state *state, - struct drgn_debug_info_module *module); +struct drgn_error * +drgn_dwarf_index_update(struct drgn_dwarf_index_update_state *state); /** * Iterator over DWARF debugging information. 
diff --git a/libdrgn/examples/load_debug_info.c b/libdrgn/examples/load_debug_info.c index 247c1ebae..3868af1f0 100644 --- a/libdrgn/examples/load_debug_info.c +++ b/libdrgn/examples/load_debug_info.c @@ -1,10 +1,27 @@ +#include #include #include #include +#include #include #include "drgn.h" +static inline struct timespec timespec_sub(struct timespec a, struct timespec b) +{ + if (a.tv_nsec < b.tv_nsec) { + return (struct timespec){ + .tv_sec = a.tv_sec - 1 - b.tv_sec, + .tv_nsec = a.tv_nsec + 1000000000L - b.tv_nsec, + }; + } else { + return (struct timespec){ + .tv_sec = a.tv_sec - b.tv_sec, + .tv_nsec = a.tv_nsec - b.tv_nsec, + }; + } +} + static void usage(bool error) { fprintf(error ? stderr : stdout, @@ -16,6 +33,7 @@ static void usage(bool error) " -k, --kernel debug the running kernel (default)\n" " -c PATH, --core PATH debug the given core dump\n" " -p PID, --pid PID debug the running process with the given PID\n" + " -T, --time print how long loading debug info took in seconds\n" " -h, --help display this help message and exit\n"); exit(error ? 
EXIT_FAILURE : EXIT_SUCCESS); } @@ -26,14 +44,16 @@ int main(int argc, char **argv) {"kernel", no_argument, NULL, 'k'}, {"core", required_argument, NULL, 'c'}, {"pid", required_argument, NULL, 'p'}, + {"time", no_argument, NULL, 'T'}, {"help", no_argument, NULL, 'h'}, {}, }; bool kernel = false; const char *core = NULL; const char *pid = NULL; + bool print_time = false; for (;;) { - int c = getopt_long(argc, argv, "kc:p:h", long_options, NULL); + int c = getopt_long(argc, argv, "kc:p:Th", long_options, NULL); if (c == -1) break; switch (c) { @@ -46,6 +66,9 @@ int main(int argc, char **argv) case 'p': pid = optarg; break; + case 'T': + print_time = true; + break; case 'h': usage(false); default: @@ -71,7 +94,16 @@ int main(int argc, char **argv) if (err) goto out; + struct timespec start, end; + if (print_time && clock_gettime(CLOCK_MONOTONIC, &start)) + abort(); err = drgn_program_load_debug_info(prog, NULL, 0, true, true); + if ((!err || err->code == DRGN_ERROR_MISSING_DEBUG_INFO) && print_time) { + if (clock_gettime(CLOCK_MONOTONIC, &end)) + abort(); + struct timespec diff = timespec_sub(end, start); + printf("%lld.%09ld\n", (long long)diff.tv_sec, diff.tv_nsec); + } out:; int status; diff --git a/libdrgn/language_c.c b/libdrgn/language_c.c index 732b31f10..0a29715bc 100644 --- a/libdrgn/language_c.c +++ b/libdrgn/language_c.c @@ -15,7 +15,6 @@ #include "hash_table.h" #include "language.h" // IWYU pragma: associated #include "lexer.h" -#include "memory_reader.h" #include "minmax.h" #include "object.h" #include "program.h" @@ -653,8 +652,8 @@ c_format_character(unsigned char c, bool escape_single_quote, } static struct drgn_error * -c_format_string(struct drgn_memory_reader *reader, uint64_t address, - uint64_t length, struct string_builder *sb) +c_format_string(struct drgn_program *prog, uint64_t address, uint64_t length, + struct string_builder *sb) { struct drgn_error *err; @@ -662,8 +661,7 @@ c_format_string(struct drgn_memory_reader *reader, uint64_t address, 
return &drgn_enomem; while (length) { unsigned char c; - - err = drgn_memory_reader_read(reader, &c, address++, 1, false); + err = drgn_program_read_memory(prog, &c, address++, 1, false); if (err) return err; @@ -1318,7 +1316,7 @@ c_format_pointer_object(const struct drgn_object *obj, return &drgn_enomem; if (c_string) { - err = c_format_string(&drgn_object_program(obj)->reader, uvalue, + err = c_format_string(drgn_object_program(obj), uvalue, UINT64_MAX, sb); } else { struct drgn_object dereferenced; @@ -1474,7 +1472,7 @@ c_format_array_object(const struct drgn_object *obj, return NULL; } case DRGN_OBJECT_REFERENCE: - return c_format_string(&drgn_object_program(obj)->reader, + return c_format_string(drgn_object_program(obj), obj->address, iter.length, sb); case DRGN_OBJECT_ABSENT: ) diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index e353b695c..769c26b5c 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -24,7 +24,6 @@ #include "helpers.h" #include "language.h" #include "linux_kernel.h" -#include "memory_reader.h" #include "platform.h" #include "program.h" #include "type.h" @@ -195,8 +194,7 @@ struct drgn_error *proc_kallsyms_symbol_addr(const char *name, * we can read from the physical address of the vmcoreinfo note exported in * sysfs. 
*/ -struct drgn_error *read_vmcoreinfo_fallback(struct drgn_memory_reader *reader, - struct vmcoreinfo *ret) +struct drgn_error *read_vmcoreinfo_fallback(struct drgn_program *prog) { struct drgn_error *err; FILE *file; @@ -221,7 +219,7 @@ struct drgn_error *read_vmcoreinfo_fallback(struct drgn_memory_reader *reader, if (!buf) return &drgn_enomem; - err = drgn_memory_reader_read(reader, buf, address, size, true); + err = drgn_program_read_memory(prog, buf, address, size, true); if (err) goto out; @@ -239,7 +237,7 @@ struct drgn_error *read_vmcoreinfo_fallback(struct drgn_memory_reader *reader, goto out; } - err = parse_vmcoreinfo(buf + 24, nhdr->n_descsz, ret); + err = parse_vmcoreinfo(buf + 24, nhdr->n_descsz, &prog->vmcoreinfo); out: free(buf); return err; diff --git a/libdrgn/linux_kernel.h b/libdrgn/linux_kernel.h index 936155fc3..62bead4c4 100644 --- a/libdrgn/linux_kernel.h +++ b/libdrgn/linux_kernel.h @@ -7,7 +7,6 @@ #include "drgn.h" struct drgn_debug_info_load_state; -struct drgn_memory_reader; struct vmcoreinfo; struct drgn_error *read_memory_via_pgtable(void *buf, uint64_t address, @@ -20,8 +19,7 @@ struct drgn_error *parse_vmcoreinfo(const char *desc, size_t descsz, struct drgn_error *proc_kallsyms_symbol_addr(const char *name, unsigned long *ret); -struct drgn_error *read_vmcoreinfo_fallback(struct drgn_memory_reader *reader, - struct vmcoreinfo *ret); +struct drgn_error *read_vmcoreinfo_fallback(struct drgn_program *prog); struct drgn_error *linux_kernel_object_find(const char *name, size_t name_len, const char *filename, diff --git a/libdrgn/memory_reader.c b/libdrgn/memory_reader.c index 8ac6e09f8..18ad93b62 100644 --- a/libdrgn/memory_reader.c +++ b/libdrgn/memory_reader.c @@ -1,6 +1,7 @@ // Copyright (c) Facebook, Inc. and its affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later +#include #include #include #include @@ -45,25 +46,15 @@ bool drgn_memory_reader_empty(struct drgn_memory_reader *reader) struct drgn_error * drgn_memory_reader_add_segment(struct drgn_memory_reader *reader, - uint64_t address, uint64_t size, + uint64_t min_address, uint64_t max_address, drgn_memory_read_fn read_fn, void *arg, bool physical) { + assert(min_address <= max_address); + struct drgn_memory_segment_tree *tree = (physical ? &reader->physical_segments : &reader->virtual_segments); - struct drgn_memory_segment_tree_iterator it; - struct drgn_memory_segment *stolen = NULL, *segment; - struct drgn_memory_segment *truncate_head = NULL, *truncate_tail = NULL; - uint64_t end, existing_end; - - if (size == 0) - return NULL; - - if (__builtin_add_overflow(address, size, &end)) { - return drgn_error_create(DRGN_ERROR_OVERFLOW, - "memory segment end is too large"); - } /* * This is split into two steps: the first step handles an overlapping @@ -72,22 +63,23 @@ drgn_memory_reader_add_segment(struct drgn_memory_reader *reader, * can steal an existing segment instead of allocating a new one. */ - it = drgn_memory_segment_tree_search_le(tree, &address); + struct drgn_memory_segment *stolen = NULL, *segment; + struct drgn_memory_segment *truncate_head = NULL, *truncate_tail = NULL; + struct drgn_memory_segment_tree_iterator it = + drgn_memory_segment_tree_search_le(tree, &min_address); if (it.entry) { - existing_end = it.entry->address + it.entry->size; - if (end < existing_end) { + if (max_address < it.entry->max_address) { /* * The new segment lies entirely within an existing * segment, and part of the existing segment extends * after the new segment (a "tail"). 
*/ - struct drgn_memory_segment *tail; - - tail = malloc(sizeof(*tail)); + struct drgn_memory_segment *tail = + malloc(sizeof(*tail)); if (!tail) return &drgn_enomem; - if (it.entry->address == address) { + if (it.entry->min_address == min_address) { /* * The new segment starts at the same address as * the existing segment, so we can steal the @@ -108,23 +100,22 @@ drgn_memory_reader_add_segment(struct drgn_memory_reader *reader, truncate_tail = it.entry; } - tail->address = end; - tail->size = existing_end - end; - tail->orig_address = it.entry->orig_address; + tail->min_address = max_address + 1; + tail->max_address = it.entry->max_address; + tail->orig_min_address = it.entry->orig_min_address; tail->read_fn = it.entry->read_fn; tail->arg = it.entry->arg; - drgn_memory_segment_tree_insert(tree, tail, - NULL); + drgn_memory_segment_tree_insert(tree, tail, NULL); goto insert; } - if (it.entry->address == address) { + if (it.entry->min_address == min_address) { /* * The new segment subsumes an existing segment at the * same address. We can steal the existing segment. */ stolen = it.entry; - } else if (address < existing_end) { + } else if (min_address <= it.entry->max_address) { /* * The new segment overlaps an existing segment before * it, and part of the existing segment extends before @@ -145,8 +136,7 @@ drgn_memory_reader_add_segment(struct drgn_memory_reader *reader, } while (it.entry) { - existing_end = it.entry->address + it.entry->size; - if (end >= existing_end) { + if (max_address >= it.entry->max_address) { /* * The new segment subsumes an existing segment after * it. @@ -158,9 +148,7 @@ drgn_memory_reader_add_segment(struct drgn_memory_reader *reader, * allocate a new segment later, it's safe to * modify the tree now. 
*/ - struct drgn_memory_segment *existing_segment; - - existing_segment = it.entry; + struct drgn_memory_segment *existing_segment = it.entry; it = drgn_memory_segment_tree_delete_iterator(tree, it); free(existing_segment); } else { @@ -180,7 +168,7 @@ drgn_memory_reader_add_segment(struct drgn_memory_reader *reader, } continue; } - if (end > it.entry->address) { + if (max_address >= it.entry->min_address) { /* * The new segment overlaps an existing segment after * it, and part of the existing segment extends after @@ -208,14 +196,12 @@ drgn_memory_reader_add_segment(struct drgn_memory_reader *reader, * Now that we've allocated the new segment if necessary, we can safely * modify the tree. */ - if (truncate_head) { - truncate_head->size -= end - truncate_head->address; - truncate_head->address = end; - } + if (truncate_head) + truncate_head->min_address = max_address + 1; if (truncate_tail) - truncate_tail->size = address - truncate_tail->address; - segment->address = segment->orig_address = address; - segment->size = size; + truncate_tail->max_address = min_address - 1; + segment->min_address = segment->orig_min_address = min_address; + segment->max_address = max_address; segment->read_fn = read_fn; segment->arg = arg; /* If the segment is stolen, then it's already in the tree. */ @@ -228,33 +214,32 @@ struct drgn_error *drgn_memory_reader_read(struct drgn_memory_reader *reader, void *buf, uint64_t address, size_t count, bool physical) { + assert(count == 0 || count - 1 <= UINT64_MAX - address); + + struct drgn_error *err; struct drgn_memory_segment_tree *tree = (physical ? 
&reader->physical_segments : &reader->virtual_segments); - struct drgn_error *err; - size_t read = 0; - - while (read < count) { - struct drgn_memory_segment *segment; - size_t n; - - segment = drgn_memory_segment_tree_search_le(tree, - &address).entry; - if (!segment || segment->address + segment->size <= address) { + char *p = buf; + while (count > 0) { + struct drgn_memory_segment *segment = + drgn_memory_segment_tree_search_le(tree, + &address).entry; + if (!segment || segment->max_address < address) { return drgn_error_create_fault("could not find memory segment", address); } - n = min(segment->address + segment->size - address, - (uint64_t)(count - read)); - err = segment->read_fn((char *)buf + read, address, n, - address - segment->orig_address, + size_t n = min((uint64_t)(count - 1), + segment->max_address - address) + 1; + err = segment->read_fn(p, address, n, + address - segment->orig_min_address, segment->arg, physical); if (err) return err; - - read += n; + p += n; address += n; + count -= n; } return NULL; } diff --git a/libdrgn/memory_reader.h b/libdrgn/memory_reader.h index 8d4eaff3d..9e608995a 100644 --- a/libdrgn/memory_reader.h +++ b/libdrgn/memory_reader.h @@ -25,23 +25,24 @@ * @ref drgn_memory_reader provides a common interface for registering regions * of memory in a program and reading from memory. * + * @ref drgn_memory_reader does not have a notion of the maximum address or + * address overflow/wrap-around. Those must be handled at a higher layer. + * * @{ */ /** Memory segment in a @ref drgn_memory_reader. */ struct drgn_memory_segment { struct binary_tree_node node; - /** Address of the segment in memory. */ - uint64_t address; - /** Size of the segment in bytes; */ - uint64_t size; + /** Address range of the segment in memory (inclusive). */ + uint64_t min_address, max_address; /** * The address of the segment when it was added, before any truncations. * - * This is always greater than or equal to @ref - * drgn_memory_segment::address. 
+ * This is always less than or equal to @ref + * drgn_memory_segment::min_address. */ - uint64_t orig_address; + uint64_t orig_min_address; /** Read callback. */ drgn_memory_read_fn read_fn; /** Argument to pass to @ref drgn_memory_segment::read_fn. */ @@ -51,7 +52,7 @@ struct drgn_memory_segment { static inline uint64_t drgn_memory_segment_to_key(const struct drgn_memory_segment *entry) { - return entry->address; + return entry->min_address; } DEFINE_BINARY_SEARCH_TREE_TYPE(drgn_memory_segment_tree, @@ -84,10 +85,20 @@ void drgn_memory_reader_deinit(struct drgn_memory_reader *reader); /** Return whether a @ref drgn_memory_reader has no segments. */ bool drgn_memory_reader_empty(struct drgn_memory_reader *reader); -/** @sa drgn_program_add_memory_segment() */ +/** + * Add a segment to a @ref drgn_memory_reader. + * + * @param[in] reader Memory reader. + * @param[in] min_address Start address (inclusive). + * @param[in] max_address End address (inclusive). Must be `>= min_address`. + * @param[in] read_fn Callback to read from segment. + * @param[in] arg Argument to pass to @p read_fn. + * @param[in] physical Whether to add a physical memory segment. + * @return @c NULL on success, non-@c NULL on error. + */ struct drgn_error * drgn_memory_reader_add_segment(struct drgn_memory_reader *reader, - uint64_t address, uint64_t size, + uint64_t min_address, uint64_t max_address, drgn_memory_read_fn read_fn, void *arg, bool physical); @@ -97,7 +108,8 @@ drgn_memory_reader_add_segment(struct drgn_memory_reader *reader, * @param[in] reader Memory reader. * @param[out] buf Buffer to read into. * @param[in] address Starting address in memory to read. - * @param[in] count Number of bytes to read. + * @param[in] count Number of bytes to read. `address + count - 1` must be + * `<= UINT64_MAX` * @param[in] physical Whether @c address is physical. * @return @c NULL on success, non-@c NULL on error. 
*/ diff --git a/libdrgn/object.c b/libdrgn/object.c index 011f64f0d..9e2fa4d2a 100644 --- a/libdrgn/object.c +++ b/libdrgn/object.c @@ -10,7 +10,6 @@ #include "drgn.h" #include "error.h" #include "language.h" -#include "memory_reader.h" #include "minmax.h" #include "object.h" #include "program.h" @@ -233,7 +232,12 @@ static void drgn_value_deserialize(union drgn_value *value, const void *buf, int64_t svalue; uint64_t uvalue; double fvalue64; - float fvalue32; + struct { +#if !HOST_LITTLE_ENDIAN + float pad; +#endif + float fvalue32; + }; } tmp; tmp.uvalue = deserialize_bits(buf, bit_offset, bit_size, little_endian); @@ -314,7 +318,7 @@ drgn_object_set_from_buffer(struct drgn_object *res, bit_offset); } -static struct drgn_error * +struct drgn_error * drgn_object_set_reference_internal(struct drgn_object *res, const struct drgn_object_type *type, uint64_t address, uint64_t bit_offset) @@ -532,8 +536,8 @@ drgn_object_read_reference(const struct drgn_object *obj, if (!dst) return &drgn_enomem; } - err = drgn_memory_reader_read(&drgn_object_program(obj)->reader, - dst, obj->address, size, false); + err = drgn_program_read_memory(drgn_object_program(obj), dst, + obj->address, size, false); if (err) { if (dst != value->ibuf) free(dst); @@ -548,9 +552,8 @@ drgn_object_read_reference(const struct drgn_object *obj, uint64_t read_size = drgn_value_size(bit_offset + bit_size); char buf[9]; assert(read_size <= sizeof(buf)); - err = drgn_memory_reader_read(&drgn_object_program(obj)->reader, - buf, obj->address, read_size, - false); + err = drgn_program_read_memory(drgn_object_program(obj), buf, + obj->address, read_size, false); if (err) return err; drgn_value_deserialize(value, buf, bit_offset, obj->encoding, diff --git a/libdrgn/object.h b/libdrgn/object.h index 923ebfd5a..d008cdcc2 100644 --- a/libdrgn/object.h +++ b/libdrgn/object.h @@ -12,6 +12,9 @@ #ifndef DRGN_OBJECT_H #define DRGN_OBJECT_H +#include +#include + #include "drgn.h" #include "type.h" @@ -31,6 +34,24 @@ * @{ 
*/ +/** Allocate a zero-initialized @ref drgn_value. */ +static inline bool drgn_value_zalloc(uint64_t size, union drgn_value *value_ret, + char **buf_ret) +{ + if (size <= sizeof(value_ret->ibuf)) { + memset(value_ret->ibuf, 0, sizeof(value_ret->ibuf)); + *buf_ret = value_ret->ibuf; + } else { + if (size > SIZE_MAX) + return false; + char *buf = calloc(1, size); + if (!buf) + return false; + value_ret->bufp = *buf_ret = buf; + } + return true; +} + /** * Get whether an object is zero. * @@ -139,6 +160,15 @@ drgn_object_set_from_buffer_internal(struct drgn_object *res, const struct drgn_object_type *type, const void *buf, uint64_t bit_offset); +/** + * Like @ref drgn_object_set_reference() but @ref drgn_object_type() was already + * called. + */ +struct drgn_error * +drgn_object_set_reference_internal(struct drgn_object *res, + const struct drgn_object_type *type, + uint64_t address, uint64_t bit_offset); + /** * Binary operator implementation. * diff --git a/libdrgn/program.c b/libdrgn/program.c index 2694428d4..e4b5c6cdd 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -23,6 +23,7 @@ #include "language.h" #include "linux_kernel.h" #include "memory_reader.h" +#include "minmax.h" #include "object_index.h" #include "program.h" #include "symbol.h" @@ -141,8 +142,16 @@ drgn_program_add_memory_segment(struct drgn_program *prog, uint64_t address, uint64_t size, drgn_memory_read_fn read_fn, void *arg, bool physical) { - return drgn_memory_reader_add_segment(&prog->reader, address, size, - read_fn, arg, physical); + uint64_t address_mask; + struct drgn_error *err = drgn_program_address_mask(prog, &address_mask); + if (err) + return err; + if (size == 0 || address > address_mask) + return NULL; + uint64_t max_address = address + min(size - 1, address_mask - address); + return drgn_memory_reader_add_segment(&prog->reader, address, + max_address, read_fn, arg, + physical); } LIBDRGN_PUBLIC struct drgn_error * @@ -191,7 +200,7 @@ drgn_program_set_core_dump(struct 
drgn_program *prog, const char *path) { struct drgn_error *err; GElf_Ehdr ehdr_mem, *ehdr; - struct drgn_platform platform; + bool had_platform; bool is_64_bit, is_kdump; size_t phnum, i; size_t num_file_segments, j; @@ -232,13 +241,17 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) "not an ELF core file"); goto out_elf; } - - drgn_platform_from_elf(ehdr, &platform); + had_platform = prog->has_platform; + if (!had_platform) { + struct drgn_platform platform; + drgn_platform_from_elf(ehdr, &platform); + drgn_program_set_platform(prog, &platform); + } is_64_bit = ehdr->e_ident[EI_CLASS] == ELFCLASS64; if (elf_getphdrnum(prog->core, &phnum) != 0) { err = drgn_error_libelf(); - goto out_elf; + goto out_platform; } /* @@ -252,7 +265,7 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) phdr = gelf_getphdr(prog->core, i, &phdr_mem); if (!phdr) { err = drgn_error_libelf(); - goto out_elf; + goto out_platform; } if (phdr->p_type == PT_LOAD) { @@ -270,7 +283,7 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) note_header_type(phdr)); if (!data) { err = drgn_error_libelf(); - goto out_elf; + goto out_platform; } offset = 0; @@ -310,7 +323,7 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) if (fstatfs(prog->core_fd, &fs) == -1) { err = drgn_error_create_os("fstatfs", errno, path); if (err) - goto out_elf; + goto out_platform; } is_proc_kcore = fs.f_type == 0x9fa0; /* PROC_SUPER_MAGIC */ } else { @@ -325,7 +338,7 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) if (env && atoi(env)) { err = drgn_program_set_kdump(prog); if (err) - goto out_elf; + goto out_platform; return NULL; } } @@ -334,18 +347,16 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) sizeof(*prog->file_segments)); if (!prog->file_segments) { err = &drgn_enomem; - goto out_elf; + goto out_platform; } if ((is_proc_kcore || vmcoreinfo_note) && - 
platform.arch->linux_kernel_pgtable_iterator_next) { + prog->platform.arch->linux_kernel_pgtable_iterator_next) { /* * Try to read any memory that isn't in the core dump via the * page table. */ - err = drgn_program_add_memory_segment(prog, 0, - is_64_bit ? - UINT64_MAX : UINT32_MAX, + err = drgn_program_add_memory_segment(prog, 0, UINT64_MAX, read_memory_via_pgtable, prog, false); if (err) @@ -397,12 +408,11 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) * pass, as we may need to read virtual memory to determine the mapping. */ if (is_proc_kcore && !have_phys_addrs && - platform.arch->linux_kernel_live_direct_mapping_fallback) { + prog->platform.arch->linux_kernel_live_direct_mapping_fallback) { uint64_t direct_mapping, direct_mapping_size; - - err = platform.arch->linux_kernel_live_direct_mapping_fallback(prog, - &direct_mapping, - &direct_mapping_size); + err = prog->platform.arch->linux_kernel_live_direct_mapping_fallback(prog, + &direct_mapping, + &direct_mapping_size); if (err) goto out_segments; @@ -445,8 +455,7 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) if (is_proc_kcore) { if (!vmcoreinfo_note) { - err = read_vmcoreinfo_fallback(&prog->reader, - &prog->vmcoreinfo); + err = read_vmcoreinfo_fallback(prog); if (err) goto out_segments; } @@ -467,7 +476,6 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) prog->lang = &drgn_language_c; } - drgn_program_set_platform(prog, &platform); return NULL; out_segments: @@ -475,6 +483,8 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) drgn_memory_reader_init(&prog->reader); free(prog->file_segments); prog->file_segments = NULL; +out_platform: + prog->has_platform = had_platform; out_elf: elf_end(prog->core); prog->core = NULL; @@ -494,17 +504,20 @@ LIBDRGN_PUBLIC struct drgn_error * drgn_program_set_pid(struct drgn_program *prog, pid_t pid) { struct drgn_error *err; - char buf[64]; err = 
drgn_program_check_initialized(prog); if (err) return err; + char buf[64]; sprintf(buf, "/proc/%ld/mem", (long)pid); prog->core_fd = open(buf, O_RDONLY); if (prog->core_fd == -1) return drgn_error_create_os("open", errno, buf); + bool had_platform = prog->has_platform; + drgn_program_set_platform(prog, &drgn_host_platform); + prog->file_segments = malloc(sizeof(*prog->file_segments)); if (!prog->file_segments) { err = &drgn_enomem; @@ -522,7 +535,6 @@ drgn_program_set_pid(struct drgn_program *prog, pid_t pid) prog->pid = pid; prog->flags |= DRGN_PROGRAM_IS_LIVE; - drgn_program_set_platform(prog, &drgn_host_platform); return NULL; out_segments: @@ -531,6 +543,7 @@ drgn_program_set_pid(struct drgn_program *prog, pid_t pid) free(prog->file_segments); prog->file_segments = NULL; out_fd: + prog->has_platform = had_platform; close(prog->core_fd); prog->core_fd = -1; return err; @@ -939,8 +952,23 @@ LIBDRGN_PUBLIC struct drgn_error * drgn_program_read_memory(struct drgn_program *prog, void *buf, uint64_t address, size_t count, bool physical) { - return drgn_memory_reader_read(&prog->reader, buf, address, count, - physical); + uint64_t address_mask; + struct drgn_error *err = drgn_program_address_mask(prog, &address_mask); + if (err) + return err; + char *p = buf; + address &= address_mask; + while (count > 0) { + size_t n = min((uint64_t)(count - 1), address_mask - address) + 1; + err = drgn_memory_reader_read(&prog->reader, p, address, n, + physical); + if (err) + return err; + p += n; + address = 0; + count -= n; + } + return NULL; } DEFINE_VECTOR(char_vector, char) @@ -949,9 +977,13 @@ LIBDRGN_PUBLIC struct drgn_error * drgn_program_read_c_string(struct drgn_program *prog, uint64_t address, bool physical, size_t max_size, char **ret) { - struct drgn_error *err; + uint64_t address_mask; + struct drgn_error *err = drgn_program_address_mask(prog, &address_mask); + if (err) + return err; struct char_vector str = VECTOR_INIT; for (;;) { + address &= address_mask; char *c = 
char_vector_append_entry(&str); if (!c) { char_vector_deinit(&str); @@ -981,8 +1013,8 @@ LIBDRGN_PUBLIC struct drgn_error * drgn_program_read_u8(struct drgn_program *prog, uint64_t address, bool physical, uint8_t *ret) { - return drgn_memory_reader_read(&prog->reader, ret, address, - sizeof(*ret), physical); + return drgn_program_read_memory(prog, ret, address, sizeof(*ret), + physical); } #define DEFINE_PROGRAM_READ_U(n) \ @@ -995,8 +1027,8 @@ drgn_program_read_u##n(struct drgn_program *prog, uint64_t address, \ if (err) \ return err; \ uint##n##_t tmp; \ - err = drgn_memory_reader_read(&prog->reader, &tmp, address, \ - sizeof(tmp), physical); \ + err = drgn_program_read_memory(prog, &tmp, address, sizeof(tmp), \ + physical); \ if (err) \ return err; \ if (bswap) \ @@ -1023,8 +1055,8 @@ drgn_program_read_word(struct drgn_program *prog, uint64_t address, return err; if (is_64_bit) { uint64_t tmp; - err = drgn_memory_reader_read(&prog->reader, &tmp, address, - sizeof(tmp), physical); + err = drgn_program_read_memory(prog, &tmp, address, sizeof(tmp), + physical); if (err) return err; if (bswap) @@ -1032,8 +1064,8 @@ drgn_program_read_word(struct drgn_program *prog, uint64_t address, *ret = tmp; } else { uint32_t tmp; - err = drgn_memory_reader_read(&prog->reader, &tmp, address, - sizeof(tmp), physical); + err = drgn_program_read_memory(prog, &tmp, address, sizeof(tmp), + physical); if (err) return err; if (bswap) diff --git a/libdrgn/python/error.c b/libdrgn/python/error.c index 1fb2b1364..5e674ca23 100644 --- a/libdrgn/python/error.c +++ b/libdrgn/python/error.c @@ -66,47 +66,24 @@ static struct drgn_error drgn_error_python = { .message = "error in Python callback", }; -_Py_IDENTIFIER(drgn_in_python); +static _Thread_local bool drgn_in_python = false; bool set_drgn_in_python(void) { - PyObject *dict, *key, *value; - - dict = PyThreadState_GetDict(); - if (!dict) - return false; - key = _PyUnicode_FromId(&PyId_drgn_in_python); - if (!key) { - PyErr_Clear(); - return 
false; - } - value = PyDict_GetItemWithError(dict, key); - if (value == Py_True) + if (drgn_in_python) return false; - if ((!value && PyErr_Occurred()) || - PyDict_SetItem(dict, key, Py_True) == -1) { - PyErr_Clear(); - return false; - } + drgn_in_python = true; return true; } void clear_drgn_in_python(void) { - PyObject *exc_type, *exc_value, *exc_traceback; - PyObject *dict; - - PyErr_Fetch(&exc_type, &exc_value, &exc_traceback); - dict = PyThreadState_GetDict(); - if (dict) - _PyDict_SetItemId(dict, &PyId_drgn_in_python, Py_False); - PyErr_Restore(exc_type, exc_value, exc_traceback); + drgn_in_python = false; } struct drgn_error *drgn_error_from_python(void) { PyObject *exc_type, *exc_value, *exc_traceback, *exc_message; - PyObject *dict; const char *type, *message; struct drgn_error *err; @@ -114,8 +91,7 @@ struct drgn_error *drgn_error_from_python(void) if (!exc_type) return NULL; - dict = PyThreadState_GetDict(); - if (dict && _PyDict_GetItemId(dict, &PyId_drgn_in_python) == Py_True) { + if (drgn_in_python) { PyErr_Restore(exc_type, exc_value, exc_traceback); return &drgn_error_python; } diff --git a/libdrgn/python/module.c b/libdrgn/python/module.c index 1e55e7aa8..a0657a38f 100644 --- a/libdrgn/python/module.c +++ b/libdrgn/python/module.c @@ -1,6 +1,7 @@ // Copyright (c) Facebook, Inc. and its affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later +#include #ifdef WITH_KDUMPFILE #include #endif @@ -280,6 +281,10 @@ DRGNPY_PUBLIC PyMODINIT_FUNC PyInit__drgn(void) goto err; } + if (PyModule_AddStringConstant(m, "_elfutils_version", + dwfl_version(NULL))) + goto err; + PyObject *with_libkdumpfile; #ifdef WITH_LIBKDUMPFILE with_libkdumpfile = Py_True; diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index c28d32f8a..453edf628 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -879,6 +879,8 @@ static PyMethodDef Program_methods[] = { drgn_Program_load_default_debug_info_DOC}, {"__getitem__", (PyCFunction)Program_subscript, METH_O | METH_COEXIST, drgn_Program___getitem___DOC}, + {"__contains__", (PyCFunction)Program_contains, METH_O | METH_COEXIST, + drgn_Program___contains___DOC}, {"read", (PyCFunction)Program_read, METH_VARARGS | METH_KEYWORDS, drgn_Program_read_DOC}, #define METHOD_DEF_READ(x) \ @@ -950,7 +952,6 @@ static PyMappingMethods Program_as_mapping = { .mp_subscript = (binaryfunc)Program_subscript, }; - static PySequenceMethods Program_as_sequence = { .sq_contains = (objobjproc)Program_contains, }; diff --git a/libdrgn/python/stack_trace.c b/libdrgn/python/stack_trace.c index be54db9c0..5fe9fdf6e 100644 --- a/libdrgn/python/stack_trace.c +++ b/libdrgn/python/stack_trace.c @@ -72,6 +72,89 @@ static void StackFrame_dealloc(StackFrame *self) Py_TYPE(self)->tp_free((PyObject *)self); } +static PyObject *StackFrame_str(StackFrame *self) +{ + struct drgn_error *err; + char *str; + err = drgn_format_stack_frame(self->trace->trace, self->i, &str); + if (err) + return set_drgn_error(err); + PyObject *ret = PyUnicode_FromString(str); + free(str); + return ret; +} + +static DrgnObject *StackFrame_subscript(StackFrame *self, PyObject *key) +{ + struct drgn_error *err; + Program *prog = container_of(self->trace->trace->prog, Program, prog); + if (!PyUnicode_Check(key)) { + PyErr_SetObject(PyExc_KeyError, key); + return NULL; + } + 
const char *name = PyUnicode_AsUTF8(key); + if (!name) + return NULL; + DrgnObject *ret = DrgnObject_alloc(prog); + if (!ret) + return NULL; + bool clear = set_drgn_in_python(); + err = drgn_stack_frame_find_object(self->trace->trace, self->i, name, + &ret->obj); + if (clear) + clear_drgn_in_python(); + if (err) { + if (err->code == DRGN_ERROR_LOOKUP) { + drgn_error_destroy(err); + PyErr_SetObject(PyExc_KeyError, key); + } else { + set_drgn_error(err); + } + Py_DECREF(ret); + return NULL; + } + return ret; +} + +static int StackFrame_contains(StackFrame *self, PyObject *key) +{ + struct drgn_error *err; + if (!PyUnicode_Check(key)) { + PyErr_SetObject(PyExc_KeyError, key); + return -1; + } + const char *name = PyUnicode_AsUTF8(key); + if (!name) + return -1; + struct drgn_object tmp; + drgn_object_init(&tmp, self->trace->trace->prog); + err = drgn_stack_frame_find_object(self->trace->trace, self->i, name, + &tmp); + drgn_object_deinit(&tmp); + if (!err) { + return 1; + } else if (err->code == DRGN_ERROR_LOOKUP) { + drgn_error_destroy(err); + return 0; + } else { + return -1; + } +} + +static PyObject *StackFrame_source(StackFrame *self) +{ + int line; + int column; + const char *filename = drgn_stack_frame_source(self->trace->trace, + self->i, &line, &column); + if (!filename) { + PyErr_SetString(PyExc_LookupError, + "source code location not available"); + return NULL; + } + return Py_BuildValue("sii", filename, line, column); +} + static PyObject *StackFrame_symbol(StackFrame *self) { struct drgn_error *err; @@ -148,6 +231,20 @@ static PyObject *StackFrame_registers(StackFrame *self) return dict; } +static PyObject *StackFrame_get_name(StackFrame *self, void *arg) +{ + const char *name = drgn_stack_frame_name(self->trace->trace, self->i); + if (name) + return PyUnicode_FromString(name); + else + Py_RETURN_NONE; +} + +static PyObject *StackFrame_get_is_inline(StackFrame *self, void *arg) +{ + Py_RETURN_BOOL(drgn_stack_frame_is_inline(self->trace->trace, self->i)); 
+} + static PyObject *StackFrame_get_interrupted(StackFrame *self, void *arg) { Py_RETURN_BOOL(drgn_stack_frame_interrupted(self->trace->trace, @@ -167,6 +264,12 @@ static PyObject *StackFrame_get_pc(StackFrame *self, void *arg) } static PyMethodDef StackFrame_methods[] = { + {"__getitem__", (PyCFunction)StackFrame_subscript, + METH_O | METH_COEXIST, drgn_StackFrame___getitem___DOC}, + {"__contains__", (PyCFunction)StackFrame_contains, + METH_O | METH_COEXIST, drgn_StackFrame___contains___DOC}, + {"source", (PyCFunction)StackFrame_source, METH_NOARGS, + drgn_StackFrame_source_DOC}, {"symbol", (PyCFunction)StackFrame_symbol, METH_NOARGS, drgn_StackFrame_symbol_DOC}, {"register", (PyCFunction)StackFrame_register, @@ -177,17 +280,31 @@ static PyMethodDef StackFrame_methods[] = { }; static PyGetSetDef StackFrame_getset[] = { + {"name", (getter)StackFrame_get_name, NULL, drgn_StackFrame_name_DOC}, + {"is_inline", (getter)StackFrame_get_is_inline, NULL, + drgn_StackFrame_is_inline_DOC}, {"interrupted", (getter)StackFrame_get_interrupted, NULL, drgn_StackFrame_interrupted_DOC}, {"pc", (getter)StackFrame_get_pc, NULL, drgn_StackFrame_pc_DOC}, {}, }; +static PyMappingMethods StackFrame_as_mapping = { + .mp_subscript = (binaryfunc)StackFrame_subscript, +}; + +static PySequenceMethods StackFrame_as_sequence = { + .sq_contains = (objobjproc)StackFrame_contains, +}; + PyTypeObject StackFrame_type = { PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "_drgn.StackFrame", .tp_basicsize = sizeof(StackFrame), .tp_dealloc = (destructor)StackFrame_dealloc, + .tp_as_sequence = &StackFrame_as_sequence, + .tp_as_mapping = &StackFrame_as_mapping, + .tp_str = (reprfunc)StackFrame_str, .tp_flags = Py_TPFLAGS_DEFAULT, .tp_doc = drgn_StackFrame_DOC, .tp_methods = StackFrame_methods, diff --git a/libdrgn/python/type.c b/libdrgn/python/type.c index 817f36bc9..c0f6afbc0 100644 --- a/libdrgn/python/type.c +++ b/libdrgn/python/type.c @@ -422,13 +422,14 @@ DrgnType_ATTR(template_parameters); static 
PyObject *DrgnType_getter(DrgnType *self, struct DrgnType_Attr *attr) { - PyObject *value; - - value = _PyDict_GetItemId(self->attr_cache, &attr->id); + PyObject *value = _PyDict_GetItemIdWithError(self->attr_cache, + &attr->id); if (value) { Py_INCREF(value); return value; } + if (PyErr_Occurred()) + return NULL; value = attr->getter(self); if (!value) diff --git a/libdrgn/serialize.c b/libdrgn/serialize.c index b50d80194..3904f4fb5 100644 --- a/libdrgn/serialize.c +++ b/libdrgn/serialize.c @@ -6,6 +6,86 @@ #include "serialize.h" +static inline uint8_t copy_bits_step(const uint8_t *s, unsigned int src_bit_offset, + unsigned int bit_size, + unsigned int dst_bit_offset, bool lsb0) +{ + uint8_t result; + if (lsb0) { + result = s[0] >> src_bit_offset; + if (bit_size > 8 - src_bit_offset) + result |= s[1] << (8 - src_bit_offset); + result <<= dst_bit_offset; + } else { + result = s[0] << src_bit_offset; + if (bit_size > 8 - src_bit_offset) + result |= s[1] >> (8 - src_bit_offset); + result >>= dst_bit_offset; + } + return result; +} + +void copy_bits(void *dst, unsigned int dst_bit_offset, const void *src, + unsigned int src_bit_offset, uint64_t bit_size, bool lsb0) +{ + assert(dst_bit_offset < 8); + assert(src_bit_offset < 8); + + if (bit_size == 0) + return; + + uint8_t *d = dst; + const uint8_t *s = src; + uint64_t dst_last_bit = dst_bit_offset + bit_size - 1; + uint8_t dst_first_mask = copy_bits_first_mask(dst_bit_offset, lsb0); + uint8_t dst_last_mask = copy_bits_last_mask(dst_last_bit, lsb0); + + if (dst_bit_offset == src_bit_offset) { + /* + * In the common case that the source and destination have the + * same offset, we can use memcpy(), preserving bits at the + * start and/or end if necessary. 
+ */ + uint8_t first_byte = d[0]; + uint8_t last_byte = d[dst_last_bit / 8]; + memcpy(d, s, dst_last_bit / 8 + 1); + if (dst_bit_offset != 0) { + d[0] = ((first_byte & ~dst_first_mask) + | (d[0] & dst_first_mask)); + } + if (dst_last_bit % 8 != 7) { + d[dst_last_bit / 8] = ((last_byte & ~dst_last_mask) + | (d[dst_last_bit / 8] & dst_last_mask)); + } + } else if (bit_size <= 8 - dst_bit_offset) { + /* Destination is only one byte. */ + uint8_t dst_mask = dst_first_mask & dst_last_mask; + d[0] = ((d[0] & ~dst_mask) + | (copy_bits_step(&s[0], src_bit_offset, bit_size, + dst_bit_offset, lsb0) & dst_mask)); + } else { + /* Destination is two or more bytes. */ + d[0] = ((d[0] & ~dst_first_mask) + | (copy_bits_step(&s[0], src_bit_offset, + 8 - dst_bit_offset, dst_bit_offset, + lsb0) & dst_first_mask)); + src_bit_offset += 8 - dst_bit_offset; + size_t si = src_bit_offset / 8; + src_bit_offset %= 8; + size_t di = 1; + while (di < dst_last_bit / 8) { + d[di] = copy_bits_step(&s[si], src_bit_offset, 8, 0, + lsb0); + di++; + si++; + } + d[di] = ((d[di] & ~dst_last_mask) + | (copy_bits_step(&s[si], src_bit_offset, + dst_last_bit % 8 + 1, 0, lsb0) + & dst_last_mask)); + } +} + void serialize_bits(void *buf, uint64_t bit_offset, uint64_t uvalue, uint8_t bit_size, bool little_endian) { diff --git a/libdrgn/serialize.h b/libdrgn/serialize.h index 281c54a1e..0f4783f35 100644 --- a/libdrgn/serialize.h +++ b/libdrgn/serialize.h @@ -65,7 +65,7 @@ static inline void copy_lsbytes(void *dst, size_t dst_size, } else { memset(d, 0, dst_size - size); if (src_little_endian) { - for (size_t i = dst_size - size; i < size; i++) + for (size_t i = dst_size - size; i < dst_size; i++) d[i] = s[dst_size - 1 - i]; } else { memcpy(d + dst_size - size, s + src_size - size, size); @@ -73,6 +73,45 @@ static inline void copy_lsbytes(void *dst, size_t dst_size, } } +/** + * Return a bit mask with bits `[bit_offset, 7]` set. + * + * @param[in] lsb0 See @ref copy_bits(). 
+ */ +static inline uint8_t copy_bits_first_mask(unsigned int bit_offset, bool lsb0) +{ + return lsb0 ? 0xff << bit_offset : 0xff >> bit_offset; +} + +/** + * Return a bit mask with bits `[0, last_bit % 8]` set. + * + * @param[in] lsb0 See @ref copy_bits(). + */ +static inline uint8_t copy_bits_last_mask(uint64_t last_bit, bool lsb0) +{ + return lsb0 ? 0xff >> (7 - last_bit % 8) : 0x7f80 >> (last_bit % 8); +} + +/** + * Copy @p bit_size bits from @p src at bit offset @p src_bit_offset to @p dst + * at bit offset @p dst_bit_offset. + * + * @param[in] dst Destination buffer. + * @param[in] dst_bit_offset Offset in bits from the beginning of @p dst to copy + * to. Must be < 8. + * @param[in] src Source buffer. + * @param[in] src_bit_offset Offset in bits from the beginning of @p src to copy + * from. Must be < 8. + * @param[in] bit_size Number of bits to copy. + * @param[in] lsb0 If @c true, bits within a byte are numbered from least + * significant (0) to most significant (7); if @c false, they are numbered from + * most significant (0) to least significant (7). This determines the + * interpretation of @p dst_bit_offset and @p src_bit_offset. + */ +void copy_bits(void *dst, unsigned int dst_bit_offset, const void *src, + unsigned int src_bit_offset, uint64_t bit_size, bool lsb0); + /** * Serialize bits to a memory buffer. 
* diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c index 9a4f4cdd2..8dc05e294 100644 --- a/libdrgn/stack_trace.c +++ b/libdrgn/stack_trace.c @@ -3,6 +3,8 @@ #include #include +#include +#include #include #include #include @@ -25,16 +27,18 @@ #include "type.h" #include "util.h" -static bool drgn_stack_trace_append_frame(struct drgn_stack_trace **trace, - size_t *capacity, - struct drgn_register_state *regs) +static struct drgn_error * +drgn_stack_trace_append_frame(struct drgn_stack_trace **trace, size_t *capacity, + struct drgn_register_state *regs, + Dwarf_Die *scopes, size_t num_scopes, + size_t function_scope) { if ((*trace)->num_frames == *capacity) { static const size_t max_capacity = (SIZE_MAX - sizeof(struct drgn_stack_trace)) / sizeof(struct drgn_stack_frame); if (*capacity == max_capacity) - return false; + return &drgn_enomem; size_t new_capacity; if (*capacity > max_capacity / 2) new_capacity = max_capacity; @@ -45,14 +49,17 @@ static bool drgn_stack_trace_append_frame(struct drgn_stack_trace **trace, offsetof(struct drgn_stack_trace, frames[new_capacity])); if (!new_trace) - return false; + return &drgn_enomem; *trace = new_trace; *capacity = new_capacity; } struct drgn_stack_frame *frame = &(*trace)->frames[(*trace)->num_frames++]; frame->regs = regs; - return true; + frame->scopes = scopes; + frame->num_scopes = num_scopes; + frame->function_scope = function_scope; + return NULL; } static void drgn_stack_trace_shrink_to_fit(struct drgn_stack_trace **trace, @@ -71,8 +78,15 @@ static void drgn_stack_trace_shrink_to_fit(struct drgn_stack_trace **trace, LIBDRGN_PUBLIC void drgn_stack_trace_destroy(struct drgn_stack_trace *trace) { - for (size_t i = 0; i < trace->num_frames; i++) - drgn_register_state_destroy(trace->frames[i].regs); + struct drgn_register_state *regs = NULL; + for (size_t i = 0; i < trace->num_frames; i++) { + if (trace->frames[i].regs != regs) { + drgn_register_state_destroy(regs); + regs = trace->frames[i].regs; + } + 
free(trace->frames[i].scopes); + } + drgn_register_state_destroy(regs); free(trace); } @@ -91,8 +105,12 @@ drgn_format_stack_trace(struct drgn_stack_trace *trace, char **ret) goto enomem; struct drgn_register_state *regs = trace->frames[frame].regs; - struct optional_uint64 pc = drgn_register_state_get_pc(regs); - if (pc.has_value) { + struct optional_uint64 pc; + const char *name = drgn_stack_frame_name(trace, frame); + if (name) { + if (!string_builder_append(&str, name)) + goto enomem; + } else if ((pc = drgn_register_state_get_pc(regs)).has_value) { Dwfl_Module *dwfl_module = regs->module ? regs->module->dwfl_module : NULL; struct drgn_symbol sym; @@ -117,6 +135,19 @@ drgn_format_stack_trace(struct drgn_stack_trace *trace, char **ret) goto enomem; } + int line, column; + const char *filename = drgn_stack_frame_source(trace, frame, + &line, &column); + if (filename && column) { + if (!string_builder_appendf(&str, " (%s:%d:%d)", + filename, line, column)) + goto enomem; + } else if (filename) { + if (!string_builder_appendf(&str, " (%s:%d)", filename, + line)) + goto enomem; + } + if (frame != trace->num_frames - 1 && !string_builder_appendc(&str, '\n')) goto enomem; @@ -130,6 +161,154 @@ drgn_format_stack_trace(struct drgn_stack_trace *trace, char **ret) return &drgn_enomem; } +LIBDRGN_PUBLIC struct drgn_error * +drgn_format_stack_frame(struct drgn_stack_trace *trace, size_t frame, char **ret) +{ + struct string_builder str = {}; + struct drgn_register_state *regs = trace->frames[frame].regs; + if (!string_builder_appendf(&str, "#%zu at ", frame)) + goto enomem; + + struct optional_uint64 pc = drgn_register_state_get_pc(regs); + if (pc.has_value) { + if (!string_builder_appendf(&str, "%#" PRIx64, pc.value)) + goto enomem; + + Dwfl_Module *dwfl_module = + regs->module ? 
regs->module->dwfl_module : NULL; + struct drgn_symbol sym; + if (dwfl_module && + drgn_program_find_symbol_by_address_internal(trace->prog, + pc.value - !regs->interrupted, + dwfl_module, + &sym) && + !string_builder_appendf(&str, " (%s+0x%" PRIx64 "/0x%" PRIx64 ")", + sym.name, pc.value - sym.address, + sym.size)) + goto enomem; + } else { + if (!string_builder_append(&str, "???")) + goto enomem; + } + + const char *name = drgn_stack_frame_name(trace, frame); + if (name && !string_builder_appendf(&str, " in %s", name)) + goto enomem; + + int line, column; + const char *filename = drgn_stack_frame_source(trace, frame, &line, + &column); + if (filename && column) { + if (!string_builder_appendf(&str, " at %s:%d:%d", filename, + line, column)) + goto enomem; + } else if (filename) { + if (!string_builder_appendf(&str, " at %s:%d", filename, line)) + goto enomem; + } + + if (drgn_stack_frame_is_inline(trace, frame) && + !string_builder_append(&str, " (inlined)")) + goto enomem; + + if (!string_builder_finalize(&str, ret)) + goto enomem; + return NULL; + +enomem: + free(str.str); + return &drgn_enomem; +} + +LIBDRGN_PUBLIC const char *drgn_stack_frame_name(struct drgn_stack_trace *trace, + size_t frame) +{ + Dwarf_Die *scopes = trace->frames[frame].scopes; + size_t num_scopes = trace->frames[frame].num_scopes; + size_t function_scope = trace->frames[frame].function_scope; + if (function_scope >= num_scopes) + return NULL; + return dwarf_diename(&scopes[function_scope]); +} + +LIBDRGN_PUBLIC bool drgn_stack_frame_is_inline(struct drgn_stack_trace *trace, + size_t frame) +{ + Dwarf_Die *scopes = trace->frames[frame].scopes; + size_t num_scopes = trace->frames[frame].num_scopes; + size_t function_scope = trace->frames[frame].function_scope; + return (function_scope < num_scopes && + dwarf_tag(&scopes[function_scope]) == + DW_TAG_inlined_subroutine); +} + +LIBDRGN_PUBLIC const char * +drgn_stack_frame_source(struct drgn_stack_trace *trace, size_t frame, + int *line_ret, 
int *column_ret) +{ + if (frame > 0 && + trace->frames[frame].regs == trace->frames[frame - 1].regs) { + /* + * This frame is the caller of an inline frame. Get the call + * location from the inlined_subroutine of the callee. + */ + Dwarf_Die *inlined_scopes = trace->frames[frame - 1].scopes; + size_t inlined_num_scopes = trace->frames[frame - 1].num_scopes; + size_t inlined_function_scope = + trace->frames[frame - 1].function_scope; + if (inlined_function_scope >= inlined_num_scopes) + return NULL; + Dwarf_Die *inlined = &inlined_scopes[inlined_function_scope]; + + Dwarf_Die inlined_cu; + Dwarf_Files *files; + if (!dwarf_diecu(inlined, &inlined_cu, NULL, NULL) || + dwarf_getsrcfiles(&inlined_cu, &files, NULL)) + return NULL; + + Dwarf_Attribute attr; + Dwarf_Word value; + if (dwarf_formudata(dwarf_attr(inlined, DW_AT_call_file, &attr), + &value)) + return NULL; + + const char *filename = dwarf_filesrc(files, value, NULL, NULL); + if (!filename) + return NULL; + if (line_ret) { + if (dwarf_formudata(dwarf_attr(inlined, DW_AT_call_line, + &attr), &value)) + *line_ret = 0; + else + *line_ret = value; + } + if (column_ret) { + if (dwarf_formudata(dwarf_attr(inlined, + DW_AT_call_column, + &attr), &value)) + *column_ret = 0; + else + *column_ret = value; + } + return filename; + } else { + struct drgn_register_state *regs = trace->frames[frame].regs; + Dwfl_Module *dwfl_module = + regs->module ? 
regs->module->dwfl_module : NULL; + if (!dwfl_module) + return NULL; + struct optional_uint64 pc = drgn_register_state_get_pc(regs); + if (!pc.has_value) + return NULL; + pc.value -= !regs->interrupted; + Dwfl_Line *line = dwfl_module_getsrc(dwfl_module, pc.value); + if (!line) + return NULL; + return dwfl_lineinfo(line, NULL, line_ret, column_ret, NULL, + NULL); + } +} + LIBDRGN_PUBLIC bool drgn_stack_frame_interrupted(struct drgn_stack_trace *trace, size_t frame) { @@ -173,6 +352,87 @@ drgn_stack_frame_symbol(struct drgn_stack_trace *trace, size_t frame, return NULL; } +LIBDRGN_PUBLIC struct drgn_error * +drgn_stack_frame_find_object(struct drgn_stack_trace *trace, size_t frame_i, + const char *name, struct drgn_object *ret) +{ + struct drgn_error *err; + struct drgn_stack_frame *frame = &trace->frames[frame_i]; + + if (frame->num_scopes == 0) + goto not_found; + + Dwarf_Die die, type_die; + err = drgn_find_in_dwarf_scopes(frame->scopes, frame->num_scopes, name, + &die, &type_die); + if (err) + return err; + if (!die.addr && frame->function_scope == 0) { + /* + * Scope 0 must be a DW_TAG_inlined_subroutine, and we didn't + * find the name in the concrete inlined instance tree. We need + * to find the scopes that contain the abstract instance + * root (i.e., the DW_TAG_subprogram definition). (We could do + * this ahead of time when unwinding the stack, but for + * efficiency we do it lazily.) 
+ */ + Dwarf_Attribute attr_mem, *attr; + if (!(attr = dwarf_attr(frame->scopes, DW_AT_abstract_origin, + &attr_mem))) + goto not_found; + Dwarf_Die abstract_origin; + if (!dwarf_formref_die(attr, &abstract_origin)) + return drgn_error_libdw(); + + Dwarf_Die *ancestors; + size_t num_ancestors; + err = drgn_find_die_ancestors(&abstract_origin, &ancestors, + &num_ancestors); + if (err) + return err; + + size_t new_num_scopes = num_ancestors + frame->num_scopes; + Dwarf_Die *new_scopes = realloc(ancestors, + new_num_scopes * + sizeof(*new_scopes)); + if (!new_scopes) { + free(ancestors); + return &drgn_enomem; + } + memcpy(&new_scopes[num_ancestors], frame->scopes, + frame->num_scopes * sizeof(*new_scopes)); + free(frame->scopes); + frame->scopes = new_scopes; + frame->num_scopes = new_num_scopes; + frame->function_scope = num_ancestors; + + /* Look for the name in the new scopes. */ + err = drgn_find_in_dwarf_scopes(frame->scopes, num_ancestors, + name, &die, &type_die); + if (err) + return err; + } + if (!die.addr) { +not_found:; + const char *frame_name = drgn_stack_frame_name(trace, frame_i); + if (frame_name) { + return drgn_error_format(DRGN_ERROR_LOOKUP, + "could not find '%s' in '%s'", + name, frame_name); + } else { + return drgn_error_format(DRGN_ERROR_LOOKUP, + "could not find '%s'", name); + } + } + + Dwarf_Die function_die = frame->scopes[frame->function_scope]; + return drgn_object_from_dwarf(trace->prog->_dbinfo, frame->regs->module, + &die, + dwarf_tag(&die) == DW_TAG_enumerator ? 
+ &type_die : NULL, + &function_die, frame->regs, ret); +} + LIBDRGN_PUBLIC bool drgn_stack_frame_register(struct drgn_stack_trace *trace, size_t frame, const struct drgn_register *reg, @@ -440,6 +700,132 @@ static void drgn_add_to_register(void *dst, size_t dst_size, const void *src, } +static struct drgn_error * +drgn_stack_trace_add_frames(struct drgn_stack_trace **trace, + size_t *trace_capacity, + struct drgn_register_state *regs) +{ + struct drgn_error *err; + + if (!regs->module) { + err = drgn_stack_trace_append_frame(trace, trace_capacity, regs, + NULL, 0, 0); + goto out; + } + + uint64_t pc = regs->_pc - !regs->interrupted; + uint64_t bias; + Dwarf_Die *scopes; + size_t num_scopes; + err = drgn_debug_info_module_find_dwarf_scopes(regs->module, pc, &bias, + &scopes, &num_scopes); + if (err) + goto out; + pc -= bias; + + size_t orig_num_frames = (*trace)->num_frames; + /* + * Walk backwards through scopes, splitting into frames. Stop at index 1 + * because 0 must be a unit DIE. + */ + size_t frame_end = num_scopes; + for (size_t i = num_scopes; i-- > 1;) { + bool has_pc; + if (i == num_scopes - 1) { + /* + * The last scope is guaranteed to contain PC, so avoid + * a call to dwarf_haspc(). + */ + has_pc = true; + } else { + int r = dwarf_haspc(&scopes[i], pc); + if (r < 0) { + err = drgn_error_libdw(); + goto out_scopes; + } + has_pc = r > 0; + } + if (has_pc) { + Dwarf_Die *frame_scopes; + switch (dwarf_tag(&scopes[i])) { + case DW_TAG_subprogram: + /* + * Reuse the original scopes array (shrinking it + * if necessary). + */ + if (frame_end == num_scopes || + !(frame_scopes = realloc(scopes, + frame_end * + sizeof(scopes[i])))) + frame_scopes = scopes; + err = drgn_stack_trace_append_frame(trace, + trace_capacity, + regs, + frame_scopes, + frame_end, + i); + if (err) { + free(frame_scopes); + /* + * We stole scopes for frame_scopes, so + * not out_scopes. + */ + goto out; + } + /* + * Added the DW_TAG_subprogram frame. We're + * done. 
+ */ + return NULL; + case DW_TAG_inlined_subroutine: + frame_scopes = memdup(&scopes[i], + (frame_end - i) * + sizeof(scopes[i])); + if (!frame_scopes) { + err = &drgn_enomem; + goto out_scopes; + } + err = drgn_stack_trace_append_frame(trace, + trace_capacity, + regs, + frame_scopes, + frame_end - i, + 0); + if (err) { + free(frame_scopes); + goto out_scopes; + } + frame_end = i; + break; + default: + break; + } + } else { + /* + * This DIE doesn't contain PC. Ignore it and everything + * after it. + */ + frame_end = i; + } + } + + /* + * We didn't find a matching DW_TAG_subprogram. Free any matching + * DW_TAG_inlined_subroutine frames we found and add a scopeless frame. + */ + for (size_t i = orig_num_frames; i < (*trace)->num_frames; i++) + free((*trace)->frames[i].scopes); + (*trace)->num_frames = orig_num_frames; + err = drgn_stack_trace_append_frame(trace, trace_capacity, regs, NULL, + 0, 0); +out_scopes: + free(scopes); +out: + if (err) + drgn_register_state_destroy(regs); + return err; +} + static struct drgn_error * drgn_unwind_one_register(struct drgn_program *prog, const struct drgn_cfi_rule *rule, @@ -640,12 +1026,10 @@ static struct drgn_error *drgn_get_stack_trace(struct drgn_program *prog, /* Limit iterations so we don't get caught in a loop. 
*/ for (int i = 0; i < 1024; i++) { - if (!drgn_stack_trace_append_frame(&trace, &trace_capacity, - regs)) { - err = &drgn_enomem; - drgn_register_state_destroy(regs); + err = drgn_stack_trace_add_frames(&trace, &trace_capacity, + regs); + if (err) goto out; - } err = drgn_unwind_with_cfi(prog, &row, regs, ®s); if (err == &drgn_not_found) { diff --git a/libdrgn/stack_trace.h b/libdrgn/stack_trace.h index 20f5eb6c0..1a932a1a8 100644 --- a/libdrgn/stack_trace.h +++ b/libdrgn/stack_trace.h @@ -28,6 +28,9 @@ struct drgn_stack_frame { struct drgn_register_state *regs; + Dwarf_Die *scopes; + size_t num_scopes; + size_t function_scope; }; struct drgn_stack_trace { diff --git a/libdrgn/util.h b/libdrgn/util.h index 166c38a1f..9129a2b8b 100644 --- a/libdrgn/util.h +++ b/libdrgn/util.h @@ -118,6 +118,14 @@ static inline void *malloc64(uint64_t size) return malloc(size); } +static inline void *memdup(void *ptr, size_t size) +{ + void *copy = malloc(size); + if (copy) + memcpy(copy, ptr, size); + return copy; +} + /** Return the maximum value of an @p n-byte unsigned integer. 
*/ static inline uint64_t uint_max(int n) { diff --git a/scripts/build_dists.sh b/scripts/build_dists.sh new file mode 100755 index 000000000..54d660356 --- /dev/null +++ b/scripts/build_dists.sh @@ -0,0 +1,21 @@ +#!/bin/sh + +set -eux + +: "${PYTHON=python3}" +"$PYTHON" setup.py sdist +SDIST=dist/drgn-"$("$PYTHON" setup.py --version)".tar.gz + +: "${DOCKER=docker}" +$DOCKER pull quay.io/pypa/manylinux2010_x86_64 +$DOCKER run -it \ + --env PLAT=manylinux2010_x86_64 \ + --env SDIST="$SDIST" \ + --env OWNER="$(id -u):$(id -g)" \ + --volume "$(pwd)":/io:ro \ + --volume "$(pwd)/dist":/io/dist \ + --workdir /io \ + --hostname drgn \ + --rm \ + quay.io/pypa/manylinux2010_x86_64 \ + ./scripts/build_manylinux_in_docker.sh diff --git a/scripts/build_manylinux_in_docker.sh b/scripts/build_manylinux_in_docker.sh new file mode 100755 index 000000000..dc556945a --- /dev/null +++ b/scripts/build_manylinux_in_docker.sh @@ -0,0 +1,83 @@ +#!/bin/sh + +set -eux + +# Drop into a shell if something fails. +trap 'if [ $? -ne 0 ]; then exec bash -i; fi' EXIT + +yum install -y \ + bzip2-devel \ + libzstd-devel \ + lzo-devel \ + snappy-devel \ + xz-devel \ + zlib-devel + +# The manylinux image contains an upgraded autotools in /usr/local, but the +# pkg-config macros are not present for this upgraded package. See +# https://github.com/pypa/manylinux/issues/731. +ln -s /usr/share/aclocal/pkg.m4 /usr/local/share/aclocal/ + +# Install a recent version of elfutils instead of whatever is in the manylinux +# image. +elfutils_version=0.183 +elfutils_url=https://sourceware.org/elfutils/ftp/$elfutils_version/elfutils-$elfutils_version.tar.bz2 +mkdir /tmp/elfutils +cd /tmp/elfutils +curl -L "$elfutils_url" | tar -xj --strip-components=1 +# We don't bother with debuginfod support for a few reasons: +# +# 1. It depends on libcurl, which would pull in a bunch of transitive +# dependencies. +# 2. libdw loads libdebuginfod with dlopen(), which auditwheel misses. +# 3. 
drgn hasn't been tested with debuginfod. +./configure --disable-libdebuginfod --disable-debuginfod +make -j$(($(nproc) + 1)) +make install + +libkdumpfile_commit=v0.4.0 +libkdumpfile_url=https://github.com/ptesarik/libkdumpfile/archive/$libkdumpfile_commit/libkdumpfile-$libkdumpfile_commit.tar.gz +mkdir /tmp/libkdumpfile +cd /tmp/libkdumpfile +curl -L "$libkdumpfile_url" | tar -xz --strip-components=1 +autoreconf -fiv +# z_const was added in zlib 1.2.5.2, but CentOS 6 has 1.2.3. +CPPFLAGS="-Dz_const=const" ./configure --with-lzo --with-snappy --with-zlib --without-python +make -j$(($(nproc) + 1)) +make install + +ldconfig + +mkdir /tmp/drgn +cd /tmp/drgn +tar -xf "/io/$SDIST" --strip-components=1 + +python_supported() { + "$1" -c 'import sys; sys.exit(sys.version_info < (3, 6))' +} + +for pybin in /opt/python/cp*/bin; do + if python_supported "$pybin/python"; then + # static_assert was added to assert.h in glibc 2.16, but CentOS + # 6 has 2.12. + CPPFLAGS="-Dstatic_assert=_Static_assert" "$pybin/pip" wheel . 
--no-deps -w /tmp/wheels/ + fi +done + +for wheel in /tmp/wheels/*.whl; do + if auditwheel show "$wheel"; then + auditwheel repair "$wheel" --plat "$PLAT" -w /tmp/manylinux_wheels/ + else + echo "Skipping non-platform wheel $wheel" + fi +done + +for pybin in /opt/python/cp*/bin; do + if python_supported "$pybin/python"; then + "$pybin/pip" install drgn --no-index -f /tmp/manylinux_wheels/ + "$pybin/drgn" --version + fi +done + +chown "$OWNER" /tmp/manylinux_wheels/* +mv /tmp/manylinux_wheels/* /io/dist/ diff --git a/setup.py b/setup.py index 240fe1c61..d60253092 100755 --- a/setup.py +++ b/setup.py @@ -125,20 +125,7 @@ def make_release_tree(self, base_dir, files): class test(Command): description = "run unit tests after in-place build" - KERNELS = [ - "5.12", - "5.11", - "5.10", - "5.9", - "5.8", - "5.7", - "5.6", - "5.4", - "4.19", - "4.14", - "4.9", - "4.4", - ] + KERNELS = ["5.13", "5.12", "5.11", "5.10", "5.4", "4.19", "4.14", "4.9", "4.4"] user_options = [ ( @@ -203,11 +190,13 @@ def _run_vm(self, kernel_dir): def run(self): from pathlib import Path - from vmtest.download import KernelDownloader + from vmtest.download import download_kernels_in_thread # Start downloads ASAP so that they're hopefully done by the time we # need them. 
- with KernelDownloader(self.kernels, Path(self.vmtest_dir)) as downloader: + with download_kernels_in_thread( + Path(self.vmtest_dir), "x86_64", self.kernels + ) as kernel_downloads: if self.kernels: self.announce("downloading kernels in the background", log.INFO) self.run_command("egg_info") @@ -225,14 +214,17 @@ def run(self): failed.append("local") if self.kernels: - for kernel in downloader: + for kernel in kernel_downloads: + kernel_release = kernel.name + if kernel_release.startswith("kernel-"): + kernel_release = kernel_release[len("kernel-") :] self.announce( - f"running tests in VM on Linux {kernel.name}", log.INFO + f"running tests in VM on Linux {kernel_release}", log.INFO ) if self._run_vm(kernel): - passed.append(kernel.name) + passed.append(kernel_release) else: - failed.append(kernel.name) + failed.append(kernel_release) if passed: self.announce(f'Passed: {", ".join(passed)}', log.INFO) diff --git a/tests/__init__.py b/tests/__init__.py index 028062aec..43367aec4 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -41,6 +41,23 @@ def mock_memory_read(data, address, count, offset, physical): return data[offset : offset + count] +def add_mock_memory_segments(prog, segments): + for segment in segments: + if segment.virt_addr is not None: + prog.add_memory_segment( + segment.virt_addr, + len(segment.buf), + functools.partial(mock_memory_read, segment.buf), + ) + if segment.phys_addr is not None: + prog.add_memory_segment( + segment.phys_addr, + len(segment.buf), + functools.partial(mock_memory_read, segment.buf), + True, + ) + + class MockObject(NamedTuple): name: str type: Type @@ -84,20 +101,7 @@ def mock_object_find(prog, name, flags, filename): prog = Program(platform) if segments is not None: - for segment in segments: - if segment.virt_addr is not None: - prog.add_memory_segment( - segment.virt_addr, - len(segment.buf), - functools.partial(mock_memory_read, segment.buf), - ) - if segment.phys_addr is not None: - prog.add_memory_segment( - 
segment.phys_addr, - len(segment.buf), - functools.partial(mock_memory_read, segment.buf), - True, - ) + add_mock_memory_segments(prog, segments) if types is not None: prog.add_type_finder(mock_find_type) if objects is not None: diff --git a/tests/assembler.py b/tests/assembler.py new file mode 100644 index 000000000..b4aa7aaf5 --- /dev/null +++ b/tests/assembler.py @@ -0,0 +1,62 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# SPDX-License-Identifier: GPL-3.0-or-later + +from collections import namedtuple + + +def _append_uleb128(buf, value): + while True: + byte = value & 0x7F + value >>= 7 + if value: + buf.append(byte | 0x80) + else: + buf.append(byte) + break + + +def _append_sleb128(buf, value): + while True: + byte = value & 0x7F + value >>= 7 + if (not value and not (byte & 0x40)) or (value == -1 and (byte & 0x40)): + buf.append(byte) + break + else: + buf.append(byte | 0x80) + + +U8 = namedtuple("U8", ["value"]) +U8._append = lambda self, buf, byteorder: buf.append(self.value) +S8 = namedtuple("S8", ["value"]) +S8._append = lambda self, buf, byteorder: buf.append(self.value & 0xFF) +U16 = namedtuple("U16", ["value"]) +U16._append = lambda self, buf, byteorder: buf.extend(self.value.to_bytes(2, byteorder)) +S16 = namedtuple("S16", ["value"]) +S16._append = lambda self, buf, byteorder: buf.extend( + self.value.to_bytes(2, byteorder, signed=True) +) +U32 = namedtuple("U32", ["value"]) +U32._append = lambda self, buf, byteorder: buf.extend(self.value.to_bytes(4, byteorder)) +S32 = namedtuple("S32", ["value"]) +S32._append = lambda self, buf, byteorder: buf.extend( + self.value.to_bytes(4, byteorder, signed=True) +) +U64 = namedtuple("U64", ["value"]) +U64._append = lambda self, buf, byteorder: buf.extend(self.value.to_bytes(8, byteorder)) +S64 = namedtuple("S64", ["value"]) +S64._append = lambda self, buf, byteorder: buf.extend( + self.value.to_bytes(8, byteorder, signed=True) +) +ULEB128 = namedtuple("ULEB128", ["value"]) +ULEB128._append = lambda 
self, buf, byteorder: _append_uleb128(buf, self.value) +SLEB128 = namedtuple("SLEB128", ["value"]) +SLEB128._append = lambda self, buf, byteorder: _append_sleb128(buf, self.value) + + +def assemble(*args, little_endian=True): + byteorder = "little" if little_endian else "big" + buf = bytearray() + for arg in args: + arg._append(buf, byteorder) + return buf diff --git a/tests/dwarfwriter.py b/tests/dwarfwriter.py index c0bacca53..9e82f9df1 100644 --- a/tests/dwarfwriter.py +++ b/tests/dwarfwriter.py @@ -4,6 +4,7 @@ from collections import namedtuple import os.path +from tests.assembler import _append_sleb128, _append_uleb128 from tests.dwarf import DW_AT, DW_FORM, DW_TAG from tests.elf import ET, PT, SHT from tests.elfwriter import ElfSection, create_elf_file @@ -13,29 +14,7 @@ DwarfDie.__new__.__defaults__ = (None,) -def _append_uleb128(buf, value): - while True: - byte = value & 0x7F - value >>= 7 - if value: - buf.append(byte | 0x80) - else: - buf.append(byte) - break - - -def _append_sleb128(buf, value): - while True: - byte = value & 0x7F - value >>= 7 - if (not value and not (byte & 0x40)) or (value == -1 and (byte & 0x40)): - buf.append(byte) - break - else: - buf.append(byte | 0x80) - - -def _compile_debug_abbrev(cu_die): +def _compile_debug_abbrev(unit_dies, use_dw_form_indirect): buf = bytearray() code = 1 @@ -47,39 +26,37 @@ def aux(die): buf.append(bool(die.children)) for attrib in die.attribs: _append_uleb128(buf, attrib.name) - _append_uleb128(buf, attrib.form) + _append_uleb128( + buf, DW_FORM.indirect if use_dw_form_indirect else attrib.form + ) buf.append(0) buf.append(0) if die.children: for child in die.children: aux(child) - aux(cu_die) + for die in unit_dies: + aux(die) buf.append(0) return buf -def _compile_debug_info(cu_die, little_endian, bits): - buf = bytearray() +def _compile_debug_info(unit_dies, little_endian, bits, use_dw_form_indirect): byteorder = "little" if little_endian else "big" - - buf.extend(b"\0\0\0\0") # unit_length - 
buf.extend((4).to_bytes(2, byteorder)) # version - buf.extend((0).to_bytes(4, byteorder)) # debug_abbrev_offset - buf.append(bits // 8) # address_size - die_offsets = [] relocations = [] code = 1 decl_file = 1 - def aux(die, depth): + def aux(buf, die, depth): nonlocal code, decl_file if depth == 1: die_offsets.append(len(buf)) _append_uleb128(buf, code) code += 1 for attrib in die.attribs: + if use_dw_form_indirect: + _append_uleb128(buf, attrib.form) if attrib.name == DW_AT.decl_file: value = decl_file decl_file += 1 @@ -108,6 +85,8 @@ def aux(die, depth): elif attrib.form == DW_FORM.ref4: relocations.append((len(buf), value)) buf.extend(b"\0\0\0\0") + elif attrib.form == DW_FORM.ref_sig8: + buf.extend((value + 1).to_bytes(8, byteorder)) elif attrib.form == DW_FORM.sec_offset: buf.extend(b"\0\0\0\0") elif attrib.form == DW_FORM.flag_present: @@ -119,20 +98,41 @@ def aux(die, depth): assert False, attrib.form if die.children: for child in die.children: - aux(child, depth + 1) + aux(buf, child, depth + 1) buf.append(0) - aux(cu_die, 0) + debug_info = bytearray() + debug_types = bytearray() + tu_id = 1 + for die in unit_dies: + relocations.clear() + die_offsets.clear() + buf = debug_info if die.tag == DW_TAG.compile_unit else debug_types + orig_len = len(buf) + buf.extend(b"\0\0\0\0") # unit_length + buf.extend((4).to_bytes(2, byteorder)) # version + buf.extend((0).to_bytes(4, byteorder)) # debug_abbrev_offset + buf.append(bits // 8) # address_size - unit_length = len(buf) - 4 - buf[:4] = unit_length.to_bytes(4, byteorder) + if die.tag == DW_TAG.type_unit: + buf.extend(tu_id.to_bytes(8, byteorder)) # type_signature + tu_id += 1 + # For now, we assume that the first child is the type. 
+ relocations.append((len(buf), 0)) + buf.extend(b"\0\0\0\0") # type_offset - for offset, index in relocations: - buf[offset : offset + 4] = die_offsets[index].to_bytes(4, byteorder) - return buf + aux(buf, die, 0) + + unit_length = len(buf) - orig_len - 4 + buf[orig_len : orig_len + 4] = unit_length.to_bytes(4, byteorder) + for offset, index in relocations: + die_offset = die_offsets[index] - orig_len + buf[offset : offset + 4] = die_offset.to_bytes(4, byteorder) + return debug_info, debug_types -def _compile_debug_line(cu_die, little_endian): + +def _compile_debug_line(unit_dies, little_endian): buf = bytearray() byteorder = "little" if little_endian else "big" @@ -159,7 +159,8 @@ def compile_include_directories(die): for child in die.children: compile_include_directories(child) - compile_include_directories(cu_die) + for die in unit_dies: + compile_include_directories(die) buf.append(0) decl_file = 1 @@ -185,7 +186,8 @@ def compile_file_names(die): for child in die.children: compile_file_names(child) - compile_file_names(cu_die) + for die in unit_dies: + compile_file_names(die) buf.append(0) unit_length = len(buf) - 4 @@ -195,39 +197,61 @@ def compile_file_names(die): return buf -def compile_dwarf(dies, little_endian=True, bits=64, *, lang=None): +UNIT_HEADER_TYPES = frozenset({DW_TAG.type_unit, DW_TAG.compile_unit}) + + +def compile_dwarf( + dies, little_endian=True, bits=64, *, lang=None, use_dw_form_indirect=False +): if isinstance(dies, DwarfDie): dies = (dies,) assert all(isinstance(die, DwarfDie) for die in dies) - cu_attribs = [ - DwarfAttrib(DW_AT.comp_dir, DW_FORM.string, "/usr/src"), - DwarfAttrib(DW_AT.stmt_list, DW_FORM.sec_offset, 0), - ] + + if dies and dies[0].tag in UNIT_HEADER_TYPES: + unit_dies = dies + else: + unit_dies = (DwarfDie(DW_TAG.compile_unit, (), dies),) + assert all(die.tag in UNIT_HEADER_TYPES for die in unit_dies) + + unit_attribs = [DwarfAttrib(DW_AT.stmt_list, DW_FORM.sec_offset, 0)] if lang is not None: - 
cu_attribs.append(DwarfAttrib(DW_AT.language, DW_FORM.data1, lang)) - cu_die = DwarfDie(DW_TAG.compile_unit, cu_attribs, dies) - - return create_elf_file( - ET.EXEC, - [ - ElfSection(p_type=PT.LOAD, vaddr=0xFFFF0000, data=b""), - ElfSection( - name=".debug_abbrev", - sh_type=SHT.PROGBITS, - data=_compile_debug_abbrev(cu_die), - ), - ElfSection( - name=".debug_info", - sh_type=SHT.PROGBITS, - data=_compile_debug_info(cu_die, little_endian, bits), - ), - ElfSection( - name=".debug_line", - sh_type=SHT.PROGBITS, - data=_compile_debug_line(cu_die, little_endian), - ), - ElfSection(name=".debug_str", sh_type=SHT.PROGBITS, data=b"\0"), - ], - little_endian=little_endian, - bits=bits, + unit_attribs.append(DwarfAttrib(DW_AT.language, DW_FORM.data1, lang)) + cu_attribs = unit_attribs + [ + DwarfAttrib(DW_AT.comp_dir, DW_FORM.string, "/usr/src") + ] + + unit_dies = [ + DwarfDie( + die.tag, + list(die.attribs) + + (cu_attribs if die.tag == DW_TAG.compile_unit else unit_attribs), + die.children, + ) + for die in unit_dies + ] + + debug_info, debug_types = _compile_debug_info( + unit_dies, little_endian, bits, use_dw_form_indirect ) + + sections = [ + ElfSection(p_type=PT.LOAD, vaddr=0xFFFF0000, data=b""), + ElfSection( + name=".debug_abbrev", + sh_type=SHT.PROGBITS, + data=_compile_debug_abbrev(unit_dies, use_dw_form_indirect), + ), + ElfSection(name=".debug_info", sh_type=SHT.PROGBITS, data=debug_info), + ElfSection( + name=".debug_line", + sh_type=SHT.PROGBITS, + data=_compile_debug_line(unit_dies, little_endian), + ), + ElfSection(name=".debug_str", sh_type=SHT.PROGBITS, data=b"\0"), + ] + if debug_types: + sections.append( + ElfSection(name=".debug_types", sh_type=SHT.PROGBITS, data=debug_types) + ) + + return create_elf_file(ET.EXEC, sections, little_endian=little_endian, bits=bits) diff --git a/tests/helpers/linux/test_stack_trace.py b/tests/helpers/linux/test_stack_trace.py index 1420813c2..0716439b0 100644 --- a/tests/helpers/linux/test_stack_trace.py +++ 
b/tests/helpers/linux/test_stack_trace.py @@ -44,6 +44,23 @@ def test_by_pid_dwarf(self): def test_by_pid_orc(self): self._test_by_pid(True) + def test_local_variable(self): + pid = fork_and_pause() + wait_until(lambda: proc_state(pid) == "S") + for frame in self.prog.stack_trace(pid): + if frame.name in ("context_switch", "__schedule"): + try: + prev = frame["prev"] + except KeyError: + continue + if not prev.absent_: + self.assertEqual(prev.pid, pid) + break + else: + self.skipTest("prev not found in context_switch or __schedule") + os.kill(pid, signal.SIGKILL) + os.waitpid(pid, 0) + def test_pt_regs(self): # This won't unwind anything useful, but at least make sure it accepts # a struct pt_regs. diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index 8a63ab575..fd1b6f3bc 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -1,12 +1,16 @@ # Copyright (c) Facebook, Inc. and its affiliates. # SPDX-License-Identifier: GPL-3.0-or-later +import functools +import operator import os.path import re import tempfile import unittest +import drgn from drgn import ( + FaultError, FindObjectFlags, Language, Object, @@ -18,8 +22,15 @@ TypeParameter, TypeTemplateParameter, ) -from tests import DEFAULT_LANGUAGE, TestCase, identical -from tests.dwarf import DW_AT, DW_ATE, DW_END, DW_FORM, DW_LANG, DW_TAG +from tests import ( + DEFAULT_LANGUAGE, + MockMemorySegment, + TestCase, + add_mock_memory_segments, + identical, +) +import tests.assembler as assembler +from tests.dwarf import DW_AT, DW_ATE, DW_END, DW_FORM, DW_LANG, DW_OP, DW_TAG from tests.dwarfwriter import DwarfAttrib, DwarfDie, compile_dwarf bool_die = DwarfDie( @@ -181,12 +192,14 @@ ) -def dwarf_program(*args, **kwds): +def dwarf_program(*args, segments=None, **kwds): prog = Program() with tempfile.NamedTemporaryFile() as f: f.write(compile_dwarf(*args, **kwds)) f.flush() prog.load_debug_info([f.name]) + if segments is not None: + add_mock_memory_segments(prog, segments) return prog @@ -204,6 +217,24 @@ def 
wrap_test_type_dies(dies): ) +elfutils_version = tuple(int(x) for x in drgn._elfutils_version.split(".")[:2]) + + +def with_and_without_dw_form_indirect(f): + @functools.wraps(f) + def wrapper(self): + with self.subTest(): + f(self, False) + # elfutils does not support DW_FORM_indirect properly before commit + # d63b26b8d21f ("libdw: handle DW_FORM_indirect when reading + # attributes"). + if elfutils_version >= (0, 184): + with self.subTest(msg="with DW_FORM_indirect"): + f(self, True) + + return wrapper + + class TestTypes(TestCase): def test_unknown_tag(self): prog = dwarf_program(wrap_test_type_dies(DwarfDie(0x9999, ()))) @@ -371,7 +402,8 @@ def test_byteorder_by_name(self): ) self.assertIdentical(prog.type("int"), prog.int_type("int", 4, True, "little")) - def test_qualifier(self): + @with_and_without_dw_form_indirect + def test_qualifier(self, use_dw_form_indirect): prog = dwarf_program( wrap_test_type_dies( ( @@ -380,7 +412,8 @@ def test_qualifier(self): ), int_die, ) - ) + ), + use_dw_form_indirect=use_dw_form_indirect, ) self.assertIdentical( prog.type("TEST").type, @@ -711,22 +744,6 @@ def test_struct_missing_size(self): "TEST", ) - def test_struct_invalid_name(self): - prog = dwarf_program( - wrap_test_type_dies( - DwarfDie( - DW_TAG.structure_type, - ( - DwarfAttrib(DW_AT.name, DW_FORM.data1, 0), - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 0), - ), - ) - ) - ) - self.assertRaisesRegex( - Exception, "DW_TAG_structure_type has invalid DW_AT_name", prog.type, "TEST" - ) - def test_incomplete_to_complete(self): prog = dwarf_program( wrap_test_type_dies( @@ -1919,29 +1936,6 @@ def test_enum_missing_compatible_type_and_byte_size(self): "TEST", ) - def test_enum_invalid_name(self): - prog = dwarf_program( - wrap_test_type_dies( - ( - DwarfDie( - DW_TAG.enumeration_type, - ( - DwarfAttrib(DW_AT.name, DW_FORM.data1, 0), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), - ), - ), - unsigned_int_die, - ) - ) - ) - 
self.assertRaisesRegex( - Exception, - "DW_TAG_enumeration_type has invalid DW_AT_name", - prog.type, - "TEST", - ) - def test_enum_enumerator_missing_name(self): prog = dwarf_program( wrap_test_type_dies( @@ -3469,6 +3463,104 @@ def test_language(self): prog.int_type("int", 4, True, language=DEFAULT_LANGUAGE), ) + def test_base_type_unit(self): + prog = dwarf_program( + ( + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.base_type, + (DwarfAttrib(DW_AT.signature, DW_FORM.ref_sig8, 0),), + ), + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "TEST"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + ), + ), + ), + ), + DwarfDie(DW_TAG.type_unit, (), ((int_die,))), + ) + ) + self.assertIdentical(prog.type("TEST").type, prog.int_type("int", 4, True)) + self.assertIdentical(prog.type("int"), prog.type("TEST").type) + + def test_struct_type_unit(self): + prog = dwarf_program( + ( + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.structure_type, + (DwarfAttrib(DW_AT.signature, DW_FORM.ref_sig8, 0),), + ), + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "TEST"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + ), + ), + ), + ), + DwarfDie( + DW_TAG.type_unit, + (), + ( + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 0 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "y"), + DwarfAttrib( + DW_AT.data_member_location, DW_FORM.data1, 4 + ), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + ), + ), + ), + ), + int_die, + ), + ), + ) + ) + + self.assertIdentical( + prog.type("TEST").type, + prog.struct_type( + "point", + 8, + ( + TypeMember(prog.int_type("int", 4, True), "x", 
0), + TypeMember(prog.int_type("int", 4, True), "y", 32), + ), + ), + ) + self.assertIdentical(prog.type("struct point"), prog.type("TEST").type) + class TestObjects(TestCase): def test_constant_signed_enum(self): @@ -3658,164 +3750,75 @@ def test_variable(self): FindObjectFlags.CONSTANT, ) - def test_variable_no_address(self): + def test_zero_size_variable(self): prog = dwarf_program( wrap_test_type_dies( ( int_die, DwarfDie( - DW_TAG.variable, - ( - DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - ), + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0),), ), - ) - ) - ) - self.assertIdentical(prog.object("x"), Object(prog, "int")) - - def test_variable_unimplemented_location(self): - prog = dwarf_program( - wrap_test_type_dies( - ( - int_die, DwarfDie( DW_TAG.variable, ( DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - DwarfAttrib(DW_AT.location, DW_FORM.exprloc, b"\xe0"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + b"\x03\x04\x03\x02\x01\xff\xff\xff\xff", + ), ), ), ) ) ) - self.assertRaisesRegex(Exception, "unimplemented operation", prog.object, "x") - - def test_variable_const_signed(self): - for form in ( - DW_FORM.data1, - DW_FORM.data2, - DW_FORM.data4, - DW_FORM.data8, - DW_FORM.sdata, - ): - - prog = dwarf_program( - wrap_test_type_dies( - ( - int_die, - DwarfDie( - DW_TAG.variable, - ( - DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - DwarfAttrib(DW_AT.const_value, form, 1), - ), - ), - ) - ) - ) - self.assertIdentical( - prog["x"], Object(prog, prog.int_type("int", 4, True), 1) - ) - - def test_variable_const_unsigned(self): - for form in ( - DW_FORM.data1, - DW_FORM.data2, - DW_FORM.data4, - DW_FORM.data8, - DW_FORM.udata, - ): - prog = dwarf_program( - wrap_test_type_dies( - ( - unsigned_int_die, - DwarfDie( - DW_TAG.variable, - ( - DwarfAttrib(DW_AT.name, 
DW_FORM.string, "x"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - DwarfAttrib(DW_AT.const_value, form, 1), - ), - ), - ) - ) - ) - self.assertIdentical( - prog["x"], Object(prog, prog.int_type("unsigned int", 4, False), 1) - ) + self.assertIdentical( + prog["x"], + Object( + prog, + prog.array_type(prog.int_type("int", 4, True)), + address=0xFFFFFFFF01020304, + ), + ) - def test_variable_const_block(self): + def test_variable_no_address(self): prog = dwarf_program( wrap_test_type_dies( ( int_die, - DwarfDie( - DW_TAG.array_type, - (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0),), - ( - DwarfDie( - DW_TAG.subrange_type, - (DwarfAttrib(DW_AT.upper_bound, DW_FORM.data1, 1),), - ), - ), - ), DwarfDie( DW_TAG.variable, ( - DwarfAttrib(DW_AT.name, DW_FORM.string, "p"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - DwarfAttrib( - DW_AT.const_value, - DW_FORM.block1, - b"\x01\x00\x00\x00\x02\x00\x00\x00", - ), + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), ), ), ) ) ) - self.assertIdentical( - prog["p"], - Object(prog, prog.array_type(prog.int_type("int", 4, True), 2), [1, 2]), - ) + self.assertIdentical(prog.object("x"), Object(prog, "int")) - def test_variable_const_block_too_small(self): + def test_variable_expr_empty(self): prog = dwarf_program( wrap_test_type_dies( ( int_die, - DwarfDie( - DW_TAG.array_type, - (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0),), - ( - DwarfDie( - DW_TAG.subrange_type, - (DwarfAttrib(DW_AT.upper_bound, DW_FORM.data1, 1),), - ), - ), - ), DwarfDie( DW_TAG.variable, ( - DwarfAttrib(DW_AT.name, DW_FORM.string, "p"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), - DwarfAttrib( - DW_AT.const_value, - DW_FORM.block1, - b"\x01\x00\x00\x00\x02\x00\x00", - ), + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.location, DW_FORM.exprloc, b""), ), ), ) ) ) - self.assertRaisesRegex(Exception, "too small", prog.variable, "p") + 
self.assertIdentical(prog.object("x"), Object(prog, "int")) - def test_specification(self): + def test_variable_expr_bit_piece(self): prog = dwarf_program( wrap_test_type_dies( ( @@ -3825,64 +3828,1830 @@ def test_specification(self): ( DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True), - ), - ), - DwarfDie( - DW_TAG.variable, - ( - DwarfAttrib(DW_AT.specification, DW_FORM.ref4, 1), DwarfAttrib( DW_AT.location, DW_FORM.exprloc, - b"\x03\x04\x03\x02\x01\xff\xff\xff\xff", + assembler.assemble( + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFFFFFF01020304), + assembler.U8(DW_OP.bit_piece), + assembler.ULEB128(32), + assembler.ULEB128(4), + ), ), ), ), ) - ) + ), ) + self.assertIdentical( + prog.object("x"), + Object(prog, "int", address=0xFFFFFFFF01020304, bit_offset=4), + ) + + def test_variable_expr_implicit_value(self): + for little_endian in (True, False): + with self.subTest(little_endian=little_endian): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.implicit_value), + assembler.ULEB128(4), + assembler.U32(0x12345678), + little_endian=little_endian, + ), + ), + ), + ), + ) + ), + little_endian=little_endian, + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", 0x12345678)) + + def test_variable_expr_implicit_value_pieces(self): + for little_endian in (True, False): + with self.subTest(little_endian=little_endian): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.implicit_value), + 
assembler.ULEB128(2), + assembler.U16( + 0x5678 if little_endian else 0x1234 + ), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + assembler.U8(DW_OP.implicit_value), + assembler.ULEB128(2), + assembler.U16( + 0x1234 if little_endian else 0x5678 + ), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + little_endian=little_endian, + ), + ), + ), + ), + ) + ), + little_endian=little_endian, + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", 0x12345678)) + + def test_variable_expr_implicit_value_pieces_too_large(self): + for little_endian in (True, False): + with self.subTest(little_endian=little_endian): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.implicit_value), + assembler.ULEB128(2), + assembler.U16( + 0x5678 if little_endian else 0x1234 + ), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + assembler.U8(DW_OP.implicit_value), + assembler.ULEB128(4), + assembler.U32( + 0x1234 if little_endian else 0x5678 + ), + assembler.U8(DW_OP.piece), + # Piece size is larger than remaining size of object. + assembler.ULEB128(4), + assembler.U8(DW_OP.implicit_value), + assembler.ULEB128(4), + assembler.U32(0), + # There is nothing remaining in the object. 
+ assembler.U8(DW_OP.piece), + assembler.ULEB128(4), + little_endian=little_endian, + ), + ), + ), + ), + ) + ), + little_endian=little_endian, + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", 0x12345678)) + + def test_variable_expr_implicit_value_too_small(self): + for little_endian in (True, False): + with self.subTest(little_endian=little_endian): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.implicit_value), + assembler.ULEB128(1), + assembler.U8(0x99), + little_endian=little_endian, + ), + ), + ), + ), + ) + ), + little_endian=little_endian, + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", 0x99)) + + def test_variable_expr_implicit_value_bit_pieces(self): + for little_endian in (True, False): + with self.subTest(little_endian=little_endian): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.implicit_value), + assembler.ULEB128(1), + assembler.U8( + 0x8F if little_endian else 0x1F + ), + assembler.U8(DW_OP.bit_piece), + assembler.ULEB128(4), + assembler.ULEB128(4), + assembler.U8(DW_OP.implicit_value), + assembler.ULEB128(4), + assembler.U32( + 0x1234567 + if little_endian + else 0x2345678 + ), + assembler.U8(DW_OP.bit_piece), + assembler.ULEB128(28), + assembler.ULEB128(0), + little_endian=little_endian, + ), + ), + ), + ), + ) + ), + little_endian=little_endian, + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", 0x12345678)) + + def test_variable_expr_implicit_value_piece_empty(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + 
DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.implicit_value), + assembler.ULEB128(2), + assembler.U16(0), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + ), + ), + ), + ), + ) + ), + ) + self.assertIdentical(prog.object("x"), Object(prog, "int")) + + def test_variable_expr_stack_value(self): + for little_endian in (True, False): + with self.subTest(little_endian=little_endian): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.lit31), + assembler.U8(DW_OP.stack_value), + little_endian=little_endian, + ), + ), + ), + ), + ) + ), + little_endian=little_endian, + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", 31)) + + def test_variable_expr_stack_value_pieces(self): + for little_endian in (True, False): + with self.subTest(little_endian=little_endian): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8( + DW_OP.lit2 + if little_endian + else DW_OP.lit1 + ), + assembler.U8(DW_OP.stack_value), + assembler.U8(DW_OP.piece), + assembler.ULEB128( + 3 if little_endian else 1 + ), + assembler.U8( + DW_OP.lit1 + if little_endian + else DW_OP.lit2 + ), + assembler.U8(DW_OP.stack_value), + assembler.U8(DW_OP.piece), + assembler.ULEB128( + 1 if little_endian else 3 + ), + little_endian=little_endian, + ), + ), + ), + ), + ) + ), + little_endian=little_endian, + ) + 
self.assertIdentical(prog.object("x"), Object(prog, "int", 0x1000002)) + + def test_variable_expr_stack_value_bit_pieces(self): + for little_endian in (True, False): + with self.subTest(little_endian=little_endian): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8( + DW_OP.lit2 + if little_endian + else DW_OP.lit31 + ), + assembler.U8(DW_OP.stack_value), + assembler.U8(DW_OP.bit_piece), + assembler.ULEB128( + 4 if little_endian else 28 + ), + assembler.ULEB128( + 0 if little_endian else 4 + ), + assembler.U8( + DW_OP.lit31 + if little_endian + else DW_OP.lit2 + ), + assembler.U8(DW_OP.stack_value), + assembler.U8(DW_OP.bit_piece), + assembler.ULEB128( + 28 if little_endian else 4 + ), + assembler.ULEB128( + 4 if little_endian else 0 + ), + little_endian=little_endian, + ), + ), + ), + ), + ) + ), + little_endian=little_endian, + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", 0x12)) + + def test_variable_expr_stack_value_piece_empty(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.stack_value), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + ), + ), + ), + ), + ) + ), + ) + self.assertIdentical(prog.object("x"), Object(prog, "int")) + + def test_variable_expr_contiguous_piece_addresses(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + 
DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0000), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0002), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + ), + ), + ), + ), + ) + ), + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", address=0xFFFF0000)) + + def test_variable_expr_contiguous_bit_piece_addresses(self): + for bit_offset in (0, 1): + with self.subTest(bit_offset=bit_offset): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0000), + assembler.U8(DW_OP.bit_piece), + assembler.ULEB128(10), + assembler.ULEB128(bit_offset), + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0001), + assembler.U8(DW_OP.bit_piece), + assembler.ULEB128(22), + assembler.ULEB128(bit_offset + 2), + ), + ), + ), + ), + ) + ), + ) + self.assertIdentical( + prog.object("x"), + Object(prog, "int", address=0xFFFF0000, bit_offset=bit_offset), + ) + + def test_variable_expr_non_contiguous_piece_addresses(self): + for little_endian in (True, False): + with self.subTest(little_endian=little_endian): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0002), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0000), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + little_endian=little_endian, + ), + ), + ), + ), + ) + ), + little_endian=little_endian, + segments=[ + MockMemorySegment( 
+ (0x12345678).to_bytes( + 4, "little" if little_endian else "big" + ), + 0xFFFF0000, + ) + ], + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", 0x56781234)) + + def test_variable_expr_non_contiguous_piece_addresses_too_large(self): + for little_endian in (True, False): + with self.subTest(little_endian=little_endian): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0002), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0000), + assembler.U8(DW_OP.piece), + assembler.ULEB128(256), + little_endian=little_endian, + ), + ), + ), + ), + ) + ), + little_endian=little_endian, + segments=[ + MockMemorySegment( + (0x12345678).to_bytes( + 4, "little" if little_endian else "big" + ), + 0xFFFF0000, + ) + ], + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", 0x56781234)) + + def test_variable_expr_non_contiguous_bit_piece_addresses(self): + for little_endian in (True, False): + with self.subTest(little_endian=little_endian): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0000), + assembler.U8(DW_OP.bit_piece), + assembler.ULEB128(4), + assembler.ULEB128(0), + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0000), + assembler.U8(DW_OP.bit_piece), + assembler.ULEB128(28), + assembler.ULEB128(5), + little_endian=little_endian, + ), + ), + ), + ), + ) + ), + little_endian=little_endian, + segments=[ + MockMemorySegment( + ( + (0x2468ACE8).to_bytes(5, "little") + if 
little_endian + else (0x111A2B3C00).to_bytes(5, "big") + ), + 0xFFFF0000, + ) + ], + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", 0x12345678)) + + def test_variable_expr_empty_piece_non_contiguous_address(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0000), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + # This piece is not contiguous with + # the previous one, but it is zero + # bits so it should be ignored. + assembler.U8(DW_OP.addr), + assembler.U64(0xEEEE0000), + assembler.U8(DW_OP.piece), + assembler.ULEB128(0), + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0002), + assembler.U8(DW_OP.piece), + assembler.ULEB128(2), + ), + ), + ), + ), + ) + ), + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", address=0xFFFF0000)) + + def test_variable_expr_previous_empty_piece_non_contiguous_address(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.addr), + assembler.U64(0xEEEE0000), + assembler.U8(DW_OP.piece), + assembler.ULEB128(0), + # This piece is not contiguous with + # the previous one, but the + # previous one was zero bits so it + # should be ignored. 
+ assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0000), + assembler.U8(DW_OP.piece), + assembler.ULEB128(4), + ), + ), + ), + ), + ) + ), + ) + self.assertIdentical(prog.object("x"), Object(prog, "int", address=0xFFFF0000)) + + def test_variable_expr_address_empty_piece(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.addr), + assembler.U64(0xEEEE0000), + assembler.U8(DW_OP.piece), + assembler.ULEB128(0), + ), + ), + ), + ), + ) + ), + ) + self.assertIdentical(prog.object("x"), Object(prog, "int")) + + def test_variable_expr_absent_empty_piece(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.piece), + assembler.ULEB128(0), + ), + ), + ), + ), + ) + ), + ) + self.assertIdentical(prog.object("x"), Object(prog, "int")) + + def test_variable_expr_unknown(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.location, DW_FORM.exprloc, b"\xdf"), + ), + ), + ) + ) + ) + self.assertRaisesRegex( + Exception, "unknown DWARF expression opcode", prog.object, "x" + ) + + def test_variable_expr_unknown_after_location(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + assembler.U8(DW_OP.implicit_value), + assembler.ULEB128(4), + 
assembler.U32(0), + assembler.U8(0xDF), + ), + ), + ), + ), + ) + ) + ) + self.assertRaisesRegex( + Exception, "unknown DWARF expression opcode", prog.object, "x" + ) + + def _eval_dwarf_expr(self, ops, **kwds): + assemble_kwds = { + key: value for key, value in kwds.items() if key == "little_endian" + } + return dwarf_program( + wrap_test_type_dies( + ( + unsigned_long_long_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + assembler.assemble( + *ops, + assembler.U8(DW_OP.stack_value), + **assemble_kwds, + ), + ), + ), + ), + ) + ), + **kwds, + )["x"].value_() + + def _assert_dwarf_expr_eval(self, ops, expected, **kwds): + self.assertEqual(self._eval_dwarf_expr(ops, **kwds), expected) + + def _assert_dwarf_expr_stack_underflow(self, ops, **kwds): + with self.assertRaisesRegex(Exception, "stack underflow"): + self._eval_dwarf_expr(ops, **kwds) + + def test_variable_expr_op_lit(self): + for i in range(32): + with self.subTest(i=i): + self._assert_dwarf_expr_eval([assembler.U8(DW_OP.lit0 + i)], i) + + def test_variable_expr_op_addr(self): + with self.subTest(bits=64): + self._assert_dwarf_expr_eval( + [assembler.U8(DW_OP.addr), assembler.U64(2 ** 64 - 1)], + 2 ** 64 - 1, + bits=64, + ) + with self.subTest(bits=32): + self._assert_dwarf_expr_eval( + [assembler.U8(DW_OP.addr), assembler.U32(2 ** 32 - 1)], + 2 ** 32 - 1, + bits=32, + ) + + def test_variable_expr_op_constu(self): + for bits in (64, 32): + for size in (1, 2, 4, 8): + op_name = f"const{size}u" + with self.subTest(bits=bits, op=op_name): + op = getattr(DW_OP, op_name) + type_ = getattr(assembler, f"U{size * 8}") + self._assert_dwarf_expr_eval( + [assembler.U8(op), type_(2 ** (size * 8) - 1)], + (2 ** (size * 8) - 1) & (2 ** bits - 1), + bits=bits, + ) + with self.subTest(bits=bits, op="constu"): + self._assert_dwarf_expr_eval( + [assembler.U8(DW_OP.constu), 
assembler.ULEB128(0x123456789)], + 0x123456789 & (2 ** bits - 1), + bits=bits, + ) + + def test_variable_expr_op_consts(self): + for bits in (64, 32): + for size in (1, 2, 4, 8): + op_name = f"const{size}s" + with self.subTest(bits=bits, op=op_name): + op = getattr(DW_OP, op_name) + type_ = getattr(assembler, f"S{size * 8}") + self._assert_dwarf_expr_eval( + [assembler.U8(op), type_(-1)], + -1 & (2 ** bits - 1), + bits=bits, + ) + with self.subTest(bits=bits, op="consts"): + self._assert_dwarf_expr_eval( + [assembler.U8(DW_OP.consts), assembler.SLEB128(-0x123456789)], + -0x123456789 & (2 ** bits - 1), + bits=bits, + ) + + def test_variable_expr_op_dup(self): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.dup), + assembler.U8(DW_OP.plus), + ], + 2, + ) + + def test_variable_expr_op_drop(self): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.lit2), + assembler.U8(DW_OP.drop), + assembler.U8(DW_OP.lit3), + assembler.U8(DW_OP.plus), + ], + 4, + ) + + def test_variable_expr_op_pick(self): + for i, value in enumerate((30, 20, 10)): + with self.subTest(i=i): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit10), + assembler.U8(DW_OP.lit20), + assembler.U8(DW_OP.lit30), + assembler.U8(DW_OP.pick), + assembler.U8(i), + ], + value, + ) + + def test_variable_expr_op_pick_underflow(self): + for i in (3, 255): + with self.subTest(i=i): + self._assert_dwarf_expr_stack_underflow( + [ + assembler.U8(DW_OP.lit10), + assembler.U8(DW_OP.lit20), + assembler.U8(DW_OP.lit30), + assembler.U8(DW_OP.pick), + assembler.U8(i), + ] + ) + + def test_variable_expr_op_over(self): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit10), + assembler.U8(DW_OP.lit20), + assembler.U8(DW_OP.over), + ], + 10, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit10), + assembler.U8(DW_OP.lit20), + assembler.U8(DW_OP.lit30), + assembler.U8(DW_OP.over), + ], + 20, + ) + + def 
test_variable_expr_op_swap(self): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit3), + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.swap), + assembler.U8(DW_OP.minus), + ], + 2, + ) + + def test_variable_expr_op_rot(self): + for i, value in enumerate((5, 3, 7, 1)): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.lit3), + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.lit7), + assembler.U8(DW_OP.rot), + assembler.U8(DW_OP.pick), + assembler.U8(i), + ], + value, + ) + + def test_variable_expr_op_deref(self): + for bits in (64, 32): + for little_endian in (True, False): + with self.subTest(bits=bits, little_endian=little_endian): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.addr), + (assembler.U64 if bits == 64 else assembler.U32)( + 0xFFFF0000 + ), + assembler.U8(DW_OP.deref), + ], + 0x12345678, + bits=bits, + little_endian=little_endian, + segments=[ + MockMemorySegment( + (0x12345678).to_bytes( + bits // 8, "little" if little_endian else "big" + ), + 0xFFFF0000, + ) + ], + ) + + def test_variable_expr_op_deref_fault(self): + with self.assertRaises(FaultError): + self._eval_dwarf_expr( + [ + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0000), + assembler.U8(DW_OP.deref), + ] + ) + + def test_variable_expr_op_deref_size(self): + for bits in (64, 32): + for little_endian in (True, False): + with self.subTest(bits=bits, little_endian=little_endian): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.addr), + (assembler.U64 if bits == 64 else assembler.U32)( + 0xFFFF0000 + ), + assembler.U8(DW_OP.deref_size), + assembler.U8(2), + ], + 0x1337, + bits=bits, + little_endian=little_endian, + segments=[ + MockMemorySegment( + (0x1337).to_bytes( + 2, "little" if little_endian else "big" + ), + 0xFFFF0000, + ) + ], + ) + + def test_variable_expr_op_deref_size_fault(self): + with self.assertRaises(FaultError): + self._eval_dwarf_expr( + [ + assembler.U8(DW_OP.addr), + assembler.U64(0xFFFF0000), + 
assembler.U8(DW_OP.deref_size), + assembler.U8(1), + ] + ) + + def test_variable_expr_stack_underflow(self): + for case in [ + (DW_OP.dup, 1), + (DW_OP.drop, 1), + (DW_OP.over, 2), + (DW_OP.swap, 2), + (DW_OP.rot, 3), + (DW_OP.deref, 1), + (DW_OP.deref_size, 1, assembler.U8(1)), + (DW_OP.abs, 1), + (DW_OP.and_, 2), + (DW_OP.div, 2), + (DW_OP.minus, 2), + (DW_OP.mod, 2), + (DW_OP.mul, 2), + (DW_OP.neg, 1), + (DW_OP.not_, 1), + (DW_OP.or_, 2), + (DW_OP.plus, 2), + (DW_OP.plus_uconst, 1, assembler.ULEB128(1)), + (DW_OP.shl, 2), + (DW_OP.shr, 2), + (DW_OP.shra, 2), + (DW_OP.xor, 2), + (DW_OP.le, 2), + (DW_OP.ge, 2), + (DW_OP.eq, 2), + (DW_OP.lt, 2), + (DW_OP.gt, 2), + (DW_OP.ne, 2), + (DW_OP.bra, 1, assembler.S16(1)), + ]: + op = case[0] + min_entries = case[1] + extra_args = case[2:] + with self.subTest(op=op): + for i in range(min_entries): + self._assert_dwarf_expr_stack_underflow( + [assembler.U8(DW_OP.lit1)] * i + [assembler.U8(op), *extra_args] + ) + + def test_variable_expr_op_abs(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.const1s), + assembler.S8(-9), + assembler.U8(DW_OP.abs), + ], + 9, + bits=bits, + ) + + def test_variable_expr_op_and(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit3), + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.and_), + ], + 1, + bits=bits, + ) + + def test_variable_expr_op_div(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.lit2), + assembler.U8(DW_OP.div), + ], + 2, + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.lit3), + assembler.U8(DW_OP.div), + ], + 0, + bits=bits, + ) + # The DWARF 5 specification doesn't specify how signed division + # should be rounded. We assume truncation towards zero like C. 
+ self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.const1s), + assembler.S8(-5), + assembler.U8(DW_OP.lit2), + assembler.U8(DW_OP.div), + ], + -2 & (2 ** bits - 1), + bits=bits, + ) + + def test_variable_expr_op_div_by_zero(self): + with self.assertRaisesRegex(Exception, "division by zero"): + self._eval_dwarf_expr( + [ + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.div), + ] + ) + + def test_variable_expr_op_minus(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.lit2), + assembler.U8(DW_OP.minus), + ], + 3, + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit2), + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.minus), + ], + -3 & (2 ** bits - 1), + bits=bits, + ) + + def test_variable_expr_op_mod(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.lit2), + assembler.U8(DW_OP.mod), + ], + 1, + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.lit3), + assembler.U8(DW_OP.mod), + ], + 0, + bits=bits, + ) + # Although DW_OP_div is signed, DW_OP_mod is unsigned. 
+ self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.const1s), + assembler.S8(-5), + assembler.U8(DW_OP.lit2), + assembler.U8(DW_OP.mod), + ], + 1, + bits=bits, + ) + + def test_variable_expr_op_mod_by_zero(self): + with self.assertRaisesRegex(Exception, "modulo by zero"): + self._eval_dwarf_expr( + [ + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.mod), + ] + ) + + def test_variable_expr_op_mul(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.lit2), + assembler.U8(DW_OP.mul), + ], + 10, + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.const1s), + assembler.S8(-5), + assembler.U8(DW_OP.lit2), + assembler.U8(DW_OP.mul), + ], + ((-5 & (2 ** bits - 1)) * 2) & (2 ** bits - 1), + bits=bits, + ) + + def test_variable_expr_op_neg(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit7), + assembler.U8(DW_OP.neg), + ], + -7 & (2 ** bits - 1), + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.const1s), + assembler.S8(-7), + assembler.U8(DW_OP.neg), + ], + 7, + bits=bits, + ) + + def test_variable_expr_op_not(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.not_), + ], + 2 ** bits - 1, + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit31), + assembler.U8(DW_OP.not_), + ], + ~31 & (2 ** bits - 1), + bits=bits, + ) + + def test_variable_expr_op_or(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit3), + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.or_), + ], + 7, + bits=bits, + ) + + def test_variable_expr_op_plus(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + 
assembler.U8(DW_OP.lit6), + assembler.U8(DW_OP.lit7), + assembler.U8(DW_OP.plus), + ], + 13, + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.S8(DW_OP.const1s), + assembler.S8(-3), + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.plus), + ], + 2, + bits=bits, + ) - self.assertIdentical( - prog["x"], - Object(prog, prog.int_type("int", 4, True), address=0xFFFFFFFF01020304), + def test_variable_expr_op_plus_uconst(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit6), + assembler.U8(DW_OP.plus_uconst), + assembler.ULEB128(7), + ], + 13, + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.S8(DW_OP.const1s), + assembler.S8(-3), + assembler.U8(DW_OP.plus_uconst), + assembler.ULEB128(5), + ], + 2, + bits=bits, + ) + + def test_variable_expr_op_shl(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit3), + assembler.U8(DW_OP.lit4), + assembler.U8(DW_OP.shl), + ], + 48, + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.constu), + assembler.ULEB128(2 ** (bits - 2)), + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.shl), + ], + 2 ** (bits - 1), + bits=bits, + ) + # The DWARF specification doesn't define the behavior of + # shifting by a number of bits larger than the width of the + # type. We evaluate it to zero. 
+ self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit3), + assembler.U8(DW_OP.const1u), + assembler.U8(bits), + assembler.U8(DW_OP.shl), + ], + 0, + bits=bits, + ) + + def test_variable_expr_op_shr(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.const1u), + assembler.U8(48), + assembler.U8(DW_OP.lit4), + assembler.U8(DW_OP.shr), + ], + 3, + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.constu), + assembler.ULEB128(2 ** (bits - 1)), + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.shr), + ], + 2 ** (bits - 2), + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.const1s), + assembler.S8(-1), + assembler.U8(DW_OP.const1u), + assembler.U8(bits), + assembler.U8(DW_OP.shr), + ], + 0, + bits=bits, + ) + + def test_variable_expr_op_shra(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.const1u), + assembler.U8(48), + assembler.U8(DW_OP.lit4), + assembler.U8(DW_OP.shra), + ], + 3, + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.const1s), + assembler.S8(-48), + assembler.U8(DW_OP.lit4), + assembler.U8(DW_OP.shra), + ], + -3 & (2 ** bits - 1), + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.constu), + assembler.ULEB128(2 ** (bits - 1)), + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.shra), + ], + 2 ** (bits - 2) + 2 ** (bits - 1), + bits=bits, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.const1s), + assembler.S8(-2), + assembler.U8(DW_OP.const1u), + assembler.U8(bits), + assembler.U8(DW_OP.shra), + ], + -1 & (2 ** bits - 1), + bits=bits, + ) + + def test_variable_expr_op_xor(self): + for bits in (64, 32): + with self.subTest(bits=bits): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit3), + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.xor), + ], + 6, + bits=bits, + ) + + def 
test_variable_expr_relational(self): + for op, py_op in [ + (DW_OP.le, operator.le), + (DW_OP.ge, operator.ge), + (DW_OP.eq, operator.eq), + (DW_OP.lt, operator.lt), + (DW_OP.gt, operator.gt), + (DW_OP.ne, operator.ne), + ]: + for bits in (64, 32): + for val1, val2 in [ + (3, 5), + (3, -5), + (-3, 5), + (-3, -5), + (5, 5), + (5, -5), + (-5, 5), + (-5, -5), + (6, 5), + (6, -5), + (-6, 5), + (-6, -5), + ]: + with self.subTest(bits=bits, val1=val1, val2=val2): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.const1s), + assembler.S8(val1), + assembler.U8(DW_OP.const1s), + assembler.S8(val2), + assembler.U8(op), + ], + int(py_op(val1, val2)), + ) + + def test_variable_expr_op_skip(self): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.skip), + assembler.S16(3), + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.div), + assembler.U8(DW_OP.lit20), + ], + 20, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.skip), + assembler.S16(4), + assembler.U8(DW_OP.lit3), + assembler.U8(DW_OP.skip), + assembler.S16(4), + assembler.U8(DW_OP.lit2), + assembler.U8(DW_OP.skip), + assembler.S16(-8), + ], + 3, ) - def test_namespace_reverse_specification(self): - """Test specification inside namespace while declaration is outside of it.""" - dies = ( - int_die, - DwarfDie( - DW_TAG.namespace, + def test_variable_expr_op_skip_infinite(self): + with self.assertRaisesRegex(Exception, "too many operations"): + self._eval_dwarf_expr([assembler.U8(DW_OP.skip), assembler.S16(-3)]) + + def test_variable_expr_op_skip_out_of_bounds(self): + with self.assertRaisesRegex(Exception, "out of bounds"): + self._eval_dwarf_expr( [ - DwarfAttrib(DW_AT.name, DW_FORM.string, "moho"), - DwarfAttrib(DW_AT.sibling, DW_FORM.ref4, 2), + assembler.U8(DW_OP.skip), + # 1 extra for for the DW_OP_stack_value added by + # _eval_dwarf_expr(). 
+ assembler.U16(3), + assembler.U8(DW_OP.nop), ], + ) + + def test_variable_expr_op_bra(self): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit31), + assembler.U8(DW_OP.bra), + assembler.S16(3), + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.div), + assembler.U8(DW_OP.lit20), + ], + 20, + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.bra), + assembler.S16(1), + assembler.U8(DW_OP.lit2), + ], + 2, + ) + # More complicated expression implementing something like this: + # i = 0 + # x = 0 + # do { + # x += 2; + # i += 1; + # while (i <= 5); + # return x; + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.plus_uconst), + assembler.ULEB128(2), + assembler.U8(DW_OP.swap), + assembler.U8(DW_OP.plus_uconst), + assembler.ULEB128(1), + assembler.U8(DW_OP.swap), + assembler.U8(DW_OP.over), + assembler.U8(DW_OP.lit5), + assembler.U8(DW_OP.lt), + assembler.U8(DW_OP.bra), + assembler.S16(-12), + ], + 10, + ) + + def test_variable_expr_op_bra_out_of_bounds(self): + with self.assertRaisesRegex(Exception, "out of bounds"): + self._eval_dwarf_expr( [ + assembler.U8(DW_OP.lit1), + assembler.U8(DW_OP.bra), + # 1 extra for for the DW_OP_stack_value added by + # _eval_dwarf_expr(). 
+ assembler.U16(3), + assembler.U8(DW_OP.nop), + ], + ) + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.lit0), + assembler.U8(DW_OP.bra), + assembler.U16(3), + assembler.U8(DW_OP.lit2), + ], + 2, + ) + + def test_variable_expr_op_nop(self): + self._assert_dwarf_expr_eval( + [ + assembler.U8(DW_OP.nop), + assembler.U8(DW_OP.nop), + assembler.U8(DW_OP.lit25), + assembler.U8(DW_OP.nop), + assembler.U8(DW_OP.nop), + ], + 25, + ) + + def test_variable_const_signed(self): + for form in ( + DW_FORM.data1, + DW_FORM.data2, + DW_FORM.data4, + DW_FORM.data8, + DW_FORM.sdata, + ): + + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.const_value, form, 1), + ), + ), + ) + ) + ) + self.assertIdentical( + prog["x"], Object(prog, prog.int_type("int", 4, True), 1) + ) + + def test_variable_const_unsigned(self): + for form in ( + DW_FORM.data1, + DW_FORM.data2, + DW_FORM.data4, + DW_FORM.data8, + DW_FORM.udata, + ): + prog = dwarf_program( + wrap_test_type_dies( + ( + unsigned_int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.const_value, form, 1), + ), + ), + ) + ) + ) + self.assertIdentical( + prog["x"], Object(prog, prog.int_type("unsigned int", 4, False), 1) + ) + + def test_variable_const_block(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.upper_bound, DW_FORM.data1, 1),), + ), + ), + ), + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "p"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib( + DW_AT.const_value, + DW_FORM.block1, + b"\x01\x00\x00\x00\x02\x00\x00\x00", + ), + ), + ), + ) + ) + ) + 
self.assertIdentical( + prog["p"], + Object(prog, prog.array_type(prog.int_type("int", 4, True), 2), [1, 2]), + ) + + def test_variable_const_block_too_small(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.array_type, + (DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0),), + ( + DwarfDie( + DW_TAG.subrange_type, + (DwarfAttrib(DW_AT.upper_bound, DW_FORM.data1, 1),), + ), + ), + ), + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "p"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib( + DW_AT.const_value, + DW_FORM.block1, + b"\x01\x00\x00\x00\x02\x00\x00", + ), + ), + ), + ) + ) + ) + self.assertRaisesRegex(Exception, "too small", prog.variable, "p") + + @with_and_without_dw_form_indirect + def test_specification(self, use_dw_form_indirect): + prog = dwarf_program( + wrap_test_type_dies( + ( + int_die, + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True), + ), + ), DwarfDie( DW_TAG.variable, ( - DwarfAttrib(DW_AT.specification, DW_FORM.ref4, 2), + DwarfAttrib(DW_AT.specification, DW_FORM.ref4, 1), DwarfAttrib( DW_AT.location, DW_FORM.exprloc, b"\x03\x04\x03\x02\x01\xff\xff\xff\xff", ), ), - ) - ], + ), + ) ), - DwarfDie( - DW_TAG.variable, - ( - DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), - DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), - DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True), + use_dw_form_indirect=use_dw_form_indirect, + ) + + self.assertIdentical( + prog["x"], + Object(prog, prog.int_type("int", 4, True), address=0xFFFFFFFF01020304), + ) + + @with_and_without_dw_form_indirect + def test_namespace_reverse_specification(self, use_dw_form_indirect): + """Test specification inside namespace while declaration is outside of it.""" + prog = dwarf_program( + ( + int_die, + DwarfDie( + DW_TAG.namespace, + [ + DwarfAttrib(DW_AT.name, DW_FORM.string, 
"moho"), + DwarfAttrib(DW_AT.sibling, DW_FORM.ref4, 2), + ], + [ + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.specification, DW_FORM.ref4, 2), + DwarfAttrib( + DW_AT.location, + DW_FORM.exprloc, + b"\x03\x04\x03\x02\x01\xff\xff\xff\xff", + ), + ), + ) + ], + ), + DwarfDie( + DW_TAG.variable, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "x"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 0), + DwarfAttrib(DW_AT.declaration, DW_FORM.flag_present, True), + ), ), ), + use_dw_form_indirect=use_dw_form_indirect, ) - prog = dwarf_program(dies) self.assertIdentical( prog["x"], Object(prog, prog.int_type("int", 4, True), address=0xFFFFFFFF01020304), diff --git a/tests/test_program.py b/tests/test_program.py index 87d2f0b1e..d3a9c94c4 100644 --- a/tests/test_program.py +++ b/tests/test_program.py @@ -173,10 +173,27 @@ def test_adjacent_segments(self): ) self.assertEqual(prog.read(0xFFFF0000, 14), data[:14]) + def test_address_overflow(self): + for bits in (64, 32): + with self.subTest(bits=bits): + prog = mock_program( + segments=[ + MockMemorySegment(b"cd", 0x0), + MockMemorySegment(b"abyz", 2 ** bits - 2), + ], + platform=MOCK_PLATFORM if bits == 64 else MOCK_32BIT_PLATFORM, + ) + for start in range(3): + for size in range(4 - start): + self.assertEqual( + prog.read((2 ** bits - 2 + start) % 2 ** 64, size), + b"abcd"[start : start + size], + ) + def test_overlap_same_address_smaller_size(self): # Existing segment: |_______| # New segment: |___| - prog = Program() + prog = Program(MOCK_PLATFORM) segment1 = unittest.mock.Mock(side_effect=zero_memory_read) segment2 = unittest.mock.Mock(side_effect=zero_memory_read) prog.add_memory_segment(0xFFFF0000, 128, segment1) @@ -188,7 +205,7 @@ def test_overlap_same_address_smaller_size(self): def test_overlap_within_segment(self): # Existing segment: |_______| # New segment: |___| - prog = Program() + prog = Program(MOCK_PLATFORM) segment1 = unittest.mock.Mock(side_effect=zero_memory_read) segment2 = 
unittest.mock.Mock(side_effect=zero_memory_read) prog.add_memory_segment(0xFFFF0000, 128, segment1) @@ -205,7 +222,7 @@ def test_overlap_within_segment(self): def test_overlap_same_segment(self): # Existing segment: |_______| # New segment: |_______| - prog = Program() + prog = Program(MOCK_PLATFORM) segment1 = unittest.mock.Mock(side_effect=zero_memory_read) segment2 = unittest.mock.Mock(side_effect=zero_memory_read) prog.add_memory_segment(0xFFFF0000, 128, segment1) @@ -217,7 +234,7 @@ def test_overlap_same_segment(self): def test_overlap_same_address_larger_size(self): # Existing segment: |___| # New segment: |_______| - prog = Program() + prog = Program(MOCK_PLATFORM) segment1 = unittest.mock.Mock(side_effect=zero_memory_read) segment2 = unittest.mock.Mock(side_effect=zero_memory_read) prog.add_memory_segment(0xFFFF0000, 64, segment1) @@ -229,7 +246,7 @@ def test_overlap_same_address_larger_size(self): def test_overlap_segment_tail(self): # Existing segment: |_______| # New segment: |_______| - prog = Program() + prog = Program(MOCK_PLATFORM) segment1 = unittest.mock.Mock(side_effect=zero_memory_read) segment2 = unittest.mock.Mock(side_effect=zero_memory_read) prog.add_memory_segment(0xFFFF0000, 128, segment1) @@ -241,7 +258,7 @@ def test_overlap_segment_tail(self): def test_overlap_subsume_after(self): # Existing segments: |_|_|_|_| # New segment: |_______| - prog = Program() + prog = Program(MOCK_PLATFORM) segment1 = unittest.mock.Mock(side_effect=zero_memory_read) segment2 = unittest.mock.Mock(side_effect=zero_memory_read) segment3 = unittest.mock.Mock(side_effect=zero_memory_read) @@ -258,7 +275,7 @@ def test_overlap_subsume_after(self): def test_overlap_segment_head(self): # Existing segment: |_______| # New segment: |_______| - prog = Program() + prog = Program(MOCK_PLATFORM) segment1 = unittest.mock.Mock(side_effect=zero_memory_read) segment2 = unittest.mock.Mock(side_effect=zero_memory_read) prog.add_memory_segment(0xFFFF0040, 128, segment1) @@ -270,7 
+287,7 @@ def test_overlap_segment_head(self): def test_overlap_segment_head_and_tail(self): # Existing segment: |_______||_______| # New segment: |_______| - prog = Program() + prog = Program(MOCK_PLATFORM) segment1 = unittest.mock.Mock(side_effect=zero_memory_read) segment2 = unittest.mock.Mock(side_effect=zero_memory_read) segment3 = unittest.mock.Mock(side_effect=zero_memory_read) @@ -285,7 +302,7 @@ def test_overlap_segment_head_and_tail(self): def test_overlap_subsume_at_and_after(self): # Existing segments: |_|_|_|_| # New segment: |_______| - prog = Program() + prog = Program(MOCK_PLATFORM) segment1 = unittest.mock.Mock(side_effect=zero_memory_read) segment2 = unittest.mock.Mock(side_effect=zero_memory_read) prog.add_memory_segment(0xFFFF0000, 32, segment1) diff --git a/tests/test_type.py b/tests/test_type.py index 2ebfa1ad6..7ac393e8c 100644 --- a/tests/test_type.py +++ b/tests/test_type.py @@ -945,7 +945,8 @@ def test_qualifiers(self): t = self.prog.void_type(qualifiers=Qualifiers.CONST | Qualifiers.VOLATILE) self.assertEqual(t.qualifiers, Qualifiers.CONST | Qualifiers.VOLATILE) self.assertEqual( - repr(t), "prog.void_type(qualifiers=)" + repr(t), + f"prog.void_type(qualifiers={repr(Qualifiers.CONST | Qualifiers.VOLATILE)})", ) self.assertIdentical( diff --git a/util.py b/util.py index 6a3070ab1..23f178692 100644 --- a/util.py +++ b/util.py @@ -97,6 +97,7 @@ class KernelVersion: """ def __init__(self, release: str) -> None: + self._release = release # ~ sorts before anything, including the end of the version. self._key = re.sub(r"-(rc[0-9])", r"~\1", release) @@ -109,3 +110,6 @@ def __lt__(self, other: object) -> bool: if not isinstance(other, KernelVersion): return NotImplemented return verrevcmp(self._key, other._key) < 0 + + def __str__(self) -> str: + return self._release diff --git a/vmtest/README.rst b/vmtest/README.rst index cff9b3681..88bd4f5be 100644 --- a/vmtest/README.rst +++ b/vmtest/README.rst @@ -9,7 +9,7 @@ zstd to be installed. 
Tests can also be run on specific kernels with ``-k``. This takes a comma-separated list of kernels which are wildcard patterns (e.g., ``5.6.*``) -matching a kernel release hosted on Dropbox (see below). +matching a kernel release hosted on GitHub (see below). Architecture ------------ @@ -32,13 +32,13 @@ the exit status via `virtio-serial This infrastructure is all generic. The drgn-specific parts are: -1. The kernel builds. The `kernel configuration `_ includes everything - required to run drgn and the Linux kernel helper tests. Each build is - packaged as a tarball containing ``vmlinux``, ``vmlinuz``, and kernel - modules. These packages are hosted on `Dropbox - `_. - They are managed via the Dropbox API by the `vmtest.manage `_ CLI - and downloaded by the `vmtest.download `_ module. +1. The kernel builds. These are configured with a minimal configuration + including everything required to run drgn and the Linux kernel helper tests. + Each build is packaged as a tarball containing ``vmlinux``, ``vmlinuz``, and + kernel modules. These packages are hosted in a `GitHub release + `_. They are + managed via the GitHub API by the `vmtest.manage `_ CLI and + downloaded by the `vmtest.download `_ module. 2. The test command itself. This is just some ``setup.py`` glue and the proper invocation of the Python `unittest command line interface `_. diff --git a/vmtest/asynciosubprocess.py b/vmtest/asynciosubprocess.py new file mode 100644 index 000000000..b4b90d0a5 --- /dev/null +++ b/vmtest/asynciosubprocess.py @@ -0,0 +1,46 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later

"""
asyncio counterparts of :func:`subprocess.check_call` and
:func:`subprocess.check_output`, plus a context manager for a raw pipe.
"""

import asyncio
from contextlib import contextmanager
import os

# NOTE(review): the redundant "as" alias presumably marks this name as an
# explicit re-export for strict type checkers -- confirm before simplifying.
from subprocess import CalledProcessError as CalledProcessError
from typing import Any, Iterator, Tuple


async def check_call(*args: Any, **kwds: Any) -> None:
    """
    Run a command and wait for it to exit.

    All arguments are passed through to
    :func:`asyncio.create_subprocess_exec`.

    :raises CalledProcessError: If the command exits with a non-zero status.
    """
    proc = await asyncio.create_subprocess_exec(*args, **kwds)
    returncode = await proc.wait()
    if returncode != 0:
        raise CalledProcessError(returncode, args)


async def check_output(*args: Any, **kwds: Any) -> bytes:
    """
    Run a command and return everything it wrote to standard output.

    All arguments are passed through to
    :func:`asyncio.create_subprocess_exec`, except that ``stdout`` is always
    overridden with a pipe.

    :return: The captured standard output.
    :raises CalledProcessError: If the command exits with a non-zero status.
        The output captured up to that point is available as its ``output``
        attribute.
    """
    kwds["stdout"] = asyncio.subprocess.PIPE
    proc = await asyncio.create_subprocess_exec(*args, **kwds)
    stdout = (await proc.communicate())[0]
    if proc.returncode:
        # Keep the partial output with the error, as subprocess.check_output()
        # does, so that callers can report what the command printed.
        raise CalledProcessError(proc.returncode, args, stdout)
    return stdout


async def check_output_shell(cmd: str, **kwds: Any) -> bytes:
    """
    Run a shell command line and return everything it wrote to standard
    output.

    :param cmd: Command line passed to
        :func:`asyncio.create_subprocess_shell`.
    :param kwds: Passed through to
        :func:`asyncio.create_subprocess_shell`, except that ``stdout`` is
        always overridden with a pipe.
    :return: The captured standard output.
    :raises CalledProcessError: If the command exits with a non-zero status.
        The output captured up to that point is available as its ``output``
        attribute.
    """
    kwds["stdout"] = asyncio.subprocess.PIPE
    proc = await asyncio.create_subprocess_shell(cmd, **kwds)
    stdout = (await proc.communicate())[0]
    if proc.returncode:
        raise CalledProcessError(proc.returncode, cmd, stdout)
    return stdout


@contextmanager
def pipe_context() -> Iterator[Tuple[int, int]]:
    """
    Create a pipe and close both of its ends when the ``with`` block exits.

    :return: Context manager yielding ``(read_fd, write_fd)`` as returned by
        :func:`os.pipe`.
    """
    # If os.pipe() itself fails there is nothing to clean up, so it can stay
    # outside of the try block.
    pipe_r, pipe_w = os.pipe()
    try:
        yield pipe_r, pipe_w
    finally:
        # Nested so that the write end is closed even if closing the read end
        # raises.
        try:
            os.close(pipe_r)
        finally:
            os.close(pipe_w)
-CONFIG_9P_FS=y -CONFIG_DEVTMPFS=y -CONFIG_INET=y -CONFIG_NET=y -CONFIG_NETWORK_FILESYSTEMS=y -CONFIG_NET_9P=y -CONFIG_NET_9P_VIRTIO=y -CONFIG_OVERLAY_FS=y -CONFIG_PCI=y -CONFIG_PROC_FS=y -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SYSFS=y -CONFIG_TMPFS=y -CONFIG_TMPFS_XATTR=y -CONFIG_VIRTIO_CONSOLE=y -CONFIG_VIRTIO_PCI=y - -# drgn needs /proc/kcore for live debugging. -CONFIG_PROC_KCORE=y -# In some cases, it also needs /proc/kallsyms. -CONFIG_KALLSYMS=y -CONFIG_KALLSYMS_ALL=y - -# drgn needs debug info. -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_INFO_DWARF4=y - -# Before Linux kernel commit 8757dc970f55 ("x86/crash: Define -# arch_crash_save_vmcoreinfo() if CONFIG_CRASH_CORE=y") (in v5.6), some -# important information in VMCOREINFO is initialized by the kexec code. -CONFIG_KEXEC=y - -# For block tests. -CONFIG_BLK_DEV_LOOP=m - -# For kconfig tests. -CONFIG_IKCONFIG=m -CONFIG_IKCONFIG_PROC=y diff --git a/vmtest/download.py b/vmtest/download.py index bfaad391c..8565d7619 100644 --- a/vmtest/download.py +++ b/vmtest/download.py @@ -1,146 +1,212 @@ # Copyright (c) Facebook, Inc. and its affiliates. # SPDX-License-Identifier: GPL-3.0-or-later +import argparse +from contextlib import contextmanager import fnmatch import glob -import http.client +import logging +import os from pathlib import Path import queue import re import shutil import subprocess +import tempfile import threading -from typing import Any, Dict, Iterator, Optional, Sequence, Union -import urllib.request +from typing import Any, Dict, Iterator, Sequence, Union from util import KernelVersion - -# This URL contains a mapping from file names to URLs where those files can be -# downloaded. This is needed because the files under a Dropbox shared folder -# have randomly-generated links. 
-_INDEX_URL = "https://www.dropbox.com/sh/2mcf2xvg319qdaw/AAC_AbpvQPRrHF-99B2REpXja/x86_64/INDEX?dl=1" - - -class KernelDownloader: - def __init__(self, kernels: Sequence[str], download_dir: Path) -> None: - self._kernels = kernels - self._arch_download_dir = download_dir / "x86_64" - self._cached_index: Optional[Dict[str, str]] = None - self._index_lock = threading.Lock() - self._queue: queue.Queue[Union[Path, Exception, None]] = queue.Queue() - self._thread: Optional[threading.Thread] - # Don't create the thread if we don't have anything to do. - if kernels: - self._thread = threading.Thread(target=self._download_all, daemon=True) - self._thread.start() +from vmtest.githubapi import GitHubApi + +logger = logging.getLogger(__name__) + +VMTEST_GITHUB_RELEASE = ("osandov", "drgn", "vmtest-assets") + + +def available_kernel_releases( + github_release: Dict[str, Any], arch: str +) -> Dict[str, Dict[str, Any]]: + pattern = re.compile(r"kernel-(.*)\." + re.escape(arch) + "\.tar\.zst") + releases = {} + for asset in github_release["assets"]: + match = pattern.fullmatch(asset["name"]) + if match: + releases[match.group(1)] = asset + return releases + + +def _download_kernel(gh: GitHubApi, url: str, dir: Path) -> None: + dir.parent.mkdir(parents=True, exist_ok=True) + tmp_dir = Path(tempfile.mkdtemp(dir=dir.parent)) + try: + # Don't assume that the available version of tar has zstd support or + # the non-standard -I/--use-compress-program option. 
+ with subprocess.Popen( + ["zstd", "-d", "-", "--stdout"], + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + ) as zstd_proc, subprocess.Popen( + ["tar", "-C", str(tmp_dir), "-x"], + stdin=zstd_proc.stdout, + ) as tar_proc, gh.download( + url + ) as resp: + assert zstd_proc.stdin is not None + shutil.copyfileobj(resp, zstd_proc.stdin) + zstd_proc.stdin.close() + if zstd_proc.returncode != 0: + raise subprocess.CalledProcessError(zstd_proc.returncode, zstd_proc.args) + if tar_proc.returncode != 0: + raise subprocess.CalledProcessError(tar_proc.returncode, tar_proc.args) + except: + shutil.rmtree(tmp_dir, ignore_errors=True) + raise + else: + tmp_dir.rename(dir) + + +def download_kernels( + download_dir: Path, arch: str, kernels: Sequence[str] +) -> Iterator[Path]: + gh = GitHubApi(os.getenv("GITHUB_TOKEN")) + + # We don't want to make any API requests if we don't have to, so we don't + # fetch this until we need it. + cached_kernel_releases = None + + def get_available_kernel_releases() -> Dict[str, Dict[str, Any]]: + nonlocal cached_kernel_releases + if cached_kernel_releases is None: + logger.info("getting available kernel releases") + download_dir.mkdir(parents=True, exist_ok=True) + cached_kernel_releases = available_kernel_releases( + gh.get_release_by_tag( + *VMTEST_GITHUB_RELEASE, cache=download_dir / "github_release.json" + ), + arch, + ) + return cached_kernel_releases + + arch_download_dir = download_dir / arch + + # Make sure all of the given kernels exist first. 
+ to_download = [] + for kernel in kernels: + if kernel != glob.escape(kernel): + try: + match = max( + ( + available + for available in get_available_kernel_releases() + if fnmatch.fnmatch(available, kernel) + ), + key=KernelVersion, + ) + except ValueError: + raise Exception(f"no available kernel release matches {kernel!r}") + else: + logger.info("kernel release pattern %s matches %s", kernel, match) + kernel = match + kernel_dir = arch_download_dir / ("kernel-" + kernel) + if kernel_dir.exists(): + # As a policy, vmtest assets will never be updated with the same + # name. Therefore, if the kernel was previously downloaded, we + # don't need to download it again. + url = None else: - self._thread = None - self._queue.put(None) - - def __enter__(self) -> "KernelDownloader": - return self - - def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: - if self._thread: - self._thread.join() - - @property - def _index(self) -> Dict[str, str]: - if self._cached_index is None: - with self._index_lock: - if self._cached_index is None: - index = {} - with urllib.request.urlopen(_INDEX_URL) as u: - for line in u: - name, url = line.decode().rstrip("\n").split("\t", 1) - index[name] = url - self._cached_index = index - return self._cached_index - - def _find_kernel(self, pattern: str) -> str: - matches = [] - for name, url in self._index.items(): - match = re.fullmatch(r"kernel-(.*)\.tar\.zst", name) - if match and fnmatch.fnmatch(match.group(1), pattern): - matches.append(match.group(1)) - if not matches: - raise Exception(f"no kernel release matches {pattern!r}") - return max(matches, key=KernelVersion) - - def _download(self, release: str) -> Path: - # Only do the wildcard lookup if the release is a wildcard - # pattern. 
- if release != glob.escape(release): - release = self._find_kernel(release) - path = self._arch_download_dir / release - if not path.exists(): - name = f"kernel-{release}.tar.zst" - tmp = path.with_name(path.name + ".tmp") - tmp.mkdir(parents=True) - remove_tmp = True try: - # Don't assume that the available version of tar has zstd - # support or the non-standard -I/--use-compress-program option. - with subprocess.Popen( - ["zstd", "-d", "-", "--stdout"], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - ) as zstd_proc, subprocess.Popen( - ["tar", "-C", str(tmp), "-x"], stdin=zstd_proc.stdout - ) as tar_proc, urllib.request.urlopen( - self._index[name] - ) as u: - assert zstd_proc.stdin is not None - shutil.copyfileobj(u, zstd_proc.stdin) - zstd_proc.stdin.close() - if u.length: - raise http.client.IncompleteRead(b"", u.length) - if zstd_proc.returncode != 0: - raise subprocess.CalledProcessError( - zstd_proc.returncode, zstd_proc.args - ) - if tar_proc.returncode != 0: - raise subprocess.CalledProcessError( - tar_proc.returncode, tar_proc.args - ) - tmp.rename(path) - remove_tmp = False - finally: - if remove_tmp: - shutil.rmtree(tmp) - return path - - def _download_all(self) -> None: - try: - for kernel in self._kernels: - self._queue.put(self._download(kernel)) - self._queue.put(None) - except Exception as e: - self._queue.put(e) - - def __iter__(self) -> Iterator[Path]: + asset = get_available_kernel_releases()[kernel] + except KeyError: + raise Exception(f"kernel release {kernel} not found") + url = asset["url"] + to_download.append((kernel, kernel_dir, url)) + + for release, kernel_dir, url in to_download: + if url is None: + logger.info( + "kernel release %s already downloaded to %s", release, kernel_dir + ) + else: + logger.info( + "downloading kernel release %s to %s from %s", release, kernel_dir, url + ) + _download_kernel(gh, url, kernel_dir) + yield kernel_dir + + +def _download_kernels_thread( + download_dir: Path, + arch: str, + kernels: 
Sequence[str], + q: "queue.Queue[Union[Path, Exception]]", +) -> None: + try: + it = download_kernels(download_dir, arch, kernels) while True: - result = self._queue.get() - if isinstance(result, Exception): - raise result - elif result is None: - break - yield result + q.put(next(it)) + except Exception as e: + q.put(e) -if __name__ == "__main__": - import argparse +@contextmanager +def download_kernels_in_thread( + download_dir: Path, arch: str, kernels: Sequence[str] +) -> Iterator[Iterator[Path]]: + q: "queue.Queue[Union[Path, Exception]]" = queue.Queue() + + def aux() -> Iterator[Path]: + while True: + obj = q.get() + if isinstance(obj, StopIteration): + break + elif isinstance(obj, Exception): + raise obj + yield obj + + thread = None + try: + thread = threading.Thread( + target=_download_kernels_thread, + args=(download_dir, arch, kernels, q), + daemon=True, + ) + thread.start() + yield aux() + finally: + if thread: + thread.join() + + +def main() -> None: + logging.basicConfig( + format="%(asctime)s:%(levelname)s:%(name)s:%(message)s", level=logging.INFO + ) parser = argparse.ArgumentParser( - description="download vmtest kernels", + description="Download drgn vmtest assets", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument( - "-d", "--directory", default="build/vmtest", help="directory to download to" + "-k", + "--kernel", + action="append", + dest="kernels", + help="download latest kernel matching glob pattern; may be given multiple times", + ) + parser.add_argument( + "-d", + "--download-directory", + metavar="DIR", + type=Path, + default="build/vmtest", + help="directory to download assets to", ) - parser.add_argument("kernels", metavar="KERNEL", nargs="*") args = parser.parse_args() - with KernelDownloader(args.kernels, Path(args.directory)) as downloader: - for kernel in downloader: - print(kernel) + for path in download_kernels(args.download_directory, "x86_64", args.kernels or ()): + print(path) + + +if __name__ == 
"__main__": + main() diff --git a/vmtest/githubapi.py b/vmtest/githubapi.py new file mode 100644 index 000000000..d37dd73bc --- /dev/null +++ b/vmtest/githubapi.py @@ -0,0 +1,174 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# SPDX-License-Identifier: GPL-3.0-or-later + +import json +from pathlib import Path +import typing +from typing import Any, Dict, Mapping, Optional, Union +import urllib.error +import urllib.parse +import urllib.request + +if typing.TYPE_CHECKING: + import aiohttp + + +_CACHE = Optional[Union[str, bytes, Path]] + + +# Hacky base class because we want the GitHub API from async and non-async +# code. +# +# This provides a slapdash interface for caching a response in a file so that +# we can do conditional requests +# (https://docs.github.com/en/rest/overview/resources-in-the-rest-api#conditional-requests). +# A more complete implementation would be something like a SQLite database +# indexed by endpoint, but this is simpler and good enough for now. +class _GitHubApiBase: + _HOST = "https://api.github.com" + + def __init__(self, token: Optional[str]) -> None: + self._headers = { + "Accept": "application/vnd.github.v3+json", + "User-Agent": "osandov/drgn vmtest", + } + if token is not None: + self._headers["Authorization"] = "token " + token + + def _request( + self, + method: str, + url: str, + *, + params: Optional[Mapping[str, str]] = None, + headers: Optional[Dict[str, str]] = None, + data: Any = None, + ) -> Any: + raise NotImplementedError() + + def _cached_get_json(self, endpoint: str, cache: _CACHE) -> Any: + raise NotImplementedError() + + def _read_cache(self, cache: _CACHE) -> Optional[Mapping[str, Any]]: + if not cache: + return None + try: + with open(cache, "r") as f: + return json.load(f) # type: ignore[no-any-return] + except FileNotFoundError: + return None + + def _cached_get_headers( + self, cached: Optional[Mapping[str, Any]] + ) -> Dict[str, str]: + if cached is not None: + if "etag" in cached: + return 
{**self._headers, "If-None-Match": cached["etag"]} + elif "last_modified" in cached: + return {**self._headers, "If-Modified-Since": cached["last_modified"]} + return self._headers + + def _write_cache( + self, cache: _CACHE, body: Any, headers: Mapping[str, str] + ) -> None: + if cache is not None and ("ETag" in headers or "Last-Modified" in headers): + to_cache = {"body": body} + if "ETag" in headers: + to_cache["etag"] = headers["ETag"] + if "Last-Modified" in headers: + to_cache["last_modified"] = headers["Last-Modified"] + with open(cache, "w") as f: + json.dump(to_cache, f) + + def get_release_by_tag( + self, owner: str, repo: str, tag: str, *, cache: _CACHE = None + ) -> Any: + return self._cached_get_json(f"repos/{owner}/{repo}/releases/tags/{tag}", cache) + + def download(self, url: str) -> Any: + return self._request( + "GET", url, headers={**self._headers, "Accept": "application/octet-stream"} + ) + + def upload(self, url: str, data: Any, content_type: str) -> Any: + return self._request( + "POST", + url, + headers={**self._headers, "Content-Type": content_type}, + data=data, + ) + + +class GitHubApi(_GitHubApiBase): + def _request( + self, + method: str, + url: str, + *, + params: Optional[Mapping[str, str]] = None, + headers: Optional[Dict[str, str]] = None, + data: Any = None, + ) -> Any: + if params: + url += "?" 
+ urllib.parse.urlencode(params) + return urllib.request.urlopen( + urllib.request.Request( + url, + data=data, + headers={} if headers is None else headers, + method=method, + ) + ) + + def _cached_get_json(self, endpoint: str, cache: _CACHE) -> Any: + cached = self._read_cache(cache) + try: + with urllib.request.urlopen( + urllib.request.Request( + self._HOST + "/" + endpoint, + headers=self._cached_get_headers(cached), + ) + ) as resp: + body = json.load(resp) + self._write_cache(cache, body, resp.headers) + return body + except urllib.error.HTTPError as e: + if e.code == 304 and cached is not None: + return cached["body"] + else: + raise + + +class AioGitHubApi(_GitHubApiBase): + def __init__(self, session: "aiohttp.ClientSession", token: Optional[str]) -> None: + super().__init__(token) + self._session = session + + def _request( + self, + method: str, + url: str, + *, + params: Optional[Mapping[str, str]] = None, + headers: Optional[Dict[str, str]] = None, + data: Any = None, + ) -> Any: + return self._session.request( + method, url, params=params, headers=headers, data=data + ) + + async def _cached_get_json(self, endpoint: str, cache: _CACHE) -> Any: + cached = self._read_cache(cache) + async with self._session.get( + self._HOST + "/" + endpoint, + headers=self._cached_get_headers(cached), + raise_for_status=True, + ) as resp: + if resp.status == 304: + if cached is None: + raise Exception("got HTTP 304 but response was not cached") + return cached["body"] + else: + body = await resp.json() + self._write_cache(cache, body, resp.headers) + return body diff --git a/vmtest/kbuild.py b/vmtest/kbuild.py new file mode 100644 index 000000000..f41c85265 --- /dev/null +++ b/vmtest/kbuild.py @@ -0,0 +1,340 @@ +# Copyright (c) Facebook, Inc. and its affiliates. 
+# SPDX-License-Identifier: GPL-3.0-or-later + +import argparse +import asyncio +import filecmp +import logging +from pathlib import Path +import shlex +import shutil +import sys +import tempfile +from typing import IO, Any, Optional, Tuple, Union + +from util import nproc +from vmtest.asynciosubprocess import ( + CalledProcessError, + check_call, + check_output, + check_output_shell, + pipe_context, +) + +logger = logging.getLogger(__name__) + +KERNEL_LOCALVERSION = "-vmtest3" + + +def kconfig() -> str: + return rf"""# Minimal Linux kernel configuration for booting into vmtest and running drgn +# tests. + +CONFIG_LOCALVERSION="{KERNEL_LOCALVERSION}" + +CONFIG_SMP=y +CONFIG_MODULES=y + +# We run the tests in KVM. +CONFIG_HYPERVISOR_GUEST=y +CONFIG_KVM_GUEST=y +CONFIG_PARAVIRT=y +CONFIG_PARAVIRT_SPINLOCKS=y + +# Minimum requirements for vmtest. +CONFIG_9P_FS=y +CONFIG_DEVTMPFS=y +CONFIG_INET=y +CONFIG_NET=y +CONFIG_NETWORK_FILESYSTEMS=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_OVERLAY_FS=y +CONFIG_PCI=y +CONFIG_PROC_FS=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_XATTR=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_PCI=y + +# drgn needs /proc/kcore for live debugging. +CONFIG_PROC_KCORE=y +# In some cases, it also needs /proc/kallsyms. +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y + +# drgn needs debug info. +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF4=y + +# Before Linux kernel commit 8757dc970f55 ("x86/crash: Define +# arch_crash_save_vmcoreinfo() if CONFIG_CRASH_CORE=y") (in v5.6), some +# important information in VMCOREINFO is initialized by the kexec code. +CONFIG_KEXEC=y + +# For block tests. +CONFIG_BLK_DEV_LOOP=m + +# For kconfig tests. 
+CONFIG_IKCONFIG=m +CONFIG_IKCONFIG_PROC=y +""" + + +class KBuild: + def __init__( + self, + kernel_dir: Path, + build_dir: Path, + arch: str, + build_log_file: Union[int, IO[Any], None] = None, + ) -> None: + self._build_dir = build_dir + self._kernel_dir = kernel_dir + self._arch = arch + self._build_stdout = build_log_file + self._build_stderr = ( + None if build_log_file is None else asyncio.subprocess.STDOUT + ) + self._cached_make_args: Optional[Tuple[str, ...]] = None + self._cached_kernel_release: Optional[str] = None + + async def _prepare_make(self) -> Tuple[str, ...]: + if self._cached_make_args is None: + self._build_dir.mkdir(parents=True, exist_ok=True) + + debug_prefix_map = [] + # GCC uses the "logical" working directory, i.e., the PWD + # environment variable, when it can. See + # https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=libiberty/getpwd.c;hb=HEAD. + # Map both the canonical and logical paths. + build_dir_real = self._build_dir.resolve() + debug_prefix_map.append(str(build_dir_real) + "=.") + build_dir_logical = ( + await check_output_shell( + f"cd {shlex.quote(str(self._build_dir))}; pwd -L", + ) + ).decode()[:-1] + if build_dir_logical != str(build_dir_real): + debug_prefix_map.append(build_dir_logical + "=.") + + # Before Linux kernel commit 25b146c5b8ce ("kbuild: allow Kbuild to + # start from any directory") (in v5.2), O= forces the source + # directory to be absolute. Since Linux kernel commit 95fd3f87bfbe + # ("kbuild: add a flag to force absolute path for srctree") (in + # v5.3), KBUILD_ABS_SRCTREE=1 does the same. This means that except + # for v5.2, which we don't support, the source directory will + # always be absolute, and we don't need to worry about mapping it + # from a relative path. 
+ kernel_dir_real = self._kernel_dir.resolve() + if kernel_dir_real != build_dir_real: + debug_prefix_map.append(str(kernel_dir_real) + "/=./") + + cflags = " ".join(["-fdebug-prefix-map=" + map for map in debug_prefix_map]) + + self._cached_make_args = ( + "-C", + str(self._kernel_dir), + "ARCH=" + str(self._arch), + "O=" + str(build_dir_real), + "KBUILD_ABS_SRCTREE=1", + "KBUILD_BUILD_USER=drgn", + "KBUILD_BUILD_HOST=drgn", + "KAFLAGS=" + cflags, + "KCFLAGS=" + cflags, + "-j", + str(nproc()), + ) + return self._cached_make_args + + async def _kernel_release(self) -> str: + if self._cached_kernel_release is None: + # Must call _prepare_make() first. + assert self._cached_make_args is not None + self._cached_kernel_release = ( + ( + await check_output( + "make", *self._cached_make_args, "-s", "kernelrelease" + ) + ) + .decode() + .strip() + ) + return self._cached_kernel_release + + async def build(self) -> None: + logger.info("building kernel in %s", self._build_dir) + build_log_file_name = getattr(self._build_stdout, "name", None) + if build_log_file_name is not None: + logger.info("build logs in %s", build_log_file_name) + + make_args = await self._prepare_make() + + config = self._build_dir / ".config" + tmp_config = self._build_dir / ".config.vmtest.tmp" + + tmp_config.write_text(kconfig()) + await check_call( + "make", + *make_args, + "KCONFIG_CONFIG=" + tmp_config.name, + "olddefconfig", + stdout=self._build_stdout, + stderr=self._build_stderr, + ) + try: + equal = filecmp.cmp(config, tmp_config) + if not equal: + logger.info("kernel configuration changed") + except FileNotFoundError: + equal = False + logger.info("no previous kernel configuration") + if equal: + logger.info("kernel configuration did not change") + tmp_config.unlink() + else: + tmp_config.rename(config) + + kernel_release = await self._kernel_release() + logger.info("kernel release is %s", kernel_release) + await check_call( + "make", + *make_args, + "all", + stdout=self._build_stdout, + 
stderr=self._build_stderr, + ) + logger.info("built kernel %s in %s", kernel_release, self._build_dir) + + async def package(self, output_dir: Path) -> Path: + make_args = await self._prepare_make() + kernel_release = await self._kernel_release() + + tarball = output_dir / f"kernel-{kernel_release}.{self._arch}.tar.zst" + + logger.info( + "packaging kernel %s from %s to %s", + kernel_release, + self._build_dir, + tarball, + ) + + image_name = ( + (await check_output("make", *make_args, "-s", "image_name")) + .decode() + .strip() + ) + + with tempfile.TemporaryDirectory( + prefix="install.", dir=self._build_dir + ) as tmp_name: + install_dir = Path(tmp_name) + modules_dir = install_dir / "lib" / "modules" / kernel_release + + logger.info("installing modules") + await check_call( + "make", + *make_args, + "INSTALL_MOD_PATH=" + str(install_dir.resolve()), + "modules_install", + stdout=self._build_stdout, + stderr=self._build_stderr, + ) + # Don't want these symlinks. + (modules_dir / "build").unlink() + (modules_dir / "source").unlink() + + logger.info("copying vmlinux") + vmlinux = modules_dir / "vmlinux" + await check_call( + "objcopy", + "--remove-relocations=*", + self._build_dir / "vmlinux", + str(vmlinux), + ) + vmlinux.chmod(0o644) + + logger.info("copying vmlinuz") + vmlinuz = modules_dir / "vmlinuz" + shutil.copy(self._build_dir / image_name, vmlinuz) + vmlinuz.chmod(0o644) + + logger.info("creating tarball") + tarball.parent.mkdir(parents=True, exist_ok=True) + tar_cmd = ("tar", "-C", str(modules_dir), "-c", ".") + zstd_cmd = ("zstd", "-T0", "-19", "-q", "-", "-o", str(tarball), "-f") + with pipe_context() as (pipe_r, pipe_w): + tar_proc, zstd_proc = await asyncio.gather( + asyncio.create_subprocess_exec(*tar_cmd, stdout=pipe_w), + asyncio.create_subprocess_exec(*zstd_cmd, stdin=pipe_r), + ) + tar_returncode, zstd_returncode = await asyncio.gather( + tar_proc.wait(), zstd_proc.wait() + ) + if tar_returncode != 0: + raise CalledProcessError(tar_returncode, 
tar_cmd) + if zstd_returncode != 0: + raise CalledProcessError(zstd_returncode, zstd_cmd) + + logger.info( + "packaged kernel %s from %s to %s", kernel_release, self._build_dir, tarball + ) + return tarball + + +async def main() -> None: + logging.basicConfig( + format="%(asctime)s:%(levelname)s:%(name)s:%(message)s", level=logging.INFO + ) + + parser = argparse.ArgumentParser( + description="Build a drgn vmtest kernel", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "-k", + "--kernel-directory", + metavar="DIR", + type=Path, + help="kernel source tree directory", + default=".", + ) + parser.add_argument( + "-b", + "--build-directory", + metavar="DIR", + type=Path, + help="build output directory", + default=".", + ) + parser.add_argument( + "-p", + "--package", + metavar="DIR", + type=Path, + help="also package the built kernel and place it in DIR", + default=argparse.SUPPRESS, + ) + parser.add_argument( + "--dump-kconfig", + action="store_true", + help="dump kernel configuration file to standard output instead of building", + ) + args = parser.parse_args() + + if args.dump_kconfig: + sys.stdout.write(kconfig()) + return + + kbuild = KBuild(args.kernel_directory, args.build_directory, "x86_64") + await kbuild.build() + if hasattr(args, "package"): + await kbuild.package(args.package) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/vmtest/manage.py b/vmtest/manage.py index 0f53a123e..daddf592b 100644 --- a/vmtest/manage.py +++ b/vmtest/manage.py @@ -3,576 +3,159 @@ import argparse import asyncio -import difflib -import getpass -import io -import json +import itertools import logging import os from pathlib import Path import re -import shlex -import shutil import sys -import time -from typing import ( - Any, - AsyncGenerator, - BinaryIO, - Dict, - List, - Optional, - Sequence, - Set, - SupportsFloat, - SupportsRound, - TextIO, - Tuple, -) -import urllib.parse +from typing import AsyncIterator, Dict, List, 
NamedTuple, Optional, Sequence, Union import aiohttp -from yarl import URL +import uritemplate -from util import nproc +from util import KernelVersion +from vmtest.asynciosubprocess import check_call, check_output +from vmtest.download import VMTEST_GITHUB_RELEASE, available_kernel_releases +from vmtest.githubapi import AioGitHubApi +from vmtest.kbuild import KERNEL_LOCALVERSION, KBuild logger = logging.getLogger(__name__) +# [inclusive, exclusive) ranges of kernel versions to ignore when building +# latest releases of each version. +IGNORE_KERNEL_RANGES = ( + (KernelVersion("~"), KernelVersion("4.4")), + (KernelVersion("4.5~"), KernelVersion("4.9")), + (KernelVersion("4.10~"), KernelVersion("4.14")), + (KernelVersion("4.15~"), KernelVersion("4.19")), + (KernelVersion("4.20~"), KernelVersion("5.4")), + (KernelVersion("5.5~"), KernelVersion("5.10")), +) -KERNEL_CONFIG_PATH = Path(__file__).parent / "config" - -KERNEL_ORG_JSON = "https://www.kernel.org/releases.json" - -DROPBOX_API_URL = "https://api.dropboxapi.com" -CONTENT_API_URL = "https://content.dropboxapi.com" - - -def humanize_size(n: SupportsFloat, precision: int = 1) -> str: - n = float(n) - for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]: - if abs(n) < 1024: - break - n /= 1024 - else: - unit = "Yi" - if n.is_integer(): - precision = 0 - return f"{n:.{precision}f}{unit}B" - - -def humanize_duration(seconds: SupportsRound[Any]) -> str: - seconds = round(seconds) - return f"{seconds // 60}m{seconds % 60}s" - - -async def raise_for_status_body(resp: aiohttp.ClientResponse) -> None: - """ - Like aiohttp.ClientResponse.raise_for_status(), but includes the response - body. 
- """ - if resp.status >= 400: - message = resp.reason or "" - body = await resp.text() - if body: - if message: - message += ": " - message += body - raise aiohttp.ClientResponseError( - resp.request_info, - resp.history, - status=resp.status, - message=message, - headers=resp.headers, - ) - - -def get_current_localversion() -> str: - with KERNEL_CONFIG_PATH.open("r") as f: - match = re.search(r'^CONFIG_LOCALVERSION="([^"]*)"', f.read(), re.MULTILINE) - return match.group(1) if match else "" +# Use the GitHub mirrors rather than the official kernel.org repositories since +# this script usually runs in GitHub Actions. +LINUX_GIT_URL = "https://github.com/torvalds/linux.git" +STABLE_LINUX_GIT_URL = "https://github.com/gregkh/linux.git" -async def get_kernel_org_versions(http_client: aiohttp.ClientSession) -> List[str]: - async with http_client.get(KERNEL_ORG_JSON, raise_for_status=True) as resp: - releases = (await resp.json())["releases"] - return [ - release["version"] - for release in releases - if release["moniker"] in {"mainline", "stable", "longterm"} +async def get_latest_kernel_tags() -> List[str]: + mainline_refs, stable_refs = await asyncio.gather( + check_output("git", "ls-remote", "--tags", "--refs", LINUX_GIT_URL), + check_output("git", "ls-remote", "--tags", "--refs", STABLE_LINUX_GIT_URL), + ) + latest: Dict[str, KernelVersion] = {} + for match in itertools.chain( + re.finditer( + r"^[a-f0-9]+\s+refs/tags/v([0-9]+\.[0-9]+)(-rc[0-9]+)?$", + mainline_refs.decode(), + re.M, + ), + re.finditer( + r"^[a-f0-9]+\s+refs/tags/v([0-9]+\.[0-9]+)(\.[0-9]+)$", + stable_refs.decode(), + re.M, + ), + ): + version = KernelVersion(match.group(1) + (match.group(2) or "")) + for start_version, end_version in IGNORE_KERNEL_RANGES: + if start_version <= version < end_version: + break + else: + latest[match.group(1)] = max(version, latest.get(match.group(1), version)) + return ["v" + str(version) for version in sorted(latest.values(), reverse=True)] + + +def 
kernel_tag_to_release(tag: str) -> str: + match = re.fullmatch(r"v([0-9]+\.[0-9]+)(\.[0-9]+)?(-rc\d+)?", tag) + assert match + return "".join( + [ + match.group(1), + match.group(2) or ".0", + match.group(3) or "", + KERNEL_LOCALVERSION, ] + ) -async def get_available_kernel_releases( - http_client: aiohttp.ClientSession, token: str -) -> Set[str]: - headers = {"Authorization": "Bearer " + token} - params = {"path": "/Public/x86_64"} - url = DROPBOX_API_URL + "/2/files/list_folder" - available = set() - while True: - async with http_client.post(url, headers=headers, json=params) as resp: - if resp.status == 409 and (await resp.json())["error_summary"].startswith( - "path/not_found/" - ): - break - await raise_for_status_body(resp) - obj = await resp.json() - for entry in obj["entries"]: - if entry[".tag"] != "file": - continue - match = re.fullmatch(r"kernel-(.*)\.tar\.zst", entry["name"]) - if match: - available.add(match.group(1)) - if not obj["has_more"]: - break - url = DROPBOX_API_URL + "/2/files/list_folder/continue" - params = {"cursor": obj["cursor"]} - return available - - -class CalledProcessError(Exception): - def __init__(self, returncode: int, cmd: Sequence[str]) -> None: - self.returncode = returncode - self.cmd = cmd - - def __str__(self) -> str: - command = " ".join(shlex.quote(arg) for arg in self.cmd) - raise Exception( - f"Command {command!r} returned non-zero exit status {self.returncode}" - ) - +async def fetch_kernel_tags(kernel_dir: Path, kernel_tags: Sequence[str]) -> None: + if not kernel_dir.exists(): + logger.info("creating kernel repository in %s", kernel_dir) + await check_call("git", "init", "-q", str(kernel_dir)) -async def check_call(*args: Any, **kwds: Any) -> None: - proc = await asyncio.create_subprocess_exec(*args, **kwds) - returncode = await proc.wait() - if returncode != 0: - raise CalledProcessError(returncode, args) - - -async def check_output(*args: Any, **kwds: Any) -> bytes: - kwds["stdout"] = asyncio.subprocess.PIPE - 
proc = await asyncio.create_subprocess_exec(*args, **kwds) - stdout = (await proc.communicate())[0] - if proc.returncode: - raise CalledProcessError(proc.returncode, args) - return stdout - - -def getpwd() -> str: - """ - Get the current working directory in the same way that GCC does. See - https://gcc.gnu.org/git/?p=gcc.git;a=blob;f=libiberty/getpwd.c;hb=HEAD. - """ - try: - pwd = os.environ["PWD"] - if pwd.startswith("/"): - pwdstat = os.stat(pwd) - dotstat = os.stat(".") - if dotstat.st_ino == pwdstat.st_ino and dotstat.st_dev == pwdstat.st_dev: - return pwd - except (KeyError, OSError): - pass - return os.getcwd() - - -async def build_kernel( - commit: str, build_dir: Path, log_file: TextIO -) -> Tuple[str, Path]: - """ - Returns built kernel release (i.e., `uname -r`) and image name (e.g., - `arch/x86/boot/bzImage`). - """ - await check_call( - "git", "checkout", commit, stdout=log_file, stderr=asyncio.subprocess.STDOUT - ) + mainline_tags = [] + stable_tags = [] + for tag in kernel_tags: + if re.fullmatch("v[0-9]+\.[0-9]+\.[0-9]+", tag): + stable_tags.append(tag) + else: + mainline_tags.append(tag) - shutil.copy(KERNEL_CONFIG_PATH, build_dir / ".config") - - logger.info("building %s", commit) - start = time.monotonic() - cflags = f"-fdebug-prefix-map={getpwd() / build_dir}=" - kbuild_args = [ - "KBUILD_BUILD_USER=drgn", - "KBUILD_BUILD_HOST=drgn", - "KAFLAGS=" + cflags, - "KCFLAGS=" + cflags, - "O=" + str(build_dir), - "-j", - str(nproc()), - ] - await check_call( - "make", - *kbuild_args, - "olddefconfig", - "all", - stdout=log_file, - stderr=asyncio.subprocess.STDOUT, - ) - elapsed = time.monotonic() - start - logger.info("built %s in %s", commit, humanize_duration(elapsed)) + for (name, url, tags) in ( + ("mainline", LINUX_GIT_URL, mainline_tags), + ("stable", STABLE_LINUX_GIT_URL, stable_tags), + ): + if tags: + logger.info("fetching %s kernel tags: %s", name, ", ".join(tags)) + await check_call( + "git", + "-C", + str(kernel_dir), + "fetch", + 
"--depth", + "1", + url, + *(f"refs/tags/{tag}:refs/tags/{tag}" for tag in tags), + ) - logger.info("packaging %s", commit) - start = time.monotonic() - release = ( - ( - await check_output( - "make", *kbuild_args, "-s", "kernelrelease", stderr=log_file - ) - ) - .decode() - .strip() - ) - image_name = ( - (await check_output("make", *kbuild_args, "-s", "image_name", stderr=log_file)) - .decode() - .strip() - ) +async def build_kernels( + kernel_dir: Path, build_dir: Path, arch: str, kernel_revs: Sequence[str] +) -> AsyncIterator[Path]: + build_dir.mkdir(parents=True, exist_ok=True) + for rev in kernel_revs: + rev_build_dir = build_dir / ("build-" + rev) + logger.info("checking out %s in %s", rev, rev_build_dir) + await check_call("git", "-C", str(kernel_dir), "checkout", "-q", rev) + with open(build_dir / f"build-{rev}.log", "w") as build_log_file: + kbuild = KBuild(kernel_dir, rev_build_dir, arch, build_log_file) + await kbuild.build() + yield await kbuild.package(build_dir) - install_dir = build_dir / "install" - modules_dir = install_dir / "lib" / "modules" / release - await check_call( - "make", - *kbuild_args, - "INSTALL_MOD_PATH=install", - "modules_install", - stdout=log_file, - stderr=asyncio.subprocess.STDOUT, - ) - # Don't want these symlinks. 
- (modules_dir / "build").unlink() - (modules_dir / "source").unlink() - - vmlinux = modules_dir / "vmlinux" - await check_call( - "objcopy", - "--remove-relocations=*", - str(build_dir / "vmlinux"), - str(vmlinux), - stdout=log_file, - stderr=asyncio.subprocess.STDOUT, - ) - vmlinux.chmod(0o644) - - vmlinuz = modules_dir / "vmlinuz" - shutil.copy(build_dir / image_name, vmlinuz) - vmlinuz.chmod(0o644) - - tarball = build_dir / "kernel.tar.zst" - tar_command = ("tar", "-C", str(modules_dir), "-c", ".") - zstd_command = ("zstd", "-T0", "-19", "-q", "-", "-o", str(tarball)) - pipe_r, pipe_w = os.pipe() - try: - tar_proc, zstd_proc = await asyncio.gather( - asyncio.create_subprocess_exec( - *tar_command, stdout=pipe_w, stderr=log_file - ), - asyncio.create_subprocess_exec( - *zstd_command, - stdin=pipe_r, - stdout=log_file, - stderr=asyncio.subprocess.STDOUT, - ), - ) - finally: - os.close(pipe_r) - os.close(pipe_w) - tar_returncode, zstd_returncode = await asyncio.gather( - tar_proc.wait(), zstd_proc.wait() - ) - if tar_returncode != 0: - raise CalledProcessError(tar_returncode, tar_command) - if zstd_returncode != 0: - raise CalledProcessError(zstd_returncode, zstd_command) - shutil.rmtree(install_dir) - elapsed = time.monotonic() - start - logger.info("packaged %s in %s", commit, humanize_duration(elapsed)) - - return release, tarball - - -async def try_build_kernel(commit: str) -> Optional[Tuple[str, Path]]: - """Returns (kernel release, tarball path) on success, None on error.""" - proc = await asyncio.create_subprocess_exec( - "git", - "rev-parse", - "--verify", - "-q", - commit + "^{commit}", - stdout=asyncio.subprocess.DEVNULL, - ) - if (await proc.wait()) != 0: - logger.error("unknown revision: %s", commit) - return None - - build_dir = Path("build-" + commit) - try: - log_path = build_dir / "build.log" - logger.info("preparing %r; logs in %r", str(build_dir), str(log_path)) - build_dir.mkdir(0o755) - with log_path.open("w") as log_file: - try: - return await 
build_kernel(commit, build_dir, log_file) - except Exception: - logger.exception("building %s failed; see %r", commit, str(log_path)) - return None - except Exception: - logger.exception("preparing %r failed", str(build_dir)) - return None - - -class Uploader: - CHUNK_SIZE = 8 * 1024 * 1024 - - def __init__(self, http_client: aiohttp.ClientSession, token: str) -> None: - self._http_client = http_client - self._token = token - self._pending: List[Tuple[str, asyncio.Task[bool]]] = [] - - async def _upload_file_obj(self, file: BinaryIO, commit: Dict[str, Any]) -> None: - headers = { - "Authorization": "Bearer " + self._token, - "Content-Type": "application/octet-stream", - } - offset = 0 - session_id = None - while True: - data = file.read(Uploader.CHUNK_SIZE) - last = len(data) < Uploader.CHUNK_SIZE - if session_id is None: - if last: - endpoint = "upload" - params = commit - else: - endpoint = "upload_session/start" - params = {} - else: - params = { - "cursor": {"offset": offset, "session_id": session_id}, - } - if last: - endpoint = "upload_session/finish" - params["commit"] = commit - else: - endpoint = "upload_session/append_v2" - offset += len(data) - headers["Dropbox-API-Arg"] = json.dumps(params) - url = CONTENT_API_URL + "/2/files/" + endpoint - async with self._http_client.post(url, headers=headers, data=data) as resp: - await raise_for_status_body(resp) - if endpoint == "upload_session/start": - session_id = (await resp.json())["session_id"] - if last: - break +class AssetUploadWork(NamedTuple): + upload_url: str + path: Union[str, bytes, Path] + name: str + content_type: str - async def _try_upload_file_obj( - self, file: BinaryIO, commit: Dict[str, Any] - ) -> bool: - try: - logger.info("uploading %r", commit["path"]) - start = time.monotonic() - await self._upload_file_obj(file, commit) - elapsed = time.monotonic() - start - logger.info("uploaded %r in %s", commit["path"], humanize_duration(elapsed)) - return True - except Exception: - 
logger.exception("uploading %r failed", commit["path"]) - return False - async def _try_upload_file(self, path: str, commit: Dict[str, Any]) -> bool: - try: - logger.info("uploading %r to %r", path, commit["path"]) - start = time.monotonic() - with open(path, "rb") as f: - await self._upload_file_obj(f, commit) - elapsed = time.monotonic() - start - logger.info( - "uploaded %r to %r in %s", - path, - commit["path"], - humanize_duration(elapsed), - ) - return True - except Exception: - logger.exception("uploading %r to %r failed", path, commit["path"]) - return False - - @staticmethod - def _make_commit( - dst_path: str, *, mode: Optional[str] = None, autorename: Optional[bool] = None - ) -> Dict[str, Any]: - commit: Dict[str, Any] = {"path": dst_path} - if mode is not None: - commit["mode"] = mode - if autorename is not None: - commit["autorename"] = autorename - return commit - - def queue_file_obj(self, file: BinaryIO, *args: Any, **kwds: Any) -> None: - commit = self._make_commit(*args, **kwds) - task = asyncio.create_task(self._try_upload_file_obj(file, commit)) - self._pending.append((commit["path"], task)) - - def queue_file(self, src_path: str, *args: Any, **kwds: Any) -> None: - commit = self._make_commit(*args, **kwds) - task = asyncio.create_task(self._try_upload_file(src_path, commit)) - self._pending.append((commit["path"], task)) - - async def wait(self) -> Tuple[List[str], List[str]]: - """ - Returns list of successfully uploaded paths and list of paths that - failed to upload. - """ - succeeded = [] - failed = [] - for path, task in self._pending: - if await task: - succeeded.append(path) - else: - failed.append(path) - self._pending.clear() - return succeeded, failed - - -async def list_shared_folder( - http_client: aiohttp.ClientSession, url: str -) -> AsyncGenerator[Tuple[str, bool, str], None]: - """ - List a Dropbox shared folder. 
The Dropbox API doesn't provide a way to get - the links for entries inside of a shared folder, so we're forced to scrape - them from the webpage and XHR endpoint. - - Generates filename, whether it is a directory, and its shared link. - """ - method = "GET" - data = None +async def asset_uploader( + gh: AioGitHubApi, + queue: "asyncio.Queue[Optional[AssetUploadWork]]", +) -> bool: + success = True while True: - async with http_client.request(method, url, data=data) as resp: - if method == "GET": - resp.raise_for_status() - match = re.search( - r'"\{\\"shared_link_infos\\".*[^\\]\}"', (await resp.text()) + work = await queue.get() + if not work: + queue.task_done() + return success + logger.info("uploading %s", work.name) + try: + with open(work.path, "rb") as f: + await gh.upload( + uritemplate.expand(work.upload_url, name=work.name), + f, + work.content_type, ) - assert match - obj = json.loads(json.loads(match.group())) - else: - await raise_for_status_body(resp) - obj = await resp.json() - for entry in obj["entries"]: - yield entry["filename"], entry["is_dir"], entry["href"] - if not obj["has_more_entries"]: - break - if method == "GET": - method = "POST" - url = "https://www.dropbox.com/list_shared_link_folder_entries" - data = { - "t": http_client.cookie_jar.filter_cookies(URL(url))["t"].value, - "link_key": obj["folder_share_token"]["linkKey"], - "link_type": obj["folder_share_token"]["linkType"], - "secure_hash": obj["folder_share_token"]["secureHash"], - "sub_path": obj["folder_share_token"]["subPath"], - } - assert data is not None - data["voucher"] = obj["next_request_voucher"] - - -async def walk_shared_folder( - http_client: aiohttp.ClientSession, url: str -) -> AsyncGenerator[Tuple[str, List[Tuple[str, str]], List[Tuple[str, str]]], None]: - """ - Walk a Dropbox shared folder, similar to os.walk(). Generates path, list of - files and their shared links, and list of folders and their shared links. 
- """ - stack = [("", url)] - while stack: - path, url = stack.pop() - dirs = [] - files = [] - async for filename, is_dir, href in list_shared_folder(http_client, url): - if is_dir: - dirs.append((filename, href)) - else: - files.append((filename, href)) - yield path, files, dirs - if path: - path += "/" - stack.extend((path + filename, href) for filename, href in dirs) - - -def make_download_url(url: str) -> str: - parsed = urllib.parse.urlsplit(url) - query = [ - (name, value) - for name, value in urllib.parse.parse_qsl(parsed.query) - if name != "dl" - ] - query.append(("dl", "1")) - return urllib.parse.urlunsplit(parsed._replace(query=urllib.parse.urlencode(query))) - - -async def update_index( - http_client: aiohttp.ClientSession, token: str, uploader: Uploader -) -> bool: - try: - logger.info("finding shared folder link") - headers = {"Authorization": "Bearer " + token} - params = { - "path": "/Public", - "direct_only": True, - } - async with http_client.post( - DROPBOX_API_URL + "/2/sharing/list_shared_links", - headers=headers, - json=params, - ) as resp: - await raise_for_status_body(resp) - for link in (await resp.json())["links"]: - if link[".tag"] != "folder": - continue - try: - visibility = link["link_permissions"]["resolved_visibility"][".tag"] - except KeyError: - continue - if visibility == "public": - break - else: - raise Exception("shared folder link not found") - - logger.info("walking shared folder") - async for path, files, dirs in walk_shared_folder(http_client, link["url"]): - lines = [] - old_lines = [] - for name, href in files: - href = make_download_url(href) - lines.append(name + "\t" + href + "\n") - if name == "INDEX": - async with http_client.get(href, raise_for_status=True) as resp: - old_lines = (await resp.text()).splitlines(keepends=True) - lines.extend(name + "/\t" + href + "\n" for name, href in dirs) - lines.sort() - - index_path = (path + "/" if path else "") + "INDEX" - if lines == old_lines: - logger.info("%s is up to 
date", index_path) - continue - diff = difflib.unified_diff( - old_lines, lines, fromfile="a/" + index_path, tofile="b/" + index_path - ) - logger.info("updating %s:\n%s", index_path, "".join(diff).rstrip("\n")) - uploader.queue_file_obj( - io.BytesIO("".join(lines).encode()), - "/Public/" + index_path, - mode="overwrite", - ) - succeeded, failed = await uploader.wait() - if failed: - logger.info("updates failed: %s", ", ".join(failed)) - return False - return True - except Exception: - logger.exception("updating INDEX files failed") - return False + except Exception: + logger.exception("uploading %s failed", work.name) + success = False + else: + logger.info("uploaded %s", work.name) + finally: + queue.task_done() async def main() -> None: @@ -581,138 +164,110 @@ async def main() -> None: ) parser = argparse.ArgumentParser( - description="Tool for managing drgn vmtest kernel builds and files" + description="Build and upload drgn vmtest assets", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument( - "-b", - "--build", - type=str, - action="append", - metavar="COMMIT", - help="build the given kernel release; may be given multiple times. 
" - "Must be run from a Linux kernel git repository", + "-K", + "--latest-kernels", + action="store_true", + help="build and upload latest supported kernel releases", ) parser.add_argument( - "-k", - "--build-kernel-org", + "--dry-run", action="store_true", - help="build new kernels listed on kernel.org", + help="build but don't upload anything to GitHub", ) parser.add_argument( - "-u", "--upload", action="store_true", help="upload built kernels" + "--kernel-directory", + metavar="DIR", + type=Path, + help="kernel Git repository directory (created if needed)", + default=".", ) parser.add_argument( - "-U", - "--upload-file", - type=str, - action="append", - dest="upload_files", - metavar=("SRC_PATH", "DST_PATH"), - nargs=2, - help="upload the given file; may be given multiple times", + "--build-directory", + metavar="DIR", + type=Path, + help="directory for build artifacts", + default=".", ) parser.add_argument( - "-i", "--index", action="store_true", help="update the INDEX files" + "--cache-directory", + metavar="DIR", + type=Path, + default="build/vmtest", + help="directory to cache API calls in", ) args = parser.parse_args() - if (args.build or args.build_kernel_org) and ( - not Path(".git").exists() or not Path("kernel").exists() - ): - sys.exit("-b/-k must be run from linux.git") + arch = "x86_64" + + async with aiohttp.ClientSession(trust_env=True) as session: + GITHUB_TOKEN = os.getenv("GITHUB_TOKEN") + if GITHUB_TOKEN is None and not args.dry_run: + sys.exit("GITHUB_TOKEN environment variable is not set") + gh = AioGitHubApi(session, GITHUB_TOKEN) - if args.build_kernel_org or args.upload or args.upload_files or args.index: - if os.isatty(sys.stdin.fileno()): - dropbox_token = getpass.getpass("Enter Dropbox app API token: ") + args.cache_directory.mkdir(parents=True, exist_ok=True) + github_release_coro = gh.get_release_by_tag( + *VMTEST_GITHUB_RELEASE, cache=args.cache_directory / "github_release.json" + ) + if args.latest_kernels: + github_release, 
latest_kernel_tags = await asyncio.gather( + github_release_coro, get_latest_kernel_tags() + ) else: - dropbox_token = input() - - builds_succeeded = [] - builds_failed = [] - uploads_succeeded = [] - uploads_failed = [] - - async with aiohttp.ClientSession(trust_env=True) as http_client: - # dict rather than set to preserve insertion order. - to_build = dict.fromkeys(args.build or ()) - if args.build_kernel_org: - localversion = get_current_localversion() - logger.info("current localversion: %s", localversion) - try: - # In this context, "version" is a tag name without the "v" - # prefix and "release" is a uname release string. - logger.info( - "getting list of kernel.org versions and available releases" - ) - kernel_org, available = await asyncio.gather( - get_kernel_org_versions(http_client), - get_available_kernel_releases(http_client, dropbox_token), + github_release = await github_release_coro + + kernel_releases = available_kernel_releases(github_release, arch) + logger.info( + "available %s kernel releases: %s", + arch, + ", ".join(sorted(kernel_releases, key=KernelVersion, reverse=True)), + ) + + if args.latest_kernels: + logger.info("latest kernel versions: %s", ", ".join(latest_kernel_tags)) + kernel_tags = [ + tag + for tag in latest_kernel_tags + if kernel_tag_to_release(tag) not in kernel_releases + ] + else: + kernel_tags = [] + + if kernel_tags: + logger.info("kernel versions to build: %s", ", ".join(kernel_tags)) + + if not args.dry_run: + upload_queue: "asyncio.Queue[Optional[AssetUploadWork]]" = ( + asyncio.Queue() ) - logger.info("kernel.org versions: %s", ", ".join(kernel_org)) - logger.info("available releases: %s", ", ".join(sorted(available))) - for version in kernel_org: - match = re.fullmatch(r"(\d+\.\d+)(\.\d+)?(-rc\d+)?", version) - if not match: - logger.error("couldn't parse kernel.org version %r", version) - sys.exit(1) - release = "".join( - [ - match.group(1), - match.group(2) or ".0", - match.group(3) or "", - localversion, - ] + 
uploader = asyncio.create_task(asset_uploader(gh, upload_queue)) + + await fetch_kernel_tags(args.kernel_directory, kernel_tags) + + async for kernel_package in build_kernels( + args.kernel_directory, args.build_directory, arch, kernel_tags + ): + if args.dry_run: + logger.info("would upload %s", kernel_package) + else: + await upload_queue.put( + AssetUploadWork( + upload_url=github_release["upload_url"], + path=kernel_package, + name=kernel_package.name, + content_type="application/zstd", + ) ) - if release not in available: - to_build["v" + version] = None - except Exception: - logger.exception( - "failed to get kernel.org releases and/or available releases" - ) - sys.exit(1) - - if args.upload or args.upload_files or args.index: - uploader = Uploader(http_client, dropbox_token) - - for src_path, dst_path in args.upload_files or (): - uploader.queue_file(src_path, dst_path, autorename=False) - - if to_build: - logger.info("releases to build: %s", ", ".join(to_build)) - for kernel in to_build: - result = await try_build_kernel(kernel) - if result is None: - builds_failed.append(kernel) - continue - builds_succeeded.append(kernel) - release, tarball = result - if args.upload: - uploader.queue_file( - str(tarball), - f"/Public/x86_64/kernel-{release}.tar.zst", - autorename=False, - ) - if args.upload or args.upload_files: - succeeded, failed = await uploader.wait() - uploads_succeeded.extend(succeeded) - uploads_failed.extend(failed) - - if builds_succeeded: - logger.info("successfully built: %s", ", ".join(builds_succeeded)) - if builds_failed: - logger.error("builds failed: %s", ", ".join(builds_failed)) - if uploads_succeeded: - logger.info("successfully uploaded: %s", ", ".join(uploads_succeeded)) - if uploads_failed: - logger.info("uploads failed: %s", ", ".join(uploads_failed)) - - if builds_failed or uploads_failed: - logger.error("builds and/or uploads failed; exiting") - sys.exit(1) - - if args.index and not await update_index(http_client, dropbox_token, 
uploader): - sys.exit(1) + if not args.dry_run: + await upload_queue.put(None) + await upload_queue.join() + if not await uploader: + sys.exit("some uploads failed") if __name__ == "__main__": diff --git a/vmtest/vm.py b/vmtest/vm.py index eae3c4e7c..c3fe15c78 100644 --- a/vmtest/vm.py +++ b/vmtest/vm.py @@ -275,6 +275,8 @@ def run_in_vm(command: str, kernel_dir: Path, build_dir: Path) -> int: parser.add_argument( "-d", "--directory", + metavar="DIR", + type=Path, default="build/vmtest", help="directory for build artifacts and downloaded kernels", ) @@ -303,16 +305,13 @@ def run_in_vm(command: str, kernel_dir: Path, build_dir: Path) -> int: if kernel.startswith(".") or kernel.startswith("/"): kernel_dir = Path(kernel) else: - from vmtest.download import KernelDownloader + from vmtest.download import download_kernels - with KernelDownloader( - [getattr(args, "kernel", "*")], download_dir=Path(args.directory) - ) as downloader: - kernel_dir = next(iter(downloader)) + kernel_dir = next(download_kernels(args.directory, "x86_64", (kernel,))) try: command = " ".join(args.command) if args.command else '"$BUSYBOX" sh -i' - sys.exit(run_in_vm(command, kernel_dir, Path(args.directory))) + sys.exit(run_in_vm(command, kernel_dir, args.directory)) except LostVMError as e: print("error:", e, file=sys.stderr) sys.exit(args.lost_status)