diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 041ab32a2..ae89f8aed 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.9, 3.8, 3.7, 3.6] + python-version: ['3.10', '3.9', '3.8', '3.7', '3.6'] cc: [gcc, clang] fail-fast: false env: @@ -23,11 +23,11 @@ jobs: run: | sudo apt-get update sudo apt-get install busybox-static libelf-dev libdw-dev qemu-kvm zstd ${{ matrix.cc == 'clang' && 'libomp-$(clang --version | sed -rn "s/.*clang version ([0-9]+).*/\\1/p")-dev' || '' }} - pip install mypy + pip install mypy pyroute2 - name: Generate version.py run: python setup.py --version - name: Check with mypy - run: mypy --strict --no-warn-return-any drgn _drgn.pyi + run: mypy --strict --no-warn-return-any --no-warn-unused-ignores drgn _drgn.pyi - name: Build and test with ${{ matrix.cc }} run: python setup.py test -K diff --git a/.github/workflows/dco-check.yml b/.github/workflows/dco-check.yml new file mode 100644 index 000000000..319b4b125 --- /dev/null +++ b/.github/workflows/dco-check.yml @@ -0,0 +1,27 @@ +name: DCO Check + +on: + pull_request: + types: [opened, synchronize, reopened, ready_for_review] + +jobs: + check: + if: ${{ !github.event.pull_request.draft }} + runs-on: ubuntu-latest + steps: + - name: Checkout commit logs + run: | + git init + git fetch --filter=blob:none "$GITHUB_SERVER_URL/$GITHUB_REPOSITORY" "$GITHUB_BASE_REF" "$GITHUB_REF" + - name: Check for DCO sign-offs + run: | + no_sign_off="$(git log --no-merges --grep=Signed-off-by --invert-grep "FETCH_HEAD..$GITHUB_SHA")" + if [ -z "$no_sign_off" ]; then + echo "All commits have a Developer Certificate of Origin sign-off" + else + echo "The following commits are missing a Developer Certificate of Origin sign-off;" + echo "see https://github.com/osandov/drgn/blob/main/CONTRIBUTING.rst#signing-off" + echo + echo "$no_sign_off" + exit 1 + fi diff --git a/.gitignore b/.gitignore index 
e0391ec13..6fe438be0 100644 --- a/.gitignore +++ b/.gitignore @@ -7,7 +7,10 @@ /cscope.* /dist /docs/_build +/drgn-*.tar.gz /drgn.egg-info /drgn/internal/version.py /htmlcov +/python-drgn-*.src.rpm +/python-drgn.spec __pycache__ diff --git a/.packit.yaml b/.packit.yaml new file mode 100644 index 000000000..1ca1c09c8 --- /dev/null +++ b/.packit.yaml @@ -0,0 +1,42 @@ +# See the documentation for more information: +# https://packit.dev/docs/configuration/ + +specfile_path: python-drgn.spec +synced_files: + - python-drgn.spec + - .packit.yaml + +upstream_package_name: drgn +downstream_package_name: python-drgn +actions: + get-current-version: "python3 setup.py --version" + # Fetch the specfile from Rawhide and drop any patches + post-upstream-clone: "bash -c \"curl -s https://src.fedoraproject.org/rpms/python-drgn/raw/main/f/python-drgn.spec | sed '/^Patch[0-9]/d' > python-drgn.spec\"" + +jobs: +- job: copr_build + trigger: commit + metadata: + targets: + - fedora-all-aarch64 + - fedora-all-armhfp + - fedora-all-i386 + - fedora-all-ppc64le + - fedora-all-s390x + - fedora-all-x86_64 + - epel-8-aarch64 + - epel-8-ppc64le + - epel-8-x86_64 +- job: copr_build + trigger: pull_request + metadata: + targets: + - fedora-all-aarch64 + - fedora-all-armhfp + - fedora-all-i386 + - fedora-all-ppc64le + - fedora-all-s390x + - fedora-all-x86_64 + - epel-8-aarch64 + - epel-8-ppc64le + - epel-8-x86_64 diff --git a/.readthedocs.yml b/.readthedocs.yml index 4e368ddf7..fe579b18e 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,3 +1,6 @@ version: 2 sphinx: configuration: docs/conf.py +python: + install: + - requirements: docs/requirements.txt diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 9937e961a..51571301a 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -7,18 +7,25 @@ submit changes for drgn. Building -------- -The easiest way to develop drgn is by building and running it locally. See the -`installation documentation -`_. 
+The easiest way to develop drgn is by building and running it locally. Please +build with warnings enabled. Install the dependencies from the `installation +instructions `_, then run: + +.. code-block:: console + + $ git clone https://github.com/osandov/drgn.git + $ cd drgn + $ CFLAGS="-Wall -Werror -g -O2" python3 setup.py build_ext -i + $ python3 -m drgn --help Testing ------- -.. highlight:: console - Tests should be added for all features and bug fixes. -drgn's test suite can be run with:: +drgn's test suite can be run with: + +.. code-block:: console $ python3 setup.py test @@ -27,7 +34,9 @@ add ``-K``. See `vmtest `_ for more details. Tests can also be run manually with `unittest `_ -after building locally:: +after building locally: + +.. code-block:: console $ python3 -m unittest discover -v @@ -49,14 +58,32 @@ C code in drgn mostly follows the `Linux kernel coding style `_ except that drgn requires C11 or newer, so declarations may be mixed with code. -A few other guidelines: +A few other guidelines/conventions: +* Constants should be defined as enums or ``static const`` variables rather + than macros. * Functions that can fail should return a ``struct drgn_error *`` (and return their result via an out parameter if necessary). * Out parameters should be named ``ret`` (or suffixed with ``_ret`` if there - are multiple). -* Constants should be defined as enums or ``static const`` variables rather - than macros. + are multiple) and be the last parameter(s) of the function. +* Functions that initialize an already allocated structure should be suffixed + with ``_init`` and take the structure to initialize as the first argument, + e.g., ``struct drgn_error *foo_init(struct foo *foo, int foo_flags)``. +* The matching function to deinitialize a structure should be suffixed with + ``_deinit``, e.g., ``void foo_deinit(struct foo *foo)``. 
If possible, the + definition should be placed directly after the definition of ``_init`` so + that it is easier to visually verify that everything is cleaned up. +* Functions that allocate and initialize a structure should be suffixed with + ``_create`` and either return the structure as an out parameter (e.g., + ``struct drgn_error *foo_create(int foo_flags, struct foo **ret)``) or as the + return value if they can only fail with an out-of-memory error (e.g., + ``struct foo *foo_create(int foo_flags)``). +* The matching function to free an allocated structure should be suffixed with + ``_destroy``, e.g., ``void foo_destroy(struct foo *foo)``. If possible, the + definition should be placed directly after the definition of ``_create``. + ``_destroy`` should usually allow a ``NULL`` argument, just like ``free()``. +* Functions that return a result in a ``struct drgn_object *`` parameter should + only modify the object if the function succeeds. drgn assumes some `implementation-defined behavior `_ for sanity: @@ -74,7 +101,9 @@ Python Python code in drgn should be compatible with Python 3.6 and newer. Python code should be formatted with `black `_ -and `isort `_:: +and `isort `_: + +.. code-block:: console $ isort . && black . diff --git a/MANIFEST.in b/MANIFEST.in index ef05b5490..430d66fcd 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,4 +3,4 @@ recursive-include examples *.py recursive-include tests *.py recursive-include tools *.py recursive-include vmtest *.c *.py *.rst -include COPYING util.py vmtest/config +include CONTRIBUTING.rst COPYING pytest.ini util.py vmtest/config diff --git a/README.rst b/README.rst index 8e8bd4951..2d4f44973 100644 --- a/README.rst +++ b/README.rst @@ -42,56 +42,109 @@ complex, inter-connected state in large programs. It is also designed as a library that can be used to build debugging and introspection tools; see the official `tools `_. 
-drgn was developed for debugging the Linux kernel (as an alternative to the -`crash `_ utility), but it can also debug -userspace programs written in C. C++ support is in progress. +drgn was developed at `Meta `_ for debugging the +Linux kernel (as an alternative to the `crash +`_ utility), but it can also debug userspace +programs written in C. C++ support is in progress. .. end-introduction Documentation can be found at `drgn.readthedocs.io `_. +.. start-installation + Installation ------------ -.. start-install-dependencies +Package Manager +^^^^^^^^^^^^^^^ -Install dependencies: +drgn can be installed using the package manager on some Linux distributions. -Arch Linux: +* Fedora >= 32 -.. code-block:: console + .. code-block:: console - $ sudo pacman -S --needed gcc libelf make pkgconf python python-pip python-setuptools + $ sudo dnf install drgn -Debian/Ubuntu: +* RHEL/CentOS >= 8 -.. code-block:: console + `Enable EPEL `_. Then: + + .. code-block:: console - $ sudo apt-get install gcc liblzma-dev libelf-dev libdw-dev make pkgconf python3 python3-dev python3-pip python3-setuptools zlib1g-dev + $ sudo dnf install drgn -Note that Debian Stretch, Ubuntu Trusty, and Ubuntu Xenial (and older) ship -Python versions which are too old. Python 3.6 or newer must be installed -manually. +* Arch Linux -Fedora: + Install the `drgn `_ package from + the `AUR `_. + +pip +^^^ + +If your Linux distribution doesn't package the latest release of drgn, you can +install it with `pip `_. + +First, `install pip +`_. +Then, run: .. code-block:: console - $ sudo dnf install elfutils-devel gcc make pkgconf python3 python3-devel python3-pip python3-setuptools + $ sudo pip3 install drgn + +This will install a binary wheel by default. If you get a build error, then pip +wasn't able to use the binary wheel. Install the dependencies listed `below +<#from-source>`_ and try again. 
+ +Note that RHEL/CentOS 6, Debian Stretch, Ubuntu Trusty, and Ubuntu Xenial (and +older) ship Python versions which are too old. Python 3.6 or newer must be +installed. + +From Source +^^^^^^^^^^^ + +To get the development version of drgn, you will need to build it from source. +First, install dependencies: + +* Fedora/RHEL/CentOS + + .. code-block:: console + + $ sudo dnf install autoconf automake elfutils-devel gawk gcc git libtool make pkgconf python3 python3-devel python3-pip python3-setuptools -Optionally, install: + Replace ``dnf`` with ``yum`` for RHEL/CentOS < 8. -* `libkdumpfile `_ if you want - support for kdump-compressed kernel core dumps +* Debian/Ubuntu -.. end-install-dependencies + .. code-block:: console + + $ sudo apt-get install autoconf automake gawk gcc git liblzma-dev libelf-dev libdw-dev libtool make pkgconf python3 python3-dev python3-pip python3-setuptools zlib1g-dev + +* Arch Linux + + .. code-block:: console + + $ sudo pacman -S --needed autoconf automake gawk gcc git libelf libtool make pkgconf python python-pip python-setuptools + +Optionally, install `libkdumpfile `_ +if you want support for the `makedumpfile +`_ compressed kernel core dump +format. ``libkdumpfile`` is currently only packaged on Fedora and EPEL. For +other distributions, you must install it manually. Then, run: .. code-block:: console - $ sudo pip3 install drgn + $ git clone https://github.com/osandov/drgn.git + $ cd drgn + $ python3 setup.py build + $ sudo python3 setup.py install + +.. end-installation See the `installation documentation `_ for more options. @@ -103,35 +156,54 @@ Quick Start drgn debugs the running kernel by default; run ``sudo drgn``. To debug a running program, run ``sudo drgn -p $PID``. To debug a core dump (either a -kernel vmcore or a userspace core dump), run ``drgn -c $PATH``. The program -must have debugging symbols available. +kernel vmcore or a userspace core dump), run ``drgn -c $PATH``. 
Make sure to +`install debugging symbols +`_ for +whatever you are debugging. -Then, you can access variables in the program with ``prog['name']``, access -structure members with ``.``, use various predefined helpers, and more: +Then, you can access variables in the program with ``prog['name']`` and access +structure members with ``.``: .. code-block:: pycon $ sudo drgn >>> prog['init_task'].comm (char [16])"swapper/0" - >>> d_path(fget(find_task(prog, 1), 0).f_path.address_of_()) - b'/dev/null' - >>> max(task.stime for task in for_each_task(prog)) - (u64)4192109975952 - >>> sum(disk.gendisk.part0.nr_sects for disk in for_each_disk(prog)) - (sector_t)999705952 + +You can use various predefined helpers: + +.. code-block:: pycon + + >>> len(list(bpf_prog_for_each(prog))) + 11 + >>> task = find_task(prog, 115) + >>> cmdline(task) + [b'findmnt', b'-p'] + +You can get stack traces with ``prog.stack_trace()`` and access parameters or +local variables with ``stack_trace['name']``: + +.. code-block:: pycon + + >>> trace = prog.stack_trace(task) + >>> trace[5] + #5 at 0xffffffff8a5a32d0 (do_sys_poll+0x400/0x578) in do_poll at ./fs/select.c:961:8 (inlined) + >>> poll_list = trace[5]['list'] + >>> file = fget(task, poll_list.entries[0].fd) + >>> d_path(file.f_path.address_of_()) + b'/proc/115/mountinfo' .. end-quick-start See the `user guide `_ -for more information. +for more details and features. License ------- .. start-license -Copyright (c) Facebook, Inc. and its affiliates. +Copyright (c) Meta Platforms, Inc. and affiliates. drgn is licensed under the `GPLv3 `_ or later. diff --git a/_drgn.pyi b/_drgn.pyi index 7774ab84b..28174d1ee 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ @@ -192,10 +192,19 @@ class Program: # address_or_name is positional-only. 
def symbol(self, address_or_name: Union[IntegerLike, str]) -> Symbol: """ - Get the symbol containing the given address, or the global symbol with - the given name. + Get a symbol containing the given address, or a symbol with the given + name. - :param address_or_name: The address or name. + Global symbols are preferred over weak symbols, and weak symbols are + preferred over other symbols. In other words: if a matching + :attr:`SymbolBinding.GLOBAL` or :attr:`SymbolBinding.UNIQUE` symbol is + found, it is returned. Otherwise, if a matching + :attr:`SymbolBinding.WEAK` symbol is found, it is returned. Otherwise, + any matching symbol (e.g., :attr:`SymbolBinding.LOCAL`) is returned. If + there are multiple matching symbols with the same binding, one is + returned arbitrarily. + + :param address_or_name: Address or name. :raises LookupError: if no symbol contains the given address or matches the given name """ @@ -235,7 +244,7 @@ class Program: Get the type with the given name. >>> prog.type('long') - int_type(name='long', size=8, is_signed=True) + prog.int_type(name='long', size=8, is_signed=True) :param name: The type name. :param filename: The source code file that contains the definition. See @@ -1177,6 +1186,31 @@ class Object: ``void``) """ ... + def to_bytes_(self) -> bytes: + """Return the binary representation of this object's value.""" + ... + @classmethod + def from_bytes_( + cls, + prog: Program, + type: Union[str, Type], + bytes: bytes, + *, + bit_offset: IntegerLike = 0, + bit_field_size: Optional[IntegerLike] = None, + ) -> Object: + """ + Return a value object from its binary representation. + + :param prog: Program to create the object in. + :param type: Type of the object. + :param bytes: Buffer containing value of the object. + :param bit_offset: Offset in bits from the beginning of *bytes* to the + beginning of the object. + :param bit_field_size: Size in bits of the object if it is a bit field. 
+ The default is ``None``, which means the object is not a bit field. + """ + ... def format_( self, *, @@ -1369,6 +1403,66 @@ class Symbol: size: int """Size of this symbol in bytes.""" + binding: SymbolBinding + """Linkage behavior and visibility of this symbol.""" + + kind: SymbolKind + """Kind of entity represented by this symbol.""" + +class SymbolBinding(enum.Enum): + """ + A ``SymbolBinding`` describes the linkage behavior and visibility of a + symbol. + """ + + UNKNOWN = ... + """Unknown.""" + + LOCAL = ... + """Not visible outside of the object file containing its definition.""" + + GLOBAL = ... + """Globally visible.""" + + WEAK = ... + """Globally visible but may be overridden by a non-weak global symbol.""" + + UNIQUE = ... + """ + Globally visible even if dynamic shared object is loaded locally. See GCC's + ``-fno-gnu-unique`` `option + `_. + """ + +class SymbolKind(enum.Enum): + """ + A ``SymbolKind`` describes the kind of entity that a symbol represents. + """ + + UNKNOWN = ... + """Unknown or not defined.""" + + OBJECT = ... + """Data object (e.g., variable or array).""" + + FUNC = ... + """Function or other executable code.""" + + SECTION = ... + """Object file section.""" + + FILE = ... + """Source file.""" + + COMMON = ... + """Data object in common block.""" + + TLS = ... + """Thread-local storage entity.""" + + IFUNC = ... 
+ """`Indirect function `_.""" + class StackTrace: """ A ``StackTrace`` is a :ref:`sequence ` of @@ -1522,7 +1616,7 @@ class Type: :func:`repr()` of a ``Type`` returns a Python representation of the type: >>> print(repr(prog.type('sector_t'))) - typedef_type(name='sector_t', type=int_type(name='unsigned long', size=8, is_signed=False)) + prog.typedef_type(name='sector_t', type=prog.int_type(name='unsigned long', size=8, is_signed=False)) :class:`str() ` returns a representation of the type in programming language syntax: @@ -2070,6 +2164,34 @@ def _linux_helper_radix_tree_lookup(root: Object, index: IntegerLike) -> Object: """ ... +def _linux_helper_per_cpu_ptr(ptr: Object, cpu: IntegerLike) -> Object: + """ + Return the per-CPU pointer for a given CPU. + + >>> prog["init_net"].loopback_dev.pcpu_refcnt + (int *)0x2c980 + >>> per_cpu_ptr(prog["init_net"].loopback_dev.pcpu_refcnt, 7) + *(int *)0xffff925e3ddec980 = 4 + + :param ptr: Per-CPU pointer, i.e., ``type __percpu *``. For global + variables, it's usually easier to use :func:`per_cpu()`. + :param cpu: CPU number. + :return: ``type *`` object. + """ + ... + +def _linux_helper_idle_task(prog: Program, cpu: IntegerLike) -> Object: + """ + Return the idle thread (PID 0, a.k.a. swapper) for the given CPU. + + >>> idle_task(prog, 1).comm + (char [16])"swapper/1" + + :param cpu: CPU number. + :return: ``struct task_struct *`` + """ + ... + def _linux_helper_idr_find(idr: Object, id: IntegerLike) -> Object: """ Look up the entry with the given ID in an IDR. @@ -2125,3 +2247,41 @@ def _linux_helper_kaslr_offset(prog: Program) -> int: def _linux_helper_pgtable_l5_enabled(prog: Program) -> bool: """Return whether 5-level paging is enabled.""" ... + +def _linux_helper_radix_tree_for_each(root: Object) -> Iterator[Tuple[int, Object]]: + """ + Iterate over all of the entries in a radix tree. + + :param root: ``struct radix_tree_root *`` + :return: Iterator of (index, ``void *``) tuples. + """ + ... 
+ +def _linux_helper_idr_for_each(idr: Object) -> Iterator[Tuple[int, Object]]: + """ + Iterate over all of the entries in an IDR. + + :param idr: ``struct idr *`` + :return: Iterator of (index, ``void *``) tuples. + """ + ... + +def _linux_helper_for_each_pid(prog_or_ns: Union[Program, Object]) -> Iterator[Object]: + """ + Iterate over all PIDs in a namespace. + + :param prog_or_ns: ``struct pid_namespace *`` to iterate over, or + :class:`Program` to iterate over initial PID namespace. + :return: Iterator of ``struct pid *`` objects. + """ + ... + +def _linux_helper_for_each_task(prog_or_ns: Union[Program, Object]) -> Iterator[Object]: + """ + Iterate over all of the tasks visible in a namespace. + + :param prog_or_ns: ``struct pid_namespace *`` to iterate over, or + :class:`Program` to iterate over initial PID namespace. + :return: Iterator of ``struct task_struct *`` objects. + """ + ... diff --git a/debian/control b/debian/control index e01adad2f..25e0cab76 100644 --- a/debian/control +++ b/debian/control @@ -6,6 +6,7 @@ Standards-Version: 4.1.2 Build-Depends: autoconf, automake, bison, + dh-python, elfutils, flex, gawk, @@ -24,7 +25,7 @@ Build-Depends: autoconf, pkg-config, python3, python3-distutils, - python3.6-dev, + python3-dev, zlib1g-dev Package: drgn diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index f951d3056..f1c0d2e5a 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -100,6 +100,12 @@ Some of drgn's behavior can be modified through environment variables: vice versa. This environment variable is mainly intended for testing and may be ignored in the future. +``DRGN_USE_LIBDWFL_REPORT`` + Whether drgn should use libdwfl to find debugging information for core + dumps instead of its own implementation (0 or 1). The default is 0. This + environment variable is mainly intended as an escape hatch in case of bugs + in drgn's implementation and will be ignored in the future. 
+ ``DRGN_USE_LIBKDUMPFILE_FOR_ELF`` Whether drgn should use libkdumpfile for ELF vmcores (0 or 1). The default is 0. This functionality will be removed in the future. diff --git a/docs/api_reference.rst b/docs/api_reference.rst index 15fbdd8cf..79f2190f2 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -66,6 +66,8 @@ Symbols ------- .. drgndoc:: Symbol +.. drgndoc:: SymbolBinding +.. drgndoc:: SymbolKind Stack Traces ------------ diff --git a/docs/conf.py b/docs/conf.py index d2d981c79..efb70d3c5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -11,7 +11,6 @@ "setuptools_config", "sphinx.ext.extlinks", "sphinx.ext.intersphinx", - "sphinx.ext.viewcode", ] drgndoc_paths = ["../drgn", "../_drgn.pyi"] diff --git a/docs/exts/drgndoc/docstrings.py b/docs/exts/drgndoc/docstrings.py index e6bab5369..4a728430b 100644 --- a/docs/exts/drgndoc/docstrings.py +++ b/docs/exts/drgndoc/docstrings.py @@ -1,11 +1,11 @@ #!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import argparse import functools import sys -from typing import Union, cast +from typing import cast from drgndoc.format import Formatter from drgndoc.namespace import Namespace, ResolvedNode @@ -74,7 +74,7 @@ def escape_string(s: str) -> str: if args.header: output_file.write( - f"""\ + """\ /* * Generated by drgndoc.docstrings -H. * @@ -86,7 +86,7 @@ def escape_string(s: str) -> str: """ ) else: - output_file.write(f"/* Generated by drgndoc.docstrings. */\n\n") + output_file.write("/* Generated by drgndoc.docstrings. */\n\n") def aux(resolved: ResolvedNode[Node], name: str) -> None: node = resolved.node diff --git a/docs/exts/drgndoc/ext.py b/docs/exts/drgndoc/ext.py index 6ca33b9d8..0a9d66815 100644 --- a/docs/exts/drgndoc/ext.py +++ b/docs/exts/drgndoc/ext.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. 
and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ @@ -42,7 +42,7 @@ import os.path import re -from typing import Any, Dict, List, cast +from typing import Any, Dict, cast import docutils.nodes import docutils.parsers.rst.directives diff --git a/docs/exts/drgndoc/format.py b/docs/exts/drgndoc/format.py index b7be5b6f9..1a75beec9 100644 --- a/docs/exts/drgndoc/format.py +++ b/docs/exts/drgndoc/format.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import ast diff --git a/docs/exts/drgndoc/namespace.py b/docs/exts/drgndoc/namespace.py index 808c1cb09..1a868ca2a 100644 --- a/docs/exts/drgndoc/namespace.py +++ b/docs/exts/drgndoc/namespace.py @@ -1,8 +1,8 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import itertools -from typing import Generic, Iterator, List, Mapping, Optional, Sequence, TypeVar, Union +from typing import Generic, Iterator, List, Mapping, Sequence, TypeVar, Union from drgndoc.parse import ( Class, @@ -14,7 +14,6 @@ Node, Variable, ) -from drgndoc.util import dot_join NodeT_co = TypeVar("NodeT_co", bound=Node, covariant=True) diff --git a/docs/exts/drgndoc/parse.py b/docs/exts/drgndoc/parse.py index d9f5cc91d..aa3bc9bb3 100644 --- a/docs/exts/drgndoc/parse.py +++ b/docs/exts/drgndoc/parse.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import ast diff --git a/docs/exts/drgndoc/util.py b/docs/exts/drgndoc/util.py index 5c088ac3a..1c9e9feda 100644 --- a/docs/exts/drgndoc/util.py +++ b/docs/exts/drgndoc/util.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later from typing import Optional diff --git a/docs/exts/drgndoc/visitor.py b/docs/exts/drgndoc/visitor.py index 6cf132ffd..b1fa51e1d 100644 --- a/docs/exts/drgndoc/visitor.py +++ b/docs/exts/drgndoc/visitor.py @@ -1,8 +1,7 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import ast -import sys from typing import Any, Optional diff --git a/docs/getting_debugging_symbols.rst b/docs/getting_debugging_symbols.rst new file mode 100644 index 000000000..efea9a16d --- /dev/null +++ b/docs/getting_debugging_symbols.rst @@ -0,0 +1,119 @@ +Getting Debugging Symbols +========================= + +.. highlight:: console + +Most Linux distributions don't install debugging symbols for installed packages +by default. This page documents how to install debugging symbols on common +distributions. If drgn prints an error like:: + + $ sudo drgn + could not get debugging information for: + kernel (could not find vmlinux for 5.14.14-200.fc34.x86_64) + ... + +Then you need to install debugging symbols. + +Fedora +------ + +Fedora makes it very easy to install debugging symbols with the `DNF +debuginfo-install plugin +`_, +which is installed by default. Simply run ``sudo dnf debuginfo-install +$package``:: + + $ sudo dnf debuginfo-install python3 + +To find out what package owns a binary, use ``rpm -qf``:: + + $ rpm -qf $(which python3) + python3-3.9.7-1.fc34.x86_64 + +To install symbols for the running kernel:: + + $ sudo dnf debuginfo-install kernel-$(uname -r) + +Also see the `Fedora documentation +`_. 
+ +Debian +------ + +Debian requires you to manually add the debugging symbol repositories:: + + $ sudo tee /etc/apt/sources.list.d/debug.list << EOF + deb http://deb.debian.org/debian-debug/ $(lsb_release -cs)-debug main + deb http://deb.debian.org/debian-debug/ $(lsb_release -cs)-proposed-updates-debug main + EOF + $ sudo apt update + +Then, debugging symbol packages can be installed with ``sudo apt install``. +Some debugging symbol packages are named with a ``-dbg`` suffix:: + + $ sudo apt install python3-dbg + +And some are named with a ``-dbgsym`` suffix:: + + $ sudo apt install coreutils-dbgsym + +You can use the ``find-dbgsym-packages`` command from the ``debian-goodies`` +package to find the correct name:: + + $ sudo apt install debian-goodies + $ find-dbgsym-packages $(which python3) + libc6-dbg libexpat1-dbgsym python3.9-dbg zlib1g-dbgsym + $ find-dbgsym-packages $(which cat) + coreutils-dbgsym libc6-dbg + +To install symbols for the running kernel:: + + $ sudo apt install linux-image-$(uname -r)-dbg + +Also see the `Debian documentation +`_. 
+ +Ubuntu +------ + +On Ubuntu, you must install the debugging symbol archive signing key and +manually add the debugging symbol repositories:: + + $ sudo apt update + $ sudo apt install ubuntu-dbgsym-keyring + $ sudo tee /etc/apt/sources.list.d/debug.list << EOF + deb http://ddebs.ubuntu.com $(lsb_release -cs) main restricted universe multiverse + deb http://ddebs.ubuntu.com $(lsb_release -cs)-updates main restricted universe multiverse + deb http://ddebs.ubuntu.com $(lsb_release -cs)-proposed main restricted universe multiverse + EOF + $ sudo apt update + +Like Debian, some debugging symbol packages are named with a ``-dbg`` suffix +and some are named with a ``-dbgsym`` suffix:: + + $ sudo apt install python3-dbg + $ sudo apt install coreutils-dbgsym + +You can use the ``find-dbgsym-packages`` command from the ``debian-goodies`` +package to find the correct name:: + + $ sudo apt install debian-goodies + $ find-dbgsym-packages $(which python3) + libc6-dbg libexpat1-dbgsym python3.9-dbg zlib1g-dbgsym + $ find-dbgsym-packages $(which cat) + coreutils-dbgsym libc6-dbg + +To install symbols for the running kernel:: + + $ sudo apt install linux-image-$(uname -r)-dbgsym + +Also see the `Ubuntu documentation +`_. + +Arch Linux +---------- + +Arch Linux unfortunately does not make debugging symbols available. Packages +must be manually rebuilt with debugging symbols enabled. See the `ArchWiki +`_ and the `feature +request `_. 
diff --git a/docs/index.rst b/docs/index.rst index f0e23d4bb..b4a68b3f3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -37,3 +37,4 @@ Table of Contents api_reference helpers case_studies + getting_debugging_symbols diff --git a/docs/installation.rst b/docs/installation.rst index e2f12d96d..e60a0381e 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -11,6 +11,12 @@ drgn depends on: - `Python `_ 3.6 or newer - `elfutils `_ 0.165 or newer +It optionally depends on: + +- `libkdumpfile `_ for `makedumpfile + `_ compressed kernel core dump + format support + The build requires: - `GCC `_ @@ -18,10 +24,6 @@ The build requires: - `pkgconf `_ - `setuptools `_ -.. include:: ../README.rst - :start-after: start-install-dependencies - :end-before: end-install-dependencies - Building from the Git repository (rather than a release tarball) additionally requires: @@ -30,49 +32,35 @@ requires: - `libtool `_ - `GNU Awk `_ 4.0 or newer -Simply add ``autoconf automake gawk libtool`` to the appropriate installation -command above. - -Installation ------------- +.. include:: ../README.rst + :start-after: start-installation + :end-before: end-installation .. highlight:: console -After installing dependencies, the latest release of drgn can be installed -globally with `pip `_:: - - $ sudo pip3 install drgn - $ drgn --help +Virtual Environment +^^^^^^^^^^^^^^^^^^^ -The development version can be built and installed manually:: +The above options all install drgn globally. 
You can also install drgn in a +`virtual environment `_, either +with pip:: - $ git clone https://github.com/osandov/drgn.git - $ cd drgn - $ python3 setup.py build - $ sudo python3 setup.py install - $ drgn --help + $ python3 -m venv drgnenv + $ source drgnenv/bin/activate + (drgnenv) $ pip3 install drgn + (drgnenv) $ drgn --help -Both of these options can be done in a `virtual environment -`_ if you do not wish to install -drgn globally:: +Or from source:: $ python3 -m venv drgnenv $ source drgnenv/bin/activate - (drgenv) $ pip3 install drgn - (drgenv) $ drgn --help + (drgnenv) $ python3 setup.py install + (drgnenv) $ drgn --help -Development ------------ +Running Locally +--------------- -For development, drgn can be built and run locally:: +If you build drgn from source, you can also run it without installing it:: - $ CFLAGS="-Wall -Werror -g -O2" python3 setup.py build_ext -i + $ python3 setup.py build_ext -i $ python3 -m drgn --help - -libkdumpfile ------------- - -drgn supports kdump-compressed kernel core dumps when `libkdumpfile -`_ is available. libkdumpfile is not -packaged for most Linux distributions, so it must be built and installed -manually. If it is installed, then drgn is automatically built with support. 
diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 000000000..f2bba1b52 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1 @@ +sphinx==4.1.2 diff --git a/docs/user_guide.rst b/docs/user_guide.rst index 633d3249f..6bcf32cac 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -27,7 +27,7 @@ A ``Program`` is used to look up type definitions, access variables, and read arbitrary memory:: >>> prog.type('unsigned long') - int_type(name='unsigned long', size=8, is_signed=False) + prog.int_type(name='unsigned long', size=8, is_signed=False) >>> prog['jiffies'] Object(prog, 'volatile unsigned long', address=0xffffffffbe405000) >>> prog.read(0xffffffffbe411e10, 16) @@ -199,6 +199,41 @@ Other Concepts In addition to the core concepts above, drgn provides a few additional abstractions. +Stack Traces +^^^^^^^^^^^^ + +drgn represents stack traces with the :class:`drgn.StackTrace` and +:class:`drgn.StackFrame` classes. :meth:`drgn.Program.stack_trace()` returns +the call stack for a thread. 
The :meth:`[] <drgn.StackFrame.__getitem__>` +operator looks up an object in the scope of a ``StackFrame``:: + + >>> trace = prog.stack_trace(115) + >>> trace + #0 context_switch (./kernel/sched/core.c:4683:2) + #1 __schedule (./kernel/sched/core.c:5940:8) + #2 schedule (./kernel/sched/core.c:6019:3) + #3 schedule_hrtimeout_range_clock (./kernel/time/hrtimer.c:2148:3) + #4 poll_schedule_timeout (./fs/select.c:243:8) + #5 do_poll (./fs/select.c:961:8) + #6 do_sys_poll (./fs/select.c:1011:12) + #7 __do_sys_poll (./fs/select.c:1076:8) + #8 __se_sys_poll (./fs/select.c:1064:1) + #9 __x64_sys_poll (./fs/select.c:1064:1) + #10 do_syscall_x64 (./arch/x86/entry/common.c:50:14) + #11 do_syscall_64 (./arch/x86/entry/common.c:80:7) + #12 entry_SYSCALL_64+0x7c/0x15b (./arch/x86/entry/entry_64.S:113) + #13 0x7f3344072af7 + >>> trace[5] + #5 at 0xffffffff8a5a32d0 (do_sys_poll+0x400/0x578) in do_poll at ./fs/select.c:961:8 (inlined) + >>> prog['do_poll'] + (int (struct poll_list *list, struct poll_wqueues *wait, struct timespec64 *end_time))<absent> + >>> trace[5]['list'] + *(struct poll_list *)0xffffacca402e3b50 = { + .next = (struct poll_list *)0x0, + .len = (int)1, + .entries = (struct pollfd []){}, + } + Symbols ^^^^^^^ @@ -206,22 +241,15 @@ The symbol table of a program is a list of identifiers along with their address and size. drgn represents symbols with the :class:`drgn.Symbol` class, which is returned by :meth:`drgn.Program.symbol()`. -Stack Traces -^^^^^^^^^^^^ - -drgn represents stack traces with the :class:`drgn.StackTrace` and -:class:`drgn.StackFrame` classes. :meth:`drgn.Program.stack_trace()` returns -the call stack for a thread. - Types ^^^^^ drgn automatically obtains type definitions from the program. 
Types are represented by the :class:`drgn.Type` class and created by various factory -functions like :func:`drgn.int_type()`:: +functions like :meth:`drgn.Program.int_type()`:: >>> prog.type('int') - int_type(name='int', size=4, is_signed=True) + prog.int_type(name='int', size=4, is_signed=True) You won't usually need to work with types directly, but see :ref:`api-reference-types` if you do. @@ -289,7 +317,7 @@ print the output of :func:`repr()`. For :class:`drgn.Object` and >>> print(repr(prog['jiffies'])) Object(prog, 'volatile unsigned long', address=0xffffffffbe405000) >>> print(repr(prog.type('atomic_t'))) - typedef_type(name='atomic_t', type=struct_type(tag=None, size=4, members=((int_type(name='int', size=4, is_signed=True), 'counter', 0, 0),))) + prog.typedef_type(name='atomic_t', type=prog.struct_type(tag=None, size=4, members=(TypeMember(prog.type('int'), name='counter', bit_offset=0),))) The standard :func:`print()` function uses the output of :func:`str()`. For drgn objects and types, this is a representation in programming language diff --git a/drgn/__init__.py b/drgn/__init__.py index 893c36dce..4fb73012f 100644 --- a/drgn/__init__.py +++ b/drgn/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ @@ -67,6 +67,8 @@ StackFrame, StackTrace, Symbol, + SymbolBinding, + SymbolKind, Type, TypeEnumerator, TypeKind, @@ -110,6 +112,8 @@ "StackFrame", "StackTrace", "Symbol", + "SymbolBinding", + "SymbolKind", "Type", "TypeEnumerator", "TypeKind", diff --git a/drgn/__main__.py b/drgn/__main__.py index c85ee75b1..9fd749eb4 100644 --- a/drgn/__main__.py +++ b/drgn/__main__.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/__init__.py b/drgn/helpers/__init__.py index 552451c95..8bf2b4d47 100644 --- a/drgn/helpers/__init__.py +++ b/drgn/helpers/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ @@ -10,10 +10,6 @@ for the Linux kernel. In the future, there may be helpers for, e.g., glibc and libstdc++. -Parameter types and return types are :class:`drgn.Object` unless noted -otherwise. Many helpers include a C function signature indicating the expected -object types. - Generic Helpers =============== @@ -23,7 +19,7 @@ import enum import typing -from typing import Container, Iterable, List, Tuple +from typing import Container, Iterable from drgn import Type diff --git a/drgn/helpers/linux/__init__.py b/drgn/helpers/linux/__init__.py index 71bf0c25a..c46a3a571 100644 --- a/drgn/helpers/linux/__init__.py +++ b/drgn/helpers/linux/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ @@ -32,8 +32,9 @@ import importlib import pkgutil +from typing import List -__all__ = [] +__all__: List[str] = [] for _module_info in pkgutil.iter_modules( __path__, # type: ignore[name-defined] # python/mypy#1422 prefix=__name__ + ".", diff --git a/drgn/helpers/linux/bitops.py b/drgn/helpers/linux/bitops.py new file mode 100644 index 000000000..fd6210ddc --- /dev/null +++ b/drgn/helpers/linux/bitops.py @@ -0,0 +1,64 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: GPL-3.0-or-later + +""" +Bit Operations +-------------- + +The ``drgn.helpers.linux.bitops`` module provides helpers for common bit +operations in the Linux kernel. 
+""" + +from typing import Iterator + +from drgn import IntegerLike, Object, sizeof + +__all__ = ( + "for_each_clear_bit", + "for_each_set_bit", + "test_bit", +) + + +def for_each_set_bit(bitmap: Object, size: IntegerLike) -> Iterator[int]: + """ + Iterate over all set (one) bits in a bitmap. + + :param bitmap: ``unsigned long *`` + :param size: Size of *bitmap* in bits. + """ + size = int(size) + word_bits = 8 * sizeof(bitmap.type_.type) + for i in range((size + word_bits - 1) // word_bits): + word = bitmap[i].value_() + for j in range(min(word_bits, size - word_bits * i)): + if word & (1 << j): + yield (word_bits * i) + j + + +def for_each_clear_bit(bitmap: Object, size: IntegerLike) -> Iterator[int]: + """ + Iterate over all clear (zero) bits in a bitmap. + + :param bitmap: ``unsigned long *`` + :param size: Size of *bitmap* in bits. + """ + size = int(size) + word_bits = 8 * sizeof(bitmap.type_.type) + for i in range((size + word_bits - 1) // word_bits): + word = bitmap[i].value_() + for j in range(min(word_bits, size - word_bits * i)): + if not (word & (1 << j)): + yield (word_bits * i) + j + + +def test_bit(nr: IntegerLike, bitmap: Object) -> bool: + """ + Return whether a bit in a bitmap is set. + + :param nr: Bit number. + :param bitmap: ``unsigned long *`` + """ + nr = int(nr) + word_bits = 8 * sizeof(bitmap.type_.type) + return ((bitmap[nr // word_bits].value_() >> (nr & (word_bits - 1))) & 1) != 0 diff --git a/drgn/helpers/linux/block.py b/drgn/helpers/linux/block.py index cf8588427..0bbdc4f69 100644 --- a/drgn/helpers/linux/block.py +++ b/drgn/helpers/linux/block.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/boot.py b/drgn/helpers/linux/boot.py index 74dd2be85..6e959bf4f 100644 --- a/drgn/helpers/linux/boot.py +++ b/drgn/helpers/linux/boot.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. 
and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/bpf.py b/drgn/helpers/linux/bpf.py index fa526041b..46d33665c 100644 --- a/drgn/helpers/linux/bpf.py +++ b/drgn/helpers/linux/bpf.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/cgroup.py b/drgn/helpers/linux/cgroup.py index dac0a91c3..beb151e88 100644 --- a/drgn/helpers/linux/cgroup.py +++ b/drgn/helpers/linux/cgroup.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/cpumask.py b/drgn/helpers/linux/cpumask.py index 84e572098..3001da913 100644 --- a/drgn/helpers/linux/cpumask.py +++ b/drgn/helpers/linux/cpumask.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later """ @@ -11,7 +11,8 @@ from typing import Iterator -from drgn import Object, Program, sizeof +from drgn import Object, Program +from drgn.helpers.linux.bitops import for_each_set_bit __all__ = ( "for_each_cpu", @@ -27,13 +28,11 @@ def for_each_cpu(mask: Object) -> Iterator[int]: :param mask: ``struct cpumask`` """ - bits = mask.bits - word_bits = 8 * sizeof(bits.type_.type) - for i in range(bits.type_.length): # type: ignore - word = bits[i].value_() - for j in range(word_bits): - if word & (1 << j): - yield (word_bits * i) + j + try: + nr_cpu_ids = mask.prog_["nr_cpu_ids"].value_() + except KeyError: + nr_cpu_ids = 1 + return for_each_set_bit(mask.bits, nr_cpu_ids) def _for_each_cpu_mask(prog: Program, name: str) -> Iterator[int]: diff --git a/drgn/helpers/linux/device.py b/drgn/helpers/linux/device.py index 73861d77d..6fde94295 100644 --- a/drgn/helpers/linux/device.py +++ b/drgn/helpers/linux/device.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/fs.py b/drgn/helpers/linux/fs.py index 7eff48338..71cbfc361 100644 --- a/drgn/helpers/linux/fs.py +++ b/drgn/helpers/linux/fs.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/idr.py b/drgn/helpers/linux/idr.py index b9497cd83..7535fbb68 100644 --- a/drgn/helpers/linux/idr.py +++ b/drgn/helpers/linux/idr.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ @@ -11,28 +11,12 @@ IDRs were not based on radix trees. 
""" -from typing import Iterator, Tuple - -from _drgn import _linux_helper_idr_find as idr_find -from drgn import Object -from drgn.helpers.linux.radixtree import radix_tree_for_each, radix_tree_lookup +from _drgn import ( + _linux_helper_idr_find as idr_find, + _linux_helper_idr_for_each as idr_for_each, +) __all__ = ( "idr_find", "idr_for_each", ) - - -def idr_for_each(idr: Object) -> Iterator[Tuple[int, Object]]: - """ - Iterate over all of the entries in an IDR. - - :param idr: ``struct idr *`` - :return: Iterator of (index, ``void *``) tuples. - """ - try: - base = idr.idr_base.value_() - except AttributeError: - base = 0 - for index, entry in radix_tree_for_each(idr.idr_rt.address_of_()): - yield index + base, entry diff --git a/drgn/helpers/linux/kconfig.py b/drgn/helpers/linux/kconfig.py index ff5aac0e0..fb125cbb3 100644 --- a/drgn/helpers/linux/kconfig.py +++ b/drgn/helpers/linux/kconfig.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/kernfs.py b/drgn/helpers/linux/kernfs.py index a3e223598..566f2a906 100644 --- a/drgn/helpers/linux/kernfs.py +++ b/drgn/helpers/linux/kernfs.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/list.py b/drgn/helpers/linux/list.py index b72d1e0e0..2357389a5 100644 --- a/drgn/helpers/linux/list.py +++ b/drgn/helpers/linux/list.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later """ @@ -156,7 +156,9 @@ def list_for_each_reverse(head: Object) -> Iterator[Object]: pos = pos.prev.read_() -def list_for_each_entry(type: str, head: Object, member: str) -> Iterator[Object]: +def list_for_each_entry( + type: Union[str, Type], head: Object, member: str +) -> Iterator[Object]: """ Iterate over all of the entries in a list. @@ -170,7 +172,7 @@ def list_for_each_entry(type: str, head: Object, member: str) -> Iterator[Object def list_for_each_entry_reverse( - type: str, head: Object, member: str + type: Union[str, Type], head: Object, member: str ) -> Iterator[Object]: """ Iterate over all of the entries in a list in reverse order. @@ -206,7 +208,9 @@ def hlist_for_each(head: Object) -> Iterator[Object]: pos = pos.next.read_() -def hlist_for_each_entry(type: str, head: Object, member: str) -> Iterator[Object]: +def hlist_for_each_entry( + type: Union[str, Type], head: Object, member: str +) -> Iterator[Object]: """ Iterate over all of the entries in a hash list. diff --git a/drgn/helpers/linux/list_nulls.py b/drgn/helpers/linux/list_nulls.py index f007c68c8..7c5fb4b14 100644 --- a/drgn/helpers/linux/list_nulls.py +++ b/drgn/helpers/linux/list_nulls.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ @@ -11,9 +11,9 @@ list is not a ``NULL`` pointer, but a "nulls" marker. """ -from typing import Iterator +from typing import Iterator, Union -from drgn import Object, container_of +from drgn import Object, Type, container_of __all__ = ( "hlist_nulls_empty", @@ -41,7 +41,7 @@ def hlist_nulls_empty(head: Object) -> bool: def hlist_nulls_for_each_entry( - type: str, head: Object, member: str + type: Union[str, Type], head: Object, member: str ) -> Iterator[Object]: """ Iterate over all the entries in a nulls hash list. 
diff --git a/drgn/helpers/linux/mm.py b/drgn/helpers/linux/mm.py index 8a5b9d9e2..d669316b7 100644 --- a/drgn/helpers/linux/mm.py +++ b/drgn/helpers/linux/mm.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ @@ -11,7 +11,7 @@ """ import operator -from typing import Any, Iterator, List, Optional, Union, overload +from typing import Iterator, List, Optional, Union, overload from _drgn import _linux_helper_read_vm from drgn import IntegerLike, Object, Program, cast diff --git a/drgn/helpers/linux/net.py b/drgn/helpers/linux/net.py index 621f7652f..9efbb3bfb 100644 --- a/drgn/helpers/linux/net.py +++ b/drgn/helpers/linux/net.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ @@ -9,18 +9,186 @@ Linux kernel networking subsystem. """ -from typing import Iterator +import operator +from typing import Iterator, Union -from drgn import Object +from drgn import NULL, IntegerLike, Object, Program, cast, container_of +from drgn.helpers.linux.fs import fget +from drgn.helpers.linux.list import hlist_for_each_entry, list_for_each_entry from drgn.helpers.linux.list_nulls import hlist_nulls_for_each_entry -from drgn.helpers.linux.tcp import sk_tcpstate __all__ = ( + "SOCK_INODE", + "SOCKET_I", + "for_each_net", + "get_net_ns_by_inode", + "get_net_ns_by_fd", + "netdev_for_each_tx_queue", + "netdev_get_by_index", + "netdev_get_by_name", "sk_fullsock", "sk_nulls_for_each", ) +_S_IFMT = 0o170000 +_S_IFSOCK = 0o140000 + + +def SOCKET_I(inode: Object) -> Object: + """ + Get a socket from an inode referring to the socket. 
+ + :param inode: ``struct inode *`` + :return: ``struct socket *`` + :raises ValueError: If *inode* does not refer to a socket + """ + if inode.i_mode & _S_IFMT != _S_IFSOCK: + raise ValueError("not a socket inode") + + return container_of(inode, "struct socket_alloc", "vfs_inode").socket.address_of_() + + +def SOCK_INODE(sock: Object) -> Object: + """ + Get the inode of a socket. + + :param sock: ``struct socket *`` + :return: ``struct inode *`` + """ + return container_of(sock, "struct socket_alloc", "socket").vfs_inode.address_of_() + + +def for_each_net(prog: Program) -> Iterator[Object]: + """ + Iterate over all network namespaces in the system. + + :return: Iterator of ``struct net *`` objects. + """ + for net in list_for_each_entry( + "struct net", prog["net_namespace_list"].address_of_(), "list" + ): + yield net + + +_CLONE_NEWNET = 0x40000000 + + +def get_net_ns_by_inode(inode: Object) -> Object: + """ + Get a network namespace from a network namespace NSFS inode, e.g. + ``/proc/$PID/ns/net`` or ``/var/run/netns/$NAME``. + + :param inode: ``struct inode *`` + :return: ``struct net *`` + :raises ValueError: if *inode* is not a network namespace inode + """ + if inode.i_fop != inode.prog_["ns_file_operations"].address_of_(): + raise ValueError("not a namespace inode") + + ns = cast("struct ns_common *", inode.i_private) + if ns.ops.type != _CLONE_NEWNET: + raise ValueError("not a network namespace inode") + + return container_of(ns, "struct net", "ns") + + +def get_net_ns_by_fd(task: Object, fd: IntegerLike) -> Object: + """ + Get a network namespace from a task and a file descriptor referring to a + network namespace NSFS inode, e.g. ``/proc/$PID/ns/net`` or + ``/var/run/netns/$NAME``. + + :param task: ``struct task_struct *`` + :param fd: File descriptor. 
+ :return: ``struct net *`` + :raises ValueError: If *fd* does not refer to a network namespace inode + """ + return get_net_ns_by_inode(fget(task, fd).f_inode) + + +def netdev_for_each_tx_queue(dev: Object) -> Iterator[Object]: + """ + Iterate over all TX queues for a network device. + + :param dev: ``struct net_device *`` + :return: Iterator of ``struct netdev_queue *`` objects. + """ + for i in range(dev.num_tx_queues): + yield dev._tx + i + + +_NETDEV_HASHBITS = 8 +_NETDEV_HASHENTRIES = 1 << _NETDEV_HASHBITS + + +def netdev_get_by_index( + prog_or_net: Union[Program, Object], ifindex: IntegerLike +) -> Object: + """ + Get the network device with the given interface index number. + + :param prog_or_net: ``struct net *`` containing the device, or + :class:`Program` to use the initial network namespace. + :param ifindex: Network interface index number. + :return: ``struct net_device *`` (``NULL`` if not found) + """ + if isinstance(prog_or_net, Program): + prog_or_net = prog_or_net["init_net"] + if isinstance(ifindex, Object): + ifindex = ifindex.read_() + + head = prog_or_net.dev_index_head[ + operator.index(ifindex) & (_NETDEV_HASHENTRIES - 1) + ] + for netdev in hlist_for_each_entry("struct net_device", head, "index_hlist"): + if netdev.ifindex == ifindex: + return netdev + + return NULL(prog_or_net.prog_, "struct net_device *") + + +def netdev_get_by_name( + prog_or_net: Union[Program, Object], name: Union[str, bytes] +) -> Object: + """ + Get the network device with the given interface name. + + :param prog_or_net: ``struct net *`` containing the device, or + :class:`Program` to use the initial network namespace. + :param name: Network interface name. 
+ :return: ``struct net_device *`` (``NULL`` if not found) + """ + if isinstance(prog_or_net, Program): + prog_or_net = prog_or_net["init_net"] + if isinstance(name, str): + name = name.encode() + + # Since Linux kernel commit ff92741270bf ("net: introduce name_node struct + # to be used in hashlist") (in v5.5), the device name hash table contains + # struct netdev_name_node entries. Before that, it contained the struct + # net_device directly. + try: + entry_type = prog_or_net.prog_.type("struct netdev_name_node") + member = "hlist" + entry_is_name_node = True + except LookupError: + entry_type = prog_or_net.prog_.type("struct net_device") + member = "name_hlist" + entry_is_name_node = False + + for i in range(_NETDEV_HASHENTRIES): + head = prog_or_net.dev_name_head[i] + for entry in hlist_for_each_entry(entry_type, head, member): + if entry.name.string_() == name: + if entry_is_name_node: + return entry.dev + else: + return entry + + return NULL(prog_or_net.prog_, "struct net_device *") + + def sk_fullsock(sk: Object) -> bool: """ Check whether a socket is a full socket, i.e., not a time-wait or request diff --git a/drgn/helpers/linux/nodemask.py b/drgn/helpers/linux/nodemask.py new file mode 100644 index 000000000..5afe1b42e --- /dev/null +++ b/drgn/helpers/linux/nodemask.py @@ -0,0 +1,66 @@ +# Copyright (c) ByteDance, Inc. and its affiliates. +# SPDX-License-Identifier: GPL-3.0-or-later + +""" +NUMA Node Masks +--------------- + +The ``drgn.helpers.linux.nodemask`` module provides helpers for working with +NUMA node masks from :linux:`include/linux/nodemask.h`. +""" + +from typing import Iterator + +from drgn import IntegerLike, Object, Program +from drgn.helpers.linux.bitops import for_each_set_bit, test_bit + +__all__ = ( + "for_each_node", + "for_each_node_mask", + "for_each_node_state", + "for_each_online_node", + "node_state", +) + + +def for_each_node_mask(mask: Object) -> Iterator[int]: + """ + Iterate over all of the NUMA nodes in the given mask. 
+ + :param mask: ``nodemask_t`` + """ + try: + nr_node_ids = mask.prog_["nr_node_ids"].value_() + except KeyError: + nr_node_ids = 1 + return for_each_set_bit(mask.bits, nr_node_ids) + + +def for_each_node_state(prog: Program, state: IntegerLike) -> Iterator[int]: + """ + Iterate over all NUMA nodes in the given state. + + :param state: ``enum node_states`` (e.g., ``N_NORMAL_MEMORY``) + """ + mask = prog["node_states"][state] + return for_each_node_mask(mask) + + +def for_each_node(prog: Program) -> Iterator[int]: + """Iterate over all possible NUMA nodes.""" + return for_each_node_state(prog, prog["N_POSSIBLE"]) + + +def for_each_online_node(prog: Program) -> Iterator[int]: + """Iterate over all online NUMA nodes.""" + return for_each_node_state(prog, prog["N_ONLINE"]) + + +def node_state(node: IntegerLike, state: Object) -> bool: + """ + Return whether the given NUMA node has the given state. + + :param node: NUMA node number. + :param state: ``enum node_states`` (e.g., ``N_NORMAL_MEMORY``) + """ + return test_bit(node, state.prog_["node_states"][state].bits) diff --git a/drgn/helpers/linux/percpu.py b/drgn/helpers/linux/percpu.py index 2472f8e4c..3143533df 100644 --- a/drgn/helpers/linux/percpu.py +++ b/drgn/helpers/linux/percpu.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ @@ -10,6 +10,7 @@ from :linux:`include/linux/percpu_counter.h`. """ +from _drgn import _linux_helper_per_cpu_ptr as per_cpu_ptr from drgn import IntegerLike, Object from drgn.helpers.linux.cpumask import for_each_online_cpu @@ -37,24 +38,6 @@ def per_cpu(var: Object, cpu: IntegerLike) -> Object: return per_cpu_ptr(var.address_of_(), cpu)[0] -def per_cpu_ptr(ptr: Object, cpu: IntegerLike) -> Object: - """ - Return the per-CPU pointer for a given CPU. 
- - >>> prog["init_net"].loopback_dev.pcpu_refcnt - (int *)0x2c980 - >>> per_cpu_ptr(prog["init_net"].loopback_dev.pcpu_refcnt, 7) - *(int *)0xffff925e3ddec980 = 4 - - :param ptr: Per-CPU pointer, i.e., ``type __percpu *``. For global - variables, it's usually easier to use :func:`per_cpu()`. - :param cpu: CPU number. - :return: ``type *`` object. - """ - offset = ptr.prog_["__per_cpu_offset"][cpu].value_() - return Object(ptr.prog_, ptr.type_, value=ptr.value_() + offset) - - def percpu_counter_sum(fbc: Object) -> int: """ Return the sum of a per-CPU counter. diff --git a/drgn/helpers/linux/pid.py b/drgn/helpers/linux/pid.py index 9f4d7cab4..152d15b21 100644 --- a/drgn/helpers/linux/pid.py +++ b/drgn/helpers/linux/pid.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ @@ -9,16 +9,13 @@ IDs and processes. """ -from typing import Iterator, Union - from _drgn import ( _linux_helper_find_pid as find_pid, _linux_helper_find_task as find_task, + _linux_helper_for_each_pid as for_each_pid, + _linux_helper_for_each_task as for_each_task, _linux_helper_pid_task as pid_task, ) -from drgn import NULL, Object, Program, cast, container_of -from drgn.helpers.linux.idr import idr_find, idr_for_each -from drgn.helpers.linux.list import hlist_for_each_entry __all__ = ( "find_pid", @@ -27,49 +24,3 @@ "for_each_task", "pid_task", ) - - -def for_each_pid(prog_or_ns: Union[Program, Object]) -> Iterator[Object]: - """ - Iterate over all PIDs in a namespace. - - :param prog_or_ns: ``struct pid_namespace *`` to iterate over, or - :class:`Program` to iterate over initial PID namespace. - :return: Iterator of ``struct pid *`` objects. 
- """ - if isinstance(prog_or_ns, Program): - prog = prog_or_ns - ns = prog_or_ns["init_pid_ns"].address_of_() - else: - prog = prog_or_ns.prog_ - ns = prog_or_ns - if hasattr(ns, "idr"): - for nr, entry in idr_for_each(ns.idr): - yield cast("struct pid *", entry) - else: - pid_hash = prog["pid_hash"] - for i in range(1 << prog["pidhash_shift"].value_()): - for upid in hlist_for_each_entry( - "struct upid", pid_hash[i].address_of_(), "pid_chain" - ): - if upid.ns == ns: - yield container_of(upid, "struct pid", f"numbers[{int(ns.level)}]") - - -def for_each_task(prog_or_ns: Union[Program, Object]) -> Iterator[Object]: - """ - Iterate over all of the tasks visible in a namespace. - - :param prog_or_ns: ``struct pid_namespace *`` to iterate over, or - :class:`Program` to iterate over initial PID namespace. - :return: Iterator of ``struct task_struct *`` objects. - """ - if isinstance(prog_or_ns, Program): - prog = prog_or_ns - else: - prog = prog_or_ns.prog_ - PIDTYPE_PID = prog["PIDTYPE_PID"].value_() - for pid in for_each_pid(prog_or_ns): - task = pid_task(pid, PIDTYPE_PID) - if task: - yield task diff --git a/drgn/helpers/linux/radixtree.py b/drgn/helpers/linux/radixtree.py index fe8814248..090835fc4 100644 --- a/drgn/helpers/linux/radixtree.py +++ b/drgn/helpers/linux/radixtree.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ @@ -9,54 +9,12 @@ radix trees from :linux:`include/linux/radix-tree.h`. 
""" -from typing import Iterator, Tuple - -from _drgn import _linux_helper_radix_tree_lookup as radix_tree_lookup -from drgn import Object, cast +from _drgn import ( + _linux_helper_radix_tree_for_each as radix_tree_for_each, + _linux_helper_radix_tree_lookup as radix_tree_lookup, +) __all__ = ( "radix_tree_for_each", "radix_tree_lookup", ) - -_RADIX_TREE_ENTRY_MASK = 3 - - -def _is_internal_node(node: Object, internal_node: int) -> bool: - return (node.value_() & _RADIX_TREE_ENTRY_MASK) == internal_node - - -def _entry_to_node(node: Object, internal_node: int) -> Object: - return Object(node.prog_, node.type_, value=node.value_() & ~internal_node) - - -def _radix_tree_root_node(root: Object) -> Tuple[Object, int]: - try: - node = root.xa_head - except AttributeError: - return root.rnode.read_(), 1 - else: - return cast("struct xa_node *", node).read_(), 2 - - -def radix_tree_for_each(root: Object) -> Iterator[Tuple[int, Object]]: - """ - Iterate over all of the entries in a radix tree. - - :param root: ``struct radix_tree_root *`` - :return: Iterator of (index, ``void *``) tuples. - """ - node, RADIX_TREE_INTERNAL_NODE = _radix_tree_root_node(root) - - def aux(node: Object, index: int) -> Iterator[Tuple[int, Object]]: - if _is_internal_node(node, RADIX_TREE_INTERNAL_NODE): - parent = _entry_to_node(node, RADIX_TREE_INTERNAL_NODE) - for i, slot in enumerate(parent.slots): - yield from aux( - cast(parent.type_, slot).read_(), - index + (i << parent.shift.value_()), - ) - elif node: - yield index, cast("void *", node) - - yield from aux(node, 0) diff --git a/drgn/helpers/linux/rbtree.py b/drgn/helpers/linux/rbtree.py index c81e2bb96..a1572b043 100644 --- a/drgn/helpers/linux/rbtree.py +++ b/drgn/helpers/linux/rbtree.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ @@ -9,9 +9,9 @@ red-black trees from :linux:`include/linux/rbtree.h`. 
""" -from typing import Callable, Iterator, TypeVar +from typing import Callable, Iterator, TypeVar, Union -from drgn import NULL, Object, container_of +from drgn import NULL, Object, Type, container_of __all__ = ( "RB_EMPTY_NODE", @@ -158,7 +158,7 @@ def aux(node: Object) -> Iterator[Object]: def rbtree_inorder_for_each_entry( - type: str, root: Object, member: str + type: Union[str, Type], root: Object, member: str ) -> Iterator[Object]: """ Iterate over all of the entries in a red-black tree in sorted order. @@ -176,7 +176,7 @@ def rbtree_inorder_for_each_entry( def rb_find( - type: str, + type: Union[str, Type], root: Object, member: str, key: KeyType, diff --git a/drgn/helpers/linux/sched.py b/drgn/helpers/linux/sched.py index f6abf9f06..b0fc5d0ee 100644 --- a/drgn/helpers/linux/sched.py +++ b/drgn/helpers/linux/sched.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ @@ -9,9 +9,13 @@ Linux CPU scheduler. 
""" +from _drgn import _linux_helper_idle_task as idle_task from drgn import Object -__all__ = ("task_state_to_char",) +__all__ = ( + "idle_task", + "task_state_to_char", +) _TASK_NOLOAD = 0x400 @@ -29,7 +33,9 @@ def task_state_to_char(task: Object) -> str: task_state_chars: str TASK_REPORT: int try: - task_state_chars, TASK_REPORT = prog.cache["task_state_to_char"] + task_state_chars, TASK_REPORT, task_state_name = prog.cache[ + "task_state_to_char" + ] except KeyError: task_state_array = prog["task_state_array"] # Walk through task_state_array backwards looking for the largest state @@ -45,13 +51,30 @@ def task_state_to_char(task: Object) -> str: if chars is None: raise Exception("could not parse task_state_array") task_state_chars = chars.decode("ascii") - prog.cache["task_state_to_char"] = task_state_chars, TASK_REPORT - task_state = task.state.value_() + + # Since Linux kernel commit 2f064a59a11f ("sched: Change + # task_struct::state") (in v5.14), the task state is named "__state". + # Before that, it is named "state". + try: + task_state = task.__state + task_state_name = "__state" + except AttributeError: + task_state = task.state + task_state_name = "state" + + prog.cache["task_state_to_char"] = ( + task_state_chars, + TASK_REPORT, + task_state_name, + ) + else: + task_state = getattr(task, task_state_name) + task_state = task_state.value_() exit_state = task.exit_state.value_() state = (task_state | exit_state) & TASK_REPORT char = task_state_chars[state.bit_length()] - # States beyond TASK_REPORT are special. As of Linux v5.8, TASK_IDLE is the - # only one; it is defined as TASK_UNINTERRUPTIBLE | TASK_NOLOAD. + # States beyond TASK_REPORT are special. As of Linux v5.14, TASK_IDLE is + # the only one; it is defined as TASK_UNINTERRUPTIBLE | TASK_NOLOAD. 
if char == "D" and (task_state & ~state) == _TASK_NOLOAD: return "I" else: diff --git a/drgn/helpers/linux/tc.py b/drgn/helpers/linux/tc.py new file mode 100644 index 000000000..42022453c --- /dev/null +++ b/drgn/helpers/linux/tc.py @@ -0,0 +1,60 @@ +# Copyright (c) ByteDance, Inc. and its affiliates. +# SPDX-License-Identifier: GPL-3.0-or-later + +""" +Traffic Control (TC) +-------------------- + +The ``drgn.helpers.linux.tc`` module provides helpers for working with the +Linux kernel Traffic Control (TC) subsystem. +""" + +import operator + +from drgn import NULL, IntegerLike, Object +from drgn.helpers.linux.list import hlist_for_each_entry, list_for_each_entry + +__all__ = ("qdisc_lookup",) + + +def qdisc_lookup(dev: Object, major: IntegerLike) -> Object: + """ + Get a Qdisc from a device and a major handle number. It is worth noting + that conventionally handles are hexadecimal, e.g. ``10:`` in a ``tc`` + command means major handle 0x10. + + :param dev: ``struct net_device *`` + :param major: Qdisc major handle number. + :return: ``struct Qdisc *`` (``NULL`` if not found) + """ + major = operator.index(major) << 16 + + roots = [dev.qdisc] + if dev.ingress_queue: + roots.append(dev.ingress_queue.qdisc_sleeping) + + # Since Linux kernel commit 59cc1f61f09c ("net: sched: convert qdisc linked + # list to hashtable") (in v4.7), a device's child Qdiscs are maintained in + # a hashtable in its struct net_device. Before that, they are maintained in + # a linked list in their root Qdisc. 
+ use_hashtable = dev.prog_.type("struct net_device").has_member("qdisc_hash") + + for root in roots: + if root.handle == major: + return root + + if use_hashtable: + for head in root.dev_queue.dev.qdisc_hash: + for qdisc in hlist_for_each_entry( + "struct Qdisc", head.address_of_(), "hash" + ): + if qdisc.handle == major: + return qdisc + else: + for qdisc in list_for_each_entry( + "struct Qdisc", root.list.address_of_(), "list" + ): + if qdisc.handle == major: + return qdisc + + return NULL(dev.prog_, "struct Qdisc *") diff --git a/drgn/helpers/linux/tcp.py b/drgn/helpers/linux/tcp.py index af0c7b116..0f92551af 100644 --- a/drgn/helpers/linux/tcp.py +++ b/drgn/helpers/linux/tcp.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/helpers/linux/user.py b/drgn/helpers/linux/user.py index d9fd7475c..32ee4fa92 100644 --- a/drgn/helpers/linux/user.py +++ b/drgn/helpers/linux/user.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """ @@ -32,7 +32,7 @@ def find_user(prog: Program, uid: Union[Object, IntegerLike]) -> Object: Return the user structure with the given UID. :param uid: ``kuid_t`` object or integer. - :return: ``struct user_state *`` + :return: ``struct user_struct *`` (``NULL`` if not found) """ try: uidhashentry = prog.cache["uidhashentry"] diff --git a/drgn/internal/__init__.py b/drgn/internal/__init__.py index 1b14962ca..7400b44e2 100644 --- a/drgn/internal/__init__.py +++ b/drgn/internal/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later """ diff --git a/drgn/internal/cli.py b/drgn/internal/cli.py index 77a25d1a2..aaed8ba67 100644 --- a/drgn/internal/cli.py +++ b/drgn/internal/cli.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """drgn command line interface""" @@ -93,7 +93,8 @@ def main() -> None: "-q", "--quiet", action="store_true", - help="don't print non-fatal warnings (e.g., about missing debugging information)", + help="don't print download progress or non-fatal warnings " + "(e.g., about missing debugging information)", ) parser.add_argument( "script", @@ -106,6 +107,11 @@ def main() -> None: args = parser.parse_args() + if not args.script: + print(version, file=sys.stderr, flush=True) + if not args.quiet: + os.environ["DEBUGINFOD_PROGRESS"] = "1" + prog = drgn.Program() if args.core is not None: prog.set_core_dump(args.core) @@ -170,13 +176,11 @@ def write_history_file() -> None: sys.displayhook = displayhook - banner = ( - version - + """ + banner = """\ For help, type help(drgn). >>> import drgn ->>> from drgn import """ - + ", ".join(drgn_globals) +>>> from drgn import """ + ", ".join( + drgn_globals ) if prog.flags & drgn.ProgramFlags.IS_LINUX_KERNEL: banner += "\n>>> from drgn.helpers.linux import *" diff --git a/drgn/internal/rlcompleter.py b/drgn/internal/rlcompleter.py index 6c4bb7f3a..86d750f49 100644 --- a/drgn/internal/rlcompleter.py +++ b/drgn/internal/rlcompleter.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """Improved readline completer""" diff --git a/examples/linux/cgroup.py b/examples/linux/cgroup.py index e215136a2..d0de71ad9 100755 --- a/examples/linux/cgroup.py +++ b/examples/linux/cgroup.py @@ -1,5 +1,5 @@ #!/usr/bin/env drgn -# Copyright (c) Facebook, Inc. and its affiliates. 
+# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """List the paths of all descendants of a cgroup v2""" diff --git a/examples/linux/fs_inodes.py b/examples/linux/fs_inodes.py index 856d13f76..0d9252b1b 100755 --- a/examples/linux/fs_inodes.py +++ b/examples/linux/fs_inodes.py @@ -1,12 +1,11 @@ #!/usr/bin/env drgn -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """List the paths of all inodes cached in a given filesystem""" import os import sys -import time from drgn.helpers.linux.fs import for_each_mount, inode_path from drgn.helpers.linux.list import list_for_each_entry diff --git a/examples/linux/lsmod.py b/examples/linux/lsmod.py index 1585e2c22..634783799 100755 --- a/examples/linux/lsmod.py +++ b/examples/linux/lsmod.py @@ -1,5 +1,5 @@ #!/usr/bin/env drgn -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """An implementation of lsmod(8) using drgn""" diff --git a/examples/linux/ps.py b/examples/linux/ps.py index 0eda3be28..d2c66a1c4 100755 --- a/examples/linux/ps.py +++ b/examples/linux/ps.py @@ -1,5 +1,5 @@ #!/usr/bin/env drgn -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later """A simplified implementation of ps(1) using drgn""" diff --git a/examples/linux/tcp_sock.py b/examples/linux/tcp_sock.py index ad628974c..e077df72c 100755 --- a/examples/linux/tcp_sock.py +++ b/examples/linux/tcp_sock.py @@ -1,5 +1,5 @@ #!/usr/bin/env drgn -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later """List all TCP sockets and their cgroup v2 paths""" diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 3eede7f47..15dfbeed2 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later ACLOCAL_AMFLAGS = -I m4 @@ -25,6 +25,7 @@ noinst_LTLIBRARIES = libdrgnimpl.la libdrgnimpl_la_SOURCES = $(ARCH_DEFS:.defs=.c) \ $(ARCH_INCS) \ arch_register_layout.h \ + array.h \ binary_buffer.c \ binary_buffer.h \ binary_search_tree.h \ @@ -34,8 +35,8 @@ libdrgnimpl_la_SOURCES = $(ARCH_DEFS:.defs=.c) \ cityhash.h \ debug_info.c \ debug_info.h \ - dwarf_index.c \ - dwarf_index.h \ + dwarf_info.c \ + dwarf_info.h \ error.c \ error.h \ hash_table.c \ @@ -54,11 +55,14 @@ libdrgnimpl_la_SOURCES = $(ARCH_DEFS:.defs=.c) \ memory_reader.c \ memory_reader.h \ minmax.h \ + nstring.h \ object.c \ object.h \ object_index.c \ object_index.h \ orc.h \ + orc_info.c \ + orc_info.h \ path.c \ path.h \ platform.c \ @@ -154,7 +158,8 @@ python/docstrings.c: ../_drgn.pyi $(drgndoc_docstrings_deps) python/docstrings.h: ../_drgn.pyi $(drgndoc_docstrings_deps) $(AM_V_GEN)$(drgndoc_docstrings) -H -m _drgn:drgn $< > $@ -EXTRA_DIST = $(ARCH_DEFS) \ +EXTRA_DIST = Doxyfile \ + $(ARCH_DEFS) \ build-aux/gen_arch.awk \ build-aux/gen_constants.py \ drgn.h.in diff --git a/libdrgn/arch_register_layout.h b/libdrgn/arch_register_layout.h index 9911589a2..3c9da6222 100644 --- a/libdrgn/arch_register_layout.h +++ b/libdrgn/arch_register_layout.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/arch_x86_64.c b/libdrgn/arch_x86_64.c index 2368b422c..bd9440b82 100644 --- a/libdrgn/arch_x86_64.c +++ b/libdrgn/arch_x86_64.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include @@ -6,6 +6,7 @@ #include #include +#include "array.h" #include "drgn.h" #include "error.h" #include "linux_kernel.h" @@ -683,7 +684,7 @@ linux_kernel_pgtable_iterator_next_x86_64(struct pgtable_iterator *it, /* Find the lowest level with cached entries. */ for (level = 0; level < levels; level++) { - if (arch->index[level] < ARRAY_SIZE(arch->table[level])) + if (arch->index[level] < array_size(arch->table[level])) break; } /* For every level below that, refill the cache/return pages. */ diff --git a/libdrgn/arch_x86_64.defs b/libdrgn/arch_x86_64.defs index 03b1ccf02..c5cda5ac9 100644 --- a/libdrgn/arch_x86_64.defs +++ b/libdrgn/arch_x86_64.defs @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later "rax" "rdx" diff --git a/libdrgn/array.h b/libdrgn/array.h new file mode 100644 index 000000000..a860427c8 --- /dev/null +++ b/libdrgn/array.h @@ -0,0 +1,42 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: GPL-3.0-or-later + +/** + * @file + * + * Helpers for C arrays. + */ + +#ifndef DRGN_ARRAY_H +#define DRGN_ARRAY_H + +#include "pp.h" +#include "util.h" + +/** @cond */ +#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) + +#define array_for_each_impl(var, arr, unique_end) \ + for (typeof((arr)[0]) *var = (arr), \ + *unique_end = var + array_size(arr); \ + var < unique_end; var++) +/** @endcond */ + +/** + * Return the number of elements in an array. 
+ * + * @hideinitializer + */ +#define array_size(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) + +/** + * Iterate over every element in an array. + * + * The element is declared as `element_type *var` in the scope of the loop. + * + * @hideinitializer + */ +#define array_for_each(var, arr) \ + array_for_each_impl(var, arr, PP_UNIQUE(end)) + +#endif /* DRGN_ARRAY_H */ diff --git a/libdrgn/binary_buffer.c b/libdrgn/binary_buffer.c index 7cabed214..98df2c580 100644 --- a/libdrgn/binary_buffer.c +++ b/libdrgn/binary_buffer.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include "binary_buffer.h" diff --git a/libdrgn/binary_buffer.h b/libdrgn/binary_buffer.h index dc0e0f8fe..cfab91bdd 100644 --- a/libdrgn/binary_buffer.h +++ b/libdrgn/binary_buffer.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/binary_search_tree.h b/libdrgn/binary_search_tree.h index ba6fb4f57..fe5ea8f01 100644 --- a/libdrgn/binary_search_tree.h +++ b/libdrgn/binary_search_tree.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** @@ -234,41 +234,41 @@ struct binary_tree_search_result { /* * Binary search tree variants need to define three functions: * - * variant##_tree_insert_fixup(root, node, parent) is called after a node is - * inserted (as *root, parent->left, or parent->right). It must set the node's - * parent pointer and rebalance the tree. + * drgn_##variant##_tree_insert_fixup(root, node, parent) is called after a node + * is inserted (as *root, parent->left, or parent->right). It must set the + * node's parent pointer and rebalance the tree. 
* - * variant##_tree_found(root, node) is called when a duplicate node is found for - * an insert operation or when a node is found for a search operation (but not - * for a delete operation). It may rebalance the tree or do nothing. + * drgn_##variant##_tree_found(root, node) is called when a duplicate node is + * found for an insert operation or when a node is found for a search operation + * (but not for a delete operation). It may rebalance the tree or do nothing. * - * variant##_tree_delete(root, node) must delete the node and rebalance the - * tree. + * drgn_##variant##_tree_delete(root, node) must delete the node and rebalance + * the tree. */ -void splay_tree_splay(struct binary_tree_node **root, - struct binary_tree_node *node, - struct binary_tree_node *parent); +void drgn_splay_tree_splay(struct binary_tree_node **root, + struct binary_tree_node *node, + struct binary_tree_node *parent); -static inline void splay_tree_insert_fixup(struct binary_tree_node **root, - struct binary_tree_node *node, - struct binary_tree_node *parent) +static inline void drgn_splay_tree_insert_fixup(struct binary_tree_node **root, + struct binary_tree_node *node, + struct binary_tree_node *parent) { if (parent) - splay_tree_splay(root, node, parent); + drgn_splay_tree_splay(root, node, parent); else node->parent = NULL; } -static inline void splay_tree_found(struct binary_tree_node **root, - struct binary_tree_node *node) +static inline void drgn_splay_tree_found(struct binary_tree_node **root, + struct binary_tree_node *node) { if (node->parent) - splay_tree_splay(root, node, node->parent); + drgn_splay_tree_splay(root, node, node->parent); } -void splay_tree_delete(struct binary_tree_node **root, - struct binary_tree_node *node); +void drgn_splay_tree_delete(struct binary_tree_node **root, + struct binary_tree_node *node); /** * Define a binary search tree type without defining its functions. 
@@ -372,14 +372,14 @@ static int tree##_insert(struct tree *tree, tree##_entry_type *entry, \ if (*res.nodep) { \ if (it_ret) \ it_ret->entry = tree##_node_to_entry(*res.nodep); \ - variant##_tree_found(&tree->root, *res.nodep); \ + drgn_##variant##_tree_found(&tree->root, *res.nodep); \ return 0; \ } \ \ node = tree##_entry_to_node(entry); \ node->left = node->right = NULL; \ *res.nodep = node; \ - variant##_tree_insert_fixup(&tree->root, node, res.parent); \ + drgn_##variant##_tree_insert_fixup(&tree->root, node, res.parent); \ return 1; \ } \ \ @@ -392,7 +392,7 @@ static struct tree##_iterator tree##_search(struct tree *tree, \ node = *tree##_search_internal(tree, key).nodep; \ if (!node) \ return (struct tree##_iterator){}; \ - variant##_tree_found(&tree->root, node); \ + drgn_##variant##_tree_found(&tree->root, node); \ return (struct tree##_iterator){ tree##_node_to_entry(node), }; \ } \ \ @@ -422,7 +422,8 @@ static struct tree##_iterator tree##_search_le(struct tree *tree, \ } \ } \ if (entry) \ - variant##_tree_found(&tree->root, tree##_entry_to_node(entry)); \ + drgn_##variant##_tree_found(&tree->root, \ + tree##_entry_to_node(entry)); \ return (struct tree##_iterator){ entry, }; \ } \ \ @@ -434,7 +435,7 @@ static bool tree##_delete(struct tree *tree, const tree##_key_type *key) \ node = *tree##_search_internal(tree, key).nodep; \ if (!node) \ return false; \ - variant##_tree_delete(&tree->root, node); \ + drgn_##variant##_tree_delete(&tree->root, node); \ return true; \ } \ \ @@ -486,7 +487,7 @@ tree##_delete_iterator(struct tree *tree, struct tree##_iterator it) \ \ node = tree##_entry_to_node(it.entry); \ it = tree##_next_impl(it); \ - variant##_tree_delete(&tree->root, node); \ + drgn_##variant##_tree_delete(&tree->root, node); \ return it; \ } \ \ diff --git a/libdrgn/bitops.h b/libdrgn/bitops.h index 49b1b748d..3877889c0 100644 --- a/libdrgn/bitops.h +++ b/libdrgn/bitops.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
+// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** @@ -38,7 +38,7 @@ * * @param[in] x Integer. */ -#define ctz(x) generic_bitop(x, PP_UNIQUE(_x), __builtin_ctz) +#define ctz(x) generic_bitop(x, PP_UNIQUE(_x), builtin_bitop_impl, ctz) /** * Find Last Set bit. @@ -60,7 +60,7 @@ * * @param[in] x Integer. */ -#define fls(x) generic_bitop(x, PP_UNIQUE(_x), fls_) +#define fls(x) generic_bitop(x, PP_UNIQUE(_x), fls_impl,) /** @cond */ /* * The straightfoward implementation is bits - clz. However, as noted by the @@ -71,21 +71,19 @@ * This doesn't do the normal macro argument safety stuff because it should only * be used via generic_bitop() which already does it. */ -#define fls_impl(x, type, suffix) \ - (x ? 1 + ((8 * sizeof(type) - 1) ^ __builtin_clz##suffix(x)) : 0) -#define fls_(x) fls_impl(x, unsigned int,) -#define fls_l(x) fls_impl(x, unsigned long, l) -#define fls_ll(x) fls_impl(x, unsigned long long, ll) +#define fls_impl(arg, suffix, x) \ + (x ? 1 + ((8 * sizeof(0u##suffix) - 1) ^ __builtin_clz##suffix(x)) : 0) -#define generic_bitop(x, unique_x, op) ({ \ +#define builtin_bitop_impl(arg, suffix, x) __builtin_##arg##suffix(x) +#define generic_bitop(x, unique_x, impl, impl_arg) ({ \ __auto_type unique_x = (x); \ _Static_assert(sizeof(unique_x) <= sizeof(unsigned long long), \ "type is too large"); \ (unsigned int)(sizeof(unique_x) <= sizeof(unsigned int) ? \ - op(unique_x) : \ + impl(impl_arg, , unique_x) : \ sizeof(unique_x) <= sizeof(unsigned long) ? \ - op##l(unique_x) : \ - op##ll(unique_x)); \ + impl(impl_arg, l, unique_x) : \ + impl(impl_arg, ll, unique_x)); \ }) /** @endcond */ diff --git a/libdrgn/build-aux/gen_arch.awk b/libdrgn/build-aux/gen_arch.awk index 102bd7165..cfda42456 100644 --- a/libdrgn/build-aux/gen_arch.awk +++ b/libdrgn/build-aux/gen_arch.awk @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later # This script generates drgn architecture definition code ("arch_foo.inc") from @@ -133,7 +133,7 @@ END { print "/* Generated by libdrgn/build-aux/gen_arch.awk. */" print "" - print "#include \"arch_register_layout.h\"" + print "#include \"arch_register_layout.h\" // IWYU pragma: export" print "" print "static const struct drgn_register registers[] = {" diff --git a/libdrgn/build-aux/gen_constants.py b/libdrgn/build-aux/gen_constants.py index b859dfd84..70941ed02 100644 --- a/libdrgn/build-aux/gen_constants.py +++ b/libdrgn/build-aux/gen_constants.py @@ -1,7 +1,6 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later -import os.path import re import sys @@ -63,17 +62,19 @@ def gen_constant_class(drgn_h, output_file, class_name, enum_class, constants, r def gen_constants(input_file, output_file): drgn_h = input_file.read() output_file.write( - f"""\ + """\ /* Generated by libdrgn/build-aux/gen_constants.py. */ #include "drgnpy.h" PyObject *Architecture_class; PyObject *FindObjectFlags_class; -PyObject *PrimitiveType_class; PyObject *PlatformFlags_class; +PyObject *PrimitiveType_class; PyObject *ProgramFlags_class; PyObject *Qualifiers_class; +PyObject *SymbolBinding_class; +PyObject *SymbolKind_class; PyObject *TypeKind_class; """ ) @@ -91,18 +92,18 @@ def gen_constants(input_file, output_file): gen_constant_class( drgn_h, output_file, - "PrimitiveType", - "Enum", + "PlatformFlags", + "Flag", (), - r"DRGN_(C)_TYPE_([a-zA-Z0-9_]+)", + r"DRGN_PLATFORM_([a-zA-Z0-9_]+)(? diff --git a/libdrgn/cfi.h b/libdrgn/cfi.h index 72d954c55..6ff250d6a 100644 --- a/libdrgn/cfi.h +++ b/libdrgn/cfi.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/cityhash.h b/libdrgn/cityhash.h index 29abebad3..9b4fdbb94 100644 --- a/libdrgn/cityhash.h +++ b/libdrgn/cityhash.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #ifndef DRGN_CITYHASH_H diff --git a/libdrgn/configure.ac b/libdrgn/configure.ac index 88378aefe..21feaf7f3 100644 --- a/libdrgn/configure.ac +++ b/libdrgn/configure.ac @@ -1,7 +1,7 @@ -dnl Copyright (c) Facebook, Inc. and its affiliates. +dnl Copyright (c) Meta Platforms, Inc. and affiliates. dnl SPDX-License-Identifier: GPL-3.0-or-later -AC_INIT([libdrgn], [0.0.13], +AC_INIT([libdrgn], [0.0.16], [https://github.com/osandov/drgn/issues],, [https://github.com/osandov/drgn]) diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index f79635231..60817d1a2 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -1,19 +1,15 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include #include -#include #include -#include #include #include #include #include #include #include -#include -#include #include #include #include @@ -22,99 +18,25 @@ #include "debug_info.h" #include "error.h" -#include "language.h" -#include "lazy_object.h" #include "linux_kernel.h" -#include "minmax.h" -#include "object.h" -#include "orc.h" -#include "path.h" #include "program.h" -#include "register_state.h" -#include "serialize.h" -#include "type.h" #include "util.h" -/** - * Arbitrary limit for number of operations to execute in a DWARF expression to - * avoid infinite loops. - */ -static const int MAX_DWARF_EXPR_OPS = 10000; - -struct drgn_dwarf_cie { - /* Whether this CIE is from .eh_frame. */ - bool is_eh; - /* Size of an address in this CIE in bytes. */ - uint8_t address_size; - /* DW_EH_PE_* encoding of addresses in this CIE. 
*/ - uint8_t address_encoding; - /* Whether this CIE has a 'z' augmentation. */ - bool have_augmentation_length; - /* Whether this CIE is for a signal handler ('S' augmentation). */ - bool signal_frame; - drgn_register_number return_address_register; - uint64_t code_alignment_factor; - int64_t data_alignment_factor; - const char *initial_instructions; - size_t initial_instructions_size; -}; - -struct drgn_dwarf_fde { - uint64_t initial_location; - uint64_t address_range; - /* CIE for this FDE as an index into drgn_debug_info_module::cies. */ - size_t cie; - const char *instructions; - size_t instructions_size; -}; - -DEFINE_VECTOR(drgn_dwarf_fde_vector, struct drgn_dwarf_fde) -DEFINE_VECTOR(drgn_dwarf_cie_vector, struct drgn_dwarf_cie) -DEFINE_HASH_MAP(drgn_dwarf_cie_map, size_t, size_t, int_key_hash_pair, - scalar_key_eq) -DEFINE_VECTOR(drgn_cfi_row_vector, struct drgn_cfi_row *) -DEFINE_VECTOR(uint64_vector, uint64_t) - -DEFINE_VECTOR(dwarf_die_vector, Dwarf_Die) - -#define DW_TAG_UNKNOWN_FORMAT "unknown DWARF tag 0x%02x" -#define DW_TAG_BUF_LEN (sizeof(DW_TAG_UNKNOWN_FORMAT) - 4 + 2 * sizeof(int)) - -/** - * Get the name of a DWARF tag. - * - * @return Static string if the tag is known or @p buf if the tag is unknown - * (populated with a description). - */ -static const char *dw_tag_str(int tag, char buf[DW_TAG_BUF_LEN]) -{ - switch (tag) { -#define DWARF_ONE_KNOWN_DW_TAG(name, value) case value: return "DW_TAG_" #name; - DWARF_ALL_KNOWN_DW_TAG -#undef DWARF_ONE_KNOWN_DW_TAG - default: - sprintf(buf, DW_TAG_UNKNOWN_FORMAT, tag); - return buf; - } -} - -/** Like @ref dw_tag_str(), but takes a @c Dwarf_Die. 
*/ -static const char *dwarf_tag_str(Dwarf_Die *die, char buf[DW_TAG_BUF_LEN]) -{ - return dw_tag_str(dwarf_tag(die), buf); -} - static const char * const drgn_debug_scn_names[] = { [DRGN_SCN_DEBUG_INFO] = ".debug_info", [DRGN_SCN_DEBUG_TYPES] = ".debug_types", [DRGN_SCN_DEBUG_ABBREV] = ".debug_abbrev", [DRGN_SCN_DEBUG_STR] = ".debug_str", + [DRGN_SCN_DEBUG_STR_OFFSETS] = ".debug_str_offsets", [DRGN_SCN_DEBUG_LINE] = ".debug_line", + [DRGN_SCN_DEBUG_LINE_STR] = ".debug_line_str", + [DRGN_SCN_DEBUG_ADDR] = ".debug_addr", [DRGN_SCN_DEBUG_FRAME] = ".debug_frame", [DRGN_SCN_EH_FRAME] = ".eh_frame", [DRGN_SCN_ORC_UNWIND_IP] = ".orc_unwind_ip", [DRGN_SCN_ORC_UNWIND] = ".orc_unwind", [DRGN_SCN_DEBUG_LOC] = ".debug_loc", + [DRGN_SCN_DEBUG_LOCLISTS] = ".debug_loclists", [DRGN_SCN_TEXT] = ".text", [DRGN_SCN_GOT] = ".got", }; @@ -142,339 +64,25 @@ struct drgn_error *drgn_debug_info_buffer_error(struct binary_buffer *bb, message); } +DEFINE_VECTOR_FUNCTIONS(drgn_debug_info_module_vector) -/** Iterator over DWARF DIEs in a @ref drgn_debug_info_module. */ -struct drgn_dwarf_die_iterator { - /** Stack of current DIE and its ancestors. */ - struct dwarf_die_vector dies; - Dwarf *dwarf; - /** End of current CU (for bounds checking). */ - const char *cu_end; - /** Offset of next CU. */ - Dwarf_Off next_cu_off; - /** Whether current CU is from .debug_types. */ - bool debug_types; +struct drgn_debug_info_module_key { + const void *build_id; + size_t build_id_len; + uint64_t start, end; }; -static void drgn_dwarf_die_iterator_init(struct drgn_dwarf_die_iterator *it, - Dwarf *dwarf) -{ - dwarf_die_vector_init(&it->dies); - it->dwarf = dwarf; - it->next_cu_off = 0; - it->debug_types = false; -} - -static void drgn_dwarf_die_iterator_deinit(struct drgn_dwarf_die_iterator *it) -{ - dwarf_die_vector_deinit(&it->dies); -} - -/** - * Return the next DWARF DIE in a @ref drgn_dwarf_die_iterator. - * - * The first call returns the top-level DIE for the first unit in the module. 
- * Subsequent calls return children, siblings, and unit DIEs. - * - * This includes the .debug_types section. - * - * @param[in] children If @c true and the last returned DIE has children, return - * its first child (this is a pre-order traversal). Otherwise, return the next - * DIE at the level less than or equal to the last returned DIE, i.e., the last - * returned DIE's sibling, or its ancestor's sibling, or the next top-level unit - * DIE. - * @param[in] subtree If zero, iterate over all DIEs in all units. If non-zero, - * stop after returning all DIEs in the subtree rooted at the DIE that was - * returned in the last call as `(*dies_ret)[subtree - 1]`. - * @param[out] dies_ret Returned array containing DIE and its ancestors. - * `(*dies_ret)[*length_ret - 1]` is the DIE itself, - * `(*dies_ret)[*length_ret - 2]` is its parent, `(*dies_ret)[*length_ret - 3]` - * is its grandparent, etc., and `(*dies_ret)[0]` is the top-level unit DIE. - * This is valid until the next call to @ref drgn_dwarf_die_iterator_next() or - * @ref drgn_dwarf_die_iterator_deinit(). - * @param[out] length_ret Returned length of @p dies_ret. - * @return @c NULL on success, `&drgn_stop` if there are no more DIEs, in which - * case `*length_ret` equals @p subtree and @p dies_ret refers to the root of - * the iterated subtree, non-@c NULL on error, in which case this should not be - * called again. - */ -static struct drgn_error * -drgn_dwarf_die_iterator_next(struct drgn_dwarf_die_iterator *it, bool children, - size_t subtree, Dwarf_Die **dies_ret, - size_t *length_ret) -{ -#define TOP() (&it->dies.data[it->dies.size - 1]) - struct drgn_error *err = NULL; - int r; - Dwarf_Die die; - assert(subtree <= it->dies.size); - if (it->dies.size == 0) { - /* This is the first call. Get the first unit DIE. 
*/ - if (!dwarf_die_vector_append_entry(&it->dies)) { - err = &drgn_enomem; - goto out; - } - } else { - if (children) { - r = dwarf_child(TOP(), &die); - if (r == 0) { - /* The previous DIE has a child. Return it. */ - if (!dwarf_die_vector_append(&it->dies, &die)) - err = &drgn_enomem; - goto out; - } else if (r < 0) { - err = drgn_error_libdw(); - goto out; - } - /* The previous DIE has no children. */ - } - - if (it->dies.size == subtree) { - /* - * The previous DIE is the root of the subtree. We're - * done. - */ - err = &drgn_stop; - goto out; - } - - if (it->dies.size > 1) { - r = dwarf_siblingof(TOP(), &die); - if (r == 0) { - /* The previous DIE has a sibling. Return it. */ - *TOP() = die; - goto out; - } else if (r > 0) { - if (!die.addr) - goto next_unit; - /* - * The previous DIE is the last child of its - * parent. - */ - char *addr = die.addr; - do { - /* - * addr points to the null terminator - * for the list of siblings. Go back up - * to its parent. The next byte is - * either the parent's sibling or - * another null terminator. - */ - it->dies.size--; - addr++; - if (it->dies.size == subtree) { - /* - * We're back to the root of the - * subtree. We're done. - */ - err = &drgn_stop; - goto out; - } - if (it->dies.size == 1 || - addr >= it->cu_end) - goto next_unit; - } while (*addr == '\0'); - /* - * addr now points to the next DIE. Return it. - */ - *TOP() = (Dwarf_Die){ - .cu = it->dies.data[0].cu, - .addr = addr, - }; - goto out; - } else { - err = drgn_error_libdw(); - goto out; - } - } - } - -next_unit:; - /* There are no more DIEs in the current unit. */ - Dwarf_Off cu_off = it->next_cu_off; - size_t cu_header_size; - uint64_t type_signature; - r = dwarf_next_unit(it->dwarf, cu_off, &it->next_cu_off, - &cu_header_size, NULL, NULL, NULL, NULL, - it->debug_types ? &type_signature : NULL, NULL); - if (r == 0) { - /* Got the next unit. Return the unit DIE. 
*/ - if (it->debug_types) { - r = !dwarf_offdie_types(it->dwarf, - cu_off + cu_header_size, TOP()); - } else { - r = !dwarf_offdie(it->dwarf, cu_off + cu_header_size, - TOP()); - } - if (r) { - err = drgn_error_libdw(); - goto out; - } - it->cu_end = ((const char *)TOP()->addr - - dwarf_dieoffset(TOP()) - + it->next_cu_off); - } else if (r > 0) { - if (!it->debug_types) { - it->next_cu_off = 0; - it->debug_types = true; - goto next_unit; - } - /* There are no more units. */ - err = &drgn_stop; - } else { - err = drgn_error_libdw(); - } - -out: - /* - * Return these even in the error case to avoid maybe uninitialized - * warnings in the caller. - */ - *dies_ret = it->dies.data; - *length_ret = it->dies.size; - return err; -#undef TOP -} - -struct drgn_error * -drgn_debug_info_module_find_dwarf_scopes(struct drgn_debug_info_module *module, - uint64_t pc, uint64_t *bias_ret, - Dwarf_Die **dies_ret, - size_t *length_ret) -{ - struct drgn_error *err; - - Dwarf_Addr bias; - Dwarf *dwarf = dwfl_module_getdwarf(module->dwfl_module, &bias); - if (!dwarf) - return drgn_error_libdw(); - *bias_ret = bias; - pc -= bias; - - /* First, try to get the CU containing the PC. 
*/ - Dwarf_Aranges *aranges; - size_t naranges; - if (dwarf_getaranges(dwarf, &aranges, &naranges) < 0) - return drgn_error_libdw(); - - struct drgn_dwarf_die_iterator it; - bool children; - size_t subtree; - Dwarf_Off offset; - if (dwarf_getarangeinfo(dwarf_getarange_addr(aranges, pc), NULL, NULL, - &offset) >= 0) { - drgn_dwarf_die_iterator_init(&it, dwarf); - Dwarf_Die *cu_die = dwarf_die_vector_append_entry(&it.dies); - if (!cu_die) { - err = &drgn_enomem; - goto err; - } - if (!dwarf_offdie(dwarf, offset, cu_die)) { - err = drgn_error_libdw(); - goto err; - } - if (dwarf_next_unit(dwarf, offset - dwarf_cuoffset(cu_die), - &it.next_cu_off, NULL, NULL, NULL, NULL, - NULL, NULL, NULL)) { - err = drgn_error_libdw(); - goto err; - } - it.cu_end = ((const char *)cu_die->addr - - dwarf_dieoffset(cu_die) - + it.next_cu_off); - children = true; - subtree = 1; - } else { - /* - * Range was not found. .debug_aranges could be missing or - * incomplete, so fall back to checking each CU. - */ - drgn_dwarf_die_iterator_init(&it, dwarf); - children = false; - subtree = 0; - } - - /* Now find DIEs containing the PC. 
*/ - Dwarf_Die *dies; - size_t length; - while (!(err = drgn_dwarf_die_iterator_next(&it, children, subtree, - &dies, &length))) { - int r = dwarf_haspc(&dies[length - 1], pc); - if (r > 0) { - children = true; - subtree = length; - } else if (r < 0) { - err = drgn_error_libdw(); - goto err; - } - } - if (err != &drgn_stop) - goto err; - - *dies_ret = dies; - *length_ret = length; - return NULL; - -err: - drgn_dwarf_die_iterator_deinit(&it); - return err; -} - -struct drgn_error *drgn_find_die_ancestors(Dwarf_Die *die, Dwarf_Die **dies_ret, - size_t *length_ret) +static inline struct drgn_debug_info_module_key +drgn_debug_info_module_key(struct drgn_debug_info_module * const *entry) { - struct drgn_error *err; - - Dwarf *dwarf = dwarf_cu_getdwarf(die->cu); - if (!dwarf) - return drgn_error_libdw(); - - struct drgn_dwarf_die_iterator it; - drgn_dwarf_die_iterator_init(&it, dwarf); - Dwarf_Die *cu_die = dwarf_die_vector_append_entry(&it.dies); - if (!cu_die) { - err = &drgn_enomem; - goto err; - } - Dwarf_Half cu_version; - Dwarf_Off type_offset; - if (!dwarf_cu_die(die->cu, cu_die, &cu_version, NULL, NULL, NULL, NULL, - &type_offset)) { - err = drgn_error_libdw(); - goto err; - } - it.debug_types = cu_version == 4 && type_offset != 0; - uint64_t type_signature; - Dwarf_Off cu_die_offset = dwarf_dieoffset(cu_die); - if (dwarf_next_unit(dwarf, cu_die_offset - dwarf_cuoffset(cu_die), - &it.next_cu_off, NULL, NULL, NULL, NULL, NULL, - it.debug_types ? 
&type_signature : NULL, NULL)) { - err = drgn_error_libdw(); - goto err; - } - it.cu_end = (const char *)cu_die->addr - cu_die_offset + it.next_cu_off; - - Dwarf_Die *dies; - size_t length; - while (!(err = drgn_dwarf_die_iterator_next(&it, true, 1, &dies, - &length))) { - if (dies[length - 1].addr == die->addr) { - *dies_ret = dies; - *length_ret = length - 1; - return NULL; - } - } - if (err == &drgn_stop) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "could not find DWARF DIE ancestors"); - } -err: - drgn_dwarf_die_iterator_deinit(&it); - return err; + return (struct drgn_debug_info_module_key){ + .build_id = (*entry)->build_id, + .build_id_len = (*entry)->build_id_len, + .start = (*entry)->start, + .end = (*entry)->end, + }; } -DEFINE_VECTOR_FUNCTIONS(drgn_debug_info_module_vector) - static inline struct hash_pair drgn_debug_info_module_key_hash_pair(const struct drgn_debug_info_module_key *key) { @@ -492,11 +100,11 @@ drgn_debug_info_module_key_eq(const struct drgn_debug_info_module_key *a, a->start == b->start && a->end == b->end); } DEFINE_HASH_TABLE_FUNCTIONS(drgn_debug_info_module_table, + drgn_debug_info_module_key, drgn_debug_info_module_key_hash_pair, drgn_debug_info_module_key_eq) -DEFINE_HASH_TABLE_FUNCTIONS(c_string_set, c_string_key_hash_pair, - c_string_key_eq) +DEFINE_HASH_SET_FUNCTIONS(c_string_set, c_string_key_hash_pair, c_string_key_eq) /** * @c Dwfl_Callbacks::find_elf() implementation. 
@@ -610,10 +218,8 @@ drgn_debug_info_module_destroy(struct drgn_debug_info_module *module) { if (module) { drgn_error_destroy(module->err); - free(module->orc_entries); - free(module->orc_pc_offsets); - free(module->fdes); - free(module->cies); + drgn_orc_module_info_deinit(module); + drgn_dwarf_module_info_deinit(module); elf_end(module->elf); if (module->fd != -1) close(module->fd); @@ -921,9 +527,9 @@ drgn_debug_info_report_elf(struct drgn_debug_info_load_state *load, ssize_t build_id_len = dwelf_elf_gnu_build_id(elf, &build_id); if (build_id_len < 0) { err = drgn_debug_info_report_error(load, path, NULL, - drgn_error_libdwfl()); - close(fd); + drgn_error_libelf()); elf_end(elf); + close(fd); return err; } else if (build_id_len == 0) { build_id = NULL; @@ -975,4680 +581,1505 @@ static int drgn_debug_info_report_dwfl_module(Dwfl_Module *dwfl_module, return DWARF_CB_ABORT; } -static struct drgn_error * -userspace_report_debug_info(struct drgn_debug_info_load_state *load) +static struct drgn_error *drgn_get_nt_file(Elf *elf, const char **ret, + size_t *len_ret) { - struct drgn_error *err; - - for (size_t i = 0; i < load->num_paths; i++) { - int fd; - Elf *elf; - err = open_elf_file(load->paths[i], &fd, &elf); - if (err) { - err = drgn_debug_info_report_error(load, load->paths[i], - NULL, err); - if (err) - return err; - continue; - } - /* - * We haven't implemented a way to get the load address for - * anything reported here, so for now we report it as unloaded. 
- */ - err = drgn_debug_info_report_elf(load, load->paths[i], fd, elf, - 0, 0, NULL, NULL); - if (err) - return err; - } - - if (load->load_default) { - Dwfl *dwfl = load->dbinfo->dwfl; - struct drgn_program *prog = load->dbinfo->prog; - if (prog->flags & DRGN_PROGRAM_IS_LIVE) { - int ret = dwfl_linux_proc_report(dwfl, prog->pid); - if (ret == -1) { - return drgn_error_libdwfl(); - } else if (ret) { - return drgn_error_create_os("dwfl_linux_proc_report", - ret, NULL); + size_t phnum; + if (elf_getphdrnum(elf, &phnum) != 0) + return drgn_error_libelf(); + for (size_t i = 0; i < phnum; i++) { + GElf_Phdr phdr_mem, *phdr = gelf_getphdr(elf, i, &phdr_mem); + if (!phdr) + return drgn_error_libelf(); + if (phdr->p_type == PT_NOTE) { + Elf_Data *data = elf_getdata_rawchunk(elf, + phdr->p_offset, + phdr->p_filesz, + note_header_type(phdr->p_align)); + if (!data) + return drgn_error_libelf(); + GElf_Nhdr nhdr; + size_t offset = 0, name_offset, desc_offset; + while (offset < data->d_size && + (offset = gelf_getnote(data, offset, &nhdr, + &name_offset, + &desc_offset))) { + const char *name = + (char *)data->d_buf + name_offset; + if (nhdr.n_namesz == sizeof("CORE") && + memcmp(name, "CORE", sizeof("CORE")) == 0 && + nhdr.n_type == NT_FILE) { + *ret = (char *)data->d_buf + desc_offset; + *len_ret = nhdr.n_descsz; + return NULL; + } } - } else if (dwfl_core_file_report(dwfl, prog->core, - NULL) == -1) { - return drgn_error_libdwfl(); } } + *ret = NULL; + *len_ret = 0; return NULL; } -static struct drgn_error *relocate_elf_section(Elf_Scn *scn, Elf_Scn *reloc_scn, - Elf_Scn *symtab_scn, - const uint64_t *sh_addrs, - size_t shdrnum, - const struct drgn_platform *platform) +struct drgn_mapped_file_segment { + uint64_t start; + uint64_t end; + uint64_t file_offset; +}; + +DEFINE_VECTOR(drgn_mapped_file_segment_vector, struct drgn_mapped_file_segment) + +DEFINE_HASH_MAP(drgn_mapped_files, const char *, + struct drgn_mapped_file_segment_vector, c_string_key_hash_pair, + 
c_string_key_eq) + +struct userspace_core_report_state { + struct drgn_mapped_files files; + char *phdr_buf; + size_t phdr_buf_capacity; + char *segment_buf; + size_t segment_buf_capacity; +}; + +static struct drgn_error *parse_nt_file_error(struct binary_buffer *bb, + const char *pos, + const char *message) { - struct drgn_error *err; + return drgn_error_create(DRGN_ERROR_OTHER, "couldn't parse NT_FILE"); +} - bool is_64_bit = drgn_platform_is_64_bit(platform); - bool bswap = drgn_platform_bswap(platform); - apply_elf_rela_fn *apply_elf_rela = platform->arch->apply_elf_rela; +static bool +drgn_mapped_file_segments_contiguous(const struct drgn_mapped_file_segment *segment1, + const struct drgn_mapped_file_segment *segment2) +{ + if (segment1->end != segment2->start) + return false; + uint64_t size = segment1->end - segment1->start; + return segment1->file_offset + size == segment2->file_offset; +} - Elf_Data *data, *reloc_data, *symtab_data; - err = read_elf_section(scn, &data); - if (err) - return err; +static struct drgn_error * +userspace_core_get_mapped_files(struct drgn_debug_info_load_state *load, + struct userspace_core_report_state *core, + const char *nt_file, size_t nt_file_len) +{ + struct drgn_error *err; - struct drgn_relocating_section relocating = { - .buf = data->d_buf, - .buf_size = data->d_size, - .addr = sh_addrs[elf_ndxscn(scn)], - .bswap = bswap, - }; + GElf_Ehdr ehdr_mem, *ehdr = gelf_getehdr(load->dbinfo->prog->core, + &ehdr_mem); + if (!ehdr) + return drgn_error_libelf(); + bool is_64_bit = ehdr->e_ident[EI_CLASS] == ELFCLASS64; + bool little_endian = ehdr->e_ident[EI_DATA] == ELFDATA2LSB; - err = read_elf_section(reloc_scn, &reloc_data); - if (err) - return err; - const void *relocs = reloc_data->d_buf; - size_t reloc_size = is_64_bit ? 
sizeof(Elf64_Rela) : sizeof(Elf32_Rela); - size_t num_relocs = reloc_data->d_size / reloc_size; + struct binary_buffer bb; + binary_buffer_init(&bb, nt_file, nt_file_len, little_endian, + parse_nt_file_error); - err = read_elf_section(symtab_scn, &symtab_data); - if (err) - return err; - const void *syms = symtab_data->d_buf; - size_t sym_size = is_64_bit ? sizeof(Elf64_Sym) : sizeof(Elf32_Sym); - size_t num_syms = symtab_data->d_size / sym_size; + /* + * fs/binfmt_elf.c in the Linux kernel source code documents the format + * of NT_FILE as: + * + * long count -- how many files are mapped + * long page_size -- units for file_ofs + * array of [COUNT] elements of + * long start + * long end + * long file_ofs + * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL... + */ + uint64_t count, page_size; + if (is_64_bit) { + if ((err = binary_buffer_next_u64(&bb, &count))) + return err; + if (count > UINT64_MAX / 24) + return binary_buffer_error(&bb, "count is too large"); + if ((err = binary_buffer_next_u64(&bb, &page_size)) || + (err = binary_buffer_skip(&bb, count * 24))) + return err; + } else { + if ((err = binary_buffer_next_u32_into_u64(&bb, &count))) + return err; + if (count > UINT64_MAX / 12) + return binary_buffer_error(&bb, "count is too large"); + if ((err = binary_buffer_next_u32_into_u64(&bb, &page_size)) || + (err = binary_buffer_skip(&bb, count * 12))) + return err; + } - for (size_t i = 0; i < num_relocs; i++) { - uint64_t r_offset; - uint32_t r_sym; - uint32_t r_type; - int64_t r_addend; + for (uint64_t i = 0; i < count; i++) { + struct drgn_mapped_file_segment segment; if (is_64_bit) { - Elf64_Rela *rela = (Elf64_Rela *)relocs + i; - uint64_t r_info; - memcpy(&r_offset, &rela->r_offset, sizeof(r_offset)); - memcpy(&r_info, &rela->r_info, sizeof(r_info)); - memcpy(&r_addend, &rela->r_addend, sizeof(r_addend)); - if (bswap) { - r_offset = bswap_64(r_offset); - r_info = bswap_64(r_info); - r_addend = bswap_64(r_addend); + memcpy(&segment, 
nt_file + 16 + i * 24, 24); + if (bb.bswap) { + segment.start = bswap_64(segment.start); + segment.end = bswap_64(segment.end); + segment.file_offset = bswap_64(segment.file_offset); } - r_sym = ELF64_R_SYM(r_info); - r_type = ELF64_R_TYPE(r_info); } else { - Elf32_Rela *rela32 = (Elf32_Rela *)relocs + i; - uint32_t r_offset32; - uint32_t r_info32; - int32_t r_addend32; - memcpy(&r_offset32, &rela32->r_offset, sizeof(r_offset32)); - memcpy(&r_info32, &rela32->r_info, sizeof(r_info32)); - memcpy(&r_addend32, &rela32->r_addend, sizeof(r_addend32)); - if (bswap) { - r_offset32 = bswap_32(r_offset32); - r_info32 = bswap_32(r_info32); - r_addend32 = bswap_32(r_addend32); + struct { + uint32_t start; + uint32_t end; + uint32_t file_offset; + } segment32; + memcpy(&segment32, nt_file + 8 + i * 12, 12); + if (bb.bswap) { + segment.start = bswap_32(segment32.start); + segment.end = bswap_32(segment32.end); + segment.file_offset = bswap_32(segment32.file_offset); + } else { + segment.start = segment32.start; + segment.end = segment32.end; + segment.file_offset = segment32.file_offset; } - r_offset = r_offset32; - r_sym = ELF32_R_SYM(r_info32); - r_type = ELF32_R_TYPE(r_info32); - r_addend = r_addend32; - } - if (r_sym >= num_syms) { - return drgn_error_create(DRGN_ERROR_OTHER, - "invalid ELF relocation symbol"); - } - uint16_t st_shndx; - uint64_t st_value; - if (is_64_bit) { - const Elf64_Sym *sym = (Elf64_Sym *)syms + r_sym; - memcpy(&st_shndx, &sym->st_shndx, sizeof(st_shndx)); - memcpy(&st_value, &sym->st_value, sizeof(st_value)); - if (bswap) { - st_shndx = bswap_16(st_shndx); - st_value = bswap_64(st_value); - } - } else { - const Elf32_Sym *sym = (Elf32_Sym *)syms + r_sym; - memcpy(&st_shndx, &sym->st_shndx, sizeof(st_shndx)); - uint32_t st_value32; - memcpy(&st_value32, &sym->st_value, sizeof(st_value32)); - if (bswap) { - st_shndx = bswap_16(st_shndx); - st_value32 = bswap_32(st_value32); - } - st_value = st_value32; - } - if (st_shndx >= shdrnum) { - return 
drgn_error_create(DRGN_ERROR_OTHER, - "invalid ELF symbol section index"); } + segment.file_offset *= page_size; - err = apply_elf_rela(&relocating, r_offset, r_type, r_addend, - sh_addrs[st_shndx] + st_value); - if (err) + struct drgn_mapped_files_entry entry = { + .key = bb.pos, + }; + if ((err = binary_buffer_skip_string(&bb))) return err; - } + struct drgn_mapped_files_iterator it; + int r = drgn_mapped_files_insert(&core->files, &entry, &it); + if (r < 0) + return &drgn_enomem; + if (r == 1) + drgn_mapped_file_segment_vector_init(&it.entry->value); - /* - * Mark the relocation section as empty so that libdwfl doesn't try to - * apply it again. - */ - GElf_Shdr *shdr, shdr_mem; - shdr = gelf_getshdr(reloc_scn, &shdr_mem); - if (!shdr) - return drgn_error_libelf(); - shdr->sh_size = 0; - if (!gelf_update_shdr(reloc_scn, shdr)) - return drgn_error_libelf(); - reloc_data->d_size = 0; + /* + * The Linux kernel creates separate entries for contiguous + * mappings with different memory protections even though the + * protection is not included in NT_FILE. Merge them if we can. + */ + if (it.entry->value.size > 0 && + drgn_mapped_file_segments_contiguous(&it.entry->value.data[it.entry->value.size - 1], + &segment)) + it.entry->value.data[it.entry->value.size - 1].end = segment.end; + else if (!drgn_mapped_file_segment_vector_append(&it.entry->value, + &segment)) + return &drgn_enomem; + } return NULL; } -/* - * Before the debugging information in a relocatable ELF file (e.g., Linux - * kernel module) can be used, it must have ELF relocations applied. This is - * usually done by libdwfl. However, libdwfl is relatively slow at it. This is a - * much faster implementation. 
- */ -static struct drgn_error *relocate_elf_file(Elf *elf) +static bool build_id_matches(Elf *elf, const void *build_id, + size_t build_id_len) { - struct drgn_error *err; - - GElf_Ehdr ehdr_mem, *ehdr; - ehdr = gelf_getehdr(elf, &ehdr_mem); - if (!ehdr) - return drgn_error_libelf(); - - if (ehdr->e_type != ET_REL) { - /* Not a relocatable file. */ - return NULL; - } - - struct drgn_platform platform; - drgn_platform_from_elf(ehdr, &platform); - if (!platform.arch->apply_elf_rela) { - /* Unsupported; fall back to libdwfl. */ - return NULL; - } - - size_t shdrnum; - if (elf_getshdrnum(elf, &shdrnum)) - return drgn_error_libelf(); - uint64_t *sh_addrs = calloc(shdrnum, sizeof(sh_addrs[0])); - if (!sh_addrs && shdrnum > 0) - return &drgn_enomem; - - Elf_Scn *scn = NULL; - while ((scn = elf_nextscn(elf, scn))) { - GElf_Shdr *shdr, shdr_mem; - shdr = gelf_getshdr(scn, &shdr_mem); - if (!shdr) { - err = drgn_error_libelf(); - goto out; - } - sh_addrs[elf_ndxscn(scn)] = shdr->sh_addr; - } - - size_t shstrndx; - if (elf_getshdrstrndx(elf, &shstrndx)) { - err = drgn_error_libelf(); - goto out; - } - - Elf_Scn *reloc_scn = NULL; - while ((reloc_scn = elf_nextscn(elf, reloc_scn))) { - GElf_Shdr *shdr, shdr_mem; - shdr = gelf_getshdr(reloc_scn, &shdr_mem); - if (!shdr) { - err = drgn_error_libelf(); - goto out; - } - /* We don't support any architectures that use SHT_REL yet. 
*/ - if (shdr->sh_type != SHT_RELA) - continue; - - const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); - if (!scnname) { - err = drgn_error_libelf(); - goto out; - } - - if (strstartswith(scnname, ".rela.debug_") || - strstartswith(scnname, ".rela.orc_")) { - Elf_Scn *scn = elf_getscn(elf, shdr->sh_info); - if (!scn) { - err = drgn_error_libelf(); - goto out; - } - - Elf_Scn *symtab_scn = elf_getscn(elf, shdr->sh_link); - if (!symtab_scn) { - err = drgn_error_libelf(); - goto out; - } - - err = relocate_elf_section(scn, reloc_scn, symtab_scn, - sh_addrs, shdrnum, - &platform); - if (err) - goto out; - } - } -out: - free(sh_addrs); - return NULL; + const void *elf_build_id; + ssize_t elf_build_id_len = dwelf_elf_gnu_build_id(elf, &elf_build_id); + if (elf_build_id_len < 0) + return false; + return (elf_build_id_len == build_id_len && + memcmp(elf_build_id, build_id, build_id_len) == 0); } static struct drgn_error * -drgn_debug_info_find_sections(struct drgn_debug_info_module *module) +userspace_core_elf_address_range(uint16_t e_type, size_t phnum, + struct drgn_error *(*get_phdr)(void *, size_t, GElf_Phdr *), + void *arg, + const struct drgn_mapped_file_segment *segments, + size_t num_segments, + const struct drgn_mapped_file_segment *ehdr_segment, + uint64_t *bias_ret, uint64_t *start_ret, + uint64_t *end_ret) { struct drgn_error *err; - if (module->elf) { - err = relocate_elf_file(module->elf); + /* + * First, find the virtual address of the ELF header so that we can + * calculate the bias. + */ + uint64_t ehdr_vaddr; + size_t i; + for (i = 0; i < phnum; i++) { + GElf_Phdr phdr; + err = get_phdr(arg, i, &phdr); if (err) return err; + if (phdr.p_type == PT_LOAD) { + uint64_t align = phdr.p_align ? phdr.p_align : 1; + if ((phdr.p_offset & -align) == 0) { + ehdr_vaddr = phdr.p_vaddr & -align; + break; + } + } + } + if (i >= phnum) { + /* + * No loadable segments contain the ELF header. This can't be + * our file. 
+ */ + *bias_ret = 0; +not_loaded: + *start_ret = *end_ret = 0; + return NULL; + } + *bias_ret = ehdr_segment->start - ehdr_vaddr; + if (*bias_ret != 0 && e_type == ET_EXEC) { + /* The executable is not loaded at the correct address. */ + goto not_loaded; } /* - * Note: not dwfl_module_getelf(), because then libdwfl applies - * ELF relocations to all sections, not just debug sections. + * Now check all of the program headers to (1) get the module address + * range and (2) make sure that they are mapped as expected. If we're + * lucky, this can detect a file that was mmap'd and not actually loaded + * by the kernel or dynamic loader. This could also be the wrong file. */ - Dwarf_Addr bias; - Dwarf *dwarf = dwfl_module_getdwarf(module->dwfl_module, &bias); - if (!dwarf) - return drgn_error_libdwfl(); - Elf *elf = dwarf_getelf(dwarf); - if (!elf) - return drgn_error_libdw(); - GElf_Ehdr ehdr_mem, *ehdr = gelf_getehdr(elf, &ehdr_mem); - if (!ehdr) - return drgn_error_libelf(); - drgn_platform_from_elf(ehdr, &module->platform); - - size_t shstrndx; - if (elf_getshdrstrndx(elf, &shstrndx)) - return drgn_error_libelf(); - - Elf_Scn *scn = NULL; - while ((scn = elf_nextscn(elf, scn))) { - GElf_Shdr shdr_mem; - GElf_Shdr *shdr = gelf_getshdr(scn, &shdr_mem); - if (!shdr) - return drgn_error_libelf(); - - if (shdr->sh_type != SHT_PROGBITS) + const struct drgn_mapped_file_segment *segment = segments; + const struct drgn_mapped_file_segment *end_segment = + segments + num_segments; + uint64_t start = 0, end = 0; + bool first = true; + for (i = 0; i < phnum; i++) { + GElf_Phdr phdr; + err = get_phdr(arg, i, &phdr); + if (err) + return err; + if (phdr.p_type != PT_LOAD) continue; - const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); - if (!scnname) - return drgn_error_libelf(); - - for (size_t i = 0; i < DRGN_NUM_DEBUG_SCNS; i++) { - if (!module->scns[i] && - strcmp(scnname, drgn_debug_scn_names[i]) == 0) { - module->scns[i] = scn; - break; + uint64_t vaddr = 
phdr.p_vaddr + *bias_ret; + if (phdr.p_filesz != 0) { + /* + * Advance to the mapped segment containing the start + * address. + */ + while (vaddr >= segment->end) { + if (++segment == end_segment) + goto not_loaded; + if (vaddr < segment->start) + goto not_loaded; + } + if (segment->file_offset + (vaddr - segment->start) != + phdr.p_offset) { + /* + * The address in the core dump does not map to + * the segment's file offset. + */ + goto not_loaded; + } + if (phdr.p_filesz > segment->end - vaddr) { + /* Part of the segment is not mapped. */ + goto not_loaded; } } + if (first) { + uint64_t align = phdr.p_align ? phdr.p_align : 1; + start = vaddr & -align; + first = false; + } + end = vaddr + phdr.p_memsz; } + if (start >= end) + goto not_loaded; + *start_ret = start; + *end_ret = end; return NULL; } -static struct drgn_error * -drgn_debug_info_precache_sections(struct drgn_debug_info_module *module) -{ - struct drgn_error *err; +static bool alloc_or_reuse(char **buf, size_t *capacity, uint64_t size) +{ + if (size > *capacity) { + if (size > SIZE_MAX) + return false; + free(*buf); + *buf = malloc(size); + if (!*buf) { + *capacity = 0; + return false; + } + *capacity = size; + } + return true; +} + +/* ehdr_buf must be aligned as Elf64_Ehdr. 
*/ +static void read_ehdr(const void *ehdr_buf, GElf_Ehdr *ret, bool *is_64_bit_ret, + bool *bswap_ret) +{ + *is_64_bit_ret = ((unsigned char *)ehdr_buf)[EI_CLASS] == ELFCLASS64; + bool little_endian = + ((unsigned char *)ehdr_buf)[EI_DATA] == ELFDATA2LSB; + *bswap_ret = little_endian != HOST_LITTLE_ENDIAN; + if (*is_64_bit_ret) { + const Elf64_Ehdr *ehdr64 = ehdr_buf; + if (*bswap_ret) { + memcpy(ret->e_ident, ehdr64->e_ident, EI_NIDENT); + ret->e_type = bswap_16(ehdr64->e_type); + ret->e_machine = bswap_16(ehdr64->e_machine); + ret->e_version = bswap_32(ehdr64->e_version); + ret->e_entry = bswap_64(ehdr64->e_entry); + ret->e_phoff = bswap_64(ehdr64->e_phoff); + ret->e_shoff = bswap_64(ehdr64->e_shoff); + ret->e_flags = bswap_32(ehdr64->e_flags); + ret->e_ehsize = bswap_16(ehdr64->e_ehsize); + ret->e_phentsize = bswap_16(ehdr64->e_phentsize); + ret->e_phnum = bswap_16(ehdr64->e_phnum); + ret->e_shentsize = bswap_16(ehdr64->e_shentsize); + ret->e_shnum = bswap_16(ehdr64->e_shnum); + ret->e_shstrndx = bswap_16(ehdr64->e_shstrndx); + } else { + *ret = *ehdr64; + } + } else { + const Elf32_Ehdr *ehdr32 = ehdr_buf; + memcpy(ret->e_ident, ehdr32->e_ident, EI_NIDENT); + if (*bswap_ret) { + ret->e_type = bswap_16(ehdr32->e_type); + ret->e_machine = bswap_16(ehdr32->e_machine); + ret->e_version = bswap_32(ehdr32->e_version); + ret->e_entry = bswap_32(ehdr32->e_entry); + ret->e_phoff = bswap_32(ehdr32->e_phoff); + ret->e_shoff = bswap_32(ehdr32->e_shoff); + ret->e_flags = bswap_32(ehdr32->e_flags); + ret->e_ehsize = bswap_16(ehdr32->e_ehsize); + ret->e_phentsize = bswap_16(ehdr32->e_phentsize); + ret->e_phnum = bswap_16(ehdr32->e_phnum); + ret->e_shentsize = bswap_16(ehdr32->e_shentsize); + ret->e_shnum = bswap_16(ehdr32->e_shnum); + ret->e_shstrndx = bswap_16(ehdr32->e_shstrndx); + } else { + ret->e_type = ehdr32->e_type; + ret->e_machine = ehdr32->e_machine; + ret->e_version = ehdr32->e_version; + ret->e_entry = ehdr32->e_entry; + ret->e_phoff = ehdr32->e_phoff; + 
ret->e_shoff = ehdr32->e_shoff; + ret->e_flags = ehdr32->e_flags; + ret->e_ehsize = ehdr32->e_ehsize; + ret->e_phentsize = ehdr32->e_phentsize; + ret->e_phnum = ehdr32->e_phnum; + ret->e_shentsize = ehdr32->e_shentsize; + ret->e_shnum = ehdr32->e_shnum; + ret->e_shstrndx = ehdr32->e_shstrndx; + } + } +} - for (size_t i = 0; i < DRGN_NUM_DEBUG_SCN_DATA_PRECACHE; i++) { - if (module->scns[i]) { - err = read_elf_section(module->scns[i], - &module->scn_data[i]); - if (err) - return err; +/* phdr_buf must be aligned as Elf64_Phdr. */ +static void read_phdr(const void *phdr_buf, size_t i, bool is_64_bit, + bool bswap, GElf_Phdr *ret) +{ + if (is_64_bit) { + const Elf64_Phdr *phdr64 = (Elf64_Phdr *)phdr_buf + i; + if (bswap) { + ret->p_type = bswap_32(phdr64->p_type); + ret->p_flags = bswap_32(phdr64->p_flags); + ret->p_offset = bswap_64(phdr64->p_offset); + ret->p_vaddr = bswap_64(phdr64->p_vaddr); + ret->p_paddr = bswap_64(phdr64->p_paddr); + ret->p_filesz = bswap_64(phdr64->p_filesz); + ret->p_memsz = bswap_64(phdr64->p_memsz); + ret->p_align = bswap_64(phdr64->p_align); + } else { + *ret = *phdr64; + } + } else { + const Elf32_Phdr *phdr32 = (Elf32_Phdr *)phdr_buf + i; + if (bswap) { + ret->p_type = bswap_32(phdr32->p_type); + ret->p_offset = bswap_32(phdr32->p_offset); + ret->p_vaddr = bswap_32(phdr32->p_vaddr); + ret->p_paddr = bswap_32(phdr32->p_paddr); + ret->p_filesz = bswap_32(phdr32->p_filesz); + ret->p_memsz = bswap_32(phdr32->p_memsz); + ret->p_flags = bswap_32(phdr32->p_flags); + ret->p_align = bswap_32(phdr32->p_align); + } else { + ret->p_type = phdr32->p_type; + ret->p_offset = phdr32->p_offset; + ret->p_vaddr = phdr32->p_vaddr; + ret->p_paddr = phdr32->p_paddr; + ret->p_filesz = phdr32->p_filesz; + ret->p_memsz = phdr32->p_memsz; + ret->p_flags = phdr32->p_flags; + ret->p_align = phdr32->p_align; } } +} +static const char *read_build_id(const char *buf, size_t buf_len, + uint64_t align, bool bswap, + size_t *len_ret) +{ /* - * Truncate any extraneous 
bytes so that we can assume that a pointer - * within .debug_str is always null-terminated. + * Build IDs are usually 16 or 20 bytes (MD5 or SHA-1, respectively), so + * these arbitrary limits are generous. */ - Elf_Data *debug_str = module->scn_data[DRGN_SCN_DEBUG_STR]; - if (debug_str) { - const char *buf = debug_str->d_buf; - const char *nul = memrchr(buf, '\0', debug_str->d_size); - if (nul) - debug_str->d_size = nul - buf + 1; - else - debug_str->d_size = 0; + static const uint32_t build_id_min_size = 2; + static const uint32_t build_id_max_size = 1024; + /* Elf32_Nhdr is the same as Elf64_Nhdr. */ + Elf64_Nhdr nhdr; + const char *p = buf; + while (buf + buf_len - p >= sizeof(nhdr)) { + memcpy(&nhdr, p, sizeof(nhdr)); + if (bswap) { + nhdr.n_namesz = bswap_32(nhdr.n_namesz); + nhdr.n_descsz = bswap_32(nhdr.n_descsz); + nhdr.n_type = bswap_32(nhdr.n_type); + } + p += sizeof(nhdr); + + uint64_t namesz = (nhdr.n_namesz + align - 1) & ~(align - 1); + if (namesz > buf + buf_len - p) + return NULL; + const char *name = p; + p += namesz; + + if (nhdr.n_namesz == sizeof("GNU") && + memcmp(name, "GNU", sizeof("GNU")) == 0 && + nhdr.n_type == NT_GNU_BUILD_ID && + nhdr.n_descsz >= build_id_min_size && + nhdr.n_descsz <= build_id_max_size) { + if (nhdr.n_descsz > buf + buf_len - p) + return NULL; + *len_ret = nhdr.n_descsz; + return p; + } + + uint64_t descsz = (nhdr.n_descsz + align - 1) & ~(align - 1); + if (descsz > buf + buf_len - p) + return NULL; + p += descsz; } return NULL; } +struct core_get_phdr_arg { + const void *phdr_buf; + bool is_64_bit; + bool bswap; +}; + static struct drgn_error * -drgn_debug_info_module_cache_section(struct drgn_debug_info_module *module, - enum drgn_debug_info_scn scn) +core_get_phdr(void *arg_, size_t i, GElf_Phdr *ret) { - if (module->scn_data[scn]) - return NULL; - return read_elf_section(module->scns[scn], &module->scn_data[scn]); + struct core_get_phdr_arg *arg = arg_; + read_phdr(arg->phdr_buf, i, arg->is_64_bit, arg->bswap, 
ret); + return NULL; } +struct userspace_core_identified_file { + const void *build_id; + size_t build_id_len; + uint64_t start, end; + bool ignore; + bool have_address_range; +}; + static struct drgn_error * -drgn_debug_info_read_module(struct drgn_debug_info_load_state *load, - struct drgn_dwarf_index_update_state *dindex_state, - struct drgn_debug_info_module *head) +userspace_core_identify_file(struct drgn_program *prog, + struct userspace_core_report_state *core, + const struct drgn_mapped_file_segment *segments, + size_t num_segments, + const struct drgn_mapped_file_segment *ehdr_segment, + struct userspace_core_identified_file *ret) { struct drgn_error *err; - struct drgn_debug_info_module *module; - for (module = head; module; module = module->next) { - err = drgn_debug_info_find_sections(module); - if (err) { - module->err = err; - continue; + + Elf64_Ehdr ehdr_buf; + err = drgn_program_read_memory(prog, &ehdr_buf, ehdr_segment->start, + sizeof(ehdr_buf), false); + if (err) { + if (err->code == DRGN_ERROR_FAULT) { + drgn_error_destroy(err); + err = NULL; } - if (module->scns[DRGN_SCN_DEBUG_INFO] && - module->scns[DRGN_SCN_DEBUG_ABBREV]) { - err = drgn_debug_info_precache_sections(module); - if (err) { - module->err = err; - continue; - } - module->state = DRGN_DEBUG_INFO_MODULE_INDEXING; - return drgn_dwarf_index_read_module(dindex_state, - module); + return err; + } + if (memcmp(&ehdr_buf, ELFMAG, SELFMAG) != 0) { + ret->ignore = true; + return NULL; + } + + GElf_Ehdr ehdr; + struct core_get_phdr_arg arg; + read_ehdr(&ehdr_buf, &ehdr, &arg.is_64_bit, &arg.bswap); + if (ehdr.e_type == ET_CORE || + ehdr.e_phnum == 0 || + ehdr.e_phentsize != + (arg.is_64_bit ? 
sizeof(Elf64_Phdr) : sizeof(Elf32_Phdr))) { + ret->ignore = true; + return NULL; + } + + if (ehdr.e_phnum > SIZE_MAX / ehdr.e_phentsize || + !alloc_or_reuse(&core->phdr_buf, &core->phdr_buf_capacity, + ehdr.e_phnum * ehdr.e_phentsize)) + return &drgn_enomem; + + /* + * Check whether the mapped segment containing the file header also + * contains the program headers. This seems to be the case in practice. + */ + uint64_t ehdr_segment_file_end = + (ehdr_segment->file_offset + + (ehdr_segment->end - ehdr_segment->start)); + if (ehdr_segment_file_end < ehdr.e_phoff || + ehdr_segment_file_end - ehdr.e_phoff < + ehdr.e_phnum * ehdr.e_phentsize) + return NULL; + + err = drgn_program_read_memory(prog, core->phdr_buf, + ehdr_segment->start + ehdr.e_phoff, + ehdr.e_phnum * ehdr.e_phentsize, false); + if (err) { + if (err->code == DRGN_ERROR_FAULT) { + drgn_error_destroy(err); + err = NULL; } + return err; } + arg.phdr_buf = core->phdr_buf; + /* - * We checked all of the files and didn't find debugging information. - * Report why for each one. + * In theory, if the program has a huge number of program headers, they + * may not all be dumped. However, the largest binary I was able to find + * still had all program headers within 1k. * - * (If we did find debugging information, we discard errors on the - * unused files.) + * It'd be more reliable to determine the bias based on the headers that + * were saved, use that to read the build ID, use that to find the ELF + * file, and then determine the address range directly from the ELF + * file. However, we need the address range to report the build ID to + * libdwfl, so we do it this way. 
*/ - err = NULL; - #pragma omp critical(drgn_debug_info_read_module_error) - for (module = head; module; module = module->next) { - const char *name = - dwfl_module_info(module->dwfl_module, NULL, NULL, NULL, - NULL, NULL, NULL, NULL); - if (module->err) { - err = drgn_debug_info_report_error(load, name, NULL, - module->err); - module->err = NULL; - } else { - err = drgn_debug_info_report_error(load, name, - "no debugging information", - NULL); + uint64_t bias; + err = userspace_core_elf_address_range(ehdr.e_type, ehdr.e_phnum, + core_get_phdr, &arg, segments, + num_segments, ehdr_segment, + &bias, &ret->start, &ret->end); + if (err) + return err; + if (ret->start >= ret->end) { + ret->ignore = true; + return NULL; + } + ret->have_address_range = true; + + for (uint16_t i = 0; i < ehdr.e_phnum; i++) { + GElf_Phdr phdr; + core_get_phdr(&arg, i, &phdr); + if (phdr.p_type == PT_NOTE) { + if (!alloc_or_reuse(&core->segment_buf, + &core->segment_buf_capacity, + phdr.p_filesz)) + return &drgn_enomem; + err = drgn_program_read_memory(prog, core->segment_buf, + phdr.p_vaddr + bias, + phdr.p_filesz, false); + if (err) { + if (err->code == DRGN_ERROR_FAULT) { + drgn_error_destroy(err); + continue; + } else { + return err; + } + } + ret->build_id = read_build_id(core->segment_buf, + phdr.p_filesz, + phdr.p_align, arg.bswap, + &ret->build_id_len); + if (ret->build_id) + break; } - if (err) - break; } - return err; + return NULL; +} + +static struct drgn_error *elf_file_get_phdr(void *arg, size_t i, + GElf_Phdr *phdr) +{ + if (!gelf_getphdr(arg, i, phdr)) + return drgn_error_libelf(); + return NULL; } static struct drgn_error * -drgn_debug_info_update_index(struct drgn_debug_info_load_state *load) +userspace_core_maybe_report_file(struct drgn_debug_info_load_state *load, + struct userspace_core_report_state *core, + const char *path, + const struct drgn_mapped_file_segment *segments, + size_t num_segments) { - if (!load->new_modules.size) - return NULL; - struct drgn_debug_info 
*dbinfo = load->dbinfo; - if (!c_string_set_reserve(&dbinfo->module_names, - c_string_set_size(&dbinfo->module_names) + - load->new_modules.size)) - return &drgn_enomem; + struct drgn_error *err; + struct drgn_program *prog = load->dbinfo->prog; + for (size_t ehdr_idx = 0; ehdr_idx < num_segments; ehdr_idx++) { + const struct drgn_mapped_file_segment *ehdr_segment = + &segments[ehdr_idx]; + /* + * There should always be a full page mapped, so even if it's a + * 32-bit file, we can read the 64-bit size. + */ + if (ehdr_segment->file_offset != 0 || + ehdr_segment->end - ehdr_segment->start < sizeof(Elf64_Ehdr)) + continue; - struct drgn_dwarf_index_update_state dindex_state; - if (!drgn_dwarf_index_update_state_init(&dindex_state, &dbinfo->dindex)) - return &drgn_enomem; - struct drgn_error *err = NULL; - #pragma omp parallel for schedule(dynamic) - for (size_t i = 0; i < load->new_modules.size; i++) { + /* + * This logic is complicated because we're dealing with two data + * sources that we can't completely trust: the memory in the + * core dump and the file at the path found in the core dump. + * + * First, we try to identify the mapped file contents in the + * core dump. Ideally, this will find a build ID. However, this + * can fail for a few reasons: + * + * 1. The file is not an ELF file. + * 2. The ELF file is not an executable or library. + * 3. The ELF file does not have a build ID. + * 4. The file header was not dumped to the core dump, in which + * case we can't tell whether this is an ELF file. Dumping + * the first page of an executable file has been the default + * behavior since Linux kernel commit 895021552d6f + * ("coredump: default + * CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y") (in v2.6.37), but + * it can be disabled at kernel build time or toggled at + * runtime. + * 5. The build ID or the necessary ELF metadata were not dumped + * in the core dump. 
This can happen if the necessary program + * headers or note segment were not in the first page of the + * file. + * 6. The file is mapped but not actually loaded into the + * program (e.g., if the program is a tool like a profiler or + * a debugger that mmaps binaries [like drgn itself!]). + * + * In cases 1 and 2, we can simply ignore the file. In cases + * 3-5, we blindly trust the path in the core dump. We can + * sometimes detect case 6 in + * userspace_core_elf_address_range(). + * + * There is also the possibility that the program modified or + * corrupted the ELF metadata in memory (more likely if the file + * was explicitly mmap'd, since the metadata will usually be + * read-only if it was loaded properly). We don't deal with that + * yet. + */ + struct userspace_core_identified_file identity = {}; + err = userspace_core_identify_file(prog, core, segments, + num_segments, ehdr_segment, + &identity); if (err) + return err; + if (identity.ignore) continue; - struct drgn_error *module_err = - drgn_debug_info_read_module(load, &dindex_state, - load->new_modules.data[i]); - if (module_err) { - #pragma omp critical(drgn_debug_info_update_index_error) + +#define CLEAR_ELF() do { \ + elf = NULL; \ + fd = -1; \ +} while (0) +#define CLOSE_ELF() do { \ + elf_end(elf); \ + close(fd); \ + CLEAR_ELF(); \ +} while (0) + int fd; + Elf *elf; + /* + * There are a few things that can go wrong here: + * + * 1. The path no longer exists. + * 2. The path refers to a different ELF file than was in the + * core dump. + * 3. The path refers to something which isn't a valid ELF file. 
+ */ + err = open_elf_file(path, &fd, &elf); + if (err) { + drgn_error_destroy(err); + CLEAR_ELF(); + } else if (identity.build_id_len > 0) { + if (!build_id_matches(elf, identity.build_id, + identity.build_id_len)) + CLOSE_ELF(); + } + + if (elf && !identity.have_address_range) { + GElf_Ehdr ehdr_mem, *ehdr; + size_t phnum; + if ((ehdr = gelf_getehdr(elf, &ehdr_mem)) && + (elf_getphdrnum(elf, &phnum) == 0)) { + uint64_t bias; + err = userspace_core_elf_address_range(ehdr->e_type, + phnum, + elf_file_get_phdr, + elf, + segments, + num_segments, + ehdr_segment, + &bias, + &identity.start, + &identity.end); + if (err || identity.start >= identity.end) { + drgn_error_destroy(err); + CLOSE_ELF(); + } else { + identity.have_address_range = true; + } + } else { + CLOSE_ELF(); + } + } + + if (elf) { + err = drgn_debug_info_report_elf(load, path, fd, elf, + identity.start, + identity.end, NULL, + NULL); if (err) - drgn_error_destroy(module_err); - else - err = module_err; + return err; + } else { + if (!identity.have_address_range) + identity.start = identity.end = 0; + Dwfl_Module *dwfl_module = + dwfl_report_module(load->dbinfo->dwfl, path, + identity.start, + identity.end); + if (!dwfl_module) + return drgn_error_libdwfl(); + if (identity.build_id_len > 0 && + dwfl_module_report_build_id(dwfl_module, + identity.build_id, + identity.build_id_len, + 0)) + return drgn_error_libdwfl(); } +#undef CLOSE_ELF +#undef CLEAR_ELF } - if (!err) - err = drgn_dwarf_index_update(&dindex_state); - drgn_dwarf_index_update_state_deinit(&dindex_state); - if (!err) - drgn_debug_info_free_modules(dbinfo, true, false); - return err; -} - -struct drgn_error * -drgn_debug_info_report_flush(struct drgn_debug_info_load_state *load) -{ - struct drgn_debug_info *dbinfo = load->dbinfo; - my_dwfl_report_end(dbinfo, NULL, NULL); - struct drgn_error *err = drgn_debug_info_update_index(load); - dwfl_report_begin_add(dbinfo->dwfl); - if (err) - return err; - load->new_modules.size = 0; return NULL; } 
static struct drgn_error * -drgn_debug_info_report_finalize_errors(struct drgn_debug_info_load_state *load) +userspace_core_report_mapped_files(struct drgn_debug_info_load_state *load, + struct userspace_core_report_state *core) { - if (load->num_errors > load->max_errors && - (!string_builder_line_break(&load->errors) || - !string_builder_appendf(&load->errors, "... %u more", - load->num_errors - load->max_errors))) { - free(load->errors.str); - return &drgn_enomem; - } - if (load->num_errors) { - return drgn_error_from_string_builder(DRGN_ERROR_MISSING_DEBUG_INFO, - &load->errors); - } else { - return NULL; + + struct drgn_error *err; + for (struct drgn_mapped_files_iterator it = + drgn_mapped_files_first(&core->files); + it.entry; it = drgn_mapped_files_next(it)) { + err = userspace_core_maybe_report_file(load, core, + it.entry->key, + it.entry->value.data, + it.entry->value.size); + if (err) + return err; } + return NULL; } -struct drgn_error *drgn_debug_info_load(struct drgn_debug_info *dbinfo, - const char **paths, size_t n, - bool load_default, bool load_main) +static struct drgn_error * +userspace_core_report_debug_info(struct drgn_debug_info_load_state *load, + const char *nt_file, size_t nt_file_len) { - struct drgn_program *prog = dbinfo->prog; struct drgn_error *err; - if (load_default) - load_main = true; - - const char *max_errors = getenv("DRGN_MAX_DEBUG_INFO_ERRORS"); - struct drgn_debug_info_load_state load = { - .dbinfo = dbinfo, - .paths = paths, - .num_paths = n, - .load_default = load_default, - .load_main = load_main, - .new_modules = VECTOR_INIT, - .max_errors = max_errors ? 
atoi(max_errors) : 5, + struct userspace_core_report_state core = { + .files = HASH_TABLE_INIT, }; - dwfl_report_begin_add(dbinfo->dwfl); - if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) - err = linux_kernel_report_debug_info(&load); - else - err = userspace_report_debug_info(&load); - my_dwfl_report_end(dbinfo, NULL, NULL); + err = userspace_core_get_mapped_files(load, &core, nt_file, + nt_file_len); if (err) - goto err; + goto out; + err = userspace_core_report_mapped_files(load, &core); +out: + free(core.segment_buf); + free(core.phdr_buf); + for (struct drgn_mapped_files_iterator it = + drgn_mapped_files_first(&core.files); + it.entry; it = drgn_mapped_files_next(it)) + drgn_mapped_file_segment_vector_deinit(&it.entry->value); + drgn_mapped_files_deinit(&core.files); + return err; +} - /* - * userspace_report_debug_info() reports the main debugging information - * directly with libdwfl, so we need to report it to dbinfo. - */ - if (!(prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) && load_main && - dwfl_getmodules(dbinfo->dwfl, drgn_debug_info_report_dwfl_module, - &load, 0)) { - err = &drgn_enomem; - goto err; - } +static struct drgn_error * +userspace_report_elf_file(struct drgn_debug_info_load_state *load, + const char *path) +{ + struct drgn_error *err; - err = drgn_debug_info_update_index(&load); + int fd; + Elf *elf; + err = open_elf_file(path, &fd, &elf); if (err) goto err; + GElf_Ehdr ehdr_mem, *ehdr; + ehdr = gelf_getehdr(elf, &ehdr_mem); + if (!ehdr) { + err = drgn_error_libelf(); + goto err_close; + } /* - * If this fails, it's too late to roll back. This can only fail with - * enomem, so it's not a big deal. + * We haven't implemented a way to get the load address for dynamically + * loaded or relocatable files, so for now we report those as unloaded. 
*/ - err = drgn_debug_info_report_finalize_errors(&load); -out: - drgn_debug_info_module_vector_deinit(&load.new_modules); - return err; + uint64_t start = 0, end = 0; + if (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_CORE) { + err = elf_address_range(elf, 0, &start, &end); + if (err) + goto err_close; + } -err: - drgn_debug_info_free_modules(dbinfo, false, false); - free(load.errors.str); - goto out; -} + return drgn_debug_info_report_elf(load, path, fd, elf, start, end, NULL, + NULL); -bool drgn_debug_info_is_indexed(struct drgn_debug_info *dbinfo, - const char *name) -{ - return c_string_set_search(&dbinfo->module_names, &name).entry != NULL; +err_close: + elf_end(elf); + close(fd); +err: + return drgn_debug_info_report_error(load, path, NULL, err); } static struct drgn_error * -drgn_dwarf_location(struct drgn_debug_info_module *module, - Dwarf_Attribute *attr, - const struct drgn_register_state *regs, - const char **expr_ret, size_t *expr_size_ret) +userspace_report_debug_info(struct drgn_debug_info_load_state *load) { struct drgn_error *err; - switch (attr->form) { - case DW_FORM_sec_offset: { - if (!module->scns[DRGN_SCN_DEBUG_LOC]) { - return drgn_error_create(DRGN_ERROR_OTHER, - "loclistptr without .debug_loc section"); - } - struct optional_uint64 pc; - if (!regs || - !(pc = drgn_register_state_get_pc(regs)).has_value) { - *expr_ret = NULL; - *expr_size_ret = 0; - return NULL; - } - - err = drgn_debug_info_module_cache_section(module, - DRGN_SCN_DEBUG_LOC); + for (size_t i = 0; i < load->num_paths; i++) { + err = userspace_report_elf_file(load, load->paths[i]); if (err) return err; + } - Dwarf_Addr bias; - dwfl_module_info(module->dwfl_module, NULL, NULL, NULL, &bias, - NULL, NULL, NULL); - pc.value = pc.value - !regs->interrupted - bias; - - Dwarf_Word offset; - if (dwarf_formudata(attr, &offset)) - return drgn_error_libdw(); - - struct drgn_debug_info_buffer buffer; - drgn_debug_info_buffer_init(&buffer, module, - DRGN_SCN_DEBUG_LOC); - if (offset > 
buffer.bb.end - buffer.bb.pos) { - return drgn_error_create(DRGN_ERROR_OTHER, - "loclistptr is out of bounds"); - } - buffer.bb.pos += offset; - - uint8_t address_size = - drgn_platform_address_size(&module->platform); - uint64_t address_max = uint_max(address_size); - uint64_t base; - bool base_valid = false; - for (;;) { - uint64_t start, end; - if ((err = binary_buffer_next_uint(&buffer.bb, - address_size, - &start)) || - (err = binary_buffer_next_uint(&buffer.bb, - address_size, &end))) - return err; - if (start == 0 && end == 0) { - break; - } else if (start == address_max) { - base = end; - base_valid = true; + if (load->load_default) { + Dwfl *dwfl = load->dbinfo->dwfl; + struct drgn_program *prog = load->dbinfo->prog; + if (prog->flags & DRGN_PROGRAM_IS_LIVE) { + int ret = dwfl_linux_proc_report(dwfl, prog->pid); + if (ret == -1) { + return drgn_error_libdwfl(); + } else if (ret) { + return drgn_error_create_os("dwfl_linux_proc_report", + ret, NULL); + } + } else { + const char *nt_file; + size_t nt_file_len; + char *env = getenv("DRGN_USE_LIBDWFL_REPORT"); + if (env && atoi(env)) { + nt_file = NULL; + nt_file_len = 0; } else { - if (!base_valid) { - Dwarf_Die cu_die; - if (!dwarf_cu_die(attr->cu, &cu_die, - NULL, NULL, NULL, - NULL, NULL, NULL)) - return drgn_error_libdw(); - Dwarf_Addr low_pc; - if (dwarf_lowpc(&cu_die, &low_pc)) - return drgn_error_libdw(); - base = low_pc; - base_valid = true; - } - uint16_t expr_size; - if ((err = binary_buffer_next_u16(&buffer.bb, - &expr_size))) + err = drgn_get_nt_file(prog->core, &nt_file, + &nt_file_len); + if (err) return err; - if (expr_size > buffer.bb.end - buffer.bb.pos) { - return binary_buffer_error(&buffer.bb, - "location description size is out of bounds"); - } - if (base + start <= pc.value && - pc.value < base + end) { - *expr_ret = buffer.bb.pos; - *expr_size_ret = expr_size; - return NULL; - } - buffer.bb.pos += expr_size; } - } - *expr_ret = NULL; - *expr_size_ret = 0; - return NULL; - } - default: { 
- Dwarf_Block block; - if (dwarf_formblock(attr, &block)) - return drgn_error_libdw(); - *expr_ret = (char *)block.data; - *expr_size_ret = block.length; - return NULL; - } - } -} - -struct drgn_dwarf_expression_buffer { - struct binary_buffer bb; - const char *start; - struct drgn_debug_info_module *module; -}; - -static struct drgn_error * -drgn_error_debug_info(struct drgn_debug_info_module *module, const char *ptr, - const char *message) -{ - uintptr_t p = (uintptr_t)ptr; - int end_match = -1; - for (int i = 0; i < ARRAY_SIZE(module->scn_data); i++) { - if (!module->scn_data[i]) - continue; - uintptr_t start = (uintptr_t)module->scn_data[i]->d_buf; - uintptr_t end = start + module->scn_data[i]->d_size; - if (start <= p) { - if (p < end) { - return drgn_error_debug_info_scn(module, i, ptr, - message); - } else if (p == end) { - end_match = i; - } - } - } - if (end_match != -1) { - /* - * The pointer doesn't lie within a section, but it does point - * to the end of a section. - */ - return drgn_error_debug_info_scn(module, end_match, ptr, - message); - } - /* We couldn't find the section containing the pointer. 
*/ - const char *name = dwfl_module_info(module->dwfl_module, NULL, NULL, - NULL, NULL, NULL, NULL, NULL); - return drgn_error_format(DRGN_ERROR_OTHER, "%s: %s", name, message); -} - -static struct drgn_error * -drgn_dwarf_expression_buffer_error(struct binary_buffer *bb, const char *pos, - const char *message) -{ - struct drgn_dwarf_expression_buffer *buffer = - container_of(bb, struct drgn_dwarf_expression_buffer, bb); - return drgn_error_debug_info(buffer->module, pos, message); -} - -static void -drgn_dwarf_expression_buffer_init(struct drgn_dwarf_expression_buffer *buffer, - struct drgn_debug_info_module *module, - const char *expr, size_t expr_size) -{ - binary_buffer_init(&buffer->bb, expr, expr_size, - drgn_platform_is_little_endian(&module->platform), - drgn_dwarf_expression_buffer_error); - buffer->start = expr; - buffer->module = module; -} - -static struct drgn_error * -drgn_dwarf_frame_base(struct drgn_program *prog, - struct drgn_debug_info_module *module, Dwarf_Die *die, - int *remaining_ops, - const struct drgn_register_state *regs, uint64_t *ret); - -/* - * Evaluate a DWARF expression up to the next location description operation. - * - * Returns &drgn_not_found if it tried to use an unknown register value. 
- */ -static struct drgn_error * -drgn_eval_dwarf_expression(struct drgn_program *prog, - struct drgn_dwarf_expression_buffer *expr, - struct uint64_vector *stack, - int *remaining_ops, - Dwarf_Die *function_die, - const struct drgn_register_state *regs) -{ - struct drgn_error *err; - const struct drgn_platform *platform = &expr->module->platform; - bool little_endian = drgn_platform_is_little_endian(platform); - uint8_t address_size = drgn_platform_address_size(platform); - uint8_t address_bits = address_size * CHAR_BIT; - uint64_t address_mask = uint_max(address_size); - drgn_register_number (*dwarf_regno_to_internal)(uint64_t) = - platform->arch->dwarf_regno_to_internal; - -#define CHECK(n) do { \ - size_t _n = (n); \ - if (stack->size < _n) { \ - return binary_buffer_error(&expr->bb, \ - "DWARF expression stack underflow"); \ - } \ -} while (0) - -#define ELEM(i) stack->data[stack->size - 1 - (i)] - -#define PUSH(x) do { \ - uint64_t push = (x); \ - if (!uint64_vector_append(stack, &push)) \ - return &drgn_enomem; \ -} while (0) - -#define PUSH_MASK(x) PUSH((x) & address_mask) - - while (binary_buffer_has_next(&expr->bb)) { - if (*remaining_ops <= 0) { - return binary_buffer_error(&expr->bb, - "DWARF expression executed too many operations"); - } - (*remaining_ops)--; - uint8_t opcode; - if ((err = binary_buffer_next_u8(&expr->bb, &opcode))) - return err; - uint64_t uvalue; - uint64_t dwarf_regno; - uint8_t deref_size; - switch (opcode) { - /* Literal encodings. */ - case DW_OP_lit0 ... 
DW_OP_lit31: - PUSH(opcode - DW_OP_lit0); - break; - case DW_OP_addr: - if ((err = binary_buffer_next_uint(&expr->bb, - address_size, - &uvalue))) - return err; - PUSH(uvalue); - break; - case DW_OP_const1u: - if ((err = binary_buffer_next_u8_into_u64(&expr->bb, - &uvalue))) - return err; - PUSH(uvalue); - break; - case DW_OP_const2u: - if ((err = binary_buffer_next_u16_into_u64(&expr->bb, - &uvalue))) - return err; - PUSH_MASK(uvalue); - break; - case DW_OP_const4u: - if ((err = binary_buffer_next_u32_into_u64(&expr->bb, - &uvalue))) - return err; - PUSH_MASK(uvalue); - break; - case DW_OP_const8u: - if ((err = binary_buffer_next_u64(&expr->bb, &uvalue))) - return err; - PUSH_MASK(uvalue); - break; - case DW_OP_const1s: - if ((err = binary_buffer_next_s8_into_u64(&expr->bb, - &uvalue))) - return err; - PUSH_MASK(uvalue); - break; - case DW_OP_const2s: - if ((err = binary_buffer_next_s16_into_u64(&expr->bb, - &uvalue))) - return err; - PUSH_MASK(uvalue); - break; - case DW_OP_const4s: - if ((err = binary_buffer_next_s32_into_u64(&expr->bb, - &uvalue))) - return err; - PUSH_MASK(uvalue); - break; - case DW_OP_const8s: - if ((err = binary_buffer_next_s64_into_u64(&expr->bb, - &uvalue))) - return err; - PUSH_MASK(uvalue); - break; - case DW_OP_constu: - if ((err = binary_buffer_next_uleb128(&expr->bb, - &uvalue))) - return err; - PUSH_MASK(uvalue); - break; - case DW_OP_consts: - if ((err = binary_buffer_next_sleb128_into_u64(&expr->bb, - &uvalue))) - return err; - PUSH_MASK(uvalue); - break; - /* Register values. */ - case DW_OP_fbreg: { - err = drgn_dwarf_frame_base(prog, expr->module, - function_die, remaining_ops, - regs, &uvalue); - if (err) - return err; - int64_t svalue; - if ((err = binary_buffer_next_sleb128(&expr->bb, - &svalue))) - return err; - PUSH_MASK(uvalue + svalue); - break; - } - case DW_OP_breg0 ... 
DW_OP_breg31: - dwarf_regno = opcode - DW_OP_breg0; - goto breg; - case DW_OP_bregx: - if ((err = binary_buffer_next_uleb128(&expr->bb, - &dwarf_regno))) - return err; -breg: - { - if (!regs) - return &drgn_not_found; - drgn_register_number regno = - dwarf_regno_to_internal(dwarf_regno); - if (!drgn_register_state_has_register(regs, regno)) - return &drgn_not_found; - const struct drgn_register_layout *layout = - &platform->arch->register_layout[regno]; - copy_lsbytes(&uvalue, sizeof(uvalue), - HOST_LITTLE_ENDIAN, - ®s->buf[layout->offset], layout->size, - little_endian); - int64_t svalue; - if ((err = binary_buffer_next_sleb128(&expr->bb, - &svalue))) - return err; - PUSH_MASK(uvalue + svalue); - break; - } - /* Stack operations. */ - case DW_OP_dup: - CHECK(1); - PUSH(ELEM(0)); - break; - case DW_OP_drop: - CHECK(1); - stack->size--; - break; - case DW_OP_pick: { - uint8_t index; - if ((err = binary_buffer_next_u8(&expr->bb, &index))) - return err; - CHECK(index + 1); - PUSH(ELEM(index)); - break; - } - case DW_OP_over: - CHECK(2); - PUSH(ELEM(1)); - break; - case DW_OP_swap: - CHECK(2); - uvalue = ELEM(0); - ELEM(0) = ELEM(1); - ELEM(1) = uvalue; - break; - case DW_OP_rot: - CHECK(3); - uvalue = ELEM(0); - ELEM(0) = ELEM(1); - ELEM(1) = ELEM(2); - ELEM(2) = uvalue; - break; - case DW_OP_deref: - deref_size = address_size; - goto deref; - case DW_OP_deref_size: - if ((err = binary_buffer_next_u8(&expr->bb, - &deref_size))) - return err; - if (deref_size > address_size) { - return binary_buffer_error(&expr->bb, - "DW_OP_deref_size has invalid size"); - } -deref: - { - CHECK(1); - char deref_buf[8]; - err = drgn_program_read_memory(prog, deref_buf, ELEM(0), - deref_size, false); - if (err) - return err; - copy_lsbytes(&ELEM(0), sizeof(ELEM(0)), - HOST_LITTLE_ENDIAN, deref_buf, deref_size, - little_endian); - break; - } - case DW_OP_call_frame_cfa: { - if (!regs) - return &drgn_not_found; - /* - * The DWARF 5 specification says that - * DW_OP_call_frame_cfa cannot 
be used for CFI. For - * DW_CFA_def_cfa_expression, it is clearly invalid to - * define the CFA in terms of the CFA, and it will fail - * naturally below. This restriction doesn't make sense - * for DW_CFA_expression and DW_CFA_val_expression, as - * they push the CFA and thus depend on it anyways, so - * we don't bother enforcing it. - */ - struct optional_uint64 cfa = - drgn_register_state_get_cfa(regs); - if (!cfa.has_value) - return &drgn_not_found; - PUSH(cfa.value); - break; - } - /* Arithmetic and logical operations. */ -#define UNOP_MASK(op) do { \ - CHECK(1); \ - ELEM(0) = (op ELEM(0)) & address_mask; \ -} while (0) -#define BINOP(op) do { \ - CHECK(2); \ - ELEM(1) = ELEM(1) op ELEM(0); \ - stack->size--; \ -} while (0) -#define BINOP_MASK(op) do { \ - CHECK(2); \ - ELEM(1) = (ELEM(1) op ELEM(0)) & address_mask; \ - stack->size--; \ -} while (0) - case DW_OP_abs: - CHECK(1); - if (ELEM(0) & (UINT64_C(1) << (address_bits - 1))) - ELEM(0) = -ELEM(0) & address_mask; - break; - case DW_OP_and: - BINOP(&); - break; - case DW_OP_div: - CHECK(2); - if (ELEM(0) == 0) { - return binary_buffer_error(&expr->bb, - "division by zero in DWARF expression"); - } - ELEM(1) = ((truncate_signed(ELEM(1), address_bits) - / truncate_signed(ELEM(0), address_bits)) - & address_mask); - stack->size--; - break; - case DW_OP_minus: - BINOP_MASK(-); - break; - case DW_OP_mod: - CHECK(2); - if (ELEM(0) == 0) { - return binary_buffer_error(&expr->bb, - "modulo by zero in DWARF expression"); - } - ELEM(1) = ELEM(1) % ELEM(0); - stack->size--; - break; - case DW_OP_mul: - BINOP_MASK(*); - break; - case DW_OP_neg: - UNOP_MASK(-); - break; - case DW_OP_not: - UNOP_MASK(~); - break; - case DW_OP_or: - BINOP(|); - break; - case DW_OP_plus: - BINOP_MASK(+); - break; - case DW_OP_plus_uconst: - CHECK(1); - if ((err = binary_buffer_next_uleb128(&expr->bb, - &uvalue))) - return err; - ELEM(0) = (ELEM(0) + uvalue) & address_mask; - break; - case DW_OP_shl: - CHECK(2); - if (ELEM(0) < 
address_bits) - ELEM(1) = (ELEM(1) << ELEM(0)) & address_mask; - else - ELEM(1) = 0; - stack->size--; - break; - case DW_OP_shr: - CHECK(2); - if (ELEM(0) < address_bits) - ELEM(1) >>= ELEM(0); - else - ELEM(1) = 0; - stack->size--; - break; - case DW_OP_shra: - CHECK(2); - if (ELEM(0) < address_bits) { - ELEM(1) = ((truncate_signed(ELEM(1), address_bits) - >> ELEM(0)) - & address_mask); - } else if (ELEM(1) & (UINT64_C(1) << (address_bits - 1))) { - ELEM(1) = -INT64_C(1) & address_mask; - } else { - ELEM(1) = 0; - } - stack->size--; - break; - case DW_OP_xor: - BINOP(^); - break; -#undef BINOP_MASK -#undef BINOP -#undef UNOP_MASK - /* Control flow operations. */ -#define RELOP(op) do { \ - CHECK(2); \ - ELEM(1) = (truncate_signed(ELEM(1), address_bits) op \ - truncate_signed(ELEM(0), address_bits)); \ - stack->size--; \ -} while (0) - case DW_OP_le: - RELOP(<=); - break; - case DW_OP_ge: - RELOP(>=); - break; - case DW_OP_eq: - RELOP(==); - break; - case DW_OP_lt: - RELOP(<); - break; - case DW_OP_gt: - RELOP(>); - break; - case DW_OP_ne: - RELOP(!=); - break; -#undef RELOP - case DW_OP_skip: -branch: - { - int16_t skip; - if ((err = binary_buffer_next_s16(&expr->bb, &skip))) - return err; - if ((skip >= 0 && skip > expr->bb.end - expr->bb.pos) || - (skip < 0 && -skip > expr->bb.pos - expr->start)) { - return binary_buffer_error(&expr->bb, - "DWARF expression branch is out of bounds"); - } - expr->bb.pos += skip; - break; - } - case DW_OP_bra: - CHECK(1); - if (ELEM(0)) { - stack->size--; - goto branch; - } else { - stack->size--; - if ((err = binary_buffer_skip(&expr->bb, 2))) - return err; - } - break; - /* Special operations. */ - case DW_OP_nop: - break; - /* Location description operations. */ - case DW_OP_reg0 ... DW_OP_reg31: - case DW_OP_regx: - case DW_OP_implicit_value: - case DW_OP_stack_value: - case DW_OP_piece: - case DW_OP_bit_piece: - /* The caller must handle it. 
*/ - expr->bb.pos = expr->bb.prev; - return NULL; - /* - * We don't yet support: - * - * - DW_OP_push_object_address - * - DW_OP_form_tls_address - * - DW_OP_entry_value - * DW_OP_implicit_pointer - * - Procedure calls: DW_OP_call2, DW_OP_call4, DW_OP_call_ref. - * - Operations that use .debug_addr: DW_OP_addrx, - * DW_OP_constx. - * - Typed operations: DW_OP_const_type, DW_OP_regval_type, - * DW_OP_deref_type, DW_OP_convert, DW_OP_reinterpret. - * - Operations for multiple address spaces: DW_OP_xderef, - * DW_OP_xderef_size, DW_OP_xderef_type. - */ - default: - return binary_buffer_error(&expr->bb, - "unknown DWARF expression opcode %#" PRIx8, - opcode); - } - } - -#undef PUSH_MASK -#undef PUSH -#undef ELEM -#undef CHECK - - return NULL; -} - -static struct drgn_error * -drgn_dwarf_frame_base(struct drgn_program *prog, - struct drgn_debug_info_module *module, Dwarf_Die *die, - int *remaining_ops, - const struct drgn_register_state *regs, uint64_t *ret) -{ - struct drgn_error *err; - bool little_endian = drgn_platform_is_little_endian(&module->platform); - drgn_register_number (*dwarf_regno_to_internal)(uint64_t) = - module->platform.arch->dwarf_regno_to_internal; - - if (!die) - return &drgn_not_found; - Dwarf_Attribute attr_mem, *attr; - if (!(attr = dwarf_attr_integrate(die, DW_AT_frame_base, &attr_mem))) - return &drgn_not_found; - const char *expr; - size_t expr_size; - err = drgn_dwarf_location(module, attr, regs, &expr, &expr_size); - if (err) - return err; - - struct uint64_vector stack = VECTOR_INIT; - struct drgn_dwarf_expression_buffer buffer; - drgn_dwarf_expression_buffer_init(&buffer, module, expr, expr_size); - for (;;) { - err = drgn_eval_dwarf_expression(prog, &buffer, &stack, - remaining_ops, NULL, regs); - if (err) - goto out; - if (binary_buffer_has_next(&buffer.bb)) { - uint8_t opcode; - if ((err = binary_buffer_next_u8(&buffer.bb, &opcode))) - goto out; - - uint64_t dwarf_regno; - switch (opcode) { - case DW_OP_reg0 ... 
DW_OP_reg31: - dwarf_regno = opcode - DW_OP_reg0; - goto reg; - case DW_OP_regx: - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno))) - goto out; -reg: - { - if (!regs) { - err = &drgn_not_found; - goto out; - } - drgn_register_number regno = - dwarf_regno_to_internal(dwarf_regno); - if (!drgn_register_state_has_register(regs, - regno)) { - err = &drgn_not_found; - goto out; - } - const struct drgn_register_layout *layout = - &prog->platform.arch->register_layout[regno]; - /* - * Note that this doesn't mask the address since - * the caller does that. - */ - copy_lsbytes(ret, sizeof(*ret), - HOST_LITTLE_ENDIAN, - ®s->buf[layout->offset], - layout->size, little_endian); - if (binary_buffer_has_next(&buffer.bb)) { - err = binary_buffer_error(&buffer.bb, - "stray operations in DW_AT_frame_base expression"); - } else { - err = NULL; - } - goto out; - } - default: - err = binary_buffer_error(&buffer.bb, - "invalid opcode %#" PRIx8 " for DW_AT_frame_base expression", - opcode); - goto out; - } - } else if (stack.size) { - *ret = stack.data[stack.size - 1]; - err = NULL; - break; - } else { - err = &drgn_not_found; - break; - } - } -out: - uint64_vector_deinit(&stack); - return err; -} - -DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_type_map, ptr_key_hash_pair, - scalar_key_eq) - -/** - * Return whether a DWARF DIE is little-endian. - * - * @param[in] check_attr Whether to check the DW_AT_endianity attribute. If @c - * false, only the ELF header is checked and this function cannot fail. - * @return @c NULL on success, non-@c NULL on error. 
- */ -static struct drgn_error *dwarf_die_is_little_endian(Dwarf_Die *die, - bool check_attr, bool *ret) -{ - Dwarf_Attribute endianity_attr_mem, *endianity_attr; - Dwarf_Word endianity; - if (check_attr && - (endianity_attr = dwarf_attr_integrate(die, DW_AT_endianity, - &endianity_attr_mem))) { - if (dwarf_formudata(endianity_attr, &endianity)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "invalid DW_AT_endianity"); - } - } else { - endianity = DW_END_default; - } - switch (endianity) { - case DW_END_default: { - Elf *elf = dwarf_getelf(dwarf_cu_getdwarf(die->cu)); - *ret = elf_getident(elf, NULL)[EI_DATA] == ELFDATA2LSB; - return NULL; - } - case DW_END_little: - *ret = true; - return NULL; - case DW_END_big: - *ret = false; - return NULL; - default: - return drgn_error_create(DRGN_ERROR_OTHER, - "unknown DW_AT_endianity"); - } -} - -/** Like dwarf_die_is_little_endian(), but returns a @ref drgn_byte_order. */ -static struct drgn_error *dwarf_die_byte_order(Dwarf_Die *die, bool check_attr, - enum drgn_byte_order *ret) -{ - bool little_endian; - struct drgn_error *err = dwarf_die_is_little_endian(die, check_attr, - &little_endian); - /* - * dwarf_die_is_little_endian() can't fail if check_attr is false, so - * the !check_attr test suppresses maybe-uninitialized warnings. - */ - if (!err || !check_attr) - *ret = drgn_byte_order_from_little_endian(little_endian); - return err; -} - -static int dwarf_type(Dwarf_Die *die, Dwarf_Die *ret) -{ - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - - if (!(attr = dwarf_attr_integrate(die, DW_AT_type, &attr_mem))) - return 1; - - return dwarf_formref_die(attr, ret) ? 
0 : -1; -} - -static int dwarf_flag(Dwarf_Die *die, unsigned int name, bool *ret) -{ - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - - if (!(attr = dwarf_attr(die, name, &attr_mem))) { - *ret = false; - return 0; - } - return dwarf_formflag(attr, ret); -} - -static int dwarf_flag_integrate(Dwarf_Die *die, unsigned int name, bool *ret) -{ - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - - if (!(attr = dwarf_attr_integrate(die, name, &attr_mem))) { - *ret = false; - return 0; - } - return dwarf_formflag(attr, ret); -} - -/** - * Parse a type from a DWARF debugging information entry. - * - * This is the same as @ref drgn_type_from_dwarf() except that it can be used to - * work around a bug in GCC < 9.0 that zero length array types are encoded the - * same as incomplete array types. There are a few places where GCC allows - * zero-length arrays but not incomplete arrays: - * - * - As the type of a member of a structure with only one member. - * - As the type of a structure member other than the last member. - * - As the type of a union member. - * - As the element type of an array. - * - * In these cases, we know that what appears to be an incomplete array type must - * actually have a length of zero. In other cases, a subrange DIE without - * DW_AT_count or DW_AT_upper_bound is ambiguous; we return an incomplete array - * type. - * - * @param[in] dbinfo Debugging information. - * @param[in] module Module containing @p die. - * @param[in] die DIE to parse. - * @param[in] can_be_incomplete_array Whether the type can be an incomplete - * array type. If this is @c false and the type appears to be an incomplete - * array type, its length is set to zero instead. - * @param[out] is_incomplete_array_ret Whether the encoded type is an incomplete - * array type or a typedef of an incomplete array type (regardless of @p - * can_be_incomplete_array). - * @param[out] ret Returned type. - * @return @c NULL on success, non-@c NULL on error. 
- */ -static struct drgn_error * -drgn_type_from_dwarf_internal(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, bool can_be_incomplete_array, - bool *is_incomplete_array_ret, - struct drgn_qualified_type *ret); - -/** - * Parse a type from a DWARF debugging information entry. - * - * @param[in] dbinfo Debugging information. - * @param[in] module Module containing @p die. - * @param[in] die DIE to parse. - * @param[out] ret Returned type. - * @return @c NULL on success, non-@c NULL on error. - */ -static inline struct drgn_error * -drgn_type_from_dwarf(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, Dwarf_Die *die, - struct drgn_qualified_type *ret) -{ - return drgn_type_from_dwarf_internal(dbinfo, module, die, true, NULL, - ret); -} - -/** - * Parse a type from the @c DW_AT_type attribute of a DWARF debugging - * information entry. - * - * @param[in] dbinfo Debugging information. - * @param[in] module Module containing @p die. - * @param[in] die DIE with @c DW_AT_type attribute. - * @param[in] lang Language of @p die if it is already known, @c NULL if it - * should be determined from @p die. - * @param[in] can_be_void Whether the @c DW_AT_type attribute may be missing, - * which is interpreted as a void type. If this is false and the @c DW_AT_type - * attribute is missing, an error is returned. - * @param[in] can_be_incomplete_array See @ref drgn_type_from_dwarf_internal(). - * @param[in] is_incomplete_array_ret See @ref drgn_type_from_dwarf_internal(). - * @param[out] ret Returned type. - * @return @c NULL on success, non-@c NULL on error. 
- */ -static struct drgn_error * -drgn_type_from_dwarf_attr(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, Dwarf_Die *die, - const struct drgn_language *lang, - bool can_be_void, bool can_be_incomplete_array, - bool *is_incomplete_array_ret, - struct drgn_qualified_type *ret) -{ - struct drgn_error *err; - char tag_buf[DW_TAG_BUF_LEN]; - - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - if (!(attr = dwarf_attr_integrate(die, DW_AT_type, &attr_mem))) { - if (can_be_void) { - if (!lang) { - err = drgn_language_from_die(die, true, &lang); - if (err) - return err; - } - ret->type = drgn_void_type(dbinfo->prog, lang); - ret->qualifiers = 0; - return NULL; - } else { - return drgn_error_format(DRGN_ERROR_OTHER, - "%s is missing DW_AT_type", - dwarf_tag_str(die, tag_buf)); - } - } - - Dwarf_Die type_die; - if (!dwarf_formref_die(attr, &type_die)) { - return drgn_error_format(DRGN_ERROR_OTHER, - "%s has invalid DW_AT_type", - dwarf_tag_str(die, tag_buf)); - } - - return drgn_type_from_dwarf_internal(dbinfo, module, &type_die, - can_be_incomplete_array, - is_incomplete_array_ret, ret); -} - -static struct drgn_error * -drgn_object_from_dwarf_enumerator(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, const char *name, - struct drgn_object *ret) -{ - struct drgn_error *err; - struct drgn_qualified_type qualified_type; - err = drgn_type_from_dwarf(dbinfo, module, die, &qualified_type); - if (err) - return err; - const struct drgn_type_enumerator *enumerators = - drgn_type_enumerators(qualified_type.type); - size_t num_enumerators = drgn_type_num_enumerators(qualified_type.type); - for (size_t i = 0; i < num_enumerators; i++) { - if (strcmp(enumerators[i].name, name) != 0) - continue; - - if (drgn_enum_type_is_signed(qualified_type.type)) { - return drgn_object_set_signed(ret, qualified_type, - enumerators[i].svalue, 0); - } else { - return drgn_object_set_unsigned(ret, qualified_type, - 
enumerators[i].uvalue, - 0); - } - } - UNREACHABLE(); -} - -static struct drgn_error * -drgn_object_from_dwarf_subprogram(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, struct drgn_object *ret) -{ - struct drgn_qualified_type qualified_type; - struct drgn_error *err = drgn_type_from_dwarf(dbinfo, module, die, - &qualified_type); - if (err) - return err; - Dwarf_Addr low_pc; - if (dwarf_lowpc(die, &low_pc) == -1) - return drgn_object_set_absent(ret, qualified_type, 0); - Dwarf_Addr bias; - dwfl_module_info(module->dwfl_module, NULL, NULL, NULL, &bias, NULL, - NULL, NULL); - return drgn_object_set_reference(ret, qualified_type, low_pc + bias, 0, - 0); -} - -static struct drgn_error *read_bits(struct drgn_program *prog, void *dst, - unsigned int dst_bit_offset, uint64_t src, - unsigned int src_bit_offset, - uint64_t bit_size, bool lsb0) -{ - struct drgn_error *err; - - assert(dst_bit_offset < 8); - assert(src_bit_offset < 8); - - if (bit_size == 0) - return NULL; - - if (dst_bit_offset == src_bit_offset) { - /* - * We can read directly into the the destination buffer, but we - * may have to preserve some bits at the start and/or end. - */ - uint8_t *d = dst; - uint64_t last_bit = dst_bit_offset + bit_size - 1; - uint8_t first_byte = d[0]; - uint8_t last_byte = d[last_bit / 8]; - err = drgn_program_read_memory(prog, d, src, last_bit / 8 + 1, - false); - if (err) - return err; - if (dst_bit_offset != 0) { - uint8_t mask = - copy_bits_first_mask(dst_bit_offset, lsb0); - d[0] = (first_byte & ~mask) | (d[0] & mask); - } - if (last_bit % 8 != 7) { - uint8_t mask = copy_bits_last_mask(last_bit, lsb0); - d[last_bit / 8] = ((last_byte & ~mask) - | (d[last_bit / 8] & mask)); - } - return NULL; - } else { - /* - * If the source and destination have different offsets, then - * depending on the size and source offset, we may have to read - * one more byte than is available in the destination. 
To keep - * things simple, we always read into a temporary buffer (rather - * than adding a special case for reading directly into the - * destination and shifting bits around). - */ - uint64_t src_bytes = (src_bit_offset + bit_size - 1) / 8 + 1; - char stack_tmp[16], *tmp; - if (src_bytes <= sizeof(stack_tmp)) { - tmp = stack_tmp; - } else { - tmp = malloc64(src_bytes); - if (!tmp) - return &drgn_enomem; - } - err = drgn_program_read_memory(prog, tmp, src, src_bytes, - false); - if (!err) { - copy_bits(dst, dst_bit_offset, tmp, src_bit_offset, - bit_size, lsb0); - } - if (src_bytes > sizeof(stack_tmp)) - free(tmp); - return err; - } -} - -static struct drgn_error * -drgn_object_from_dwarf_location(struct drgn_program *prog, - struct drgn_debug_info_module *module, - Dwarf_Die *die, - struct drgn_qualified_type qualified_type, - const char *expr, size_t expr_size, - Dwarf_Die *function_die, - const struct drgn_register_state *regs, - struct drgn_object *ret) -{ - struct drgn_error *err; - bool little_endian = drgn_platform_is_little_endian(&module->platform); - uint64_t address_mask = drgn_platform_address_mask(&module->platform); - drgn_register_number (*dwarf_regno_to_internal)(uint64_t) = - module->platform.arch->dwarf_regno_to_internal; - - struct drgn_object_type type; - err = drgn_object_type(qualified_type, 0, &type); - if (err) - return err; - - union drgn_value value; - char *value_buf = NULL; - - uint64_t address = 0; /* GCC thinks this may be used uninitialized. */ - int bit_offset = -1; /* -1 means that we don't have an address. 
*/ - - uint64_t bit_pos = 0; - - struct uint64_vector stack = VECTOR_INIT; - int remaining_ops = MAX_DWARF_EXPR_OPS; - struct drgn_dwarf_expression_buffer buffer; - drgn_dwarf_expression_buffer_init(&buffer, module, expr, expr_size); - do { - stack.size = 0; - err = drgn_eval_dwarf_expression(prog, &buffer, &stack, - &remaining_ops, function_die, - regs); - if (err == &drgn_not_found) - goto absent; - else if (err) - goto out; - - const void *src = NULL; - size_t src_size; - - if (binary_buffer_has_next(&buffer.bb)) { - uint8_t opcode; - if ((err = binary_buffer_next_u8(&buffer.bb, &opcode))) - goto out; - - uint64_t uvalue; - uint64_t dwarf_regno; - drgn_register_number regno; - switch (opcode) { - case DW_OP_reg0 ... DW_OP_reg31: - dwarf_regno = opcode - DW_OP_reg0; - goto reg; - case DW_OP_regx: - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno))) - goto out; -reg: - if (!regs) - goto absent; - regno = dwarf_regno_to_internal(dwarf_regno); - if (!drgn_register_state_has_register(regs, - regno)) - goto absent; - const struct drgn_register_layout *layout = - &prog->platform.arch->register_layout[regno]; - src = ®s->buf[layout->offset]; - src_size = layout->size; - break; - case DW_OP_implicit_value: - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &uvalue))) - goto out; - if (uvalue > buffer.bb.end - buffer.bb.pos) { - err = binary_buffer_error(&buffer.bb, - "DW_OP_implicit_value size is out of bounds"); - goto out; - } - src = buffer.bb.pos; - src_size = uvalue; - buffer.bb.pos += uvalue; - break; - case DW_OP_stack_value: - if (!stack.size) - goto absent; - if (little_endian != HOST_LITTLE_ENDIAN) { - stack.data[stack.size - 1] = - bswap_64(stack.data[stack.size - 1]); - } - src = &stack.data[stack.size - 1]; - src_size = sizeof(stack.data[0]); - break; - default: - buffer.bb.pos = buffer.bb.prev; - break; - } - } - - uint64_t piece_bit_size; - uint64_t piece_bit_offset; - if (binary_buffer_has_next(&buffer.bb)) { - uint8_t opcode; - if 
((err = binary_buffer_next_u8(&buffer.bb, &opcode))) - goto out; - - switch (opcode) { - case DW_OP_piece: - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &piece_bit_size))) - goto out; - /* - * It's probably bogus for the piece size to be - * larger than the remaining value size, but - * that's not explicitly stated in the DWARF 5 - * specification, so clamp it instead. - */ - if (__builtin_mul_overflow(piece_bit_size, 8U, - &piece_bit_size) || - piece_bit_size > type.bit_size - bit_pos) - piece_bit_size = type.bit_size - bit_pos; - piece_bit_offset = 0; - break; - case DW_OP_bit_piece: - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &piece_bit_size)) || - (err = binary_buffer_next_uleb128(&buffer.bb, - &piece_bit_offset))) - goto out; - if (piece_bit_size > type.bit_size - bit_pos) - piece_bit_size = type.bit_size - bit_pos; - break; - default: - err = binary_buffer_error(&buffer.bb, - "unknown DWARF expression opcode %#" PRIx8 " after simple location description", - opcode); - goto out; - } - } else { - piece_bit_size = type.bit_size - bit_pos; - piece_bit_offset = 0; - } - - /* - * TODO: there are a few cases that a DWARF location can - * describe that can't be represented in drgn's object model: - * - * 1. An object that is partially known and partially unknown. - * 2. An object that is partially in memory and partially a - * value. - * 3. An object that is in memory at non-contiguous addresses. - * 4. A pointer object whose pointer value is not known but - * whose referenced value is known (DW_OP_implicit_pointer). - * - * For case 1, we consider the whole object as absent. For cases - * 2 and 3, we convert the whole object to a value. Case 4 is - * not supported at all. We should add a way to represent all of - * these situations precisely. - */ - if (src && piece_bit_size == 0) { - /* Ignore empty value. 
*/ - } else if (src) { - if (!value_buf && - !drgn_value_zalloc(drgn_value_size(type.bit_size), - &value, &value_buf)) { - err = &drgn_enomem; - goto out; - } - if (bit_offset >= 0) { - /* - * We previously had an address. Read it into - * the value. - */ - err = read_bits(prog, value_buf, 0, address, - bit_offset, bit_pos, - little_endian); - if (err) - goto out; - bit_offset = -1; - } - /* - * It's probably safe to assume that we don't have an - * implicit value larger than 2 exabytes. - */ - assert(src_size <= UINT64_MAX / 8); - uint64_t src_bit_size = UINT64_C(8) * src_size; - if (piece_bit_offset > src_bit_size) - piece_bit_offset = src_bit_size; - uint64_t copy_bit_size = - min(piece_bit_size, - src_bit_size - piece_bit_offset); - uint64_t copy_bit_offset = bit_pos; - if (!little_endian) { - copy_bit_offset += piece_bit_size - copy_bit_size; - piece_bit_offset = (src_bit_size - - copy_bit_size - - piece_bit_offset); - } - copy_bits(&value_buf[copy_bit_offset / 8], - copy_bit_offset % 8, - (const char *)src + (piece_bit_offset / 8), - piece_bit_offset % 8, copy_bit_size, - little_endian); - } else if (stack.size) { - uint64_t piece_address = - ((stack.data[stack.size - 1] + piece_bit_offset / 8) - & address_mask); - piece_bit_offset %= 8; - if (bit_pos > 0 && bit_offset >= 0) { - /* - * We already had an address. Merge the pieces - * if the addresses are contiguous, otherwise - * convert to a value. - * - * The obvious way to write this is - * (address + (bit_pos + bit_offset) / 8), but - * (bit_pos + bit_offset) can overflow uint64_t. - */ - uint64_t end_address = - ((address - + bit_pos / 8 - + (bit_pos % 8 + bit_offset) / 8) - & address_mask); - unsigned int end_bit_offset = - (bit_offset + bit_pos) % 8; - if (piece_bit_size == 0 || - (piece_address == end_address && - piece_bit_offset == end_bit_offset)) { - /* Piece is contiguous. 
*/ - piece_address = address; - piece_bit_offset = bit_offset; - } else { - if (!drgn_value_zalloc(drgn_value_size(type.bit_size), - &value, - &value_buf)) { - err = &drgn_enomem; - goto out; - } - err = read_bits(prog, value_buf, 0, - address, bit_offset, - bit_pos, little_endian); - if (err) - goto out; - bit_offset = -1; - } - } - if (value_buf) { - /* We already have a value. Read into it. */ - err = read_bits(prog, &value_buf[bit_pos / 8], - bit_pos % 8, piece_address, - piece_bit_offset, - piece_bit_size, little_endian); - if (err) - goto out; - } else { - address = piece_address; - bit_offset = piece_bit_offset; - } - } else if (piece_bit_size > 0) { - goto absent; - } - bit_pos += piece_bit_size; - } while (binary_buffer_has_next(&buffer.bb)); - - if (bit_pos < type.bit_size || (bit_offset < 0 && !value_buf)) { -absent: - if (dwarf_tag(die) == DW_TAG_template_value_parameter) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_AT_template_value_parameter is missing value"); - } - drgn_object_reinit(ret, &type, DRGN_OBJECT_ABSENT); - err = NULL; - } else if (bit_offset >= 0) { - Dwarf_Addr start, end, bias; - dwfl_module_info(module->dwfl_module, NULL, &start, &end, &bias, - NULL, NULL, NULL); - /* - * If the address is not in the module's address range, then - * it's probably something special like a Linux per-CPU variable - * (which isn't actually a variable address but an offset). - * Don't apply the bias in that case. 
- */ - if (start <= address + bias && address + bias < end) - address += bias; - err = drgn_object_set_reference_internal(ret, &type, address, - bit_offset); - } else if (type.encoding == DRGN_OBJECT_ENCODING_BUFFER) { - drgn_object_reinit(ret, &type, DRGN_OBJECT_VALUE); - ret->value = value; - value_buf = NULL; - err = NULL; - } else { - err = drgn_object_set_from_buffer_internal(ret, &type, - value_buf, 0); - } - -out: - if (value_buf != value.ibuf) - free(value_buf); - uint64_vector_deinit(&stack); - return err; -} - -static struct drgn_error * -drgn_object_from_dwarf_constant(struct drgn_debug_info *dbinfo, Dwarf_Die *die, - struct drgn_qualified_type qualified_type, - Dwarf_Attribute *attr, struct drgn_object *ret) -{ - struct drgn_object_type type; - struct drgn_error *err = drgn_object_type(qualified_type, 0, &type); - if (err) - return err; - Dwarf_Block block; - if (dwarf_formblock(attr, &block) == 0) { - if (block.length < drgn_value_size(type.bit_size)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_AT_const_value block is too small"); - } - return drgn_object_set_from_buffer_internal(ret, &type, - block.data, 0); - } else if (type.encoding == DRGN_OBJECT_ENCODING_SIGNED) { - Dwarf_Sword svalue; - if (dwarf_formsdata(attr, &svalue)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "invalid DW_AT_const_value"); - } - drgn_object_set_signed_internal(ret, &type, svalue); - return NULL; - } else if (type.encoding == DRGN_OBJECT_ENCODING_UNSIGNED) { - Dwarf_Word uvalue; - if (dwarf_formudata(attr, &uvalue)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "invalid DW_AT_const_value"); - } - drgn_object_set_unsigned_internal(ret, &type, uvalue); - return NULL; - } else { - return drgn_error_create(DRGN_ERROR_OTHER, - "unknown DW_AT_const_value form"); - } -} - -struct drgn_error * -drgn_object_from_dwarf(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, Dwarf_Die *type_die, - Dwarf_Die *function_die, - const 
struct drgn_register_state *regs, - struct drgn_object *ret) -{ - struct drgn_error *err; - if (dwarf_tag(die) == DW_TAG_subprogram) { - return drgn_object_from_dwarf_subprogram(dbinfo, module, die, - ret); - } - /* - * The DWARF 5 specifications mentions that data object entries can have - * DW_AT_endianity, but that doesn't seem to be used in practice. It - * would be inconvenient to support, so ignore it for now. - */ - struct drgn_qualified_type qualified_type; - if (type_die) { - err = drgn_type_from_dwarf(dbinfo, module, type_die, - &qualified_type); - } else { - err = drgn_type_from_dwarf_attr(dbinfo, module, die, NULL, true, - true, NULL, &qualified_type); - } - if (err) - return err; - Dwarf_Attribute attr_mem, *attr; - const char *expr; - size_t expr_size; - if ((attr = dwarf_attr_integrate(die, DW_AT_location, &attr_mem))) { - err = drgn_dwarf_location(module, attr, regs, &expr, - &expr_size); - if (err) - return err; - } else if ((attr = dwarf_attr_integrate(die, DW_AT_const_value, - &attr_mem))) { - return drgn_object_from_dwarf_constant(dbinfo, die, - qualified_type, attr, - ret); - } else { - expr = NULL; - expr_size = 0; - } - return drgn_object_from_dwarf_location(dbinfo->prog, module, die, - qualified_type, expr, expr_size, - function_die, regs, ret); -} - -static struct drgn_error *find_dwarf_enumerator(Dwarf_Die *enumeration_type, - const char *name, - Dwarf_Die *ret) -{ - int r = dwarf_child(enumeration_type, ret); - while (r == 0) { - if (dwarf_tag(ret) == DW_TAG_enumerator && - strcmp(dwarf_diename(ret), name) == 0) - return NULL; - r = dwarf_siblingof(ret, ret); - } - if (r < 0) - return drgn_error_libdw(); - ret->addr = NULL; - return NULL; -} - -struct drgn_error *drgn_find_in_dwarf_scopes(Dwarf_Die *scopes, - size_t num_scopes, - const char *name, - Dwarf_Die *die_ret, - Dwarf_Die *type_ret) -{ - struct drgn_error *err; - Dwarf_Die die; - for (size_t scope = num_scopes; scope--;) { - bool have_declaration = false; - if 
(dwarf_child(&scopes[scope], &die) != 0) - continue; - do { - switch (dwarf_tag(&die)) { - case DW_TAG_variable: - case DW_TAG_formal_parameter: - case DW_TAG_subprogram: - if (strcmp(dwarf_diename(&die), name) == 0) { - *die_ret = die; - bool declaration; - if (dwarf_flag(&die, DW_AT_declaration, - &declaration)) - return drgn_error_libdw(); - if (declaration) - have_declaration = true; - else - return NULL; - } - break; - case DW_TAG_enumeration_type: { - bool enum_class; - if (dwarf_flag_integrate(&die, DW_AT_enum_class, - &enum_class)) - return drgn_error_libdw(); - if (!enum_class) { - Dwarf_Die enumerator; - err = find_dwarf_enumerator(&die, name, - &enumerator); - if (err) - return err; - if (enumerator.addr) { - *die_ret = enumerator; - *type_ret = die; - return NULL; - } - } - break; - } - default: - continue; - } - } while (dwarf_siblingof(&die, &die) == 0); - if (have_declaration) - return NULL; - } - die_ret->addr = NULL; - return NULL; -} - -static struct drgn_error * -drgn_base_type_from_dwarf(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, Dwarf_Die *die, - const struct drgn_language *lang, - struct drgn_type **ret) -{ - struct drgn_error *err; - - const char *name = dwarf_diename(die); - if (!name) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_base_type has missing or invalid DW_AT_name"); - } - - Dwarf_Attribute attr; - Dwarf_Word encoding; - if (!dwarf_attr_integrate(die, DW_AT_encoding, &attr) || - dwarf_formudata(&attr, &encoding)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_base_type has missing or invalid DW_AT_encoding"); - } - int size = dwarf_bytesize(die); - if (size == -1) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_base_type has missing or invalid DW_AT_byte_size"); - } - - enum drgn_byte_order byte_order; - err = dwarf_die_byte_order(die, true, &byte_order); - if (err) - return err; - - switch (encoding) { - case DW_ATE_boolean: - return drgn_bool_type_create(dbinfo->prog, 
name, size, - byte_order, lang, ret); - case DW_ATE_float: - return drgn_float_type_create(dbinfo->prog, name, size, - byte_order, lang, ret); - case DW_ATE_signed: - case DW_ATE_signed_char: - return drgn_int_type_create(dbinfo->prog, name, size, true, - byte_order, lang, ret); - case DW_ATE_unsigned: - case DW_ATE_unsigned_char: - return drgn_int_type_create(dbinfo->prog, name, size, false, - byte_order, lang, ret); - /* We don't support complex types yet. */ - case DW_ATE_complex_float: - default: - return drgn_error_format(DRGN_ERROR_OTHER, - "DW_TAG_base_type has unknown DWARF encoding 0x%llx", - (unsigned long long)encoding); - } -} - -/* - * DW_TAG_structure_type, DW_TAG_union_type, DW_TAG_class_type, and - * DW_TAG_enumeration_type can be incomplete (i.e., have a DW_AT_declaration of - * true). This tries to find the complete type. If it succeeds, it returns NULL. - * If it can't find a complete type, it returns &drgn_not_found. Otherwise, it - * returns an error. - */ -static struct drgn_error * -drgn_debug_info_find_complete(struct drgn_debug_info *dbinfo, uint64_t tag, - const char *name, struct drgn_type **ret) -{ - struct drgn_error *err; - - struct drgn_dwarf_index_iterator it; - err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dindex.global, name, - strlen(name), &tag, 1); - if (err) - return err; - - /* - * Find a matching DIE. Note that drgn_dwarf_index does not contain DIEs - * with DW_AT_declaration, so this will always be a complete type. - */ - struct drgn_dwarf_index_die *index_die = - drgn_dwarf_index_iterator_next(&it); - if (!index_die) - return &drgn_not_found; - /* - * Look for another matching DIE. If there is one, then we can't be sure - * which type this is, so leave it incomplete rather than guessing. 
- */ - if (drgn_dwarf_index_iterator_next(&it)) - return &drgn_not_found; - - Dwarf_Die die; - err = drgn_dwarf_index_get_die(index_die, &die); - if (err) - return err; - struct drgn_qualified_type qualified_type; - err = drgn_type_from_dwarf(dbinfo, index_die->module, &die, - &qualified_type); - if (err) - return err; - *ret = qualified_type.type; - return NULL; -} - -struct drgn_dwarf_member_thunk_arg { - struct drgn_debug_info_module *module; - Dwarf_Die die; - bool can_be_incomplete_array; -}; - -static struct drgn_error * -drgn_dwarf_member_thunk_fn(struct drgn_object *res, void *arg_) -{ - struct drgn_error *err; - struct drgn_dwarf_member_thunk_arg *arg = arg_; - if (res) { - struct drgn_qualified_type qualified_type; - err = drgn_type_from_dwarf_attr(drgn_object_program(res)->_dbinfo, - arg->module, &arg->die, NULL, - false, - arg->can_be_incomplete_array, - NULL, &qualified_type); - if (err) - return err; - - Dwarf_Attribute attr_mem, *attr; - uint64_t bit_field_size; - if ((attr = dwarf_attr_integrate(&arg->die, DW_AT_bit_size, - &attr_mem))) { - Dwarf_Word bit_size; - if (dwarf_formudata(attr, &bit_size)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_member has invalid DW_AT_bit_size"); - } - bit_field_size = bit_size; - } else { - bit_field_size = 0; - } - - err = drgn_object_set_absent(res, qualified_type, - bit_field_size); - if (err) - return err; - } - free(arg); - return NULL; -} - -static struct drgn_error * -parse_member_offset(Dwarf_Die *die, union drgn_lazy_object *member_object, - bool little_endian, uint64_t *ret) -{ - struct drgn_error *err; - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - - /* - * The simplest case is when we have DW_AT_data_bit_offset, which is - * already the offset in bits from the beginning of the containing - * object to the beginning of the member (which may be a bit field). 
- */ - attr = dwarf_attr_integrate(die, DW_AT_data_bit_offset, &attr_mem); - if (attr) { - Dwarf_Word bit_offset; - if (dwarf_formudata(attr, &bit_offset)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_member has invalid DW_AT_data_bit_offset"); - } - *ret = bit_offset; - return NULL; - } - - /* - * Otherwise, we might have DW_AT_data_member_location, which is the - * offset in bytes from the beginning of the containing object. - */ - attr = dwarf_attr_integrate(die, DW_AT_data_member_location, &attr_mem); - if (attr) { - Dwarf_Word byte_offset; - if (dwarf_formudata(attr, &byte_offset)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_member has invalid DW_AT_data_member_location"); - } - *ret = 8 * byte_offset; - } else { - *ret = 0; - } - - /* - * In addition to DW_AT_data_member_location, a bit field might have - * DW_AT_bit_offset, which is the offset in bits of the most significant - * bit of the bit field from the most significant bit of the containing - * object. - */ - attr = dwarf_attr_integrate(die, DW_AT_bit_offset, &attr_mem); - if (attr) { - Dwarf_Word bit_offset; - if (dwarf_formudata(attr, &bit_offset)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_member has invalid DW_AT_bit_offset"); - } - - /* - * If the architecture is little-endian, then we must compute - * the location of the most significant bit from the size of the - * member, then subtract the bit offset and bit size to get the - * location of the beginning of the bit field. - * - * If the architecture is big-endian, then the most significant - * bit of the bit field is the beginning. - */ - if (little_endian) { - err = drgn_lazy_object_evaluate(member_object); - if (err) - return err; - - attr = dwarf_attr_integrate(die, DW_AT_byte_size, - &attr_mem); - /* - * If the member has an explicit byte size, we can use - * that. Otherwise, we have to get it from the member - * type. 
- */ - uint64_t byte_size; - if (attr) { - Dwarf_Word word; - if (dwarf_formudata(attr, &word)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_member has invalid DW_AT_byte_size"); - } - byte_size = word; - } else { - if (!drgn_type_has_size(member_object->obj.type)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_member bit field type does not have size"); - } - err = drgn_type_sizeof(member_object->obj.type, - &byte_size); - if (err) - return err; - } - *ret += 8 * byte_size - bit_offset - member_object->obj.bit_size; - } else { - *ret += bit_offset; - } - } - - return NULL; -} - -static struct drgn_error * -parse_member(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, Dwarf_Die *die, - bool little_endian, bool can_be_incomplete_array, - struct drgn_compound_type_builder *builder) -{ - struct drgn_error *err; - - Dwarf_Attribute attr_mem, *attr; - const char *name; - if ((attr = dwarf_attr_integrate(die, DW_AT_name, &attr_mem))) { - name = dwarf_formstring(attr); - if (!name) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_member has invalid DW_AT_name"); - } - } else { - name = NULL; - } - - struct drgn_dwarf_member_thunk_arg *thunk_arg = - malloc(sizeof(*thunk_arg)); - if (!thunk_arg) - return &drgn_enomem; - thunk_arg->module = module; - thunk_arg->die = *die; - thunk_arg->can_be_incomplete_array = can_be_incomplete_array; - - union drgn_lazy_object member_object; - drgn_lazy_object_init_thunk(&member_object, dbinfo->prog, - drgn_dwarf_member_thunk_fn, thunk_arg); - - uint64_t bit_offset; - err = parse_member_offset(die, &member_object, little_endian, - &bit_offset); - if (err) - goto err; - - err = drgn_compound_type_builder_add_member(builder, &member_object, - name, bit_offset); - if (err) - goto err; - return NULL; - -err: - drgn_lazy_object_deinit(&member_object); - return err; -} - -struct drgn_dwarf_die_thunk_arg { - struct drgn_debug_info_module *module; - Dwarf_Die die; -}; - -static struct 
drgn_error * -drgn_dwarf_template_type_parameter_thunk_fn(struct drgn_object *res, void *arg_) -{ - struct drgn_error *err; - struct drgn_dwarf_die_thunk_arg *arg = arg_; - if (res) { - struct drgn_qualified_type qualified_type; - err = drgn_type_from_dwarf_attr(drgn_object_program(res)->_dbinfo, - arg->module, &arg->die, NULL, - true, true, NULL, - &qualified_type); - if (err) - return err; - - err = drgn_object_set_absent(res, qualified_type, 0); - if (err) - return err; - } - free(arg); - return NULL; -} - -static struct drgn_error * -drgn_dwarf_template_value_parameter_thunk_fn(struct drgn_object *res, - void *arg_) -{ - struct drgn_error *err; - struct drgn_dwarf_die_thunk_arg *arg = arg_; - if (res) { - err = drgn_object_from_dwarf(drgn_object_program(res)->_dbinfo, - arg->module, &arg->die, NULL, NULL, - NULL, res); - if (err) - return err; - } - free(arg); - return NULL; -} - -static struct drgn_error * -parse_template_parameter(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, Dwarf_Die *die, - drgn_object_thunk_fn *thunk_fn, - struct drgn_template_parameters_builder *builder) -{ - char tag_buf[DW_TAG_BUF_LEN]; - - Dwarf_Attribute attr_mem, *attr; - const char *name; - if ((attr = dwarf_attr_integrate(die, DW_AT_name, &attr_mem))) { - name = dwarf_formstring(attr); - if (!name) { - return drgn_error_format(DRGN_ERROR_OTHER, - "%s has invalid DW_AT_name", - dwarf_tag_str(die, tag_buf)); - } - } else { - name = NULL; - } - - bool defaulted; - if (dwarf_flag_integrate(die, DW_AT_default_value, &defaulted)) { - return drgn_error_format(DRGN_ERROR_OTHER, - "%s has invalid DW_AT_default_value", - dwarf_tag_str(die, tag_buf)); - } - - struct drgn_dwarf_die_thunk_arg *thunk_arg = - malloc(sizeof(*thunk_arg)); - if (!thunk_arg) - return &drgn_enomem; - thunk_arg->module = module; - thunk_arg->die = *die; - - union drgn_lazy_object argument; - drgn_lazy_object_init_thunk(&argument, dbinfo->prog, thunk_fn, - thunk_arg); - - struct drgn_error 
*err = - drgn_template_parameters_builder_add(builder, &argument, name, - defaulted); - if (err) - drgn_lazy_object_deinit(&argument); - return err; -} - -static struct drgn_error * -drgn_compound_type_from_dwarf(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, const struct drgn_language *lang, - enum drgn_type_kind kind, struct drgn_type **ret) -{ - struct drgn_error *err; - char tag_buf[DW_TAG_BUF_LEN]; - - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr = dwarf_attr_integrate(die, DW_AT_name, - &attr_mem); - const char *tag; - if (attr) { - tag = dwarf_formstring(attr); - if (!tag) { - return drgn_error_format(DRGN_ERROR_OTHER, - "%s has invalid DW_AT_name", - dwarf_tag_str(die, tag_buf)); - } - } else { - tag = NULL; - } - - bool declaration; - if (dwarf_flag(die, DW_AT_declaration, &declaration)) { - return drgn_error_format(DRGN_ERROR_OTHER, - "%s has invalid DW_AT_declaration", - dwarf_tag_str(die, tag_buf)); - } - if (declaration && tag) { - err = drgn_debug_info_find_complete(dbinfo, dwarf_tag(die), tag, - ret); - if (err != &drgn_not_found) - return err; - } - - struct drgn_compound_type_builder builder; - drgn_compound_type_builder_init(&builder, dbinfo->prog, kind); - - int size; - bool little_endian; - if (declaration) { - size = 0; - } else { - size = dwarf_bytesize(die); - if (size == -1) { - return drgn_error_format(DRGN_ERROR_OTHER, - "%s has missing or invalid DW_AT_byte_size", - dwarf_tag_str(die, tag_buf)); - } - dwarf_die_is_little_endian(die, false, &little_endian); - } - - Dwarf_Die member = {}, child; - int r = dwarf_child(die, &child); - while (r == 0) { - switch (dwarf_tag(&child)) { - case DW_TAG_member: - if (!declaration) { - if (member.addr) { - err = parse_member(dbinfo, module, - &member, - little_endian, false, - &builder); - if (err) - goto err; - } - member = child; - } - break; - case DW_TAG_template_type_parameter: - err = parse_template_parameter(dbinfo, module, &child, - 
drgn_dwarf_template_type_parameter_thunk_fn, - &builder.template_builder); - if (err) - goto err; - break; - case DW_TAG_template_value_parameter: - err = parse_template_parameter(dbinfo, module, &child, - drgn_dwarf_template_value_parameter_thunk_fn, - &builder.template_builder); - if (err) - goto err; - break; - default: - break; - } - r = dwarf_siblingof(&child, &child); - } - if (r == -1) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "libdw could not parse DIE children"); - goto err; - } - /* - * Flexible array members are only allowed as the last member of a - * structure with at least one other member. - */ - if (member.addr) { - err = parse_member(dbinfo, module, &member, little_endian, - kind != DRGN_TYPE_UNION && - builder.members.size > 0, - &builder); - if (err) - goto err; - } - - err = drgn_compound_type_create(&builder, tag, size, !declaration, lang, - ret); - if (err) - goto err; - return NULL; - -err: - drgn_compound_type_builder_deinit(&builder); - return err; -} - -#if !_ELFUTILS_PREREQ(0, 175) -static Elf *dwelf_elf_begin(int fd) -{ - return elf_begin(fd, ELF_C_READ_MMAP_PRIVATE, NULL); -} -#endif - -static struct drgn_error * -parse_enumerator(Dwarf_Die *die, struct drgn_enum_type_builder *builder, - bool *is_signed) -{ - const char *name = dwarf_diename(die); - if (!name) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_enumerator has missing or invalid DW_AT_name"); - } - - Dwarf_Attribute attr_mem, *attr; - if (!(attr = dwarf_attr_integrate(die, DW_AT_const_value, &attr_mem))) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_enumerator is missing DW_AT_const_value"); - } - struct drgn_error *err; - if (attr->form == DW_FORM_sdata || - attr->form == DW_FORM_implicit_const) { - Dwarf_Sword svalue; - if (dwarf_formsdata(attr, &svalue)) - goto invalid; - err = drgn_enum_type_builder_add_signed(builder, name, - svalue); - /* - * GCC before 7.1 didn't include DW_AT_encoding for - * DW_TAG_enumeration_type DIEs, so we have to guess 
the sign - * for enum_compatible_type_fallback(). - */ - if (!err && svalue < 0) - *is_signed = true; - } else { - Dwarf_Word uvalue; - if (dwarf_formudata(attr, &uvalue)) - goto invalid; - err = drgn_enum_type_builder_add_unsigned(builder, name, - uvalue); - } - return err; - -invalid: - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_enumerator has invalid DW_AT_const_value"); -} - -/* - * GCC before 5.1 did not include DW_AT_type for DW_TAG_enumeration_type DIEs, - * so we have to fabricate the compatible type. - */ -static struct drgn_error * -enum_compatible_type_fallback(struct drgn_debug_info *dbinfo, - Dwarf_Die *die, bool is_signed, - const struct drgn_language *lang, - struct drgn_type **ret) -{ - int size = dwarf_bytesize(die); - if (size == -1) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_enumeration_type has missing or invalid DW_AT_byte_size"); - } - enum drgn_byte_order byte_order; - dwarf_die_byte_order(die, false, &byte_order); - return drgn_int_type_create(dbinfo->prog, "", size, is_signed, - byte_order, lang, ret); -} - -static struct drgn_error * -drgn_enum_type_from_dwarf(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, Dwarf_Die *die, - const struct drgn_language *lang, - struct drgn_type **ret) -{ - struct drgn_error *err; - - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr = dwarf_attr_integrate(die, DW_AT_name, - &attr_mem); - const char *tag; - if (attr) { - tag = dwarf_formstring(attr); - if (!tag) - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_enumeration_type has invalid DW_AT_name"); - } else { - tag = NULL; - } - - bool declaration; - if (dwarf_flag(die, DW_AT_declaration, &declaration)) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_enumeration_type has invalid DW_AT_declaration"); - } - if (declaration && tag) { - err = drgn_debug_info_find_complete(dbinfo, - DW_TAG_enumeration_type, - tag, ret); - if (err != &drgn_not_found) - return err; - } - - if (declaration) { - 
return drgn_incomplete_enum_type_create(dbinfo->prog, tag, lang, - ret); - } - - struct drgn_enum_type_builder builder; - drgn_enum_type_builder_init(&builder, dbinfo->prog); - bool is_signed = false; - Dwarf_Die child; - int r = dwarf_child(die, &child); - while (r == 0) { - if (dwarf_tag(&child) == DW_TAG_enumerator) { - err = parse_enumerator(&child, &builder, &is_signed); - if (err) - goto err; - } - r = dwarf_siblingof(&child, &child); - } - if (r == -1) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "libdw could not parse DIE children"); - goto err; - } - - struct drgn_type *compatible_type; - r = dwarf_type(die, &child); - if (r == -1) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_enumeration_type has invalid DW_AT_type"); - goto err; - } else if (r) { - err = enum_compatible_type_fallback(dbinfo, die, is_signed, - lang, &compatible_type); - if (err) - goto err; - } else { - struct drgn_qualified_type qualified_compatible_type; - err = drgn_type_from_dwarf(dbinfo, module, &child, - &qualified_compatible_type); - if (err) - goto err; - compatible_type = qualified_compatible_type.type; - if (drgn_type_kind(compatible_type) != DRGN_TYPE_INT) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "DW_AT_type of DW_TAG_enumeration_type is not an integer type"); - goto err; - } - } - - err = drgn_enum_type_create(&builder, tag, compatible_type, lang, ret); - if (err) - goto err; - return NULL; - -err: - drgn_enum_type_builder_deinit(&builder); - return err; -} - -static struct drgn_error * -drgn_typedef_type_from_dwarf(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, const struct drgn_language *lang, - bool can_be_incomplete_array, - bool *is_incomplete_array_ret, - struct drgn_type **ret) -{ - const char *name = dwarf_diename(die); - if (!name) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_typedef has missing or invalid DW_AT_name"); - } - - struct drgn_qualified_type aliased_type; - struct drgn_error *err 
= drgn_type_from_dwarf_attr(dbinfo, module, die, - lang, true, - can_be_incomplete_array, - is_incomplete_array_ret, - &aliased_type); - if (err) - return err; - - return drgn_typedef_type_create(dbinfo->prog, name, aliased_type, lang, - ret); -} - -static struct drgn_error * -drgn_pointer_type_from_dwarf(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, const struct drgn_language *lang, - struct drgn_type **ret) -{ - struct drgn_qualified_type referenced_type; - struct drgn_error *err = drgn_type_from_dwarf_attr(dbinfo, module, die, - lang, true, true, - NULL, - &referenced_type); - if (err) - return err; - - Dwarf_Attribute attr_mem, *attr; - uint64_t size; - if ((attr = dwarf_attr_integrate(die, DW_AT_byte_size, &attr_mem))) { - Dwarf_Word word; - if (dwarf_formudata(attr, &word)) { - return drgn_error_format(DRGN_ERROR_OTHER, - "DW_TAG_pointer_type has invalid DW_AT_byte_size"); - } - size = word; - } else { - uint8_t address_size; - err = drgn_program_address_size(dbinfo->prog, &address_size); - if (err) - return err; - size = address_size; - } - - /* - * The DWARF 5 specification doesn't mention DW_AT_endianity for - * DW_TAG_pointer_type DIEs, and GCC as of version 10.2 doesn't emit it - * even for pointers stored in the opposite byte order (e.g., when using - * scalar_storage_order), but it probably should. 
- */ - enum drgn_byte_order byte_order; - dwarf_die_byte_order(die, false, &byte_order); - return drgn_pointer_type_create(dbinfo->prog, referenced_type, size, - byte_order, lang, ret); -} - -struct array_dimension { - uint64_t length; - bool is_complete; -}; - -DEFINE_VECTOR(array_dimension_vector, struct array_dimension) - -static struct drgn_error *subrange_length(Dwarf_Die *die, - struct array_dimension *dimension) -{ - Dwarf_Attribute attr_mem; - Dwarf_Attribute *attr; - Dwarf_Word word; - - if (!(attr = dwarf_attr_integrate(die, DW_AT_upper_bound, &attr_mem)) && - !(attr = dwarf_attr_integrate(die, DW_AT_count, &attr_mem))) { - dimension->is_complete = false; - return NULL; - } - - if (dwarf_formudata(attr, &word)) { - return drgn_error_format(DRGN_ERROR_OTHER, - "DW_TAG_subrange_type has invalid %s", - attr->code == DW_AT_upper_bound ? - "DW_AT_upper_bound" : - "DW_AT_count"); - } - - dimension->is_complete = true; - /* - * GCC emits a DW_FORM_sdata DW_AT_upper_bound of -1 for empty array - * variables without an explicit size (e.g., `int arr[] = {};`). 
- */ - if (attr->code == DW_AT_upper_bound && attr->form == DW_FORM_sdata && - word == (Dwarf_Word)-1) { - dimension->length = 0; - } else if (attr->code == DW_AT_upper_bound) { - if (word >= UINT64_MAX) { - return drgn_error_create(DRGN_ERROR_OVERFLOW, - "DW_AT_upper_bound is too large"); - } - dimension->length = (uint64_t)word + 1; - } else { - if (word > UINT64_MAX) { - return drgn_error_create(DRGN_ERROR_OVERFLOW, - "DW_AT_count is too large"); - } - dimension->length = word; - } - return NULL; -} - -static struct drgn_error * -drgn_array_type_from_dwarf(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, const struct drgn_language *lang, - bool can_be_incomplete_array, - bool *is_incomplete_array_ret, - struct drgn_type **ret) -{ - struct drgn_error *err; - struct array_dimension_vector dimensions = VECTOR_INIT; - struct array_dimension *dimension; - Dwarf_Die child; - int r = dwarf_child(die, &child); - while (r == 0) { - if (dwarf_tag(&child) == DW_TAG_subrange_type) { - dimension = array_dimension_vector_append_entry(&dimensions); - if (!dimension) - goto out; - err = subrange_length(&child, dimension); - if (err) - goto out; - } - r = dwarf_siblingof(&child, &child); - } - if (r == -1) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "libdw could not parse DIE children"); - goto out; - } - if (!dimensions.size) { - dimension = array_dimension_vector_append_entry(&dimensions); - if (!dimension) - goto out; - dimension->is_complete = false; - } - - struct drgn_qualified_type element_type; - err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, false, false, - NULL, &element_type); - if (err) - goto out; - - *is_incomplete_array_ret = !dimensions.data[0].is_complete; - struct drgn_type *type; - do { - dimension = array_dimension_vector_pop(&dimensions); - if (dimension->is_complete) { - err = drgn_array_type_create(dbinfo->prog, element_type, - dimension->length, lang, - &type); - } else if (dimensions.size || 
!can_be_incomplete_array) { - err = drgn_array_type_create(dbinfo->prog, element_type, - 0, lang, &type); - } else { - err = drgn_incomplete_array_type_create(dbinfo->prog, - element_type, - lang, &type); - } - if (err) - goto out; - - element_type.type = type; - element_type.qualifiers = 0; - } while (dimensions.size); - - *ret = type; - err = NULL; -out: - array_dimension_vector_deinit(&dimensions); - return err; -} - -static struct drgn_error * -drgn_dwarf_formal_parameter_thunk_fn(struct drgn_object *res, void *arg_) -{ - struct drgn_error *err; - struct drgn_dwarf_die_thunk_arg *arg = arg_; - if (res) { - struct drgn_qualified_type qualified_type; - err = drgn_type_from_dwarf_attr(drgn_object_program(res)->_dbinfo, - arg->module, &arg->die, NULL, - false, true, NULL, - &qualified_type); - if (err) - return err; - - err = drgn_object_set_absent(res, qualified_type, 0); - if (err) - return err; - } - free(arg); - return NULL; -} - -static struct drgn_error * -parse_formal_parameter(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, Dwarf_Die *die, - struct drgn_function_type_builder *builder) -{ - Dwarf_Attribute attr_mem, *attr; - const char *name; - if ((attr = dwarf_attr_integrate(die, DW_AT_name, &attr_mem))) { - name = dwarf_formstring(attr); - if (!name) { - return drgn_error_create(DRGN_ERROR_OTHER, - "DW_TAG_formal_parameter has invalid DW_AT_name"); - } - } else { - name = NULL; - } - - struct drgn_dwarf_die_thunk_arg *thunk_arg = - malloc(sizeof(*thunk_arg)); - if (!thunk_arg) - return &drgn_enomem; - thunk_arg->module = module; - thunk_arg->die = *die; - - union drgn_lazy_object default_argument; - drgn_lazy_object_init_thunk(&default_argument, dbinfo->prog, - drgn_dwarf_formal_parameter_thunk_fn, - thunk_arg); - - struct drgn_error *err = - drgn_function_type_builder_add_parameter(builder, - &default_argument, - name); - if (err) - drgn_lazy_object_deinit(&default_argument); - return err; -} - -static struct drgn_error * 
-drgn_function_type_from_dwarf(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, const struct drgn_language *lang, - struct drgn_type **ret) -{ - struct drgn_error *err; - char tag_buf[DW_TAG_BUF_LEN]; - - struct drgn_function_type_builder builder; - drgn_function_type_builder_init(&builder, dbinfo->prog); - bool is_variadic = false; - Dwarf_Die child; - int r = dwarf_child(die, &child); - while (r == 0) { - switch (dwarf_tag(&child)) { - case DW_TAG_formal_parameter: - if (is_variadic) { - err = drgn_error_format(DRGN_ERROR_OTHER, - "%s has DW_TAG_formal_parameter child after DW_TAG_unspecified_parameters child", - dwarf_tag_str(die, - tag_buf)); - goto err; - } - err = parse_formal_parameter(dbinfo, module, &child, - &builder); - if (err) - goto err; - break; - case DW_TAG_unspecified_parameters: - if (is_variadic) { - err = drgn_error_format(DRGN_ERROR_OTHER, - "%s has multiple DW_TAG_unspecified_parameters children", - dwarf_tag_str(die, - tag_buf)); - goto err; - } - is_variadic = true; - break; - case DW_TAG_template_type_parameter: - err = parse_template_parameter(dbinfo, module, &child, - drgn_dwarf_template_type_parameter_thunk_fn, - &builder.template_builder); - if (err) - goto err; - break; - case DW_TAG_template_value_parameter: - err = parse_template_parameter(dbinfo, module, &child, - drgn_dwarf_template_value_parameter_thunk_fn, - &builder.template_builder); - if (err) - goto err; - break; - default: - break; - } - r = dwarf_siblingof(&child, &child); - } - if (r == -1) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "libdw could not parse DIE children"); - goto err; - } - - struct drgn_qualified_type return_type; - err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, true, true, - NULL, &return_type); - if (err) - goto err; - - err = drgn_function_type_create(&builder, return_type, is_variadic, - lang, ret); - if (err) - goto err; - return NULL; - -err: - drgn_function_type_builder_deinit(&builder); - 
return err; -} - -static struct drgn_error * -drgn_type_from_dwarf_internal(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, bool can_be_incomplete_array, - bool *is_incomplete_array_ret, - struct drgn_qualified_type *ret) -{ - if (dbinfo->depth >= 1000) { - return drgn_error_create(DRGN_ERROR_RECURSION, - "maximum DWARF type parsing depth exceeded"); - } - - /* If the DIE has a type unit signature, follow it. */ - Dwarf_Die definition_die; - { - Dwarf_Attribute attr_mem, *attr; - if ((attr = dwarf_attr_integrate(die, DW_AT_signature, - &attr_mem))) { - if (!dwarf_formref_die(attr, &definition_die)) - return drgn_error_libdw(); - die = &definition_die; - } - } - - /* If we got a declaration, try to find the definition. */ - bool declaration; - if (dwarf_flag(die, DW_AT_declaration, &declaration)) - return drgn_error_libdw(); - if (declaration) { - uintptr_t die_addr; - if (drgn_dwarf_index_find_definition(&dbinfo->dindex, - (uintptr_t)die->addr, - &module, &die_addr)) { - Dwarf_Addr bias; - Dwarf *dwarf = dwfl_module_getdwarf(module->dwfl_module, - &bias); - if (!dwarf) - return drgn_error_libdwfl(); - uintptr_t start = - (uintptr_t)module->scn_data[DRGN_SCN_DEBUG_INFO]->d_buf; - size_t size = - module->scn_data[DRGN_SCN_DEBUG_INFO]->d_size; - if (die_addr >= start && die_addr < start + size) { - if (!dwarf_offdie(dwarf, die_addr - start, - &definition_die)) - return drgn_error_libdw(); - } else { - start = (uintptr_t)module->scn_data[DRGN_SCN_DEBUG_TYPES]->d_buf; - /* Assume .debug_types */ - if (!dwarf_offdie_types(dwarf, die_addr - start, - &definition_die)) - return drgn_error_libdw(); - } - die = &definition_die; - } - } - - struct drgn_dwarf_type_map_entry entry = { - .key = die->addr, - }; - struct hash_pair hp = drgn_dwarf_type_map_hash(&entry.key); - struct drgn_dwarf_type_map_iterator it = - drgn_dwarf_type_map_search_hashed(&dbinfo->types, &entry.key, - hp); - if (it.entry) { - if (!can_be_incomplete_array && - 
it.entry->value.is_incomplete_array) { - it = drgn_dwarf_type_map_search_hashed(&dbinfo->cant_be_incomplete_array_types, - &entry.key, hp); - } - if (it.entry) { - ret->type = it.entry->value.type; - ret->qualifiers = it.entry->value.qualifiers; - return NULL; - } - } - - const struct drgn_language *lang; - struct drgn_error *err = drgn_language_from_die(die, true, &lang); - if (err) - return err; - - ret->qualifiers = 0; - dbinfo->depth++; - entry.value.is_incomplete_array = false; - switch (dwarf_tag(die)) { - case DW_TAG_const_type: - err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, true, - can_be_incomplete_array, - &entry.value.is_incomplete_array, - ret); - ret->qualifiers |= DRGN_QUALIFIER_CONST; - break; - case DW_TAG_restrict_type: - err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, true, - can_be_incomplete_array, - &entry.value.is_incomplete_array, - ret); - ret->qualifiers |= DRGN_QUALIFIER_RESTRICT; - break; - case DW_TAG_volatile_type: - err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, true, - can_be_incomplete_array, - &entry.value.is_incomplete_array, - ret); - ret->qualifiers |= DRGN_QUALIFIER_VOLATILE; - break; - case DW_TAG_atomic_type: - err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, true, - can_be_incomplete_array, - &entry.value.is_incomplete_array, - ret); - ret->qualifiers |= DRGN_QUALIFIER_ATOMIC; - break; - case DW_TAG_base_type: - err = drgn_base_type_from_dwarf(dbinfo, module, die, lang, - &ret->type); - break; - case DW_TAG_structure_type: - err = drgn_compound_type_from_dwarf(dbinfo, module, die, lang, - DRGN_TYPE_STRUCT, - &ret->type); - break; - case DW_TAG_union_type: - err = drgn_compound_type_from_dwarf(dbinfo, module, die, lang, - DRGN_TYPE_UNION, - &ret->type); - break; - case DW_TAG_class_type: - err = drgn_compound_type_from_dwarf(dbinfo, module, die, lang, - DRGN_TYPE_CLASS, - &ret->type); - break; - case DW_TAG_enumeration_type: - err = drgn_enum_type_from_dwarf(dbinfo, module, die, 
lang, - &ret->type); - break; - case DW_TAG_typedef: - err = drgn_typedef_type_from_dwarf(dbinfo, module, die, lang, - can_be_incomplete_array, - &entry.value.is_incomplete_array, - &ret->type); - break; - case DW_TAG_pointer_type: - err = drgn_pointer_type_from_dwarf(dbinfo, module, die, lang, - &ret->type); - break; - case DW_TAG_array_type: - err = drgn_array_type_from_dwarf(dbinfo, module, die, lang, - can_be_incomplete_array, - &entry.value.is_incomplete_array, - &ret->type); - break; - case DW_TAG_subroutine_type: - case DW_TAG_subprogram: - err = drgn_function_type_from_dwarf(dbinfo, module, die, lang, - &ret->type); - break; - default: - err = drgn_error_format(DRGN_ERROR_OTHER, - "unknown DWARF type tag 0x%x", - dwarf_tag(die)); - break; - } - dbinfo->depth--; - if (err) - return err; - - entry.value.type = ret->type; - entry.value.qualifiers = ret->qualifiers; - struct drgn_dwarf_type_map *map; - if (!can_be_incomplete_array && entry.value.is_incomplete_array) - map = &dbinfo->cant_be_incomplete_array_types; - else - map = &dbinfo->types; - if (drgn_dwarf_type_map_insert_searched(map, &entry, hp, NULL) == -1) { - /* - * This will "leak" the type we created, but it'll still be - * cleaned up when the program is freed. 
- */ - return &drgn_enomem; - } - if (is_incomplete_array_ret) - *is_incomplete_array_ret = entry.value.is_incomplete_array; - return NULL; -} - -struct drgn_error *drgn_debug_info_find_type(enum drgn_type_kind kind, - const char *name, size_t name_len, - const char *filename, void *arg, - struct drgn_qualified_type *ret) -{ - struct drgn_error *err; - struct drgn_debug_info *dbinfo = arg; - - uint64_t tag; - switch (kind) { - case DRGN_TYPE_INT: - case DRGN_TYPE_BOOL: - case DRGN_TYPE_FLOAT: - tag = DW_TAG_base_type; - break; - case DRGN_TYPE_STRUCT: - tag = DW_TAG_structure_type; - break; - case DRGN_TYPE_UNION: - tag = DW_TAG_union_type; - break; - case DRGN_TYPE_CLASS: - tag = DW_TAG_class_type; - break; - case DRGN_TYPE_ENUM: - tag = DW_TAG_enumeration_type; - break; - case DRGN_TYPE_TYPEDEF: - tag = DW_TAG_typedef; - break; - default: - UNREACHABLE(); - } - - struct drgn_dwarf_index_iterator it; - err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dindex.global, name, - name_len, &tag, 1); - if (err) - return err; - struct drgn_dwarf_index_die *index_die; - while ((index_die = drgn_dwarf_index_iterator_next(&it))) { - Dwarf_Die die; - err = drgn_dwarf_index_get_die(index_die, &die); - if (err) - return err; - if (die_matches_filename(&die, filename)) { - err = drgn_type_from_dwarf(dbinfo, index_die->module, - &die, ret); - if (err) - return err; - /* - * For DW_TAG_base_type, we need to check that the type - * we found was the right kind. - */ - if (drgn_type_kind(ret->type) == kind) - return NULL; - } - } - return &drgn_not_found; -} - -struct drgn_error * -drgn_debug_info_find_object(const char *name, size_t name_len, - const char *filename, - enum drgn_find_object_flags flags, void *arg, - struct drgn_object *ret) -{ - struct drgn_error *err; - struct drgn_debug_info *dbinfo = arg; - - struct drgn_dwarf_index_namespace *ns = &dbinfo->dindex.global; - if (name_len >= 2 && memcmp(name, "::", 2) == 0) { - /* Explicit global namespace. 
*/ - name_len -= 2; - name += 2; - } - const char *colons; - while ((colons = memmem(name, name_len, "::", 2))) { - struct drgn_dwarf_index_iterator it; - uint64_t ns_tag = DW_TAG_namespace; - err = drgn_dwarf_index_iterator_init(&it, ns, name, - colons - name, &ns_tag, 1); - if (err) - return err; - struct drgn_dwarf_index_die *index_die = - drgn_dwarf_index_iterator_next(&it); - if (!index_die) - return &drgn_not_found; - ns = index_die->namespace; - name_len -= colons + 2 - name; - name = colons + 2; - } - - uint64_t tags[3]; - size_t num_tags = 0; - if (flags & DRGN_FIND_OBJECT_CONSTANT) - tags[num_tags++] = DW_TAG_enumerator; - if (flags & DRGN_FIND_OBJECT_FUNCTION) - tags[num_tags++] = DW_TAG_subprogram; - if (flags & DRGN_FIND_OBJECT_VARIABLE) - tags[num_tags++] = DW_TAG_variable; - - struct drgn_dwarf_index_iterator it; - err = drgn_dwarf_index_iterator_init(&it, ns, name, strlen(name), tags, - num_tags); - if (err) - return err; - struct drgn_dwarf_index_die *index_die; - while ((index_die = drgn_dwarf_index_iterator_next(&it))) { - Dwarf_Die die; - err = drgn_dwarf_index_get_die(index_die, &die); - if (err) - return err; - if (!die_matches_filename(&die, filename)) - continue; - if (dwarf_tag(&die) == DW_TAG_enumeration_type) { - return drgn_object_from_dwarf_enumerator(dbinfo, - index_die->module, - &die, name, - ret); - } else { - return drgn_object_from_dwarf(dbinfo, index_die->module, - &die, NULL, NULL, NULL, - ret); - } - } - return &drgn_not_found; -} - -struct drgn_error *drgn_debug_info_create(struct drgn_program *prog, - struct drgn_debug_info **ret) -{ - struct drgn_debug_info *dbinfo = malloc(sizeof(*dbinfo)); - if (!dbinfo) - return &drgn_enomem; - dbinfo->prog = prog; - const Dwfl_Callbacks *dwfl_callbacks; - if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) - dwfl_callbacks = &drgn_dwfl_callbacks; - else if (prog->flags & DRGN_PROGRAM_IS_LIVE) - dwfl_callbacks = &drgn_linux_proc_dwfl_callbacks; - else - dwfl_callbacks = 
&drgn_userspace_core_dump_dwfl_callbacks; - dbinfo->dwfl = dwfl_begin(dwfl_callbacks); - if (!dbinfo->dwfl) { - free(dbinfo); - return drgn_error_libdwfl(); - } - drgn_debug_info_module_table_init(&dbinfo->modules); - c_string_set_init(&dbinfo->module_names); - drgn_dwarf_index_init(&dbinfo->dindex); - drgn_dwarf_type_map_init(&dbinfo->types); - drgn_dwarf_type_map_init(&dbinfo->cant_be_incomplete_array_types); - dbinfo->depth = 0; - *ret = dbinfo; - return NULL; -} - -void drgn_debug_info_destroy(struct drgn_debug_info *dbinfo) -{ - if (!dbinfo) - return; - drgn_dwarf_type_map_deinit(&dbinfo->cant_be_incomplete_array_types); - drgn_dwarf_type_map_deinit(&dbinfo->types); - drgn_dwarf_index_deinit(&dbinfo->dindex); - c_string_set_deinit(&dbinfo->module_names); - drgn_debug_info_free_modules(dbinfo, false, true); - assert(drgn_debug_info_module_table_empty(&dbinfo->modules)); - drgn_debug_info_module_table_deinit(&dbinfo->modules); - dwfl_end(dbinfo->dwfl); - free(dbinfo); -} - -static struct drgn_error * -drgn_dwarf_cfi_next_encoded(struct drgn_debug_info_buffer *buffer, - uint8_t address_size, uint8_t encoding, - uint64_t func_addr, uint64_t *ret) -{ - struct drgn_error *err; - - /* Not currently used for CFI. */ - if (encoding & DW_EH_PE_indirect) { -unknown_fde_encoding: - return binary_buffer_error(&buffer->bb, - "unknown EH encoding %#" PRIx8, - encoding); - } - - size_t pos = (buffer->bb.pos - - (char *)buffer->module->scn_data[buffer->scn]->d_buf); - uint64_t base; - switch (encoding & 0x70) { - case DW_EH_PE_absptr: - base = 0; - break; - case DW_EH_PE_pcrel: - base = buffer->module->pcrel_base + pos; - break; - case DW_EH_PE_textrel: - base = buffer->module->textrel_base; - break; - case DW_EH_PE_datarel: - base = buffer->module->datarel_base; - break; - case DW_EH_PE_funcrel: - /* Relative to the FDE's initial location. 
*/ - base = func_addr; - break; - case DW_EH_PE_aligned: - base = 0; - if (pos % address_size != 0 && - (err = binary_buffer_skip(&buffer->bb, - address_size - pos % address_size))) - return err; - break; - default: - goto unknown_fde_encoding; - } - - uint64_t offset; - switch (encoding & 0xf) { - case DW_EH_PE_absptr: - if ((err = binary_buffer_next_uint(&buffer->bb, address_size, - &offset))) - return err; - break; - case DW_EH_PE_uleb128: - if ((err = binary_buffer_next_uleb128(&buffer->bb, &offset))) - return err; - break; - case DW_EH_PE_udata2: - if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, - &offset))) - return err; - break; - case DW_EH_PE_udata4: - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &offset))) - return err; - break; - case DW_EH_PE_udata8: - if ((err = binary_buffer_next_u64(&buffer->bb, &offset))) - return err; - break; - case DW_EH_PE_sleb128: - if ((err = binary_buffer_next_sleb128_into_u64(&buffer->bb, - &offset))) - return err; - break; - case DW_EH_PE_sdata2: - if ((err = binary_buffer_next_s16_into_u64(&buffer->bb, - &offset))) - return err; - break; - case DW_EH_PE_sdata4: - if ((err = binary_buffer_next_s32_into_u64(&buffer->bb, - &offset))) - return err; - break; - case DW_EH_PE_sdata8: - if ((err = binary_buffer_next_s64_into_u64(&buffer->bb, - &offset))) - return err; - break; - default: - goto unknown_fde_encoding; + if (nt_file) { + err = userspace_core_report_debug_info(load, + nt_file, + nt_file_len); + if (err) + return err; + } else if (dwfl_core_file_report(dwfl, prog->core, + NULL) == -1) { + return drgn_error_libdwfl(); + } + } } - *ret = (base + offset) & uint_max(address_size); - return NULL; } -static struct drgn_error * -drgn_parse_dwarf_cie(struct drgn_debug_info_module *module, - enum drgn_debug_info_scn scn, size_t cie_pointer, - struct drgn_dwarf_cie *cie) +static struct drgn_error *relocate_elf_section(Elf_Scn *scn, Elf_Scn *reloc_scn, + Elf_Scn *symtab_scn, + const uint64_t *sh_addrs, + 
size_t shdrnum, + const struct drgn_platform *platform) { - bool is_eh = scn == DRGN_SCN_EH_FRAME; struct drgn_error *err; - cie->is_eh = is_eh; - - struct drgn_debug_info_buffer buffer; - drgn_debug_info_buffer_init(&buffer, module, scn); - buffer.bb.pos += cie_pointer; + bool is_64_bit = drgn_platform_is_64_bit(platform); + bool bswap = drgn_platform_bswap(platform); + apply_elf_rela_fn *apply_elf_rela = platform->arch->apply_elf_rela; - uint32_t tmp; - if ((err = binary_buffer_next_u32(&buffer.bb, &tmp))) + Elf_Data *data, *reloc_data, *symtab_data; + err = read_elf_section(scn, &data); + if (err) return err; - bool is_64_bit = tmp == UINT32_C(0xffffffff); - uint64_t length; - if (is_64_bit) { - if ((err = binary_buffer_next_u64(&buffer.bb, &length))) - return err; - } else { - length = tmp; - } - if (length > buffer.bb.end - buffer.bb.pos) { - return binary_buffer_error(&buffer.bb, - "entry length is out of bounds"); - } - buffer.bb.end = buffer.bb.pos + length; - uint64_t cie_id, expected_cie_id; - if (is_64_bit) { - if ((err = binary_buffer_next_u64(&buffer.bb, &cie_id))) - return err; - expected_cie_id = is_eh ? 0 : UINT64_C(0xffffffffffffffff); - } else { - if ((err = binary_buffer_next_u32_into_u64(&buffer.bb, - &cie_id))) - return err; - expected_cie_id = is_eh ? 0 : UINT64_C(0xffffffff); - } - if (cie_id != expected_cie_id) - return binary_buffer_error(&buffer.bb, "invalid CIE ID"); + struct drgn_relocating_section relocating = { + .buf = data->d_buf, + .buf_size = data->d_size, + .addr = sh_addrs[elf_ndxscn(scn)], + .bswap = bswap, + }; - uint8_t version; - if ((err = binary_buffer_next_u8(&buffer.bb, &version))) + err = read_elf_section(reloc_scn, &reloc_data); + if (err) return err; - if (version < 1 || version == 2 || version > 4) { - return binary_buffer_error(&buffer.bb, - "unknown CIE version %" PRIu8, - version); - } + const void *relocs = reloc_data->d_buf; + size_t reloc_size = is_64_bit ? 
sizeof(Elf64_Rela) : sizeof(Elf32_Rela); + size_t num_relocs = reloc_data->d_size / reloc_size; - const char *augmentation; - size_t augmentation_len; - if ((err = binary_buffer_next_string(&buffer.bb, &augmentation, - &augmentation_len))) + err = read_elf_section(symtab_scn, &symtab_data); + if (err) return err; - cie->have_augmentation_length = augmentation[0] == 'z'; - cie->signal_frame = false; - for (size_t i = 0; i < augmentation_len; i++) { - switch (augmentation[i]) { - case 'z': - if (i != 0) - goto unknown_augmentation; - break; - case 'L': - case 'P': - case 'R': - if (augmentation[0] != 'z') - goto unknown_augmentation; - break; - case 'S': - cie->signal_frame = true; - break; - default: -unknown_augmentation: - /* - * We could ignore this CIE and all FDEs that reference - * it or skip the augmentation if we have its length, - * but let's fail loudly so that we find out about - * missing support. - */ - return binary_buffer_error_at(&buffer.bb, - &augmentation[i], - "unknown CFI augmentation %s", - augmentation); - } - } + const void *syms = symtab_data->d_buf; + size_t sym_size = is_64_bit ? 
sizeof(Elf64_Sym) : sizeof(Elf32_Sym); + size_t num_syms = symtab_data->d_size / sym_size; - if (version >= 4) { - if ((err = binary_buffer_next_u8(&buffer.bb, - &cie->address_size))) - return err; - if (cie->address_size < 1 || cie->address_size > 8) { - return binary_buffer_error(&buffer.bb, - "unsupported address size %" PRIu8, - cie->address_size); + for (size_t i = 0; i < num_relocs; i++) { + uint64_t r_offset; + uint32_t r_sym; + uint32_t r_type; + int64_t r_addend; + if (is_64_bit) { + Elf64_Rela *rela = (Elf64_Rela *)relocs + i; + uint64_t r_info; + memcpy(&r_offset, &rela->r_offset, sizeof(r_offset)); + memcpy(&r_info, &rela->r_info, sizeof(r_info)); + memcpy(&r_addend, &rela->r_addend, sizeof(r_addend)); + if (bswap) { + r_offset = bswap_64(r_offset); + r_info = bswap_64(r_info); + r_addend = bswap_64(r_addend); + } + r_sym = ELF64_R_SYM(r_info); + r_type = ELF64_R_TYPE(r_info); + } else { + Elf32_Rela *rela32 = (Elf32_Rela *)relocs + i; + uint32_t r_offset32; + uint32_t r_info32; + int32_t r_addend32; + memcpy(&r_offset32, &rela32->r_offset, sizeof(r_offset32)); + memcpy(&r_info32, &rela32->r_info, sizeof(r_info32)); + memcpy(&r_addend32, &rela32->r_addend, sizeof(r_addend32)); + if (bswap) { + r_offset32 = bswap_32(r_offset32); + r_info32 = bswap_32(r_info32); + r_addend32 = bswap_32(r_addend32); + } + r_offset = r_offset32; + r_sym = ELF32_R_SYM(r_info32); + r_type = ELF32_R_TYPE(r_info32); + r_addend = r_addend32; } - uint8_t segment_selector_size; - if ((err = binary_buffer_next_u8(&buffer.bb, - &segment_selector_size))) - return err; - if (segment_selector_size) { - return binary_buffer_error(&buffer.bb, - "unsupported segment selector size %" PRIu8, - segment_selector_size); + if (r_sym >= num_syms) { + return drgn_error_create(DRGN_ERROR_OTHER, + "invalid ELF relocation symbol"); } - } else { - cie->address_size = - drgn_platform_address_size(&module->platform); - } - if ((err = binary_buffer_next_uleb128(&buffer.bb, - 
&cie->code_alignment_factor)) || - (err = binary_buffer_next_sleb128(&buffer.bb, - &cie->data_alignment_factor))) - return err; - uint64_t return_address_register; - if (version >= 3) { - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &return_address_register))) - return err; - } else { - if ((err = binary_buffer_next_u8_into_u64(&buffer.bb, - &return_address_register))) - return err; - } - cie->return_address_register = - module->platform.arch->dwarf_regno_to_internal(return_address_register); - if (cie->return_address_register == DRGN_REGISTER_NUMBER_UNKNOWN) { - return binary_buffer_error(&buffer.bb, - "unknown return address register"); - } - cie->address_encoding = DW_EH_PE_absptr; - if (augmentation[0] == 'z') { - for (size_t i = 0; i < augmentation_len; i++) { - switch (augmentation[i]) { - case 'z': - if ((err = binary_buffer_skip_leb128(&buffer.bb))) - return err; - break; - case 'L': - if ((err = binary_buffer_skip(&buffer.bb, 1))) - return err; - break; - case 'P': { - uint8_t encoding; - if ((err = binary_buffer_next_u8(&buffer.bb, &encoding))) - return err; - /* - * We don't need the result, so don't bother - * dereferencing. 
- */ - encoding &= ~DW_EH_PE_indirect; - uint64_t unused; - if ((err = drgn_dwarf_cfi_next_encoded(&buffer, - cie->address_size, - encoding, - 0, - &unused))) - return err; - break; + uint16_t st_shndx; + uint64_t st_value; + if (is_64_bit) { + const Elf64_Sym *sym = (Elf64_Sym *)syms + r_sym; + memcpy(&st_shndx, &sym->st_shndx, sizeof(st_shndx)); + memcpy(&st_value, &sym->st_value, sizeof(st_value)); + if (bswap) { + st_shndx = bswap_16(st_shndx); + st_value = bswap_64(st_value); } - case 'R': - if ((err = binary_buffer_next_u8(&buffer.bb, - &cie->address_encoding))) - return err; - break; + } else { + const Elf32_Sym *sym = (Elf32_Sym *)syms + r_sym; + memcpy(&st_shndx, &sym->st_shndx, sizeof(st_shndx)); + uint32_t st_value32; + memcpy(&st_value32, &sym->st_value, sizeof(st_value32)); + if (bswap) { + st_shndx = bswap_16(st_shndx); + st_value32 = bswap_32(st_value32); } + st_value = st_value32; + } + if (st_shndx >= shdrnum) { + return drgn_error_create(DRGN_ERROR_OTHER, + "invalid ELF symbol section index"); } + + err = apply_elf_rela(&relocating, r_offset, r_type, r_addend, + sh_addrs[st_shndx] + st_value); + if (err) + return err; } - cie->initial_instructions = buffer.bb.pos; - cie->initial_instructions_size = buffer.bb.end - buffer.bb.pos; + + /* + * Mark the relocation section as empty so that libdwfl doesn't try to + * apply it again. + */ + GElf_Shdr *shdr, shdr_mem; + shdr = gelf_getshdr(reloc_scn, &shdr_mem); + if (!shdr) + return drgn_error_libelf(); + shdr->sh_size = 0; + if (!gelf_update_shdr(reloc_scn, shdr)) + return drgn_error_libelf(); + reloc_data->d_size = 0; return NULL; } -static struct drgn_error * -drgn_parse_dwarf_frames(struct drgn_debug_info_module *module, - enum drgn_debug_info_scn scn, - struct drgn_dwarf_cie_vector *cies, - struct drgn_dwarf_fde_vector *fdes) +/* + * Before the debugging information in a relocatable ELF file (e.g., Linux + * kernel module) can be used, it must have ELF relocations applied. 
This is + * usually done by libdwfl. However, libdwfl is relatively slow at it. This is a + * much faster implementation. + */ +static struct drgn_error *relocate_elf_file(Elf *elf) { - bool is_eh = scn == DRGN_SCN_EH_FRAME; struct drgn_error *err; - if (!module->scns[scn]) + GElf_Ehdr ehdr_mem, *ehdr; + ehdr = gelf_getehdr(elf, &ehdr_mem); + if (!ehdr) + return drgn_error_libelf(); + + if (ehdr->e_type != ET_REL) { + /* Not a relocatable file. */ return NULL; - err = drgn_debug_info_module_cache_section(module, scn); - if (err) - return err; - Elf_Data *data = module->scn_data[scn]; - struct drgn_debug_info_buffer buffer; - drgn_debug_info_buffer_init(&buffer, module, scn); - - struct drgn_dwarf_cie_map cie_map = HASH_TABLE_INIT; - while (binary_buffer_has_next(&buffer.bb)) { - uint32_t tmp; - if ((err = binary_buffer_next_u32(&buffer.bb, &tmp))) + } + + struct drgn_platform platform; + drgn_platform_from_elf(ehdr, &platform); + if (!platform.arch->apply_elf_rela) { + /* Unsupported; fall back to libdwfl. */ + return NULL; + } + + size_t shdrnum; + if (elf_getshdrnum(elf, &shdrnum)) + return drgn_error_libelf(); + uint64_t *sh_addrs = calloc(shdrnum, sizeof(sh_addrs[0])); + if (!sh_addrs && shdrnum > 0) + return &drgn_enomem; + + Elf_Scn *scn = NULL; + while ((scn = elf_nextscn(elf, scn))) { + GElf_Shdr *shdr, shdr_mem; + shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) { + err = drgn_error_libelf(); goto out; - bool is_64_bit = tmp == UINT32_C(0xffffffff); - uint64_t length; - if (is_64_bit) { - if ((err = binary_buffer_next_u64(&buffer.bb, &length))) - goto out; - } else { - length = tmp; } - /* - * Technically, a length of zero is only a terminator in - * .eh_frame, but other consumers (binutils, elfutils, GDB) - * handle it the same way in .debug_frame. 
- */ - if (length == 0) - break; - if (length > buffer.bb.end - buffer.bb.pos) { - err = binary_buffer_error(&buffer.bb, - "entry length is out of bounds"); + sh_addrs[elf_ndxscn(scn)] = shdr->sh_addr; + } + + size_t shstrndx; + if (elf_getshdrstrndx(elf, &shstrndx)) { + err = drgn_error_libelf(); + goto out; + } + + Elf_Scn *reloc_scn = NULL; + while ((reloc_scn = elf_nextscn(elf, reloc_scn))) { + GElf_Shdr *shdr, shdr_mem; + shdr = gelf_getshdr(reloc_scn, &shdr_mem); + if (!shdr) { + err = drgn_error_libelf(); goto out; } - buffer.bb.end = buffer.bb.pos + length; + /* We don't support any architectures that use SHT_REL yet. */ + if (shdr->sh_type != SHT_RELA) + continue; - /* - * The Linux Standard Base Core Specification [1] states that - * the CIE ID in .eh_frame is always 4 bytes. However, other - * consumers handle it the same as in .debug_frame (8 bytes for - * the 64-bit format). - * - * 1: https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html - */ - uint64_t cie_pointer, cie_id; - if (is_64_bit) { - if ((err = binary_buffer_next_u64(&buffer.bb, - &cie_pointer))) - goto out; - cie_id = is_eh ? 0 : UINT64_C(0xffffffffffffffff); - } else { - if ((err = binary_buffer_next_u32_into_u64(&buffer.bb, - &cie_pointer))) - goto out; - cie_id = is_eh ? 0 : UINT64_C(0xffffffff); + const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); + if (!scnname) { + err = drgn_error_libelf(); + goto out; } - if (cie_pointer != cie_id) { - if (is_eh) { - size_t pointer_offset = - (buffer.bb.pos - - (is_64_bit ? 
8 : 4) - - (char *)data->d_buf); - if (cie_pointer > pointer_offset) { - err = binary_buffer_error(&buffer.bb, - "CIE pointer is out of bounds"); - goto out; - } - cie_pointer = pointer_offset - cie_pointer; - } else if (cie_pointer > data->d_size) { - err = binary_buffer_error(&buffer.bb, - "CIE pointer is out of bounds"); - goto out; - } - struct drgn_dwarf_fde *fde = - drgn_dwarf_fde_vector_append_entry(fdes); - if (!fde) { - err = &drgn_enomem; - goto out; - } - struct drgn_dwarf_cie_map_entry entry = { - .key = cie_pointer, - .value = cies->size, - }; - struct drgn_dwarf_cie_map_iterator it; - int r = drgn_dwarf_cie_map_insert(&cie_map, &entry, - &it); - struct drgn_dwarf_cie *cie; - if (r > 0) { - cie = drgn_dwarf_cie_vector_append_entry(cies); - if (!cie) { - err = &drgn_enomem; - goto out; - } - err = drgn_parse_dwarf_cie(module, scn, - cie_pointer, cie); - if (err) - goto out; - } else if (r == 0) { - cie = &cies->data[it.entry->value]; - } else { - err = &drgn_enomem; + if (strstartswith(scnname, ".rela.debug_") || + strstartswith(scnname, ".rela.orc_")) { + Elf_Scn *scn = elf_getscn(elf, shdr->sh_info); + if (!scn) { + err = drgn_error_libelf(); goto out; } - if ((err = drgn_dwarf_cfi_next_encoded(&buffer, - cie->address_size, - cie->address_encoding, - 0, - &fde->initial_location)) || - (err = drgn_dwarf_cfi_next_encoded(&buffer, - cie->address_size, - cie->address_encoding & 0xf, - 0, - &fde->address_range))) + + Elf_Scn *symtab_scn = elf_getscn(elf, shdr->sh_link); + if (!symtab_scn) { + err = drgn_error_libelf(); goto out; - if (cie->have_augmentation_length) { - uint64_t augmentation_length; - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &augmentation_length))) - goto out; - if (augmentation_length > - buffer.bb.end - buffer.bb.pos) { - err = binary_buffer_error(&buffer.bb, - "augmentation length is out of bounds"); - goto out; - } - buffer.bb.pos += augmentation_length; } - fde->cie = it.entry->value; - fde->instructions = buffer.bb.pos; - 
fde->instructions_size = buffer.bb.end - buffer.bb.pos; - } - buffer.bb.pos = buffer.bb.end; - buffer.bb.end = (const char *)data->d_buf + data->d_size; + err = relocate_elf_section(scn, reloc_scn, symtab_scn, + sh_addrs, shdrnum, + &platform); + if (err) + goto out; + } } - err = NULL; out: - drgn_dwarf_cie_map_deinit(&cie_map); + free(sh_addrs); return err; } -static void drgn_debug_info_cache_sh_addr(struct drgn_debug_info_module *module, - enum drgn_debug_info_scn scn, - uint64_t *addr) -{ - if (module->scns[scn]) { - GElf_Shdr shdr_mem; - GElf_Shdr *shdr = gelf_getshdr(module->scns[scn], &shdr_mem); - if (shdr) - *addr = shdr->sh_addr; - } -} - -static int drgn_dwarf_fde_compar(const void *_a, const void *_b, void *arg) -{ - const struct drgn_dwarf_fde *a = _a; - const struct drgn_dwarf_fde *b = _b; - const struct drgn_dwarf_cie *cies = arg; - if (a->initial_location < b->initial_location) - return -1; - else if (a->initial_location > b->initial_location) - return 1; - else - return cies[a->cie].is_eh - cies[b->cie].is_eh; -} - static struct drgn_error * -drgn_debug_info_parse_frames(struct drgn_debug_info_module *module) +drgn_debug_info_find_sections(struct drgn_debug_info_module *module) { struct drgn_error *err; - drgn_debug_info_cache_sh_addr(module, DRGN_SCN_EH_FRAME, - &module->pcrel_base); - drgn_debug_info_cache_sh_addr(module, DRGN_SCN_TEXT, - &module->textrel_base); - drgn_debug_info_cache_sh_addr(module, DRGN_SCN_GOT, - &module->datarel_base); + if (module->elf) { + err = relocate_elf_file(module->elf); + if (err) + return err; + } - struct drgn_dwarf_cie_vector cies = VECTOR_INIT; - struct drgn_dwarf_fde_vector fdes = VECTOR_INIT; + /* + * Note: not dwfl_module_getelf(), because then libdwfl applies + * ELF relocations to all sections, not just debug sections. 
+ */ + Dwarf_Addr bias; + Dwarf *dwarf; + #pragma omp critical(drgn_dwfl_module_getdwarf) + dwarf = dwfl_module_getdwarf(module->dwfl_module, &bias); + if (!dwarf) + return drgn_error_libdwfl(); + Elf *elf = dwarf_getelf(dwarf); + if (!elf) + return drgn_error_libdw(); + GElf_Ehdr ehdr_mem, *ehdr = gelf_getehdr(elf, &ehdr_mem); + if (!ehdr) + return drgn_error_libelf(); + drgn_platform_from_elf(ehdr, &module->platform); - err = drgn_parse_dwarf_frames(module, DRGN_SCN_DEBUG_FRAME, &cies, - &fdes); - if (err) - goto err; - err = drgn_parse_dwarf_frames(module, DRGN_SCN_EH_FRAME, &cies, &fdes); - if (err) - goto err; + size_t shstrndx; + if (elf_getshdrstrndx(elf, &shstrndx)) + return drgn_error_libelf(); - drgn_dwarf_cie_vector_shrink_to_fit(&cies); + Elf_Scn *scn = NULL; + while ((scn = elf_nextscn(elf, scn))) { + GElf_Shdr shdr_mem; + GElf_Shdr *shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return drgn_error_libelf(); - /* - * Sort FDEs and remove duplicates, preferring .debug_frame over - * .eh_frame. 
- */ - qsort_r(fdes.data, fdes.size, sizeof(fdes.data[0]), - drgn_dwarf_fde_compar, cies.data); - if (fdes.size > 0) { - size_t src = 1, dst = 1; - for (; src < fdes.size; src++) { - if (fdes.data[src].initial_location != - fdes.data[dst - 1].initial_location) { - if (src != dst) - fdes.data[dst] = fdes.data[src]; - dst++; + if (shdr->sh_type != SHT_PROGBITS) + continue; + const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); + if (!scnname) + return drgn_error_libelf(); + + for (size_t i = 0; i < DRGN_NUM_DEBUG_SCNS; i++) { + if (!module->scns[i] && + strcmp(scnname, drgn_debug_scn_names[i]) == 0) { + module->scns[i] = scn; + break; } } - fdes.size = dst; } - drgn_dwarf_fde_vector_shrink_to_fit(&fdes); - - module->cies = cies.data; - module->fdes = fdes.data; - module->num_fdes = fdes.size; - return NULL; -err: - drgn_dwarf_fde_vector_deinit(&fdes); - drgn_dwarf_cie_vector_deinit(&cies); - return err; -} + Dwarf *altdwarf = dwarf_getalt(dwarf); + if (altdwarf) { + elf = dwarf_getelf(altdwarf); + if (!elf) + return drgn_error_libdw(); + if (elf_getshdrstrndx(elf, &shstrndx)) + return drgn_error_libelf(); -static struct drgn_error * -drgn_debug_info_find_fde(struct drgn_debug_info_module *module, - uint64_t unbiased_pc, struct drgn_dwarf_fde **ret) -{ - struct drgn_error *err; + scn = NULL; + while ((scn = elf_nextscn(elf, scn))) { + GElf_Shdr shdr_mem; + GElf_Shdr *shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return drgn_error_libelf(); - if (!module->parsed_frames) { - err = drgn_debug_info_parse_frames(module); - if (err) - return err; - module->parsed_frames = true; - } + if (shdr->sh_type != SHT_PROGBITS) + continue; + const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); + if (!scnname) + return drgn_error_libelf(); - /* Binary search for the containing FDE. 
*/ - size_t lo = 0, hi = module->num_fdes; - while (lo < hi) { - size_t mid = lo + (hi - lo) / 2; - struct drgn_dwarf_fde *fde = &module->fdes[mid]; - if (unbiased_pc < fde->initial_location) { - hi = mid; - } else if (unbiased_pc - fde->initial_location >= - fde->address_range) { - lo = mid + 1; - } else { - *ret = fde; - return NULL; + /* + * TODO: save more sections and support imported units. + */ + if (strcmp(scnname, ".debug_info") == 0 && + !module->alt_debug_info) + module->alt_debug_info = scn; + else if (strcmp(scnname, ".debug_str") == 0 && + !module->alt_debug_str) + module->alt_debug_str = scn; } } - *ret = NULL; + return NULL; } -static struct drgn_error * -drgn_dwarf_cfi_next_offset(struct drgn_debug_info_buffer *buffer, int64_t *ret) +static void truncate_null_terminated_section(Elf_Data *data) { - struct drgn_error *err; - uint64_t offset; - if ((err = binary_buffer_next_uleb128(&buffer->bb, &offset))) - return err; - if (offset > INT64_MAX) - return binary_buffer_error(&buffer->bb, "offset is too large"); - *ret = offset; - return NULL; + if (data) { + const char *buf = data->d_buf; + const char *nul = memrchr(buf, '\0', data->d_size); + if (nul) + data->d_size = nul - buf + 1; + else + data->d_size = 0; + } } static struct drgn_error * -drgn_dwarf_cfi_next_offset_sf(struct drgn_debug_info_buffer *buffer, - struct drgn_dwarf_cie *cie, int64_t *ret) +drgn_debug_info_precache_sections(struct drgn_debug_info_module *module) { struct drgn_error *err; - int64_t factored; - if ((err = binary_buffer_next_sleb128(&buffer->bb, &factored))) - return err; - if (__builtin_mul_overflow(factored, cie->data_alignment_factor, ret)) - return binary_buffer_error(&buffer->bb, "offset is too large"); + + for (size_t i = 0; i < DRGN_NUM_DEBUG_SCN_DATA_PRECACHE; i++) { + if (module->scns[i]) { + err = read_elf_section(module->scns[i], + &module->scn_data[i]); + if (err) + return err; + } + } + if (module->alt_debug_info) { + err = 
read_elf_section(module->alt_debug_info, + &module->alt_debug_info_data); + if (err) + return err; + } + if (module->alt_debug_str) { + err = read_elf_section(module->alt_debug_str, + &module->alt_debug_str_data); + if (err) + return err; + } + + /* + * Truncate any extraneous bytes so that we can assume that a pointer + * within .debug_{,line_}str is always null-terminated. + */ + truncate_null_terminated_section(module->scn_data[DRGN_SCN_DEBUG_STR]); + truncate_null_terminated_section(module->scn_data[DRGN_SCN_DEBUG_LINE_STR]); + truncate_null_terminated_section(module->alt_debug_str_data); return NULL; } -static struct drgn_error * -drgn_dwarf_cfi_next_offset_f(struct drgn_debug_info_buffer *buffer, - struct drgn_dwarf_cie *cie, int64_t *ret) +struct drgn_error * +drgn_debug_info_module_cache_section(struct drgn_debug_info_module *module, + enum drgn_debug_info_scn scn) { - struct drgn_error *err; - uint64_t factored; - if ((err = binary_buffer_next_uleb128(&buffer->bb, &factored))) - return err; - if (__builtin_mul_overflow(factored, cie->data_alignment_factor, ret)) - return binary_buffer_error(&buffer->bb, "offset is too large"); - return NULL; + if (module->scn_data[scn]) + return NULL; + return read_elf_section(module->scns[scn], &module->scn_data[scn]); } static struct drgn_error * -drgn_dwarf_cfi_next_block(struct drgn_debug_info_buffer *buffer, - const char **buf_ret, size_t *size_ret) +drgn_debug_info_read_module(struct drgn_debug_info_load_state *load, + struct drgn_dwarf_index_state *index, + struct drgn_debug_info_module *head) { struct drgn_error *err; - uint64_t size; - if ((err = binary_buffer_next_uleb128(&buffer->bb, &size))) - return err; - if (size > buffer->bb.end - buffer->bb.pos) { - return binary_buffer_error(&buffer->bb, - "block is out of bounds"); + struct drgn_debug_info_module *module; + for (module = head; module; module = module->next) { + err = drgn_debug_info_find_sections(module); + if (err) { + module->err = err; + continue; + } 
+ if (module->scns[DRGN_SCN_DEBUG_INFO] && + module->scns[DRGN_SCN_DEBUG_ABBREV]) { + err = drgn_debug_info_precache_sections(module); + if (err) { + module->err = err; + continue; + } + module->state = DRGN_DEBUG_INFO_MODULE_INDEXING; + return drgn_dwarf_index_read_module(index, + module); + } } - *buf_ret = buffer->bb.pos; - buffer->bb.pos += size; - *size_ret = size; - return NULL; + /* + * We checked all of the files and didn't find debugging information. + * Report why for each one. + * + * (If we did find debugging information, we discard errors on the + * unused files.) + */ + err = NULL; + #pragma omp critical(drgn_debug_info_read_module_error) + for (module = head; module; module = module->next) { + const char *name = + dwfl_module_info(module->dwfl_module, NULL, NULL, NULL, + NULL, NULL, NULL, NULL); + if (module->err) { + err = drgn_debug_info_report_error(load, name, NULL, + module->err); + module->err = NULL; + } else { + err = drgn_debug_info_report_error(load, name, + "no debugging information", + NULL); + } + if (err) + break; + } + return err; } static struct drgn_error * -drgn_eval_dwarf_cfi(struct drgn_debug_info_module *module, - struct drgn_dwarf_fde *fde, - const struct drgn_cfi_row *initial_row, uint64_t target, - const char *instructions, size_t instructions_size, - struct drgn_cfi_row **row) +drgn_debug_info_update_index(struct drgn_debug_info_load_state *load) { - struct drgn_error *err; - drgn_register_number (*dwarf_regno_to_internal)(uint64_t) = - module->platform.arch->dwarf_regno_to_internal; - struct drgn_dwarf_cie *cie = &module->cies[fde->cie]; - uint64_t pc = fde->initial_location; - - struct drgn_cfi_row_vector state_stack = VECTOR_INIT; - struct drgn_debug_info_buffer buffer; - drgn_debug_info_buffer_init(&buffer, module, - cie->is_eh ? 
- DRGN_SCN_EH_FRAME : DRGN_SCN_DEBUG_FRAME); - buffer.bb.pos = instructions; - buffer.bb.end = instructions + instructions_size; - while (binary_buffer_has_next(&buffer.bb)) { - uint8_t opcode; - if ((err = binary_buffer_next_u8(&buffer.bb, &opcode))) - goto out; + if (!load->new_modules.size) + return NULL; + struct drgn_debug_info *dbinfo = load->dbinfo; + if (!c_string_set_reserve(&dbinfo->module_names, + c_string_set_size(&dbinfo->module_names) + + load->new_modules.size)) + return &drgn_enomem; - uint64_t dwarf_regno; - drgn_register_number regno; - struct drgn_cfi_rule rule; - uint64_t tmp; - switch ((opcode & 0xc0) ? (opcode & 0xc0) : opcode) { - case DW_CFA_set_loc: - if (!initial_row) - goto invalid_for_initial; - if ((err = drgn_dwarf_cfi_next_encoded(&buffer, - cie->address_size, - cie->address_encoding, - fde->initial_location, - &tmp))) - goto out; - if (tmp <= pc) { - err = binary_buffer_error(&buffer.bb, - "DW_CFA_set_loc location is not greater than current location"); - goto out; - } - pc = tmp; - if (pc > target) - goto found; - break; - case DW_CFA_advance_loc: - if (!initial_row) - goto invalid_for_initial; - tmp = opcode & 0x3f; - goto advance_loc; - case DW_CFA_advance_loc1: - if (!initial_row) - goto invalid_for_initial; - if ((err = binary_buffer_next_u8_into_u64(&buffer.bb, - &tmp))) - goto out; - goto advance_loc; - case DW_CFA_advance_loc2: - if (!initial_row) - goto invalid_for_initial; - if ((err = binary_buffer_next_u16_into_u64(&buffer.bb, - &tmp))) - goto out; - goto advance_loc; - case DW_CFA_advance_loc4: - if (!initial_row) - goto invalid_for_initial; - if ((err = binary_buffer_next_u32_into_u64(&buffer.bb, - &tmp))) - goto out; -advance_loc: - if (__builtin_mul_overflow(tmp, - cie->code_alignment_factor, - &tmp) || - __builtin_add_overflow(pc, tmp, &pc) || - pc > uint_max(cie->address_size)) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "DW_CFA_advance_loc* overflows location"); - goto out; - } - if (pc > target) - goto found; 
- break; - case DW_CFA_def_cfa: - rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET; - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno)) || - (err = drgn_dwarf_cfi_next_offset(&buffer, &rule.offset))) - goto out; - if ((rule.regno = dwarf_regno_to_internal(dwarf_regno)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - rule.kind = DRGN_CFI_RULE_UNDEFINED; - goto set_cfa; - case DW_CFA_def_cfa_sf: - rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET; - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno)) || - (err = drgn_dwarf_cfi_next_offset_sf(&buffer, cie, - &rule.offset))) - goto out; - if ((rule.regno = dwarf_regno_to_internal(dwarf_regno)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - rule.kind = DRGN_CFI_RULE_UNDEFINED; - goto set_cfa; - case DW_CFA_def_cfa_register: - drgn_cfi_row_get_cfa(*row, &rule); - if (rule.kind != DRGN_CFI_RULE_REGISTER_PLUS_OFFSET) { - err = binary_buffer_error(&buffer.bb, - "DW_CFA_def_cfa_register with incompatible CFA rule"); - goto out; - } - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno))) - goto out; - if ((rule.regno = dwarf_regno_to_internal(dwarf_regno)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - rule.kind = DRGN_CFI_RULE_UNDEFINED; - goto set_cfa; - case DW_CFA_def_cfa_offset: - drgn_cfi_row_get_cfa(*row, &rule); - if (rule.kind != DRGN_CFI_RULE_REGISTER_PLUS_OFFSET) { - err = binary_buffer_error(&buffer.bb, - "DW_CFA_def_cfa_offset with incompatible CFA rule"); - goto out; - } - if ((err = drgn_dwarf_cfi_next_offset(&buffer, - &rule.offset))) - goto out; - goto set_cfa; - case DW_CFA_def_cfa_offset_sf: - drgn_cfi_row_get_cfa(*row, &rule); - if (rule.kind != DRGN_CFI_RULE_REGISTER_PLUS_OFFSET) { - err = binary_buffer_error(&buffer.bb, - "DW_CFA_def_cfa_offset_sf with incompatible CFA rule"); - goto out; - } - if ((err = drgn_dwarf_cfi_next_offset_sf(&buffer, cie, - &rule.offset))) - goto out; - goto set_cfa; - case DW_CFA_def_cfa_expression: - rule.kind = DRGN_CFI_RULE_DWARF_EXPRESSION; - rule.push_cfa = 
false; - if ((err = drgn_dwarf_cfi_next_block(&buffer, - &rule.expr, - &rule.expr_size))) - goto out; -set_cfa: - if (!drgn_cfi_row_set_cfa(row, &rule)) { - err = &drgn_enomem; - goto out; - } - break; - case DW_CFA_undefined: - rule.kind = DRGN_CFI_RULE_UNDEFINED; - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno))) - goto out; - if ((regno = dwarf_regno_to_internal(dwarf_regno)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - break; - goto set_reg; - case DW_CFA_same_value: - rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET; - rule.offset = 0; - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno))) - goto out; - if ((regno = dwarf_regno_to_internal(dwarf_regno)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - break; - rule.regno = regno; - goto set_reg; - case DW_CFA_offset: - rule.kind = DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET; - if ((err = drgn_dwarf_cfi_next_offset_f(&buffer, cie, - &rule.offset))) - goto out; - if ((regno = dwarf_regno_to_internal(opcode & 0x3f)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - break; - goto set_reg; - case DW_CFA_offset_extended: - rule.kind = DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET; - goto reg_offset_f; - case DW_CFA_offset_extended_sf: - rule.kind = DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET; - goto reg_offset_sf; - case DW_CFA_val_offset: - rule.kind = DRGN_CFI_RULE_CFA_PLUS_OFFSET; -reg_offset_f: - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno)) || - (err = drgn_dwarf_cfi_next_offset_f(&buffer, cie, - &rule.offset))) - goto out; - if ((regno = dwarf_regno_to_internal(dwarf_regno)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - break; - goto set_reg; - case DW_CFA_val_offset_sf: - rule.kind = DRGN_CFI_RULE_CFA_PLUS_OFFSET; -reg_offset_sf: - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno)) || - (err = drgn_dwarf_cfi_next_offset_sf(&buffer, cie, - &rule.offset))) - goto out; - if ((regno = dwarf_regno_to_internal(dwarf_regno)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - break; - goto set_reg; - case DW_CFA_register: { - rule.kind = 
DRGN_CFI_RULE_REGISTER_PLUS_OFFSET; - rule.offset = 0; - uint64_t dwarf_regno2; - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno)) || - (err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno2))) - goto out; - if ((regno = dwarf_regno_to_internal(dwarf_regno)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - break; - if ((rule.regno = dwarf_regno_to_internal(dwarf_regno2)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - rule.kind = DRGN_CFI_RULE_UNDEFINED; - goto set_reg; - } - case DW_CFA_expression: - rule.kind = DRGN_CFI_RULE_AT_DWARF_EXPRESSION; - goto reg_expression; - case DW_CFA_val_expression: - rule.kind = DRGN_CFI_RULE_DWARF_EXPRESSION; -reg_expression: - rule.push_cfa = true; - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno)) || - (err = drgn_dwarf_cfi_next_block(&buffer, - &rule.expr, - &rule.expr_size))) - goto out; - if ((regno = dwarf_regno_to_internal(dwarf_regno)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - break; - goto set_reg; - case DW_CFA_restore: - if (!initial_row) - goto invalid_for_initial; - dwarf_regno = opcode & 0x3f; - goto restore; - case DW_CFA_restore_extended: - if (!initial_row) { -invalid_for_initial: - err = binary_buffer_error(&buffer.bb, - "invalid initial DWARF CFI opcode %#" PRIx8, - opcode); - goto out; - } - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &dwarf_regno))) - goto out; -restore: - if ((regno = dwarf_regno_to_internal(dwarf_regno)) == - DRGN_REGISTER_NUMBER_UNKNOWN) - break; - drgn_cfi_row_get_register(initial_row, regno, &rule); -set_reg: - if (!drgn_cfi_row_set_register(row, regno, &rule)) { - err = &drgn_enomem; - goto out; - } - break; - case DW_CFA_remember_state: { - struct drgn_cfi_row **state = - drgn_cfi_row_vector_append_entry(&state_stack); - if (!state) { - err = &drgn_enomem; - goto out; - } - *state = drgn_empty_cfi_row; - if (!drgn_cfi_row_copy(state, *row)) { - err = &drgn_enomem; - goto out; - } - break; - } - case DW_CFA_restore_state: - if (state_stack.size == 0) { - err = 
binary_buffer_error(&buffer.bb, - "DW_CFA_restore_state with empty state stack"); - goto out; - } - drgn_cfi_row_destroy(*row); - *row = state_stack.data[--state_stack.size]; - break; - case DW_CFA_nop: - break; - default: - err = binary_buffer_error(&buffer.bb, - "unknown DWARF CFI opcode %#" PRIx8, - opcode); - goto out; + struct drgn_dwarf_index_state index; + if (!drgn_dwarf_index_state_init(&index, dbinfo)) + return &drgn_enomem; + struct drgn_error *err = NULL; + #pragma omp parallel for schedule(dynamic) + for (size_t i = 0; i < load->new_modules.size; i++) { + if (err) + continue; + struct drgn_error *module_err = + drgn_debug_info_read_module(load, &index, + load->new_modules.data[i]); + if (module_err) { + #pragma omp critical(drgn_debug_info_update_index_error) + if (err) + drgn_error_destroy(module_err); + else + err = module_err; } } -found: - err = NULL; -out: - for (size_t i = 0; i < state_stack.size; i++) - drgn_cfi_row_destroy(state_stack.data[i]); - drgn_cfi_row_vector_deinit(&state_stack); - return err; -} - -static struct drgn_error * -drgn_debug_info_find_cfi_in_fde(struct drgn_debug_info_module *module, - struct drgn_dwarf_fde *fde, - uint64_t unbiased_pc, struct drgn_cfi_row **ret) -{ - struct drgn_error *err; - struct drgn_dwarf_cie *cie = &module->cies[fde->cie]; - struct drgn_cfi_row *initial_row = - (struct drgn_cfi_row *)module->platform.arch->default_dwarf_cfi_row; - err = drgn_eval_dwarf_cfi(module, fde, NULL, unbiased_pc, - cie->initial_instructions, - cie->initial_instructions_size, &initial_row); - if (err) - goto out; - if (!drgn_cfi_row_copy(ret, initial_row)) { - err = &drgn_enomem; - goto out; - } - err = drgn_eval_dwarf_cfi(module, fde, initial_row, unbiased_pc, - fde->instructions, fde->instructions_size, - ret); -out: - drgn_cfi_row_destroy(initial_row); + if (!err) + err = drgn_dwarf_info_update_index(&index); + drgn_dwarf_index_state_deinit(&index); + if (!err) + drgn_debug_info_free_modules(dbinfo, true, false); return 
err; } -static struct drgn_error * -drgn_debug_info_find_dwarf_cfi(struct drgn_debug_info_module *module, - uint64_t unbiased_pc, - struct drgn_cfi_row **row_ret, - bool *interrupted_ret, - drgn_register_number *ret_addr_regno_ret) +struct drgn_error * +drgn_debug_info_report_flush(struct drgn_debug_info_load_state *load) { - struct drgn_error *err; - struct drgn_dwarf_fde *fde; - err = drgn_debug_info_find_fde(module, unbiased_pc, &fde); - if (err) - return err; - if (!fde) - return &drgn_not_found; - err = drgn_debug_info_find_cfi_in_fde(module, fde, unbiased_pc, - row_ret); + struct drgn_debug_info *dbinfo = load->dbinfo; + my_dwfl_report_end(dbinfo, NULL, NULL); + struct drgn_error *err = drgn_debug_info_update_index(load); + dwfl_report_begin_add(dbinfo->dwfl); if (err) return err; - *interrupted_ret = module->cies[fde->cie].signal_frame; - *ret_addr_regno_ret = module->cies[fde->cie].return_address_register; + load->new_modules.size = 0; return NULL; } -/* - * Get the program counter of an ORC entry directly from the .orc_unwind_ip - * section. - */ -static inline uint64_t drgn_raw_orc_pc(struct drgn_debug_info_module *module, - size_t i) -{ - int32_t offset; - memcpy(&offset, - (int32_t *)module->scn_data[DRGN_SCN_ORC_UNWIND_IP]->d_buf + i, - sizeof(offset)); - if (drgn_platform_bswap(&module->platform)) - offset = bswap_32(offset); - return module->orc_pc_base + UINT64_C(4) * i + offset; -} - -static int compare_orc_entries(const void *a, const void *b, void *arg) -{ - struct drgn_debug_info_module *module = arg; - size_t index_a = *(size_t *)a; - size_t index_b = *(size_t *)b; - - uint64_t pc_a = drgn_raw_orc_pc(module, index_a); - uint64_t pc_b = drgn_raw_orc_pc(module, index_b); - if (pc_a < pc_b) - return -1; - else if (pc_a > pc_b) - return 1; - - /* - * If two entries have the same PC, then one is probably a "terminator" - * at the end of a compilation unit. Prefer the real entry. 
- */ - const struct drgn_orc_entry *entries = - module->scn_data[DRGN_SCN_ORC_UNWIND]->d_buf; - uint16_t flags_a, flags_b; - memcpy(&flags_a, &entries[index_a].flags, sizeof(flags_a)); - memcpy(&flags_b, &entries[index_b].flags, sizeof(flags_b)); - if (drgn_platform_bswap(&module->platform)) { - flags_a = bswap_16(flags_a); - flags_b = bswap_16(flags_b); - } - return (drgn_orc_flags_is_terminator(flags_b) - - drgn_orc_flags_is_terminator(flags_a)); -} - -static size_t keep_orc_entry(struct drgn_debug_info_module *module, - size_t *indices, size_t num_entries, size_t i) -{ - - const struct drgn_orc_entry *entries = - module->scn_data[DRGN_SCN_ORC_UNWIND]->d_buf; - if (num_entries > 0 && - memcmp(&entries[indices[num_entries - 1]], &entries[indices[i]], - sizeof(entries[0])) == 0) { - /* - * The previous entry is identical to this one, so we can skip - * this entry (which effectively merges it into the previous - * one). This usually happens for "terminator" entries. - */ - return num_entries; - } - indices[num_entries] = indices[i]; - return num_entries + 1; -} - -/* - * The vast majority of ORC entries are redundant with DWARF CFI, and it's a - * waste to store and binary search those entries. This removes ORC entries that - * are entirely shadowed by DWARF FDEs. - */ -static size_t remove_fdes_from_orc(struct drgn_debug_info_module *module, - size_t *indices, size_t num_entries) +static struct drgn_error * +drgn_debug_info_report_finalize_errors(struct drgn_debug_info_load_state *load) { - if (module->num_fdes == 0) - return num_entries; - - struct drgn_dwarf_fde *fde = module->fdes; - struct drgn_dwarf_fde *last_fde = &module->fdes[module->num_fdes - 1]; - - size_t new_num_entries = 0; - - /* Keep any entries that start before the first DWARF FDE. 
*/ - uint64_t start_pc; - for (;;) { - start_pc = drgn_raw_orc_pc(module, new_num_entries); - if (fde->initial_location <= start_pc) - break; - new_num_entries++; - if (new_num_entries == num_entries) - return num_entries; + if (load->num_errors > load->max_errors && + (!string_builder_line_break(&load->errors) || + !string_builder_appendf(&load->errors, "... %u more", + load->num_errors - load->max_errors))) { + free(load->errors.str); + return &drgn_enomem; } - - for (size_t i = new_num_entries; i < num_entries - 1; i++) { - uint64_t end_pc = drgn_raw_orc_pc(module, i + 1); - - /* - * Find the last FDE that starts at or before the current ORC - * entry. - */ - while (fde != last_fde && fde[1].initial_location <= start_pc) - fde++; - - /* - * Check whether the current ORC entry is completely covered by - * one or more FDEs. - */ - while (end_pc - fde->initial_location > fde->address_range) { - /* - * The current FDE doesn't cover the current ORC entry. - */ - if (fde == last_fde) { - /* - * There are no more FDEs. Keep the remaining - * ORC entries. - */ - if (i != new_num_entries) { - memmove(&indices[new_num_entries], - &indices[i], - (num_entries - i) * - sizeof(indices[0])); - } - return new_num_entries + (num_entries - i); - } - if (fde[1].initial_location - fde->initial_location - > fde->address_range) { - /* - * There is a gap between the current FDE and - * the next FDE that exposes the current ORC - * entry. Keep it. - */ - new_num_entries = keep_orc_entry(module, - indices, - new_num_entries, - i); - break; - } - fde++; - } - - start_pc = end_pc; + if (load->num_errors) { + return drgn_error_from_string_builder(DRGN_ERROR_MISSING_DEBUG_INFO, + &load->errors); + } else { + return NULL; } - /* We don't know where the last ORC entry ends, so always keep it. 
*/ - return keep_orc_entry(module, indices, new_num_entries, - num_entries - 1); } -static struct drgn_error * -drgn_debug_info_parse_orc(struct drgn_debug_info_module *module) +struct drgn_error *drgn_debug_info_load(struct drgn_debug_info *dbinfo, + const char **paths, size_t n, + bool load_default, bool load_main) { + struct drgn_program *prog = dbinfo->prog; struct drgn_error *err; - if (!module->platform.arch->orc_to_cfi || - !module->scns[DRGN_SCN_ORC_UNWIND_IP] || - !module->scns[DRGN_SCN_ORC_UNWIND]) - return NULL; - - GElf_Shdr shdr_mem, *shdr; - shdr = gelf_getshdr(module->scns[DRGN_SCN_ORC_UNWIND_IP], &shdr_mem); - if (!shdr) - return drgn_error_libelf(); - module->orc_pc_base = shdr->sh_addr; + if (load_default) + load_main = true; - err = drgn_debug_info_module_cache_section(module, - DRGN_SCN_ORC_UNWIND_IP); - if (err) - return err; - err = drgn_debug_info_module_cache_section(module, DRGN_SCN_ORC_UNWIND); + const char *max_errors = getenv("DRGN_MAX_DEBUG_INFO_ERRORS"); + struct drgn_debug_info_load_state load = { + .dbinfo = dbinfo, + .paths = paths, + .num_paths = n, + .load_default = load_default, + .load_main = load_main, + .new_modules = VECTOR_INIT, + .max_errors = max_errors ? 
atoi(max_errors) : 5, + }; + dwfl_report_begin_add(dbinfo->dwfl); + if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) + err = linux_kernel_report_debug_info(&load); + else + err = userspace_report_debug_info(&load); + my_dwfl_report_end(dbinfo, NULL, NULL); if (err) - return err; - Elf_Data *orc_unwind_ip = module->scn_data[DRGN_SCN_ORC_UNWIND_IP]; - Elf_Data *orc_unwind = module->scn_data[DRGN_SCN_ORC_UNWIND]; - - size_t num_entries = orc_unwind_ip->d_size / sizeof(int32_t); - if (orc_unwind_ip->d_size % sizeof(int32_t) != 0 || - orc_unwind->d_size % sizeof(struct drgn_orc_entry) != 0 || - orc_unwind->d_size / sizeof(struct drgn_orc_entry) != num_entries) { - return drgn_error_create(DRGN_ERROR_OTHER, - ".orc_unwind_ip and/or .orc_unwind has invalid size"); - } - if (!num_entries) - return NULL; - - size_t *indices = malloc_array(num_entries, sizeof(indices[0])); - if (!indices) - return &drgn_enomem; - for (size_t i = 0; i < num_entries; i++) - indices[i] = i; + goto err; /* - * Sort the ORC entries for binary search. Since Linux kernel commit - * f14bf6a350df ("x86/unwind/orc: Remove boot-time ORC unwind tables - * sorting") (in v5.6), this is already sorted for vmlinux, so only sort - * it if necessary. + * userspace_report_debug_info() reports the main debugging information + * directly with libdwfl, so we need to report it to dbinfo. 
*/ - for (size_t i = 1; i < num_entries; i++) { - if (compare_orc_entries(&indices[i - 1], &indices[i], - module) > 0) { - qsort_r(indices, num_entries, sizeof(indices[0]), - compare_orc_entries, module); - break; - } - } - - num_entries = remove_fdes_from_orc(module, indices, num_entries); - - int32_t *pc_offsets = malloc_array(num_entries, sizeof(pc_offsets[0])); - if (!pc_offsets) { - err = &drgn_enomem; - goto out; - } - struct drgn_orc_entry *entries = malloc_array(num_entries, - sizeof(entries[0])); - if (!entries) { - free(pc_offsets); + if (!(prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) && load_main && + dwfl_getmodules(dbinfo->dwfl, drgn_debug_info_report_dwfl_module, + &load, 0)) { err = &drgn_enomem; - goto out; - } - const int32_t *orig_offsets = orc_unwind_ip->d_buf; - const struct drgn_orc_entry *orig_entries = orc_unwind->d_buf; - bool bswap = drgn_platform_bswap(&module->platform); - for (size_t i = 0; i < num_entries; i++) { - size_t index = indices[i]; - int32_t offset; - memcpy(&offset, &orig_offsets[index], sizeof(offset)); - struct drgn_orc_entry entry; - memcpy(&entry, &orig_entries[index], sizeof(entry)); - if (bswap) { - offset = bswap_32(offset); - entry.sp_offset = bswap_16(entry.sp_offset); - entry.bp_offset = bswap_16(entry.bp_offset); - entry.flags = bswap_16(entry.flags); - } - pc_offsets[i] = UINT64_C(4) * index + offset - UINT64_C(4) * i; - entries[i] = entry; + goto err; } - module->orc_pc_offsets = pc_offsets; - module->orc_entries = entries; - module->num_orc_entries = num_entries; + err = drgn_debug_info_update_index(&load); + if (err) + goto err; - err = NULL; + /* + * TODO: for core dumps, we need to add memory reader segments for + * read-only segments of the loaded binaries since those aren't saved in + * the core dump. + */ + + /* + * If this fails, it's too late to roll back. This can only fail with + * enomem, so it's not a big deal. 
+ */ + err = drgn_debug_info_report_finalize_errors(&load); out: - free(indices); + drgn_debug_info_module_vector_deinit(&load.new_modules); return err; + +err: + drgn_debug_info_free_modules(dbinfo, false, false); + free(load.errors.str); + goto out; } -static inline uint64_t drgn_orc_pc(struct drgn_debug_info_module *module, - size_t i) +bool drgn_debug_info_is_indexed(struct drgn_debug_info *dbinfo, + const char *name) { - return module->orc_pc_base + UINT64_C(4) * i + module->orc_pc_offsets[i]; + return c_string_set_search(&dbinfo->module_names, &name).entry != NULL; } -static struct drgn_error * -drgn_debug_info_find_orc_cfi(struct drgn_debug_info_module *module, - uint64_t unbiased_pc, - struct drgn_cfi_row **row_ret, - bool *interrupted_ret, - drgn_register_number *ret_addr_regno_ret) +struct drgn_error *drgn_debug_info_create(struct drgn_program *prog, + struct drgn_debug_info **ret) { - struct drgn_error *err; - - if (!module->parsed_orc) { - err = drgn_debug_info_parse_orc(module); - if (err) - return err; - module->parsed_orc = true; + struct drgn_debug_info *dbinfo = malloc(sizeof(*dbinfo)); + if (!dbinfo) + return &drgn_enomem; + dbinfo->prog = prog; + const Dwfl_Callbacks *dwfl_callbacks; + if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) + dwfl_callbacks = &drgn_dwfl_callbacks; + else if (prog->flags & DRGN_PROGRAM_IS_LIVE) + dwfl_callbacks = &drgn_linux_proc_dwfl_callbacks; + else + dwfl_callbacks = &drgn_userspace_core_dump_dwfl_callbacks; + dbinfo->dwfl = dwfl_begin(dwfl_callbacks); + if (!dbinfo->dwfl) { + free(dbinfo); + return drgn_error_libdwfl(); } + drgn_debug_info_module_table_init(&dbinfo->modules); + c_string_set_init(&dbinfo->module_names); + drgn_dwarf_info_init(dbinfo); + *ret = dbinfo; + return NULL; +} - /* - * We don't know the maximum program counter covered by the ORC data, - * but the last entry seems to always be a terminator, so it doesn't - * matter. All addresses beyond the max will fall into the last entry. 
- */ - if (!module->num_orc_entries || unbiased_pc < drgn_orc_pc(module, 0)) - return &drgn_not_found; - size_t lo = 0, hi = module->num_orc_entries, found = 0; - while (lo < hi) { - size_t mid = lo + (hi - lo) / 2; - if (drgn_orc_pc(module, mid) <= unbiased_pc) { - found = mid; - lo = mid + 1; - } else { - hi = mid; - } - } - return module->platform.arch->orc_to_cfi(&module->orc_entries[found], - row_ret, interrupted_ret, - ret_addr_regno_ret); +void drgn_debug_info_destroy(struct drgn_debug_info *dbinfo) +{ + if (!dbinfo) + return; + drgn_dwarf_info_deinit(dbinfo); + c_string_set_deinit(&dbinfo->module_names); + drgn_debug_info_free_modules(dbinfo, false, true); + assert(drgn_debug_info_module_table_empty(&dbinfo->modules)); + drgn_debug_info_module_table_deinit(&dbinfo->modules); + dwfl_end(dbinfo->dwfl); + free(dbinfo); } struct drgn_error * @@ -5686,63 +2117,12 @@ drgn_debug_info_module_find_cfi(struct drgn_program *prog, } } -struct drgn_error * -drgn_eval_cfi_dwarf_expression(struct drgn_program *prog, - const struct drgn_cfi_rule *rule, - const struct drgn_register_state *regs, - void *buf, size_t size) +#if !_ELFUTILS_PREREQ(0, 175) +static Elf *dwelf_elf_begin(int fd) { - struct drgn_error *err; - struct uint64_vector stack = VECTOR_INIT; - - if (rule->push_cfa) { - struct optional_uint64 cfa = drgn_register_state_get_cfa(regs); - if (!cfa.has_value) { - err = &drgn_not_found; - goto out; - } - if (!uint64_vector_append(&stack, &cfa.value)) { - err = &drgn_enomem; - goto out; - } - } - - int remaining_ops = MAX_DWARF_EXPR_OPS; - struct drgn_dwarf_expression_buffer buffer; - drgn_dwarf_expression_buffer_init(&buffer, regs->module, rule->expr, - rule->expr_size); - err = drgn_eval_dwarf_expression(prog, &buffer, &stack, &remaining_ops, - NULL, regs); - if (err) - goto out; - if (binary_buffer_has_next(&buffer.bb)) { - uint8_t opcode; - err = binary_buffer_next_u8(&buffer.bb, &opcode); - if (!err) { - err = binary_buffer_error(&buffer.bb, - "invalid opcode 
%#" PRIx8 " for CFI expression", - opcode); - } - goto out; - } - if (stack.size == 0) { - err = &drgn_not_found; - } else if (rule->kind == DRGN_CFI_RULE_AT_DWARF_EXPRESSION) { - err = drgn_program_read_memory(prog, buf, - stack.data[stack.size - 1], size, - false); - } else { - copy_lsbytes(buf, size, - drgn_platform_is_little_endian(&prog->platform), - &stack.data[stack.size - 1], sizeof(uint64_t), - HOST_LITTLE_ENDIAN); - err = NULL; - } - -out: - uint64_vector_deinit(&stack); - return err; + return elf_begin(fd, ELF_C_READ_MMAP_PRIVATE, NULL); } +#endif struct drgn_error *open_elf_file(const char *path, int *fd_ret, Elf **elf_ret) { @@ -5834,46 +2214,54 @@ struct drgn_error *read_elf_section(Elf_Scn *scn, Elf_Data **ret) return NULL; } +/* + * Get the start address from the first loadable segment and the end address + * from the last loadable segment. + * + * The ELF specification states that loadable segments are sorted on p_vaddr. + * However, vmlinux on x86-64 has an out of order segment for .data..percpu, and + * Arm has a couple for .vector and .stubs. Thankfully, those are placed in the + * middle by the vmlinux linker script, so we can still rely on the first and + * last loadable segments. + */ struct drgn_error *elf_address_range(Elf *elf, uint64_t bias, uint64_t *start_ret, uint64_t *end_ret) { - uint64_t start = UINT64_MAX, end = 0; - size_t phnum, i; - - /* - * Get the minimum and maximum addresses from the PT_LOAD segments. We - * ignore memory ranges that start beyond UINT64_MAX, and we truncate - * ranges that end beyond UINT64_MAX. 
- */ + size_t phnum; if (elf_getphdrnum(elf, &phnum) != 0) return drgn_error_libelf(); - for (i = 0; i < phnum; i++) { - GElf_Phdr phdr_mem, *phdr; - uint64_t segment_start, segment_end; + GElf_Phdr phdr_mem, *phdr; + size_t i; + for (i = 0; i < phnum; i++) { phdr = gelf_getphdr(elf, i, &phdr_mem); if (!phdr) return drgn_error_libelf(); - if (phdr->p_type != PT_LOAD || !phdr->p_vaddr) - continue; - if (__builtin_add_overflow(phdr->p_vaddr, bias, - &segment_start)) - continue; - if (__builtin_add_overflow(segment_start, phdr->p_memsz, - &segment_end)) - segment_end = UINT64_MAX; - if (segment_start < segment_end) { - if (segment_start < start) - start = segment_start; - if (segment_end > end) - end = segment_end; + if (phdr->p_type == PT_LOAD) { + uint64_t align = phdr->p_align ? phdr->p_align : 1; + *start_ret = (phdr->p_vaddr & -align) + bias; + break; } } - if (start >= end) { - return drgn_error_create(DRGN_ERROR_OTHER, - "ELF file has no loadable segments"); + if (i >= phnum) { + /* There were no loadable segments. */ + *start_ret = *end_ret = 0; + return NULL; } - *start_ret = start; - *end_ret = end; + + for (i = phnum; i-- > 0;) { + phdr = gelf_getphdr(elf, i, &phdr_mem); + if (!phdr) + return drgn_error_libelf(); + if (phdr->p_type == PT_LOAD) { + *end_ret = (phdr->p_vaddr + phdr->p_memsz) + bias; + if (*start_ret >= *end_ret) + *start_ret = *end_ret = 0; + return NULL; + } + } + /* We found a loadable segment earlier, so this shouldn't happen. */ + assert(!"PT_LOAD segment disappeared"); + *end_ret = 0; return NULL; } diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index b7f313ea5..e35b9b662 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later /** @@ -12,21 +12,20 @@ #ifndef DRGN_DEBUG_INFO_H #define DRGN_DEBUG_INFO_H -#include #include +#include #include #include "binary_buffer.h" #include "cfi.h" #include "drgn.h" -#include "dwarf_index.h" +#include "dwarf_info.h" #include "hash_table.h" +#include "orc_info.h" #include "platform.h" #include "string_builder.h" #include "vector.h" -struct drgn_register_state; - /** * @ingroup Internals * @@ -34,8 +33,8 @@ struct drgn_register_state; * * Caching of debugging information. * - * @ref drgn_debug_info caches debugging information (currently only DWARF). It - * translates the debugging information to types and objects. + * @ref drgn_debug_info caches debugging information (currently DWARF and ORC). + * It translates the debugging information to types and objects. * * @{ */ @@ -56,16 +55,20 @@ enum drgn_debug_info_scn { DRGN_SCN_DEBUG_TYPES, DRGN_SCN_DEBUG_ABBREV, DRGN_SCN_DEBUG_STR, + DRGN_SCN_DEBUG_STR_OFFSETS, DRGN_SCN_DEBUG_LINE, + DRGN_SCN_DEBUG_LINE_STR, DRGN_NUM_DEBUG_SCN_DATA_PRECACHE, /* Sections whose data we should cache when it is first used. */ - DRGN_SCN_DEBUG_FRAME = DRGN_NUM_DEBUG_SCN_DATA_PRECACHE, + DRGN_SCN_DEBUG_ADDR = DRGN_NUM_DEBUG_SCN_DATA_PRECACHE, + DRGN_SCN_DEBUG_FRAME, DRGN_SCN_EH_FRAME, DRGN_SCN_ORC_UNWIND_IP, DRGN_SCN_ORC_UNWIND, DRGN_SCN_DEBUG_LOC, + DRGN_SCN_DEBUG_LOCLISTS, DRGN_NUM_DEBUG_SCN_DATA, @@ -98,55 +101,16 @@ struct drgn_debug_info_module { Dwfl_Module *dwfl_module; struct drgn_platform platform; Elf_Scn *scns[DRGN_NUM_DEBUG_SCNS]; + Elf_Scn *alt_debug_info; + Elf_Scn *alt_debug_str; Elf_Data *scn_data[DRGN_NUM_DEBUG_SCN_DATA]; + Elf_Data *alt_debug_info_data; + Elf_Data *alt_debug_str_data; - /** Base for `DW_EH_PE_pcrel`. */ - uint64_t pcrel_base; - /** Base for `DW_EH_PE_textrel`. */ - uint64_t textrel_base; - /** Base for `DW_EH_PE_datarel`. */ - uint64_t datarel_base; - /** Array of DWARF Common Information Entries. 
*/ - struct drgn_dwarf_cie *cies; - /** - * Array of DWARF Frame Description Entries sorted by initial_location. - */ - struct drgn_dwarf_fde *fdes; - /** Number of elements in @ref drgn_debug_info_module::fdes. */ - size_t num_fdes; - - /** - * Base for calculating program counter corresponding to an ORC unwinder - * entry. - * - * This is the address of the `.orc_unwind_ip` ELF section. - * - * @sa drgn_debug_info_module::orc_entries - */ - uint64_t orc_pc_base; - /** - * Offsets for calculating program counter corresponding to an ORC - * unwinder entry. - * - * This is the contents of the `.orc_unwind_ip` ELF section, byte - * swapped to the host's byte order if necessary. - * - * @sa drgn_debug_info_module::orc_entries - */ - int32_t *orc_pc_offsets; - /** - * ORC unwinder entries. - * - * This is the contents of the `.orc_unwind` ELF section, byte swapped - * to the host's byte order if necessary. - * - * Entry `i` specifies how to unwind the stack if - * `orc_pc(i) <= PC < orc_pc(i + 1)`, where - * `orc_pc(i) = orc_pc_base + 4 * i + orc_pc_offsets[i]`. - */ - struct drgn_orc_entry *orc_entries; - /** Number of ORC unwinder entries. */ - size_t num_orc_entries; + /** DWARF debugging information. */ + struct drgn_dwarf_module_info dwarf; + /** ORC unwinder information. */ + struct drgn_orc_module_info orc; /** Whether .debug_frame and .eh_frame have been parsed. */ bool parsed_frames; @@ -175,6 +139,10 @@ struct drgn_debug_info_module { struct drgn_debug_info_module *next; }; +struct drgn_error * +drgn_debug_info_module_cache_section(struct drgn_debug_info_module *module, + enum drgn_debug_info_scn scn); + struct drgn_error * drgn_error_debug_info_scn(struct drgn_debug_info_module *module, enum drgn_debug_info_scn scn, const char *ptr, @@ -203,80 +171,11 @@ drgn_debug_info_buffer_init(struct drgn_debug_info_buffer *buffer, buffer->scn = scn; } -/** - * Find the DWARF DIEs in a @ref drgn_debug_info_module for the scope containing - * a given program counter. 
- * - * @param[in] module Module containing @p pc. - * @param[in] pc Program counter. - * @param[out] bias_ret Returned difference between addresses in the loaded - * module and addresses in the returned DIEs. - * @param[out] dies_ret Returned DIEs. `(*dies_ret)[*length_ret - 1]` is the - * innermost DIE containing @p pc, `(*dies_ret)[*length_ret - 2]` is its parent - * (which may not contain @p pc itself), `(*dies_ret)[*length_ret - 3]` is its - * grandparent, etc. Must be freed with @c free(). - * @param[out] length_ret Returned length of @p dies_ret. - */ -struct drgn_error * -drgn_debug_info_module_find_dwarf_scopes(struct drgn_debug_info_module *module, - uint64_t pc, uint64_t *bias_ret, - Dwarf_Die **dies_ret, - size_t *length_ret) - __attribute__((__nonnull__(1, 3, 4, 5))); - -/** - * Find the ancestors of a DWARF DIE. - * - * This finds the parent, grandparent, etc., of a DWARF DIE in the tree of DIEs. - * - * @param[in] module Module containing @p die. - * @param[in] die DIE to find. - * @param[out] dies_ret Returned DIEs. `(*dies_ret)[*length_ret]` is the DIE, - * `(*dies_ret)[*length_ret - 1]` is its parent, `(*dies_ret)[*length_ret - 2]` - * is its grandparent, etc., and `(*dies_ret)[0]` is the top-level unit DIE. - * @param[out] length_ret Returned number of ancestors in @p dies_ret. 
- */ -struct drgn_error *drgn_find_die_ancestors(Dwarf_Die *die, Dwarf_Die **dies_ret, - size_t *length_ret) - __attribute__((__nonnull__(2, 3))); - -struct drgn_debug_info_module_key { - const void *build_id; - size_t build_id_len; - uint64_t start, end; -}; - -static inline struct drgn_debug_info_module_key -drgn_debug_info_module_key(struct drgn_debug_info_module * const *entry) -{ - return (struct drgn_debug_info_module_key){ - .build_id = (*entry)->build_id, - .build_id_len = (*entry)->build_id_len, - .start = (*entry)->start, - .end = (*entry)->end, - }; -} DEFINE_HASH_TABLE_TYPE(drgn_debug_info_module_table, - struct drgn_debug_info_module *, - drgn_debug_info_module_key) + struct drgn_debug_info_module *) DEFINE_HASH_SET_TYPE(c_string_set, const char *) -/** Cached type in a @ref drgn_debug_info. */ -struct drgn_dwarf_type { - struct drgn_type *type; - enum drgn_qualifiers qualifiers; - /** - * Whether this is an incomplete array type or a typedef of one. - * - * This is used to work around a GCC bug; see @ref - * drgn_type_from_dwarf_internal(). - */ - bool is_incomplete_array; -}; - -DEFINE_HASH_MAP_TYPE(drgn_dwarf_type_map, const void *, struct drgn_dwarf_type); - /** Cache of debugging information. */ struct drgn_debug_info { /** Program owning this cache. */ @@ -293,25 +192,8 @@ struct drgn_debug_info { * they should not be freed. */ struct c_string_set module_names; - /** Index of DWARF debugging information. */ - struct drgn_dwarf_index dindex; - - /** - * Cache of parsed types. - * - * The key is the address of the DIE (@c Dwarf_Die::addr). The value is - * a @ref drgn_dwarf_type. - */ - struct drgn_dwarf_type_map types; - /** - * Cache of parsed types which appear to be incomplete array types but - * can't be. - * - * See @ref drgn_type_from_dwarf_internal(). - */ - struct drgn_dwarf_type_map cant_be_incomplete_array_types; - /** Current parsing recursion depth. */ - int depth; + /** DWARF debugging information. 
*/ + struct drgn_dwarf_info dwarf; }; /** Create a @ref drgn_debug_info. */ @@ -405,6 +287,14 @@ struct drgn_error *drgn_debug_info_load(struct drgn_debug_info *dbinfo, bool drgn_debug_info_is_indexed(struct drgn_debug_info *dbinfo, const char *name); +/** + * Get the language of the program's `main` function or `NULL` if it could not + * be found. + */ +struct drgn_error * +drgn_debug_info_main_language(struct drgn_debug_info *dbinfo, + const struct drgn_language **ret); + /** @ref drgn_type_find_fn() that uses debugging information. */ struct drgn_error *drgn_debug_info_find_type(enum drgn_type_kind kind, const char *name, size_t name_len, @@ -418,44 +308,6 @@ drgn_debug_info_find_object(const char *name, size_t name_len, enum drgn_find_object_flags flags, void *arg, struct drgn_object *ret); -/** - * Find an object DIE in an array of DWARF scopes. - * - * @param[in] scopes Array of scopes, from outermost to innermost. - * @param[in] num_scopes Number of scopes in @p scopes. - * @param[out] die_ret Returned object DIE. - * @param[out] type_ret If @p die_ret is a `DW_TAG_enumerator` DIE, its parent. - * Otherwise, undefined. - */ -struct drgn_error *drgn_find_in_dwarf_scopes(Dwarf_Die *scopes, - size_t num_scopes, - const char *name, - Dwarf_Die *die_ret, - Dwarf_Die *type_ret); - -/** - * Create a @ref drgn_object from a `Dwarf_Die`. - * - * @param[in] die Object DIE (e.g., `DW_TAG_subprogram`, `DW_TAG_variable`, - * `DW_TAG_formal_parameter`, `DW_TAG_enumerator`, - * `DW_TAG_template_value_parameter`). - * @param[in] type_die DIE of object's type. If @c NULL, use the `DW_AT_type` - * attribute of @p die. If @p die is a `DW_TAG_enumerator` DIE, this should be - * its parent. - * @param[in] function_die DIE of current function. @c NULL if not in function - * context. - * @param[in] regs Registers of current stack frame. @c NULL if not in stack - * frame context. - * @param[out] ret Returned object. 
- */ -struct drgn_error * -drgn_object_from_dwarf(struct drgn_debug_info *dbinfo, - struct drgn_debug_info_module *module, - Dwarf_Die *die, Dwarf_Die *type_die, - Dwarf_Die *function_die, - const struct drgn_register_state *regs, - struct drgn_object *ret); - /** * Get the Call Frame Information in a @ref drgn_debug_info_module at a given * program counter. @@ -475,12 +327,6 @@ drgn_debug_info_module_find_cfi(struct drgn_program *prog, bool *interrupted_ret, drgn_register_number *ret_addr_regno_ret); -struct drgn_error * -drgn_eval_cfi_dwarf_expression(struct drgn_program *prog, - const struct drgn_cfi_rule *rule, - const struct drgn_register_state *regs, - void *buf, size_t size); - struct drgn_error *open_elf_file(const char *path, int *fd_ret, Elf **elf_ret); struct drgn_error *find_elf_file(char **path_ret, int *fd_ret, Elf **elf_ret, @@ -491,6 +337,15 @@ struct drgn_error *read_elf_section(Elf_Scn *scn, Elf_Data **ret); struct drgn_error *elf_address_range(Elf *elf, uint64_t bias, uint64_t *start_ret, uint64_t *end_ret); +static inline Elf_Type note_header_type(uint64_t p_align) +{ +#if _ELFUTILS_PREREQ(0, 175) + if (p_align == 8) + return ELF_T_NHDR8; +#endif + return ELF_T_NHDR; +} + /** @} */ #endif /* DRGN_DEBUG_INFO_H */ diff --git a/libdrgn/drgn.h.in b/libdrgn/drgn.h.in index b6b0827a5..2949c9347 100644 --- a/libdrgn/drgn.h.in +++ b/libdrgn/drgn.h.in @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** @@ -1403,6 +1403,15 @@ struct drgn_error *drgn_object_read_value(const struct drgn_object *obj, void drgn_object_deinit_value(const struct drgn_object *obj, const union drgn_value *value); +/** + * Get the binary representation of the value of a @ref drgn_object. + * + * @param[out] Buffer to read into. Size must be at least + * `drgn_object_size(obj)`. 
+ */ +struct drgn_error *drgn_object_read_bytes(const struct drgn_object *obj, + void *buf); + /** * Get the value of an object encoded with @ref * drgn_object_encoding::DRGN_OBJECT_ENCODING_SIGNED. @@ -2557,6 +2566,36 @@ struct drgn_error *drgn_format_type(struct drgn_qualified_type qualified_type, * @{ */ +/** Symbol linkage behavior and visibility. */ +enum drgn_symbol_binding { + DRGN_SYMBOL_BINDING_UNKNOWN, + /* + * These values match the ELF STB_* definitions (offset by 1). This is + * an implementation detail; future values may not correspond 1:1 with + * ELF definitions. + */ + DRGN_SYMBOL_BINDING_LOCAL, + DRGN_SYMBOL_BINDING_GLOBAL, + DRGN_SYMBOL_BINDING_WEAK, + DRGN_SYMBOL_BINDING_UNIQUE = 11, /* STB_GNU_UNIQUE + 1 */ +}; + +/** Kind of entity represented by a symbol. */ +enum drgn_symbol_kind { + /* + * Like enum drgn_symbol_binding, these values match the ELF STT_* + * definitions, but this will not necessarily be true for future values. + */ + DRGN_SYMBOL_KIND_UNKNOWN, + DRGN_SYMBOL_KIND_OBJECT, + DRGN_SYMBOL_KIND_FUNC, + DRGN_SYMBOL_KIND_SECTION, + DRGN_SYMBOL_KIND_FILE, + DRGN_SYMBOL_KIND_COMMON, + DRGN_SYMBOL_KIND_TLS, + DRGN_SYMBOL_KIND_IFUNC = 10, /* STT_GNU_IFUNC */ +}; + /** Destroy a @ref drgn_symbol. */ void drgn_symbol_destroy(struct drgn_symbol *sym); @@ -2574,6 +2613,12 @@ uint64_t drgn_symbol_address(struct drgn_symbol *sym); /** Get the size in bytes of a @ref drgn_symbol. */ uint64_t drgn_symbol_size(struct drgn_symbol *sym); +/** Get the binding of a @ref drgn_symbol. */ +enum drgn_symbol_binding drgn_symbol_binding(struct drgn_symbol *sym); + +/** Get the kind of a @ref drgn_symbol. */ +enum drgn_symbol_kind drgn_symbol_kind(struct drgn_symbol *sym); + /** Return whether two symbols are identical. 
*/ bool drgn_symbol_eq(struct drgn_symbol *a, struct drgn_symbol *b); diff --git a/libdrgn/dwarf_index.c b/libdrgn/dwarf_index.c deleted file mode 100644 index 6ca1b4f2e..000000000 --- a/libdrgn/dwarf_index.c +++ /dev/null @@ -1,2154 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// SPDX-License-Identifier: GPL-3.0-or-later - -#include -#include -#include -#include -#include -#include -#include - -#include "binary_buffer.h" -#include "debug_info.h" -#include "drgn.h" -#include "dwarf_index.h" -#include "error.h" -#include "path.h" -#include "platform.h" -#include "util.h" - -struct drgn_dwarf_index_pending_cu { - struct drgn_debug_info_module *module; - const char *buf; - size_t len; - bool is_64_bit; - enum drgn_debug_info_scn scn; -}; - -DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_pending_cu_vector) - -/* - * The DWARF abbreviation table gets translated into a series of instructions. - * An instruction <= INSN_MAX_SKIP indicates a number of bytes to be skipped - * over. The next few instructions mean that the corresponding attribute can be - * skipped over. The remaining instructions indicate that the corresponding - * attribute should be parsed. Finally, every sequence of instructions - * corresponding to a DIE is terminated by a zero byte followed by the DIE - * flags, which are a bitmask of flags combined with the DWARF tag (which may be - * set to zero if the tag is not of interest); see DIE_FLAG_*. 
- */ -enum { - INSN_MAX_SKIP = 215, - ATTRIB_BLOCK, - ATTRIB_BLOCK1, - ATTRIB_BLOCK2, - ATTRIB_BLOCK4, - ATTRIB_LEB128, - ATTRIB_STRING, - ATTRIB_SIBLING_REF1, - ATTRIB_SIBLING_REF2, - ATTRIB_SIBLING_REF4, - ATTRIB_SIBLING_REF8, - ATTRIB_SIBLING_REF_UDATA, - ATTRIB_NAME_STRP4, - ATTRIB_NAME_STRP8, - ATTRIB_NAME_STRING, - ATTRIB_COMP_DIR_STRP4, - ATTRIB_COMP_DIR_STRP8, - ATTRIB_COMP_DIR_STRING, - ATTRIB_STMT_LIST_LINEPTR4, - ATTRIB_STMT_LIST_LINEPTR8, - ATTRIB_DECL_FILE_DATA1, - ATTRIB_DECL_FILE_DATA2, - ATTRIB_DECL_FILE_DATA4, - ATTRIB_DECL_FILE_DATA8, - ATTRIB_DECL_FILE_UDATA, - ATTRIB_DECLARATION_FLAG, - ATTRIB_SPECIFICATION_REF1, - ATTRIB_SPECIFICATION_REF2, - ATTRIB_SPECIFICATION_REF4, - ATTRIB_SPECIFICATION_REF8, - ATTRIB_SPECIFICATION_REF_UDATA, - ATTRIB_SPECIFICATION_REF_ADDR4, - ATTRIB_SPECIFICATION_REF_ADDR8, - ATTRIB_INDIRECT, - ATTRIB_SIBLING_INDIRECT, - ATTRIB_NAME_INDIRECT, - ATTRIB_COMP_DIR_INDIRECT, - ATTRIB_STMT_LIST_INDIRECT, - ATTRIB_DECL_FILE_INDIRECT, - ATTRIB_DECLARATION_INDIRECT, - ATTRIB_SPECIFICATION_INDIRECT, - ATTRIB_MAX_INSN = ATTRIB_SPECIFICATION_INDIRECT, -}; - -enum { - /* Mask of tags that we care about. */ - DIE_FLAG_TAG_MASK = 0x3f, - /* The remaining bits can be used for other purposes. */ - DIE_FLAG_DECLARATION = 0x40, - DIE_FLAG_CHILDREN = 0x80, -}; - -DEFINE_VECTOR(uint8_vector, uint8_t) -DEFINE_VECTOR(uint32_vector, uint32_t) -DEFINE_VECTOR(uint64_vector, uint64_t) - -struct drgn_dwarf_index_cu { - struct drgn_debug_info_module *module; - const char *buf; - size_t len; - uint8_t version; - uint8_t address_size; - bool is_64_bit; - bool is_type_unit; - /* - * This is indexed on the DWARF abbreviation code minus one. It maps the - * abbreviation code to an index in abbrev_insns where the instruction - * stream for that code begins. - * - * Technically, abbreviation codes don't have to be sequential. In - * practice, GCC and Clang seem to always generate sequential codes - * starting at one, so we can get away with a flat array. 
- */ - uint32_t *abbrev_decls; - size_t num_abbrev_decls; - uint8_t *abbrev_insns; - uint64_t *file_name_hashes; - size_t num_file_names; -}; - -struct drgn_dwarf_index_cu_buffer { - struct binary_buffer bb; - struct drgn_dwarf_index_cu *cu; -}; - -static struct drgn_error * -drgn_dwarf_index_cu_buffer_error(struct binary_buffer *bb, const char *pos, - const char *message) -{ - struct drgn_dwarf_index_cu_buffer *buffer = - container_of(bb, struct drgn_dwarf_index_cu_buffer, bb); - return drgn_error_debug_info_scn(buffer->cu->module, - DRGN_SCN_DEBUG_INFO, pos, message); -} - -static void -drgn_dwarf_index_cu_buffer_init(struct drgn_dwarf_index_cu_buffer *buffer, - struct drgn_dwarf_index_cu *cu) -{ - binary_buffer_init(&buffer->bb, cu->buf, cu->len, - drgn_platform_is_little_endian(&cu->module->platform), - drgn_dwarf_index_cu_buffer_error); - buffer->cu = cu; -} - -DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_cu_vector) - -/* DIE which needs to be indexed. */ -struct drgn_dwarf_index_pending_die { - /* Index of compilation unit containing DIE. */ - size_t cu; - /* Address of DIE */ - uintptr_t addr; -}; - -DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_pending_die_vector) - -DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_index_die_map, string_hash_pair, - string_eq) -DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_die_vector) -DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_index_specification_map, - int_key_hash_pair, scalar_key_eq) - -static inline size_t hash_pair_to_shard(struct hash_pair hp) -{ - /* - * The 8 most significant bits of the hash are used as the F14 tag, so - * we don't want to use those for sharding. 
- */ - return ((hp.first >> - (8 * sizeof(size_t) - 8 - DRGN_DWARF_INDEX_SHARD_BITS)) & - (((size_t)1 << DRGN_DWARF_INDEX_SHARD_BITS) - 1)); -} - -static void -drgn_dwarf_index_namespace_init(struct drgn_dwarf_index_namespace *ns, - struct drgn_dwarf_index *dindex) -{ - for (size_t i = 0; i < ARRAY_SIZE(ns->shards); i++) { - struct drgn_dwarf_index_shard *shard = &ns->shards[i]; - omp_init_lock(&shard->lock); - drgn_dwarf_index_die_map_init(&shard->map); - drgn_dwarf_index_die_vector_init(&shard->dies); - } - ns->dindex = dindex; - drgn_dwarf_index_pending_die_vector_init(&ns->pending_dies); - ns->saved_err = NULL; -} - -void drgn_dwarf_index_init(struct drgn_dwarf_index *dindex) -{ - drgn_dwarf_index_namespace_init(&dindex->global, dindex); - drgn_dwarf_index_specification_map_init(&dindex->specifications); - drgn_dwarf_index_cu_vector_init(&dindex->cus); -} - -static void drgn_dwarf_index_cu_deinit(struct drgn_dwarf_index_cu *cu) -{ - free(cu->file_name_hashes); - free(cu->abbrev_insns); - free(cu->abbrev_decls); -} - -static void -drgn_dwarf_index_namespace_deinit(struct drgn_dwarf_index_namespace *ns) -{ - drgn_error_destroy(ns->saved_err); - drgn_dwarf_index_pending_die_vector_deinit(&ns->pending_dies); - for (size_t i = 0; i < ARRAY_SIZE(ns->shards); i++) { - struct drgn_dwarf_index_shard *shard = &ns->shards[i]; - for (size_t j = 0; j < shard->dies.size; j++) { - struct drgn_dwarf_index_die *die = &shard->dies.data[j]; - if (die->tag == DW_TAG_namespace) { - drgn_dwarf_index_namespace_deinit(die->namespace); - free(die->namespace); - } - } - drgn_dwarf_index_die_vector_deinit(&shard->dies); - drgn_dwarf_index_die_map_deinit(&shard->map); - omp_destroy_lock(&shard->lock); - } -} - -void drgn_dwarf_index_deinit(struct drgn_dwarf_index *dindex) -{ - if (!dindex) - return; - for (size_t i = 0; i < dindex->cus.size; i++) - drgn_dwarf_index_cu_deinit(&dindex->cus.data[i]); - drgn_dwarf_index_cu_vector_deinit(&dindex->cus); - 
drgn_dwarf_index_specification_map_deinit(&dindex->specifications); - drgn_dwarf_index_namespace_deinit(&dindex->global); -} - -bool -drgn_dwarf_index_update_state_init(struct drgn_dwarf_index_update_state *state, - struct drgn_dwarf_index *dindex) -{ - state->dindex = dindex; - state->max_threads = omp_get_max_threads(); - state->cus = malloc_array(state->max_threads, sizeof(*state->cus)); - if (!state->cus) - return false; - for (size_t i = 0; i < state->max_threads; i++) - drgn_dwarf_index_pending_cu_vector_init(&state->cus[i]); - return true; -} - -void -drgn_dwarf_index_update_state_deinit(struct drgn_dwarf_index_update_state *state) -{ - for (size_t i = 0; i < state->max_threads; i++) - drgn_dwarf_index_pending_cu_vector_deinit(&state->cus[i]); - free(state->cus); -} - -static struct drgn_error *dw_form_to_insn(struct drgn_dwarf_index_cu *cu, - struct binary_buffer *bb, - uint64_t form, uint8_t *insn_ret) -{ - switch (form) { - case DW_FORM_addr: - *insn_ret = cu->address_size; - return NULL; - case DW_FORM_data1: - case DW_FORM_ref1: - case DW_FORM_flag: - *insn_ret = 1; - return NULL; - case DW_FORM_data2: - case DW_FORM_ref2: - *insn_ret = 2; - return NULL; - case DW_FORM_data4: - case DW_FORM_ref4: - *insn_ret = 4; - return NULL; - case DW_FORM_data8: - case DW_FORM_ref8: - case DW_FORM_ref_sig8: - *insn_ret = 8; - return NULL; - case DW_FORM_block: - case DW_FORM_exprloc: - *insn_ret = ATTRIB_BLOCK; - return NULL; - case DW_FORM_block1: - *insn_ret = ATTRIB_BLOCK1; - return NULL; - case DW_FORM_block2: - *insn_ret = ATTRIB_BLOCK2; - return NULL; - case DW_FORM_block4: - *insn_ret = ATTRIB_BLOCK4; - return NULL; - case DW_FORM_sdata: - case DW_FORM_udata: - case DW_FORM_ref_udata: - *insn_ret = ATTRIB_LEB128; - return NULL; - case DW_FORM_ref_addr: - if (cu->version < 3) { - *insn_ret = cu->address_size; - return NULL; - } - /* fallthrough */ - case DW_FORM_sec_offset: - case DW_FORM_strp: - *insn_ret = cu->is_64_bit ? 
8 : 4; - return NULL; - case DW_FORM_string: - *insn_ret = ATTRIB_STRING; - return NULL; - case DW_FORM_flag_present: - *insn_ret = 0; - return NULL; - case DW_FORM_indirect: - *insn_ret = ATTRIB_INDIRECT; - return NULL; - default: - return binary_buffer_error(bb, - "unknown attribute form %" PRIu64, - form); - } -} - -static struct drgn_error *dw_at_sibling_to_insn(struct binary_buffer *bb, - uint64_t form, - uint8_t *insn_ret) -{ - switch (form) { - case DW_FORM_ref1: - *insn_ret = ATTRIB_SIBLING_REF1; - return NULL; - case DW_FORM_ref2: - *insn_ret = ATTRIB_SIBLING_REF2; - return NULL; - case DW_FORM_ref4: - *insn_ret = ATTRIB_SIBLING_REF4; - return NULL; - case DW_FORM_ref8: - *insn_ret = ATTRIB_SIBLING_REF8; - return NULL; - case DW_FORM_ref_udata: - *insn_ret = ATTRIB_SIBLING_REF_UDATA; - return NULL; - case DW_FORM_indirect: - *insn_ret = ATTRIB_SIBLING_INDIRECT; - return NULL; - default: - return binary_buffer_error(bb, - "unknown attribute form %" PRIu64 " for DW_AT_sibling", - form); - } -} - -static struct drgn_error *dw_at_name_to_insn(struct drgn_dwarf_index_cu *cu, - struct binary_buffer *bb, - uint64_t form, uint8_t *insn_ret) -{ - switch (form) { - case DW_FORM_strp: - if (!cu->module->scn_data[DRGN_SCN_DEBUG_STR]) { - return binary_buffer_error(bb, - "DW_FORM_strp without .debug_str section"); - } - if (cu->is_64_bit) - *insn_ret = ATTRIB_NAME_STRP8; - else - *insn_ret = ATTRIB_NAME_STRP4; - return NULL; - case DW_FORM_string: - *insn_ret = ATTRIB_NAME_STRING; - return NULL; - case DW_FORM_indirect: - *insn_ret = ATTRIB_NAME_INDIRECT; - return NULL; - default: - return binary_buffer_error(bb, - "unknown attribute form %" PRIu64 " for DW_AT_name", - form); - } -} - -static struct drgn_error *dw_at_comp_dir_to_insn(struct drgn_dwarf_index_cu *cu, - struct binary_buffer *bb, - uint64_t form, - uint8_t *insn_ret) -{ - switch (form) { - case DW_FORM_strp: - if (!cu->module->scn_data[DRGN_SCN_DEBUG_STR]) { - return binary_buffer_error(bb, - "DW_FORM_strp 
without .debug_str section"); - } - if (cu->is_64_bit) - *insn_ret = ATTRIB_COMP_DIR_STRP8; - else - *insn_ret = ATTRIB_COMP_DIR_STRP4; - return NULL; - case DW_FORM_string: - *insn_ret = ATTRIB_COMP_DIR_STRING; - return NULL; - case DW_FORM_indirect: - *insn_ret = ATTRIB_COMP_DIR_INDIRECT; - return NULL; - default: - return binary_buffer_error(bb, - "unknown attribute form %" PRIu64 " for DW_AT_comp_dir", - form); - } -} - -static struct drgn_error * -dw_at_stmt_list_to_insn(struct drgn_dwarf_index_cu *cu, - struct binary_buffer *bb, uint64_t form, - uint8_t *insn_ret) -{ - switch (form) { - case DW_FORM_data4: - *insn_ret = ATTRIB_STMT_LIST_LINEPTR4; - return NULL; - case DW_FORM_data8: - *insn_ret = ATTRIB_STMT_LIST_LINEPTR8; - return NULL; - case DW_FORM_sec_offset: - if (cu->is_64_bit) - *insn_ret = ATTRIB_STMT_LIST_LINEPTR8; - else - *insn_ret = ATTRIB_STMT_LIST_LINEPTR4; - return NULL; - case DW_FORM_indirect: - *insn_ret = ATTRIB_STMT_LIST_INDIRECT; - return NULL; - default: - return binary_buffer_error(bb, - "unknown attribute form %" PRIu64 " for DW_AT_stmt_list", - form); - } -} - -static struct drgn_error *dw_at_decl_file_to_insn(struct binary_buffer *bb, - uint64_t form, - uint8_t *insn_ret) -{ - switch (form) { - case DW_FORM_data1: - *insn_ret = ATTRIB_DECL_FILE_DATA1; - return NULL; - case DW_FORM_data2: - *insn_ret = ATTRIB_DECL_FILE_DATA2; - return NULL; - case DW_FORM_data4: - *insn_ret = ATTRIB_DECL_FILE_DATA4; - return NULL; - case DW_FORM_data8: - *insn_ret = ATTRIB_DECL_FILE_DATA8; - return NULL; - /* - * decl_file must be positive, so if the compiler uses - * DW_FORM_sdata for some reason, just treat it as udata. 
- */ - case DW_FORM_sdata: - case DW_FORM_udata: - *insn_ret = ATTRIB_DECL_FILE_UDATA; - return NULL; - case DW_FORM_indirect: - *insn_ret = ATTRIB_DECL_FILE_INDIRECT; - return NULL; - default: - return binary_buffer_error(bb, - "unknown attribute form %" PRIu64 " for DW_AT_decl_file", - form); - } -} - -static struct drgn_error * -dw_at_declaration_to_insn(struct binary_buffer *bb, uint64_t form, - uint8_t *insn_ret, uint8_t *die_flags) -{ - switch (form) { - case DW_FORM_flag: - *insn_ret = ATTRIB_DECLARATION_FLAG; - return NULL; - case DW_FORM_flag_present: - /* - * This could be an instruction, but as long as we have a free - * DIE flag bit, we might as well use it. - */ - *insn_ret = 0; - *die_flags |= DIE_FLAG_DECLARATION; - return NULL; - case DW_FORM_indirect: - *insn_ret = ATTRIB_DECLARATION_INDIRECT; - return NULL; - default: - return binary_buffer_error(bb, - "unknown attribute form %" PRIu64 " for DW_AT_declaration", - form); - } -} - -static struct drgn_error * -dw_at_specification_to_insn(struct drgn_dwarf_index_cu *cu, - struct binary_buffer *bb, uint64_t form, - uint8_t *insn_ret) -{ - switch (form) { - case DW_FORM_ref1: - *insn_ret = ATTRIB_SPECIFICATION_REF1; - return NULL; - case DW_FORM_ref2: - *insn_ret = ATTRIB_SPECIFICATION_REF2; - return NULL; - case DW_FORM_ref4: - *insn_ret = ATTRIB_SPECIFICATION_REF4; - return NULL; - case DW_FORM_ref8: - *insn_ret = ATTRIB_SPECIFICATION_REF8; - return NULL; - case DW_FORM_ref_udata: - *insn_ret = ATTRIB_SPECIFICATION_REF_UDATA; - return NULL; - case DW_FORM_ref_addr: - if (cu->version >= 3) { - if (cu->is_64_bit) - *insn_ret = ATTRIB_SPECIFICATION_REF_ADDR8; - else - *insn_ret = ATTRIB_SPECIFICATION_REF_ADDR4; - } else { - if (cu->address_size == 8) - *insn_ret = ATTRIB_SPECIFICATION_REF_ADDR8; - else if (cu->address_size == 4) - *insn_ret = ATTRIB_SPECIFICATION_REF_ADDR4; - else - return binary_buffer_error(bb, - "unsupported address size %" PRIu8 " for DW_FORM_ref_addr", - cu->address_size); - } - 
return NULL; - case DW_FORM_indirect: - *insn_ret = ATTRIB_SPECIFICATION_INDIRECT; - return NULL; - default: - return binary_buffer_error(bb, - "unknown attribute form %" PRIu64 " for DW_AT_specification", - form); - } -} - -static struct drgn_error * -read_abbrev_decl(struct drgn_debug_info_buffer *buffer, - struct drgn_dwarf_index_cu *cu, struct uint32_vector *decls, - struct uint8_vector *insns) -{ - struct drgn_error *err; - - static_assert(ATTRIB_MAX_INSN == UINT8_MAX, - "maximum DWARF attribute instruction is invalid"); - - uint64_t code; - if ((err = binary_buffer_next_uleb128(&buffer->bb, &code))) - return err; - if (code == 0) - return &drgn_stop; - if (code != decls->size + 1) { - return binary_buffer_error(&buffer->bb, - "DWARF abbrevation table is not sequential"); - } - - uint32_t insn_index = insns->size; - if (!uint32_vector_append(decls, &insn_index)) - return &drgn_enomem; - - uint64_t tag; - if ((err = binary_buffer_next_uleb128(&buffer->bb, &tag))) - return err; - - bool should_index; - switch (tag) { - /* Types. */ - case DW_TAG_base_type: - case DW_TAG_class_type: - case DW_TAG_enumeration_type: - case DW_TAG_structure_type: - case DW_TAG_typedef: - case DW_TAG_union_type: - /* Variables. */ - case DW_TAG_variable: - /* Constants. */ - case DW_TAG_enumerator: - /* Functions. */ - case DW_TAG_subprogram: - /* Namespaces */ - case DW_TAG_namespace: - /* If adding anything here, make sure it fits in DIE_FLAG_TAG_MASK. */ - should_index = true; - break; - default: - should_index = false; - break; - } - uint8_t die_flags = should_index ? 
tag : 0; - - uint8_t children; - if ((err = binary_buffer_next_u8(&buffer->bb, &children))) - return err; - if (children) - die_flags |= DIE_FLAG_CHILDREN; - - bool first = true; - uint8_t insn; - for (;;) { - uint64_t name, form; - if ((err = binary_buffer_next_uleb128(&buffer->bb, &name))) - return err; - if ((err = binary_buffer_next_uleb128(&buffer->bb, &form))) - return err; - if (name == 0 && form == 0) - break; - - if (name == DW_AT_sibling) { - err = dw_at_sibling_to_insn(&buffer->bb, form, &insn); - } else if (name == DW_AT_name && should_index) { - err = dw_at_name_to_insn(cu, &buffer->bb, form, &insn); - } else if (name == DW_AT_comp_dir) { - err = dw_at_comp_dir_to_insn(cu, &buffer->bb, form, - &insn); - } else if (name == DW_AT_stmt_list) { - if (!cu->module->scn_data[DRGN_SCN_DEBUG_LINE]) { - return binary_buffer_error(&buffer->bb, - "DW_AT_stmt_list without .debug_line section"); - } - err = dw_at_stmt_list_to_insn(cu, &buffer->bb, form, - &insn); - } else if (name == DW_AT_decl_file && should_index && - /* Namespaces are merged, so we ignore their file. 
*/ - tag != DW_TAG_namespace) { - err = dw_at_decl_file_to_insn(&buffer->bb, form, &insn); - } else if (name == DW_AT_declaration && should_index) { - err = dw_at_declaration_to_insn(&buffer->bb, form, - &insn, &die_flags); - } else if (name == DW_AT_specification && should_index) { - err = dw_at_specification_to_insn(cu, &buffer->bb, form, - &insn); - } else { - err = dw_form_to_insn(cu, &buffer->bb, form, &insn); - } - if (err) - return err; - - if (insn != 0) { - if (!first && insn <= INSN_MAX_SKIP) { - uint8_t last_insn = insns->data[insns->size - 1]; - if (last_insn + insn <= INSN_MAX_SKIP) { - insns->data[insns->size - 1] += insn; - continue; - } else if (last_insn < INSN_MAX_SKIP) { - insn = last_insn + insn - INSN_MAX_SKIP; - insns->data[insns->size - 1] = INSN_MAX_SKIP; - } - } - - if (!uint8_vector_append(insns, &insn)) - return &drgn_enomem; - first = false; - } - } - insn = 0; - if (!uint8_vector_append(insns, &insn) || - !uint8_vector_append(insns, &die_flags)) - return &drgn_enomem; - return NULL; -} - -static struct drgn_error *read_abbrev_table(struct drgn_dwarf_index_cu *cu, - size_t debug_abbrev_offset) -{ - struct drgn_debug_info_buffer buffer; - drgn_debug_info_buffer_init(&buffer, cu->module, DRGN_SCN_DEBUG_ABBREV); - /* Checked in read_cu(). */ - buffer.bb.pos += debug_abbrev_offset; - struct uint32_vector decls = VECTOR_INIT; - struct uint8_vector insns = VECTOR_INIT; - for (;;) { - struct drgn_error *err = read_abbrev_decl(&buffer, cu, &decls, - &insns); - if (err == &drgn_stop) { - break; - } else if (err) { - uint8_vector_deinit(&insns); - uint32_vector_deinit(&decls); - return err; - } - } - cu->abbrev_decls = decls.data; - cu->num_abbrev_decls = decls.size; - cu->abbrev_insns = insns.data; - return NULL; -} - -static struct drgn_error *read_cu(struct drgn_dwarf_index_cu_buffer *buffer) -{ - struct drgn_error *err; - buffer->bb.pos += buffer->cu->is_64_bit ? 
12 : 4; - uint16_t version; - if ((err = binary_buffer_next_u16(&buffer->bb, &version))) - return err; - if (version < 2 || version > 4) { - return binary_buffer_error(&buffer->bb, - "unknown DWARF CU version %" PRIu16, - version); - } - buffer->cu->version = version; - - uint64_t debug_abbrev_offset; - if (buffer->cu->is_64_bit) { - if ((err = binary_buffer_next_u64(&buffer->bb, - &debug_abbrev_offset))) - return err; - } else { - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &debug_abbrev_offset))) - return err; - } - if (debug_abbrev_offset > - buffer->cu->module->scn_data[DRGN_SCN_DEBUG_ABBREV]->d_size) { - return binary_buffer_error(&buffer->bb, - "debug_abbrev_offset is out of bounds"); - } - - if ((err = binary_buffer_next_u8(&buffer->bb, - &buffer->cu->address_size))) - return err; - if (buffer->cu->address_size > 8) { - return binary_buffer_error(&buffer->bb, - "unsupported address size %" PRIu8, - buffer->cu->address_size); - } - - /* Skip type_signature and type_offset for type units. */ - if (buffer->cu->is_type_unit && - (err = binary_buffer_skip(&buffer->bb, - buffer->cu->is_64_bit ? 
16 : 12))) - return err; - - return read_abbrev_table(buffer->cu, debug_abbrev_offset); -} - -static struct drgn_error *skip_lnp_header(struct drgn_debug_info_buffer *buffer) -{ - struct drgn_error *err; - uint32_t tmp; - if ((err = binary_buffer_next_u32(&buffer->bb, &tmp))) - return err; - bool is_64_bit = tmp == UINT32_C(0xffffffff); - if (is_64_bit && - (err = binary_buffer_skip(&buffer->bb, sizeof(uint64_t)))) - return err; - - uint16_t version; - if ((err = binary_buffer_next_u16(&buffer->bb, &version))) - return err; - if (version < 2 || version > 4) { - return binary_buffer_error(&buffer->bb, - "unknown DWARF LNP version %" PRIu16, - version); - } - - /* - * Skip: - * header_length - * minimum_instruction_length - * maximum_operations_per_instruction (DWARF 4 only) - * default_is_stmt - * line_base - * line_range - * standard_opcode_lengths - */ - uint8_t opcode_base; - if ((err = binary_buffer_skip(&buffer->bb, - (is_64_bit ? 8 : 4) + 4 + (version >= 4))) || - (err = binary_buffer_next_u8(&buffer->bb, &opcode_base)) || - (err = binary_buffer_skip(&buffer->bb, opcode_base - 1))) - return err; - - return NULL; -} - -/** - * Cached hash of file path. - * - * File names in the DWARF line number program header consist of three parts: - * the compilation directory path, the directory path, and the file name. - * Multiple file names may be relative to the same directory, and relative - * directory paths are all relative to the compilation directory. - * - * We'd like to hash DWARF file names to a unique hash so that we can - * deduplicate definitions without comparing full paths. - * - * The naive way to hash a DWARF file name entry would be to join and normalize - * the compilation directory path, directory path, and file name, and hash that. - * But this would involve a lot of redundant computations since most paths will - * have common prefixes. Instead, we cache the hashes of each directory path and - * update the hash for relative paths. 
- * - * It is not sufficient to cache the final hash for each directory because ".." - * components may require us to use the hash of a parent directory. So, we also - * cache the hash of every parent directory in a linked list. - * - * We use the FNV-1a hash function. Although FNV-1a is - * [known](https://github.com/rurban/smhasher/blob/master/doc/FNV1a.txt) to have - * some hash quality problems, it is sufficient for producing unique 64-bit - * hashes of file names. It has a couple of advantages over "better" hash - * functions: - * - * 1. Its only internal state is the 64-bit hash value, which keeps this - * structure small. - * 2. It operates byte-by-byte, which works well for incrementally hashing lots - * of short path components. - */ -struct path_hash { - /** Hash of this path. */ - uint64_t hash; - /** - * Tagged pointer comprising `struct path_hash *` of parent directory - * and flag in lowest-order bit specifying whether this path ends in a - * ".." component. - */ - uintptr_t parent_and_is_dot_dot; -}; - -#define FNV_OFFSET_BASIS_64 UINT64_C(0xcbf29ce484222325) -#define FNV_PRIME_64 UINT64_C(0x00000100000001b3) - -static inline void path_hash_update(struct path_hash *path_hash, - const void *src, size_t len) -{ - const uint8_t *s = src, *end = s + len; - uint64_t hash = path_hash->hash; - while (s < end) { - hash ^= *(s++); - hash *= FNV_PRIME_64; - } - path_hash->hash = hash; -} - -/** Path hash of "" (empty string). */ -static const struct path_hash empty_path_hash = { FNV_OFFSET_BASIS_64 }; -/** Path hash of "/". 
*/ -static const struct path_hash absolute_path_hash = { - (FNV_OFFSET_BASIS_64 ^ '/') * FNV_PRIME_64, -}; - -static inline const struct path_hash * -path_hash_parent(const struct path_hash *path_hash) -{ - return (struct path_hash *)(path_hash->parent_and_is_dot_dot - & ~(uintptr_t)1); -} - -static inline bool path_hash_is_dot_dot(const struct path_hash *path_hash) -{ - return path_hash->parent_and_is_dot_dot & 1; -} - -/** Chunk of allocated @ref path_hash objects. See @ref path_hash_cache. */ -struct path_hash_chunk { - struct path_hash objects[(4096 - sizeof(struct path_hash_chunk *)) - / sizeof(struct path_hash)]; - struct path_hash_chunk *next; -}; - -DEFINE_VECTOR(path_hash_vector, const struct path_hash *) - -/** - * Cache of hashed file paths. - * - * This uses a bump allocator for @ref path_hash objects. @ref path_hash objects - * are allocated sequentially out of a @ref path_hash_chunk; when a chunk is - * exhausted, a new @ref path_hash_chunk is allocated from the heap. The - * allocated chunks are kept and reused for each DWARF line number program; they - * are freed at the end of the first indexing pass. - */ -struct path_hash_cache { - /** Next @ref path_hash object to be allocated. */ - struct path_hash *next_object; - /** @ref path_hash_chunk currently being allocated from. */ - struct path_hash_chunk *current_chunk; - /** First allocated @ref path_hash_chunk. */ - struct path_hash_chunk *first_chunk; - /** Hashed directory paths. 
*/ - struct path_hash_vector directories; -}; - -static struct path_hash *path_hash_alloc(struct path_hash_cache *cache) -{ - struct path_hash_chunk *current_chunk = cache->current_chunk; - if (cache->next_object < - ¤t_chunk->objects[ARRAY_SIZE(current_chunk->objects)]) - return cache->next_object++; - struct path_hash_chunk *next_chunk = current_chunk->next; - if (!next_chunk) { - next_chunk = malloc(sizeof(*next_chunk)); - if (!next_chunk) - return NULL; - next_chunk->next = NULL; - current_chunk->next = next_chunk; - } - cache->current_chunk = next_chunk; - cache->next_object = &next_chunk->objects[1]; - return next_chunk->objects; -} - -static inline bool is_dot_dot(const char *component, size_t component_len) -{ - return component_len == 2 && component[0] == '.' && component[1] == '.'; -} - -static const struct path_hash *hash_path(struct path_hash_cache *cache, - const char *path, - const struct path_hash *path_hash) -{ - const char *p = path; - if (*p == '/') { - path_hash = &absolute_path_hash; - p++; - } - while (*p != '\0') { - const char *component = p; - p = strchrnul(p, '/'); - size_t component_len = p - component; - if (*p == '/') - p++; - if (component_len == 0 || - (component_len == 1 && component[0] == '.')) { - } else if (!is_dot_dot(component, component_len) || - path_hash == &empty_path_hash || - path_hash_is_dot_dot(path_hash)) { - struct path_hash *new_path_hash = path_hash_alloc(cache); - if (!new_path_hash) - return NULL; - new_path_hash->hash = path_hash->hash; - if (path_hash->parent_and_is_dot_dot != 0) - path_hash_update(new_path_hash, "/", 1); - path_hash_update(new_path_hash, component, - component_len); - new_path_hash->parent_and_is_dot_dot = - ((uintptr_t)path_hash | - is_dot_dot(component, component_len)); - path_hash = new_path_hash; - } else if (path_hash != &absolute_path_hash) { - path_hash = path_hash_parent(path_hash); - } - } - return path_hash; -} - -static struct drgn_error *read_file_name_table(struct path_hash_cache 
*cache, - struct drgn_dwarf_index_cu *cu, - const char *comp_dir, - size_t stmt_list) -{ - struct drgn_error *err; - - struct drgn_debug_info_buffer buffer; - drgn_debug_info_buffer_init(&buffer, cu->module, DRGN_SCN_DEBUG_LINE); - /* Checked in index_cu_first_pass(). */ - buffer.bb.pos += stmt_list; - - if ((err = skip_lnp_header(&buffer))) - return err; - - cache->current_chunk = cache->first_chunk; - cache->next_object = cache->first_chunk->objects; - cache->directories.size = 0; - - const struct path_hash *path_hash = hash_path(cache, comp_dir, - &empty_path_hash); - if (!path_hash || - !path_hash_vector_append(&cache->directories, &path_hash)) - return &drgn_enomem; - for (;;) { - const char *path; - size_t path_len; - if ((err = binary_buffer_next_string(&buffer.bb, &path, - &path_len))) - return err; - if (!path_len) - break; - path_hash = hash_path(cache, path, cache->directories.data[0]); - if (!path_hash || - !path_hash_vector_append(&cache->directories, &path_hash)) - return &drgn_enomem; - } - - struct uint64_vector file_name_hashes = VECTOR_INIT; - for (;;) { - const char *path; - size_t path_len; - if ((err = binary_buffer_next_string(&buffer.bb, &path, - &path_len))) - goto err; - if (!path_len) - break; - - uint64_t directory_index; - if ((err = binary_buffer_next_uleb128(&buffer.bb, - &directory_index))) - goto err; - if (directory_index >= cache->directories.size) { - err = binary_buffer_error(&buffer.bb, - "directory index %" PRIu64 " is invalid", - directory_index); - goto err; - } - - /* mtime, size */ - if ((err = binary_buffer_skip_leb128(&buffer.bb)) || - (err = binary_buffer_skip_leb128(&buffer.bb))) - goto err; - - struct path_hash *prev_object = cache->next_object; - struct path_hash_chunk *prev_chunk = cache->current_chunk; - path_hash = hash_path(cache, path, - cache->directories.data[directory_index]); - if (!path_hash || - !uint64_vector_append(&file_name_hashes, &path_hash->hash)) { - err = &drgn_enomem; - goto err; - } - - /* "Free" 
the objects allocated for this file name. */ - cache->next_object = prev_object; - cache->current_chunk = prev_chunk; - } - - uint64_vector_shrink_to_fit(&file_name_hashes); - cu->file_name_hashes = file_name_hashes.data; - cu->num_file_names = file_name_hashes.size; - return NULL; - -err: - uint64_vector_deinit(&file_name_hashes); - return err; -} - -static struct drgn_error * -index_specification(struct drgn_dwarf_index *dindex, uintptr_t declaration, - struct drgn_debug_info_module *module, uintptr_t addr) -{ - struct drgn_dwarf_index_specification entry = { - .declaration = declaration, - .module = module, - .addr = addr, - }; - struct hash_pair hp = - drgn_dwarf_index_specification_map_hash(&declaration); - int ret; - #pragma omp critical(drgn_index_specification) - ret = drgn_dwarf_index_specification_map_insert_hashed(&dindex->specifications, - &entry, hp, - NULL); - /* - * There may be duplicates if multiple DIEs reference one declaration, - * but we ignore them. - */ - return ret == -1 ? 
&drgn_enomem : NULL; -} - -static struct drgn_error *read_indirect_insn(struct drgn_dwarf_index_cu *cu, - struct binary_buffer *bb, - uint8_t insn, uint8_t *insn_ret, - uint8_t *die_flags) -{ - struct drgn_error *err; - uint64_t form; - if ((err = binary_buffer_next_uleb128(bb, &form))) - return err; - switch (insn) { - case ATTRIB_INDIRECT: - return dw_form_to_insn(cu, bb, form, insn_ret); - case ATTRIB_SIBLING_INDIRECT: - return dw_at_sibling_to_insn(bb, form, insn_ret); - case ATTRIB_NAME_INDIRECT: - return dw_at_name_to_insn(cu, bb, form, insn_ret); - case ATTRIB_COMP_DIR_INDIRECT: - return dw_at_comp_dir_to_insn(cu, bb, form, insn_ret); - case ATTRIB_STMT_LIST_INDIRECT: - return dw_at_stmt_list_to_insn(cu, bb, form, insn_ret); - case ATTRIB_DECL_FILE_INDIRECT: - return dw_at_decl_file_to_insn(bb, form, insn_ret); - case ATTRIB_DECLARATION_INDIRECT: - return dw_at_declaration_to_insn(bb, form, insn_ret, die_flags); - case ATTRIB_SPECIFICATION_INDIRECT: - return dw_at_specification_to_insn(cu, bb, form, insn_ret); - default: - UNREACHABLE(); - } -} - -/* - * First pass: read the file name tables and index DIEs with - * DW_AT_specification. This recurses into namespaces. - */ -static struct drgn_error * -index_cu_first_pass(struct drgn_dwarf_index *dindex, - struct drgn_dwarf_index_cu_buffer *buffer, - struct path_hash_cache *path_hash_cache) -{ - struct drgn_error *err; - struct drgn_dwarf_index_cu *cu = buffer->cu; - Elf_Data *debug_str = cu->module->scn_data[DRGN_SCN_DEBUG_STR]; - Elf_Data *debug_info = cu->module->scn_data[ - cu->is_type_unit ? 
DRGN_SCN_DEBUG_TYPES : DRGN_SCN_DEBUG_INFO]; - const char *debug_info_buffer = debug_info->d_buf; - unsigned int depth = 0; - for (;;) { - size_t die_addr = (uintptr_t)buffer->bb.pos; - - uint64_t code; - if ((err = binary_buffer_next_uleb128(&buffer->bb, &code))) - return err; - if (code == 0) { - if (depth-- > 1) - continue; - else - break; - } else if (code > cu->num_abbrev_decls) { - return binary_buffer_error(&buffer->bb, - "unknown abbreviation code %" PRIu64, - code); - } - - uint8_t *insnp = &cu->abbrev_insns[cu->abbrev_decls[code - 1]]; - bool declaration = false; - uintptr_t specification = 0; - const char *comp_dir = ""; - const char *stmt_list_ptr = NULL; - uint64_t stmt_list; - const char *sibling = NULL; - uint8_t insn; - uint8_t extra_die_flags = 0; - while ((insn = *insnp++)) { -indirect_insn:; - uint64_t skip, tmp; - switch (insn) { - case ATTRIB_BLOCK: - if ((err = binary_buffer_next_uleb128(&buffer->bb, - &skip))) - return err; - goto skip; - case ATTRIB_BLOCK1: - if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, - &skip))) - return err; - goto skip; - case ATTRIB_BLOCK2: - if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, - &skip))) - return err; - goto skip; - case ATTRIB_BLOCK4: - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &skip))) - return err; - goto skip; - case ATTRIB_LEB128: - case ATTRIB_DECL_FILE_UDATA: - if ((err = binary_buffer_skip_leb128(&buffer->bb))) - return err; - break; - case ATTRIB_COMP_DIR_STRING: - comp_dir = buffer->bb.pos; - /* fallthrough */ - case ATTRIB_STRING: - case ATTRIB_NAME_STRING: - if ((err = binary_buffer_skip_string(&buffer->bb))) - return err; - break; - case ATTRIB_SIBLING_REF1: - if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, - &tmp))) - return err; - goto sibling; - case ATTRIB_SIBLING_REF2: - if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, - &tmp))) - return err; - goto sibling; - case ATTRIB_SIBLING_REF4: - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, 
- &tmp))) - return err; - goto sibling; - case ATTRIB_SIBLING_REF8: - if ((err = binary_buffer_next_u64(&buffer->bb, - &tmp))) - return err; - goto sibling; - case ATTRIB_SIBLING_REF_UDATA: - if ((err = binary_buffer_next_uleb128(&buffer->bb, - &tmp))) - return err; -sibling: - if (tmp > cu->len) { - return binary_buffer_error(&buffer->bb, - "DW_AT_sibling is out of bounds"); - } - sibling = cu->buf + tmp; - __builtin_prefetch(sibling); - if (sibling < buffer->bb.pos) { - return binary_buffer_error(&buffer->bb, - "DW_AT_sibling points backwards"); - } - break; - case ATTRIB_COMP_DIR_STRP4: - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &tmp))) - return err; - goto comp_dir_strp; - case ATTRIB_COMP_DIR_STRP8: - if ((err = binary_buffer_next_u64(&buffer->bb, &tmp))) - return err; -comp_dir_strp: - if (tmp >= debug_str->d_size) { - return binary_buffer_error(&buffer->bb, - "DW_AT_comp_dir is out of bounds"); - } - comp_dir = (const char *)debug_str->d_buf + tmp; - break; - case ATTRIB_STMT_LIST_LINEPTR4: - stmt_list_ptr = buffer->bb.pos; - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &stmt_list))) - return err; - break; - case ATTRIB_STMT_LIST_LINEPTR8: - stmt_list_ptr = buffer->bb.pos; - if ((err = binary_buffer_next_u64(&buffer->bb, - &stmt_list))) - return err; - break; - case ATTRIB_DECL_FILE_DATA1: - skip = 1; - goto skip; - case ATTRIB_DECL_FILE_DATA2: - skip = 2; - goto skip; - case ATTRIB_NAME_STRP4: - case ATTRIB_DECL_FILE_DATA4: - skip = 4; - goto skip; - case ATTRIB_NAME_STRP8: - case ATTRIB_DECL_FILE_DATA8: - skip = 8; - goto skip; - case ATTRIB_DECLARATION_FLAG: { - uint8_t flag; - if ((err = binary_buffer_next_u8(&buffer->bb, - &flag))) - return err; - if (flag) - declaration = true; - break; - } - case ATTRIB_SPECIFICATION_REF1: - if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, - &tmp))) - return err; - goto specification; - case ATTRIB_SPECIFICATION_REF2: - if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, - 
&tmp))) - return err; - goto specification; - case ATTRIB_SPECIFICATION_REF4: - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &tmp))) - return err; - goto specification; - case ATTRIB_SPECIFICATION_REF8: - if ((err = binary_buffer_next_u64(&buffer->bb, - &tmp))) - return err; - goto specification; - case ATTRIB_SPECIFICATION_REF_UDATA: - if ((err = binary_buffer_next_uleb128(&buffer->bb, - &tmp))) - return err; -specification: - specification = (uintptr_t)cu->buf + tmp; - break; - case ATTRIB_SPECIFICATION_REF_ADDR4: - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &tmp))) - return err; - goto specification_ref_addr; - case ATTRIB_SPECIFICATION_REF_ADDR8: - if ((err = binary_buffer_next_u64(&buffer->bb, - &tmp))) - return err; -specification_ref_addr: - specification = (uintptr_t)debug_info_buffer + tmp; - break; - case ATTRIB_INDIRECT: - case ATTRIB_SIBLING_INDIRECT: - case ATTRIB_NAME_INDIRECT: - case ATTRIB_COMP_DIR_INDIRECT: - case ATTRIB_STMT_LIST_INDIRECT: - case ATTRIB_DECL_FILE_INDIRECT: - case ATTRIB_DECLARATION_INDIRECT: - case ATTRIB_SPECIFICATION_INDIRECT: - if ((err = read_indirect_insn(cu, &buffer->bb, - insn, &insn, - &extra_die_flags))) - return err; - if (insn) - goto indirect_insn; - else - continue; - default: - skip = insn; -skip: - if ((err = binary_buffer_skip(&buffer->bb, - skip))) - return err; - break; - } - } - insn = *insnp | extra_die_flags; - - if (depth == 0) { - if (stmt_list_ptr) { - if (stmt_list > - cu->module->scn_data[DRGN_SCN_DEBUG_LINE]->d_size) { - return binary_buffer_error_at(&buffer->bb, - stmt_list_ptr, - "DW_AT_stmt_list is out of bounds"); - } - if ((err = read_file_name_table(path_hash_cache, - cu, comp_dir, - stmt_list))) - return err; - } - } else if (specification) { - if (insn & DIE_FLAG_DECLARATION) - declaration = true; - /* - * For now, we don't handle DIEs with - * DW_AT_specification which are themselves - * declarations. 
We may need to handle - * DW_AT_specification "chains" in the future. - */ - if (!declaration && - (err = index_specification(dindex, specification, - cu->module, die_addr))) - return err; - } - - if (insn & DIE_FLAG_CHILDREN) { - if (sibling && - (insn & DIE_FLAG_TAG_MASK) != DW_TAG_namespace) - buffer->bb.pos = sibling; - else - depth++; - } else if (depth == 0) { - break; - } - } - return NULL; -} - -static struct drgn_error * -drgn_dwarf_index_read_cus(struct drgn_dwarf_index_update_state *state, - struct drgn_debug_info_module *module, - enum drgn_debug_info_scn scn) -{ - struct drgn_dwarf_index_pending_cu_vector *cus = - &state->cus[omp_get_thread_num()]; - - struct drgn_error *err; - struct drgn_debug_info_buffer buffer; - drgn_debug_info_buffer_init(&buffer, module, scn); - while (binary_buffer_has_next(&buffer.bb)) { - struct drgn_dwarf_index_pending_cu *cu = - drgn_dwarf_index_pending_cu_vector_append_entry(cus); - if (!cu) - return &drgn_enomem; - cu->module = module; - cu->buf = buffer.bb.pos; - uint32_t unit_length32; - if ((err = binary_buffer_next_u32(&buffer.bb, &unit_length32))) - return err; - cu->is_64_bit = unit_length32 == UINT32_C(0xffffffff); - if (cu->is_64_bit) { - uint64_t unit_length64; - if ((err = binary_buffer_next_u64(&buffer.bb, - &unit_length64)) || - (err = binary_buffer_skip(&buffer.bb, - unit_length64))) - return err; - } else { - if ((err = binary_buffer_skip(&buffer.bb, - unit_length32))) - return err; - } - cu->len = buffer.bb.pos - cu->buf; - cu->scn = scn; - } - return NULL; -} - -struct drgn_error * -drgn_dwarf_index_read_module(struct drgn_dwarf_index_update_state *state, - struct drgn_debug_info_module *module) -{ - struct drgn_error *err; - err = drgn_dwarf_index_read_cus(state, module, DRGN_SCN_DEBUG_INFO); - if (!err && module->scn_data[DRGN_SCN_DEBUG_TYPES]) { - err = drgn_dwarf_index_read_cus(state, module, - DRGN_SCN_DEBUG_TYPES); - } - return err; -} - -bool -drgn_dwarf_index_find_definition(struct drgn_dwarf_index 
*dindex, - uintptr_t die_addr, - struct drgn_debug_info_module **module_ret, - uintptr_t *addr_ret) -{ - struct drgn_dwarf_index_specification_map_iterator it = - drgn_dwarf_index_specification_map_search(&dindex->specifications, - &die_addr); - if (!it.entry) - return false; - *module_ret = it.entry->module; - *addr_ret = it.entry->addr; - return true; -} - -static bool append_die_entry(struct drgn_dwarf_index *dindex, - struct drgn_dwarf_index_shard *shard, uint8_t tag, - uint64_t file_name_hash, - struct drgn_debug_info_module *module, - uintptr_t addr) -{ - if (shard->dies.size == UINT32_MAX) - return false; - struct drgn_dwarf_index_die *die = - drgn_dwarf_index_die_vector_append_entry(&shard->dies); - if (!die) - return false; - die->next = UINT32_MAX; - die->tag = tag; - if (die->tag == DW_TAG_namespace) { - die->namespace = malloc(sizeof(*die->namespace)); - if (!die->namespace) { - shard->dies.size--; - return false; - } - drgn_dwarf_index_namespace_init(die->namespace, dindex); - } else { - die->file_name_hash = file_name_hash; - } - die->module = module; - die->addr = addr; - - return true; -} - -static struct drgn_error *index_die(struct drgn_dwarf_index_namespace *ns, - struct drgn_dwarf_index_cu *cu, - const char *name, uint8_t tag, - uint64_t file_name_hash, - struct drgn_debug_info_module *module, - uintptr_t addr) -{ - struct drgn_error *err; - struct drgn_dwarf_index_die_map_entry entry = { - .key = { - .str = name, - .len = strlen(name), - }, - }; - struct hash_pair hp; - struct drgn_dwarf_index_shard *shard; - struct drgn_dwarf_index_die_map_iterator it; - size_t index; - struct drgn_dwarf_index_die *die; - - hp = drgn_dwarf_index_die_map_hash(&entry.key); - shard = &ns->shards[hash_pair_to_shard(hp)]; - omp_set_lock(&shard->lock); - it = drgn_dwarf_index_die_map_search_hashed(&shard->map, &entry.key, - hp); - if (!it.entry) { - if (!append_die_entry(ns->dindex, shard, tag, file_name_hash, - module, addr)) { - err = &drgn_enomem; - goto err; - } 
- entry.value = shard->dies.size - 1; - if (!drgn_dwarf_index_die_map_insert_searched(&shard->map, - &entry, hp, - NULL)) { - err = &drgn_enomem; - goto err; - } - die = &shard->dies.data[shard->dies.size - 1]; - goto out; - } - - die = &shard->dies.data[it.entry->value]; - for (;;) { - const uint64_t die_file_name_hash = - die->tag == DW_TAG_namespace ? 0 : die->file_name_hash; - if (die->tag == tag && die_file_name_hash == file_name_hash) - goto out; - - if (die->next == UINT32_MAX) - break; - die = &shard->dies.data[die->next]; - } - - index = die - shard->dies.data; - if (!append_die_entry(ns->dindex, shard, tag, file_name_hash, module, - addr)) { - err = &drgn_enomem; - goto err; - } - die = &shard->dies.data[shard->dies.size - 1]; - shard->dies.data[index].next = shard->dies.size - 1; -out: - if (tag == DW_TAG_namespace) { - struct drgn_dwarf_index_pending_die *pending = - drgn_dwarf_index_pending_die_vector_append_entry(&die->namespace->pending_dies); - if (!pending) { - err = &drgn_enomem; - goto err; - } - pending->cu = cu - ns->dindex->cus.data; - pending->addr = addr; - } - err = NULL; -err: - omp_unset_lock(&shard->lock); - return err; -} - -/* Second pass: index the actual DIEs. 
*/ -static struct drgn_error * -index_cu_second_pass(struct drgn_dwarf_index_namespace *ns, - struct drgn_dwarf_index_cu_buffer *buffer) -{ - struct drgn_error *err; - struct drgn_dwarf_index_cu *cu = buffer->cu; - Elf_Data *debug_str = cu->module->scn_data[DRGN_SCN_DEBUG_STR]; - unsigned int depth = 0; - uint8_t depth1_tag = 0; - size_t depth1_addr = 0; - for (;;) { - size_t die_addr = (uintptr_t)buffer->bb.pos; - - uint64_t code; - if ((err = binary_buffer_next_uleb128(&buffer->bb, &code))) - return err; - if (code == 0) { - if (depth-- > 1) - continue; - else - break; - } else if (code > cu->num_abbrev_decls) { - return binary_buffer_error(&buffer->bb, - "unknown abbreviation code %" PRIu64, - code); - } - - uint8_t *insnp = &cu->abbrev_insns[cu->abbrev_decls[code - 1]]; - const char *name = NULL; - const char *decl_file_ptr = NULL; - uint64_t decl_file = 0; - bool declaration = false; - bool specification = false; - const char *sibling = NULL; - uint8_t insn; - uint8_t extra_die_flags = 0; - while ((insn = *insnp++)) { -indirect_insn:; - uint64_t skip, tmp; - switch (insn) { - case ATTRIB_BLOCK: - if ((err = binary_buffer_next_uleb128(&buffer->bb, - &skip))) - return err; - goto skip; - case ATTRIB_BLOCK1: - if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, - &skip))) - return err; - goto skip; - case ATTRIB_BLOCK2: - if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, - &skip))) - return err; - goto skip; - case ATTRIB_BLOCK4: - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &skip))) - return err; - goto skip; - case ATTRIB_SPECIFICATION_REF_UDATA: - specification = true; - /* fallthrough */ - case ATTRIB_LEB128: - if ((err = binary_buffer_skip_leb128(&buffer->bb))) - return err; - break; - case ATTRIB_NAME_STRING: - name = buffer->bb.pos; - /* fallthrough */ - case ATTRIB_STRING: - case ATTRIB_COMP_DIR_STRING: - if ((err = binary_buffer_skip_string(&buffer->bb))) - return err; - break; - case ATTRIB_SIBLING_REF1: - if ((err = 
binary_buffer_next_u8_into_u64(&buffer->bb, - &tmp))) - return err; - goto sibling; - case ATTRIB_SIBLING_REF2: - if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, - &tmp))) - return err; - goto sibling; - case ATTRIB_SIBLING_REF4: - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &tmp))) - return err; - goto sibling; - case ATTRIB_SIBLING_REF8: - if ((err = binary_buffer_next_u64(&buffer->bb, - &tmp))) - return err; - goto sibling; - case ATTRIB_SIBLING_REF_UDATA: - if ((err = binary_buffer_next_uleb128(&buffer->bb, - &tmp))) - return err; -sibling: - if (tmp > cu->len) { - return binary_buffer_error(&buffer->bb, - "DW_AT_sibling is out of bounds"); - } - sibling = cu->buf + tmp; - __builtin_prefetch(sibling); - if (sibling < buffer->bb.pos) { - return binary_buffer_error(&buffer->bb, - "DW_AT_sibling points backwards"); - } - break; - case ATTRIB_NAME_STRP4: - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &tmp))) - return err; - goto strp; - case ATTRIB_NAME_STRP8: - if ((err = binary_buffer_next_u64(&buffer->bb, &tmp))) - return err; -strp: - if (tmp >= debug_str->d_size) { - return binary_buffer_error(&buffer->bb, - "DW_AT_name is out of bounds"); - } - name = (const char *)debug_str->d_buf + tmp; - __builtin_prefetch(name); - break; - case ATTRIB_COMP_DIR_STRP4: - case ATTRIB_STMT_LIST_LINEPTR4: - skip = 4; - goto skip; - case ATTRIB_COMP_DIR_STRP8: - case ATTRIB_STMT_LIST_LINEPTR8: - skip = 8; - goto skip; - case ATTRIB_DECL_FILE_DATA1: - decl_file_ptr = buffer->bb.pos; - if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, - &decl_file))) - return err; - break; - case ATTRIB_DECL_FILE_DATA2: - decl_file_ptr = buffer->bb.pos; - if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, - &decl_file))) - return err; - break; - case ATTRIB_DECL_FILE_DATA4: - decl_file_ptr = buffer->bb.pos; - if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, - &decl_file))) - return err; - break; - case ATTRIB_DECL_FILE_DATA8: - decl_file_ptr = 
buffer->bb.pos; - if ((err = binary_buffer_next_u64(&buffer->bb, - &decl_file))) - return err; - break; - case ATTRIB_DECL_FILE_UDATA: - decl_file_ptr = buffer->bb.pos; - if ((err = binary_buffer_next_uleb128(&buffer->bb, - &decl_file))) - return err; - break; - case ATTRIB_DECLARATION_FLAG: { - uint8_t flag; - if ((err = binary_buffer_next_u8(&buffer->bb, - &flag))) - return err; - if (flag) - declaration = true; - break; - } - case ATTRIB_SPECIFICATION_REF1: - specification = true; - skip = 1; - goto skip; - case ATTRIB_SPECIFICATION_REF2: - specification = true; - skip = 2; - goto skip; - case ATTRIB_SPECIFICATION_REF4: - case ATTRIB_SPECIFICATION_REF_ADDR4: - specification = true; - skip = 4; - goto skip; - case ATTRIB_SPECIFICATION_REF8: - case ATTRIB_SPECIFICATION_REF_ADDR8: - specification = true; - skip = 8; - goto skip; - case ATTRIB_INDIRECT: - case ATTRIB_SIBLING_INDIRECT: - case ATTRIB_NAME_INDIRECT: - case ATTRIB_COMP_DIR_INDIRECT: - case ATTRIB_STMT_LIST_INDIRECT: - case ATTRIB_DECL_FILE_INDIRECT: - case ATTRIB_DECLARATION_INDIRECT: - case ATTRIB_SPECIFICATION_INDIRECT: - if ((err = read_indirect_insn(cu, &buffer->bb, - insn, &insn, - &extra_die_flags))) - return err; - if (insn) - goto indirect_insn; - else - continue; - default: - skip = insn; -skip: - if ((err = binary_buffer_skip(&buffer->bb, - skip))) - return err; - break; - } - } - insn = *insnp | extra_die_flags; - - uint8_t tag = insn & DIE_FLAG_TAG_MASK; - if (depth == 1) { - depth1_tag = tag; - depth1_addr = die_addr; - } - if (depth == (tag == DW_TAG_enumerator ? 2 : 1) && name && - !specification) { - if (insn & DIE_FLAG_DECLARATION) - declaration = true; - struct drgn_debug_info_module *module = cu->module; - if (tag == DW_TAG_enumerator) { - if (depth1_tag != DW_TAG_enumeration_type) - goto next; - /* - * NB: the enumerator name points to the - * enumeration_type DIE. Also, enumerators can't - * be declared in C/C++, so we don't check for - * that. 
- */ - die_addr = depth1_addr; - } else if (declaration && - !drgn_dwarf_index_find_definition(ns->dindex, - die_addr, - &module, - &die_addr)) { - goto next; - } - - uint64_t file_name_hash; - if (decl_file) { - if (decl_file > cu->num_file_names) { - return binary_buffer_error_at(&buffer->bb, - decl_file_ptr, - "invalid DW_AT_decl_file %" PRIu64, - decl_file); - } - file_name_hash = cu->file_name_hashes[decl_file - 1]; - } else { - file_name_hash = 0; - } - if ((err = index_die(ns, cu, name, tag, file_name_hash, - module, die_addr))) - return err; - } - -next: - if (insn & DIE_FLAG_CHILDREN) { - /* - * We must descend into the children of enumeration_type - * DIEs to index enumerator DIEs. We don't want to skip - * over the children of the top-level DIE even if it has - * a sibling pointer. - */ - if (sibling && tag != DW_TAG_enumeration_type && - depth > 0) - buffer->bb.pos = sibling; - else - depth++; - } else if (depth == 0) { - break; - } - } - return NULL; -} - -static void drgn_dwarf_index_rollback(struct drgn_dwarf_index *dindex) -{ - for (size_t i = 0; i < ARRAY_SIZE(dindex->global.shards); i++) { - struct drgn_dwarf_index_shard *shard = - &dindex->global.shards[i]; - - /* - * Because we're deleting everything that was added since the - * last update, we can just shrink the dies array to the first - * entry that was added for this update. - */ - while (shard->dies.size) { - struct drgn_dwarf_index_die *die = - &shard->dies.data[shard->dies.size - 1]; - if (die->module->state == - DRGN_DEBUG_INFO_MODULE_INDEXED) - break; - if (die->tag == DW_TAG_namespace) { - drgn_dwarf_index_namespace_deinit(die->namespace); - free(die->namespace); - } - shard->dies.size--; - } - - /* - * The new entries may be chained off of existing entries; - * unchain them. Note that any entries chained off of the new - * entries must also be new, so there's no need to preserve - * them. 
- */ - for (size_t index = 0; index < shard->dies.size; i++) { - struct drgn_dwarf_index_die *die = - &shard->dies.data[index]; - if (die->next != UINT32_MAX && - die->next >= shard->dies.size) - die->next = UINT32_MAX; - } - - /* Finally, delete the new entries in the map. */ - for (struct drgn_dwarf_index_die_map_iterator it = - drgn_dwarf_index_die_map_first(&shard->map); - it.entry; ) { - if (it.entry->value >= shard->dies.size) { - it = drgn_dwarf_index_die_map_delete_iterator(&shard->map, - it); - } else { - it = drgn_dwarf_index_die_map_next(it); - } - } - } - - for (struct drgn_dwarf_index_specification_map_iterator it = - drgn_dwarf_index_specification_map_first(&dindex->specifications); - it.entry; ) { - if (it.entry->module->state == DRGN_DEBUG_INFO_MODULE_INDEXED) { - it = drgn_dwarf_index_specification_map_next(it); - } else { - it = drgn_dwarf_index_specification_map_delete_iterator(&dindex->specifications, - it); - } - } -} - -struct drgn_error * -drgn_dwarf_index_update(struct drgn_dwarf_index_update_state *state) -{ - struct drgn_dwarf_index *dindex = state->dindex; - - size_t old_cus_size = dindex->cus.size; - size_t new_cus_size = old_cus_size; - for (size_t i = 0; i < state->max_threads; i++) - new_cus_size += state->cus[i].size; - if (!drgn_dwarf_index_cu_vector_reserve(&dindex->cus, new_cus_size)) - return &drgn_enomem; - for (size_t i = 0; i < state->max_threads; i++) { - for (size_t j = 0; j < state->cus[i].size; j++) { - struct drgn_dwarf_index_pending_cu *pending_cu = - &state->cus[i].data[j]; - dindex->cus.data[dindex->cus.size++] = (struct drgn_dwarf_index_cu){ - .module = pending_cu->module, - .buf = pending_cu->buf, - .len = pending_cu->len, - .is_64_bit = pending_cu->is_64_bit, - .is_type_unit = - pending_cu->scn == DRGN_SCN_DEBUG_TYPES, - }; - } - } - - struct drgn_error *err = NULL; - #pragma omp parallel - { - struct path_hash_cache path_hash_cache; - path_hash_vector_init(&path_hash_cache.directories); - 
path_hash_cache.first_chunk = - malloc(sizeof(struct path_hash_chunk)); - if (path_hash_cache.first_chunk) { - path_hash_cache.first_chunk->next = NULL; - } else { - #pragma omp critical(drgn_dwarf_index_update_error) - if (!err) - err = &drgn_enomem; - } - #pragma omp for schedule(dynamic) - for (size_t i = old_cus_size; i < dindex->cus.size; i++) { - if (err) - continue; - struct drgn_dwarf_index_cu *cu = &dindex->cus.data[i]; - struct drgn_dwarf_index_cu_buffer cu_buffer; - drgn_dwarf_index_cu_buffer_init(&cu_buffer, cu); - struct drgn_error *cu_err = read_cu(&cu_buffer); - if (!cu_err) - cu_err = index_cu_first_pass(state->dindex, - &cu_buffer, - &path_hash_cache); - if (cu_err) { - #pragma omp critical(drgn_dwarf_index_update_error) - if (err) - drgn_error_destroy(cu_err); - else - err = cu_err; - } - } - path_hash_vector_deinit(&path_hash_cache.directories); - struct path_hash_chunk *chunk = path_hash_cache.first_chunk; - while (chunk) { - struct path_hash_chunk *next_chunk = chunk->next; - free(chunk); - chunk = next_chunk; - } - } - if (err) - goto err; - - #pragma omp parallel for schedule(dynamic) - for (size_t i = old_cus_size; i < dindex->cus.size; i++) { - if (err) - continue; - struct drgn_dwarf_index_cu *cu = &dindex->cus.data[i]; - struct drgn_dwarf_index_cu_buffer buffer; - drgn_dwarf_index_cu_buffer_init(&buffer, cu); - buffer.bb.pos += cu->is_64_bit ? 23 : 11; - if (cu->is_type_unit) - buffer.bb.pos += cu->is_64_bit ? 
16 : 12; - struct drgn_error *cu_err = - index_cu_second_pass(&dindex->global, &buffer); - if (cu_err) { - #pragma omp critical(drgn_dwarf_index_update_error) - if (err) - drgn_error_destroy(cu_err); - else - err = cu_err; - } - } - if (err) { - drgn_dwarf_index_rollback(dindex); -err: - for (size_t i = old_cus_size; i < dindex->cus.size; i++) - drgn_dwarf_index_cu_deinit(&dindex->cus.data[i]); - dindex->cus.size = old_cus_size; - } - return err; -} - -static struct drgn_error *index_namespace(struct drgn_dwarf_index_namespace *ns) -{ - if (ns->saved_err) - return drgn_error_copy(ns->saved_err); - - struct drgn_error *err = NULL; - #pragma omp for schedule(dynamic) - for (size_t i = 0; i < ns->pending_dies.size; i++) { - if (!err) { - struct drgn_dwarf_index_pending_die *pending = - &ns->pending_dies.data[i]; - struct drgn_dwarf_index_cu *cu = - &ns->dindex->cus.data[pending->cu]; - struct drgn_dwarf_index_cu_buffer buffer; - drgn_dwarf_index_cu_buffer_init(&buffer, cu); - buffer.bb.pos = (char *)pending->addr; - struct drgn_error *cu_err = - index_cu_second_pass(ns, &buffer); - if (cu_err) { - #pragma omp critical(drgn_index_namespace) - if (err) - drgn_error_destroy(cu_err); - else - err = cu_err; - } - } - } - if (err) { - ns->saved_err = err; - return drgn_error_copy(ns->saved_err); - } - ns->pending_dies.size = 0; - return err; -} - -struct drgn_error * -drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it, - struct drgn_dwarf_index_namespace *ns, - const char *name, size_t name_len, - const uint64_t *tags, size_t num_tags) -{ - struct drgn_error *err = index_namespace(ns); - if (err) - return err; - it->ns = ns; - if (name) { - struct string key = { - .str = name, - .len = name_len, - }; - struct hash_pair hp; - struct drgn_dwarf_index_shard *shard; - struct drgn_dwarf_index_die_map_iterator map_it; - - hp = drgn_dwarf_index_die_map_hash(&key); - it->shard = hash_pair_to_shard(hp); - shard = &ns->shards[it->shard]; - map_it = 
drgn_dwarf_index_die_map_search_hashed(&shard->map, - &key, hp); - it->index = map_it.entry ? map_it.entry->value : UINT32_MAX; - it->any_name = false; - } else { - it->index = 0; - for (it->shard = 0; it->shard < ARRAY_SIZE(ns->shards); - it->shard++) { - if (ns->shards[it->shard].dies.size) - break; - } - it->any_name = true; - } - it->tags = tags; - it->num_tags = num_tags; - return NULL; -} - -static inline bool -drgn_dwarf_index_iterator_matches_tag(struct drgn_dwarf_index_iterator *it, - struct drgn_dwarf_index_die *die) -{ - size_t i; - - if (it->num_tags == 0) - return true; - for (i = 0; i < it->num_tags; i++) { - if (die->tag == it->tags[i]) - return true; - } - return false; -} - -struct drgn_dwarf_index_die * -drgn_dwarf_index_iterator_next(struct drgn_dwarf_index_iterator *it) -{ - struct drgn_dwarf_index_namespace *ns = it->ns; - struct drgn_dwarf_index_die *die; - if (it->any_name) { - for (;;) { - if (it->shard >= ARRAY_SIZE(ns->shards)) - return NULL; - - struct drgn_dwarf_index_shard *shard = - &ns->shards[it->shard]; - die = &shard->dies.data[it->index]; - - if (++it->index >= shard->dies.size) { - it->index = 0; - while (++it->shard < ARRAY_SIZE(ns->shards)) { - if (ns->shards[it->shard].dies.size) - break; - } - } - - if (drgn_dwarf_index_iterator_matches_tag(it, die)) - break; - } - } else { - for (;;) { - if (it->index == UINT32_MAX) - return NULL; - - struct drgn_dwarf_index_shard *shard = - &ns->shards[it->shard]; - die = &shard->dies.data[it->index]; - - it->index = die->next; - - if (drgn_dwarf_index_iterator_matches_tag(it, die)) - break; - } - } - return die; -} - -struct drgn_error *drgn_dwarf_index_get_die(struct drgn_dwarf_index_die *die, - Dwarf_Die *die_ret) -{ - Dwarf_Addr bias; - Dwarf *dwarf = dwfl_module_getdwarf(die->module->dwfl_module, &bias); - if (!dwarf) - return drgn_error_libdwfl(); - uintptr_t start = - (uintptr_t)die->module->scn_data[DRGN_SCN_DEBUG_INFO]->d_buf; - size_t size = 
die->module->scn_data[DRGN_SCN_DEBUG_INFO]->d_size; - if (die->addr >= start && die->addr < start + size) { - if (!dwarf_offdie(dwarf, die->addr - start, die_ret)) - return drgn_error_libdw(); - } else { - start = (uintptr_t)die->module->scn_data[DRGN_SCN_DEBUG_TYPES]->d_buf; - if (!dwarf_offdie_types(dwarf, die->addr - start, die_ret)) - return drgn_error_libdw(); - } - return NULL; -} diff --git a/libdrgn/dwarf_index.h b/libdrgn/dwarf_index.h deleted file mode 100644 index ce3f65193..000000000 --- a/libdrgn/dwarf_index.h +++ /dev/null @@ -1,311 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// SPDX-License-Identifier: GPL-3.0-or-later - -/** - * @file - * - * DWARF debugging information index. - * - * See @ref DwarfIndex. - */ - -#ifndef DRGN_DWARF_INDEX_H -#define DRGN_DWARF_INDEX_H - -#include -#include -#include -#include - -#ifdef _OPENMP -#include -#else -typedef struct {} omp_lock_t; -#define omp_init_lock(lock) do {} while (0) -#define omp_destroy_lock(lock) do {} while (0) -#define omp_set_lock(lock) do {} while (0) -#define omp_unset_lock(lock) do {} while (0) -static inline int omp_get_thread_num(void) -{ - return 0; -} -static inline int omp_get_max_threads(void) -{ - return 1; -} -#endif - -#include "hash_table.h" -#include "vector.h" - -struct drgn_debug_info_module; -struct drgn_error; - -/** - * @ingroup Internals - * - * @defgroup DwarfIndex DWARF index - * - * DWARF debugging information index. - * - * A core part of debugger functionality is looking up types, variables, etc. by - * name. A @ref drgn_dwarf_index combines debugging information from all object - * files and indexes it by name. - * - * Because this indexing step happens as part of startup, it is parallelized and - * highly optimized. This is implemented as a homegrown DWARF parser specialized - * for the task of scanning over DIEs quickly. 
- * - * Although the DWARF standard defines ".debug_pubnames" and ".debug_names" - * sections, GCC and Clang currently don't emit them by default, so we don't use - * them. - * - * @{ - */ - -/* - * An indexed DIE. - * - * DIEs with the same name but different tags or files are considered distinct. - * We only compare the hash of the file name, not the string value, because a - * 64-bit collision is unlikely enough, especially when also considering the - * name and tag. - */ -struct drgn_dwarf_index_die { - /* - * The next DIE with the same name (as an index into - * drgn_dwarf_index_shard::dies), or UINT32_MAX if this is the last DIE. - */ - uint32_t next; - uint8_t tag; - union { - /* - * If tag != DW_TAG_namespace (namespaces are merged, so they - * don't need this). - */ - uint64_t file_name_hash; - /* If tag == DW_TAG_namespace. */ - struct drgn_dwarf_index_namespace *namespace; - }; - struct drgn_debug_info_module *module; - uintptr_t addr; -}; - -DEFINE_HASH_MAP_TYPE(drgn_dwarf_index_die_map, struct string, uint32_t) -DEFINE_VECTOR_TYPE(drgn_dwarf_index_die_vector, struct drgn_dwarf_index_die) - -struct drgn_dwarf_index_shard { - /** @privatesection */ - omp_lock_t lock; - /* - * Map from name to list of DIEs with that name (as the index in - * drgn_dwarf_index_shard::dies of the first DIE with that name). - */ - struct drgn_dwarf_index_die_map map; - /* - * We store all entries in a shard as a single array, which is more - * cache friendly. - */ - struct drgn_dwarf_index_die_vector dies; -}; - -#define DRGN_DWARF_INDEX_SHARD_BITS 8 - -/* A DIE with a DW_AT_specification attribute. */ -struct drgn_dwarf_index_specification { - /* - * Address of non-defining declaration DIE referenced by - * DW_AT_specification. - */ - uintptr_t declaration; - /* Module and address of DIE. 
*/ - struct drgn_debug_info_module *module; - uintptr_t addr; -}; - -static inline uintptr_t -drgn_dwarf_index_specification_to_key(const struct drgn_dwarf_index_specification *entry) -{ - return entry->declaration; -} - -DEFINE_HASH_TABLE_TYPE(drgn_dwarf_index_specification_map, - struct drgn_dwarf_index_specification, - drgn_dwarf_index_specification_to_key) - -DEFINE_VECTOR_TYPE(drgn_dwarf_index_cu_vector, struct drgn_dwarf_index_cu) - -DEFINE_VECTOR_TYPE(drgn_dwarf_index_pending_die_vector, - struct drgn_dwarf_index_pending_die) - -/** Mapping from names/tags to DIEs/nested namespaces. */ -struct drgn_dwarf_index_namespace { - /** - * Index shards. - * - * This is sharded to reduce lock contention. - */ - struct drgn_dwarf_index_shard shards[1 << DRGN_DWARF_INDEX_SHARD_BITS]; - /** Parent DWARF index. */ - struct drgn_dwarf_index *dindex; - /** DIEs we have not indexed yet. */ - struct drgn_dwarf_index_pending_die_vector pending_dies; - /** Saved error from a previous index. */ - struct drgn_error *saved_err; -}; - -/** - * Fast index of DWARF debugging information. - * - * This interface indexes DWARF debugging information by name and tag, - * deduplicating information which exists in multiple compilation units or - * files. It is much faster for this task than other generic DWARF parsing - * libraries. - * - * Searches in the index are done with a @ref drgn_dwarf_index_iterator. - */ -struct drgn_dwarf_index { - /** Global namespace. */ - struct drgn_dwarf_index_namespace global; - /** - * Map from address of DIE referenced by DW_AT_specification to DIE that - * references it. This is used to resolve DIEs with DW_AT_declaration to - * their definition. - * - * This is not sharded because there typically aren't enough of these in - * a program to cause contention. - */ - struct drgn_dwarf_index_specification_map specifications; - /** Indexed compilation units. */ - struct drgn_dwarf_index_cu_vector cus; -}; - -/** Initialize a @ref drgn_dwarf_index. 
*/ -void drgn_dwarf_index_init(struct drgn_dwarf_index *dindex); - -/** - * Deinitialize a @ref drgn_dwarf_index. - * - * After this is called, anything belonging to the index should no longer be - * accessed. - */ -void drgn_dwarf_index_deinit(struct drgn_dwarf_index *dindex); - -DEFINE_VECTOR_TYPE(drgn_dwarf_index_pending_cu_vector, - struct drgn_dwarf_index_pending_cu) - -/** State tracked while updating a @ref drgn_dwarf_index. */ -struct drgn_dwarf_index_update_state { - struct drgn_dwarf_index *dindex; - /** Per-thread arrays of CUs to be indexed. */ - struct drgn_dwarf_index_pending_cu_vector *cus; - size_t max_threads; -}; - -/** - * Initialize state for updating a @ref drgn_dwarf_index. - * - * @return @c true on success, @c false on failure to allocate memory. - */ -bool -drgn_dwarf_index_update_state_init(struct drgn_dwarf_index_update_state *state, - struct drgn_dwarf_index *dindex); - -/** Deinitialize state for updating a @ref drgn_dwarf_index. */ -void -drgn_dwarf_index_update_state_deinit(struct drgn_dwarf_index_update_state *state); - -/** Read a module for updating a @ref drgn_dwarf_index. */ -struct drgn_error * -drgn_dwarf_index_read_module(struct drgn_dwarf_index_update_state *state, - struct drgn_debug_info_module *module); - -/** - * Update a @ref drgn_dwarf_index. - * - * This should be called once all modules have been read with @ref - * drgn_dwarf_index_read_module() to finish indexing those modules. - */ -struct drgn_error * -drgn_dwarf_index_update(struct drgn_dwarf_index_update_state *state); - -/** - * Iterator over DWARF debugging information. - * - * An iterator is initialized with @ref drgn_dwarf_index_iterator_init(). It is - * advanced with @ref drgn_dwarf_index_iterator_next(). 
- */ -struct drgn_dwarf_index_iterator { - /** @privatesection */ - struct drgn_dwarf_index_namespace *ns; - const uint64_t *tags; - size_t num_tags; - size_t shard; - uint32_t index; - bool any_name; -}; - -/** - * Create an iterator over DIEs in a DWARF index namespace. - * - * @param[out] it DWARF index iterator to initialize. - * @param[in] ns DWARF index namespace. - * @param[in] name Name of DIE to search for, or @c NULL for any name. - * @param[in] name_len Length of @c name. - * @param[in] tags List of DIE tags to search for. - * @param[in] num_tags Number of tags in @p tags, or zero to search for any tag. - * @return @c NULL on success, non-@c NULL on error. - */ -struct drgn_error * -drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it, - struct drgn_dwarf_index_namespace *ns, - const char *name, size_t name_len, - const uint64_t *tags, size_t num_tags); - -/** - * Get the next matching DIE from a DWARF index iterator. - * - * If matching any name, this is O(n), where n is the number of indexed DIEs. If - * matching by name, this is O(1) on average and O(n) worst case. - * - * Note that this returns the parent @c DW_TAG_enumeration_type for indexed @c - * DW_TAG_enumerator DIEs. - * - * @param[in] it DWARF index iterator. - * @return Next DIE, or @c NULL if there are no more matching DIEs. - */ -struct drgn_dwarf_index_die * -drgn_dwarf_index_iterator_next(struct drgn_dwarf_index_iterator *it); - -/** - * Get a @c Dwarf_Die from a @ref drgn_dwarf_index_die. - * - * @param[in] die Indexed DIE. - * @param[out] die_ret Returned DIE. - * @return @c NULL on success, non-@c NULL on error. - */ -struct drgn_error *drgn_dwarf_index_get_die(struct drgn_dwarf_index_die *die, - Dwarf_Die *die_ret); - - -/** - * Find a definition corresponding to a declaration DIE. - * - * This finds the address of a DIE with a @c DW_AT_specification attribute that - * refers to the given address. - * - * @param[in] die_addr The address of the declaration DIE. 
- * @param[out] module_ret Returned module containing the definition DIE. - * @param[out] addr_ret Returned address of the definition DIE. - * @return @c true if a definition DIE was found, @c false if not (in which case - * *@p module_ret and *@p addr_ret are not modified). - */ -bool -drgn_dwarf_index_find_definition(struct drgn_dwarf_index *dindex, - uintptr_t die_addr, - struct drgn_debug_info_module **module_ret, - uintptr_t *addr_ret); - -/** @} */ - -#endif /* DRGN_DWARF_INDEX_H */ diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c new file mode 100644 index 000000000..5dc97dcad --- /dev/null +++ b/libdrgn/dwarf_info.c @@ -0,0 +1,7881 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: GPL-3.0-or-later + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _OPENMP +#include +#else +typedef struct {} omp_lock_t; +#define omp_init_lock(lock) do {} while (0) +#define omp_destroy_lock(lock) do {} while (0) +#define omp_set_lock(lock) do {} while (0) +#define omp_unset_lock(lock) do {} while (0) +static inline int omp_get_thread_num(void) +{ + return 0; +} +static inline int omp_get_max_threads(void) +{ + return 1; +} +#endif + +#include "array.h" +#include "debug_info.h" // IWYU pragma: associated +#include "error.h" +#include "language.h" +#include "lazy_object.h" +#include "minmax.h" +#include "object.h" +#include "path.h" +#include "program.h" +#include "register_state.h" +#include "serialize.h" +#include "type.h" +#include "util.h" + +void drgn_dwarf_module_info_deinit(struct drgn_debug_info_module *module) +{ + free(module->dwarf.fdes); + free(module->dwarf.cies); +} + +static inline uintptr_t +drgn_dwarf_specification_to_key(const struct drgn_dwarf_specification *entry) +{ + return entry->declaration; +} +DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_specification_map, + drgn_dwarf_specification_to_key, int_key_hash_pair, + scalar_key_eq) + +/** + 
* Placeholder for drgn_dwarf_index_cu::file_name_hashes if the CU has no + * filenames. + */ +static const uint64_t no_file_name_hashes[1] = { 0 }; + +/** DWARF compilation unit indexed in a @ref drgn_namespace_dwarf_index. */ +struct drgn_dwarf_index_cu { + /** Module containing CU. */ + struct drgn_debug_info_module *module; + /** Address of CU data. */ + const char *buf; + /** Length of CU data. */ + size_t len; + /** DWARF version from CU header. */ + uint8_t version; + /** `DW_UT_*` type from CU header. */ + uint8_t unit_type; + /** Address size from CU header. */ + uint8_t address_size; + /** Whether CU uses 64-bit DWARF format. */ + bool is_64_bit; + /** + * Section containing CU (@ref DRGN_SCN_DEBUG_INFO or @ref + * DRGN_SCN_DEBUG_TYPES). + */ + enum drgn_debug_info_scn scn; + /** + * Mapping from DWARF abbreviation code to instructions for that + * abbreviation. + * + * This is indexed on the DWARF abbreviation code minus one. I.e., + * `abbrev_insns[abbrev_decls[abbrev_code - 1]]` is the first + * instruction for that abbreviation code. + * + * Technically, abbreviation codes don't have to be sequential. In + * practice, GCC and Clang seem to always generate sequential codes + * starting at one, so we can get away with a flat array. + */ + uint32_t *abbrev_decls; + /** Number of abbreviation codes. */ + size_t num_abbrev_decls; + /** + * Buffer of @ref drgn_dwarf_index_abbrev_insn instructions for all + * abbreviation codes. + * + * These are all stored in one array for cache locality. + */ + uint8_t *abbrev_insns; + /** + * Hashes of file names from line number program header for this CU, + * indexed by the line number program file numbers. + */ + uint64_t *file_name_hashes; + /** Number of file names in the line number program header. */ + size_t num_file_names; + /** + * Pointer in `.debug_str_offsets` section to string offset entries for + * this CU. 
+ */ + const char *str_offsets; +}; + +DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_cu_vector) + +DEFINE_HASH_MAP_FUNCTIONS(drgn_dwarf_type_map, ptr_key_hash_pair, scalar_key_eq) + +/** DIE which needs to be indexed. */ +struct drgn_dwarf_index_pending_die { + /** + * CU containing DIE (as an index into @ref drgn_dwarf_info::index_cus). + */ + size_t cu; + /** Address of DIE */ + uintptr_t addr; +}; + +DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_pending_die_vector) + +/** DIE indexed in a @ref drgn_namespace_dwarf_index. */ +struct drgn_dwarf_index_die { + /** + * The next DIE with the same name (as an index into @ref + * drgn_dwarf_index_shard::dies), or `UINT32_MAX` if this is the last + * DIE. + */ + uint32_t next; + /** DIE tag. */ + uint8_t tag; + union { + /** + * Hash of filename containing declaration. + * + * DIEs with the same name but different tags or files are + * considered distinct. We only compare the hash of the file + * name, not the string value, because a 64-bit collision is + * unlikely enough, especially when also considering the name + * and tag. + * + * This is used if `tag != DW_TAG_namespace` (namespaces are + * merged, so they don't need this). + */ + uint64_t file_name_hash; + /** Nested namespace if `tag == DW_TAG_namespace`. */ + struct drgn_namespace_dwarf_index *namespace; + }; + /** Module containing this DIE. */ + struct drgn_debug_info_module *module; + /** Address of this DIE. */ + uintptr_t addr; +}; + +DEFINE_HASH_MAP(drgn_dwarf_index_die_map, struct nstring, uint32_t, + nstring_hash_pair, nstring_eq) +DEFINE_VECTOR(drgn_dwarf_index_die_vector, struct drgn_dwarf_index_die) + +#define DRGN_DWARF_INDEX_SHARD_BITS 8 +static const size_t DRGN_DWARF_INDEX_NUM_SHARDS = 1 << DRGN_DWARF_INDEX_SHARD_BITS; + +/** Shard of a @ref drgn_namespace_dwarf_index. */ +struct drgn_dwarf_index_shard { + /** Mutex for this shard. 
*/ + omp_lock_t lock; + /** + * Map from name to list of DIEs with that name (as the index in @ref + * drgn_dwarf_index_shard::dies of the first DIE with that name). + */ + struct drgn_dwarf_index_die_map map; + /** + * Entries in @ref drgn_dwarf_index_shard::map. + * + * These are stored in one array for cache locality. + */ + struct drgn_dwarf_index_die_vector dies; +}; + +static void +drgn_namespace_dwarf_index_init(struct drgn_namespace_dwarf_index *dindex, + struct drgn_debug_info *dbinfo) +{ + dindex->shards = NULL; + dindex->dbinfo = dbinfo; + drgn_dwarf_index_pending_die_vector_init(&dindex->pending_dies); + dindex->saved_err = NULL; +} + +static void +drgn_namespace_dwarf_index_deinit(struct drgn_namespace_dwarf_index *dindex) +{ + drgn_error_destroy(dindex->saved_err); + drgn_dwarf_index_pending_die_vector_deinit(&dindex->pending_dies); + if (dindex->shards) { + for (size_t i = 0; i < DRGN_DWARF_INDEX_NUM_SHARDS; i++) { + struct drgn_dwarf_index_shard *shard = &dindex->shards[i]; + for (size_t j = 0; j < shard->dies.size; j++) { + struct drgn_dwarf_index_die *die = &shard->dies.data[j]; + if (die->tag == DW_TAG_namespace) { + drgn_namespace_dwarf_index_deinit(die->namespace); + free(die->namespace); + } + } + drgn_dwarf_index_die_vector_deinit(&shard->dies); + drgn_dwarf_index_die_map_deinit(&shard->map); + omp_destroy_lock(&shard->lock); + } + free(dindex->shards); + } +} + +void drgn_dwarf_info_init(struct drgn_debug_info *dbinfo) +{ + drgn_namespace_dwarf_index_init(&dbinfo->dwarf.global, dbinfo); + drgn_dwarf_specification_map_init(&dbinfo->dwarf.specifications); + drgn_dwarf_index_cu_vector_init(&dbinfo->dwarf.index_cus); + drgn_dwarf_type_map_init(&dbinfo->dwarf.types); + drgn_dwarf_type_map_init(&dbinfo->dwarf.cant_be_incomplete_array_types); + dbinfo->dwarf.depth = 0; +} + +static void drgn_dwarf_index_cu_deinit(struct drgn_dwarf_index_cu *cu) +{ + if (cu->file_name_hashes != no_file_name_hashes) + free(cu->file_name_hashes); + 
free(cu->abbrev_insns); + free(cu->abbrev_decls); +} + +void drgn_dwarf_info_deinit(struct drgn_debug_info *dbinfo) +{ + drgn_dwarf_type_map_deinit(&dbinfo->dwarf.cant_be_incomplete_array_types); + drgn_dwarf_type_map_deinit(&dbinfo->dwarf.types); + for (size_t i = 0; i < dbinfo->dwarf.index_cus.size; i++) + drgn_dwarf_index_cu_deinit(&dbinfo->dwarf.index_cus.data[i]); + drgn_dwarf_index_cu_vector_deinit(&dbinfo->dwarf.index_cus); + drgn_dwarf_specification_map_deinit(&dbinfo->dwarf.specifications); + drgn_namespace_dwarf_index_deinit(&dbinfo->dwarf.global); +} + +/* + * Diagnostics. + */ + +#define DW_TAG_UNKNOWN_FORMAT "unknown DWARF tag 0x%02x" +#define DW_TAG_BUF_LEN (sizeof(DW_TAG_UNKNOWN_FORMAT) - 4 + 2 * sizeof(int)) + +/** + * Get the name of a DWARF tag. + * + * @return Static string if the tag is known or @p buf if the tag is unknown + * (populated with a description, never writing more than DW_TAG_BUF_LEN bytes). + */ +static const char *dw_tag_str(int tag, char buf[DW_TAG_BUF_LEN]) +{ + switch (tag) { +#define DWARF_ONE_KNOWN_DW_TAG(name, value) case value: return "DW_TAG_" #name; + DWARF_ALL_KNOWN_DW_TAG +#undef DWARF_ONE_KNOWN_DW_TAG + default: + snprintf(buf, DW_TAG_BUF_LEN, DW_TAG_UNKNOWN_FORMAT, tag); + return buf; + } +} + +/** + * Like @ref dw_tag_str(), but takes a @c Dwarf_Die.
*/ +static const char *dwarf_tag_str(Dwarf_Die *die, char buf[DW_TAG_BUF_LEN]) +{ + return dw_tag_str(dwarf_tag(die), buf); +} + +static struct drgn_error * +drgn_error_debug_info(struct drgn_debug_info_module *module, const char *ptr, + const char *message) +{ + uintptr_t p = (uintptr_t)ptr; + int end_match = -1; + for (int i = 0; i < array_size(module->scn_data); i++) { + if (!module->scn_data[i]) + continue; + uintptr_t start = (uintptr_t)module->scn_data[i]->d_buf; + uintptr_t end = start + module->scn_data[i]->d_size; + if (start <= p) { + if (p < end) { + return drgn_error_debug_info_scn(module, i, ptr, + message); + } else if (p == end) { + end_match = i; + } + } + } + if (end_match != -1) { + /* + * The pointer doesn't lie within a section, but it does point + * to the end of a section. + */ + return drgn_error_debug_info_scn(module, end_match, ptr, + message); + } + /* We couldn't find the section containing the pointer. */ + const char *name = dwfl_module_info(module->dwfl_module, NULL, NULL, + NULL, NULL, NULL, NULL, NULL); + return drgn_error_format(DRGN_ERROR_OTHER, "%s: %s", name, message); +} + +static inline struct drgn_error *drgn_check_address_size(uint8_t address_size) +{ + if (address_size < 1 || address_size > 8) { + return drgn_error_format(DRGN_ERROR_OTHER, + "unsupported address size %" PRIu8, + address_size); + } + return NULL; +} + +/* + * Indexing. + * + * A core part of debugger functionality is looking up types, variables, etc. by + * name. DWARF information can be very large, so scanning through all of it for + * every lookup would be too slow. Instead, when we load debugging information, + * we build an index of DIEs by name. + * + * This indexing step is parallelized and highly optimized. It is implemented as + * a bespoke DWARF parser specialized for the task of scanning over DIEs + * quickly. 
+ * + * Although the DWARF standard defines ".debug_pubnames" and ".debug_names" + * sections, GCC and Clang currently don't emit them by default, so we don't use + * them. + * + * Every namespace has a separate index (@ref drgn_namespace_dwarf_index). The + * global namespace is indexed immediately upon loading debugging information. + * Other namespaces are indexed when they are first accessed. + */ + +struct drgn_dwarf_index_pending_cu { + struct drgn_debug_info_module *module; + const char *buf; + size_t len; + bool is_64_bit; + enum drgn_debug_info_scn scn; +}; + +DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_pending_cu_vector) + +/** + * DWARF abbreviation table instructions. + * + * The DWARF abbreviation table can be large and contains more information than + * is strictly necessary for indexing. So, we translate the table into a series + * of instructions which specify how to process a DIE. This instruction stream + * omits unnecessary information and is more compact (and thus more cache + * friendly), which is important for the tight DIE parsing loop. + */ +enum drgn_dwarf_index_abbrev_insn { + /* + * Instructions > 0 and <= INSN_MAX_SKIP indicate a number of bytes to + * be skipped over. + */ + INSN_MAX_SKIP = 193, + + /* These instructions indicate an attribute that can be skipped over. */ + INSN_SKIP_BLOCK, + INSN_SKIP_BLOCK1, + INSN_SKIP_BLOCK2, + INSN_SKIP_BLOCK4, + INSN_SKIP_LEB128, + INSN_SKIP_STRING, + + /* These instructions indicate an attribute that should be parsed. 
*/ + INSN_SIBLING_REF1, + INSN_SIBLING_REF2, + INSN_SIBLING_REF4, + INSN_SIBLING_REF8, + INSN_SIBLING_REF_UDATA, + INSN_NAME_STRP4, + INSN_NAME_STRP8, + INSN_NAME_STRING, + INSN_NAME_STRX, + INSN_NAME_STRX1, + INSN_NAME_STRX2, + INSN_NAME_STRX3, + INSN_NAME_STRX4, + INSN_NAME_STRP_ALT4, + INSN_NAME_STRP_ALT8, + INSN_COMP_DIR_STRP4, + INSN_COMP_DIR_STRP8, + INSN_COMP_DIR_LINE_STRP4, + INSN_COMP_DIR_LINE_STRP8, + INSN_COMP_DIR_STRING, + INSN_COMP_DIR_STRX, + INSN_COMP_DIR_STRX1, + INSN_COMP_DIR_STRX2, + INSN_COMP_DIR_STRX3, + INSN_COMP_DIR_STRX4, + INSN_COMP_DIR_STRP_ALT4, + INSN_COMP_DIR_STRP_ALT8, + INSN_STR_OFFSETS_BASE4, + INSN_STR_OFFSETS_BASE8, + INSN_STMT_LIST_LINEPTR4, + INSN_STMT_LIST_LINEPTR8, + INSN_DECL_FILE_DATA1, + INSN_DECL_FILE_DATA2, + INSN_DECL_FILE_DATA4, + INSN_DECL_FILE_DATA8, + INSN_DECL_FILE_UDATA, + /* + * This instruction is the only one with an operand: the ULEB128 + * implicit constant. + */ + INSN_DECL_FILE_IMPLICIT, + INSN_DECLARATION_FLAG, + INSN_SPECIFICATION_REF1, + INSN_SPECIFICATION_REF2, + INSN_SPECIFICATION_REF4, + INSN_SPECIFICATION_REF8, + INSN_SPECIFICATION_REF_UDATA, + INSN_SPECIFICATION_REF_ADDR4, + INSN_SPECIFICATION_REF_ADDR8, + INSN_SPECIFICATION_REF_ALT4, + INSN_SPECIFICATION_REF_ALT8, + INSN_INDIRECT, + INSN_SIBLING_INDIRECT, + INSN_NAME_INDIRECT, + INSN_COMP_DIR_INDIRECT, + INSN_STR_OFFSETS_BASE_INDIRECT, + INSN_STMT_LIST_INDIRECT, + INSN_DECL_FILE_INDIRECT, + INSN_DECLARATION_INDIRECT, + INSN_SPECIFICATION_INDIRECT, + + NUM_INSNS, + + /* + * Every sequence of instructions for a DIE is terminated by a zero + * byte. + */ + INSN_END = 0, + + /* + * The byte after INSN_END contains the DIE flags, which are a bitmask + * of flags combined with the DWARF tag (which is zero if the DIE does + * not need to be indexed). + */ + INSN_DIE_FLAG_TAG_MASK = 0x3f, + /* DIE is a declaration. */ + INSN_DIE_FLAG_DECLARATION = 0x40, + /* DIE has children. */ + INSN_DIE_FLAG_CHILDREN = 0x80, +}; + +/* Instructions are 8 bits. 
*/ +static_assert(NUM_INSNS - 1 == UINT8_MAX, + "maximum DWARF index instruction is invalid"); + +DEFINE_VECTOR(uint8_vector, uint8_t) +DEFINE_VECTOR(uint32_vector, uint32_t) +DEFINE_VECTOR(uint64_vector, uint64_t) + +struct drgn_dwarf_index_cu_buffer { + struct binary_buffer bb; + struct drgn_dwarf_index_cu *cu; +}; + +static struct drgn_error * +drgn_dwarf_index_cu_buffer_error(struct binary_buffer *bb, const char *pos, + const char *message) +{ + struct drgn_dwarf_index_cu_buffer *buffer = + container_of(bb, struct drgn_dwarf_index_cu_buffer, bb); + return drgn_error_debug_info_scn(buffer->cu->module, + DRGN_SCN_DEBUG_INFO, pos, message); +} + +static void +drgn_dwarf_index_cu_buffer_init(struct drgn_dwarf_index_cu_buffer *buffer, + struct drgn_dwarf_index_cu *cu) +{ + binary_buffer_init(&buffer->bb, cu->buf, cu->len, + drgn_platform_is_little_endian(&cu->module->platform), + drgn_dwarf_index_cu_buffer_error); + buffer->cu = cu; +} + +static inline size_t hash_pair_to_shard(struct hash_pair hp) +{ + /* + * The 8 most significant bits of the hash are used as the F14 tag, so + * we don't want to use those for sharding. 
+ */ + return ((hp.first >> + (8 * sizeof(size_t) - 8 - DRGN_DWARF_INDEX_SHARD_BITS)) & + (DRGN_DWARF_INDEX_NUM_SHARDS - 1)); +} + +static bool +drgn_namespace_dwarf_index_alloc_shards(struct drgn_namespace_dwarf_index *dindex) +{ + if (dindex->shards) + return true; + dindex->shards = malloc_array(DRGN_DWARF_INDEX_NUM_SHARDS, + sizeof(*dindex->shards)); + if (!dindex->shards) + return false; + for (size_t i = 0; i < DRGN_DWARF_INDEX_NUM_SHARDS; i++) { + struct drgn_dwarf_index_shard *shard = &dindex->shards[i]; + omp_init_lock(&shard->lock); + drgn_dwarf_index_die_map_init(&shard->map); + drgn_dwarf_index_die_vector_init(&shard->dies); + } + return true; +} + +bool drgn_dwarf_index_state_init(struct drgn_dwarf_index_state *state, + struct drgn_debug_info *dbinfo) +{ + state->dbinfo = dbinfo; + state->max_threads = omp_get_max_threads(); + state->cus = malloc_array(state->max_threads, sizeof(*state->cus)); + if (!state->cus) + return false; + for (size_t i = 0; i < state->max_threads; i++) + drgn_dwarf_index_pending_cu_vector_init(&state->cus[i]); + return true; +} + +void drgn_dwarf_index_state_deinit(struct drgn_dwarf_index_state *state) +{ + for (size_t i = 0; i < state->max_threads; i++) + drgn_dwarf_index_pending_cu_vector_deinit(&state->cus[i]); + free(state->cus); +} + +static struct drgn_error * +drgn_dwarf_index_read_cus(struct drgn_dwarf_index_state *state, + struct drgn_debug_info_module *module, + enum drgn_debug_info_scn scn) +{ + struct drgn_dwarf_index_pending_cu_vector *cus = + &state->cus[omp_get_thread_num()]; + + struct drgn_error *err; + struct drgn_debug_info_buffer buffer; + drgn_debug_info_buffer_init(&buffer, module, scn); + while (binary_buffer_has_next(&buffer.bb)) { + struct drgn_dwarf_index_pending_cu *cu = + drgn_dwarf_index_pending_cu_vector_append_entry(cus); + if (!cu) + return &drgn_enomem; + cu->module = module; + cu->buf = buffer.bb.pos; + uint32_t unit_length32; + if ((err = binary_buffer_next_u32(&buffer.bb, &unit_length32))) + 
return err; + cu->is_64_bit = unit_length32 == UINT32_C(0xffffffff); + if (cu->is_64_bit) { + uint64_t unit_length64; + if ((err = binary_buffer_next_u64(&buffer.bb, + &unit_length64)) || + (err = binary_buffer_skip(&buffer.bb, + unit_length64))) + return err; + } else { + if ((err = binary_buffer_skip(&buffer.bb, + unit_length32))) + return err; + } + cu->len = buffer.bb.pos - cu->buf; + cu->scn = scn; + } + return NULL; +} + +struct drgn_error * +drgn_dwarf_index_read_module(struct drgn_dwarf_index_state *state, + struct drgn_debug_info_module *module) +{ + struct drgn_error *err; + err = drgn_dwarf_index_read_cus(state, module, DRGN_SCN_DEBUG_INFO); + if (!err && module->scn_data[DRGN_SCN_DEBUG_TYPES]) { + err = drgn_dwarf_index_read_cus(state, module, + DRGN_SCN_DEBUG_TYPES); + } + return err; +} + +static struct drgn_error *dw_form_to_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, + uint64_t form, uint8_t *insn_ret) +{ + struct drgn_error *err; + switch (form) { + case DW_FORM_addr: + *insn_ret = cu->address_size; + return NULL; + case DW_FORM_data1: + case DW_FORM_ref1: + case DW_FORM_flag: + case DW_FORM_strx1: + case DW_FORM_addrx1: + *insn_ret = 1; + return NULL; + case DW_FORM_data2: + case DW_FORM_ref2: + case DW_FORM_strx2: + case DW_FORM_addrx2: + *insn_ret = 2; + return NULL; + case DW_FORM_strx3: + case DW_FORM_addrx3: + *insn_ret = 3; + return NULL; + case DW_FORM_data4: + case DW_FORM_ref4: + case DW_FORM_ref_sup4: + case DW_FORM_strx4: + case DW_FORM_addrx4: + *insn_ret = 4; + return NULL; + case DW_FORM_data8: + case DW_FORM_ref8: + case DW_FORM_ref_sig8: + case DW_FORM_ref_sup8: + *insn_ret = 8; + return NULL; + case DW_FORM_data16: + *insn_ret = 16; + return NULL; + case DW_FORM_block: + case DW_FORM_exprloc: + *insn_ret = INSN_SKIP_BLOCK; + return NULL; + case DW_FORM_block1: + *insn_ret = INSN_SKIP_BLOCK1; + return NULL; + case DW_FORM_block2: + *insn_ret = INSN_SKIP_BLOCK2; + return NULL; + case DW_FORM_block4: + *insn_ret 
= INSN_SKIP_BLOCK4; + return NULL; + case DW_FORM_sdata: + case DW_FORM_udata: + case DW_FORM_ref_udata: + case DW_FORM_strx: + case DW_FORM_addrx: + case DW_FORM_loclistx: + case DW_FORM_rnglistx: + *insn_ret = INSN_SKIP_LEB128; + return NULL; + case DW_FORM_ref_addr: + if (cu->version < 3) { + *insn_ret = cu->address_size; + return NULL; + } + /* fallthrough */ + case DW_FORM_sec_offset: + case DW_FORM_strp: + case DW_FORM_strp_sup: + case DW_FORM_line_strp: + case DW_FORM_GNU_ref_alt: + case DW_FORM_GNU_strp_alt: + *insn_ret = cu->is_64_bit ? 8 : 4; + return NULL; + case DW_FORM_string: + *insn_ret = INSN_SKIP_STRING; + return NULL; + case DW_FORM_implicit_const: + if ((err = binary_buffer_skip_leb128(bb))) + return err; + /* fallthrough */ + case DW_FORM_flag_present: + *insn_ret = 0; + return NULL; + case DW_FORM_indirect: + *insn_ret = INSN_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %#" PRIx64, + form); + } +} + +static struct drgn_error *dw_at_sibling_to_insn(struct binary_buffer *bb, + uint64_t form, + uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_ref1: + *insn_ret = INSN_SIBLING_REF1; + return NULL; + case DW_FORM_ref2: + *insn_ret = INSN_SIBLING_REF2; + return NULL; + case DW_FORM_ref4: + *insn_ret = INSN_SIBLING_REF4; + return NULL; + case DW_FORM_ref8: + *insn_ret = INSN_SIBLING_REF8; + return NULL; + case DW_FORM_ref_udata: + *insn_ret = INSN_SIBLING_REF_UDATA; + return NULL; + case DW_FORM_indirect: + *insn_ret = INSN_SIBLING_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %#" PRIx64 " for DW_AT_sibling", + form); + } +} + +static struct drgn_error *dw_at_name_to_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, + uint64_t form, uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_strp: + if (!cu->module->scn_data[DRGN_SCN_DEBUG_STR]) { + return binary_buffer_error(bb, + "DW_FORM_strp without .debug_str section"); + } + if 
(cu->is_64_bit) + *insn_ret = INSN_NAME_STRP8; + else + *insn_ret = INSN_NAME_STRP4; + return NULL; + case DW_FORM_string: + *insn_ret = INSN_NAME_STRING; + return NULL; + case DW_FORM_strx: + *insn_ret = INSN_NAME_STRX; + return NULL; + case DW_FORM_strx1: + *insn_ret = INSN_NAME_STRX1; + return NULL; + case DW_FORM_strx2: + *insn_ret = INSN_NAME_STRX2; + return NULL; + case DW_FORM_strx3: + *insn_ret = INSN_NAME_STRX3; + return NULL; + case DW_FORM_strx4: + *insn_ret = INSN_NAME_STRX4; + return NULL; + case DW_FORM_GNU_strp_alt: + if (!cu->module->alt_debug_str_data) { + return binary_buffer_error(bb, + "DW_FORM_GNU_strp_alt without alternate .debug_str section"); + } + if (cu->is_64_bit) + *insn_ret = INSN_NAME_STRP_ALT8; + else + *insn_ret = INSN_NAME_STRP_ALT4; + return NULL; + case DW_FORM_indirect: + *insn_ret = INSN_NAME_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %#" PRIx64 " for DW_AT_name", + form); + } +} + +static struct drgn_error *dw_at_comp_dir_to_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, + uint64_t form, + uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_strp: + if (!cu->module->scn_data[DRGN_SCN_DEBUG_STR]) { + return binary_buffer_error(bb, + "DW_FORM_strp without .debug_str section"); + } + if (cu->is_64_bit) + *insn_ret = INSN_COMP_DIR_STRP8; + else + *insn_ret = INSN_COMP_DIR_STRP4; + return NULL; + case DW_FORM_line_strp: + if (!cu->module->scn_data[DRGN_SCN_DEBUG_LINE_STR]) { + return binary_buffer_error(bb, + "DW_FORM_line_strp without .debug_line_str section"); + } + if (cu->is_64_bit) + *insn_ret = INSN_COMP_DIR_LINE_STRP8; + else + *insn_ret = INSN_COMP_DIR_LINE_STRP4; + return NULL; + case DW_FORM_string: + *insn_ret = INSN_COMP_DIR_STRING; + return NULL; + case DW_FORM_strx: + *insn_ret = INSN_COMP_DIR_STRX; + return NULL; + case DW_FORM_strx1: + *insn_ret = INSN_COMP_DIR_STRX1; + return NULL; + case DW_FORM_strx2: + *insn_ret = INSN_COMP_DIR_STRX2; + 
return NULL; + case DW_FORM_strx3: + *insn_ret = INSN_COMP_DIR_STRX3; + return NULL; + case DW_FORM_strx4: + *insn_ret = INSN_COMP_DIR_STRX4; + return NULL; + case DW_FORM_GNU_strp_alt: + if (!cu->module->alt_debug_str_data) { + return binary_buffer_error(bb, + "DW_FORM_GNU_strp_alt without alternate .debug_str section"); + } + if (cu->is_64_bit) + *insn_ret = INSN_COMP_DIR_STRP_ALT8; + else + *insn_ret = INSN_COMP_DIR_STRP_ALT4; + return NULL; + case DW_FORM_indirect: + *insn_ret = INSN_COMP_DIR_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %#" PRIx64 " for DW_AT_comp_dir", + form); + } +} + +static struct drgn_error * +dw_at_str_offsets_base_to_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, uint64_t form, + uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_sec_offset: + if (cu->is_64_bit) + *insn_ret = INSN_STR_OFFSETS_BASE8; + else + *insn_ret = INSN_STR_OFFSETS_BASE4; + return NULL; + case DW_FORM_indirect: + *insn_ret = INSN_STR_OFFSETS_BASE_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %#" PRIx64 " for DW_AT_str_offsets_base", + form); + } +} + +static struct drgn_error * +dw_at_stmt_list_to_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, uint64_t form, + uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_data4: + *insn_ret = INSN_STMT_LIST_LINEPTR4; + return NULL; + case DW_FORM_data8: + *insn_ret = INSN_STMT_LIST_LINEPTR8; + return NULL; + case DW_FORM_sec_offset: + if (cu->is_64_bit) + *insn_ret = INSN_STMT_LIST_LINEPTR8; + else + *insn_ret = INSN_STMT_LIST_LINEPTR4; + return NULL; + case DW_FORM_indirect: + *insn_ret = INSN_STMT_LIST_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %#" PRIx64 " for DW_AT_stmt_list", + form); + } +} + +static struct drgn_error *dw_at_decl_file_to_insn(struct binary_buffer *bb, + uint64_t form, + uint8_t *insn_ret, + uint64_t 
*implicit_const_ret) +{ + switch (form) { + case DW_FORM_data1: + *insn_ret = INSN_DECL_FILE_DATA1; + return NULL; + case DW_FORM_data2: + *insn_ret = INSN_DECL_FILE_DATA2; + return NULL; + case DW_FORM_data4: + *insn_ret = INSN_DECL_FILE_DATA4; + return NULL; + case DW_FORM_data8: + *insn_ret = INSN_DECL_FILE_DATA8; + return NULL; + /* + * decl_file must be positive, so if the compiler uses + * DW_FORM_sdata for some reason, just treat it as udata. + */ + case DW_FORM_sdata: + case DW_FORM_udata: + *insn_ret = INSN_DECL_FILE_UDATA; + return NULL; + case DW_FORM_implicit_const: + *insn_ret = INSN_DECL_FILE_IMPLICIT; + return binary_buffer_next_uleb128(bb, implicit_const_ret); + case DW_FORM_indirect: + *insn_ret = INSN_DECL_FILE_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %#" PRIx64 " for DW_AT_decl_file", + form); + } +} + +static struct drgn_error * +dw_at_declaration_to_insn(struct binary_buffer *bb, uint64_t form, + uint8_t *insn_ret, uint8_t *die_flags) +{ + switch (form) { + case DW_FORM_flag: + *insn_ret = INSN_DECLARATION_FLAG; + return NULL; + case DW_FORM_flag_present: + /* + * This could be an instruction, but as long as we have a free + * DIE flag bit, we might as well use it. 
+ */ + *insn_ret = 0; + *die_flags |= INSN_DIE_FLAG_DECLARATION; + return NULL; + case DW_FORM_indirect: + *insn_ret = INSN_DECLARATION_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %#" PRIx64 " for DW_AT_declaration", + form); + } +} + +static struct drgn_error * +dw_at_specification_to_insn(struct drgn_dwarf_index_cu *cu, + struct binary_buffer *bb, uint64_t form, + uint8_t *insn_ret) +{ + switch (form) { + case DW_FORM_ref1: + *insn_ret = INSN_SPECIFICATION_REF1; + return NULL; + case DW_FORM_ref2: + *insn_ret = INSN_SPECIFICATION_REF2; + return NULL; + case DW_FORM_ref4: + *insn_ret = INSN_SPECIFICATION_REF4; + return NULL; + case DW_FORM_ref8: + *insn_ret = INSN_SPECIFICATION_REF8; + return NULL; + case DW_FORM_ref_udata: + *insn_ret = INSN_SPECIFICATION_REF_UDATA; + return NULL; + case DW_FORM_ref_addr: + if (cu->version >= 3) { + if (cu->is_64_bit) + *insn_ret = INSN_SPECIFICATION_REF_ADDR8; + else + *insn_ret = INSN_SPECIFICATION_REF_ADDR4; + } else { + if (cu->address_size == 8) + *insn_ret = INSN_SPECIFICATION_REF_ADDR8; + else if (cu->address_size == 4) + *insn_ret = INSN_SPECIFICATION_REF_ADDR4; + else + return binary_buffer_error(bb, + "unsupported address size %" PRIu8 " for DW_FORM_ref_addr", + cu->address_size); + } + return NULL; + case DW_FORM_GNU_ref_alt: + if (!cu->module->alt_debug_info_data) { + return binary_buffer_error(bb, + "DW_FORM_GNU_ref_alt without alternate .debug_info section"); + } + if (cu->is_64_bit) + *insn_ret = INSN_SPECIFICATION_REF_ALT8; + else + *insn_ret = INSN_SPECIFICATION_REF_ALT4; + return NULL; + case DW_FORM_indirect: + *insn_ret = INSN_SPECIFICATION_INDIRECT; + return NULL; + default: + return binary_buffer_error(bb, + "unknown attribute form %#" PRIx64 " for DW_AT_specification", + form); + } +} + +static bool append_uleb128(struct uint8_vector *insns, uint64_t value) +{ + do { + uint8_t byte = value & 0x7f; + value >>= 7; + if (value != 0) + byte |= 0x80; + if 
(!uint8_vector_append(insns, &byte)) + return false; + } while (value != 0); + return true; +} + +static struct drgn_error * +read_abbrev_decl(struct drgn_debug_info_buffer *buffer, + struct drgn_dwarf_index_cu *cu, struct uint32_vector *decls, + struct uint8_vector *insns) +{ + struct drgn_error *err; + + uint64_t code; + if ((err = binary_buffer_next_uleb128(&buffer->bb, &code))) + return err; + if (code == 0) + return &drgn_stop; + if (code != decls->size + 1) { + return binary_buffer_error(&buffer->bb, + "DWARF abbrevation table is not sequential"); + } + + uint32_t insn_index = insns->size; + if (!uint32_vector_append(decls, &insn_index)) + return &drgn_enomem; + + uint64_t tag; + if ((err = binary_buffer_next_uleb128(&buffer->bb, &tag))) + return err; + + bool should_index; + switch (tag) { + /* Types. */ + case DW_TAG_base_type: + case DW_TAG_class_type: + case DW_TAG_enumeration_type: + case DW_TAG_structure_type: + case DW_TAG_typedef: + case DW_TAG_union_type: + /* Variables. */ + case DW_TAG_variable: + /* Constants. */ + case DW_TAG_enumerator: + /* Functions. */ + case DW_TAG_subprogram: + /* Namespaces */ + case DW_TAG_namespace: + /* If adding anything here, make sure it fits in INSN_DIE_FLAG_TAG_MASK. */ + should_index = true; + break; + default: + should_index = false; + break; + } + uint8_t die_flags = should_index ? 
tag : 0; + + uint8_t children; + if ((err = binary_buffer_next_u8(&buffer->bb, &children))) + return err; + if (children) + die_flags |= INSN_DIE_FLAG_CHILDREN; + + uint8_t insn, last_insn = UINT8_MAX; + for (;;) { + uint64_t name, form; + uint64_t implicit_const; + if ((err = binary_buffer_next_uleb128(&buffer->bb, &name))) + return err; + if ((err = binary_buffer_next_uleb128(&buffer->bb, &form))) + return err; + if (name == 0 && form == 0) + break; + + if (name == DW_AT_sibling) { + err = dw_at_sibling_to_insn(&buffer->bb, form, &insn); + } else if (name == DW_AT_name && should_index) { + err = dw_at_name_to_insn(cu, &buffer->bb, form, &insn); + } else if (name == DW_AT_comp_dir) { + err = dw_at_comp_dir_to_insn(cu, &buffer->bb, form, + &insn); + } else if (name == DW_AT_str_offsets_base) { + if (!cu->module->scn_data[DRGN_SCN_DEBUG_STR_OFFSETS]) { + return binary_buffer_error(&buffer->bb, + "DW_AT_str_offsets_base without .debug_str_offsets section"); + } + err = dw_at_str_offsets_base_to_insn(cu, &buffer->bb, + form, &insn); + } else if (name == DW_AT_stmt_list) { + if (!cu->module->scn_data[DRGN_SCN_DEBUG_LINE]) { + return binary_buffer_error(&buffer->bb, + "DW_AT_stmt_list without .debug_line section"); + } + err = dw_at_stmt_list_to_insn(cu, &buffer->bb, form, + &insn); + } else if (name == DW_AT_decl_file && should_index && + /* Namespaces are merged, so we ignore their file. 
*/ + tag != DW_TAG_namespace) { + err = dw_at_decl_file_to_insn(&buffer->bb, form, &insn, + &implicit_const); + } else if (name == DW_AT_declaration && should_index) { + err = dw_at_declaration_to_insn(&buffer->bb, form, + &insn, &die_flags); + } else if (name == DW_AT_specification && should_index) { + err = dw_at_specification_to_insn(cu, &buffer->bb, form, + &insn); + } else { + err = dw_form_to_insn(cu, &buffer->bb, form, &insn); + } + if (err) + return err; + + if (insn != 0) { + if (insn <= INSN_MAX_SKIP) { + if (last_insn + insn <= INSN_MAX_SKIP) { + insns->data[insns->size - 1] += insn; + continue; + } else if (last_insn < INSN_MAX_SKIP) { + insn = last_insn + insn - INSN_MAX_SKIP; + insns->data[insns->size - 1] = INSN_MAX_SKIP; + } + } + last_insn = insn; + + if (!uint8_vector_append(insns, &insn)) + return &drgn_enomem; + + if (insn == INSN_DECL_FILE_IMPLICIT && + !append_uleb128(insns, implicit_const)) + return &drgn_enomem; + } + } + insn = INSN_END; + if (!uint8_vector_append(insns, &insn) || + !uint8_vector_append(insns, &die_flags)) + return &drgn_enomem; + return NULL; +} + +static struct drgn_error *read_abbrev_table(struct drgn_dwarf_index_cu *cu, + size_t debug_abbrev_offset) +{ + struct drgn_debug_info_buffer buffer; + drgn_debug_info_buffer_init(&buffer, cu->module, DRGN_SCN_DEBUG_ABBREV); + /* Checked in read_cu(). */ + buffer.bb.pos += debug_abbrev_offset; + struct uint32_vector decls = VECTOR_INIT; + struct uint8_vector insns = VECTOR_INIT; + for (;;) { + struct drgn_error *err = read_abbrev_decl(&buffer, cu, &decls, + &insns); + if (err == &drgn_stop) { + break; + } else if (err) { + uint8_vector_deinit(&insns); + uint32_vector_deinit(&decls); + return err; + } + } + uint8_vector_shrink_to_fit(&insns); + uint32_vector_shrink_to_fit(&decls); + cu->abbrev_decls = decls.data; + cu->num_abbrev_decls = decls.size; + cu->abbrev_insns = insns.data; + return NULL; +} + +/* Get the size of a unit header beyond that of a normal compilation unit. 
*/ +static size_t cu_header_extra_size(struct drgn_dwarf_index_cu *cu) +{ + switch (cu->unit_type) { + case DW_UT_compile: + case DW_UT_partial: + return 0; + case DW_UT_skeleton: + case DW_UT_split_compile: + /* dwo_id */ + return 8; + case DW_UT_type: + case DW_UT_split_type: + /* type_signature and type_offset */ + return cu->is_64_bit ? 16 : 12; + default: + UNREACHABLE(); + } +} + +static size_t cu_header_size(struct drgn_dwarf_index_cu *cu) +{ + size_t size = cu->is_64_bit ? 23 : 11; + if (cu->version >= 5) + size++; + size += cu_header_extra_size(cu); + return size; +} + +static struct drgn_error *read_cu(struct drgn_dwarf_index_cu_buffer *buffer) +{ + struct drgn_error *err; + buffer->bb.pos += buffer->cu->is_64_bit ? 12 : 4; + uint16_t version; + if ((err = binary_buffer_next_u16(&buffer->bb, &version))) + return err; + if (version < 2 || version > 5) { + return binary_buffer_error(&buffer->bb, + "unknown DWARF CU version %" PRIu16, + version); + } + buffer->cu->version = version; + + if (version >= 5) { + if ((err = binary_buffer_next_u8(&buffer->bb, + &buffer->cu->unit_type))) + return err; + if (buffer->cu->unit_type < DW_UT_compile || + buffer->cu->unit_type > DW_UT_split_type) { + return binary_buffer_error(&buffer->bb, + "unknown DWARF unit type"); + } + } else if (buffer->cu->scn == DRGN_SCN_DEBUG_TYPES) { + buffer->cu->unit_type = DW_UT_type; + } else { + buffer->cu->unit_type = DW_UT_compile; + } + + if (version >= 5 && + (err = binary_buffer_next_u8(&buffer->bb, + &buffer->cu->address_size))) + return err; + + uint64_t debug_abbrev_offset; + if (buffer->cu->is_64_bit) { + if ((err = binary_buffer_next_u64(&buffer->bb, + &debug_abbrev_offset))) + return err; + } else { + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &debug_abbrev_offset))) + return err; + } + if (debug_abbrev_offset > + buffer->cu->module->scn_data[DRGN_SCN_DEBUG_ABBREV]->d_size) { + return binary_buffer_error(&buffer->bb, + "debug_abbrev_offset is out of bounds"); + } 
+ + if (version < 5 && + (err = binary_buffer_next_u8(&buffer->bb, + &buffer->cu->address_size))) + return err; + if (buffer->cu->address_size > 8) { + return binary_buffer_error(&buffer->bb, + "unsupported address size %" PRIu8, + buffer->cu->address_size); + } + + if ((err = binary_buffer_skip(&buffer->bb, + cu_header_extra_size(buffer->cu)))) + return err; + + return read_abbrev_table(buffer->cu, debug_abbrev_offset); +} + +static struct drgn_error *read_strx(struct drgn_dwarf_index_cu_buffer *buffer, + uint64_t strx, const char **ret) +{ + if (!buffer->cu->str_offsets) { + return binary_buffer_error(&buffer->bb, + "string index without DW_AT_str_offsets_base"); + } + Elf_Data *debug_str_offsets = + buffer->cu->module->scn_data[DRGN_SCN_DEBUG_STR_OFFSETS]; + size_t offset_size = buffer->cu->is_64_bit ? 8 : 4; + if (((char *)debug_str_offsets->d_buf + debug_str_offsets->d_size + - buffer->cu->str_offsets) + / offset_size <= strx) { + return binary_buffer_error(&buffer->bb, + "string index out of bounds"); + } + uint64_t strp; + if (buffer->cu->is_64_bit) { + memcpy(&strp, (uint64_t *)buffer->cu->str_offsets + strx, + sizeof(strp)); + if (buffer->bb.bswap) + strp = bswap_64(strp); + } else { + uint32_t strp32; + memcpy(&strp32, (uint32_t *)buffer->cu->str_offsets + strx, + sizeof(strp32)); + if (buffer->bb.bswap) + strp32 = bswap_32(strp32); + strp = strp32; + } + if (strp >= buffer->cu->module->scn_data[DRGN_SCN_DEBUG_STR]->d_size) { + return binary_buffer_error(&buffer->bb, + "indirect string is out of bounds"); + } + *ret = ((char *)buffer->cu->module->scn_data[DRGN_SCN_DEBUG_STR]->d_buf + + strp); + return NULL; +} + +static struct drgn_error *read_lnp_header(struct drgn_debug_info_buffer *buffer, + bool *is_64_bit_ret, int *version_ret) +{ + struct drgn_error *err; + uint32_t tmp; + if ((err = binary_buffer_next_u32(&buffer->bb, &tmp))) + return err; + bool is_64_bit = tmp == UINT32_C(0xffffffff); + if (is_64_bit && + (err = binary_buffer_skip(&buffer->bb, 
sizeof(uint64_t)))) + return err; + *is_64_bit_ret = is_64_bit; + + uint16_t version; + if ((err = binary_buffer_next_u16(&buffer->bb, &version))) + return err; + if (version < 2 || version > 5) { + return binary_buffer_error(&buffer->bb, + "unknown DWARF LNP version %" PRIu16, + version); + } + *version_ret = version; + + uint8_t opcode_base; + if ((err = binary_buffer_skip(&buffer->bb, + /* address_size + segment_selector_size */ + + (version >= 5 ? 2 : 0) + + (is_64_bit ? 8 : 4) /* header_length */ + + 1 /* minimum_instruction_length */ + + (version >= 4) /* maximum_operations_per_instruction */ + + 1 /* default_is_stmt */ + + 1 /* line_base */ + + 1 /* line_range */)) || + (err = binary_buffer_next_u8(&buffer->bb, &opcode_base)) || + (err = binary_buffer_skip(&buffer->bb, opcode_base - 1))) + return err; + + return NULL; +} + +/** + * Cached hash of file path. + * + * File names in the DWARF line number program header consist of three parts: + * the compilation directory path, the directory path, and the file name. + * Multiple file names may be relative to the same directory, and relative + * directory paths are all relative to the compilation directory. + * + * We'd like to hash DWARF file names to a unique hash so that we can + * deduplicate definitions without comparing full paths. + * + * The naive way to hash a DWARF file name entry would be to join and normalize + * the compilation directory path, directory path, and file name, and hash that. + * But this would involve a lot of redundant computations since most paths will + * have common prefixes. Instead, we cache the hashes of each directory path and + * update the hash for relative paths. + * + * It is not sufficient to cache the final hash for each directory because ".." + * components may require us to use the hash of a parent directory. So, we also + * cache the hash of every parent directory in a linked list. + * + * We use the FNV-1a hash function. 
Although FNV-1a is + * [known](https://github.com/rurban/smhasher/blob/master/doc/FNV1a.txt) to have + * some hash quality problems, it is sufficient for producing unique 64-bit + * hashes of file names. It has a couple of advantages over "better" hash + * functions: + * + * 1. Its only internal state is the 64-bit hash value, which keeps this + * structure small. + * 2. It operates byte-by-byte, which works well for incrementally hashing lots + * of short path components. + */ +struct path_hash { + /** Hash of this path. */ + uint64_t hash; + /** + * Tagged pointer comprising `struct path_hash *` of parent directory + * and flag in lowest-order bit specifying whether this path ends in a + * ".." component. + */ + uintptr_t parent_and_is_dot_dot; +}; + +#define FNV_OFFSET_BASIS_64 UINT64_C(0xcbf29ce484222325) +#define FNV_PRIME_64 UINT64_C(0x00000100000001b3) + +static inline void path_hash_update(struct path_hash *path_hash, + const void *src, size_t len) +{ + const uint8_t *s = src, *end = s + len; + uint64_t hash = path_hash->hash; + while (s < end) { + hash ^= *(s++); + hash *= FNV_PRIME_64; + } + path_hash->hash = hash; +} + +/** Path hash of "" (empty string). */ +static const struct path_hash empty_path_hash = { FNV_OFFSET_BASIS_64 }; +/** Path hash of "/". */ +static const struct path_hash absolute_path_hash = { + (FNV_OFFSET_BASIS_64 ^ '/') * FNV_PRIME_64, +}; + +static inline const struct path_hash * +path_hash_parent(const struct path_hash *path_hash) +{ + return (struct path_hash *)(path_hash->parent_and_is_dot_dot + & ~(uintptr_t)1); +} + +static inline bool path_hash_is_dot_dot(const struct path_hash *path_hash) +{ + return path_hash->parent_and_is_dot_dot & 1; +} + +/** Chunk of allocated @ref path_hash objects. See @ref path_hash_cache. 
*/ +struct path_hash_chunk { + struct path_hash objects[(4096 - sizeof(struct path_hash_chunk *)) + / sizeof(struct path_hash)]; + struct path_hash_chunk *next; +}; + +DEFINE_VECTOR(path_hash_vector, const struct path_hash *) + +struct lnp_entry_format { + uint64_t content_type; + uint64_t form; +}; + +static const struct lnp_entry_format dwarf4_directory_entry_formats[] = { + { DW_LNCT_path, DW_FORM_string }, +}; +static const struct lnp_entry_format dwarf4_file_name_entry_formats[] = { + { DW_LNCT_path, DW_FORM_string }, + { DW_LNCT_directory_index, DW_FORM_udata }, + { DW_LNCT_timestamp, DW_FORM_udata }, + { DW_LNCT_size, DW_FORM_udata }, +}; + +/** + * Cache of hashed file paths. + * + * This uses a bump allocator for @ref path_hash objects. @ref path_hash objects + * are allocated sequentially out of a @ref path_hash_chunk; when a chunk is + * exhausted, a new @ref path_hash_chunk is allocated from the heap. The + * allocated chunks are kept and reused for each DWARF line number program; they + * are freed at the end of the first indexing pass. + * + * This also caches the allocations for directory hashes and line number program + * header entry formats. + */ +struct path_hash_cache { + /** Next @ref path_hash object to be allocated. */ + struct path_hash *next_object; + /** @ref path_hash_chunk currently being allocated from. */ + struct path_hash_chunk *current_chunk; + /** First allocated @ref path_hash_chunk. */ + struct path_hash_chunk *first_chunk; + /** Hashed directory paths. */ + struct path_hash_vector directories; + /** Line number program header entry formats. */ + struct lnp_entry_format *entry_formats; + /** Allocated size of @ref path_hash_cache::entry_formats. 
*/ + size_t entry_formats_capacity; +}; + +static struct path_hash *path_hash_alloc(struct path_hash_cache *cache) +{ + struct path_hash_chunk *current_chunk = cache->current_chunk; + if (cache->next_object < + ¤t_chunk->objects[array_size(current_chunk->objects)]) + return cache->next_object++; + struct path_hash_chunk *next_chunk = current_chunk->next; + if (!next_chunk) { + next_chunk = malloc(sizeof(*next_chunk)); + if (!next_chunk) + return NULL; + next_chunk->next = NULL; + current_chunk->next = next_chunk; + } + cache->current_chunk = next_chunk; + cache->next_object = &next_chunk->objects[1]; + return next_chunk->objects; +} + +static inline bool is_dot_dot(const char *component, size_t component_len) +{ + return component_len == 2 && component[0] == '.' && component[1] == '.'; +} + +static const struct path_hash *hash_path(struct path_hash_cache *cache, + const char *path, + const struct path_hash *path_hash) +{ + const char *p = path; + if (*p == '/') { + path_hash = &absolute_path_hash; + p++; + } + while (*p != '\0') { + const char *component = p; + p = strchrnul(p, '/'); + size_t component_len = p - component; + if (*p == '/') + p++; + if (component_len == 0 || + (component_len == 1 && component[0] == '.')) { + } else if (!is_dot_dot(component, component_len) || + path_hash == &empty_path_hash || + path_hash_is_dot_dot(path_hash)) { + struct path_hash *new_path_hash = path_hash_alloc(cache); + if (!new_path_hash) + return NULL; + new_path_hash->hash = path_hash->hash; + if (path_hash->parent_and_is_dot_dot != 0) + path_hash_update(new_path_hash, "/", 1); + path_hash_update(new_path_hash, component, + component_len); + new_path_hash->parent_and_is_dot_dot = + ((uintptr_t)path_hash | + is_dot_dot(component, component_len)); + path_hash = new_path_hash; + } else if (path_hash != &absolute_path_hash) { + path_hash = path_hash_parent(path_hash); + } + } + return path_hash; +} + +static struct drgn_error * +read_lnp_entry_formats(struct 
drgn_debug_info_buffer *buffer,
+		       struct path_hash_cache *cache, int *count_ret)
+{
+	struct drgn_error *err;
+	uint8_t count;
+	if ((err = binary_buffer_next_u8(&buffer->bb, &count)))
+		return err;
+	if (count > cache->entry_formats_capacity) { /* grow the cached array; its old contents are not kept */
+		free(cache->entry_formats);
+		cache->entry_formats = malloc_array(count,
+						    sizeof(cache->entry_formats[0]));
+		if (!cache->entry_formats) {
+			cache->entry_formats_capacity = 0; /* keep the capacity consistent with the now-NULL array */
+			return &drgn_enomem;
+		}
+		cache->entry_formats_capacity = count;
+	}
+	bool have_path = false;
+	for (int i = 0; i < count; i++) {
+		if ((err = binary_buffer_next_uleb128(&buffer->bb,
+						      &cache->entry_formats[i].content_type)))
+			return err;
+		if (cache->entry_formats[i].content_type == DW_LNCT_path)
+			have_path = true;
+		if ((err = binary_buffer_next_uleb128(&buffer->bb,
+						      &cache->entry_formats[i].form)))
+			return err;
+	}
+	if (!have_path) { /* read_file_name_table() hashes the path of every entry, so one must be present */
+		return binary_buffer_error(&buffer->bb,
+					   "DWARF line number program header entry does not include DW_LNCT_path");
+	}
+	*count_ret = count;
+	return NULL;
+}
+/*
+ * Skip over one value encoded with the given form in a line number program
+ * header entry.
+ *
+ * The block forms read their length prefix and skip that many bytes, the
+ * fixed-size forms skip a constant number of bytes, the offset forms skip 8 or
+ * 4 bytes depending on is_64_bit, and the LEB128 and string forms use the
+ * corresponding binary_buffer skip helpers. Any other form is an error.
+ */
+static struct drgn_error *skip_lnp_form(struct binary_buffer *bb,
+					bool is_64_bit, uint64_t form)
+{
+	struct drgn_error *err;
+	uint64_t skip;
+	switch (form) {
+	case DW_FORM_block:
+		if ((err = binary_buffer_next_uleb128(bb, &skip)))
+			return err;
+block: /* shared tail for all block forms: skip holds the block length */
+		return binary_buffer_skip(bb, skip);
+	case DW_FORM_block1:
+		if ((err = binary_buffer_next_u8_into_u64(bb, &skip)))
+			return err;
+		goto block;
+	case DW_FORM_block2:
+		if ((err = binary_buffer_next_u16_into_u64(bb, &skip)))
+			return err;
+		goto block;
+	case DW_FORM_block4:
+		if ((err = binary_buffer_next_u32_into_u64(bb, &skip)))
+			return err;
+		goto block;
+	case DW_FORM_data1:
+	case DW_FORM_flag:
+	case DW_FORM_strx1:
+		return binary_buffer_skip(bb, 1);
+	case DW_FORM_data2:
+	case DW_FORM_strx2:
+		return binary_buffer_skip(bb, 2);
+	case DW_FORM_strx3:
+		return binary_buffer_skip(bb, 3);
+	case DW_FORM_data4:
+	case DW_FORM_strx4:
+		return binary_buffer_skip(bb, 4);
+	case
DW_FORM_data8:
+		return binary_buffer_skip(bb, 8);
+	case DW_FORM_data16:
+		return binary_buffer_skip(bb, 16);
+	case DW_FORM_line_strp:
+	case DW_FORM_sec_offset:
+	case DW_FORM_strp:
+		return binary_buffer_skip(bb, is_64_bit ? 8 : 4);
+	case DW_FORM_sdata:
+	case DW_FORM_strx:
+	case DW_FORM_udata:
+		return binary_buffer_skip_leb128(bb);
+	case DW_FORM_string:
+		return binary_buffer_skip_string(bb);
+	default:
+		return binary_buffer_error(bb,
+					   "unknown attribute form %#" PRIx64 " for line number program",
+					   form);
+	}
+}
+/*
+ * Read a DW_LNCT_path value encoded with the given form and return a pointer to
+ * the string in *ret.
+ *
+ * DW_FORM_string yields a pointer into the buffer itself; DW_FORM_line_strp and
+ * DW_FORM_strp read a 4- or 8-byte offset (depending on is_64_bit) into the
+ * module's DRGN_SCN_DEBUG_LINE_STR or DRGN_SCN_DEBUG_STR section data. Other
+ * forms are an error.
+ */
+static struct drgn_error *read_lnp_string(struct drgn_debug_info_buffer *buffer,
+					  bool is_64_bit, uint64_t form,
+					  const char **ret)
+{
+	struct drgn_error *err;
+	uint64_t strp;
+	Elf_Data *data;
+	switch (form) {
+	case DW_FORM_string:
+		*ret = buffer->bb.pos; /* set before the skip, so *ret is written even if the skip fails */
+		return binary_buffer_skip_string(&buffer->bb);
+	case DW_FORM_line_strp:
+	case DW_FORM_strp:
+		if (is_64_bit)
+			err = binary_buffer_next_u64(&buffer->bb, &strp);
+		else
+			err = binary_buffer_next_u32_into_u64(&buffer->bb, &strp);
+		if (err)
+			return err;
+		data = buffer->module->scn_data[
+			form == DW_FORM_line_strp ?
+ DRGN_SCN_DEBUG_LINE_STR : DRGN_SCN_DEBUG_STR]; + if (!data || strp >= data->d_size) { + return binary_buffer_error(&buffer->bb, + "DW_LNCT_path is out of bounds"); + } + *ret = (const char *)data->d_buf + strp; + return NULL; + default: + return binary_buffer_error(&buffer->bb, + "unknown attribute form %#" PRIx64 " for DW_LNCT_path", + form); + } +} + +static struct drgn_error * +read_lnp_directory_index(struct drgn_debug_info_buffer *buffer, uint64_t form, + uint64_t *ret) +{ + switch (form) { + case DW_FORM_data1: + return binary_buffer_next_u8_into_u64(&buffer->bb, ret); + case DW_FORM_data2: + return binary_buffer_next_u16_into_u64(&buffer->bb, ret); + case DW_FORM_udata: + return binary_buffer_next_uleb128(&buffer->bb, ret); + default: + return binary_buffer_error(&buffer->bb, + "unknown attribute form %#" PRIx64 " for DW_LNCT_directory_index", + form); + } +} + +static struct drgn_error *read_file_name_table(struct path_hash_cache *cache, + struct drgn_dwarf_index_cu *cu, + const char *comp_dir, + size_t stmt_list) +{ + struct drgn_error *err; + + struct drgn_debug_info_buffer buffer; + drgn_debug_info_buffer_init(&buffer, cu->module, DRGN_SCN_DEBUG_LINE); + /* Checked in index_cu_first_pass(). */ + buffer.bb.pos += stmt_list; + + bool is_64_bit; + int version; + if ((err = read_lnp_header(&buffer, &is_64_bit, &version))) + return err; + + cache->current_chunk = cache->first_chunk; + cache->next_object = cache->first_chunk->objects; + cache->directories.size = 0; + + const struct lnp_entry_format *entry_formats; + int entry_format_count; + uint64_t entry_count = 0; /* For -Wmaybe-uninitialized. 
*/ + const struct path_hash *path_hash, *parent; + if (version >= 5) { + if ((err = read_lnp_entry_formats(&buffer, cache, + &entry_format_count))) + return err; + entry_formats = cache->entry_formats; + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &entry_count))) + return err; + if (entry_count > SIZE_MAX || + !path_hash_vector_reserve(&cache->directories, entry_count)) + return err; + parent = &empty_path_hash; + } else { + entry_formats = dwarf4_directory_entry_formats; + entry_format_count = array_size(dwarf4_directory_entry_formats); + path_hash = hash_path(cache, comp_dir, &empty_path_hash); + if (!path_hash || + !path_hash_vector_append(&cache->directories, &path_hash)) + return &drgn_enomem; + parent = path_hash; + } + + while (version < 5 || entry_count-- > 0) { + const char *path; + for (int j = 0; j < entry_format_count; j++) { + if (entry_formats[j].content_type == DW_LNCT_path) { + err = read_lnp_string(&buffer, is_64_bit, + entry_formats[j].form, + &path); + if (version < 5 && path[0] == '\0') + goto file_name_entries; + } else { + err = skip_lnp_form(&buffer.bb, is_64_bit, + entry_formats[j].form); + } + if (err) + return err; + } + path_hash = hash_path(cache, path, parent); + if (!path_hash || + !path_hash_vector_append(&cache->directories, &path_hash)) + return &drgn_enomem; + parent = cache->directories.data[0]; + } + +file_name_entries:; + /* + * File name 0 needs special treatment. In DWARF 2-4, file name entries + * are numbered starting at 1, and a DW_AT_decl_file of 0 indicates that + * no file was specified. In DWARF 5, file name entries are numbered + * starting at 0, and entry 0 is the current compilation file name. The + * DWARF 5 specification still states that a DW_AT_decl_file of 0 + * indicates that no file was specified, but some producers (including + * Clang) and consumers (including elfutils and GDB) treat a + * DW_AT_decl_file of 0 as specifying the current compilation file name, + * so we do the same. 
+ * + * So, for DWARF 5, we hash entry 0 as usual, and for DWARF 4, we insert + * a placeholder for entry 0. If there are no file names at all, we keep + * the no_file_name_hashes placeholder. + */ + struct uint64_vector file_name_hashes; + if (version >= 5) { + if ((err = read_lnp_entry_formats(&buffer, cache, + &entry_format_count))) + return err; + entry_formats = cache->entry_formats; + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &entry_count))) + return err; + if (entry_count == 0) + return NULL; + if (entry_count > SIZE_MAX) + return &drgn_enomem; + uint64_vector_init(&file_name_hashes); + if (!uint64_vector_reserve(&file_name_hashes, entry_count)) { + err = &drgn_enomem; + goto err; + } + } else { + entry_formats = dwarf4_file_name_entry_formats; + entry_format_count = array_size(dwarf4_file_name_entry_formats); + uint64_vector_init(&file_name_hashes); + } + + while (version < 5 || entry_count-- > 0) { + const char *path; + uint64_t directory_index = 0; + for (int j = 0; j < entry_format_count; j++) { + if (entry_formats[j].content_type == DW_LNCT_path) { + err = read_lnp_string(&buffer, is_64_bit, + entry_formats[j].form, + &path); + if (!err && version < 5) { + if (path[0] == '\0') { + if (file_name_hashes.size == 0) { + uint64_vector_deinit(&file_name_hashes); + return NULL; + } + goto done; + } else if (file_name_hashes.size == 0) { + uint64_t zero = 0; + if (!uint64_vector_append(&file_name_hashes, + &zero)) { + err = &drgn_enomem; + goto err; + } + } + } + } else if (entry_formats[j].content_type == + DW_LNCT_directory_index) { + err = read_lnp_directory_index(&buffer, + entry_formats[j].form, + &directory_index); + } else { + err = skip_lnp_form(&buffer.bb, is_64_bit, + entry_formats[j].form); + } + if (err) + goto err; + } + + if (directory_index >= cache->directories.size) { + err = binary_buffer_error(&buffer.bb, + "directory index %" PRIu64 " is invalid", + directory_index); + goto err; + } + struct path_hash *prev_object = 
cache->next_object; + struct path_hash_chunk *prev_chunk = cache->current_chunk; + path_hash = hash_path(cache, path, + cache->directories.data[directory_index]); + if (!path_hash || + !uint64_vector_append(&file_name_hashes, &path_hash->hash)) { + err = &drgn_enomem; + goto err; + } + + /* "Free" the objects allocated for this file name. */ + cache->next_object = prev_object; + cache->current_chunk = prev_chunk; + } + +done: + uint64_vector_shrink_to_fit(&file_name_hashes); + cu->file_name_hashes = file_name_hashes.data; + cu->num_file_names = file_name_hashes.size; + return NULL; + +err: + uint64_vector_deinit(&file_name_hashes); + return err; +} + +static struct drgn_error * +index_specification(struct drgn_debug_info *dbinfo, uintptr_t declaration, + struct drgn_debug_info_module *module, uintptr_t addr) +{ + struct drgn_dwarf_specification entry = { + .declaration = declaration, + .module = module, + .addr = addr, + }; + struct hash_pair hp = drgn_dwarf_specification_map_hash(&declaration); + int ret; + #pragma omp critical(drgn_index_specification) + ret = drgn_dwarf_specification_map_insert_hashed(&dbinfo->dwarf.specifications, + &entry, hp, + NULL); + /* + * There may be duplicates if multiple DIEs reference one declaration, + * but we ignore them. + */ + return ret < 0 ? 
&drgn_enomem : NULL;
+}
+/*
+ * Resolve an attribute whose form is DW_FORM_indirect.
+ *
+ * This reads the actual form (a ULEB128) from the buffer and converts it to the
+ * instruction to execute for it, using the dw_*_to_insn() helper that matches
+ * the kind of indirect instruction given in insn. The resulting instruction is
+ * stored in *insn_ret. die_flags is only passed on for DW_AT_declaration.
+ * DW_FORM_implicit_const is rejected as the indirect form.
+ */
+static struct drgn_error *read_indirect_insn(struct drgn_dwarf_index_cu *cu,
+					     struct binary_buffer *bb,
+					     uint8_t insn, uint8_t *insn_ret,
+					     uint8_t *die_flags)
+{
+	struct drgn_error *err;
+	uint64_t form;
+	if ((err = binary_buffer_next_uleb128(bb, &form)))
+		return err;
+	if (form == DW_FORM_implicit_const) {
+		return binary_buffer_error(bb,
+					   "DW_FORM_implicit_const in DW_FORM_indirect");
+	}
+	switch (insn) {
+	case INSN_INDIRECT:
+		return dw_form_to_insn(cu, bb, form, insn_ret);
+	case INSN_SIBLING_INDIRECT:
+		return dw_at_sibling_to_insn(bb, form, insn_ret);
+	case INSN_NAME_INDIRECT:
+		return dw_at_name_to_insn(cu, bb, form, insn_ret);
+	case INSN_COMP_DIR_INDIRECT:
+		return dw_at_comp_dir_to_insn(cu, bb, form, insn_ret);
+	case INSN_STR_OFFSETS_BASE_INDIRECT:
+		return dw_at_str_offsets_base_to_insn(cu, bb, form, insn_ret);
+	case INSN_STMT_LIST_INDIRECT:
+		return dw_at_stmt_list_to_insn(cu, bb, form, insn_ret);
+	case INSN_DECL_FILE_INDIRECT:
+		return dw_at_decl_file_to_insn(bb, form, insn_ret, NULL);
+	case INSN_DECLARATION_INDIRECT:
+		return dw_at_declaration_to_insn(bb, form, insn_ret, die_flags);
+	case INSN_SPECIFICATION_INDIRECT:
+		return dw_at_specification_to_insn(cu, bb, form, insn_ret);
+	default:
+		UNREACHABLE();
+	}
+}
+
+/*
+ * First pass: read the file name tables and index DIEs with
+ * DW_AT_specification. This recurses into namespaces.
+ *
+ * Each DIE is decoded by executing the instruction sequence compiled for its
+ * abbreviation code (cu->abbrev_insns). The byte following INSN_END holds the
+ * DIE's tag and flags. For the top-level DIE (depth 0), DW_AT_stmt_list and
+ * DW_AT_comp_dir are used to read the file name table; for nested DIEs that
+ * have DW_AT_specification and are not declarations, the specification is
+ * recorded with index_specification().
+ *
+ * Returns NULL on success or the first error encountered.
+ */
+static struct drgn_error *
+index_cu_first_pass(struct drgn_debug_info *dbinfo,
+		    struct drgn_dwarf_index_cu_buffer *buffer,
+		    struct path_hash_cache *path_hash_cache)
+{
+	/*
+	 * If DW_AT_comp_dir uses a strx* form, we can't read it right away
+	 * because we might not have seen DW_AT_str_offsets_base yet. Rather
+	 * than adding an extra flag to indicate that we need to read it later,
+	 * we set comp_dir to this sentinel value.
+	 */
+	static const char comp_dir_is_strx;
+
+	struct drgn_error *err;
+	struct drgn_dwarf_index_cu *cu = buffer->cu;
+	const char *debug_info_buffer = cu->module->scn_data[cu->scn]->d_buf;
+	unsigned int depth = 0;
+	for (;;) {
+		size_t die_addr = (uintptr_t)buffer->bb.pos;
+
+		uint64_t code;
+		if ((err = binary_buffer_next_uleb128(&buffer->bb, &code)))
+			return err;
+		if (code == 0) { /* code 0 ends a list of children: go up one level, or stop at the top */
+			if (depth-- > 1)
+				continue;
+			else
+				break;
+		} else if (code > cu->num_abbrev_decls) {
+			return binary_buffer_error(&buffer->bb,
+						   "unknown abbreviation code %" PRIu64,
+						   code);
+		}
+
+		uint8_t *insnp = &cu->abbrev_insns[cu->abbrev_decls[code - 1]];
+		bool declaration = false;
+		uintptr_t specification = 0;
+		const char *comp_dir = "";
+		uint64_t comp_dir_strx;
+		const char *stmt_list_ptr = NULL; /* non-NULL iff DW_AT_stmt_list was seen; used for error reporting */
+		uint64_t stmt_list;
+		const char *sibling = NULL;
+		uint8_t insn;
+		uint8_t extra_die_flags = 0;
+		while ((insn = *insnp++) != INSN_END) {
+indirect_insn:;
+			uint64_t skip, tmp;
+			Elf_Data *strp_scn;
+			switch (insn) {
+			case INSN_SKIP_BLOCK:
+				if ((err = binary_buffer_next_uleb128(&buffer->bb,
+								      &skip)))
+					return err;
+				goto skip;
+			case INSN_SKIP_BLOCK1:
+				if ((err = binary_buffer_next_u8_into_u64(&buffer->bb,
+									  &skip)))
+					return err;
+				goto skip;
+			case INSN_SKIP_BLOCK2:
+				if ((err = binary_buffer_next_u16_into_u64(&buffer->bb,
+									   &skip)))
+					return err;
+				goto skip;
+			case INSN_SKIP_BLOCK4:
+				if ((err = binary_buffer_next_u32_into_u64(&buffer->bb,
+									   &skip)))
+					return err;
+				goto skip;
+			case INSN_SKIP_LEB128:
+			case INSN_NAME_STRX:
+			case INSN_DECL_FILE_UDATA:
+				if ((err = binary_buffer_skip_leb128(&buffer->bb)))
+					return err;
+				break;
+			case INSN_COMP_DIR_STRING:
+				comp_dir = buffer->bb.pos;
+				/* fallthrough */
+			case INSN_SKIP_STRING:
+			case INSN_NAME_STRING:
+				if ((err = binary_buffer_skip_string(&buffer->bb)))
+					return err;
+				break;
+			case INSN_SIBLING_REF1:
+				if ((err = binary_buffer_next_u8_into_u64(&buffer->bb,
+									  &tmp)))
+					return err;
+				goto sibling;
+			case INSN_SIBLING_REF2:
+				if ((err = binary_buffer_next_u16_into_u64(&buffer->bb,
+									   &tmp)))
+					return err;
+				goto sibling;
+			case INSN_SIBLING_REF4:
+				if ((err = binary_buffer_next_u32_into_u64(&buffer->bb,
+									   &tmp)))
+					return err;
+				goto sibling;
+			case INSN_SIBLING_REF8:
+				if ((err = binary_buffer_next_u64(&buffer->bb,
+								  &tmp)))
+					return err;
+				goto sibling;
+			case INSN_SIBLING_REF_UDATA:
+				if ((err = binary_buffer_next_uleb128(&buffer->bb,
+								      &tmp)))
+					return err;
+sibling: /* tmp is an offset relative to cu->buf */
+				if (tmp > cu->len) {
+					return binary_buffer_error(&buffer->bb,
+								   "DW_AT_sibling is out of bounds");
+				}
+				sibling = cu->buf + tmp;
+				__builtin_prefetch(sibling);
+				if (sibling < buffer->bb.pos) {
+					return binary_buffer_error(&buffer->bb,
+								   "DW_AT_sibling points backwards");
+				}
+				break;
+			case INSN_COMP_DIR_STRP4:
+				if ((err = binary_buffer_next_u32_into_u64(&buffer->bb,
+									   &tmp)))
+					return err;
+				strp_scn = cu->module->scn_data[DRGN_SCN_DEBUG_STR];
+				goto comp_dir_strp;
+			case INSN_COMP_DIR_STRP8:
+				if ((err = binary_buffer_next_u64(&buffer->bb, &tmp)))
+					return err;
+				strp_scn = cu->module->scn_data[DRGN_SCN_DEBUG_STR];
+				goto comp_dir_strp;
+			case INSN_COMP_DIR_LINE_STRP4:
+				if ((err = binary_buffer_next_u32_into_u64(&buffer->bb,
+									   &tmp)))
+					return err;
+				strp_scn = cu->module->scn_data[DRGN_SCN_DEBUG_LINE_STR];
+				goto comp_dir_strp;
+			case INSN_COMP_DIR_LINE_STRP8:
+				if ((err = binary_buffer_next_u64(&buffer->bb, &tmp)))
+					return err;
+				strp_scn = cu->module->scn_data[DRGN_SCN_DEBUG_LINE_STR];
+comp_dir_strp: /* NOTE(review): strp_scn is dereferenced without a NULL check here - presumably validated when the instruction was generated; confirm */
+				if (tmp >= strp_scn->d_size) {
+					return binary_buffer_error(&buffer->bb,
+								   "DW_AT_comp_dir is out of bounds");
+				}
+				comp_dir = (const char *)strp_scn->d_buf + tmp;
+				break;
+			case INSN_COMP_DIR_STRX:
+				if ((err = binary_buffer_next_uleb128(&buffer->bb,
+								      &comp_dir_strx)))
+					return err;
+				comp_dir = &comp_dir_is_strx;
+				break;
+			case INSN_COMP_DIR_STRX1:
+				if ((err = binary_buffer_next_u8_into_u64(&buffer->bb,
+									  &comp_dir_strx)))
+					return err;
+				comp_dir = &comp_dir_is_strx;
+				break;
+			case
INSN_COMP_DIR_STRX2:
+				if ((err = binary_buffer_next_u16_into_u64(&buffer->bb,
+									   &comp_dir_strx)))
+					return err;
+				comp_dir = &comp_dir_is_strx;
+				break;
+			case INSN_COMP_DIR_STRX3:
+				if ((err = binary_buffer_next_uint(&buffer->bb,
+								   3,
+								   &comp_dir_strx)))
+					return err;
+				comp_dir = &comp_dir_is_strx;
+				break;
+			case INSN_COMP_DIR_STRX4:
+				if ((err = binary_buffer_next_u32_into_u64(&buffer->bb,
+									   &comp_dir_strx)))
+					return err;
+				comp_dir = &comp_dir_is_strx;
+				break;
+			case INSN_COMP_DIR_STRP_ALT4:
+				if ((err = binary_buffer_next_u32_into_u64(&buffer->bb,
+									   &tmp)))
+					return err;
+				strp_scn = cu->module->alt_debug_str_data;
+				goto comp_dir_strp;
+			case INSN_COMP_DIR_STRP_ALT8:
+				if ((err = binary_buffer_next_u64(&buffer->bb, &tmp)))
+					return err;
+				strp_scn = cu->module->alt_debug_str_data;
+				goto comp_dir_strp;
+			case INSN_STR_OFFSETS_BASE4:
+				if ((err = binary_buffer_next_u32_into_u64(&buffer->bb,
+									   &tmp)))
+					return err;
+				goto str_offsets_base;
+			case INSN_STR_OFFSETS_BASE8:
+				if ((err = binary_buffer_next_u64(&buffer->bb,
+								  &tmp)))
+					return err;
+str_offsets_base:
+				if (tmp > cu->module->scn_data[DRGN_SCN_DEBUG_STR_OFFSETS]->d_size) {
+					return binary_buffer_error(&buffer->bb,
+								   "DW_AT_str_offsets_base is out of bounds");
+				}
+				cu->str_offsets =
+					(char *)cu->module->scn_data[DRGN_SCN_DEBUG_STR_OFFSETS]->d_buf
+					+ tmp;
+				break;
+			case INSN_STMT_LIST_LINEPTR4:
+				stmt_list_ptr = buffer->bb.pos;
+				if ((err = binary_buffer_next_u32_into_u64(&buffer->bb,
+									   &stmt_list)))
+					return err;
+				break;
+			case INSN_STMT_LIST_LINEPTR8:
+				stmt_list_ptr = buffer->bb.pos;
+				if ((err = binary_buffer_next_u64(&buffer->bb,
+								  &stmt_list)))
+					return err;
+				break;
+			case INSN_NAME_STRX1:
+			case INSN_DECL_FILE_DATA1:
+				skip = 1;
+				goto skip;
+			case INSN_NAME_STRX2:
+			case INSN_DECL_FILE_DATA2:
+				skip = 2;
+				goto skip;
+			case INSN_NAME_STRX3:
+				skip = 3;
+				goto skip;
+			case INSN_NAME_STRP4:
+			case INSN_NAME_STRX4:
+			case INSN_NAME_STRP_ALT4:
+			case INSN_DECL_FILE_DATA4:
+				skip = 4;
+				goto
skip;
+			case INSN_NAME_STRP8:
+			case INSN_NAME_STRP_ALT8:
+			case INSN_DECL_FILE_DATA8:
+				skip = 8;
+				goto skip;
+			case INSN_DECL_FILE_IMPLICIT: /* the value is stored inline in the instruction stream; step over its bytes (high bit set means more follow) */
+				while (*insnp++ & 0x80)
+					;
+				break;
+			case INSN_DECLARATION_FLAG: {
+				uint8_t flag;
+				if ((err = binary_buffer_next_u8(&buffer->bb,
+								 &flag)))
+					return err;
+				if (flag)
+					declaration = true;
+				break;
+			}
+			case INSN_SPECIFICATION_REF1:
+				if ((err = binary_buffer_next_u8_into_u64(&buffer->bb,
+									  &tmp)))
+					return err;
+				goto specification;
+			case INSN_SPECIFICATION_REF2:
+				if ((err = binary_buffer_next_u16_into_u64(&buffer->bb,
+									   &tmp)))
+					return err;
+				goto specification;
+			case INSN_SPECIFICATION_REF4:
+				if ((err = binary_buffer_next_u32_into_u64(&buffer->bb,
+									   &tmp)))
+					return err;
+				goto specification;
+			case INSN_SPECIFICATION_REF8:
+				if ((err = binary_buffer_next_u64(&buffer->bb,
+								  &tmp)))
+					return err;
+				goto specification;
+			case INSN_SPECIFICATION_REF_UDATA:
+				if ((err = binary_buffer_next_uleb128(&buffer->bb,
+								      &tmp)))
+					return err;
+specification: /* offset relative to cu->buf; it is only turned into an address here, not dereferenced */
+				specification = (uintptr_t)cu->buf + tmp;
+				break;
+			case INSN_SPECIFICATION_REF_ADDR4:
+				if ((err = binary_buffer_next_u32_into_u64(&buffer->bb,
+									   &tmp)))
+					return err;
+				goto specification_ref_addr;
+			case INSN_SPECIFICATION_REF_ADDR8:
+				if ((err = binary_buffer_next_u64(&buffer->bb,
+								  &tmp)))
+					return err;
+specification_ref_addr: /* offset relative to the start of the section containing this CU */
+				specification = (uintptr_t)debug_info_buffer + tmp;
+				break;
+			case INSN_SPECIFICATION_REF_ALT4:
+				if ((err = binary_buffer_next_u32_into_u64(&buffer->bb,
+									   &tmp)))
+					return err;
+				goto specification_ref_alt;
+			case INSN_SPECIFICATION_REF_ALT8:
+				if ((err = binary_buffer_next_u64(&buffer->bb,
+								  &tmp)))
+					return err;
+specification_ref_alt: /* offset relative to the module's alt_debug_info_data buffer */
+				specification = ((uintptr_t)cu->module->alt_debug_info_data->d_buf
+						 + tmp);
+				break;
+			case INSN_INDIRECT:
+			case INSN_SIBLING_INDIRECT:
+			case INSN_NAME_INDIRECT:
+			case INSN_COMP_DIR_INDIRECT:
+			case INSN_STR_OFFSETS_BASE_INDIRECT:
+			case INSN_STMT_LIST_INDIRECT:
+			case INSN_DECL_FILE_INDIRECT:
+			case INSN_DECLARATION_INDIRECT:
+			case INSN_SPECIFICATION_INDIRECT:
+				if ((err = read_indirect_insn(cu, &buffer->bb,
+							      insn, &insn,
+							      &extra_die_flags)))
+					return err;
+				if (insn) /* execute the resolved instruction; 0 means there is nothing to execute */
+					goto indirect_insn;
+				else
+					continue;
+			default:
+				skip = insn; /* any other instruction value is itself the number of bytes to skip */
+skip:
+				if ((err = binary_buffer_skip(&buffer->bb,
+							      skip)))
+					return err;
+				break;
+			}
+		}
+		insn = *insnp | extra_die_flags; /* the byte after INSN_END holds the DIE's flags and tag */
+
+		if (depth == 0) {
+			if (stmt_list_ptr) {
+				if (stmt_list >
+				    cu->module->scn_data[DRGN_SCN_DEBUG_LINE]->d_size) {
+					return binary_buffer_error_at(&buffer->bb,
+								      stmt_list_ptr,
+								      "DW_AT_stmt_list is out of bounds");
+				}
+				if (comp_dir == &comp_dir_is_strx &&
+				    (err = read_strx(buffer, comp_dir_strx,
+						     &comp_dir)))
+					return err;
+				if ((err = read_file_name_table(path_hash_cache,
+								cu, comp_dir,
+								stmt_list)))
+					return err;
+			}
+		} else if (specification) {
+			if (insn & INSN_DIE_FLAG_DECLARATION)
+				declaration = true;
+			/*
+			 * For now, we don't handle DIEs with
+			 * DW_AT_specification which are themselves
+			 * declarations. We may need to handle
+			 * DW_AT_specification "chains" in the future.
+			 */
+			if (!declaration &&
+			    (err = index_specification(dbinfo, specification,
+						       cu->module, die_addr)))
+				return err;
+		}
+
+		if (insn & INSN_DIE_FLAG_CHILDREN) {
+			if (sibling &&
+			    (insn & INSN_DIE_FLAG_TAG_MASK) != DW_TAG_namespace) /* jump over children via the sibling pointer, except for namespaces, which must be descended into */
+				buffer->bb.pos = sibling;
+			else
+				depth++;
+		} else if (depth == 0) { /* a top-level DIE without children: nothing more to read */
+			break;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * Find a definition corresponding to a declaration DIE.
+ *
+ * This finds the address of a DIE with a @c DW_AT_specification attribute that
+ * refers to the given address.
+ *
+ * @param[in] die_addr The address of the declaration DIE.
+ * @param[out] module_ret Returned module containing the definition DIE.
+ * @param[out] addr_ret Returned address of the definition DIE.
+ * @return @c true if a definition DIE was found, @c false if not (in which case
+ * *@p module_ret and *@p addr_ret are not modified).
+ */
+static bool
+drgn_dwarf_find_definition(struct drgn_debug_info *dbinfo, uintptr_t die_addr,
+			   struct drgn_debug_info_module **module_ret,
+			   uintptr_t *addr_ret)
+{
+	struct drgn_dwarf_specification_map_iterator it =
+		drgn_dwarf_specification_map_search(&dbinfo->dwarf.specifications,
+						    &die_addr); /* the map is keyed by the declaration's address */
+	if (!it.entry)
+		return false;
+	*module_ret = it.entry->module;
+	*addr_ret = it.entry->addr;
+	return true;
+}
+/*
+ * Append a new entry to the end of the shard's dies vector and initialize it.
+ *
+ * The new entry is not chained to anything (next is UINT32_MAX, which is why
+ * the vector may not grow to UINT32_MAX entries). For DW_TAG_namespace, a
+ * nested namespace index is allocated and initialized instead of storing
+ * file_name_hash.
+ *
+ * Returns false if the vector is full or an allocation failed, in which case
+ * the vector is left at its previous size.
+ */
+static bool append_die_entry(struct drgn_debug_info *dbinfo,
+			     struct drgn_dwarf_index_shard *shard, uint8_t tag,
+			     uint64_t file_name_hash,
+			     struct drgn_debug_info_module *module,
+			     uintptr_t addr)
+{
+	if (shard->dies.size == UINT32_MAX)
+		return false;
+	struct drgn_dwarf_index_die *die =
+		drgn_dwarf_index_die_vector_append_entry(&shard->dies);
+	if (!die)
+		return false;
+	die->next = UINT32_MAX;
+	die->tag = tag;
+	if (die->tag == DW_TAG_namespace) {
+		die->namespace = malloc(sizeof(*die->namespace));
+		if (!die->namespace) {
+			shard->dies.size--; /* undo the append */
+			return false;
+		}
+		drgn_namespace_dwarf_index_init(die->namespace, dbinfo);
+	} else {
+		die->file_name_hash = file_name_hash;
+	}
+	die->module = module;
+	die->addr = addr;
+
+	return true;
+}
+/*
+ * Add a DIE to a namespace's index under the given name.
+ *
+ * The shard is selected from the hash of the name and is locked for the
+ * duration of the call. Entries with the same name are chained through their
+ * next indices; if the chain already contains an entry with the same tag and
+ * file name hash, no new entry is added. For DW_TAG_namespace, the DIE is
+ * additionally queued in the pending_dies of the (new or existing) namespace
+ * entry.
+ *
+ * Returns false if an allocation failed. NOTE(review): entries appended before
+ * a later failure are not removed here - presumably left for
+ * drgn_dwarf_index_rollback(); confirm.
+ */
+static bool index_die(struct drgn_namespace_dwarf_index *ns,
+		      struct drgn_dwarf_index_cu *cu, const char *name,
+		      uint8_t tag, uint64_t file_name_hash,
+		      struct drgn_debug_info_module *module, uintptr_t addr)
+{
+	bool success = false;
+	struct drgn_dwarf_index_die_map_entry entry = {
+		.key = { name, strlen(name) },
+	};
+	struct hash_pair hp = drgn_dwarf_index_die_map_hash(&entry.key);
+	struct drgn_dwarf_index_shard *shard =
+		&ns->shards[hash_pair_to_shard(hp)];
+	omp_set_lock(&shard->lock);
+	struct drgn_dwarf_index_die_map_iterator it =
+		drgn_dwarf_index_die_map_search_hashed(&shard->map, &entry.key,
+						       hp);
+	struct drgn_dwarf_index_die *die;
+	if (!it.entry) { /* first DIE with this name: new entry plus a map entry pointing at it */
+		if (!append_die_entry(ns->dbinfo, shard, tag, file_name_hash,
+				      module, addr))
+			goto err;
+		entry.value = shard->dies.size - 1;
+		if
(drgn_dwarf_index_die_map_insert_searched(&shard->map,
+							     &entry, hp,
+							     NULL) < 0)
+			goto err;
+		die = &shard->dies.data[shard->dies.size - 1];
+		goto out;
+	}
+
+	die = &shard->dies.data[it.entry->value];
+	for (;;) { /* walk the chain looking for a duplicate; ends with die at the last entry */
+		const uint64_t die_file_name_hash =
+			die->tag == DW_TAG_namespace ? 0 : die->file_name_hash;
+		if (die->tag == tag && die_file_name_hash == file_name_hash)
+			goto out;
+
+		if (die->next == UINT32_MAX)
+			break;
+		die = &shard->dies.data[die->next];
+	}
+
+	size_t index = die - shard->dies.data; /* saved as an index because die is re-fetched after the append */
+	if (!append_die_entry(ns->dbinfo, shard, tag, file_name_hash, module,
+			      addr))
+		goto err;
+	die = &shard->dies.data[shard->dies.size - 1];
+	shard->dies.data[index].next = shard->dies.size - 1;
+out:
+	if (tag == DW_TAG_namespace) {
+		struct drgn_dwarf_index_pending_die *pending =
+			drgn_dwarf_index_pending_die_vector_append_entry(&die->namespace->pending_dies);
+		if (!pending)
+			goto err;
+		pending->cu = cu - ns->dbinfo->dwarf.index_cus.data; /* stored as an index into index_cus */
+		pending->addr = addr;
+	}
+	success = true;
+err:
+	omp_unset_lock(&shard->lock);
+	return success;
+}
+
+/* Second pass: index the actual DIEs.
*/
+static struct drgn_error *
+index_cu_second_pass(struct drgn_namespace_dwarf_index *ns,
+		     struct drgn_dwarf_index_cu_buffer *buffer)
+{
+	struct drgn_error *err;
+	struct drgn_dwarf_index_cu *cu = buffer->cu;
+	Elf_Data *debug_str = cu->module->scn_data[DRGN_SCN_DEBUG_STR];
+	unsigned int depth = 0;
+	uint8_t depth1_tag = 0; /* tag and address of the most recent DIE seen at depth 1, used for enumerators */
+	size_t depth1_addr = 0;
+	for (;;) {
+		size_t die_addr = (uintptr_t)buffer->bb.pos;
+
+		uint64_t code;
+		if ((err = binary_buffer_next_uleb128(&buffer->bb, &code)))
+			return err;
+		if (code == 0) { /* code 0 ends a list of children: go up one level, or stop at the top */
+			if (depth-- > 1)
+				continue;
+			else
+				break;
+		} else if (code > cu->num_abbrev_decls) {
+			return binary_buffer_error(&buffer->bb,
+						   "unknown abbreviation code %" PRIu64,
+						   code);
+		}
+
+		uint8_t *insnp = &cu->abbrev_insns[cu->abbrev_decls[code - 1]];
+		const char *name = NULL;
+		const char *decl_file_ptr = NULL; /* non-NULL iff DW_AT_decl_file was seen; used for error reporting */
+		uint64_t decl_file = 0; /* For -Wmaybe-uninitialized. */
+		bool declaration = false;
+		bool specification = false;
+		const char *sibling = NULL;
+		uint8_t insn;
+		uint8_t extra_die_flags = 0;
+		while ((insn = *insnp++) != INSN_END) {
+indirect_insn:;
+			uint64_t skip, tmp;
+			switch (insn) {
+			case INSN_SKIP_BLOCK:
+				if ((err = binary_buffer_next_uleb128(&buffer->bb,
+								      &skip)))
+					return err;
+				goto skip;
+			case INSN_SKIP_BLOCK1:
+				if ((err = binary_buffer_next_u8_into_u64(&buffer->bb,
+									  &skip)))
+					return err;
+				goto skip;
+			case INSN_SKIP_BLOCK2:
+				if ((err = binary_buffer_next_u16_into_u64(&buffer->bb,
+									   &skip)))
+					return err;
+				goto skip;
+			case INSN_SKIP_BLOCK4:
+				if ((err = binary_buffer_next_u32_into_u64(&buffer->bb,
+									   &skip)))
+					return err;
+				goto skip;
+			case INSN_SPECIFICATION_REF_UDATA:
+				specification = true;
+				/* fallthrough */
+			case INSN_SKIP_LEB128:
+			case INSN_COMP_DIR_STRX:
+				if ((err = binary_buffer_skip_leb128(&buffer->bb)))
+					return err;
+				break;
+			case INSN_NAME_STRING:
+				name = buffer->bb.pos;
+				/* fallthrough */
+			case INSN_SKIP_STRING:
+			case INSN_COMP_DIR_STRING:
+				if ((err =
binary_buffer_skip_string(&buffer->bb)))
+					return err;
+				break;
+			case INSN_SIBLING_REF1:
+				if ((err = binary_buffer_next_u8_into_u64(&buffer->bb,
+									  &tmp)))
+					return err;
+				goto sibling;
+			case INSN_SIBLING_REF2:
+				if ((err = binary_buffer_next_u16_into_u64(&buffer->bb,
+									   &tmp)))
+					return err;
+				goto sibling;
+			case INSN_SIBLING_REF4:
+				if ((err = binary_buffer_next_u32_into_u64(&buffer->bb,
+									   &tmp)))
+					return err;
+				goto sibling;
+			case INSN_SIBLING_REF8:
+				if ((err = binary_buffer_next_u64(&buffer->bb,
+								  &tmp)))
+					return err;
+				goto sibling;
+			case INSN_SIBLING_REF_UDATA:
+				if ((err = binary_buffer_next_uleb128(&buffer->bb,
+								      &tmp)))
+					return err;
+sibling: /* tmp is an offset relative to cu->buf */
+				if (tmp > cu->len) {
+					return binary_buffer_error(&buffer->bb,
+								   "DW_AT_sibling is out of bounds");
+				}
+				sibling = cu->buf + tmp;
+				__builtin_prefetch(sibling);
+				if (sibling < buffer->bb.pos) {
+					return binary_buffer_error(&buffer->bb,
+								   "DW_AT_sibling points backwards");
+				}
+				break;
+			case INSN_NAME_STRP4:
+				if ((err = binary_buffer_next_u32_into_u64(&buffer->bb,
+									   &tmp)))
+					return err;
+				goto strp;
+			case INSN_NAME_STRP8:
+				if ((err = binary_buffer_next_u64(&buffer->bb, &tmp)))
+					return err;
+strp: /* NOTE(review): debug_str is dereferenced without a NULL check - presumably validated when the instruction was generated; confirm */
+				if (tmp >= debug_str->d_size) {
+					return binary_buffer_error(&buffer->bb,
+								   "DW_AT_name is out of bounds");
+				}
+				name = (const char *)debug_str->d_buf + tmp;
+				__builtin_prefetch(name);
+				break;
+			case INSN_NAME_STRX:
+				if ((err = binary_buffer_next_uleb128(&buffer->bb,
+								      &tmp)))
+					return err;
+				goto name_strx;
+			case INSN_NAME_STRX1:
+				if ((err = binary_buffer_next_u8_into_u64(&buffer->bb,
+									  &tmp)))
+					return err;
+				goto name_strx;
+			case INSN_NAME_STRX2:
+				if ((err = binary_buffer_next_u16_into_u64(&buffer->bb,
+									   &tmp)))
+					return err;
+				goto name_strx;
+			case INSN_NAME_STRX3:
+				if ((err = binary_buffer_next_uint(&buffer->bb,
+								   3, &tmp)))
+					return err;
+				goto name_strx;
+			case INSN_NAME_STRX4:
+				if ((err = binary_buffer_next_u32_into_u64(&buffer->bb,
+									   &tmp)))
+					return err;
+name_strx:
+				if ((err =
read_strx(buffer, tmp, &name)))
+					return err;
+				__builtin_prefetch(name);
+				break;
+			case INSN_NAME_STRP_ALT4:
+				if ((err = binary_buffer_next_u32_into_u64(&buffer->bb,
+									   &tmp)))
+					return err;
+				goto name_alt_strp;
+			case INSN_NAME_STRP_ALT8:
+				if ((err = binary_buffer_next_u64(&buffer->bb, &tmp)))
+					return err;
+name_alt_strp:
+				if (tmp >= cu->module->alt_debug_str_data->d_size) {
+					return binary_buffer_error(&buffer->bb,
+								   "DW_AT_name is out of bounds");
+				}
+				name = (const char *)cu->module->alt_debug_str_data->d_buf + tmp;
+				__builtin_prefetch(name);
+				break;
+			case INSN_COMP_DIR_STRP4:
+			case INSN_COMP_DIR_LINE_STRP4:
+			case INSN_COMP_DIR_STRP_ALT4:
+			case INSN_STR_OFFSETS_BASE4:
+			case INSN_STMT_LIST_LINEPTR4:
+				skip = 4;
+				goto skip;
+			case INSN_COMP_DIR_STRP8:
+			case INSN_COMP_DIR_LINE_STRP8:
+			case INSN_COMP_DIR_STRP_ALT8:
+			case INSN_STR_OFFSETS_BASE8:
+			case INSN_STMT_LIST_LINEPTR8:
+				skip = 8;
+				goto skip;
+			case INSN_DECL_FILE_DATA1:
+				decl_file_ptr = buffer->bb.pos;
+				if ((err = binary_buffer_next_u8_into_u64(&buffer->bb,
+									  &decl_file)))
+					return err;
+				break;
+			case INSN_DECL_FILE_DATA2:
+				decl_file_ptr = buffer->bb.pos;
+				if ((err = binary_buffer_next_u16_into_u64(&buffer->bb,
+									   &decl_file)))
+					return err;
+				break;
+			case INSN_DECL_FILE_DATA4:
+				decl_file_ptr = buffer->bb.pos;
+				if ((err = binary_buffer_next_u32_into_u64(&buffer->bb,
+									   &decl_file)))
+					return err;
+				break;
+			case INSN_DECL_FILE_DATA8:
+				decl_file_ptr = buffer->bb.pos;
+				if ((err = binary_buffer_next_u64(&buffer->bb,
+								  &decl_file)))
+					return err;
+				break;
+			case INSN_DECL_FILE_UDATA:
+				decl_file_ptr = buffer->bb.pos;
+				if ((err = binary_buffer_next_uleb128(&buffer->bb,
+								      &decl_file)))
+					return err;
+				break;
+			case INSN_DECL_FILE_IMPLICIT: /* the value is stored inline in the instruction stream, 7 bits per byte, low bits first */
+				decl_file_ptr = buffer->bb.pos;
+				decl_file = 0;
+				for (int shift = 0; ; shift += 7) {
+					uint8_t byte = *insnp++;
+					decl_file |= (uint64_t)(byte & 0x7f) << shift;
+					if (!(byte & 0x80))
+						break;
+				}
+				break;
+			case INSN_DECLARATION_FLAG: {
+				uint8_t
flag;
+				if ((err = binary_buffer_next_u8(&buffer->bb,
+								 &flag)))
+					return err;
+				if (flag)
+					declaration = true;
+				break;
+			}
+			case INSN_SPECIFICATION_REF1:
+				specification = true;
+				/* fallthrough */
+			case INSN_COMP_DIR_STRX1:
+				skip = 1;
+				goto skip;
+			case INSN_SPECIFICATION_REF2:
+				specification = true;
+				/* fallthrough */
+			case INSN_COMP_DIR_STRX2:
+				skip = 2;
+				goto skip;
+			case INSN_COMP_DIR_STRX3:
+				skip = 3;
+				goto skip;
+			case INSN_SPECIFICATION_REF4:
+			case INSN_SPECIFICATION_REF_ADDR4:
+			case INSN_SPECIFICATION_REF_ALT4:
+				specification = true;
+				/* fallthrough */
+			case INSN_COMP_DIR_STRX4:
+				skip = 4;
+				goto skip;
+			case INSN_SPECIFICATION_REF8:
+			case INSN_SPECIFICATION_REF_ADDR8:
+			case INSN_SPECIFICATION_REF_ALT8:
+				specification = true;
+				skip = 8;
+				goto skip;
+			case INSN_INDIRECT:
+			case INSN_SIBLING_INDIRECT:
+			case INSN_NAME_INDIRECT:
+			case INSN_COMP_DIR_INDIRECT:
+			case INSN_STR_OFFSETS_BASE_INDIRECT:
+			case INSN_STMT_LIST_INDIRECT:
+			case INSN_DECL_FILE_INDIRECT:
+			case INSN_DECLARATION_INDIRECT:
+			case INSN_SPECIFICATION_INDIRECT:
+				if ((err = read_indirect_insn(cu, &buffer->bb,
+							      insn, &insn,
+							      &extra_die_flags)))
+					return err;
+				if (insn) /* execute the resolved instruction; 0 means there is nothing to execute */
+					goto indirect_insn;
+				else
+					continue;
+			default:
+				skip = insn; /* any other instruction value is itself the number of bytes to skip */
+skip:
+				if ((err = binary_buffer_skip(&buffer->bb,
+							      skip)))
+					return err;
+				break;
+			}
+		}
+		insn = *insnp | extra_die_flags; /* the byte after INSN_END holds the DIE's flags and tag */
+
+		uint8_t tag = insn & INSN_DIE_FLAG_TAG_MASK;
+		if (depth == 1) {
+			depth1_tag = tag;
+			depth1_addr = die_addr;
+		}
+		if (depth == (tag == DW_TAG_enumerator ? 2 : 1) && name &&
+		    !specification) { /* only named DIEs without DW_AT_specification are indexed: at depth 1, or depth 2 for enumerators */
+			if (insn & INSN_DIE_FLAG_DECLARATION)
+				declaration = true;
+			struct drgn_debug_info_module *module = cu->module;
+			if (tag == DW_TAG_enumerator) {
+				if (depth1_tag != DW_TAG_enumeration_type)
+					goto next;
+				/*
+				 * NB: the enumerator name points to the
+				 * enumeration_type DIE. Also, enumerators can't
+				 * be declared in C/C++, so we don't check for
+				 * that.
+				 */
+				die_addr = depth1_addr;
+			} else if (declaration &&
+				   !drgn_dwarf_find_definition(ns->dbinfo,
+							       die_addr,
+							       &module,
+							       &die_addr)) { /* a declaration is only indexed if a definition was found; module/die_addr then refer to the definition */
+				goto next;
+			}
+
+			uint64_t file_name_hash;
+			if (decl_file_ptr) {
+				if (decl_file >= cu->num_file_names) {
+					return binary_buffer_error_at(&buffer->bb,
+								      decl_file_ptr,
+								      "invalid DW_AT_decl_file %" PRIu64,
+								      decl_file);
+				}
+				file_name_hash = cu->file_name_hashes[decl_file];
+			} else {
+				file_name_hash = 0;
+			}
+			if (!index_die(ns, cu, name, tag, file_name_hash,
+				       module, die_addr))
+				return &drgn_enomem;
+		}
+
+next:
+		if (insn & INSN_DIE_FLAG_CHILDREN) {
+			/*
+			 * We must descend into the children of enumeration_type
+			 * DIEs to index enumerator DIEs. We don't want to skip
+			 * over the children of the top-level DIE even if it has
+			 * a sibling pointer.
+			 */
+			if (sibling && tag != DW_TAG_enumeration_type &&
+			    depth > 0)
+				buffer->bb.pos = sibling;
+			else
+				depth++;
+		} else if (depth == 0) { /* a top-level DIE without children: nothing more to read */
+			break;
+		}
+	}
+	return NULL;
+}
+/*
+ * Undo the index changes made for modules that have not reached the
+ * DRGN_DEBUG_INFO_MODULE_INDEXED state: their entries are removed from every
+ * shard of the global namespace index and from the specification map.
+ */
+static void drgn_dwarf_index_rollback(struct drgn_debug_info *dbinfo)
+{
+	for (size_t i = 0; i < DRGN_DWARF_INDEX_NUM_SHARDS; i++) {
+		struct drgn_dwarf_index_shard *shard =
+			&dbinfo->dwarf.global.shards[i];
+		/*
+		 * Because we're deleting everything that was added since the
+		 * last update, we can just shrink the dies array to the first
+		 * entry that was added for this update.
+		 */
+		while (shard->dies.size) {
+			struct drgn_dwarf_index_die *die =
+				&shard->dies.data[shard->dies.size - 1];
+			if (die->module->state ==
+			    DRGN_DEBUG_INFO_MODULE_INDEXED)
+				break;
+			if (die->tag == DW_TAG_namespace) { /* namespace entries own a heap-allocated nested index */
+				drgn_namespace_dwarf_index_deinit(die->namespace);
+				free(die->namespace);
+			}
+			shard->dies.size--;
+		}
+
+		/*
+		 * The new entries may be chained off of existing entries;
+		 * unchain them. Note that any entries chained off of the new
+		 * entries must also be new, so there's no need to preserve
+		 * them.
+ */ + for (size_t index = 0; index < shard->dies.size; index++) { + struct drgn_dwarf_index_die *die = + &shard->dies.data[index]; + if (die->next != UINT32_MAX && + die->next >= shard->dies.size) + die->next = UINT32_MAX; + } + + /* Finally, delete the new entries in the map. */ + for (struct drgn_dwarf_index_die_map_iterator it = + drgn_dwarf_index_die_map_first(&shard->map); + it.entry; ) { + if (it.entry->value >= shard->dies.size) { + it = drgn_dwarf_index_die_map_delete_iterator(&shard->map, + it); + } else { + it = drgn_dwarf_index_die_map_next(it); + } + } + } + + for (struct drgn_dwarf_specification_map_iterator it = + drgn_dwarf_specification_map_first(&dbinfo->dwarf.specifications); + it.entry; ) { + if (it.entry->module->state == DRGN_DEBUG_INFO_MODULE_INDEXED) { + it = drgn_dwarf_specification_map_next(it); + } else { + it = drgn_dwarf_specification_map_delete_iterator(&dbinfo->dwarf.specifications, + it); + } + } +} + +struct drgn_error * +drgn_dwarf_info_update_index(struct drgn_dwarf_index_state *state) +{ + struct drgn_debug_info *dbinfo = state->dbinfo; + struct drgn_dwarf_index_cu_vector *cus = &dbinfo->dwarf.index_cus; + + if (!drgn_namespace_dwarf_index_alloc_shards(&dbinfo->dwarf.global)) + return &drgn_enomem; + + size_t old_cus_size = cus->size; + size_t new_cus_size = old_cus_size; + for (size_t i = 0; i < state->max_threads; i++) + new_cus_size += state->cus[i].size; + if (!drgn_dwarf_index_cu_vector_reserve(cus, new_cus_size)) + return &drgn_enomem; + for (size_t i = 0; i < state->max_threads; i++) { + for (size_t j = 0; j < state->cus[i].size; j++) { + struct drgn_dwarf_index_pending_cu *pending_cu = + &state->cus[i].data[j]; + cus->data[cus->size++] = (struct drgn_dwarf_index_cu){ + .module = pending_cu->module, + .buf = pending_cu->buf, + .len = pending_cu->len, + .is_64_bit = pending_cu->is_64_bit, + .scn = pending_cu->scn, + .file_name_hashes = + (uint64_t *)no_file_name_hashes, + .num_file_names = + array_size(no_file_name_hashes), 
+ }; + } + } + + struct drgn_error *err = NULL; + #pragma omp parallel + { + struct path_hash_cache path_hash_cache; + path_hash_vector_init(&path_hash_cache.directories); + path_hash_cache.entry_formats = NULL; + path_hash_cache.entry_formats_capacity = 0; + path_hash_cache.first_chunk = + malloc(sizeof(struct path_hash_chunk)); + if (path_hash_cache.first_chunk) { + path_hash_cache.first_chunk->next = NULL; + } else { + #pragma omp critical(drgn_dwarf_info_update_index_error) + if (!err) + err = &drgn_enomem; + } + #pragma omp for schedule(dynamic) + for (size_t i = old_cus_size; i < cus->size; i++) { + if (err) + continue; + struct drgn_dwarf_index_cu *cu = &cus->data[i]; + struct drgn_dwarf_index_cu_buffer cu_buffer; + drgn_dwarf_index_cu_buffer_init(&cu_buffer, cu); + struct drgn_error *cu_err = read_cu(&cu_buffer); + if (!cu_err) + cu_err = index_cu_first_pass(dbinfo, &cu_buffer, + &path_hash_cache); + if (cu_err) { + #pragma omp critical(drgn_dwarf_info_update_index_error) + if (err) + drgn_error_destroy(cu_err); + else + err = cu_err; + } + } + free(path_hash_cache.entry_formats); + path_hash_vector_deinit(&path_hash_cache.directories); + struct path_hash_chunk *chunk = path_hash_cache.first_chunk; + while (chunk) { + struct path_hash_chunk *next_chunk = chunk->next; + free(chunk); + chunk = next_chunk; + } + } + if (err) + goto err; + + #pragma omp parallel for schedule(dynamic) + for (size_t i = old_cus_size; i < cus->size; i++) { + if (err) + continue; + struct drgn_dwarf_index_cu *cu = &cus->data[i]; + struct drgn_dwarf_index_cu_buffer buffer; + drgn_dwarf_index_cu_buffer_init(&buffer, cu); + buffer.bb.pos += cu_header_size(cu); + struct drgn_error *cu_err = + index_cu_second_pass(&dbinfo->dwarf.global, &buffer); + if (cu_err) { + #pragma omp critical(drgn_dwarf_info_update_index_error) + if (err) + drgn_error_destroy(cu_err); + else + err = cu_err; + } + } + if (err) { + drgn_dwarf_index_rollback(dbinfo); +err: + for (size_t i = old_cus_size; i < 
cus->size; i++) + drgn_dwarf_index_cu_deinit(&cus->data[i]); + cus->size = old_cus_size; + } + return err; +} + +static struct drgn_error *index_namespace(struct drgn_namespace_dwarf_index *ns) +{ + if (ns->pending_dies.size == 0) + return NULL; + + if (ns->saved_err) + return drgn_error_copy(ns->saved_err); + + if (!drgn_namespace_dwarf_index_alloc_shards(ns)) + return &drgn_enomem; + + struct drgn_error *err = NULL; + #pragma omp parallel for schedule(dynamic) + for (size_t i = 0; i < ns->pending_dies.size; i++) { + if (!err) { + struct drgn_dwarf_index_pending_die *pending = + &ns->pending_dies.data[i]; + struct drgn_dwarf_index_cu *cu = + &ns->dbinfo->dwarf.index_cus.data[pending->cu]; + struct drgn_dwarf_index_cu_buffer buffer; + drgn_dwarf_index_cu_buffer_init(&buffer, cu); + buffer.bb.pos = (char *)pending->addr; + struct drgn_error *cu_err = + index_cu_second_pass(ns, &buffer); + if (cu_err) { + #pragma omp critical(drgn_index_namespace_error) + if (err) + drgn_error_destroy(cu_err); + else + err = cu_err; + } + } + } + if (err) { + ns->saved_err = err; + return drgn_error_copy(ns->saved_err); + } + ns->pending_dies.size = 0; + drgn_dwarf_index_pending_die_vector_shrink_to_fit(&ns->pending_dies); + return err; +} + +/** + * Iterator over DWARF debugging information. + * + * An iterator is initialized with @ref drgn_dwarf_index_iterator_init(). It is + * advanced with @ref drgn_dwarf_index_iterator_next(). + */ +struct drgn_dwarf_index_iterator { + const uint64_t *tags; + size_t num_tags; + struct drgn_dwarf_index_shard *shard; + uint32_t index; +}; + +/** + * Create an iterator over DIEs in a DWARF index namespace. + * + * @param[out] it DWARF index iterator to initialize. + * @param[in] ns Namespace DWARF index. + * @param[in] name Name of DIE to search for. + * @param[in] name_len Length of @c name. + * @param[in] tags List of DIE tags to search for. + * @param[in] num_tags Number of tags in @p tags, or zero to search for any tag. 
+ * @return @c NULL on success, non-@c NULL on error. + */ +static struct drgn_error * +drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it, + struct drgn_namespace_dwarf_index *ns, + const char *name, size_t name_len, + const uint64_t *tags, size_t num_tags) +{ + struct drgn_error *err = index_namespace(ns); + if (err) + return err; + if (ns->shards) { + struct nstring key = { name, name_len }; + struct hash_pair hp = drgn_dwarf_index_die_map_hash(&key); + it->shard = &ns->shards[hash_pair_to_shard(hp)]; + struct drgn_dwarf_index_die_map_iterator map_it = + drgn_dwarf_index_die_map_search_hashed(&it->shard->map, + &key, hp); + it->index = map_it.entry ? map_it.entry->value : UINT32_MAX; + } else { + it->shard = NULL; + it->index = UINT32_MAX; + } + it->tags = tags; + it->num_tags = num_tags; + return NULL; +} + +static inline bool +drgn_dwarf_index_iterator_matches_tag(struct drgn_dwarf_index_iterator *it, + struct drgn_dwarf_index_die *die) +{ + if (it->num_tags == 0) + return true; + for (size_t i = 0; i < it->num_tags; i++) { + if (die->tag == it->tags[i]) + return true; + } + return false; +} + +/** + * Get the next matching DIE from a DWARF index iterator. + * + * If matching any name, this is O(n), where n is the number of indexed DIEs. If + * matching by name, this is O(1) on average and O(n) worst case. + * + * Note that this returns the parent `DW_TAG_enumeration_type` for indexed + * `DW_TAG_enumerator` DIEs. + * + * @param[in] it DWARF index iterator. + * @return Next DIE, or @c NULL if there are no more matching DIEs. + */ +static struct drgn_dwarf_index_die * +drgn_dwarf_index_iterator_next(struct drgn_dwarf_index_iterator *it) +{ + while (it->index != UINT32_MAX) { + struct drgn_dwarf_index_die *die = + &it->shard->dies.data[it->index]; + it->index = die->next; + if (drgn_dwarf_index_iterator_matches_tag(it, die)) + return die; + } + return NULL; +} + +/** + * Get a @c Dwarf_Die from a @ref drgn_dwarf_index_die. 
+ * + * @param[in] die Indexed DIE. + * @param[out] die_ret Returned DIE. + * @return @c NULL on success, non-@c NULL on error. + */ +static struct drgn_error * +drgn_dwarf_index_get_die(struct drgn_dwarf_index_die *die, Dwarf_Die *die_ret) +{ + Dwarf_Addr bias; + Dwarf *dwarf = dwfl_module_getdwarf(die->module->dwfl_module, &bias); + if (!dwarf) + return drgn_error_libdwfl(); + uintptr_t start = + (uintptr_t)die->module->scn_data[DRGN_SCN_DEBUG_INFO]->d_buf; + size_t size = die->module->scn_data[DRGN_SCN_DEBUG_INFO]->d_size; + if (die->addr >= start && die->addr < start + size) { + if (!dwarf_offdie(dwarf, die->addr - start, die_ret)) + return drgn_error_libdw(); + } else { + start = (uintptr_t)die->module->scn_data[DRGN_SCN_DEBUG_TYPES]->d_buf; + if (!dwarf_offdie_types(dwarf, die->addr - start, die_ret)) + return drgn_error_libdw(); + } + return NULL; +} + +/* + * Language support. + */ + +/** + * Return the @ref drgn_language of the CU of the given DIE. + * + * @param[in] fall_back Whether to fall back if the language is not found or + * unknown. If @c true, @ref drgn_default_language is returned in this case. If + * @c false, @c NULL is returned. + * @param[out] ret Returned language. + * @return @c NULL on success, non-@c NULL on error. + */ +static struct drgn_error *drgn_language_from_die(Dwarf_Die *die, bool fall_back, + const struct drgn_language **ret) +{ + Dwarf_Die cudie; + if (!dwarf_cu_die(die->cu, &cudie, NULL, NULL, NULL, NULL, NULL, NULL)) + return drgn_error_libdw(); + switch (dwarf_srclang(&cudie)) { + case DW_LANG_C: + case DW_LANG_C89: + case DW_LANG_C99: + case DW_LANG_C11: + *ret = &drgn_language_c; + break; + case DW_LANG_C_plus_plus: + case DW_LANG_C_plus_plus_03: + case DW_LANG_C_plus_plus_11: + case DW_LANG_C_plus_plus_14: + *ret = &drgn_language_cpp; + break; + default: + *ret = fall_back ? 
&drgn_default_language : NULL; + break; + } + return NULL; +} + +struct drgn_error * +drgn_debug_info_main_language(struct drgn_debug_info *dbinfo, + const struct drgn_language **ret) +{ + struct drgn_error *err; + struct drgn_dwarf_index_iterator it; + const uint64_t tag = DW_TAG_subprogram; + err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dwarf.global, "main", + strlen("main"), &tag, 1); + if (err) + return err; + struct drgn_dwarf_index_die *index_die; + while ((index_die = drgn_dwarf_index_iterator_next(&it))) { + Dwarf_Die die; + err = drgn_dwarf_index_get_die(index_die, &die); + if (err) { + drgn_error_destroy(err); + continue; + } + + err = drgn_language_from_die(&die, false, ret); + if (err) { + drgn_error_destroy(err); + continue; + } + if (*ret) + return NULL; + } + *ret = NULL; + return NULL; +} + +/* + * DIE iteration. + */ + +DEFINE_VECTOR(dwarf_die_vector, Dwarf_Die) + +/** Iterator over DWARF DIEs in a @ref drgn_debug_info_module. */ +struct drgn_dwarf_die_iterator { + /** Stack of current DIE and its ancestors. */ + struct dwarf_die_vector dies; + Dwarf *dwarf; + /** End of current CU (for bounds checking). */ + const char *cu_end; + /** Offset of next CU. */ + Dwarf_Off next_cu_off; + /** Whether current CU is from .debug_types. */ + bool debug_types; +}; + +static void drgn_dwarf_die_iterator_init(struct drgn_dwarf_die_iterator *it, + Dwarf *dwarf) +{ + dwarf_die_vector_init(&it->dies); + it->dwarf = dwarf; + it->next_cu_off = 0; + it->debug_types = false; +} + +static void drgn_dwarf_die_iterator_deinit(struct drgn_dwarf_die_iterator *it) +{ + dwarf_die_vector_deinit(&it->dies); +} + +/** + * Return the next DWARF DIE in a @ref drgn_dwarf_die_iterator. + * + * The first call returns the top-level DIE for the first unit in the module. + * Subsequent calls return children, siblings, and unit DIEs. + * + * This includes the .debug_types section. + * + * @param[in,out] it Iterator containing the returned DIE and its ancestors. 
The + * last entry in `it->dies` is the DIE itself, the entry before that is its + * parent, the entry before that is its grandparent, etc. + * @param[in] children If @c true and the last returned DIE has children, return + * its first child (this is a pre-order traversal). Otherwise, return the next + * DIE at the level less than or equal to the last returned DIE, i.e., the last + * returned DIE's sibling, or its ancestor's sibling, or the next top-level unit + * DIE. + * @param[in] subtree If zero, iterate over all DIEs in all units. If non-zero, + * stop after returning all DIEs in the subtree rooted at the DIE that was + * returned in the last call as entry `subtree - 1` in `it->dies`. + * @return @c NULL on success, `&drgn_stop` if there are no more DIEs, in which + * case the size of `it->dies` equals @p subtree and `it->dies` refers to the + * root of the iterated subtree, non-@c NULL on error, in which case this should + * not be called again. + */ +static struct drgn_error * +drgn_dwarf_die_iterator_next(struct drgn_dwarf_die_iterator *it, bool children, + size_t subtree) +{ +#define TOP() (&it->dies.data[it->dies.size - 1]) + int r; + Dwarf_Die die; + assert(subtree <= it->dies.size); + if (it->dies.size == 0) { + /* This is the first call. Get the first unit DIE. */ + if (!dwarf_die_vector_append_entry(&it->dies)) + return &drgn_enomem; + } else { + if (children) { + r = dwarf_child(TOP(), &die); + if (r == 0) { + /* The previous DIE has a child. Return it. */ + if (!dwarf_die_vector_append(&it->dies, &die)) + return &drgn_enomem; + return NULL; + } else if (r < 0) { + return drgn_error_libdw(); + } + /* The previous DIE has no children. */ + } + + if (it->dies.size == subtree) { + /* + * The previous DIE is the root of the subtree. We're + * done. + */ + return &drgn_stop; + } + + if (it->dies.size > 1) { + r = dwarf_siblingof(TOP(), &die); + if (r == 0) { + /* The previous DIE has a sibling. Return it. 
*/ + *TOP() = die; + return NULL; + } else if (r > 0) { + if (!die.addr) + goto next_unit; + /* + * The previous DIE is the last child of its + * parent. + */ + char *addr = die.addr; + do { + /* + * addr points to the null terminator + * for the list of siblings. Go back up + * to its parent. The next byte is + * either the parent's sibling or + * another null terminator. + */ + it->dies.size--; + addr++; + if (it->dies.size == subtree) { + /* + * We're back to the root of the + * subtree. We're done. + */ + return &drgn_stop; + } + if (it->dies.size == 1 || + addr >= it->cu_end) + goto next_unit; + } while (*addr == '\0'); + /* + * addr now points to the next DIE. Return it. + */ + *TOP() = (Dwarf_Die){ + .cu = it->dies.data[0].cu, + .addr = addr, + }; + return NULL; + } else { + return drgn_error_libdw(); + } + } + } + +next_unit:; + /* There are no more DIEs in the current unit. */ + Dwarf_Off cu_off = it->next_cu_off; + size_t cu_header_size; + uint64_t type_signature; + r = dwarf_next_unit(it->dwarf, cu_off, &it->next_cu_off, + &cu_header_size, NULL, NULL, NULL, NULL, + it->debug_types ? &type_signature : NULL, NULL); + if (r == 0) { + /* Got the next unit. Return the unit DIE. */ + if (it->debug_types) { + r = !dwarf_offdie_types(it->dwarf, + cu_off + cu_header_size, TOP()); + } else { + r = !dwarf_offdie(it->dwarf, cu_off + cu_header_size, + TOP()); + } + if (r) + return drgn_error_libdw(); + it->cu_end = ((const char *)TOP()->addr + - dwarf_dieoffset(TOP()) + + it->next_cu_off); + return NULL; + } else if (r > 0) { + if (!it->debug_types) { + it->next_cu_off = 0; + it->debug_types = true; + goto next_unit; + } + /* There are no more units. 
*/ + return &drgn_stop; + } else { + return drgn_error_libdw(); + } +#undef TOP +} + +struct drgn_error * +drgn_debug_info_module_find_dwarf_scopes(struct drgn_debug_info_module *module, + uint64_t pc, uint64_t *bias_ret, + Dwarf_Die **dies_ret, + size_t *length_ret) +{ + struct drgn_error *err; + + Dwarf_Addr bias; + Dwarf *dwarf = dwfl_module_getdwarf(module->dwfl_module, &bias); + if (!dwarf) + return drgn_error_libdw(); + *bias_ret = bias; + pc -= bias; + + /* First, try to get the CU containing the PC. */ + Dwarf_Aranges *aranges; + size_t naranges; + if (dwarf_getaranges(dwarf, &aranges, &naranges) < 0) + return drgn_error_libdw(); + + struct drgn_dwarf_die_iterator it; + bool children; + size_t subtree; + Dwarf_Off offset; + if (dwarf_getarangeinfo(dwarf_getarange_addr(aranges, pc), NULL, NULL, + &offset) >= 0) { + drgn_dwarf_die_iterator_init(&it, dwarf); + Dwarf_Die *cu_die = dwarf_die_vector_append_entry(&it.dies); + if (!cu_die) { + err = &drgn_enomem; + goto err; + } + if (!dwarf_offdie(dwarf, offset, cu_die)) { + err = drgn_error_libdw(); + goto err; + } + if (dwarf_next_unit(dwarf, offset - dwarf_cuoffset(cu_die), + &it.next_cu_off, NULL, NULL, NULL, NULL, + NULL, NULL, NULL)) { + err = drgn_error_libdw(); + goto err; + } + it.cu_end = ((const char *)cu_die->addr + - dwarf_dieoffset(cu_die) + + it.next_cu_off); + children = true; + subtree = 1; + } else { + /* + * Range was not found. .debug_aranges could be missing or + * incomplete, so fall back to checking each CU. + */ + drgn_dwarf_die_iterator_init(&it, dwarf); + children = false; + subtree = 0; + } + + /* Now find DIEs containing the PC. 
*/ + while (!(err = drgn_dwarf_die_iterator_next(&it, children, subtree))) { + int r = dwarf_haspc(&it.dies.data[it.dies.size - 1], pc); + if (r > 0) { + children = true; + subtree = it.dies.size; + } else if (r < 0) { + err = drgn_error_libdw(); + goto err; + } + } + if (err != &drgn_stop) + goto err; + + *dies_ret = it.dies.data; + *length_ret = it.dies.size; + return NULL; + +err: + drgn_dwarf_die_iterator_deinit(&it); + return err; +} + +struct drgn_error *drgn_find_die_ancestors(Dwarf_Die *die, Dwarf_Die **dies_ret, + size_t *length_ret) +{ + struct drgn_error *err; + + Dwarf *dwarf = dwarf_cu_getdwarf(die->cu); + if (!dwarf) + return drgn_error_libdw(); + + struct dwarf_die_vector dies = VECTOR_INIT; + Dwarf_Die *cu_die = dwarf_die_vector_append_entry(&dies); + if (!cu_die) { + err = &drgn_enomem; + goto err; + } + + Dwarf_Half cu_version; + Dwarf_Off type_offset; + if (!dwarf_cu_die(die->cu, cu_die, &cu_version, NULL, NULL, NULL, NULL, + &type_offset)) { + err = drgn_error_libdw(); + goto err; + } + Dwarf_Off cu_die_offset = dwarf_dieoffset(cu_die); + bool debug_types = cu_version == 4 && type_offset != 0; + Dwarf_Off next_cu_offset; + uint64_t type_signature; + if (dwarf_next_unit(dwarf, cu_die_offset - dwarf_cuoffset(cu_die), + &next_cu_offset, NULL, NULL, NULL, NULL, NULL, + debug_types ? &type_signature : NULL, NULL)) { + err = drgn_error_libdw(); + goto err; + } + const unsigned char *cu_end = + (unsigned char *)cu_die->addr - cu_die_offset + next_cu_offset; + +#define TOP() (&dies.data[dies.size - 1]) + while ((char *)TOP()->addr <= (char *)die->addr) { + if (TOP()->addr == die->addr) { + *dies_ret = dies.data; + *length_ret = dies.size - 1; + return NULL; + } + + Dwarf_Attribute attr; + if (dwarf_attr(TOP(), DW_AT_sibling, &attr)) { + /* The top DIE has a DW_AT_sibling attribute. 
*/ + Dwarf_Die sibling; + if (!dwarf_formref_die(&attr, &sibling)) { + err = drgn_error_libdw(); + goto err; + } + if (sibling.cu != TOP()->cu || + (char *)sibling.addr <= (char *)TOP()->addr) { + err = drgn_error_create(DRGN_ERROR_OTHER, + "invalid DW_AT_sibling"); + goto err; + } + + if ((char *)sibling.addr > (char *)die->addr) { + /* + * The top DIE's sibling is after the target + * DIE. Therefore, the target DIE must be a + * descendant of the top DIE. + */ + Dwarf_Die *child = + dwarf_die_vector_append_entry(&dies); + if (!child) { + err = &drgn_enomem; + goto err; + } + int r = dwarf_child(TOP() - 1, child); + if (r < 0) { + err = drgn_error_libdw(); + goto err; + } else if (r > 0) { + /* + * The top DIE didn't have any children, + * which should be impossible. + */ + goto not_found; + } + } else { + /* + * The top DIE's sibling is before or equal to + * the target DIE. Therefore, the target DIE + * isn't a descendant of the top DIE. Skip to + * the sibling. + */ + *TOP() = sibling; + } + } else { + /* + * The top DIE does not have a DW_AT_sibling attribute. + * Instead, we found the end of the top DIE. + */ + unsigned char *addr = attr.valp; + if (!addr || addr >= cu_end) + goto not_found; + + /* + * If the top DIE has children, then addr is its first + * child. Otherwise, then addr is its sibling. (Unless + * it is a null terminator.) + */ + size_t new_size = dies.size; + if (dwarf_haschildren(TOP()) > 0) + new_size++; + + while (*addr == '\0') { + /* + * addr points to the null terminator for the + * list of siblings. Go back up to its parent. + * The next byte is either the parent's sibling + * or another null terminator. + */ + new_size--; + addr++; + if (new_size <= 1 || addr >= cu_end) + goto not_found; + } + + /* addr now points to the next DIE. Go to it. 
*/ + if (new_size > dies.size) { + if (!dwarf_die_vector_append_entry(&dies)) { + err = &drgn_enomem; + goto err; + } + } else { + dies.size = new_size; + } + *TOP() = (Dwarf_Die){ + .cu = dies.data[0].cu, + .addr = addr, + }; + } + } +#undef TOP + +not_found: + err = drgn_error_create(DRGN_ERROR_OTHER, + "could not find DWARF DIE ancestors"); +err: + dwarf_die_vector_deinit(&dies); + return err; +} + +/* + * Location lists. + */ + +static struct drgn_error * +drgn_dwarf_next_addrx(struct binary_buffer *bb, + struct drgn_debug_info_module *module, Dwarf_Die *cu_die, + uint8_t address_size, const char **addr_base, + uint64_t *ret) +{ + struct drgn_error *err; + + if (!*addr_base) { + Dwarf_Attribute attr_mem, *attr; + if (!(attr = dwarf_attr(cu_die, DW_AT_addr_base, &attr_mem))) { + return drgn_error_create(DRGN_ERROR_OTHER, + "indirect address without DW_AT_addr_base"); + } + Dwarf_Word base; + if (dwarf_formudata(attr, &base)) + return drgn_error_libdw(); + + if (!module->scns[DRGN_SCN_DEBUG_ADDR]) { + return drgn_error_create(DRGN_ERROR_OTHER, + "indirect address without .debug_addr section"); + } + err = drgn_debug_info_module_cache_section(module, + DRGN_SCN_DEBUG_ADDR); + if (err) + return err; + + if (base > module->scn_data[DRGN_SCN_DEBUG_ADDR]->d_size || + base == 0) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_AT_addr_base is out of bounds"); + } + + *addr_base = (char *)module->scn_data[DRGN_SCN_DEBUG_ADDR]->d_buf + base; + uint8_t segment_selector_size = ((uint8_t *)*addr_base)[-1]; + if (segment_selector_size != 0) { + return drgn_error_format(DRGN_ERROR_OTHER, + "unsupported segment selector size %" PRIu8, + segment_selector_size); + } + } + + uint64_t index; + if ((err = binary_buffer_next_uleb128(bb, &index))) + return err; + + Elf_Data *data = module->scn_data[DRGN_SCN_DEBUG_ADDR]; + if (index >= + ((char *)data->d_buf + data->d_size - *addr_base) / address_size) { + return binary_buffer_error(bb, + "address index is out of bounds"); + } + 
copy_lsbytes(ret, sizeof(*ret), HOST_LITTLE_ENDIAN, + *addr_base + index * address_size, address_size, + drgn_platform_is_little_endian(&module->platform)); + return NULL; +} + +static struct drgn_error * +drgn_dwarf_read_loclistx(struct drgn_debug_info_module *module, + Dwarf_Die *cu_die, uint8_t offset_size, + Dwarf_Word index, Dwarf_Word *ret) +{ + struct drgn_error *err; + + Dwarf_Attribute attr_mem, *attr; + if (!(attr = dwarf_attr(cu_die, DW_AT_loclists_base, &attr_mem))) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_FORM_loclistx without DW_AT_loclists_base"); + } + Dwarf_Word base; + if (dwarf_formudata(attr, &base)) + return drgn_error_libdw(); + + if (!module->scns[DRGN_SCN_DEBUG_LOCLISTS]) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_FORM_loclistx without .debug_loclists section"); + } + err = drgn_debug_info_module_cache_section(module, + DRGN_SCN_DEBUG_LOCLISTS); + if (err) + return err; + Elf_Data *data = module->scn_data[DRGN_SCN_DEBUG_LOCLISTS]; + + if (base > data->d_size) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_AT_loclists_base is out of bounds"); + } + assert(offset_size == 4 || offset_size == 8); + if (index >= (data->d_size - base) / offset_size) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_FORM_loclistx is out of bounds"); + } + const char *basep = (char *)data->d_buf + base; + if (offset_size == 8) { + uint64_t offset; + memcpy(&offset, (uint64_t *)basep + index, sizeof(offset)); + if (drgn_platform_bswap(&module->platform)) + offset = bswap_64(offset); + *ret = base + offset; + } else { + uint32_t offset; + memcpy(&offset, (uint32_t *)basep + index, sizeof(offset)); + if (drgn_platform_bswap(&module->platform)) + offset = bswap_32(offset); + *ret = base + offset; + } + return NULL; +} + +static struct drgn_error * +drgn_dwarf5_location_list(struct drgn_debug_info_module *module, + Dwarf_Word offset, Dwarf_Die *cu_die, + uint8_t address_size, uint64_t pc, + const char **expr_ret, size_t *expr_size_ret) +{ + 
struct drgn_error *err; + + if (!module->scns[DRGN_SCN_DEBUG_LOCLISTS]) { + return drgn_error_create(DRGN_ERROR_OTHER, + "loclist without .debug_loclists section"); + } + err = drgn_debug_info_module_cache_section(module, + DRGN_SCN_DEBUG_LOCLISTS); + if (err) + return err; + struct drgn_debug_info_buffer buffer; + drgn_debug_info_buffer_init(&buffer, module, DRGN_SCN_DEBUG_LOCLISTS); + if (offset > buffer.bb.end - buffer.bb.pos) { + return drgn_error_create(DRGN_ERROR_OTHER, + "loclist is out of bounds"); + } + buffer.bb.pos += offset; + + const char *addr_base = NULL; + uint64_t base; + bool base_valid = false; + /* Default is unknown. May be overridden by DW_LLE_default_location. */ + *expr_ret = NULL; + *expr_size_ret = 0; + for (;;) { + uint8_t kind; + if ((err = binary_buffer_next_u8(&buffer.bb, &kind))) + return err; + uint64_t start, length, expr_size; + switch (kind) { + case DW_LLE_end_of_list: + return NULL; + case DW_LLE_base_addressx: + if ((err = drgn_dwarf_next_addrx(&buffer.bb, module, + cu_die, address_size, + &addr_base, &base))) + return err; + base_valid = true; + break; + case DW_LLE_startx_endx: + if ((err = drgn_dwarf_next_addrx(&buffer.bb, module, + cu_die, address_size, + &addr_base, &start)) || + (err = drgn_dwarf_next_addrx(&buffer.bb, module, + cu_die, address_size, + &addr_base, &length))) + return err; + length -= start; +counted_location_description: + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &expr_size))) + return err; + if (expr_size > buffer.bb.end - buffer.bb.pos) { + return binary_buffer_error(&buffer.bb, + "location description size is out of bounds"); + } + if (pc >= start && pc - start < length) { + *expr_ret = buffer.bb.pos; + *expr_size_ret = expr_size; + return NULL; + } + buffer.bb.pos += expr_size; + break; + case DW_LLE_startx_length: + if ((err = drgn_dwarf_next_addrx(&buffer.bb, module, + cu_die, address_size, + &addr_base, &start)) || + (err = binary_buffer_next_uleb128(&buffer.bb, + &length))) + return 
err; + goto counted_location_description; + case DW_LLE_offset_pair: + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &start)) || + (err = binary_buffer_next_uleb128(&buffer.bb, + &length))) + return err; + length -= start; + if (!base_valid) { + Dwarf_Addr low_pc; + if (dwarf_lowpc(cu_die, &low_pc)) + return drgn_error_libdw(); + base = low_pc; + base_valid = true; + } + start += base; + goto counted_location_description; + case DW_LLE_default_location: + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &expr_size))) + return err; + if (expr_size > buffer.bb.end - buffer.bb.pos) { + return binary_buffer_error(&buffer.bb, + "location description size is out of bounds"); + } + *expr_ret = buffer.bb.pos; + *expr_size_ret = expr_size; + buffer.bb.pos += expr_size; + break; + case DW_LLE_base_address: + if ((err = binary_buffer_next_uint(&buffer.bb, + address_size, + &base))) + return err; + base_valid = true; + break; + case DW_LLE_start_end: + if ((err = binary_buffer_next_uint(&buffer.bb, + address_size, + &start)) || + (err = binary_buffer_next_uint(&buffer.bb, + address_size, + &length))) + return err; + length -= start; + goto counted_location_description; + case DW_LLE_start_length: + if ((err = binary_buffer_next_uint(&buffer.bb, + address_size, + &start)) || + (err = binary_buffer_next_uleb128(&buffer.bb, + &length))) + return err; + goto counted_location_description; + default: + return binary_buffer_error(&buffer.bb, + "unknown location list entry kind %#" PRIx8, + kind); + } + } +} + +static struct drgn_error * +drgn_dwarf4_location_list(struct drgn_debug_info_module *module, + Dwarf_Word offset, Dwarf_Die *cu_die, + uint8_t address_size, uint64_t pc, + const char **expr_ret, size_t *expr_size_ret) +{ + struct drgn_error *err; + + if (!module->scns[DRGN_SCN_DEBUG_LOC]) { + return drgn_error_create(DRGN_ERROR_OTHER, + "loclistptr without .debug_loc section"); + } + err = drgn_debug_info_module_cache_section(module, DRGN_SCN_DEBUG_LOC); + if (err) + 
return err; + struct drgn_debug_info_buffer buffer; + drgn_debug_info_buffer_init(&buffer, module, DRGN_SCN_DEBUG_LOC); + if (offset > buffer.bb.end - buffer.bb.pos) { + return drgn_error_create(DRGN_ERROR_OTHER, + "loclistptr is out of bounds"); + } + buffer.bb.pos += offset; + + uint64_t address_max = uint_max(address_size); + uint64_t base; + bool base_valid = false; + for (;;) { + uint64_t start, end; + if ((err = binary_buffer_next_uint(&buffer.bb, address_size, + &start)) || + (err = binary_buffer_next_uint(&buffer.bb, address_size, + &end))) + return err; + if (start == 0 && end == 0) { + *expr_ret = NULL; + *expr_size_ret = 0; + return NULL; + } else if (start == address_max) { + base = end; + base_valid = true; + } else { + if (!base_valid) { + Dwarf_Addr low_pc; + if (dwarf_lowpc(cu_die, &low_pc)) + return drgn_error_libdw(); + base = low_pc; + base_valid = true; + } + uint16_t expr_size; + if ((err = binary_buffer_next_u16(&buffer.bb, + &expr_size))) + return err; + if (expr_size > buffer.bb.end - buffer.bb.pos) { + return binary_buffer_error(&buffer.bb, + "location description size is out of bounds"); + } + if (base + start <= pc && pc < base + end) { + *expr_ret = buffer.bb.pos; + *expr_size_ret = expr_size; + return NULL; + } + buffer.bb.pos += expr_size; + } + } +} + +static struct drgn_error * +drgn_dwarf_location(struct drgn_debug_info_module *module, + Dwarf_Attribute *attr, + const struct drgn_register_state *regs, + const char **expr_ret, size_t *expr_size_ret) +{ + struct drgn_error *err; + switch (attr->form) { + /* DWARF 3 */ + case DW_FORM_data4: + case DW_FORM_data8: + /* DWARF 4-5 */ + case DW_FORM_sec_offset: + /* DWARF 5 */ + case DW_FORM_loclistx: { + Dwarf_Die cu_die; + Dwarf_Half cu_version; + uint8_t address_size; + uint8_t offset_size; + if (!dwarf_cu_die(attr->cu, &cu_die, &cu_version, NULL, + &address_size, &offset_size, NULL, NULL)) + return drgn_error_libdw(); + if ((err = drgn_check_address_size(address_size))) + return err; + 
+ Dwarf_Word offset; + if (dwarf_formudata(attr, &offset)) + return drgn_error_libdw(); + if (attr->form == DW_FORM_loclistx && + ((err = drgn_dwarf_read_loclistx(module, &cu_die, + offset_size, offset, + &offset)))) + return err; + + struct optional_uint64 pc; + if (!regs || + !(pc = drgn_register_state_get_pc(regs)).has_value) { + *expr_ret = NULL; + *expr_size_ret = 0; + return NULL; + } + Dwarf_Addr bias; + dwfl_module_info(module->dwfl_module, NULL, NULL, NULL, &bias, + NULL, NULL, NULL); + pc.value = pc.value - !regs->interrupted - bias; + + if (cu_version >= 5) { + return drgn_dwarf5_location_list(module, offset, + &cu_die, address_size, + pc.value, expr_ret, + expr_size_ret); + } else { + return drgn_dwarf4_location_list(module, offset, + &cu_die, address_size, + pc.value, expr_ret, + expr_size_ret); + } + } + default: { + Dwarf_Block block; + if (dwarf_formblock(attr, &block)) + return drgn_error_libdw(); + *expr_ret = (char *)block.data; + *expr_size_ret = block.length; + return NULL; + } + } +} + +/* + * DWARF expressions. + */ + +/** + * Arbitrary limit for number of operations to execute in a DWARF expression to + * avoid infinite loops. + */ +static const int MAX_DWARF_EXPR_OPS = 10000; + +/* A DWARF expression and the context it is being evaluated in. 
*/ +struct drgn_dwarf_expression_context { + struct binary_buffer bb; + const char *start; + struct drgn_program *prog; + struct drgn_debug_info_module *module; + uint8_t address_size; + Dwarf_Die cu_die; + const char *cu_addr_base; + Dwarf_Die *function; + const struct drgn_register_state *regs; +}; + +static struct drgn_error * +drgn_dwarf_expression_buffer_error(struct binary_buffer *bb, const char *pos, + const char *message) +{ + struct drgn_dwarf_expression_context *ctx = + container_of(bb, struct drgn_dwarf_expression_context, bb); + return drgn_error_debug_info(ctx->module, pos, message); +} + +static inline struct drgn_error * +drgn_dwarf_expression_context_init(struct drgn_dwarf_expression_context *ctx, + struct drgn_program *prog, + struct drgn_debug_info_module *module, + Dwarf_CU *cu, Dwarf_Die *function, + const struct drgn_register_state *regs, + const char *expr, size_t expr_size) +{ + struct drgn_error *err; + binary_buffer_init(&ctx->bb, expr, expr_size, + drgn_platform_is_little_endian(&module->platform), + drgn_dwarf_expression_buffer_error); + ctx->start = expr; + ctx->prog = prog; + ctx->module = module; + if (cu) { + if (!dwarf_cu_die(cu, &ctx->cu_die, NULL, NULL, + &ctx->address_size, NULL, NULL, NULL)) + return drgn_error_libdw(); + if ((err = drgn_check_address_size(ctx->address_size))) + return err; + } else { + ctx->cu_die.addr = NULL; + ctx->address_size = + drgn_platform_address_size(&module->platform); + } + ctx->cu_addr_base = NULL; + ctx->function = function; + ctx->regs = regs; + return NULL; +} + +static struct drgn_error * +drgn_dwarf_frame_base(struct drgn_program *prog, + struct drgn_debug_info_module *module, Dwarf_Die *die, + const struct drgn_register_state *regs, + int *remaining_ops, uint64_t *ret); + +/* + * Evaluate a DWARF expression up to the next location description operation or + * operation that can't be evaluated in the given context. + * + * Returns &drgn_not_found if it tried to use an unknown register value. 
+ */ +static struct drgn_error * +drgn_eval_dwarf_expression(struct drgn_dwarf_expression_context *ctx, + struct uint64_vector *stack, + int *remaining_ops) +{ + struct drgn_error *err; + const struct drgn_platform *platform = &ctx->module->platform; + bool little_endian = drgn_platform_is_little_endian(platform); + uint8_t address_size = ctx->address_size; + uint8_t address_bits = address_size * CHAR_BIT; + uint64_t address_mask = uint_max(address_size); + drgn_register_number (*dwarf_regno_to_internal)(uint64_t) = + platform->arch->dwarf_regno_to_internal; + +#define CHECK(n) do { \ + size_t _n = (n); \ + if (stack->size < _n) { \ + return binary_buffer_error(&ctx->bb, \ + "DWARF expression stack underflow"); \ + } \ +} while (0) + +#define ELEM(i) stack->data[stack->size - 1 - (i)] + +#define PUSH(x) do { \ + uint64_t push = (x); \ + if (!uint64_vector_append(stack, &push)) \ + return &drgn_enomem; \ +} while (0) + +#define PUSH_MASK(x) PUSH((x) & address_mask) + + while (binary_buffer_has_next(&ctx->bb)) { + if (*remaining_ops <= 0) { + return binary_buffer_error(&ctx->bb, + "DWARF expression executed too many operations"); + } + (*remaining_ops)--; + uint8_t opcode; + if ((err = binary_buffer_next_u8(&ctx->bb, &opcode))) + return err; + uint64_t uvalue; + uint64_t dwarf_regno; + uint8_t deref_size; + switch (opcode) { + /* Literal encodings. */ + case DW_OP_lit0 ... 
DW_OP_lit31: + PUSH(opcode - DW_OP_lit0); + break; + case DW_OP_addr: + if ((err = binary_buffer_next_uint(&ctx->bb, + address_size, + &uvalue))) + return err; + PUSH(uvalue); + break; + case DW_OP_const1u: + if ((err = binary_buffer_next_u8_into_u64(&ctx->bb, + &uvalue))) + return err; + PUSH(uvalue); + break; + case DW_OP_const2u: + if ((err = binary_buffer_next_u16_into_u64(&ctx->bb, + &uvalue))) + return err; + PUSH_MASK(uvalue); + break; + case DW_OP_const4u: + if ((err = binary_buffer_next_u32_into_u64(&ctx->bb, + &uvalue))) + return err; + PUSH_MASK(uvalue); + break; + case DW_OP_const8u: + if ((err = binary_buffer_next_u64(&ctx->bb, &uvalue))) + return err; + PUSH_MASK(uvalue); + break; + case DW_OP_const1s: + if ((err = binary_buffer_next_s8_into_u64(&ctx->bb, + &uvalue))) + return err; + PUSH_MASK(uvalue); + break; + case DW_OP_const2s: + if ((err = binary_buffer_next_s16_into_u64(&ctx->bb, + &uvalue))) + return err; + PUSH_MASK(uvalue); + break; + case DW_OP_const4s: + if ((err = binary_buffer_next_s32_into_u64(&ctx->bb, + &uvalue))) + return err; + PUSH_MASK(uvalue); + break; + case DW_OP_const8s: + if ((err = binary_buffer_next_s64_into_u64(&ctx->bb, + &uvalue))) + return err; + PUSH_MASK(uvalue); + break; + case DW_OP_constu: + if ((err = binary_buffer_next_uleb128(&ctx->bb, + &uvalue))) + return err; + PUSH_MASK(uvalue); + break; + case DW_OP_consts: + if ((err = binary_buffer_next_sleb128_into_u64(&ctx->bb, + &uvalue))) + return err; + PUSH_MASK(uvalue); + break; + case DW_OP_addrx: + case DW_OP_constx: + if (!ctx->cu_die.addr) { + ctx->bb.pos = ctx->bb.prev; + return NULL; + } + if ((err = drgn_dwarf_next_addrx(&ctx->bb, ctx->module, + &ctx->cu_die, + address_size, + &ctx->cu_addr_base, + &uvalue))) + return err; + PUSH(uvalue); + break; + /* Register values. 
*/ + case DW_OP_fbreg: { + err = drgn_dwarf_frame_base(ctx->prog, ctx->module, + ctx->function, ctx->regs, + remaining_ops, &uvalue); + if (err) + return err; + int64_t svalue; + if ((err = binary_buffer_next_sleb128(&ctx->bb, + &svalue))) + return err; + PUSH_MASK(uvalue + svalue); + break; + } + case DW_OP_breg0 ... DW_OP_breg31: + dwarf_regno = opcode - DW_OP_breg0; + goto breg; + case DW_OP_bregx: + if ((err = binary_buffer_next_uleb128(&ctx->bb, + &dwarf_regno))) + return err; +breg: + { + if (!ctx->regs) + return &drgn_not_found; + drgn_register_number regno = + dwarf_regno_to_internal(dwarf_regno); + if (!drgn_register_state_has_register(ctx->regs, regno)) + return &drgn_not_found; + const struct drgn_register_layout *layout = + &platform->arch->register_layout[regno]; + copy_lsbytes(&uvalue, sizeof(uvalue), + HOST_LITTLE_ENDIAN, + &ctx->regs->buf[layout->offset], + layout->size, little_endian); + int64_t svalue; + if ((err = binary_buffer_next_sleb128(&ctx->bb, + &svalue))) + return err; + PUSH_MASK(uvalue + svalue); + break; + } + /* Stack operations. 
*/ + case DW_OP_dup: + CHECK(1); + PUSH(ELEM(0)); + break; + case DW_OP_drop: + CHECK(1); + stack->size--; + break; + case DW_OP_pick: { + uint8_t index; + if ((err = binary_buffer_next_u8(&ctx->bb, &index))) + return err; + CHECK(index + 1); + PUSH(ELEM(index)); + break; + } + case DW_OP_over: + CHECK(2); + PUSH(ELEM(1)); + break; + case DW_OP_swap: + CHECK(2); + uvalue = ELEM(0); + ELEM(0) = ELEM(1); + ELEM(1) = uvalue; + break; + case DW_OP_rot: + CHECK(3); + uvalue = ELEM(0); + ELEM(0) = ELEM(1); + ELEM(1) = ELEM(2); + ELEM(2) = uvalue; + break; + case DW_OP_deref: + deref_size = address_size; + goto deref; + case DW_OP_deref_size: + if ((err = binary_buffer_next_u8(&ctx->bb, + &deref_size))) + return err; + if (deref_size > address_size) { + return binary_buffer_error(&ctx->bb, + "DW_OP_deref_size has invalid size"); + } +deref: + { + CHECK(1); + char deref_buf[8]; + err = drgn_program_read_memory(ctx->prog, deref_buf, + ELEM(0), deref_size, + false); + if (err) + return err; + copy_lsbytes(&ELEM(0), sizeof(ELEM(0)), + HOST_LITTLE_ENDIAN, deref_buf, deref_size, + little_endian); + break; + } + case DW_OP_call_frame_cfa: { + if (!ctx->regs) + return &drgn_not_found; + /* + * The DWARF 5 specification says that + * DW_OP_call_frame_cfa cannot be used for CFI. For + * DW_CFA_def_cfa_expression, it is clearly invalid to + * define the CFA in terms of the CFA, and it will fail + * naturally below. This restriction doesn't make sense + * for DW_CFA_expression and DW_CFA_val_expression, as + * they push the CFA and thus depend on it anyways, so + * we don't bother enforcing it. + */ + struct optional_uint64 cfa = + drgn_register_state_get_cfa(ctx->regs); + if (!cfa.has_value) + return &drgn_not_found; + PUSH(cfa.value); + break; + } + /* Arithmetic and logical operations. 
*/ +#define UNOP_MASK(op) do { \ + CHECK(1); \ + ELEM(0) = (op ELEM(0)) & address_mask; \ +} while (0) +#define BINOP(op) do { \ + CHECK(2); \ + ELEM(1) = ELEM(1) op ELEM(0); \ + stack->size--; \ +} while (0) +#define BINOP_MASK(op) do { \ + CHECK(2); \ + ELEM(1) = (ELEM(1) op ELEM(0)) & address_mask; \ + stack->size--; \ +} while (0) + case DW_OP_abs: + CHECK(1); + if (ELEM(0) & (UINT64_C(1) << (address_bits - 1))) + ELEM(0) = -ELEM(0) & address_mask; + break; + case DW_OP_and: + BINOP(&); + break; + case DW_OP_div: + CHECK(2); + if (ELEM(0) == 0) { + return binary_buffer_error(&ctx->bb, + "division by zero in DWARF expression"); + } + ELEM(1) = ((truncate_signed(ELEM(1), address_bits) + / truncate_signed(ELEM(0), address_bits)) + & address_mask); + stack->size--; + break; + case DW_OP_minus: + BINOP_MASK(-); + break; + case DW_OP_mod: + CHECK(2); + if (ELEM(0) == 0) { + return binary_buffer_error(&ctx->bb, + "modulo by zero in DWARF expression"); + } + ELEM(1) = ELEM(1) % ELEM(0); + stack->size--; + break; + case DW_OP_mul: + BINOP_MASK(*); + break; + case DW_OP_neg: + UNOP_MASK(-); + break; + case DW_OP_not: + UNOP_MASK(~); + break; + case DW_OP_or: + BINOP(|); + break; + case DW_OP_plus: + BINOP_MASK(+); + break; + case DW_OP_plus_uconst: + CHECK(1); + if ((err = binary_buffer_next_uleb128(&ctx->bb, + &uvalue))) + return err; + ELEM(0) = (ELEM(0) + uvalue) & address_mask; + break; + case DW_OP_shl: + CHECK(2); + if (ELEM(0) < address_bits) + ELEM(1) = (ELEM(1) << ELEM(0)) & address_mask; + else + ELEM(1) = 0; + stack->size--; + break; + case DW_OP_shr: + CHECK(2); + if (ELEM(0) < address_bits) + ELEM(1) >>= ELEM(0); + else + ELEM(1) = 0; + stack->size--; + break; + case DW_OP_shra: + CHECK(2); + if (ELEM(0) < address_bits) { + ELEM(1) = ((truncate_signed(ELEM(1), address_bits) + >> ELEM(0)) + & address_mask); + } else if (ELEM(1) & (UINT64_C(1) << (address_bits - 1))) { + ELEM(1) = -INT64_C(1) & address_mask; + } else { + ELEM(1) = 0; + } + stack->size--; + 
break; + case DW_OP_xor: + BINOP(^); + break; +#undef BINOP_MASK +#undef BINOP +#undef UNOP_MASK + /* Control flow operations. */ +#define RELOP(op) do { \ + CHECK(2); \ + ELEM(1) = (truncate_signed(ELEM(1), address_bits) op \ + truncate_signed(ELEM(0), address_bits)); \ + stack->size--; \ +} while (0) + case DW_OP_le: + RELOP(<=); + break; + case DW_OP_ge: + RELOP(>=); + break; + case DW_OP_eq: + RELOP(==); + break; + case DW_OP_lt: + RELOP(<); + break; + case DW_OP_gt: + RELOP(>); + break; + case DW_OP_ne: + RELOP(!=); + break; +#undef RELOP + case DW_OP_skip: +branch: + { + int16_t skip; + if ((err = binary_buffer_next_s16(&ctx->bb, &skip))) + return err; + if ((skip >= 0 && skip > ctx->bb.end - ctx->bb.pos) || + (skip < 0 && -skip > ctx->bb.pos - ctx->start)) { + return binary_buffer_error(&ctx->bb, + "DWARF expression branch is out of bounds"); + } + ctx->bb.pos += skip; + break; + } + case DW_OP_bra: + CHECK(1); + if (ELEM(0)) { + stack->size--; + goto branch; + } else { + stack->size--; + if ((err = binary_buffer_skip(&ctx->bb, 2))) + return err; + } + break; + /* Special operations. */ + case DW_OP_nop: + break; + /* Location description operations. */ + case DW_OP_reg0 ... DW_OP_reg31: + case DW_OP_regx: + case DW_OP_implicit_value: + case DW_OP_stack_value: + case DW_OP_piece: + case DW_OP_bit_piece: + /* The caller must handle it. */ + ctx->bb.pos = ctx->bb.prev; + return NULL; + /* + * We don't yet support: + * + * - DW_OP_push_object_address + * - DW_OP_form_tls_address + * - DW_OP_entry_value + * DW_OP_implicit_pointer + * - Procedure calls: DW_OP_call2, DW_OP_call4, DW_OP_call_ref. + * - Typed operations: DW_OP_const_type, DW_OP_regval_type, + * DW_OP_deref_type, DW_OP_convert, DW_OP_reinterpret. + * - Operations for multiple address spaces: DW_OP_xderef, + * DW_OP_xderef_size, DW_OP_xderef_type. 
+ */ + default: + return binary_buffer_error(&ctx->bb, + "unknown DWARF expression opcode %#" PRIx8, + opcode); + } + } + +#undef PUSH_MASK +#undef PUSH +#undef ELEM +#undef CHECK + + return NULL; +} + +static struct drgn_error * +drgn_dwarf_frame_base(struct drgn_program *prog, + struct drgn_debug_info_module *module, Dwarf_Die *die, + const struct drgn_register_state *regs, + int *remaining_ops, uint64_t *ret) +{ + struct drgn_error *err; + bool little_endian = drgn_platform_is_little_endian(&module->platform); + drgn_register_number (*dwarf_regno_to_internal)(uint64_t) = + module->platform.arch->dwarf_regno_to_internal; + + if (!die) + return &drgn_not_found; + Dwarf_Attribute attr_mem, *attr; + if (!(attr = dwarf_attr_integrate(die, DW_AT_frame_base, &attr_mem))) + return &drgn_not_found; + const char *expr; + size_t expr_size; + err = drgn_dwarf_location(module, attr, regs, &expr, &expr_size); + if (err) + return err; + + struct drgn_dwarf_expression_context ctx; + if ((err = drgn_dwarf_expression_context_init(&ctx, prog, module, + die->cu, NULL, regs, expr, + expr_size))) + return err; + struct uint64_vector stack = VECTOR_INIT; + for (;;) { + err = drgn_eval_dwarf_expression(&ctx, &stack, remaining_ops); + if (err) + goto out; + if (binary_buffer_has_next(&ctx.bb)) { + uint8_t opcode; + if ((err = binary_buffer_next_u8(&ctx.bb, &opcode))) + goto out; + + uint64_t dwarf_regno; + switch (opcode) { + case DW_OP_reg0 ... 
DW_OP_reg31: + dwarf_regno = opcode - DW_OP_reg0; + goto reg; + case DW_OP_regx: + if ((err = binary_buffer_next_uleb128(&ctx.bb, + &dwarf_regno))) + goto out; +reg: + { + if (!regs) { + err = &drgn_not_found; + goto out; + } + drgn_register_number regno = + dwarf_regno_to_internal(dwarf_regno); + if (!drgn_register_state_has_register(regs, + regno)) { + err = &drgn_not_found; + goto out; + } + const struct drgn_register_layout *layout = + &prog->platform.arch->register_layout[regno]; + /* + * Note that this doesn't mask the address since + * the caller does that. + */ + copy_lsbytes(ret, sizeof(*ret), + HOST_LITTLE_ENDIAN, + &regs->buf[layout->offset], + layout->size, little_endian); + if (binary_buffer_has_next(&ctx.bb)) { + err = binary_buffer_error(&ctx.bb, + "stray operations in DW_AT_frame_base expression"); + } else { + err = NULL; + } + goto out; + } + default: + err = binary_buffer_error(&ctx.bb, + "invalid opcode %#" PRIx8 " for DW_AT_frame_base expression", + opcode); + goto out; + } + } else if (stack.size) { + *ret = stack.data[stack.size - 1]; + err = NULL; + break; + } else { + err = &drgn_not_found; + break; + } + } +out: + uint64_vector_deinit(&stack); + return err; +} + +/* + * Type and object parsing. + */ + +/** + * Return whether a DWARF DIE is little-endian. + * + * @param[in] check_attr Whether to check the DW_AT_endianity attribute. If @c + * false, only the ELF header is checked and this function cannot fail. + * @return @c NULL on success, non-@c NULL on error. 
+ */ +static struct drgn_error *dwarf_die_is_little_endian(Dwarf_Die *die, + bool check_attr, bool *ret) +{ + Dwarf_Attribute endianity_attr_mem, *endianity_attr; + Dwarf_Word endianity; + if (check_attr && + (endianity_attr = dwarf_attr_integrate(die, DW_AT_endianity, + &endianity_attr_mem))) { + if (dwarf_formudata(endianity_attr, &endianity)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "invalid DW_AT_endianity"); + } + } else { + endianity = DW_END_default; + } + switch (endianity) { + case DW_END_default: { + Elf *elf = dwarf_getelf(dwarf_cu_getdwarf(die->cu)); + *ret = elf_getident(elf, NULL)[EI_DATA] == ELFDATA2LSB; + return NULL; + } + case DW_END_little: + *ret = true; + return NULL; + case DW_END_big: + *ret = false; + return NULL; + default: + return drgn_error_create(DRGN_ERROR_OTHER, + "unknown DW_AT_endianity"); + } +} + +/** Like dwarf_die_is_little_endian(), but returns a @ref drgn_byte_order. */ +static struct drgn_error *dwarf_die_byte_order(Dwarf_Die *die, bool check_attr, + enum drgn_byte_order *ret) +{ + bool little_endian; + struct drgn_error *err = dwarf_die_is_little_endian(die, check_attr, + &little_endian); + /* + * dwarf_die_is_little_endian() can't fail if check_attr is false, so + * the !check_attr test suppresses maybe-uninitialized warnings. + */ + if (!err || !check_attr) + *ret = drgn_byte_order_from_little_endian(little_endian); + return err; +} + +static int dwarf_type(Dwarf_Die *die, Dwarf_Die *ret) +{ + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr; + + if (!(attr = dwarf_attr_integrate(die, DW_AT_type, &attr_mem))) + return 1; + + return dwarf_formref_die(attr, ret) ? 
0 : -1; +} + +static int dwarf_flag(Dwarf_Die *die, unsigned int name, bool *ret) +{ + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr; + + if (!(attr = dwarf_attr(die, name, &attr_mem))) { + *ret = false; + return 0; + } + return dwarf_formflag(attr, ret); +} + +static int dwarf_flag_integrate(Dwarf_Die *die, unsigned int name, bool *ret) +{ + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr; + + if (!(attr = dwarf_attr_integrate(die, name, &attr_mem))) { + *ret = false; + return 0; + } + return dwarf_formflag(attr, ret); +} + +/** + * Parse a type from a DWARF debugging information entry. + * + * This is the same as @ref drgn_type_from_dwarf() except that it can be used to + * work around a bug in GCC < 9.0 that zero length array types are encoded the + * same as incomplete array types. There are a few places where GCC allows + * zero-length arrays but not incomplete arrays: + * + * - As the type of a member of a structure with only one member. + * - As the type of a structure member other than the last member. + * - As the type of a union member. + * - As the element type of an array. + * + * In these cases, we know that what appears to be an incomplete array type must + * actually have a length of zero. In other cases, a subrange DIE without + * DW_AT_count or DW_AT_upper_bound is ambiguous; we return an incomplete array + * type. + * + * @param[in] dbinfo Debugging information. + * @param[in] module Module containing @p die. + * @param[in] die DIE to parse. + * @param[in] can_be_incomplete_array Whether the type can be an incomplete + * array type. If this is @c false and the type appears to be an incomplete + * array type, its length is set to zero instead. + * @param[out] is_incomplete_array_ret Whether the encoded type is an incomplete + * array type or a typedef of an incomplete array type (regardless of @p + * can_be_incomplete_array). + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. 
+ */ +static struct drgn_error * +drgn_type_from_dwarf_internal(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, bool can_be_incomplete_array, + bool *is_incomplete_array_ret, + struct drgn_qualified_type *ret); + +/** + * Parse a type from a DWARF debugging information entry. + * + * @param[in] dbinfo Debugging information. + * @param[in] module Module containing @p die. + * @param[in] die DIE to parse. + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. + */ +static inline struct drgn_error * +drgn_type_from_dwarf(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, Dwarf_Die *die, + struct drgn_qualified_type *ret) +{ + return drgn_type_from_dwarf_internal(dbinfo, module, die, true, NULL, + ret); +} + +/** + * Parse a type from the @c DW_AT_type attribute of a DWARF debugging + * information entry. + * + * @param[in] dbinfo Debugging information. + * @param[in] module Module containing @p die. + * @param[in] die DIE with @c DW_AT_type attribute. + * @param[in] lang Language of @p die if it is already known, @c NULL if it + * should be determined from @p die. + * @param[in] can_be_void Whether the @c DW_AT_type attribute may be missing, + * which is interpreted as a void type. If this is false and the @c DW_AT_type + * attribute is missing, an error is returned. + * @param[in] can_be_incomplete_array See @ref drgn_type_from_dwarf_internal(). + * @param[in] is_incomplete_array_ret See @ref drgn_type_from_dwarf_internal(). + * @param[out] ret Returned type. + * @return @c NULL on success, non-@c NULL on error. 
+ */ +static struct drgn_error * +drgn_type_from_dwarf_attr(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, Dwarf_Die *die, + const struct drgn_language *lang, + bool can_be_void, bool can_be_incomplete_array, + bool *is_incomplete_array_ret, + struct drgn_qualified_type *ret) +{ + struct drgn_error *err; + char tag_buf[DW_TAG_BUF_LEN]; + + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr; + if (!(attr = dwarf_attr_integrate(die, DW_AT_type, &attr_mem))) { + if (can_be_void) { + if (!lang) { + err = drgn_language_from_die(die, true, &lang); + if (err) + return err; + } + ret->type = drgn_void_type(dbinfo->prog, lang); + ret->qualifiers = 0; + return NULL; + } else { + return drgn_error_format(DRGN_ERROR_OTHER, + "%s is missing DW_AT_type", + dwarf_tag_str(die, tag_buf)); + } + } + + Dwarf_Die type_die; + if (!dwarf_formref_die(attr, &type_die)) { + return drgn_error_format(DRGN_ERROR_OTHER, + "%s has invalid DW_AT_type", + dwarf_tag_str(die, tag_buf)); + } + + return drgn_type_from_dwarf_internal(dbinfo, module, &type_die, + can_be_incomplete_array, + is_incomplete_array_ret, ret); +} + +static struct drgn_error * +drgn_object_from_dwarf_enumerator(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, const char *name, + struct drgn_object *ret) +{ + struct drgn_error *err; + struct drgn_qualified_type qualified_type; + err = drgn_type_from_dwarf(dbinfo, module, die, &qualified_type); + if (err) + return err; + const struct drgn_type_enumerator *enumerators = + drgn_type_enumerators(qualified_type.type); + size_t num_enumerators = drgn_type_num_enumerators(qualified_type.type); + for (size_t i = 0; i < num_enumerators; i++) { + if (strcmp(enumerators[i].name, name) != 0) + continue; + + if (drgn_enum_type_is_signed(qualified_type.type)) { + return drgn_object_set_signed(ret, qualified_type, + enumerators[i].svalue, 0); + } else { + return drgn_object_set_unsigned(ret, qualified_type, + 
enumerators[i].uvalue, + 0); + } + } + UNREACHABLE(); +} + +static struct drgn_error * +drgn_object_from_dwarf_subprogram(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, struct drgn_object *ret) +{ + struct drgn_qualified_type qualified_type; + struct drgn_error *err = drgn_type_from_dwarf(dbinfo, module, die, + &qualified_type); + if (err) + return err; + Dwarf_Addr low_pc; + if (dwarf_lowpc(die, &low_pc) == -1) + return drgn_object_set_absent(ret, qualified_type, 0); + Dwarf_Addr bias; + dwfl_module_info(module->dwfl_module, NULL, NULL, NULL, &bias, NULL, + NULL, NULL); + return drgn_object_set_reference(ret, qualified_type, low_pc + bias, 0, + 0); +} + +static struct drgn_error *read_bits(struct drgn_program *prog, void *dst, + unsigned int dst_bit_offset, uint64_t src, + unsigned int src_bit_offset, + uint64_t bit_size, bool lsb0) +{ + struct drgn_error *err; + + assert(dst_bit_offset < 8); + assert(src_bit_offset < 8); + + if (bit_size == 0) + return NULL; + + if (dst_bit_offset == src_bit_offset) { + /* + * We can read directly into the destination buffer, but we + * may have to preserve some bits at the start and/or end. + */ + uint8_t *d = dst; + uint64_t last_bit = dst_bit_offset + bit_size - 1; + uint8_t first_byte = d[0]; + uint8_t last_byte = d[last_bit / 8]; + err = drgn_program_read_memory(prog, d, src, last_bit / 8 + 1, + false); + if (err) + return err; + if (dst_bit_offset != 0) { + uint8_t mask = + copy_bits_first_mask(dst_bit_offset, lsb0); + d[0] = (first_byte & ~mask) | (d[0] & mask); + } + if (last_bit % 8 != 7) { + uint8_t mask = copy_bits_last_mask(last_bit, lsb0); + d[last_bit / 8] = ((last_byte & ~mask) + | (d[last_bit / 8] & mask)); + } + return NULL; + } else { + /* + * If the source and destination have different offsets, then + * depending on the size and source offset, we may have to read + * one more byte than is available in the destination. 
To keep + * things simple, we always read into a temporary buffer (rather + * than adding a special case for reading directly into the + * destination and shifting bits around). + */ + uint64_t src_bytes = (src_bit_offset + bit_size - 1) / 8 + 1; + char stack_tmp[16], *tmp; + if (src_bytes <= sizeof(stack_tmp)) { + tmp = stack_tmp; + } else { + tmp = malloc64(src_bytes); + if (!tmp) + return &drgn_enomem; + } + err = drgn_program_read_memory(prog, tmp, src, src_bytes, + false); + if (!err) { + copy_bits(dst, dst_bit_offset, tmp, src_bit_offset, + bit_size, lsb0); + } + if (src_bytes > sizeof(stack_tmp)) + free(tmp); + return err; + } +} + +static struct drgn_error * +drgn_object_from_dwarf_location(struct drgn_program *prog, + struct drgn_debug_info_module *module, + Dwarf_Die *die, + struct drgn_qualified_type qualified_type, + const char *expr, size_t expr_size, + Dwarf_Die *function_die, + const struct drgn_register_state *regs, + struct drgn_object *ret) +{ + struct drgn_error *err; + bool little_endian = drgn_platform_is_little_endian(&module->platform); + uint64_t address_mask = drgn_platform_address_mask(&module->platform); + drgn_register_number (*dwarf_regno_to_internal)(uint64_t) = + module->platform.arch->dwarf_regno_to_internal; + + struct drgn_object_type type; + err = drgn_object_type(qualified_type, 0, &type); + if (err) + return err; + + union drgn_value value; + char *value_buf = NULL; + + uint64_t address = 0; /* GCC thinks this may be used uninitialized. */ + int bit_offset = -1; /* -1 means that we don't have an address. 
*/ + + uint64_t bit_pos = 0; + + int remaining_ops = MAX_DWARF_EXPR_OPS; + struct drgn_dwarf_expression_context ctx; + if ((err = drgn_dwarf_expression_context_init(&ctx, prog, module, + die->cu, function_die, + regs, expr, expr_size))) + return err; + struct uint64_vector stack = VECTOR_INIT; + do { + stack.size = 0; + err = drgn_eval_dwarf_expression(&ctx, &stack, &remaining_ops); + if (err == &drgn_not_found) + goto absent; + else if (err) + goto out; + + const void *src = NULL; + size_t src_size; + + if (binary_buffer_has_next(&ctx.bb)) { + uint8_t opcode; + if ((err = binary_buffer_next_u8(&ctx.bb, &opcode))) + goto out; + + uint64_t uvalue; + uint64_t dwarf_regno; + drgn_register_number regno; + switch (opcode) { + case DW_OP_reg0 ... DW_OP_reg31: + dwarf_regno = opcode - DW_OP_reg0; + goto reg; + case DW_OP_regx: + if ((err = binary_buffer_next_uleb128(&ctx.bb, + &dwarf_regno))) + goto out; +reg: + if (!regs) + goto absent; + regno = dwarf_regno_to_internal(dwarf_regno); + if (!drgn_register_state_has_register(regs, + regno)) + goto absent; + const struct drgn_register_layout *layout = + &prog->platform.arch->register_layout[regno]; + src = &regs->buf[layout->offset]; + src_size = layout->size; + break; + case DW_OP_implicit_value: + if ((err = binary_buffer_next_uleb128(&ctx.bb, + &uvalue))) + goto out; + if (uvalue > ctx.bb.end - ctx.bb.pos) { + err = binary_buffer_error(&ctx.bb, + "DW_OP_implicit_value size is out of bounds"); + goto out; + } + src = ctx.bb.pos; + src_size = uvalue; + ctx.bb.pos += uvalue; + break; + case DW_OP_stack_value: + if (!stack.size) + goto absent; + if (little_endian != HOST_LITTLE_ENDIAN) { + stack.data[stack.size - 1] = + bswap_64(stack.data[stack.size - 1]); + } + src = &stack.data[stack.size - 1]; + src_size = sizeof(stack.data[0]); + break; + default: + ctx.bb.pos = ctx.bb.prev; + break; + } + } + + uint64_t piece_bit_size; + uint64_t piece_bit_offset; + if (binary_buffer_has_next(&ctx.bb)) { + uint8_t opcode; + if ((err = 
binary_buffer_next_u8(&ctx.bb, &opcode))) + goto out; + + switch (opcode) { + case DW_OP_piece: + if ((err = binary_buffer_next_uleb128(&ctx.bb, + &piece_bit_size))) + goto out; + /* + * It's probably bogus for the piece size to be + * larger than the remaining value size, but + * that's not explicitly stated in the DWARF 5 + * specification, so clamp it instead. + */ + if (__builtin_mul_overflow(piece_bit_size, 8U, + &piece_bit_size) || + piece_bit_size > type.bit_size - bit_pos) + piece_bit_size = type.bit_size - bit_pos; + piece_bit_offset = 0; + break; + case DW_OP_bit_piece: + if ((err = binary_buffer_next_uleb128(&ctx.bb, + &piece_bit_size)) || + (err = binary_buffer_next_uleb128(&ctx.bb, + &piece_bit_offset))) + goto out; + if (piece_bit_size > type.bit_size - bit_pos) + piece_bit_size = type.bit_size - bit_pos; + break; + default: + err = binary_buffer_error(&ctx.bb, + "unknown DWARF expression opcode %#" PRIx8 " after simple location description", + opcode); + goto out; + } + } else { + piece_bit_size = type.bit_size - bit_pos; + piece_bit_offset = 0; + } + + /* + * TODO: there are a few cases that a DWARF location can + * describe that can't be represented in drgn's object model: + * + * 1. An object that is partially known and partially unknown. + * 2. An object that is partially in memory and partially a + * value. + * 3. An object that is in memory at non-contiguous addresses. + * 4. A pointer object whose pointer value is not known but + * whose referenced value is known (DW_OP_implicit_pointer). + * + * For case 1, we consider the whole object as absent. For cases + * 2 and 3, we convert the whole object to a value. Case 4 is + * not supported at all. We should add a way to represent all of + * these situations precisely. + */ + if (src && piece_bit_size == 0) { + /* Ignore empty value. 
*/ + } else if (src) { + if (!value_buf && + !drgn_value_zalloc(drgn_value_size(type.bit_size), + &value, &value_buf)) { + err = &drgn_enomem; + goto out; + } + if (bit_offset >= 0) { + /* + * We previously had an address. Read it into + * the value. + */ + err = read_bits(prog, value_buf, 0, address, + bit_offset, bit_pos, + little_endian); + if (err) + goto out; + bit_offset = -1; + } + /* + * It's probably safe to assume that we don't have an + * implicit value larger than 2 exabytes. + */ + assert(src_size <= UINT64_MAX / 8); + uint64_t src_bit_size = UINT64_C(8) * src_size; + if (piece_bit_offset > src_bit_size) + piece_bit_offset = src_bit_size; + uint64_t copy_bit_size = + min(piece_bit_size, + src_bit_size - piece_bit_offset); + uint64_t copy_bit_offset = bit_pos; + if (!little_endian) { + copy_bit_offset += piece_bit_size - copy_bit_size; + piece_bit_offset = (src_bit_size + - copy_bit_size + - piece_bit_offset); + } + copy_bits(&value_buf[copy_bit_offset / 8], + copy_bit_offset % 8, + (const char *)src + (piece_bit_offset / 8), + piece_bit_offset % 8, copy_bit_size, + little_endian); + } else if (stack.size) { + uint64_t piece_address = + ((stack.data[stack.size - 1] + piece_bit_offset / 8) + & address_mask); + piece_bit_offset %= 8; + if (bit_pos > 0 && bit_offset >= 0) { + /* + * We already had an address. Merge the pieces + * if the addresses are contiguous, otherwise + * convert to a value. + * + * The obvious way to write this is + * (address + (bit_pos + bit_offset) / 8), but + * (bit_pos + bit_offset) can overflow uint64_t. + */ + uint64_t end_address = + ((address + + bit_pos / 8 + + (bit_pos % 8 + bit_offset) / 8) + & address_mask); + unsigned int end_bit_offset = + (bit_offset + bit_pos) % 8; + if (piece_bit_size == 0 || + (piece_address == end_address && + piece_bit_offset == end_bit_offset)) { + /* Piece is contiguous. 
*/ + piece_address = address; + piece_bit_offset = bit_offset; + } else { + if (!drgn_value_zalloc(drgn_value_size(type.bit_size), + &value, + &value_buf)) { + err = &drgn_enomem; + goto out; + } + err = read_bits(prog, value_buf, 0, + address, bit_offset, + bit_pos, little_endian); + if (err) + goto out; + bit_offset = -1; + } + } + if (value_buf) { + /* We already have a value. Read into it. */ + err = read_bits(prog, &value_buf[bit_pos / 8], + bit_pos % 8, piece_address, + piece_bit_offset, + piece_bit_size, little_endian); + if (err) + goto out; + } else { + address = piece_address; + bit_offset = piece_bit_offset; + } + } else if (piece_bit_size > 0) { + goto absent; + } + bit_pos += piece_bit_size; + } while (binary_buffer_has_next(&ctx.bb)); + + if (bit_pos < type.bit_size || (bit_offset < 0 && !value_buf)) { +absent: + if (dwarf_tag(die) == DW_TAG_template_value_parameter) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_AT_template_value_parameter is missing value"); + } + drgn_object_reinit(ret, &type, DRGN_OBJECT_ABSENT); + err = NULL; + } else if (bit_offset >= 0) { + Dwarf_Addr start, end, bias; + dwfl_module_info(module->dwfl_module, NULL, &start, &end, &bias, + NULL, NULL, NULL); + /* + * If the address is not in the module's address range, then + * it's probably something special like a Linux per-CPU variable + * (which isn't actually a variable address but an offset). + * Don't apply the bias in that case. 
+ */ + if (start <= address + bias && address + bias < end) + address += bias; + err = drgn_object_set_reference_internal(ret, &type, address, + bit_offset); + } else if (type.encoding == DRGN_OBJECT_ENCODING_BUFFER) { + drgn_object_reinit(ret, &type, DRGN_OBJECT_VALUE); + ret->value = value; + value_buf = NULL; + err = NULL; + } else { + err = drgn_object_set_from_buffer_internal(ret, &type, + value_buf, 0); + } + +out: + if (value_buf != value.ibuf) + free(value_buf); + uint64_vector_deinit(&stack); + return err; +} + +static struct drgn_error * +drgn_object_from_dwarf_constant(struct drgn_debug_info *dbinfo, Dwarf_Die *die, + struct drgn_qualified_type qualified_type, + Dwarf_Attribute *attr, struct drgn_object *ret) +{ + struct drgn_object_type type; + struct drgn_error *err = drgn_object_type(qualified_type, 0, &type); + if (err) + return err; + Dwarf_Block block; + if (dwarf_formblock(attr, &block) == 0) { + if (block.length < drgn_value_size(type.bit_size)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_AT_const_value block is too small"); + } + return drgn_object_set_from_buffer_internal(ret, &type, + block.data, 0); + } else if (type.encoding == DRGN_OBJECT_ENCODING_SIGNED) { + Dwarf_Sword svalue; + if (dwarf_formsdata(attr, &svalue)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "invalid DW_AT_const_value"); + } + drgn_object_set_signed_internal(ret, &type, svalue); + return NULL; + } else if (type.encoding == DRGN_OBJECT_ENCODING_UNSIGNED) { + Dwarf_Word uvalue; + if (dwarf_formudata(attr, &uvalue)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "invalid DW_AT_const_value"); + } + drgn_object_set_unsigned_internal(ret, &type, uvalue); + return NULL; + } else { + return drgn_error_create(DRGN_ERROR_OTHER, + "unknown DW_AT_const_value form"); + } +} + +struct drgn_error * +drgn_object_from_dwarf(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, Dwarf_Die *type_die, + Dwarf_Die *function_die, + const 
struct drgn_register_state *regs, + struct drgn_object *ret) +{ + struct drgn_error *err; + if (dwarf_tag(die) == DW_TAG_subprogram) { + return drgn_object_from_dwarf_subprogram(dbinfo, module, die, + ret); + } + /* + * The DWARF 5 specification mentions that data object entries can have + * DW_AT_endianity, but that doesn't seem to be used in practice. It + * would be inconvenient to support, so ignore it for now. + */ + struct drgn_qualified_type qualified_type; + if (type_die) { + err = drgn_type_from_dwarf(dbinfo, module, type_die, + &qualified_type); + } else { + err = drgn_type_from_dwarf_attr(dbinfo, module, die, NULL, true, + true, NULL, &qualified_type); + } + if (err) + return err; + Dwarf_Attribute attr_mem, *attr; + const char *expr; + size_t expr_size; + if ((attr = dwarf_attr_integrate(die, DW_AT_location, &attr_mem))) { + err = drgn_dwarf_location(module, attr, regs, &expr, + &expr_size); + if (err) + return err; + } else if ((attr = dwarf_attr_integrate(die, DW_AT_const_value, + &attr_mem))) { + return drgn_object_from_dwarf_constant(dbinfo, die, + qualified_type, attr, + ret); + } else { + expr = NULL; + expr_size = 0; + } + return drgn_object_from_dwarf_location(dbinfo->prog, module, die, + qualified_type, expr, expr_size, + function_die, regs, ret); +} + +static struct drgn_error *find_dwarf_enumerator(Dwarf_Die *enumeration_type, + const char *name, + Dwarf_Die *ret) +{ + int r = dwarf_child(enumeration_type, ret); + while (r == 0) { + if (dwarf_tag(ret) == DW_TAG_enumerator && + strcmp(dwarf_diename(ret), name) == 0) + return NULL; + r = dwarf_siblingof(ret, ret); + } + if (r < 0) + return drgn_error_libdw(); + ret->addr = NULL; + return NULL; +} + +struct drgn_error *drgn_find_in_dwarf_scopes(Dwarf_Die *scopes, + size_t num_scopes, + const char *name, + Dwarf_Die *die_ret, + Dwarf_Die *type_ret) +{ + struct drgn_error *err; + Dwarf_Die die; + for (size_t scope = num_scopes; scope--;) { + bool have_declaration = false; + if 
(dwarf_child(&scopes[scope], &die) != 0) + continue; + do { + switch (dwarf_tag(&die)) { + case DW_TAG_variable: + case DW_TAG_formal_parameter: + case DW_TAG_subprogram: + if (strcmp(dwarf_diename(&die), name) == 0) { + *die_ret = die; + bool declaration; + if (dwarf_flag(&die, DW_AT_declaration, + &declaration)) + return drgn_error_libdw(); + if (declaration) + have_declaration = true; + else + return NULL; + } + break; + case DW_TAG_enumeration_type: { + bool enum_class; + if (dwarf_flag_integrate(&die, DW_AT_enum_class, + &enum_class)) + return drgn_error_libdw(); + if (!enum_class) { + Dwarf_Die enumerator; + err = find_dwarf_enumerator(&die, name, + &enumerator); + if (err) + return err; + if (enumerator.addr) { + *die_ret = enumerator; + *type_ret = die; + return NULL; + } + } + break; + } + default: + continue; + } + } while (dwarf_siblingof(&die, &die) == 0); + if (have_declaration) + return NULL; + } + die_ret->addr = NULL; + return NULL; +} + +static struct drgn_error * +drgn_base_type_from_dwarf(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, Dwarf_Die *die, + const struct drgn_language *lang, + struct drgn_type **ret) +{ + struct drgn_error *err; + + const char *name = dwarf_diename(die); + if (!name) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_base_type has missing or invalid DW_AT_name"); + } + + Dwarf_Attribute attr; + Dwarf_Word encoding; + if (!dwarf_attr_integrate(die, DW_AT_encoding, &attr) || + dwarf_formudata(&attr, &encoding)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_base_type has missing or invalid DW_AT_encoding"); + } + int size = dwarf_bytesize(die); + if (size == -1) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_base_type has missing or invalid DW_AT_byte_size"); + } + + enum drgn_byte_order byte_order; + err = dwarf_die_byte_order(die, true, &byte_order); + if (err) + return err; + + switch (encoding) { + case DW_ATE_boolean: + return drgn_bool_type_create(dbinfo->prog, 
name, size, + byte_order, lang, ret); + case DW_ATE_float: + return drgn_float_type_create(dbinfo->prog, name, size, + byte_order, lang, ret); + case DW_ATE_signed: + case DW_ATE_signed_char: + return drgn_int_type_create(dbinfo->prog, name, size, true, + byte_order, lang, ret); + case DW_ATE_unsigned: + case DW_ATE_unsigned_char: + return drgn_int_type_create(dbinfo->prog, name, size, false, + byte_order, lang, ret); + /* We don't support complex types yet. */ + case DW_ATE_complex_float: + default: + return drgn_error_format(DRGN_ERROR_OTHER, + "DW_TAG_base_type has unknown DWARF encoding 0x%llx", + (unsigned long long)encoding); + } +} + +/* + * DW_TAG_structure_type, DW_TAG_union_type, DW_TAG_class_type, and + * DW_TAG_enumeration_type can be incomplete (i.e., have a DW_AT_declaration of + * true). This tries to find the complete type. If it succeeds, it returns NULL. + * If it can't find a complete type, it returns &drgn_not_found. Otherwise, it + * returns an error. + */ +static struct drgn_error * +drgn_debug_info_find_complete(struct drgn_debug_info *dbinfo, uint64_t tag, + const char *name, struct drgn_type **ret) +{ + struct drgn_error *err; + + struct drgn_dwarf_index_iterator it; + err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dwarf.global, name, + strlen(name), &tag, 1); + if (err) + return err; + + /* + * Find a matching DIE. Note that drgn_dwarf_index does not contain DIEs + * with DW_AT_declaration, so this will always be a complete type. + */ + struct drgn_dwarf_index_die *index_die = + drgn_dwarf_index_iterator_next(&it); + if (!index_die) + return &drgn_not_found; + /* + * Look for another matching DIE. If there is one, then we can't be sure + * which type this is, so leave it incomplete rather than guessing. 
+ */ + if (drgn_dwarf_index_iterator_next(&it)) + return &drgn_not_found; + + Dwarf_Die die; + err = drgn_dwarf_index_get_die(index_die, &die); + if (err) + return err; + struct drgn_qualified_type qualified_type; + err = drgn_type_from_dwarf(dbinfo, index_die->module, &die, + &qualified_type); + if (err) + return err; + *ret = qualified_type.type; + return NULL; +} + +struct drgn_dwarf_member_thunk_arg { + struct drgn_debug_info_module *module; + Dwarf_Die die; + bool can_be_incomplete_array; +}; + +static struct drgn_error * +drgn_dwarf_member_thunk_fn(struct drgn_object *res, void *arg_) +{ + struct drgn_error *err; + struct drgn_dwarf_member_thunk_arg *arg = arg_; + if (res) { + struct drgn_qualified_type qualified_type; + err = drgn_type_from_dwarf_attr(drgn_object_program(res)->dbinfo, + arg->module, &arg->die, NULL, + false, + arg->can_be_incomplete_array, + NULL, &qualified_type); + if (err) + return err; + + Dwarf_Attribute attr_mem, *attr; + uint64_t bit_field_size; + if ((attr = dwarf_attr_integrate(&arg->die, DW_AT_bit_size, + &attr_mem))) { + Dwarf_Word bit_size; + if (dwarf_formudata(attr, &bit_size)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member has invalid DW_AT_bit_size"); + } + bit_field_size = bit_size; + } else { + bit_field_size = 0; + } + + err = drgn_object_set_absent(res, qualified_type, + bit_field_size); + if (err) + return err; + } + free(arg); + return NULL; +} + +static inline bool drgn_dwarf_attribute_is_block(Dwarf_Attribute *attr) +{ + switch (attr->form) { + case DW_FORM_block1: + case DW_FORM_block2: + case DW_FORM_block4: + case DW_FORM_block: + return true; + default: + return false; + } +} + +static inline bool drgn_dwarf_attribute_is_ptr(Dwarf_Attribute *attr) +{ + switch (attr->form) { + case DW_FORM_sec_offset: + return true; + case DW_FORM_data4: + case DW_FORM_data8: { + /* + * dwarf_cu_die() always returns the DIE. We should use + * dwarf_cu_info(), but that requires elfutils >= 0.171. 
+ */ + Dwarf_Die unused; + Dwarf_Half cu_version; + dwarf_cu_die(attr->cu, &unused, &cu_version, NULL, NULL, NULL, + NULL, NULL); + return cu_version <= 3; + } + default: + return false; + } +} + +static struct drgn_error *invalid_data_member_location(struct binary_buffer *bb, + const char *pos, + const char *message) +{ + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member has invalid DW_AT_data_member_location"); +} + +static struct drgn_error * +drgn_parse_dwarf_data_member_location(Dwarf_Attribute *attr, uint64_t *ret) +{ + struct drgn_error *err; + + if (drgn_dwarf_attribute_is_block(attr)) { + Dwarf_Block block; + if (dwarf_formblock(attr, &block)) + return drgn_error_libdw(); + /* + * In DWARF 2, DW_AT_data_member_location is always a location + * description. We can translate a DW_OP_plus_uconst expression + * into a constant offset; other expressions aren't supported + * yet. + */ + struct binary_buffer bb; + /* + * Right now we only parse u8 and ULEB128, so the byte order + * doesn't matter. 
+ */ + binary_buffer_init(&bb, block.data, block.length, + HOST_LITTLE_ENDIAN, + invalid_data_member_location); + uint8_t opcode; + err = binary_buffer_next_u8(&bb, &opcode); + if (err) + return err; + if (opcode != DW_OP_plus_uconst) { +unsupported: + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member has unsupported DW_AT_data_member_location"); + } + err = binary_buffer_next_uleb128(&bb, ret); + if (err) + return err; + if (binary_buffer_has_next(&bb)) + goto unsupported; + } else if (drgn_dwarf_attribute_is_ptr(attr)) { + goto unsupported; + } else { + + Dwarf_Word word; + if (dwarf_formudata(attr, &word)) + return invalid_data_member_location(NULL, NULL, NULL); + *ret = word; + } + return NULL; +} + +static struct drgn_error * +parse_member_offset(Dwarf_Die *die, union drgn_lazy_object *member_object, + bool little_endian, uint64_t *ret) +{ + struct drgn_error *err; + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr; + + /* + * The simplest case is when we have DW_AT_data_bit_offset, which is + * already the offset in bits from the beginning of the containing + * object to the beginning of the member (which may be a bit field). + */ + attr = dwarf_attr_integrate(die, DW_AT_data_bit_offset, &attr_mem); + if (attr) { + Dwarf_Word bit_offset; + if (dwarf_formudata(attr, &bit_offset)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member has invalid DW_AT_data_bit_offset"); + } + *ret = bit_offset; + return NULL; + } + + /* + * Otherwise, we might have DW_AT_data_member_location, which is the + * offset in bytes from the beginning of the containing object. 
+ */ + attr = dwarf_attr_integrate(die, DW_AT_data_member_location, &attr_mem); + if (attr) { + err = drgn_parse_dwarf_data_member_location(attr, ret); + if (err) + return err; + *ret *= 8; + } else { + *ret = 0; + } + + /* + * In addition to DW_AT_data_member_location, a bit field might have + * DW_AT_bit_offset, which is the offset in bits of the most significant + * bit of the bit field from the most significant bit of the containing + * object. + */ + attr = dwarf_attr_integrate(die, DW_AT_bit_offset, &attr_mem); + if (attr) { + Dwarf_Word bit_offset; + if (dwarf_formudata(attr, &bit_offset)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member has invalid DW_AT_bit_offset"); + } + + /* + * If the architecture is little-endian, then we must compute + * the location of the most significant bit from the size of the + * member, then subtract the bit offset and bit size to get the + * location of the beginning of the bit field. + * + * If the architecture is big-endian, then the most significant + * bit of the bit field is the beginning. + */ + if (little_endian) { + err = drgn_lazy_object_evaluate(member_object); + if (err) + return err; + + attr = dwarf_attr_integrate(die, DW_AT_byte_size, + &attr_mem); + /* + * If the member has an explicit byte size, we can use + * that. Otherwise, we have to get it from the member + * type. 
+ */ + uint64_t byte_size; + if (attr) { + Dwarf_Word word; + if (dwarf_formudata(attr, &word)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member has invalid DW_AT_byte_size"); + } + byte_size = word; + } else { + if (!drgn_type_has_size(member_object->obj.type)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member bit field type does not have size"); + } + err = drgn_type_sizeof(member_object->obj.type, + &byte_size); + if (err) + return err; + } + *ret += 8 * byte_size - bit_offset - member_object->obj.bit_size; + } else { + *ret += bit_offset; + } + } + + return NULL; +} + +static struct drgn_error * +parse_member(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, Dwarf_Die *die, + bool little_endian, bool can_be_incomplete_array, + struct drgn_compound_type_builder *builder) +{ + struct drgn_error *err; + + Dwarf_Attribute attr_mem, *attr; + const char *name; + if ((attr = dwarf_attr_integrate(die, DW_AT_name, &attr_mem))) { + name = dwarf_formstring(attr); + if (!name) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_member has invalid DW_AT_name"); + } + } else { + name = NULL; + } + + struct drgn_dwarf_member_thunk_arg *thunk_arg = + malloc(sizeof(*thunk_arg)); + if (!thunk_arg) + return &drgn_enomem; + thunk_arg->module = module; + thunk_arg->die = *die; + thunk_arg->can_be_incomplete_array = can_be_incomplete_array; + + union drgn_lazy_object member_object; + drgn_lazy_object_init_thunk(&member_object, dbinfo->prog, + drgn_dwarf_member_thunk_fn, thunk_arg); + + uint64_t bit_offset; + err = parse_member_offset(die, &member_object, little_endian, + &bit_offset); + if (err) + goto err; + + err = drgn_compound_type_builder_add_member(builder, &member_object, + name, bit_offset); + if (err) + goto err; + return NULL; + +err: + drgn_lazy_object_deinit(&member_object); + return err; +} + +struct drgn_dwarf_die_thunk_arg { + struct drgn_debug_info_module *module; + Dwarf_Die die; +}; + +static struct 
drgn_error * +drgn_dwarf_template_type_parameter_thunk_fn(struct drgn_object *res, void *arg_) +{ + struct drgn_error *err; + struct drgn_dwarf_die_thunk_arg *arg = arg_; + if (res) { + struct drgn_qualified_type qualified_type; + err = drgn_type_from_dwarf_attr(drgn_object_program(res)->dbinfo, + arg->module, &arg->die, NULL, + true, true, NULL, + &qualified_type); + if (err) + return err; + + err = drgn_object_set_absent(res, qualified_type, 0); + if (err) + return err; + } + free(arg); + return NULL; +} + +static struct drgn_error * +drgn_dwarf_template_value_parameter_thunk_fn(struct drgn_object *res, + void *arg_) +{ + struct drgn_error *err; + struct drgn_dwarf_die_thunk_arg *arg = arg_; + if (res) { + err = drgn_object_from_dwarf(drgn_object_program(res)->dbinfo, + arg->module, &arg->die, NULL, NULL, + NULL, res); + if (err) + return err; + } + free(arg); + return NULL; +} + +static struct drgn_error * +parse_template_parameter(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, Dwarf_Die *die, + drgn_object_thunk_fn *thunk_fn, + struct drgn_template_parameters_builder *builder) +{ + char tag_buf[DW_TAG_BUF_LEN]; + + Dwarf_Attribute attr_mem, *attr; + const char *name; + if ((attr = dwarf_attr_integrate(die, DW_AT_name, &attr_mem))) { + name = dwarf_formstring(attr); + if (!name) { + return drgn_error_format(DRGN_ERROR_OTHER, + "%s has invalid DW_AT_name", + dwarf_tag_str(die, tag_buf)); + } + } else { + name = NULL; + } + + bool defaulted; + if (dwarf_flag_integrate(die, DW_AT_default_value, &defaulted)) { + return drgn_error_format(DRGN_ERROR_OTHER, + "%s has invalid DW_AT_default_value", + dwarf_tag_str(die, tag_buf)); + } + + struct drgn_dwarf_die_thunk_arg *thunk_arg = + malloc(sizeof(*thunk_arg)); + if (!thunk_arg) + return &drgn_enomem; + thunk_arg->module = module; + thunk_arg->die = *die; + + union drgn_lazy_object argument; + drgn_lazy_object_init_thunk(&argument, dbinfo->prog, thunk_fn, + thunk_arg); + + struct drgn_error *err 
= + drgn_template_parameters_builder_add(builder, &argument, name, + defaulted); + if (err) + drgn_lazy_object_deinit(&argument); + return err; +} + +static struct drgn_error * +drgn_compound_type_from_dwarf(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, const struct drgn_language *lang, + enum drgn_type_kind kind, struct drgn_type **ret) +{ + struct drgn_error *err; + char tag_buf[DW_TAG_BUF_LEN]; + + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr = dwarf_attr_integrate(die, DW_AT_name, + &attr_mem); + const char *tag; + if (attr) { + tag = dwarf_formstring(attr); + if (!tag) { + return drgn_error_format(DRGN_ERROR_OTHER, + "%s has invalid DW_AT_name", + dwarf_tag_str(die, tag_buf)); + } + } else { + tag = NULL; + } + + bool declaration; + if (dwarf_flag(die, DW_AT_declaration, &declaration)) { + return drgn_error_format(DRGN_ERROR_OTHER, + "%s has invalid DW_AT_declaration", + dwarf_tag_str(die, tag_buf)); + } + if (declaration && tag) { + err = drgn_debug_info_find_complete(dbinfo, dwarf_tag(die), tag, + ret); + if (err != &drgn_not_found) + return err; + } + + struct drgn_compound_type_builder builder; + drgn_compound_type_builder_init(&builder, dbinfo->prog, kind); + + int size; + bool little_endian; + if (declaration) { + size = 0; + } else { + size = dwarf_bytesize(die); + if (size == -1) { + return drgn_error_format(DRGN_ERROR_OTHER, + "%s has missing or invalid DW_AT_byte_size", + dwarf_tag_str(die, tag_buf)); + } + dwarf_die_is_little_endian(die, false, &little_endian); + } + + Dwarf_Die member = {}, child; + int r = dwarf_child(die, &child); + while (r == 0) { + switch (dwarf_tag(&child)) { + case DW_TAG_member: + if (!declaration) { + if (member.addr) { + err = parse_member(dbinfo, module, + &member, + little_endian, false, + &builder); + if (err) + goto err; + } + member = child; + } + break; + case DW_TAG_template_type_parameter: + err = parse_template_parameter(dbinfo, module, &child, + 
drgn_dwarf_template_type_parameter_thunk_fn, + &builder.template_builder); + if (err) + goto err; + break; + case DW_TAG_template_value_parameter: + err = parse_template_parameter(dbinfo, module, &child, + drgn_dwarf_template_value_parameter_thunk_fn, + &builder.template_builder); + if (err) + goto err; + break; + default: + break; + } + r = dwarf_siblingof(&child, &child); + } + if (r == -1) { + err = drgn_error_create(DRGN_ERROR_OTHER, + "libdw could not parse DIE children"); + goto err; + } + /* + * Flexible array members are only allowed as the last member of a + * structure with at least one other member. + */ + if (member.addr) { + err = parse_member(dbinfo, module, &member, little_endian, + kind != DRGN_TYPE_UNION && + builder.members.size > 0, + &builder); + if (err) + goto err; + } + + err = drgn_compound_type_create(&builder, tag, size, !declaration, lang, + ret); + if (err) + goto err; + return NULL; + +err: + drgn_compound_type_builder_deinit(&builder); + return err; +} + +static struct drgn_error * +parse_enumerator(Dwarf_Die *die, struct drgn_enum_type_builder *builder, + bool *is_signed) +{ + const char *name = dwarf_diename(die); + if (!name) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_enumerator has missing or invalid DW_AT_name"); + } + + Dwarf_Attribute attr_mem, *attr; + if (!(attr = dwarf_attr_integrate(die, DW_AT_const_value, &attr_mem))) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_enumerator is missing DW_AT_const_value"); + } + struct drgn_error *err; + if (attr->form == DW_FORM_sdata || + attr->form == DW_FORM_implicit_const) { + Dwarf_Sword svalue; + if (dwarf_formsdata(attr, &svalue)) + goto invalid; + err = drgn_enum_type_builder_add_signed(builder, name, + svalue); + /* + * GCC before 7.1 didn't include DW_AT_encoding for + * DW_TAG_enumeration_type DIEs, so we have to guess the sign + * for enum_compatible_type_fallback(). 
+ */ + if (!err && svalue < 0) + *is_signed = true; + } else { + Dwarf_Word uvalue; + if (dwarf_formudata(attr, &uvalue)) + goto invalid; + err = drgn_enum_type_builder_add_unsigned(builder, name, + uvalue); + } + return err; + +invalid: + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_enumerator has invalid DW_AT_const_value"); +} + +/* + * GCC before 5.1 did not include DW_AT_type for DW_TAG_enumeration_type DIEs, + * so we have to fabricate the compatible type. + */ +static struct drgn_error * +enum_compatible_type_fallback(struct drgn_debug_info *dbinfo, + Dwarf_Die *die, bool is_signed, + const struct drgn_language *lang, + struct drgn_type **ret) +{ + int size = dwarf_bytesize(die); + if (size == -1) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_enumeration_type has missing or invalid DW_AT_byte_size"); + } + enum drgn_byte_order byte_order; + dwarf_die_byte_order(die, false, &byte_order); + return drgn_int_type_create(dbinfo->prog, "", size, is_signed, + byte_order, lang, ret); +} + +static struct drgn_error * +drgn_enum_type_from_dwarf(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, Dwarf_Die *die, + const struct drgn_language *lang, + struct drgn_type **ret) +{ + struct drgn_error *err; + + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr = dwarf_attr_integrate(die, DW_AT_name, + &attr_mem); + const char *tag; + if (attr) { + tag = dwarf_formstring(attr); + if (!tag) + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_enumeration_type has invalid DW_AT_name"); + } else { + tag = NULL; + } + + bool declaration; + if (dwarf_flag(die, DW_AT_declaration, &declaration)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_enumeration_type has invalid DW_AT_declaration"); + } + if (declaration && tag) { + err = drgn_debug_info_find_complete(dbinfo, + DW_TAG_enumeration_type, + tag, ret); + if (err != &drgn_not_found) + return err; + } + + if (declaration) { + return drgn_incomplete_enum_type_create(dbinfo->prog, 
tag, lang, + ret); + } + + struct drgn_enum_type_builder builder; + drgn_enum_type_builder_init(&builder, dbinfo->prog); + bool is_signed = false; + Dwarf_Die child; + int r = dwarf_child(die, &child); + while (r == 0) { + if (dwarf_tag(&child) == DW_TAG_enumerator) { + err = parse_enumerator(&child, &builder, &is_signed); + if (err) + goto err; + } + r = dwarf_siblingof(&child, &child); + } + if (r == -1) { + err = drgn_error_create(DRGN_ERROR_OTHER, + "libdw could not parse DIE children"); + goto err; + } + + struct drgn_type *compatible_type; + r = dwarf_type(die, &child); + if (r == -1) { + err = drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_enumeration_type has invalid DW_AT_type"); + goto err; + } else if (r) { + err = enum_compatible_type_fallback(dbinfo, die, is_signed, + lang, &compatible_type); + if (err) + goto err; + } else { + struct drgn_qualified_type qualified_compatible_type; + err = drgn_type_from_dwarf(dbinfo, module, &child, + &qualified_compatible_type); + if (err) + goto err; + compatible_type = + drgn_underlying_type(qualified_compatible_type.type); + if (drgn_type_kind(compatible_type) != DRGN_TYPE_INT) { + err = drgn_error_create(DRGN_ERROR_OTHER, + "DW_AT_type of DW_TAG_enumeration_type is not an integer type"); + goto err; + } + } + + err = drgn_enum_type_create(&builder, tag, compatible_type, lang, ret); + if (err) + goto err; + return NULL; + +err: + drgn_enum_type_builder_deinit(&builder); + return err; +} + +static struct drgn_error * +drgn_typedef_type_from_dwarf(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, const struct drgn_language *lang, + bool can_be_incomplete_array, + bool *is_incomplete_array_ret, + struct drgn_type **ret) +{ + const char *name = dwarf_diename(die); + if (!name) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_typedef has missing or invalid DW_AT_name"); + } + + struct drgn_qualified_type aliased_type; + struct drgn_error *err = 
drgn_type_from_dwarf_attr(dbinfo, module, die, + lang, true, + can_be_incomplete_array, + is_incomplete_array_ret, + &aliased_type); + if (err) + return err; + + return drgn_typedef_type_create(dbinfo->prog, name, aliased_type, lang, + ret); +} + +static struct drgn_error * +drgn_pointer_type_from_dwarf(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, const struct drgn_language *lang, + struct drgn_type **ret) +{ + struct drgn_qualified_type referenced_type; + struct drgn_error *err = drgn_type_from_dwarf_attr(dbinfo, module, die, + lang, true, true, + NULL, + &referenced_type); + if (err) + return err; + + Dwarf_Attribute attr_mem, *attr; + uint64_t size; + if ((attr = dwarf_attr_integrate(die, DW_AT_byte_size, &attr_mem))) { + Dwarf_Word word; + if (dwarf_formudata(attr, &word)) { + return drgn_error_format(DRGN_ERROR_OTHER, + "DW_TAG_pointer_type has invalid DW_AT_byte_size"); + } + size = word; + } else { + uint8_t address_size; + err = drgn_program_address_size(dbinfo->prog, &address_size); + if (err) + return err; + size = address_size; + } + + /* + * The DWARF 5 specification doesn't mention DW_AT_endianity for + * DW_TAG_pointer_type DIEs, and GCC as of version 10.2 doesn't emit it + * even for pointers stored in the opposite byte order (e.g., when using + * scalar_storage_order), but it probably should. 
+ */ + enum drgn_byte_order byte_order; + dwarf_die_byte_order(die, false, &byte_order); + return drgn_pointer_type_create(dbinfo->prog, referenced_type, size, + byte_order, lang, ret); +} + +struct array_dimension { + uint64_t length; + bool is_complete; +}; + +DEFINE_VECTOR(array_dimension_vector, struct array_dimension) + +static struct drgn_error *subrange_length(Dwarf_Die *die, + struct array_dimension *dimension) +{ + Dwarf_Attribute attr_mem; + Dwarf_Attribute *attr; + Dwarf_Word word; + + if (!(attr = dwarf_attr_integrate(die, DW_AT_upper_bound, &attr_mem)) && + !(attr = dwarf_attr_integrate(die, DW_AT_count, &attr_mem))) { + dimension->is_complete = false; + return NULL; + } + + if (dwarf_formudata(attr, &word)) { + return drgn_error_format(DRGN_ERROR_OTHER, + "DW_TAG_subrange_type has invalid %s", + attr->code == DW_AT_upper_bound ? + "DW_AT_upper_bound" : + "DW_AT_count"); + } + + dimension->is_complete = true; + /* + * GCC emits a DW_FORM_sdata DW_AT_upper_bound of -1 for empty array + * variables without an explicit size (e.g., `int arr[] = {};`). 
+ */ + if (attr->code == DW_AT_upper_bound && attr->form == DW_FORM_sdata && + word == (Dwarf_Word)-1) { + dimension->length = 0; + } else if (attr->code == DW_AT_upper_bound) { + if (word >= UINT64_MAX) { + return drgn_error_create(DRGN_ERROR_OVERFLOW, + "DW_AT_upper_bound is too large"); + } + dimension->length = (uint64_t)word + 1; + } else { + if (word > UINT64_MAX) { + return drgn_error_create(DRGN_ERROR_OVERFLOW, + "DW_AT_count is too large"); + } + dimension->length = word; + } + return NULL; +} + +static struct drgn_error * +drgn_array_type_from_dwarf(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, const struct drgn_language *lang, + bool can_be_incomplete_array, + bool *is_incomplete_array_ret, + struct drgn_type **ret) +{ + struct drgn_error *err; + struct array_dimension_vector dimensions = VECTOR_INIT; + struct array_dimension *dimension; + Dwarf_Die child; + int r = dwarf_child(die, &child); + while (r == 0) { + if (dwarf_tag(&child) == DW_TAG_subrange_type) { + dimension = array_dimension_vector_append_entry(&dimensions); + if (!dimension) { + err = &drgn_enomem; + goto out; + } + err = subrange_length(&child, dimension); + if (err) + goto out; + } + r = dwarf_siblingof(&child, &child); + } + if (r == -1) { + err = drgn_error_create(DRGN_ERROR_OTHER, + "libdw could not parse DIE children"); + goto out; + } + if (!dimensions.size) { + dimension = array_dimension_vector_append_entry(&dimensions); + if (!dimension) { + err = &drgn_enomem; + goto out; + } + dimension->is_complete = false; + } + + struct drgn_qualified_type element_type; + err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, false, false, + NULL, &element_type); + if (err) + goto out; + + *is_incomplete_array_ret = !dimensions.data[0].is_complete; + struct drgn_type *type; + do { + dimension = array_dimension_vector_pop(&dimensions); + if (dimension->is_complete) { + err = drgn_array_type_create(dbinfo->prog, element_type, + 
dimension->length, lang, + &type); + } else if (dimensions.size || !can_be_incomplete_array) { + err = drgn_array_type_create(dbinfo->prog, element_type, + 0, lang, &type); + } else { + err = drgn_incomplete_array_type_create(dbinfo->prog, + element_type, + lang, &type); + } + if (err) + goto out; + + element_type.type = type; + element_type.qualifiers = 0; + } while (dimensions.size); + + *ret = type; + err = NULL; +out: + array_dimension_vector_deinit(&dimensions); + return err; +} + +static struct drgn_error * +drgn_dwarf_formal_parameter_thunk_fn(struct drgn_object *res, void *arg_) +{ + struct drgn_error *err; + struct drgn_dwarf_die_thunk_arg *arg = arg_; + if (res) { + struct drgn_qualified_type qualified_type; + err = drgn_type_from_dwarf_attr(drgn_object_program(res)->dbinfo, + arg->module, &arg->die, NULL, + false, true, NULL, + &qualified_type); + if (err) + return err; + + err = drgn_object_set_absent(res, qualified_type, 0); + if (err) + return err; + } + free(arg); + return NULL; +} + +static struct drgn_error * +parse_formal_parameter(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, Dwarf_Die *die, + struct drgn_function_type_builder *builder) +{ + Dwarf_Attribute attr_mem, *attr; + const char *name; + if ((attr = dwarf_attr_integrate(die, DW_AT_name, &attr_mem))) { + name = dwarf_formstring(attr); + if (!name) { + return drgn_error_create(DRGN_ERROR_OTHER, + "DW_TAG_formal_parameter has invalid DW_AT_name"); + } + } else { + name = NULL; + } + + struct drgn_dwarf_die_thunk_arg *thunk_arg = + malloc(sizeof(*thunk_arg)); + if (!thunk_arg) + return &drgn_enomem; + thunk_arg->module = module; + thunk_arg->die = *die; + + union drgn_lazy_object default_argument; + drgn_lazy_object_init_thunk(&default_argument, dbinfo->prog, + drgn_dwarf_formal_parameter_thunk_fn, + thunk_arg); + + struct drgn_error *err = + drgn_function_type_builder_add_parameter(builder, + &default_argument, + name); + if (err) + 
drgn_lazy_object_deinit(&default_argument); + return err; +} + +static struct drgn_error * +drgn_function_type_from_dwarf(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, const struct drgn_language *lang, + struct drgn_type **ret) +{ + struct drgn_error *err; + char tag_buf[DW_TAG_BUF_LEN]; + + struct drgn_function_type_builder builder; + drgn_function_type_builder_init(&builder, dbinfo->prog); + bool is_variadic = false; + Dwarf_Die child; + int r = dwarf_child(die, &child); + while (r == 0) { + switch (dwarf_tag(&child)) { + case DW_TAG_formal_parameter: + if (is_variadic) { + err = drgn_error_format(DRGN_ERROR_OTHER, + "%s has DW_TAG_formal_parameter child after DW_TAG_unspecified_parameters child", + dwarf_tag_str(die, + tag_buf)); + goto err; + } + err = parse_formal_parameter(dbinfo, module, &child, + &builder); + if (err) + goto err; + break; + case DW_TAG_unspecified_parameters: + if (is_variadic) { + err = drgn_error_format(DRGN_ERROR_OTHER, + "%s has multiple DW_TAG_unspecified_parameters children", + dwarf_tag_str(die, + tag_buf)); + goto err; + } + is_variadic = true; + break; + case DW_TAG_template_type_parameter: + err = parse_template_parameter(dbinfo, module, &child, + drgn_dwarf_template_type_parameter_thunk_fn, + &builder.template_builder); + if (err) + goto err; + break; + case DW_TAG_template_value_parameter: + err = parse_template_parameter(dbinfo, module, &child, + drgn_dwarf_template_value_parameter_thunk_fn, + &builder.template_builder); + if (err) + goto err; + break; + default: + break; + } + r = dwarf_siblingof(&child, &child); + } + if (r == -1) { + err = drgn_error_create(DRGN_ERROR_OTHER, + "libdw could not parse DIE children"); + goto err; + } + + struct drgn_qualified_type return_type; + err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, true, true, + NULL, &return_type); + if (err) + goto err; + + err = drgn_function_type_create(&builder, return_type, is_variadic, + lang, ret); + if 
(err) + goto err; + return NULL; + +err: + drgn_function_type_builder_deinit(&builder); + return err; +} + +static struct drgn_error * +drgn_type_from_dwarf_internal(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, bool can_be_incomplete_array, + bool *is_incomplete_array_ret, + struct drgn_qualified_type *ret) +{ + if (dbinfo->dwarf.depth >= 1000) { + return drgn_error_create(DRGN_ERROR_RECURSION, + "maximum DWARF type parsing depth exceeded"); + } + + /* If the DIE has a type unit signature, follow it. */ + Dwarf_Die definition_die; + { + Dwarf_Attribute attr_mem, *attr; + if ((attr = dwarf_attr_integrate(die, DW_AT_signature, + &attr_mem))) { + if (!dwarf_formref_die(attr, &definition_die)) + return drgn_error_libdw(); + die = &definition_die; + } + } + + /* If we got a declaration, try to find the definition. */ + bool declaration; + if (dwarf_flag(die, DW_AT_declaration, &declaration)) + return drgn_error_libdw(); + if (declaration) { + uintptr_t die_addr; + if (drgn_dwarf_find_definition(dbinfo, (uintptr_t)die->addr, + &module, &die_addr)) { + Dwarf_Addr bias; + Dwarf *dwarf = dwfl_module_getdwarf(module->dwfl_module, + &bias); + if (!dwarf) + return drgn_error_libdwfl(); + uintptr_t start = + (uintptr_t)module->scn_data[DRGN_SCN_DEBUG_INFO]->d_buf; + size_t size = + module->scn_data[DRGN_SCN_DEBUG_INFO]->d_size; + if (die_addr >= start && die_addr < start + size) { + if (!dwarf_offdie(dwarf, die_addr - start, + &definition_die)) + return drgn_error_libdw(); + } else { + start = (uintptr_t)module->scn_data[DRGN_SCN_DEBUG_TYPES]->d_buf; + /* Assume .debug_types */ + if (!dwarf_offdie_types(dwarf, die_addr - start, + &definition_die)) + return drgn_error_libdw(); + } + die = &definition_die; + } + } + + struct drgn_dwarf_type_map_entry entry = { + .key = die->addr, + }; + struct hash_pair hp = drgn_dwarf_type_map_hash(&entry.key); + struct drgn_dwarf_type_map_iterator it = + 
drgn_dwarf_type_map_search_hashed(&dbinfo->dwarf.types, + &entry.key, hp); + if (it.entry) { + if (!can_be_incomplete_array && + it.entry->value.is_incomplete_array) { + it = drgn_dwarf_type_map_search_hashed(&dbinfo->dwarf.cant_be_incomplete_array_types, + &entry.key, hp); + } + if (it.entry) { + ret->type = it.entry->value.type; + ret->qualifiers = it.entry->value.qualifiers; + return NULL; + } + } + + const struct drgn_language *lang; + struct drgn_error *err = drgn_language_from_die(die, true, &lang); + if (err) + return err; + + ret->qualifiers = 0; + dbinfo->dwarf.depth++; + entry.value.is_incomplete_array = false; + switch (dwarf_tag(die)) { + case DW_TAG_const_type: + err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, true, + can_be_incomplete_array, + &entry.value.is_incomplete_array, + ret); + ret->qualifiers |= DRGN_QUALIFIER_CONST; + break; + case DW_TAG_restrict_type: + err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, true, + can_be_incomplete_array, + &entry.value.is_incomplete_array, + ret); + ret->qualifiers |= DRGN_QUALIFIER_RESTRICT; + break; + case DW_TAG_volatile_type: + err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, true, + can_be_incomplete_array, + &entry.value.is_incomplete_array, + ret); + ret->qualifiers |= DRGN_QUALIFIER_VOLATILE; + break; + case DW_TAG_atomic_type: + err = drgn_type_from_dwarf_attr(dbinfo, module, die, lang, true, + can_be_incomplete_array, + &entry.value.is_incomplete_array, + ret); + ret->qualifiers |= DRGN_QUALIFIER_ATOMIC; + break; + case DW_TAG_base_type: + err = drgn_base_type_from_dwarf(dbinfo, module, die, lang, + &ret->type); + break; + case DW_TAG_structure_type: + err = drgn_compound_type_from_dwarf(dbinfo, module, die, lang, + DRGN_TYPE_STRUCT, + &ret->type); + break; + case DW_TAG_union_type: + err = drgn_compound_type_from_dwarf(dbinfo, module, die, lang, + DRGN_TYPE_UNION, + &ret->type); + break; + case DW_TAG_class_type: + err = drgn_compound_type_from_dwarf(dbinfo, module, 
die, lang, + DRGN_TYPE_CLASS, + &ret->type); + break; + case DW_TAG_enumeration_type: + err = drgn_enum_type_from_dwarf(dbinfo, module, die, lang, + &ret->type); + break; + case DW_TAG_typedef: + err = drgn_typedef_type_from_dwarf(dbinfo, module, die, lang, + can_be_incomplete_array, + &entry.value.is_incomplete_array, + &ret->type); + break; + case DW_TAG_pointer_type: + err = drgn_pointer_type_from_dwarf(dbinfo, module, die, lang, + &ret->type); + break; + case DW_TAG_array_type: + err = drgn_array_type_from_dwarf(dbinfo, module, die, lang, + can_be_incomplete_array, + &entry.value.is_incomplete_array, + &ret->type); + break; + case DW_TAG_subroutine_type: + case DW_TAG_subprogram: + err = drgn_function_type_from_dwarf(dbinfo, module, die, lang, + &ret->type); + break; + default: + err = drgn_error_format(DRGN_ERROR_OTHER, + "unknown DWARF type tag 0x%x", + dwarf_tag(die)); + break; + } + dbinfo->dwarf.depth--; + if (err) + return err; + + entry.value.type = ret->type; + entry.value.qualifiers = ret->qualifiers; + struct drgn_dwarf_type_map *map; + if (!can_be_incomplete_array && entry.value.is_incomplete_array) + map = &dbinfo->dwarf.cant_be_incomplete_array_types; + else + map = &dbinfo->dwarf.types; + if (drgn_dwarf_type_map_insert_searched(map, &entry, hp, NULL) == -1) { + /* + * This will "leak" the type we created, but it'll still be + * cleaned up when the program is freed. 
+ */ + return &drgn_enomem; + } + if (is_incomplete_array_ret) + *is_incomplete_array_ret = entry.value.is_incomplete_array; + return NULL; +} + +struct drgn_error *drgn_debug_info_find_type(enum drgn_type_kind kind, + const char *name, size_t name_len, + const char *filename, void *arg, + struct drgn_qualified_type *ret) +{ + struct drgn_error *err; + struct drgn_debug_info *dbinfo = arg; + + uint64_t tag; + switch (kind) { + case DRGN_TYPE_INT: + case DRGN_TYPE_BOOL: + case DRGN_TYPE_FLOAT: + tag = DW_TAG_base_type; + break; + case DRGN_TYPE_STRUCT: + tag = DW_TAG_structure_type; + break; + case DRGN_TYPE_UNION: + tag = DW_TAG_union_type; + break; + case DRGN_TYPE_CLASS: + tag = DW_TAG_class_type; + break; + case DRGN_TYPE_ENUM: + tag = DW_TAG_enumeration_type; + break; + case DRGN_TYPE_TYPEDEF: + tag = DW_TAG_typedef; + break; + default: + UNREACHABLE(); + } + + struct drgn_dwarf_index_iterator it; + err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dwarf.global, name, + name_len, &tag, 1); + if (err) + return err; + struct drgn_dwarf_index_die *index_die; + while ((index_die = drgn_dwarf_index_iterator_next(&it))) { + Dwarf_Die die; + err = drgn_dwarf_index_get_die(index_die, &die); + if (err) + return err; + if (die_matches_filename(&die, filename)) { + err = drgn_type_from_dwarf(dbinfo, index_die->module, + &die, ret); + if (err) + return err; + /* + * For DW_TAG_base_type, we need to check that the type + * we found was the right kind. + */ + if (drgn_type_kind(ret->type) == kind) + return NULL; + } + } + return &drgn_not_found; +} + +struct drgn_error * +drgn_debug_info_find_object(const char *name, size_t name_len, + const char *filename, + enum drgn_find_object_flags flags, void *arg, + struct drgn_object *ret) +{ + struct drgn_error *err; + struct drgn_debug_info *dbinfo = arg; + + struct drgn_namespace_dwarf_index *ns = &dbinfo->dwarf.global; + if (name_len >= 2 && memcmp(name, "::", 2) == 0) { + /* Explicit global namespace. 
*/ + name_len -= 2; + name += 2; + } + const char *colons; + while ((colons = memmem(name, name_len, "::", 2))) { + struct drgn_dwarf_index_iterator it; + uint64_t ns_tag = DW_TAG_namespace; + err = drgn_dwarf_index_iterator_init(&it, ns, name, + colons - name, &ns_tag, 1); + if (err) + return err; + struct drgn_dwarf_index_die *index_die = + drgn_dwarf_index_iterator_next(&it); + if (!index_die) + return &drgn_not_found; + ns = index_die->namespace; + name_len -= colons + 2 - name; + name = colons + 2; + } + + uint64_t tags[3]; + size_t num_tags = 0; + if (flags & DRGN_FIND_OBJECT_CONSTANT) + tags[num_tags++] = DW_TAG_enumerator; + if (flags & DRGN_FIND_OBJECT_FUNCTION) + tags[num_tags++] = DW_TAG_subprogram; + if (flags & DRGN_FIND_OBJECT_VARIABLE) + tags[num_tags++] = DW_TAG_variable; + + struct drgn_dwarf_index_iterator it; + err = drgn_dwarf_index_iterator_init(&it, ns, name, name_len, tags, + num_tags); + if (err) + return err; + struct drgn_dwarf_index_die *index_die; + while ((index_die = drgn_dwarf_index_iterator_next(&it))) { + Dwarf_Die die; + err = drgn_dwarf_index_get_die(index_die, &die); + if (err) + return err; + if (!die_matches_filename(&die, filename)) + continue; + if (dwarf_tag(&die) == DW_TAG_enumeration_type) { + return drgn_object_from_dwarf_enumerator(dbinfo, + index_die->module, + &die, name, + ret); + } else { + return drgn_object_from_dwarf(dbinfo, index_die->module, + &die, NULL, NULL, NULL, + ret); + } + } + return &drgn_not_found; +} + +/* + * Call frame information. + */ + +struct drgn_dwarf_cie { + /* Whether this CIE is from .eh_frame. */ + bool is_eh; + /* Size of an address in this CIE in bytes. */ + uint8_t address_size; + /* DW_EH_PE_* encoding of addresses in this CIE. */ + uint8_t address_encoding; + /* Whether this CIE has a 'z' augmentation. */ + bool have_augmentation_length; + /* Whether this CIE is for a signal handler ('S' augmentation). 
*/ + bool signal_frame; + drgn_register_number return_address_register; + uint64_t code_alignment_factor; + int64_t data_alignment_factor; + const char *initial_instructions; + size_t initial_instructions_size; +}; + +DEFINE_VECTOR(drgn_dwarf_fde_vector, struct drgn_dwarf_fde) +DEFINE_VECTOR(drgn_dwarf_cie_vector, struct drgn_dwarf_cie) +DEFINE_HASH_MAP(drgn_dwarf_cie_map, size_t, size_t, int_key_hash_pair, + scalar_key_eq) + +static struct drgn_error * +drgn_dwarf_cfi_next_encoded(struct drgn_debug_info_buffer *buffer, + uint8_t address_size, uint8_t encoding, + uint64_t func_addr, uint64_t *ret) +{ + struct drgn_error *err; + + /* Not currently used for CFI. */ + if (encoding & DW_EH_PE_indirect) { +unknown_fde_encoding: + return binary_buffer_error(&buffer->bb, + "unknown EH encoding %#" PRIx8, + encoding); + } + + size_t pos = (buffer->bb.pos - + (char *)buffer->module->scn_data[buffer->scn]->d_buf); + uint64_t base; + switch (encoding & 0x70) { + case DW_EH_PE_absptr: + base = 0; + break; + case DW_EH_PE_pcrel: + base = buffer->module->dwarf.pcrel_base + pos; + break; + case DW_EH_PE_textrel: + base = buffer->module->dwarf.textrel_base; + break; + case DW_EH_PE_datarel: + base = buffer->module->dwarf.datarel_base; + break; + case DW_EH_PE_funcrel: + /* Relative to the FDE's initial location. 
*/ + base = func_addr; + break; + case DW_EH_PE_aligned: + base = 0; + if (pos % address_size != 0 && + (err = binary_buffer_skip(&buffer->bb, + address_size - pos % address_size))) + return err; + break; + default: + goto unknown_fde_encoding; + } + + uint64_t offset; + switch (encoding & 0xf) { + case DW_EH_PE_absptr: + if ((err = binary_buffer_next_uint(&buffer->bb, address_size, + &offset))) + return err; + break; + case DW_EH_PE_uleb128: + if ((err = binary_buffer_next_uleb128(&buffer->bb, &offset))) + return err; + break; + case DW_EH_PE_udata2: + if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, + &offset))) + return err; + break; + case DW_EH_PE_udata4: + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &offset))) + return err; + break; + case DW_EH_PE_udata8: + if ((err = binary_buffer_next_u64(&buffer->bb, &offset))) + return err; + break; + case DW_EH_PE_sleb128: + if ((err = binary_buffer_next_sleb128_into_u64(&buffer->bb, + &offset))) + return err; + break; + case DW_EH_PE_sdata2: + if ((err = binary_buffer_next_s16_into_u64(&buffer->bb, + &offset))) + return err; + break; + case DW_EH_PE_sdata4: + if ((err = binary_buffer_next_s32_into_u64(&buffer->bb, + &offset))) + return err; + break; + case DW_EH_PE_sdata8: + if ((err = binary_buffer_next_s64_into_u64(&buffer->bb, + &offset))) + return err; + break; + default: + goto unknown_fde_encoding; + } + *ret = (base + offset) & uint_max(address_size); + + return NULL; +} + +static struct drgn_error * +drgn_parse_dwarf_cie(struct drgn_debug_info_module *module, + enum drgn_debug_info_scn scn, size_t cie_pointer, + struct drgn_dwarf_cie *cie) +{ + bool is_eh = scn == DRGN_SCN_EH_FRAME; + struct drgn_error *err; + + cie->is_eh = is_eh; + + struct drgn_debug_info_buffer buffer; + drgn_debug_info_buffer_init(&buffer, module, scn); + buffer.bb.pos += cie_pointer; + + uint32_t tmp; + if ((err = binary_buffer_next_u32(&buffer.bb, &tmp))) + return err; + bool is_64_bit = tmp == UINT32_C(0xffffffff); 
+ uint64_t length; + if (is_64_bit) { + if ((err = binary_buffer_next_u64(&buffer.bb, &length))) + return err; + } else { + length = tmp; + } + if (length > buffer.bb.end - buffer.bb.pos) { + return binary_buffer_error(&buffer.bb, + "entry length is out of bounds"); + } + buffer.bb.end = buffer.bb.pos + length; + + uint64_t cie_id, expected_cie_id; + if (is_64_bit) { + if ((err = binary_buffer_next_u64(&buffer.bb, &cie_id))) + return err; + expected_cie_id = is_eh ? 0 : UINT64_C(0xffffffffffffffff); + } else { + if ((err = binary_buffer_next_u32_into_u64(&buffer.bb, + &cie_id))) + return err; + expected_cie_id = is_eh ? 0 : UINT64_C(0xffffffff); + } + if (cie_id != expected_cie_id) + return binary_buffer_error(&buffer.bb, "invalid CIE ID"); + + uint8_t version; + if ((err = binary_buffer_next_u8(&buffer.bb, &version))) + return err; + if (version < 1 || version == 2 || version > 4) { + return binary_buffer_error(&buffer.bb, + "unknown CIE version %" PRIu8, + version); + } + + const char *augmentation; + size_t augmentation_len; + if ((err = binary_buffer_next_string(&buffer.bb, &augmentation, + &augmentation_len))) + return err; + cie->have_augmentation_length = augmentation[0] == 'z'; + cie->signal_frame = false; + for (size_t i = 0; i < augmentation_len; i++) { + switch (augmentation[i]) { + case 'z': + if (i != 0) + goto unknown_augmentation; + break; + case 'L': + case 'P': + case 'R': + if (augmentation[0] != 'z') + goto unknown_augmentation; + break; + case 'S': + cie->signal_frame = true; + break; + default: +unknown_augmentation: + /* + * We could ignore this CIE and all FDEs that reference + * it or skip the augmentation if we have its length, + * but let's fail loudly so that we find out about + * missing support. 
+ */ + return binary_buffer_error_at(&buffer.bb, + &augmentation[i], + "unknown CFI augmentation %s", + augmentation); + } + } + + if (version >= 4) { + if ((err = binary_buffer_next_u8(&buffer.bb, + &cie->address_size))) + return err; + if (cie->address_size < 1 || cie->address_size > 8) { + return binary_buffer_error(&buffer.bb, + "unsupported address size %" PRIu8, + cie->address_size); + } + uint8_t segment_selector_size; + if ((err = binary_buffer_next_u8(&buffer.bb, + &segment_selector_size))) + return err; + if (segment_selector_size) { + return binary_buffer_error(&buffer.bb, + "unsupported segment selector size %" PRIu8, + segment_selector_size); + } + } else { + cie->address_size = + drgn_platform_address_size(&module->platform); + } + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &cie->code_alignment_factor)) || + (err = binary_buffer_next_sleb128(&buffer.bb, + &cie->data_alignment_factor))) + return err; + uint64_t return_address_register; + if (version >= 3) { + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &return_address_register))) + return err; + } else { + if ((err = binary_buffer_next_u8_into_u64(&buffer.bb, + &return_address_register))) + return err; + } + cie->return_address_register = + module->platform.arch->dwarf_regno_to_internal(return_address_register); + if (cie->return_address_register == DRGN_REGISTER_NUMBER_UNKNOWN) { + return binary_buffer_error(&buffer.bb, + "unknown return address register"); + } + cie->address_encoding = DW_EH_PE_absptr; + if (augmentation[0] == 'z') { + for (size_t i = 0; i < augmentation_len; i++) { + switch (augmentation[i]) { + case 'z': + if ((err = binary_buffer_skip_leb128(&buffer.bb))) + return err; + break; + case 'L': + if ((err = binary_buffer_skip(&buffer.bb, 1))) + return err; + break; + case 'P': { + uint8_t encoding; + if ((err = binary_buffer_next_u8(&buffer.bb, &encoding))) + return err; + /* + * We don't need the result, so don't bother + * dereferencing. 
+ */ + encoding &= ~DW_EH_PE_indirect; + uint64_t unused; + if ((err = drgn_dwarf_cfi_next_encoded(&buffer, + cie->address_size, + encoding, + 0, + &unused))) + return err; + break; + } + case 'R': + if ((err = binary_buffer_next_u8(&buffer.bb, + &cie->address_encoding))) + return err; + break; + } + } + } + cie->initial_instructions = buffer.bb.pos; + cie->initial_instructions_size = buffer.bb.end - buffer.bb.pos; + return NULL; +} + +static struct drgn_error * +drgn_parse_dwarf_frames(struct drgn_debug_info_module *module, + enum drgn_debug_info_scn scn, + struct drgn_dwarf_cie_vector *cies, + struct drgn_dwarf_fde_vector *fdes) +{ + bool is_eh = scn == DRGN_SCN_EH_FRAME; + struct drgn_error *err; + + if (!module->scns[scn]) + return NULL; + err = drgn_debug_info_module_cache_section(module, scn); + if (err) + return err; + Elf_Data *data = module->scn_data[scn]; + struct drgn_debug_info_buffer buffer; + drgn_debug_info_buffer_init(&buffer, module, scn); + + struct drgn_dwarf_cie_map cie_map = HASH_TABLE_INIT; + while (binary_buffer_has_next(&buffer.bb)) { + uint32_t tmp; + if ((err = binary_buffer_next_u32(&buffer.bb, &tmp))) + goto out; + bool is_64_bit = tmp == UINT32_C(0xffffffff); + uint64_t length; + if (is_64_bit) { + if ((err = binary_buffer_next_u64(&buffer.bb, &length))) + goto out; + } else { + length = tmp; + } + /* + * Technically, a length of zero is only a terminator in + * .eh_frame, but other consumers (binutils, elfutils, GDB) + * handle it the same way in .debug_frame. + */ + if (length == 0) + break; + if (length > buffer.bb.end - buffer.bb.pos) { + err = binary_buffer_error(&buffer.bb, + "entry length is out of bounds"); + goto out; + } + buffer.bb.end = buffer.bb.pos + length; + + /* + * The Linux Standard Base Core Specification [1] states that + * the CIE ID in .eh_frame is always 4 bytes. However, other + * consumers handle it the same as in .debug_frame (8 bytes for + * the 64-bit format). 
+ * + * 1: https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html + */ + uint64_t cie_pointer, cie_id; + if (is_64_bit) { + if ((err = binary_buffer_next_u64(&buffer.bb, + &cie_pointer))) + goto out; + cie_id = is_eh ? 0 : UINT64_C(0xffffffffffffffff); + } else { + if ((err = binary_buffer_next_u32_into_u64(&buffer.bb, + &cie_pointer))) + goto out; + cie_id = is_eh ? 0 : UINT64_C(0xffffffff); + } + + if (cie_pointer != cie_id) { + if (is_eh) { + size_t pointer_offset = + (buffer.bb.pos + - (is_64_bit ? 8 : 4) + - (char *)data->d_buf); + if (cie_pointer > pointer_offset) { + err = binary_buffer_error(&buffer.bb, + "CIE pointer is out of bounds"); + goto out; + } + cie_pointer = pointer_offset - cie_pointer; + } else if (cie_pointer > data->d_size) { + err = binary_buffer_error(&buffer.bb, + "CIE pointer is out of bounds"); + goto out; + } + struct drgn_dwarf_fde *fde = + drgn_dwarf_fde_vector_append_entry(fdes); + if (!fde) { + err = &drgn_enomem; + goto out; + } + struct drgn_dwarf_cie_map_entry entry = { + .key = cie_pointer, + .value = cies->size, + }; + struct drgn_dwarf_cie_map_iterator it; + int r = drgn_dwarf_cie_map_insert(&cie_map, &entry, + &it); + struct drgn_dwarf_cie *cie; + if (r > 0) { + cie = drgn_dwarf_cie_vector_append_entry(cies); + if (!cie) { + err = &drgn_enomem; + goto out; + } + err = drgn_parse_dwarf_cie(module, scn, + cie_pointer, cie); + if (err) + goto out; + } else if (r == 0) { + cie = &cies->data[it.entry->value]; + } else { + err = &drgn_enomem; + goto out; + } + if ((err = drgn_dwarf_cfi_next_encoded(&buffer, + cie->address_size, + cie->address_encoding, + 0, + &fde->initial_location)) || + (err = drgn_dwarf_cfi_next_encoded(&buffer, + cie->address_size, + cie->address_encoding & 0xf, + 0, + &fde->address_range))) + goto out; + if (cie->have_augmentation_length) { + uint64_t augmentation_length; + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &augmentation_length))) + goto out; + if 
(augmentation_length > + buffer.bb.end - buffer.bb.pos) { + err = binary_buffer_error(&buffer.bb, + "augmentation length is out of bounds"); + goto out; + } + buffer.bb.pos += augmentation_length; + } + fde->cie = it.entry->value; + fde->instructions = buffer.bb.pos; + fde->instructions_size = buffer.bb.end - buffer.bb.pos; + } + + buffer.bb.pos = buffer.bb.end; + buffer.bb.end = (const char *)data->d_buf + data->d_size; + } + + err = NULL; +out: + drgn_dwarf_cie_map_deinit(&cie_map); + return err; +} + +static void drgn_debug_info_cache_sh_addr(struct drgn_debug_info_module *module, + enum drgn_debug_info_scn scn, + uint64_t *addr) +{ + if (module->scns[scn]) { + GElf_Shdr shdr_mem; + GElf_Shdr *shdr = gelf_getshdr(module->scns[scn], &shdr_mem); + if (shdr) + *addr = shdr->sh_addr; + } +} + +static int drgn_dwarf_fde_compar(const void *_a, const void *_b, void *arg) +{ + const struct drgn_dwarf_fde *a = _a; + const struct drgn_dwarf_fde *b = _b; + const struct drgn_dwarf_cie *cies = arg; + if (a->initial_location < b->initial_location) + return -1; + else if (a->initial_location > b->initial_location) + return 1; + else + return cies[a->cie].is_eh - cies[b->cie].is_eh; +} + +static struct drgn_error * +drgn_debug_info_parse_frames(struct drgn_debug_info_module *module) +{ + struct drgn_error *err; + + drgn_debug_info_cache_sh_addr(module, DRGN_SCN_EH_FRAME, + &module->dwarf.pcrel_base); + drgn_debug_info_cache_sh_addr(module, DRGN_SCN_TEXT, + &module->dwarf.textrel_base); + drgn_debug_info_cache_sh_addr(module, DRGN_SCN_GOT, + &module->dwarf.datarel_base); + + struct drgn_dwarf_cie_vector cies = VECTOR_INIT; + struct drgn_dwarf_fde_vector fdes = VECTOR_INIT; + + err = drgn_parse_dwarf_frames(module, DRGN_SCN_DEBUG_FRAME, &cies, + &fdes); + if (err) + goto err; + err = drgn_parse_dwarf_frames(module, DRGN_SCN_EH_FRAME, &cies, &fdes); + if (err) + goto err; + + drgn_dwarf_cie_vector_shrink_to_fit(&cies); + + /* + * Sort FDEs and remove duplicates, preferring 
.debug_frame over + * .eh_frame. + */ + qsort_r(fdes.data, fdes.size, sizeof(fdes.data[0]), + drgn_dwarf_fde_compar, cies.data); + if (fdes.size > 0) { + size_t src = 1, dst = 1; + for (; src < fdes.size; src++) { + if (fdes.data[src].initial_location != + fdes.data[dst - 1].initial_location) { + if (src != dst) + fdes.data[dst] = fdes.data[src]; + dst++; + } + } + fdes.size = dst; + } + drgn_dwarf_fde_vector_shrink_to_fit(&fdes); + + module->dwarf.cies = cies.data; + module->dwarf.fdes = fdes.data; + module->dwarf.num_fdes = fdes.size; + return NULL; + +err: + drgn_dwarf_fde_vector_deinit(&fdes); + drgn_dwarf_cie_vector_deinit(&cies); + return err; +} + +static struct drgn_error * +drgn_debug_info_find_fde(struct drgn_debug_info_module *module, + uint64_t unbiased_pc, struct drgn_dwarf_fde **ret) +{ + struct drgn_error *err; + + if (!module->parsed_frames) { + err = drgn_debug_info_parse_frames(module); + if (err) + return err; + module->parsed_frames = true; + } + + /* Binary search for the containing FDE. 
*/ + size_t lo = 0, hi = module->dwarf.num_fdes; + while (lo < hi) { + size_t mid = lo + (hi - lo) / 2; + struct drgn_dwarf_fde *fde = &module->dwarf.fdes[mid]; + if (unbiased_pc < fde->initial_location) { + hi = mid; + } else if (unbiased_pc - fde->initial_location >= + fde->address_range) { + lo = mid + 1; + } else { + *ret = fde; + return NULL; + } + } + *ret = NULL; + return NULL; +} + +static struct drgn_error * +drgn_dwarf_cfi_next_offset(struct drgn_debug_info_buffer *buffer, int64_t *ret) +{ + struct drgn_error *err; + uint64_t offset; + if ((err = binary_buffer_next_uleb128(&buffer->bb, &offset))) + return err; + if (offset > INT64_MAX) + return binary_buffer_error(&buffer->bb, "offset is too large"); + *ret = offset; + return NULL; +} + +static struct drgn_error * +drgn_dwarf_cfi_next_offset_sf(struct drgn_debug_info_buffer *buffer, + struct drgn_dwarf_cie *cie, int64_t *ret) +{ + struct drgn_error *err; + int64_t factored; + if ((err = binary_buffer_next_sleb128(&buffer->bb, &factored))) + return err; + if (__builtin_mul_overflow(factored, cie->data_alignment_factor, ret)) + return binary_buffer_error(&buffer->bb, "offset is too large"); + return NULL; +} + +static struct drgn_error * +drgn_dwarf_cfi_next_offset_f(struct drgn_debug_info_buffer *buffer, + struct drgn_dwarf_cie *cie, int64_t *ret) +{ + struct drgn_error *err; + uint64_t factored; + if ((err = binary_buffer_next_uleb128(&buffer->bb, &factored))) + return err; + if (__builtin_mul_overflow(factored, cie->data_alignment_factor, ret)) + return binary_buffer_error(&buffer->bb, "offset is too large"); + return NULL; +} + +static struct drgn_error * +drgn_dwarf_cfi_next_block(struct drgn_debug_info_buffer *buffer, + const char **buf_ret, size_t *size_ret) +{ + struct drgn_error *err; + uint64_t size; + if ((err = binary_buffer_next_uleb128(&buffer->bb, &size))) + return err; + if (size > buffer->bb.end - buffer->bb.pos) { + return binary_buffer_error(&buffer->bb, + "block is out of bounds"); + } + 
*buf_ret = buffer->bb.pos; /* points into the buffer; the block bytes are not copied */ + buffer->bb.pos += size; + *size_ret = size; + return NULL; +} + +DEFINE_VECTOR(drgn_cfi_row_vector, struct drgn_cfi_row *) + +static struct drgn_error * +drgn_eval_dwarf_cfi(struct drgn_debug_info_module *module, + struct drgn_dwarf_fde *fde, + const struct drgn_cfi_row *initial_row, uint64_t target, + const char *instructions, size_t instructions_size, + struct drgn_cfi_row **row) +{ /* Run the call frame instructions in [instructions, instructions + instructions_size) against *row, stopping early once the location advances past target. initial_row is NULL while the CIE initial instructions are being evaluated; opcodes that advance the location or restore a register are rejected in that case. */ + struct drgn_error *err; + drgn_register_number (*dwarf_regno_to_internal)(uint64_t) = + module->platform.arch->dwarf_regno_to_internal; + struct drgn_dwarf_cie *cie = &module->dwarf.cies[fde->cie]; + uint64_t pc = fde->initial_location; /* location that the row being built applies from */ + + struct drgn_cfi_row_vector state_stack = VECTOR_INIT; /* rows saved by DW_CFA_remember_state */ + struct drgn_debug_info_buffer buffer; + drgn_debug_info_buffer_init(&buffer, module, + cie->is_eh ? + DRGN_SCN_EH_FRAME : DRGN_SCN_DEBUG_FRAME); + buffer.bb.pos = instructions; + buffer.bb.end = instructions + instructions_size; + while (binary_buffer_has_next(&buffer.bb)) { + uint8_t opcode; + if ((err = binary_buffer_next_u8(&buffer.bb, &opcode))) + goto out; + + uint64_t dwarf_regno; + drgn_register_number regno; + struct drgn_cfi_rule rule; + uint64_t tmp; + switch ((opcode & 0xc0) ?
(opcode & 0xc0) : opcode) { /* when the high 2 bits are set they select the opcode and the low 6 bits carry an operand */ + case DW_CFA_set_loc: + if (!initial_row) + goto invalid_for_initial; + if ((err = drgn_dwarf_cfi_next_encoded(&buffer, + cie->address_size, + cie->address_encoding, + fde->initial_location, + &tmp))) + goto out; + if (tmp <= pc) { + err = binary_buffer_error(&buffer.bb, + "DW_CFA_set_loc location is not greater than current location"); + goto out; + } + pc = tmp; + if (pc > target) /* the next row starts after target, so *row is the answer */ + goto found; + break; + case DW_CFA_advance_loc: + if (!initial_row) + goto invalid_for_initial; + tmp = opcode & 0x3f; /* delta is embedded in the opcode byte */ + goto advance_loc; + case DW_CFA_advance_loc1: + if (!initial_row) + goto invalid_for_initial; + if ((err = binary_buffer_next_u8_into_u64(&buffer.bb, + &tmp))) + goto out; + goto advance_loc; + case DW_CFA_advance_loc2: + if (!initial_row) + goto invalid_for_initial; + if ((err = binary_buffer_next_u16_into_u64(&buffer.bb, + &tmp))) + goto out; + goto advance_loc; + case DW_CFA_advance_loc4: + if (!initial_row) + goto invalid_for_initial; + if ((err = binary_buffer_next_u32_into_u64(&buffer.bb, + &tmp))) + goto out; +advance_loc: /* pc += delta * code_alignment_factor, rejecting overflow and a pc above uint_max(address_size) */ + if (__builtin_mul_overflow(tmp, + cie->code_alignment_factor, + &tmp) || + __builtin_add_overflow(pc, tmp, &pc) || + pc > uint_max(cie->address_size)) { + err = drgn_error_create(DRGN_ERROR_OTHER, + "DW_CFA_advance_loc* overflows location"); + goto out; + } + if (pc > target) + goto found; + break; + case DW_CFA_def_cfa: + rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET; + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno)) || + (err = drgn_dwarf_cfi_next_offset(&buffer, &rule.offset))) + goto out; + if ((rule.regno = dwarf_regno_to_internal(dwarf_regno)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + rule.kind = DRGN_CFI_RULE_UNDEFINED; /* CFA register has no internal number: the CFA rule becomes undefined */ + goto set_cfa; + case DW_CFA_def_cfa_sf: + rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET; + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno)) || + (err = drgn_dwarf_cfi_next_offset_sf(&buffer, cie, + &rule.offset))) + goto out; + if ((rule.regno =
dwarf_regno_to_internal(dwarf_regno)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + rule.kind = DRGN_CFI_RULE_UNDEFINED; + goto set_cfa; + case DW_CFA_def_cfa_register: /* changes only the register of the current CFA rule */ + drgn_cfi_row_get_cfa(*row, &rule); + if (rule.kind != DRGN_CFI_RULE_REGISTER_PLUS_OFFSET) { + err = binary_buffer_error(&buffer.bb, + "DW_CFA_def_cfa_register with incompatible CFA rule"); + goto out; + } + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno))) + goto out; + if ((rule.regno = dwarf_regno_to_internal(dwarf_regno)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + rule.kind = DRGN_CFI_RULE_UNDEFINED; + goto set_cfa; + case DW_CFA_def_cfa_offset: /* changes only the offset of the current CFA rule */ + drgn_cfi_row_get_cfa(*row, &rule); + if (rule.kind != DRGN_CFI_RULE_REGISTER_PLUS_OFFSET) { + err = binary_buffer_error(&buffer.bb, + "DW_CFA_def_cfa_offset with incompatible CFA rule"); + goto out; + } + if ((err = drgn_dwarf_cfi_next_offset(&buffer, + &rule.offset))) + goto out; + goto set_cfa; + case DW_CFA_def_cfa_offset_sf: + drgn_cfi_row_get_cfa(*row, &rule); + if (rule.kind != DRGN_CFI_RULE_REGISTER_PLUS_OFFSET) { + err = binary_buffer_error(&buffer.bb, + "DW_CFA_def_cfa_offset_sf with incompatible CFA rule"); + goto out; + } + if ((err = drgn_dwarf_cfi_next_offset_sf(&buffer, cie, + &rule.offset))) + goto out; + goto set_cfa; + case DW_CFA_def_cfa_expression: + rule.kind = DRGN_CFI_RULE_DWARF_EXPRESSION; + rule.push_cfa = false; /* unlike the register expression rules below, which set push_cfa to true */ + if ((err = drgn_dwarf_cfi_next_block(&buffer, + &rule.expr, + &rule.expr_size))) + goto out; +set_cfa: /* shared tail of every opcode that defines the CFA rule */ + if (!drgn_cfi_row_set_cfa(row, &rule)) { + err = &drgn_enomem; + goto out; + } + break; + case DW_CFA_undefined: + rule.kind = DRGN_CFI_RULE_UNDEFINED; + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno))) + goto out; + if ((regno = dwarf_regno_to_internal(dwarf_regno)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + break; /* operands are consumed, but a register with no internal number gets no rule */ + goto set_reg; + case DW_CFA_same_value: + rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET; + rule.offset = 0; + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno))) +
goto out; + if ((regno = dwarf_regno_to_internal(dwarf_regno)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + break; + rule.regno = regno; /* expressed as the register itself plus offset 0 */ + goto set_reg; + case DW_CFA_offset: /* register number is in the low 6 bits of the opcode */ + rule.kind = DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET; + if ((err = drgn_dwarf_cfi_next_offset_f(&buffer, cie, + &rule.offset))) + goto out; + if ((regno = dwarf_regno_to_internal(opcode & 0x3f)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + break; + goto set_reg; + case DW_CFA_offset_extended: + rule.kind = DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET; + goto reg_offset_f; + case DW_CFA_offset_extended_sf: + rule.kind = DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET; + goto reg_offset_sf; + case DW_CFA_val_offset: + rule.kind = DRGN_CFI_RULE_CFA_PLUS_OFFSET; +reg_offset_f: /* operands: ULEB128 register, then unsigned factored offset */ + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno)) || + (err = drgn_dwarf_cfi_next_offset_f(&buffer, cie, + &rule.offset))) + goto out; + if ((regno = dwarf_regno_to_internal(dwarf_regno)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + break; + goto set_reg; + case DW_CFA_val_offset_sf: + rule.kind = DRGN_CFI_RULE_CFA_PLUS_OFFSET; +reg_offset_sf: /* operands: ULEB128 register, then signed factored offset */ + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno)) || + (err = drgn_dwarf_cfi_next_offset_sf(&buffer, cie, + &rule.offset))) + goto out; + if ((regno = dwarf_regno_to_internal(dwarf_regno)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + break; + goto set_reg; + case DW_CFA_register: { + rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET; + rule.offset = 0; + uint64_t dwarf_regno2; + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno)) || + (err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno2))) + goto out; + if ((regno = dwarf_regno_to_internal(dwarf_regno)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + break; + if ((rule.regno = dwarf_regno_to_internal(dwarf_regno2)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + rule.kind = DRGN_CFI_RULE_UNDEFINED; /* the source register has no internal number */ + goto set_reg; + } + case DW_CFA_expression: + rule.kind = DRGN_CFI_RULE_AT_DWARF_EXPRESSION; + goto reg_expression; + case DW_CFA_val_expression: + rule.kind =
DRGN_CFI_RULE_DWARF_EXPRESSION; +reg_expression: /* operands: ULEB128 register, then an expression block */ + rule.push_cfa = true; + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno)) || + (err = drgn_dwarf_cfi_next_block(&buffer, + &rule.expr, + &rule.expr_size))) + goto out; + if ((regno = dwarf_regno_to_internal(dwarf_regno)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + break; + goto set_reg; + case DW_CFA_restore: + if (!initial_row) + goto invalid_for_initial; + dwarf_regno = opcode & 0x3f; /* register number is in the low 6 bits of the opcode */ + goto restore; + case DW_CFA_restore_extended: + if (!initial_row) { +invalid_for_initial: /* also the target of the location and restore opcodes above when initial_row is NULL */ + err = binary_buffer_error(&buffer.bb, + "invalid initial DWARF CFI opcode %#" PRIx8, + opcode); + goto out; + } + if ((err = binary_buffer_next_uleb128(&buffer.bb, + &dwarf_regno))) + goto out; +restore: + if ((regno = dwarf_regno_to_internal(dwarf_regno)) == + DRGN_REGISTER_NUMBER_UNKNOWN) + break; + drgn_cfi_row_get_register(initial_row, regno, &rule); /* reinstate the rule this register has in initial_row */ +set_reg: /* shared tail of every opcode that defines a register rule */ + if (!drgn_cfi_row_set_register(row, regno, &rule)) { + err = &drgn_enomem; + goto out; + } + break; + case DW_CFA_remember_state: { /* push a copy of *row onto state_stack */ + struct drgn_cfi_row **state = + drgn_cfi_row_vector_append_entry(&state_stack); + if (!state) { + err = &drgn_enomem; + goto out; + } + *state = drgn_empty_cfi_row; + if (!drgn_cfi_row_copy(state, *row)) { + err = &drgn_enomem; + goto out; + } + break; + } + case DW_CFA_restore_state: + if (state_stack.size == 0) { + err = binary_buffer_error(&buffer.bb, + "DW_CFA_restore_state with empty state stack"); + goto out; + } + drgn_cfi_row_destroy(*row); + *row = state_stack.data[--state_stack.size]; /* pop: the saved row replaces *row and is no longer destroyed at out */ + break; + case DW_CFA_nop: + break; + default: + err = binary_buffer_error(&buffer.bb, + "unknown DWARF CFI opcode %#" PRIx8, + opcode); + goto out; + } + } +found: /* also reached by falling out of the loop when the instructions are exhausted */ + err = NULL; +out: + for (size_t i = 0; i < state_stack.size; i++) /* destroy rows that were remembered but never restored */ + drgn_cfi_row_destroy(state_stack.data[i]); + drgn_cfi_row_vector_deinit(&state_stack); + return err; +} + +static struct drgn_error * +drgn_debug_info_find_cfi_in_fde(struct drgn_debug_info_module *module, + struct
drgn_dwarf_fde *fde, + uint64_t unbiased_pc, struct drgn_cfi_row **ret) +{ /* Compute the CFI row for unbiased_pc within one FDE: evaluate the CIE initial instructions starting from the architecture default row, then evaluate the FDE instructions on a copy of that result stored in *ret. */ + struct drgn_error *err; + struct drgn_dwarf_cie *cie = &module->dwarf.cies[fde->cie]; + struct drgn_cfi_row *initial_row = + (struct drgn_cfi_row *)module->platform.arch->default_dwarf_cfi_row; + err = drgn_eval_dwarf_cfi(module, fde, NULL, unbiased_pc, /* NULL initial_row marks this as the initial-instructions pass */ + cie->initial_instructions, + cie->initial_instructions_size, &initial_row); + if (err) + goto out; + if (!drgn_cfi_row_copy(ret, initial_row)) { + err = &drgn_enomem; + goto out; + } + err = drgn_eval_dwarf_cfi(module, fde, initial_row, unbiased_pc, /* initial_row is what DW_CFA_restore* reads from */ + fde->instructions, fde->instructions_size, + ret); +out: + drgn_cfi_row_destroy(initial_row); /* reached on both success and failure */ + return err; +} + +struct drgn_error * +drgn_debug_info_find_dwarf_cfi(struct drgn_debug_info_module *module, + uint64_t unbiased_pc, + struct drgn_cfi_row **row_ret, + bool *interrupted_ret, + drgn_register_number *ret_addr_regno_ret) +{ /* Look up the FDE for unbiased_pc and evaluate its CFI into *row_ret; on success also return the signal_frame flag and return_address_register of the CIE that the FDE refers to. */ + struct drgn_error *err; + struct drgn_dwarf_fde *fde; + err = drgn_debug_info_find_fde(module, unbiased_pc, &fde); + if (err) + return err; + if (!fde) /* the lookup succeeded but found no FDE */ + return &drgn_not_found; + err = drgn_debug_info_find_cfi_in_fde(module, fde, unbiased_pc, + row_ret); + if (err) + return err; + *interrupted_ret = module->dwarf.cies[fde->cie].signal_frame; + *ret_addr_regno_ret = + module->dwarf.cies[fde->cie].return_address_register; + return NULL; +} + +struct drgn_error * +drgn_eval_cfi_dwarf_expression(struct drgn_program *prog, + const struct drgn_cfi_rule *rule, + const struct drgn_register_state *regs, + void *buf, size_t size) +{ /* Evaluate the DWARF expression rule->expr (rule->expr_size bytes) and store the resulting size bytes in buf. */ + struct drgn_error *err; + struct uint64_vector stack = VECTOR_INIT; + + if (rule->push_cfa) { /* the expression starts with the CFA as the only stack entry */ + struct optional_uint64 cfa = drgn_register_state_get_cfa(regs); + if (!cfa.has_value) { + err = &drgn_not_found; + goto out; + } + if (!uint64_vector_append(&stack, &cfa.value)) { + err = &drgn_enomem; + goto out; + } + } + + int remaining_ops = MAX_DWARF_EXPR_OPS; + struct drgn_dwarf_expression_context ctx; +
drgn_dwarf_expression_context_init(&ctx, prog, regs->module, NULL, NULL, + regs, rule->expr, rule->expr_size); + err = drgn_eval_dwarf_expression(&ctx, &stack, &remaining_ops); + if (err) + goto out; + if (binary_buffer_has_next(&ctx.bb)) { /* evaluation returned with bytes left over: report the next byte as an opcode that is invalid here */ + uint8_t opcode; + err = binary_buffer_next_u8(&ctx.bb, &opcode); + if (!err) { + err = binary_buffer_error(&ctx.bb, + "invalid opcode %#" PRIx8 " for CFI expression", + opcode); + } + goto out; + } + if (stack.size == 0) { /* the expression produced no result */ + err = &drgn_not_found; + } else if (rule->kind == DRGN_CFI_RULE_AT_DWARF_EXPRESSION) { /* top of stack is the address to read size bytes from */ + err = drgn_program_read_memory(prog, buf, + stack.data[stack.size - 1], size, + false); + } else { /* otherwise the top of stack is the value itself; NOTE(review): copy_lsbytes presumably converts from host to program byte order, confirm */ + copy_lsbytes(buf, size, + drgn_platform_is_little_endian(&prog->platform), + &stack.data[stack.size - 1], sizeof(uint64_t), + HOST_LITTLE_ENDIAN); + err = NULL; + } + +out: + uint64_vector_deinit(&stack); + return err; +} diff --git a/libdrgn/dwarf_info.h b/libdrgn/dwarf_info.h new file mode 100644 index 000000000..efe283ea2 --- /dev/null +++ b/libdrgn/dwarf_info.h @@ -0,0 +1,288 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: GPL-3.0-or-later + +/** + * @file + * + * DWARF and .eh_frame support. + * + * See @ref DebugInfo. + */ + +#ifndef DRGN_DEBUG_INFO_DWARF_H +#define DRGN_DEBUG_INFO_DWARF_H + +/** + * @ingroup DebugInfo + * + * @{ + */ + +#include + +#include "cfi.h" +#include "drgn.h" +#include "hash_table.h" +#include "vector.h" + +struct drgn_debug_info; +struct drgn_debug_info_module; +struct drgn_register_state; + +/** DWARF Frame Description Entry. */ +struct drgn_dwarf_fde { + uint64_t initial_location; + uint64_t address_range; + /* CIE for this FDE as an index into drgn_dwarf_module_info::cies. */ + size_t cie; + const char *instructions; + size_t instructions_size; +}; + +/** DWARF debugging information for a @ref drgn_debug_info_module. */ +struct drgn_dwarf_module_info { + /** Base for `DW_EH_PE_pcrel`. */ + uint64_t pcrel_base; + /** Base for `DW_EH_PE_textrel`.
*/ + uint64_t textrel_base; + /** Base for `DW_EH_PE_datarel`. */ + uint64_t datarel_base; + /** Array of DWARF Common Information Entries. */ + struct drgn_dwarf_cie *cies; + /** + * Array of DWARF Frame Description Entries sorted by initial_location. + */ + struct drgn_dwarf_fde *fdes; + /** Number of elements in @ref drgn_dwarf_module_info::fdes. */ + size_t num_fdes; +}; + +void drgn_dwarf_module_info_deinit(struct drgn_debug_info_module *module); + +DEFINE_VECTOR_TYPE(drgn_dwarf_index_pending_die_vector, + struct drgn_dwarf_index_pending_die) + +/** + * Index of DWARF information for a namespace by entity name. + * + * This effectively maps a name to a list of DIEs with that name in a namespace. + * DIEs with the same name and tag and declared in the same file are + * deduplicated. + */ +struct drgn_namespace_dwarf_index { + /** + * Index shards. + * + * Indexing is parallelized, so this is sharded to reduce lock + * contention. + */ + struct drgn_dwarf_index_shard *shards; + /** Debugging information cache that owns this index. */ + struct drgn_debug_info *dbinfo; + /** DIEs we have not indexed yet. */ + struct drgn_dwarf_index_pending_die_vector pending_dies; + /** Saved error from a previous index. */ + struct drgn_error *saved_err; +}; + +/** DIE with a `DW_AT_specification` attribute. */ +struct drgn_dwarf_specification { + /** + * Address of non-defining declaration DIE referenced by + * `DW_AT_specification`. + */ + uintptr_t declaration; + /** Module containing DIE. */ + struct drgn_debug_info_module *module; + /** Address of DIE. */ + uintptr_t addr; +}; + +DEFINE_HASH_TABLE_TYPE(drgn_dwarf_specification_map, + struct drgn_dwarf_specification) + +DEFINE_VECTOR_TYPE(drgn_dwarf_index_cu_vector, struct drgn_dwarf_index_cu) + +/** Cached type in a @ref drgn_debug_info. */ +struct drgn_dwarf_type { + struct drgn_type *type; + enum drgn_qualifiers qualifiers; + /** + * Whether this is an incomplete array type or a typedef of one.
+ * + * This is used to work around a GCC bug; see @ref + * drgn_type_from_dwarf_internal(). + */ + bool is_incomplete_array; +}; + +DEFINE_HASH_MAP_TYPE(drgn_dwarf_type_map, const void *, struct drgn_dwarf_type) + +/** DWARF debugging information for a program/@ref drgn_debug_info. */ +struct drgn_dwarf_info { + /** Global namespace index. */ + struct drgn_namespace_dwarf_index global; + /** + * Map from address of DIE referenced by DW_AT_specification to DIE that + * references it. This is used to resolve DIEs with DW_AT_declaration to + * their definition. + * + * This is populated while indexing new DWARF information. Unlike the + * name index, it is not sharded because there typically aren't enough + * of these in a program to cause contention. + */ + struct drgn_dwarf_specification_map specifications; + /** Indexed compilation units. */ + struct drgn_dwarf_index_cu_vector index_cus; + + /** + * Cache of parsed types. + * + * The key is the address of the DIE (@c Dwarf_Die::addr). The value is + * a @ref drgn_dwarf_type. + */ + struct drgn_dwarf_type_map types; + /** + * Cache of parsed types which appear to be incomplete array types but + * can't be. + * + * See @ref drgn_type_from_dwarf_internal(). + */ + struct drgn_dwarf_type_map cant_be_incomplete_array_types; + + /** Current parsing recursion depth. */ + int depth; +}; + +void drgn_dwarf_info_init(struct drgn_debug_info *dbinfo); +void drgn_dwarf_info_deinit(struct drgn_debug_info *dbinfo); + +DEFINE_VECTOR_TYPE(drgn_dwarf_index_pending_cu_vector, + struct drgn_dwarf_index_pending_cu) + +/** + * State tracked while indexing new DWARF information in a @ref drgn_dwarf_info. + */ +struct drgn_dwarf_index_state { + struct drgn_debug_info *dbinfo; + /** Per-thread arrays of CUs to be indexed. */ + struct drgn_dwarf_index_pending_cu_vector *cus; + size_t max_threads; +}; + +/** + * Initialize state for indexing new DWARF information. + * + * @return @c true on success, @c false on failure to allocate memory. 
+ */ +bool drgn_dwarf_index_state_init(struct drgn_dwarf_index_state *state, + struct drgn_debug_info *dbinfo); + +/** Deinitialize state for indexing new DWARF information. */ +void drgn_dwarf_index_state_deinit(struct drgn_dwarf_index_state *state); + +/** Read a @ref drgn_debug_info_module to index its DWARF information. */ +struct drgn_error * +drgn_dwarf_index_read_module(struct drgn_dwarf_index_state *state, + struct drgn_debug_info_module *module); + +/** + * Index new DWARF information. + * + * This should be called once all modules have been read with @ref + * drgn_dwarf_index_read_module() to finish indexing those modules. + */ +struct drgn_error * +drgn_dwarf_info_update_index(struct drgn_dwarf_index_state *state); + +/** + * Find the DWARF DIEs in a @ref drgn_debug_info_module for the scope containing + * a given program counter. + * + * @param[in] module Module containing @p pc. + * @param[in] pc Program counter. + * @param[out] bias_ret Returned difference between addresses in the loaded + * module and addresses in the returned DIEs. + * @param[out] dies_ret Returned DIEs. `(*dies_ret)[*length_ret - 1]` is the + * innermost DIE containing @p pc, `(*dies_ret)[*length_ret - 2]` is its parent + * (which may not contain @p pc itself), `(*dies_ret)[*length_ret - 3]` is its + * grandparent, etc. Must be freed with @c free(). + * @param[out] length_ret Returned length of @p dies_ret. + */ +struct drgn_error * +drgn_debug_info_module_find_dwarf_scopes(struct drgn_debug_info_module *module, + uint64_t pc, uint64_t *bias_ret, + Dwarf_Die **dies_ret, + size_t *length_ret) + __attribute__((__nonnull__(1, 3, 4, 5))); + +/** + * Find the ancestors of a DWARF DIE. + * + * This finds the parent, grandparent, etc., of a DWARF DIE in the tree of DIEs. + * + * @param[in] module Module containing @p die. + * @param[in] die DIE to find. + * @param[out] dies_ret Returned DIEs. 
`(*dies_ret)[*length_ret]` is the DIE, + * `(*dies_ret)[*length_ret - 1]` is its parent, `(*dies_ret)[*length_ret - 2]` + * is its grandparent, etc., and `(*dies_ret)[0]` is the top-level unit DIE. + * @param[out] length_ret Returned number of ancestors in @p dies_ret. + */ +struct drgn_error *drgn_find_die_ancestors(Dwarf_Die *die, Dwarf_Die **dies_ret, + size_t *length_ret) + __attribute__((__nonnull__(2, 3))); + +/** + * Find an object DIE in an array of DWARF scopes. + * + * @param[in] scopes Array of scopes, from outermost to innermost. + * @param[in] num_scopes Number of scopes in @p scopes. + * @param[out] die_ret Returned object DIE. + * @param[out] type_ret If @p die_ret is a `DW_TAG_enumerator` DIE, its parent. + * Otherwise, undefined. + */ +struct drgn_error *drgn_find_in_dwarf_scopes(Dwarf_Die *scopes, + size_t num_scopes, + const char *name, + Dwarf_Die *die_ret, + Dwarf_Die *type_ret); + +/** + * Create a @ref drgn_object from a `Dwarf_Die`. + * + * @param[in] die Object DIE (e.g., `DW_TAG_subprogram`, `DW_TAG_variable`, + * `DW_TAG_formal_parameter`, `DW_TAG_enumerator`, + * `DW_TAG_template_value_parameter`). + * @param[in] type_die DIE of object's type. If @c NULL, use the `DW_AT_type` + * attribute of @p die. If @p die is a `DW_TAG_enumerator` DIE, this should be + * its parent. + * @param[in] function_die DIE of current function. @c NULL if not in function + * context. + * @param[in] regs Registers of current stack frame. @c NULL if not in stack + * frame context. + * @param[out] ret Returned object. 
+ */ +struct drgn_error * +drgn_object_from_dwarf(struct drgn_debug_info *dbinfo, + struct drgn_debug_info_module *module, + Dwarf_Die *die, Dwarf_Die *type_die, + Dwarf_Die *function_die, + const struct drgn_register_state *regs, + struct drgn_object *ret); + +struct drgn_error * +drgn_debug_info_find_dwarf_cfi(struct drgn_debug_info_module *module, + uint64_t unbiased_pc, + struct drgn_cfi_row **row_ret, + bool *interrupted_ret, + drgn_register_number *ret_addr_regno_ret); + +struct drgn_error * +drgn_eval_cfi_dwarf_expression(struct drgn_program *prog, + const struct drgn_cfi_rule *rule, + const struct drgn_register_state *regs, + void *buf, size_t size); + +/** @} */ + +#endif /* DRGN_DEBUG_INFO_DWARF_H */ diff --git a/libdrgn/error.c b/libdrgn/error.c index b66c3661f..abf2a4f7c 100644 --- a/libdrgn/error.c +++ b/libdrgn/error.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/error.h b/libdrgn/error.h index f56252557..f70dd250b 100644 --- a/libdrgn/error.h +++ b/libdrgn/error.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/hash_table.c b/libdrgn/hash_table.c index f4b32be76..793bc0157 100644 --- a/libdrgn/hash_table.c +++ b/libdrgn/hash_table.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include "hash_table.h" diff --git a/libdrgn/hash_table.h b/libdrgn/hash_table.h index 0b9368a9e..4a66c4067 100644 --- a/libdrgn/hash_table.h +++ b/libdrgn/hash_table.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later /** @@ -27,6 +27,7 @@ #include "bitops.h" #include "cityhash.h" #include "minmax.h" +#include "nstring.h" // IWYU pragma: export #include "util.h" /** @@ -375,15 +376,8 @@ static inline unsigned int table##_chunk_occupied(struct table##_chunk *chunk) \ * * @sa DEFINE_HASH_TABLE() */ -#define DEFINE_HASH_TABLE_TYPE(table, entry_type, entry_to_key) \ +#define DEFINE_HASH_TABLE_TYPE(table, entry_type) \ typedef typeof(entry_type) table##_entry_type; \ -typedef typeof(entry_to_key((table##_entry_type *)0)) table##_key_type; \ - \ -static inline table##_key_type \ -table##_entry_to_key(const table##_entry_type *entry) \ -{ \ - return entry_to_key(entry); \ -} \ \ enum { \ /* \ @@ -395,6 +389,79 @@ enum { \ table##_vector_policy = sizeof(table##_entry_type) >= 24, \ }; \ \ +struct table { \ + struct table##_chunk *chunks; \ + struct { \ + /* \ + * The vector storage policy stores 32-bit indices, so we only \ + * need 32-bit sizes. \ + */ \ + uint32_t chunk_mask; \ + uint32_t size; \ + /* Allocated together with chunks. */ \ + table##_entry_type *entries; \ + } vector[table##_vector_policy]; \ + struct { \ + size_t chunk_mask; \ + size_t size; \ + uintptr_t first_packed; \ + } basic[!table##_vector_policy]; \ +}; + +/* + * Common search function implementation returning an item iterator. This is + * shared by key lookups and index lookups. 
+ */ +#define HASH_TABLE_SEARCH_IMPL(table, func, key_type, item_to_key, eq_func) \ +static struct table##_iterator table##_##func(struct table *table, \ + const key_type *key, \ + struct hash_pair hp) \ +{ \ + const size_t delta = hash_table_probe_delta(hp); \ + size_t index = hp.first; \ + for (size_t tries = 0; tries <= table##_chunk_mask(table); tries++) { \ + struct table##_chunk *chunk = \ + &table->chunks[index & table##_chunk_mask(table)]; \ + if (sizeof(*chunk) > 64) \ + __builtin_prefetch(&chunk->items[8]); \ + unsigned int mask = table##_chunk_match(chunk, hp.second), i; \ + for_each_bit(i, mask) { \ + table##_item_type *item = &chunk->items[i]; \ + key_type item_key = item_to_key(table, item); \ + if (likely(eq_func(key, &item_key))) { \ + return (struct table##_iterator){ \ + .item = item, \ + .index = i, \ + }; \ + } \ + } \ + if (likely(chunk->outbound_overflow_count == 0)) \ + break; \ + index += delta; \ + } \ + return (struct table##_iterator){}; \ +} + +#define HASH_TABLE_SEARCH_BY_INDEX_ITEM_TO_KEY(table, item) (*(item)->index) + +/** + * Define the functions for a hash table. + * + * The hash table type must have already been defined with @ref + * DEFINE_HASH_TABLE_TYPE(). + * + * Unless the type and function definitions must be in separate places, use @ref + * DEFINE_HASH_TABLE() instead. + */ +#define DEFINE_HASH_TABLE_FUNCTIONS(table, entry_to_key, hash_func, eq_func) \ +typedef typeof(entry_to_key((table##_entry_type *)0)) table##_key_type; \ + \ +static inline table##_key_type \ +table##_entry_to_key(const table##_entry_type *entry) \ +{ \ + return entry_to_key(entry); \ +} \ + \ /* \ * Item stored in a chunk. \ * \ @@ -489,73 +556,6 @@ struct table##_iterator { \ }; \ }; \ \ -struct table { \ - struct table##_chunk *chunks; \ - struct { \ - /* \ - * The vector storage policy stores 32-bit indices, so we only \ - * need 32-bit sizes. \ - */ \ - uint32_t chunk_mask; \ - uint32_t size; \ - /* Allocated together with chunks. 
*/ \ - table##_entry_type *entries; \ - } vector[table##_vector_policy]; \ - struct { \ - size_t chunk_mask; \ - size_t size; \ - uintptr_t first_packed; \ - } basic[!table##_vector_policy]; \ -}; - -/* - * Common search function implementation returning an item iterator. This is - * shared by key lookups and index lookups. - */ -#define HASH_TABLE_SEARCH_IMPL(table, func, key_type, item_to_key, eq_func) \ -static struct table##_iterator table##_##func(struct table *table, \ - const key_type *key, \ - struct hash_pair hp) \ -{ \ - const size_t delta = hash_table_probe_delta(hp); \ - size_t index = hp.first; \ - for (size_t tries = 0; tries <= table##_chunk_mask(table); tries++) { \ - struct table##_chunk *chunk = \ - &table->chunks[index & table##_chunk_mask(table)]; \ - if (sizeof(*chunk) > 64) \ - __builtin_prefetch(&chunk->items[8]); \ - unsigned int mask = table##_chunk_match(chunk, hp.second), i; \ - for_each_bit(i, mask) { \ - table##_item_type *item = &chunk->items[i]; \ - key_type item_key = item_to_key(table, item); \ - if (likely(eq_func(key, &item_key))) { \ - return (struct table##_iterator){ \ - .item = item, \ - .index = i, \ - }; \ - } \ - } \ - if (likely(chunk->outbound_overflow_count == 0)) \ - break; \ - index += delta; \ - } \ - return (struct table##_iterator){}; \ -} - -#define HASH_TABLE_SEARCH_BY_INDEX_ITEM_TO_KEY(table, item) (*(item)->index) - -/** - * Define the functions for a hash table. - * - * The hash table type must have already been defined with @ref - * DEFINE_HASH_TABLE_TYPE(). - * - * Unless the type and function definitions must be in separate places, use @ref - * DEFINE_HASH_TABLE() instead. - * - * @sa DEFINE_HASH_TABLE() - */ -#define DEFINE_HASH_TABLE_FUNCTIONS(table, hash_func, eq_func) \ static inline struct hash_pair table##_hash(const table##_key_type *key) \ { \ return hash_func(key); \ @@ -1425,24 +1425,38 @@ static struct table##_iterator table##_next(struct table##_iterator it) \ * * and returns a @c bool. 
*/ #define DEFINE_HASH_TABLE(table, entry_type, entry_to_key, hash_func, eq_func) \ -DEFINE_HASH_TABLE_TYPE(table, entry_type, entry_to_key) \ -DEFINE_HASH_TABLE_FUNCTIONS(table, hash_func, eq_func) - -#define HASH_MAP_ENTRY_TO_KEY(entry) ((entry)->key) +DEFINE_HASH_TABLE_TYPE(table, entry_type) \ +DEFINE_HASH_TABLE_FUNCTIONS(table, entry_to_key, hash_func, eq_func) /** * Define a hash map type without defining its functions. * - * The functions are defined with @ref DEFINE_HASH_TABLE_FUNCTIONS(). + * The functions are defined with @ref DEFINE_HASH_MAP_FUNCTIONS(). * * @sa DEFINE_HASH_MAP(), DEFINE_HASH_TABLE_TYPE() */ -#define DEFINE_HASH_MAP_TYPE(table, key_type, value_type) \ -struct table##_entry { \ - typeof(key_type) key; \ - typeof(value_type) value; \ -}; \ -DEFINE_HASH_TABLE_TYPE(table, struct table##_entry, HASH_MAP_ENTRY_TO_KEY) +#define DEFINE_HASH_MAP_TYPE(table, key_type, value_type) \ +struct table##_entry { \ + typeof(key_type) key; \ + typeof(value_type) value; \ +}; \ +DEFINE_HASH_TABLE_TYPE(table, struct table##_entry) + +#define HASH_MAP_ENTRY_TO_KEY(entry) ((entry)->key) + +/** + * Define the functions for a hash map. + * + * The hash map type must have already been defined with @ref + * DEFINE_HASH_MAP_TYPE(). + * + * Unless the type and function definitions must be in separate places, use @ref + * DEFINE_HASH_MAP() instead. + * + * @sa DEFINE_HASH_TABLE_FUNCTIONS + */ +#define DEFINE_HASH_MAP_FUNCTIONS(table, hash_func, eq_func) \ +DEFINE_HASH_TABLE_FUNCTIONS(table, HASH_MAP_ENTRY_TO_KEY, hash_func, eq_func) /** * Define a hash map interface. 
@@ -1466,19 +1480,32 @@ DEFINE_HASH_TABLE_TYPE(table, struct table##_entry, HASH_MAP_ENTRY_TO_KEY) */ #define DEFINE_HASH_MAP(table, key_type, value_type, hash_func, eq_func) \ DEFINE_HASH_MAP_TYPE(table, key_type, value_type) \ -DEFINE_HASH_TABLE_FUNCTIONS(table, hash_func, eq_func) - -#define HASH_SET_ENTRY_TO_KEY(entry) (*(entry)) +DEFINE_HASH_MAP_FUNCTIONS(table, hash_func, eq_func) /** * Define a hash set type without defining its functions. * - * The functions are defined with @ref DEFINE_HASH_TABLE_FUNCTIONS(). + * The functions are defined with @ref DEFINE_HASH_SET_FUNCTIONS(). * * @sa DEFINE_HASH_SET(), DEFINE_HASH_TABLE_TYPE() */ -#define DEFINE_HASH_SET_TYPE(table, key_type) \ - DEFINE_HASH_TABLE_TYPE(table, key_type, HASH_SET_ENTRY_TO_KEY) +#define DEFINE_HASH_SET_TYPE DEFINE_HASH_TABLE_TYPE + +#define HASH_SET_ENTRY_TO_KEY(entry) (*(entry)) + +/** + * Define the functions for a hash set. + * + * The hash set type must have already been defined with @ref + * DEFINE_HASH_SET_TYPE(). + * + * Unless the type and function definitions must be in separate places, use @ref + * DEFINE_HASH_SET() instead. + * + * @sa DEFINE_HASH_TABLE_FUNCTIONS + */ +#define DEFINE_HASH_SET_FUNCTIONS(table, hash_func, eq_func) \ +DEFINE_HASH_TABLE_FUNCTIONS(table, HASH_SET_ENTRY_TO_KEY, hash_func, eq_func) /** * Define a hash set interface. @@ -1494,7 +1521,7 @@ DEFINE_HASH_TABLE_FUNCTIONS(table, hash_func, eq_func) */ #define DEFINE_HASH_SET(table, key_type, hash_func, eq_func) \ DEFINE_HASH_SET_TYPE(table, key_type) \ -DEFINE_HASH_TABLE_FUNCTIONS(table, hash_func, eq_func) +DEFINE_HASH_SET_FUNCTIONS(table, hash_func, eq_func) /** * Empty hash table initializer. @@ -1724,8 +1751,8 @@ bool c_string_key_eq(const char * const *a, const char * const *b); #define c_string_key_eq(a, b) ((bool)(strcmp(*(a), *(b)) == 0)) #endif -/** Double hash a @ref string. */ -static inline struct hash_pair string_hash_pair(const struct string *key) +/** Double hash a @ref nstring. 
*/ +static inline struct hash_pair nstring_hash_pair(const struct nstring *key) { return hash_pair_from_avalanching_hash(hash_bytes(key->str, key->len)); } diff --git a/libdrgn/helpers.h b/libdrgn/helpers.h index 342333603..66a5a5282 100644 --- a/libdrgn/helpers.h +++ b/libdrgn/helpers.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** @@ -15,6 +15,8 @@ #include #include +#include "drgn.h" +#include "vector.h" struct drgn_object; struct drgn_program; @@ -23,6 +25,13 @@ struct drgn_error *linux_helper_read_vm(struct drgn_program *prog, uint64_t pgtable, uint64_t virt_addr, void *buf, size_t count); +struct drgn_error *linux_helper_per_cpu_ptr(struct drgn_object *res, + const struct drgn_object *ptr, + uint64_t cpu); + +struct drgn_error *linux_helper_idle_task(struct drgn_object *res, + uint64_t cpu); + struct drgn_error * linux_helper_radix_tree_lookup(struct drgn_object *res, const struct drgn_object *root, uint64_t index); @@ -43,4 +52,100 @@ struct drgn_error *linux_helper_find_task(struct drgn_object *res, const struct drgn_object *ns, uint64_t pid); +/** + * Iterator convention: + * + * For all of the iterators defined below, the convention for each of the + * `*_next` functions is that upon returning, `*ret` will point to space + * allocated inside of `iter`. The caller is free to do what they wish with + * this return value, but should note that it will be overwritten the next time + * the `*_next` function is called. 
+ */ + +DEFINE_VECTOR_TYPE(linux_helper_radix_tree_iter_frame_vector, + struct linux_helper_radix_tree_iter_frame) + +struct linux_helper_radix_tree_iter_entry { + uint64_t index; + struct drgn_object node; +}; + +struct linux_helper_radix_tree_iter { + bool started; + struct drgn_object root; + // Current value to be yielded + struct linux_helper_radix_tree_iter_entry entry; + // We need this for later initialization of `drgn_object`s + struct drgn_program *prog; + // Frames to keep track of generator state + struct linux_helper_radix_tree_iter_frame_vector frames; + // One-time setup values that are persistent + uint64_t RADIX_TREE_INTERNAL_NODE; + uint64_t RADIX_TREE_MAP_MASK; + struct drgn_qualified_type node_type; +}; + +struct drgn_error *linux_helper_radix_tree_iter_init(struct linux_helper_radix_tree_iter *iter, + const struct drgn_object *root); + +void linux_helper_radix_tree_iter_deinit(struct linux_helper_radix_tree_iter *iter); + +struct drgn_error *linux_helper_radix_tree_iter_next(struct linux_helper_radix_tree_iter *iter, + struct linux_helper_radix_tree_iter_entry **ret); + +struct linux_helper_idr_iter { + struct linux_helper_radix_tree_iter iter; + uint64_t base; +}; + +struct drgn_error *linux_helper_idr_iter_init(struct linux_helper_idr_iter *iter, + const struct drgn_object *idr); + +void linux_helper_idr_iter_deinit(struct linux_helper_idr_iter *iter); + +struct drgn_error *linux_helper_idr_iter_next(struct linux_helper_idr_iter *iter, + struct linux_helper_radix_tree_iter_entry **ret); + +struct linux_helper_pid_iter { + bool has_idr; + struct drgn_qualified_type pid_type; + union { + // if has_idr + struct linux_helper_idr_iter iter; + // else + struct { + struct drgn_qualified_type upid_type; + struct drgn_object pid_hash; + struct drgn_object pos; // a `struct hlist_node*` + struct drgn_object ns; + struct drgn_object entry; // Current value of the iterator + size_t index; // Current loop index + char member_specifier[sizeof("numbers[]") 
+ 20]; + // 20 = maximum length of a uint64_t as a string + // Space for the null terminator is included as part of the sizeof on the string literal + }; + }; +}; + +struct drgn_error *linux_helper_pid_iter_init(struct linux_helper_pid_iter *iter, + const struct drgn_object *ns); + +void linux_helper_pid_iter_deinit(struct linux_helper_pid_iter *iter); + +struct drgn_error *linux_helper_pid_iter_next(struct linux_helper_pid_iter *iter, + struct drgn_object **ret); + +struct linux_helper_task_iter { + struct linux_helper_pid_iter iter; + uint64_t PIDTYPE_PID; +}; + +struct drgn_error *linux_helper_task_iter_init(struct linux_helper_task_iter *iter, + const struct drgn_object *ns); + +void linux_helper_task_iter_deinit(struct linux_helper_task_iter *iter); + +struct drgn_error *linux_helper_task_iter_next(struct linux_helper_task_iter *iter, + struct drgn_object **ret); + #endif /* DRGN_HELPERS_H */ diff --git a/libdrgn/kdump.c b/libdrgn/kdump.c index 2bbfd5781..dd651938a 100644 --- a/libdrgn/kdump.c +++ b/libdrgn/kdump.c @@ -3,6 +3,7 @@ #include #include +#include #include #include "linux_kernel.h" @@ -72,7 +73,6 @@ struct drgn_error *drgn_program_set_kdump(struct drgn_program *prog) struct drgn_error *err; kdump_ctx_t *ctx; kdump_status ks; - const char *vmcoreinfo; bool had_platform; ctx = kdump_new(); @@ -96,6 +96,11 @@ struct drgn_error *drgn_program_set_kdump(struct drgn_program *prog) goto err; } +#if KDUMPFILE_VERSION >= KDUMPFILE_MKVER(0, 4, 1) + char *vmcoreinfo; +#else + const char *vmcoreinfo; +#endif ks = kdump_vmcoreinfo_raw(ctx, &vmcoreinfo); if (ks != KDUMP_OK) { err = drgn_error_format(DRGN_ERROR_OTHER, @@ -106,6 +111,13 @@ struct drgn_error *drgn_program_set_kdump(struct drgn_program *prog) err = parse_vmcoreinfo(vmcoreinfo, strlen(vmcoreinfo) + 1, &prog->vmcoreinfo); + /* + * As of libkdumpfile 0.4.1, the string returned by + * kdump_vmcoreinfo_raw() needs to be freed. 
+ */ +#if KDUMPFILE_VERSION >= KDUMPFILE_MKVER(0, 4, 1) + free(vmcoreinfo); +#endif if (err) goto err; diff --git a/libdrgn/language.c b/libdrgn/language.c index 3e5086228..15a701139 100644 --- a/libdrgn/language.c +++ b/libdrgn/language.c @@ -1,9 +1,6 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later -#include - -#include "error.h" #include "language.h" const struct drgn_language drgn_languages[] = { @@ -62,29 +59,3 @@ const struct drgn_language drgn_languages[] = { .op_not = c_op_not, }, }; - -struct drgn_error *drgn_language_from_die(Dwarf_Die *die, bool fall_back, - const struct drgn_language **ret) -{ - Dwarf_Die cudie; - if (!dwarf_cu_die(die->cu, &cudie, NULL, NULL, NULL, NULL, NULL, NULL)) - return drgn_error_libdw(); - switch (dwarf_srclang(&cudie)) { - case DW_LANG_C: - case DW_LANG_C89: - case DW_LANG_C99: - case DW_LANG_C11: - *ret = &drgn_language_c; - break; - case DW_LANG_C_plus_plus: - case DW_LANG_C_plus_plus_03: - case DW_LANG_C_plus_plus_11: - case DW_LANG_C_plus_plus_14: - *ret = &drgn_language_cpp; - break; - default: - *ret = fall_back ? &drgn_default_language : NULL; - break; - } - return NULL; -} diff --git a/libdrgn/language.h b/libdrgn/language.h index 8bce2b5b4..2c5578f73 100644 --- a/libdrgn/language.h +++ b/libdrgn/language.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** @@ -12,8 +12,6 @@ #ifndef DRGN_LANGUAGE_H #define DRGN_LANGUAGE_H -#include - #include "drgn.h" /** @@ -203,18 +201,6 @@ drgn_element_format_object_flags(enum drgn_format_object_flags flags) (flags & DRGN_FORMAT_OBJECT_ELEMENT_TYPE_NAMES) >> 2); } -/** - * Return the @ref drgn_language of the CU of the given DIE. - * - * @param[in] fall_back Whether to fall back if the language is not found or - * unknown. 
If @c true, @ref drgn_default_language is returned in this case. If - * @c false, @c NULL is returned. - * @param[out] ret Returned language. - * @return @c NULL on success, non-@c NULL on error. - */ -struct drgn_error *drgn_language_from_die(Dwarf_Die *die, bool fall_back, - const struct drgn_language **ret); - /** @} */ #endif /* DRGN_LANGUAGE_H */ diff --git a/libdrgn/language_c.c b/libdrgn/language_c.c index 0a29715bc..7a698145d 100644 --- a/libdrgn/language_c.c +++ b/libdrgn/language_c.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include @@ -10,6 +10,7 @@ #include #include +#include "array.h" #include "bitops.h" #include "error.h" #include "hash_table.h" @@ -27,7 +28,7 @@ static struct drgn_error * c_declare_variable(struct drgn_qualified_type qualified_type, struct string_callback *name, size_t indent, - struct string_builder *sb); + bool define_anonymous_type, struct string_builder *sb); static struct drgn_error * c_define_type(struct drgn_qualified_type qualified_type, size_t indent, @@ -59,7 +60,7 @@ static struct drgn_error *c_append_qualifiers(enum drgn_qualifiers qualifiers, bool first = true; unsigned int i; - static_assert((1 << ARRAY_SIZE(qualifier_names)) - 1 == + static_assert((1 << array_size(qualifier_names)) - 1 == DRGN_ALL_QUALIFIERS, "missing C qualifier name"); for (i = 0; (1U << i) & DRGN_ALL_QUALIFIERS; i++) { @@ -155,16 +156,20 @@ c_append_tagged_name(struct drgn_qualified_type qualified_type, size_t indent, static struct drgn_error * c_declare_tagged(struct drgn_qualified_type qualified_type, struct string_callback *name, size_t indent, - struct string_builder *sb) + bool define_anonymous_type, struct string_builder *sb) { struct drgn_error *err; - if (drgn_type_is_anonymous(qualified_type.type)) + bool anonymous = drgn_type_is_anonymous(qualified_type.type); + if (anonymous && define_anonymous_type) err = 
c_define_type(qualified_type, indent, sb); else err = c_append_tagged_name(qualified_type, indent, sb); if (err) return err; + if (anonymous && !define_anonymous_type && + !string_builder_append(sb, " ")) + return &drgn_enomem; if (name) { if (!string_builder_appendc(sb, ' ')) @@ -228,7 +233,8 @@ c_declare_pointer(struct drgn_qualified_type qualified_type, struct drgn_qualified_type referenced_type; referenced_type = drgn_type_type(qualified_type.type); - return c_declare_variable(referenced_type, &pointer_name, indent, sb); + return c_declare_variable(referenced_type, &pointer_name, indent, false, + sb); } static struct drgn_error *c_array_name(struct string_callback *name, void *arg, @@ -266,7 +272,7 @@ c_declare_array(struct drgn_qualified_type qualified_type, struct drgn_qualified_type element_type; element_type = drgn_type_type(qualified_type.type); - return c_declare_variable(element_type, &array_name, indent, sb); + return c_declare_variable(element_type, &array_name, indent, false, sb); } static struct drgn_error * @@ -288,7 +294,7 @@ c_declare_function(struct drgn_qualified_type qualified_type, num_parameters = drgn_type_num_parameters(qualified_type.type); return_type = drgn_type_type(qualified_type.type); - err = c_declare_variable(return_type, name, indent, sb); + err = c_declare_variable(return_type, name, indent, false, sb); if (err) return err; @@ -313,7 +319,7 @@ c_declare_function(struct drgn_qualified_type qualified_type, } err = c_declare_variable(parameter_type, parameter_name && parameter_name[0] ? 
- &name_cb : NULL, 0, sb); + &name_cb : NULL, 0, false, sb); if (err) return err; } @@ -334,7 +340,7 @@ c_declare_function(struct drgn_qualified_type qualified_type, static struct drgn_error * c_declare_variable(struct drgn_qualified_type qualified_type, struct string_callback *name, size_t indent, - struct string_builder *sb) + bool define_anonymous_type, struct string_builder *sb) { SWITCH_ENUM(drgn_type_kind(qualified_type.type), case DRGN_TYPE_VOID: @@ -347,7 +353,8 @@ c_declare_variable(struct drgn_qualified_type qualified_type, case DRGN_TYPE_UNION: case DRGN_TYPE_CLASS: case DRGN_TYPE_ENUM: - return c_declare_tagged(qualified_type, name, indent, sb); + return c_declare_tagged(qualified_type, name, indent, + define_anonymous_type, sb); case DRGN_TYPE_POINTER: return c_declare_pointer(qualified_type, name, indent, sb); case DRGN_TYPE_ARRAY: @@ -394,7 +401,7 @@ c_define_compound(struct drgn_qualified_type qualified_type, size_t indent, }; err = c_declare_variable(member_type, member_name && member_name[0] ? 
- &name_cb : NULL, indent + 1, sb); + &name_cb : NULL, indent + 1, true, sb); if (err) return err; if (member_bit_field_size && @@ -479,7 +486,7 @@ c_define_typedef(struct drgn_qualified_type qualified_type, size_t indent, return &drgn_enomem; aliased_type = drgn_type_type(qualified_type.type); - return c_declare_variable(aliased_type, &typedef_name, 0, sb); + return c_declare_variable(aliased_type, &typedef_name, 0, true, sb); } static struct drgn_error * @@ -510,27 +517,11 @@ c_define_type(struct drgn_qualified_type qualified_type, size_t indent, ) } -static struct drgn_error * -c_anonymous_type_name(struct drgn_qualified_type qualified_type, - struct string_builder *sb) -{ - struct drgn_error *err; - - err = c_append_tagged_name(qualified_type, 0, sb); - if (err) - return err; - if (!string_builder_append(sb, " ")) - return &drgn_enomem; - return NULL; -} - static struct drgn_error * c_format_type_name_impl(struct drgn_qualified_type qualified_type, struct string_builder *sb) { - if (drgn_type_is_anonymous(qualified_type.type)) { - return c_anonymous_type_name(qualified_type, sb); - } else if (drgn_type_kind(qualified_type.type) == DRGN_TYPE_FUNCTION) { + if (drgn_type_kind(qualified_type.type) == DRGN_TYPE_FUNCTION) { struct string_callback name_cb = { .fn = c_variable_name, .arg = (void *)"", @@ -538,7 +529,7 @@ c_format_type_name_impl(struct drgn_qualified_type qualified_type, return c_declare_function(qualified_type, &name_cb, 0, sb); } else { - return c_declare_variable(qualified_type, NULL, 0, sb); + return c_declare_variable(qualified_type, NULL, 0, false, sb); } } @@ -1685,7 +1676,8 @@ static const char *token_spelling[] = { [C_TOKEN_ENUM] = "enum", }; -DEFINE_HASH_MAP(c_keyword_map, struct string, int, string_hash_pair, string_eq) +DEFINE_HASH_MAP(c_keyword_map, struct nstring, int, nstring_hash_pair, + nstring_eq) static struct c_keyword_map c_keywords = HASH_TABLE_INIT; @@ -1749,7 +1741,7 @@ struct drgn_error *drgn_lexer_c(struct drgn_lexer *lexer, 
break; default: if (isalpha(*p) || *p == '_') { - struct string key; + struct nstring key; struct c_keyword_map_iterator it; do { @@ -2453,6 +2445,8 @@ c_parse_abstract_declarator(struct drgn_program *prog, return err; err = drgn_lexer_peek(lexer, &token); + if (err) + return err; if (token.kind == C_TOKEN_LPAREN || token.kind == C_TOKEN_LBRACKET) { struct c_declarator *tmp; @@ -2708,15 +2702,13 @@ struct drgn_error *c_integer_literal(struct drgn_object *res, uint64_t uvalue) DRGN_C_TYPE_UNSIGNED_LONG_LONG, }; struct drgn_error *err; - unsigned int bits; - struct drgn_qualified_type qualified_type; - size_t i; - bits = fls(uvalue); + unsigned int bits = fls(uvalue); + struct drgn_qualified_type qualified_type; qualified_type.qualifiers = 0; - for (i = 0; i < ARRAY_SIZE(types); i++) { + array_for_each(type, types) { err = drgn_program_find_primitive_type(drgn_object_program(res), - types[i], + *type, &qualified_type.type); if (err) return err; @@ -2860,7 +2852,7 @@ static struct drgn_error *c_integer_promotions(struct drgn_program *prog, * promotes it to the full width, but GCC does not. We implement the GCC * behavior of preserving the width. */ - if (primitive >= ARRAY_SIZE(c_integer_conversion_rank) || + if (primitive >= array_size(c_integer_conversion_rank) || type->bit_field_size) { err = drgn_program_find_primitive_type(prog, DRGN_C_TYPE_INT, &int_type); diff --git a/libdrgn/lazy_object.c b/libdrgn/lazy_object.c index ae21be2b3..1bf0eddb1 100644 --- a/libdrgn/lazy_object.c +++ b/libdrgn/lazy_object.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/lazy_object.h b/libdrgn/lazy_object.h index fa4ae640d..a44a93001 100644 --- a/libdrgn/lazy_object.h +++ b/libdrgn/lazy_object.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/lexer.c b/libdrgn/lexer.c index 221fa4530..089e1be9b 100644 --- a/libdrgn/lexer.c +++ b/libdrgn/lexer.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include "drgn.h" diff --git a/libdrgn/lexer.h b/libdrgn/lexer.h index 693da4412..cdb2cec5b 100644 --- a/libdrgn/lexer.h +++ b/libdrgn/lexer.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index 769c26b5c..0ef0bf2ce 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/linux_kernel.h b/libdrgn/linux_kernel.h index 62bead4c4..e6b5b26f3 100644 --- a/libdrgn/linux_kernel.h +++ b/libdrgn/linux_kernel.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #ifndef DRGN_LINUX_KERNEL_H diff --git a/libdrgn/linux_kernel_helpers.c b/libdrgn/linux_kernel_helpers.c index 115fb4fa0..fcd77b714 100644 --- a/libdrgn/linux_kernel_helpers.c +++ b/libdrgn/linux_kernel_helpers.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later #include @@ -6,10 +6,13 @@ #include #include "drgn.h" +#include "helpers.h" #include "minmax.h" #include "platform.h" #include "program.h" +static const uint64_t RADIX_TREE_ENTRY_MASK = 3; + struct drgn_error *linux_helper_read_vm(struct drgn_program *prog, uint64_t pgtable, uint64_t virt_addr, void *buf, size_t count) @@ -99,68 +102,140 @@ struct drgn_error *linux_helper_read_vm(struct drgn_program *prog, return err; } -struct drgn_error * -linux_helper_radix_tree_lookup(struct drgn_object *res, - const struct drgn_object *root, uint64_t index) +struct drgn_error *linux_helper_per_cpu_ptr(struct drgn_object *res, + const struct drgn_object *ptr, + uint64_t cpu) { struct drgn_error *err; - static const uint64_t RADIX_TREE_ENTRY_MASK = 3; - uint64_t RADIX_TREE_INTERNAL_NODE; - uint64_t RADIX_TREE_MAP_MASK; - struct drgn_object node, tmp; - struct drgn_qualified_type node_type; + struct drgn_program *prog = drgn_object_program(ptr); - drgn_object_init(&node, drgn_object_program(res)); - drgn_object_init(&tmp, drgn_object_program(res)); + struct drgn_object tmp; + drgn_object_init(&tmp, prog); + err = drgn_program_find_object(prog, "__per_cpu_offset", NULL, + DRGN_FIND_OBJECT_ANY, &tmp); + if (!err) { + err = drgn_object_subscript(&tmp, &tmp, cpu); + if (err) + goto out; + union drgn_value per_cpu_offset; + err = drgn_object_read_integer(&tmp, &per_cpu_offset); + if (err) + goto out; + uint64_t ptr_value; + err = drgn_object_read_unsigned(ptr, &ptr_value); + if (err) + goto out; + + err = drgn_object_set_unsigned(res, + drgn_object_qualified_type(ptr), + ptr_value + per_cpu_offset.uvalue, + 0); + } else if (err->code == DRGN_ERROR_LOOKUP) { + drgn_error_destroy(err); + err = drgn_object_copy(res, ptr); + } +out: + drgn_object_deinit(&tmp); + return err; +} + +struct drgn_error *linux_helper_idle_task(struct drgn_object *res, uint64_t cpu) +{ + struct drgn_error *err; + struct drgn_program *prog = drgn_object_program(res); + 
+ struct drgn_object tmp; + drgn_object_init(&tmp, prog); + err = drgn_program_find_object(prog, "runqueues", NULL, + DRGN_FIND_OBJECT_ANY, &tmp); + if (err) + goto out; + err = drgn_object_address_of(&tmp, &tmp); + if (err) + goto out; + err = linux_helper_per_cpu_ptr(&tmp, &tmp, cpu); + if (err) + goto out; + err = drgn_object_member_dereference(res, &tmp, "idle"); +out: + drgn_object_deinit(&tmp); + return err; +} + +static struct drgn_error * +radix_tree_init(struct drgn_program *prog, const struct drgn_object *root, + uint64_t *RADIX_TREE_INTERNAL_NODE_ret, + uint64_t *RADIX_TREE_MAP_MASK_ret, + struct drgn_qualified_type *node_type_ret, + struct drgn_object *node_ret) +{ + struct drgn_error *err = + drgn_object_member_dereference(node_ret, root, "xa_head"); /* node = root->xa_head */ - err = drgn_object_member_dereference(&node, root, "xa_head"); if (!err) { - err = drgn_program_find_type(drgn_object_program(res), - "struct xa_node *", NULL, - &node_type); + err = drgn_program_find_type(prog, "struct xa_node *", NULL, + node_type_ret); if (err) - goto out; - RADIX_TREE_INTERNAL_NODE = 2; + return err; + *RADIX_TREE_INTERNAL_NODE_ret = 2; } else if (err->code == DRGN_ERROR_LOOKUP) { drgn_error_destroy(err); /* node = (void *)root.rnode */ - err = drgn_object_member_dereference(&node, root, "rnode"); + err = drgn_object_member_dereference(node_ret, root, "rnode"); if (err) - goto out; - err = drgn_program_find_type(drgn_object_program(res), "void *", - NULL, &node_type); + return err; + err = drgn_program_find_type(prog, "void *", NULL, + node_type_ret); if (err) - goto out; - err = drgn_object_cast(&node, node_type, &node); + return err; + err = drgn_object_cast(node_ret, *node_type_ret, node_ret); if (err) - goto out; - err = drgn_program_find_type(drgn_object_program(res), - "struct radix_tree_node *", NULL, - &node_type); + return err; + err = drgn_program_find_type(prog, "struct radix_tree_node *", + NULL, node_type_ret); if (err) - goto out; - 
RADIX_TREE_INTERNAL_NODE = 1; + return err; + *RADIX_TREE_INTERNAL_NODE_ret = 1; } else { - goto out; + return err; } struct drgn_type_member *member; uint64_t member_bit_offset; - err = drgn_type_find_member(drgn_type_type(node_type.type).type, + err = drgn_type_find_member(drgn_type_type(node_type_ret->type).type, "slots", &member, &member_bit_offset); if (err) - goto out; + return err; struct drgn_qualified_type member_type; err = drgn_member_type(member, &member_type, NULL); + if (err) + return err; + if (drgn_type_kind(member_type.type) != DRGN_TYPE_ARRAY) + return drgn_error_create( + DRGN_ERROR_TYPE, + "struct radix_tree_node slots member is not an array"); + *RADIX_TREE_MAP_MASK_ret = drgn_type_length(member_type.type) - 1; + return NULL; +} + +struct drgn_error * +linux_helper_radix_tree_lookup(struct drgn_object *res, + const struct drgn_object *root, uint64_t index) +{ + struct drgn_error *err; + uint64_t RADIX_TREE_INTERNAL_NODE; + uint64_t RADIX_TREE_MAP_MASK; + struct drgn_object node, tmp; + struct drgn_qualified_type node_type; + + drgn_object_init(&node, drgn_object_program(res)); + drgn_object_init(&tmp, drgn_object_program(res)); + err = radix_tree_init(drgn_object_program(root), root, + &RADIX_TREE_INTERNAL_NODE, &RADIX_TREE_MAP_MASK, + &node_type, &node); if (err) goto out; - if (drgn_type_kind(member_type.type) != DRGN_TYPE_ARRAY) { - err = drgn_error_create(DRGN_ERROR_TYPE, - "struct radix_tree_node slots member is not an array"); - goto out; - } - RADIX_TREE_MAP_MASK = drgn_type_length(member_type.type) - 1; for (;;) { uint64_t value; @@ -243,6 +318,36 @@ struct drgn_error *linux_helper_idr_find(struct drgn_object *res, return err; } +static struct drgn_error *pid_hash_init(struct drgn_program *prog, + const struct drgn_object *ns, + struct drgn_qualified_type *upid_type_ret, + uint64_t *pidhash_length_ret, uint64_t *ns_level_ret) +{ + struct drgn_error *err; + struct drgn_object ns_level, pidhash_shift; + drgn_object_init(&ns_level, prog); 
+ drgn_object_init(&pidhash_shift, prog); + err = drgn_program_find_type(prog, "struct upid", NULL, upid_type_ret); + if (err) + goto out; + err = drgn_program_find_object(prog, "pidhash_shift", NULL, DRGN_FIND_OBJECT_ANY, + &pidhash_shift); + if (err) + goto out; + err = drgn_object_read_unsigned(&pidhash_shift, pidhash_length_ret); + if (err) + goto out; + // *pidhash_length_ret = 1 << pidhash_shift + *pidhash_length_ret = *pidhash_length_ret >= 64 ? 0 : UINT64_C(1) << *pidhash_length_ret; + err = drgn_object_member_dereference(&ns_level, ns, "level"); + if (err) + goto out; + err = drgn_object_read_unsigned(&ns_level, ns_level_ret); +out: + drgn_object_deinit(&ns_level); + return err; +} + /* * Before Linux kernel commit 95846ecf9dac ("pid: replace pid bitmap * implementation with IDR API") (in v4.15), (struct pid_namespace).idr does not @@ -257,15 +362,27 @@ find_pid_in_pid_hash(struct drgn_object *res, const struct drgn_object *ns, { struct drgn_error *err; + struct drgn_object node, tmp; + drgn_object_init(&node, drgn_object_program(res)); + drgn_object_init(&tmp, drgn_object_program(res)); + + err = drgn_object_read(&tmp, ns); + if (err) + goto out; + struct drgn_qualified_type upid_type; + uint64_t i, ns_level; + err = pid_hash_init(drgn_object_program(res), &tmp, &upid_type, &i, + &ns_level); + if (err) + goto out; struct drgn_qualified_type pidp_type; - err = drgn_program_find_type(drgn_object_program(res), "struct pid *", - NULL, &pidp_type); + err = drgn_program_find_type(drgn_object_program(res), "struct pid *", NULL, + &pidp_type); if (err) return err; - struct drgn_qualified_type upid_type; - err = drgn_program_find_type(drgn_object_program(res), "struct upid", - NULL, &upid_type); + uint64_t ns_addr; + err = drgn_object_read_unsigned(&tmp, &ns_addr); if (err) return err; @@ -298,40 +415,6 @@ find_pid_in_pid_hash(struct drgn_object *res, const struct drgn_object *ns, if (err) return err; - struct drgn_object node, tmp; - drgn_object_init(&node, 
drgn_object_program(res)); - drgn_object_init(&tmp, drgn_object_program(res)); - - err = drgn_object_read(&tmp, ns); - if (err) - goto out; - uint64_t ns_addr; - err = drgn_object_read_unsigned(&tmp, &ns_addr); - if (err) - goto out; - union drgn_value ns_level; - err = drgn_object_member_dereference(&tmp, &tmp, "level"); - if (err) - goto out; - err = drgn_object_read_integer(&tmp, &ns_level); - if (err) - goto out; - - /* i = 1 << pidhash_shift */ - err = drgn_program_find_object(drgn_object_program(res), - "pidhash_shift", NULL, - DRGN_FIND_OBJECT_ANY, &tmp); - if (err) - goto out; - union drgn_value pidhash_shift; - err = drgn_object_read_integer(&tmp, &pidhash_shift); - if (err) - goto out; - uint64_t i; - if (pidhash_shift.uvalue >= 64) - i = 0; - else - i = UINT64_C(1) << pidhash_shift.uvalue; while (i--) { /* for (node = pid_hash[i].first; node; node = node->next) */ err = drgn_object_subscript(&node, pid_hash, i); @@ -382,7 +465,7 @@ find_pid_in_pid_hash(struct drgn_object *res, const struct drgn_object *ns, goto next; sprintf(member, "numbers[%" PRIu64 "].pid_chain", - ns_level.uvalue); + ns_level); err = drgn_object_container_of(res, &node, drgn_type_type(pidp_type.type), member); @@ -463,6 +546,8 @@ struct drgn_error *linux_helper_pid_task(struct drgn_object *res, task_struct_type = drgn_type_type(task_structp_type.type); err = drgn_object_bool(pid, &truthy); + if (err) + goto out; if (!truthy) goto null; @@ -531,3 +616,381 @@ struct drgn_error *linux_helper_find_task(struct drgn_object *res, drgn_object_deinit(&pid_obj); return err; } + +struct linux_helper_radix_tree_iter_frame { + struct drgn_object slots; + uint64_t index; + uint64_t shift; + uint64_t next_slot; +}; + +DEFINE_VECTOR_FUNCTIONS(linux_helper_radix_tree_iter_frame_vector) + +struct drgn_error *linux_helper_radix_tree_iter_init(struct linux_helper_radix_tree_iter *iter, + const struct drgn_object *root) +{ + struct drgn_program *prog = drgn_object_program(root); + iter->started = false; 
+ drgn_object_init(&iter->root, prog); + drgn_object_init(&iter->entry.node, prog); + iter->entry.index = 0; + iter->prog = prog; + + struct drgn_error *err = + radix_tree_init(prog, root, &iter->RADIX_TREE_INTERNAL_NODE, + &iter->RADIX_TREE_MAP_MASK, &iter->node_type, &iter->root); + + if (err) { + drgn_object_deinit(&iter->root); + drgn_object_deinit(&iter->entry.node); + return err; + } + + linux_helper_radix_tree_iter_frame_vector_init(&iter->frames); + return NULL; +} + +void linux_helper_radix_tree_iter_deinit(struct linux_helper_radix_tree_iter *iter) +{ + drgn_object_deinit(&iter->root); + drgn_object_deinit(&iter->entry.node); + while (iter->frames.size) { + drgn_object_deinit( + &linux_helper_radix_tree_iter_frame_vector_pop(&iter->frames)->slots); + } + linux_helper_radix_tree_iter_frame_vector_deinit(&iter->frames); +} + +static struct drgn_error *radix_tree_iter_handle_node(struct linux_helper_radix_tree_iter *iter, + struct drgn_object *_node, uint64_t index, + bool *entry_populated_ret) +{ + struct drgn_object *node = &iter->entry.node; + struct drgn_error *err; + uint64_t value; + + err = drgn_object_read(node, _node); + if (err) + return err; + err = drgn_object_read_unsigned(node, &value); + if (err) + return err; + if ((value & RADIX_TREE_ENTRY_MASK) != iter->RADIX_TREE_INTERNAL_NODE) { + // Base-case, node is NOT internal + if (value) { + *entry_populated_ret = true; + iter->entry.index = index; + } + return NULL; + } + + *entry_populated_ret = false; + + // We are dealing with an internal node, and must iterate over its slots + + err = drgn_object_set_unsigned(node, iter->node_type, + value & ~iter->RADIX_TREE_INTERNAL_NODE, 0); + if (err) + return err; + struct linux_helper_radix_tree_iter_frame *frame = + linux_helper_radix_tree_iter_frame_vector_append_entry(&iter->frames); + if (!frame) + return &drgn_enomem; + frame->index = index; + frame->next_slot = 0; + drgn_object_init(&frame->slots, iter->prog); + // We temporarily use `frame->slots` 
to hold `shift` in order to avoid + // using another `struct drgn_object`. + err = drgn_object_member_dereference(&frame->slots, node, "shift"); + if (err) + goto err_frame; + err = drgn_object_read_unsigned(&frame->slots, &frame->shift); + if (err) + goto err_frame; + // Now `frame->slots` is actually used for `slots`. + err = drgn_object_member_dereference(&frame->slots, node, "slots"); + if (err) + goto err_frame; + return NULL; + +err_frame: + drgn_object_deinit(&frame->slots); + linux_helper_radix_tree_iter_frame_vector_pop(&iter->frames); + return err; +} + +struct drgn_error *linux_helper_radix_tree_iter_next(struct linux_helper_radix_tree_iter *iter, + struct linux_helper_radix_tree_iter_entry **ret) +{ + bool entry_populated = false; + struct drgn_error *err = NULL; + struct drgn_object node; + drgn_object_init(&node, iter->prog); + if (!iter->started) { + iter->started = true; + err = radix_tree_iter_handle_node(iter, &iter->root, 0, &entry_populated); + } + + while (!err && !entry_populated && iter->frames.size) { + struct linux_helper_radix_tree_iter_frame *frame = + &iter->frames.data[iter->frames.size - 1]; + if (frame->next_slot <= iter->RADIX_TREE_MAP_MASK) { + err = drgn_object_subscript(&node, &frame->slots, frame->next_slot); + if (!err) + err = radix_tree_iter_handle_node(iter, &node, + frame->index + (frame->next_slot++ + << frame->shift), + &entry_populated); + } else { + drgn_object_deinit(&frame->slots); + linux_helper_radix_tree_iter_frame_vector_pop(&iter->frames); + } + } + if (!err) + *ret = entry_populated ? 
&iter->entry : NULL; + drgn_object_deinit(&node); + return err; +} + +struct drgn_error *linux_helper_idr_iter_init(struct linux_helper_idr_iter *iter, + const struct drgn_object *idr) +{ + struct drgn_error *err; + struct drgn_object idr_rt, idr_base; + drgn_object_init(&idr_rt, drgn_object_program(idr)); + drgn_object_init(&idr_base, drgn_object_program(idr)); + + err = drgn_object_member(&idr_base, idr, "idr_base"); + if (!err) { + err = drgn_object_read_unsigned(&idr_base, &iter->base); + if (err) + goto out; + } else if (err->code == DRGN_ERROR_LOOKUP) { + drgn_error_destroy(err); + iter->base = 0; + } else { + goto out; + } + + err = drgn_object_member(&idr_rt, idr, "idr_rt"); + if (err) + goto out; + err = drgn_object_address_of(&idr_rt, &idr_rt); + if (err) + goto out; + err = linux_helper_radix_tree_iter_init(&iter->iter, &idr_rt); +out: + drgn_object_deinit(&idr_rt); + drgn_object_deinit(&idr_base); + return err; +} + +void linux_helper_idr_iter_deinit(struct linux_helper_idr_iter *iter) +{ + linux_helper_radix_tree_iter_deinit(&iter->iter); +} + +struct drgn_error *linux_helper_idr_iter_next(struct linux_helper_idr_iter *iter, + struct linux_helper_radix_tree_iter_entry **ret) +{ + struct drgn_error *err = linux_helper_radix_tree_iter_next(&iter->iter, ret); + if (!err && *ret) + (*ret)->index += iter->base; + return err; +} + +// See `find_pid_in_pid_hash` +static struct drgn_error *pid_iter_init_pid_hash(struct drgn_program *prog, + const struct drgn_object *ns, + struct linux_helper_pid_iter *iter) +{ + struct drgn_error *err; + drgn_object_init(&iter->pid_hash, prog); + drgn_object_init(&iter->pos, prog); + drgn_object_init(&iter->ns, prog); + drgn_object_init(&iter->entry, prog); + err = drgn_program_find_object(prog, "pid_hash", NULL, DRGN_FIND_OBJECT_VARIABLE, + &iter->pid_hash); + if (err) + goto out; + struct drgn_qualified_type void_star_type; + err = drgn_program_find_type(prog, "void *", NULL, &void_star_type); + if (err) + goto out; + err = 
drgn_object_set_unsigned(&iter->pos, void_star_type, 0, 0); + if (err) + goto out; + err = drgn_object_copy(&iter->ns, ns); + if (err) + goto out; + uint64_t ns_level; + err = pid_hash_init(prog, ns, &iter->upid_type, &iter->index, &ns_level); + if (err) + goto out; + snprintf(iter->member_specifier, sizeof(iter->member_specifier), "numbers[%" PRIu64 "]", + ns_level); + err = drgn_program_find_type(prog, "struct pid", NULL, &iter->pid_type); + if (err) + goto out; + err = drgn_program_find_type(prog, "struct upid", NULL, &iter->upid_type); + if (err) + goto out; +out: + if (err) { + drgn_object_deinit(&iter->pid_hash); + drgn_object_deinit(&iter->pos); + drgn_object_deinit(&iter->ns); + drgn_object_deinit(&iter->entry); + } + return err; +} + +struct drgn_error *linux_helper_pid_iter_init(struct linux_helper_pid_iter *iter, + const struct drgn_object *ns) +{ + struct drgn_program *prog = drgn_object_program(ns); + struct drgn_error *err; + struct drgn_object idr; + drgn_object_init(&idr, prog); + + err = drgn_object_member_dereference(&idr, ns, "idr"); + if (!err) { + iter->has_idr = true; + err = drgn_program_find_type(prog, "struct pid *", NULL, &iter->pid_type); + if (!err) + err = linux_helper_idr_iter_init(&iter->iter, &idr); + } else if (err->code == DRGN_ERROR_LOOKUP) { + iter->has_idr = false; + drgn_error_destroy(err); + err = pid_iter_init_pid_hash(prog, ns, iter); + } + + drgn_object_deinit(&idr); + return err; +} + +void linux_helper_pid_iter_deinit(struct linux_helper_pid_iter *iter) +{ + if (iter->has_idr) { + linux_helper_idr_iter_deinit(&iter->iter); + } else { + drgn_object_deinit(&iter->pid_hash); + drgn_object_deinit(&iter->pos); + drgn_object_deinit(&iter->ns); + drgn_object_deinit(&iter->entry); + } +} + +struct drgn_error *linux_helper_pid_iter_next(struct linux_helper_pid_iter *iter, + struct drgn_object **ret) +{ + if (iter->has_idr) { + struct linux_helper_radix_tree_iter_entry *entry; + struct drgn_error *err = 
linux_helper_idr_iter_next(&iter->iter, &entry); + if (err) + return err; + if (!entry) { + *ret = NULL; + return NULL; + } + err = drgn_object_cast(&entry->node, iter->pid_type, &entry->node); + if (!err) + *ret = &entry->node; + return err; + } + + struct drgn_error *err = NULL; + struct drgn_object upid, upid_ns; + drgn_object_init(&upid, drgn_object_program(&iter->ns)); + drgn_object_init(&upid_ns, drgn_object_program(&iter->ns)); + + for (;;) { + for (;;) { + bool is_truthy; + err = drgn_object_bool(&iter->pos, &is_truthy); + if (err) + goto out; + if (is_truthy) + break; + if (iter->index == 0) { + *ret = NULL; + goto out; + } + err = drgn_object_subscript(&iter->pos, &iter->pid_hash, --iter->index); + if (err) + goto out; + err = drgn_object_member(&iter->pos, &iter->pos, "first"); + if (err) + goto out; + err = drgn_object_bool(&iter->pos, &is_truthy); + if (err) + goto out; + } + err = drgn_object_container_of(&upid, &iter->pos, iter->upid_type, "pid_chain"); + if (err) + goto out; + err = drgn_object_member_dereference(&iter->pos, &iter->pos, "next"); + if (err) + goto out; + err = drgn_object_member_dereference(&upid_ns, &upid, "ns"); + if (err) + goto out; + int ns_cmp_result; + err = drgn_object_cmp(&upid_ns, &iter->ns, &ns_cmp_result); + if (err) + goto out; + if (ns_cmp_result == 0) { + err = drgn_object_container_of(&iter->entry, &upid, iter->pid_type, + iter->member_specifier); + if (!err) + *ret = &iter->entry; + goto out; + } + } + +out: + drgn_object_deinit(&upid); + drgn_object_deinit(&upid_ns); + return err; +} + +struct drgn_error *linux_helper_task_iter_init(struct linux_helper_task_iter *iter, + const struct drgn_object *ns) +{ + struct drgn_program *prog = drgn_object_program(ns); + struct drgn_error *err = linux_helper_pid_iter_init(&iter->iter, ns); + if (err) + return err; + struct drgn_object PIDTYPE_PID; + drgn_object_init(&PIDTYPE_PID, prog); + err = drgn_program_find_object(prog, "PIDTYPE_PID", NULL, DRGN_FIND_OBJECT_CONSTANT, + 
&PIDTYPE_PID); + if (!err) + err = drgn_object_read_unsigned(&PIDTYPE_PID, &iter->PIDTYPE_PID); + if (err) + linux_helper_pid_iter_deinit(&iter->iter); + drgn_object_deinit(&PIDTYPE_PID); + return err; +} + +struct drgn_error *linux_helper_task_iter_next(struct linux_helper_task_iter *iter, + struct drgn_object **ret) +{ + struct drgn_error *err; + bool value_is_truthy; + do { + err = linux_helper_pid_iter_next(&iter->iter, ret); + if (err || !*ret) + return err; + err = linux_helper_pid_task(*ret, *ret, iter->PIDTYPE_PID); + if (err) + return err; + err = drgn_object_bool(*ret, &value_is_truthy); + } while (!err && !value_is_truthy); + return err; +} + +void linux_helper_task_iter_deinit(struct linux_helper_task_iter *iter) +{ + linux_helper_pid_iter_deinit(&iter->iter); +} diff --git a/libdrgn/memory_reader.c b/libdrgn/memory_reader.c index 18ad93b62..527ec69c2 100644 --- a/libdrgn/memory_reader.c +++ b/libdrgn/memory_reader.c @@ -1,10 +1,9 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later #include #include #include -#include #include #include "memory_reader.h" @@ -249,21 +248,19 @@ struct drgn_error *drgn_read_memory_file(void *buf, uint64_t address, void *arg, bool physical) { struct drgn_memory_file_segment *file_segment = arg; - char *p = buf; - uint64_t file_offset = file_segment->file_offset + offset; - size_t file_count; - if (offset < file_segment->file_size) { - file_count = min((uint64_t)count, - file_segment->file_size - offset); - count -= file_count; - } else { - file_count = 0; + if (offset > file_segment->file_size || + count > file_segment->file_size - offset) { + if (offset <= file_segment->file_size) + address += file_segment->file_size - offset; + return drgn_error_create_fault("memory not saved in core dump", + address); } - while (file_count) { - ssize_t ret; - ret = pread(file_segment->fd, p, file_count, file_offset); + uint64_t file_offset = file_segment->file_offset + offset; + char *p = buf; + while (count) { + ssize_t ret = pread(file_segment->fd, p, count, file_offset); if (ret == -1) { if (errno == EINTR) { continue; @@ -278,9 +275,9 @@ struct drgn_error *drgn_read_memory_file(void *buf, uint64_t address, address); } p += ret; - file_count -= ret; + address += ret; + count -= ret; file_offset += ret; } - memset(p, 0, count); return NULL; } diff --git a/libdrgn/memory_reader.h b/libdrgn/memory_reader.h index 9e608995a..4fa524c4a 100644 --- a/libdrgn/memory_reader.h +++ b/libdrgn/memory_reader.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** @@ -123,8 +123,9 @@ struct drgn_memory_file_segment { uint64_t file_offset; /** * Size of the segment in the file. This may be less than the size of - * the segment in memory, in which case the remaining bytes are treated - * as if they contained zeroes. 
+ * the segment in memory, which means that the remaining bytes were in + * the program's memory but were not saved in the core dump. Attempting + * to read these bytes is treated as a fault. */ uint64_t file_size; /** File descriptor. */ diff --git a/libdrgn/minmax.h b/libdrgn/minmax.h index 7e517667b..0c64ce703 100644 --- a/libdrgn/minmax.h +++ b/libdrgn/minmax.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/nstring.h b/libdrgn/nstring.h new file mode 100644 index 000000000..5fc803d57 --- /dev/null +++ b/libdrgn/nstring.h @@ -0,0 +1,37 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: GPL-3.0-or-later + +/** + * @file + * + * String with length. + */ + +#ifndef DRGN_NSTRING_H +#define DRGN_NSTRING_H + +#include + +/** A string with a stored length. */ +struct nstring { + /** + * The string, which is not necessarily null-terminated and may have + * embedded null bytes. + */ + const char *str; + /** The length in bytes of the string. */ + size_t len; +}; + +/** Compare two @ref nstring keys for equality. */ +static inline bool nstring_eq(const struct nstring *a, const struct nstring *b) +{ + /* + * len == 0 is a special case because memcmp(NULL, NULL, 0) is + * technically undefined. + */ + return (a->len == b->len && + (a->len == 0 || memcmp(a->str, b->str, a->len) == 0)); +} + +#endif /* DRGN_NSTRING_H */ diff --git a/libdrgn/object.c b/libdrgn/object.c index 9e2fa4d2a..b0f0b7d61 100644 --- a/libdrgn/object.c +++ b/libdrgn/object.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later #include @@ -609,6 +609,74 @@ drgn_object_read_value(const struct drgn_object *obj, union drgn_value *value, ) } +LIBDRGN_PUBLIC struct drgn_error * +drgn_object_read_bytes(const struct drgn_object *obj, void *buf) +{ + struct drgn_error *err; + + if (!drgn_object_encoding_is_complete(obj->encoding)) { + return drgn_error_incomplete_type("cannot read object with %s type", + obj->type); + } + + SWITCH_ENUM(obj->kind, + case DRGN_OBJECT_VALUE: + if (obj->encoding == DRGN_OBJECT_ENCODING_BUFFER) { + memcpy(buf, drgn_object_buffer(obj), + drgn_object_size(obj)); + } else { + union { + uint64_t uvalue; + struct { +#if !HOST_LITTLE_ENDIAN + uint32_t pad; +#endif + float fvalue32; +#if HOST_LITTLE_ENDIAN + uint32_t pad; +#endif + }; + } tmp; + ((uint8_t *)buf)[drgn_object_size(obj) - 1] = 0; + if (obj->encoding == DRGN_OBJECT_ENCODING_FLOAT && + obj->bit_size == 32) { + tmp.fvalue32 = (float)obj->value.fvalue; + tmp.pad = 0; + } else { + tmp.uvalue = obj->value.uvalue; + } + serialize_bits(buf, 0, + truncate_unsigned(tmp.uvalue, obj->bit_size), + obj->bit_size, obj->little_endian); + } + return NULL; + case DRGN_OBJECT_REFERENCE: { + uint8_t bit_offset = obj->bit_offset; + uint64_t bit_size = obj->bit_size; + uint64_t read_size = drgn_value_size(bit_offset + bit_size); + if (bit_offset == 0) { + return drgn_program_read_memory(drgn_object_program(obj), + buf, obj->address, + read_size, false); + } else { + char tmp[9]; + assert(read_size <= sizeof(tmp)); + err = drgn_program_read_memory(drgn_object_program(obj), + tmp, obj->address, + read_size, false); + if (err) + return err; + ((uint8_t *)buf)[drgn_value_size(bit_size) - 1] = 0; + copy_bits(buf, 0, tmp, bit_offset, obj->bit_size, + obj->little_endian); + return NULL; + } + } + case DRGN_OBJECT_ABSENT: + return &drgn_error_object_absent; + ) +} + static struct drgn_error * drgn_object_value_signed(const struct drgn_object *obj, int64_t *ret) { diff --git a/libdrgn/object.h 
b/libdrgn/object.h index d008cdcc2..a94c87a3a 100644 --- a/libdrgn/object.h +++ b/libdrgn/object.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/object_index.c b/libdrgn/object_index.c index e7248138c..e2e592683 100644 --- a/libdrgn/object_index.c +++ b/libdrgn/object_index.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/object_index.h b/libdrgn/object_index.h index 2929056c4..f551118e4 100644 --- a/libdrgn/object_index.h +++ b/libdrgn/object_index.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/orc.h b/libdrgn/orc.h index dd1fc87d6..c3b42c86d 100644 --- a/libdrgn/orc.h +++ b/libdrgn/orc.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/orc_info.c b/libdrgn/orc_info.c new file mode 100644 index 000000000..6fe9bc486 --- /dev/null +++ b/libdrgn/orc_info.c @@ -0,0 +1,309 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: GPL-3.0-or-later + +#include +#include +#include +#include + +#include "debug_info.h" // IWYU pragma: associated +#include "error.h" +#include "orc.h" +#include "util.h" + +void drgn_orc_module_info_deinit(struct drgn_debug_info_module *module) +{ + free(module->orc.entries); + free(module->orc.pc_offsets); +} + +/* + * Get the program counter of an ORC entry directly from the .orc_unwind_ip + * section. 
+ */ +static inline uint64_t drgn_raw_orc_pc(struct drgn_debug_info_module *module, + size_t i) +{ + int32_t offset; + memcpy(&offset, + (int32_t *)module->scn_data[DRGN_SCN_ORC_UNWIND_IP]->d_buf + i, + sizeof(offset)); + if (drgn_platform_bswap(&module->platform)) + offset = bswap_32(offset); + return module->orc.pc_base + UINT64_C(4) * i + offset; +} + +static int compare_orc_entries(const void *a, const void *b, void *arg) +{ + struct drgn_debug_info_module *module = arg; + size_t index_a = *(size_t *)a; + size_t index_b = *(size_t *)b; + + uint64_t pc_a = drgn_raw_orc_pc(module, index_a); + uint64_t pc_b = drgn_raw_orc_pc(module, index_b); + if (pc_a < pc_b) + return -1; + else if (pc_a > pc_b) + return 1; + + /* + * If two entries have the same PC, then one is probably a "terminator" + * at the end of a compilation unit. Prefer the real entry. + */ + const struct drgn_orc_entry *entries = + module->scn_data[DRGN_SCN_ORC_UNWIND]->d_buf; + uint16_t flags_a, flags_b; + memcpy(&flags_a, &entries[index_a].flags, sizeof(flags_a)); + memcpy(&flags_b, &entries[index_b].flags, sizeof(flags_b)); + if (drgn_platform_bswap(&module->platform)) { + flags_a = bswap_16(flags_a); + flags_b = bswap_16(flags_b); + } + return (drgn_orc_flags_is_terminator(flags_b) + - drgn_orc_flags_is_terminator(flags_a)); +} + +static size_t keep_orc_entry(struct drgn_debug_info_module *module, + size_t *indices, size_t num_entries, size_t i) +{ + + const struct drgn_orc_entry *entries = + module->scn_data[DRGN_SCN_ORC_UNWIND]->d_buf; + if (num_entries > 0 && + memcmp(&entries[indices[num_entries - 1]], &entries[indices[i]], + sizeof(entries[0])) == 0) { + /* + * The previous entry is identical to this one, so we can skip + * this entry (which effectively merges it into the previous + * one). This usually happens for "terminator" entries. 
+ */ + return num_entries; + } + indices[num_entries] = indices[i]; + return num_entries + 1; +} + +/* + * The vast majority of ORC entries are redundant with DWARF CFI, and it's a + * waste to store and binary search those entries. This removes ORC entries that + * are entirely shadowed by DWARF FDEs. + */ +static size_t remove_fdes_from_orc(struct drgn_debug_info_module *module, + size_t *indices, size_t num_entries) +{ + if (module->dwarf.num_fdes == 0) + return num_entries; + + struct drgn_dwarf_fde *fde = module->dwarf.fdes; + struct drgn_dwarf_fde *last_fde = fde + module->dwarf.num_fdes - 1; + + size_t new_num_entries = 0; + + /* Keep any entries that start before the first DWARF FDE. */ + uint64_t start_pc; + for (;;) { + start_pc = drgn_raw_orc_pc(module, new_num_entries); + if (fde->initial_location <= start_pc) + break; + new_num_entries++; + if (new_num_entries == num_entries) + return num_entries; + } + + for (size_t i = new_num_entries; i < num_entries - 1; i++) { + uint64_t end_pc = drgn_raw_orc_pc(module, i + 1); + + /* + * Find the last FDE that starts at or before the current ORC + * entry. + */ + while (fde != last_fde && fde[1].initial_location <= start_pc) + fde++; + + /* + * Check whether the current ORC entry is completely covered by + * one or more FDEs. + */ + while (end_pc - fde->initial_location > fde->address_range) { + /* + * The current FDE doesn't cover the current ORC entry. + */ + if (fde == last_fde) { + /* + * There are no more FDEs. Keep the remaining + * ORC entries. + */ + if (i != new_num_entries) { + memmove(&indices[new_num_entries], + &indices[i], + (num_entries - i) * + sizeof(indices[0])); + } + return new_num_entries + (num_entries - i); + } + if (fde[1].initial_location - fde->initial_location + > fde->address_range) { + /* + * There is a gap between the current FDE and + * the next FDE that exposes the current ORC + * entry. Keep it. 
+ */ + new_num_entries = keep_orc_entry(module, + indices, + new_num_entries, + i); + break; + } + fde++; + } + + start_pc = end_pc; + } + /* We don't know where the last ORC entry ends, so always keep it. */ + return keep_orc_entry(module, indices, new_num_entries, + num_entries - 1); +} + +static struct drgn_error * +drgn_debug_info_parse_orc(struct drgn_debug_info_module *module) +{ + struct drgn_error *err; + + if (!module->platform.arch->orc_to_cfi || + !module->scns[DRGN_SCN_ORC_UNWIND_IP] || + !module->scns[DRGN_SCN_ORC_UNWIND]) + return NULL; + + GElf_Shdr shdr_mem, *shdr; + shdr = gelf_getshdr(module->scns[DRGN_SCN_ORC_UNWIND_IP], &shdr_mem); + if (!shdr) + return drgn_error_libelf(); + module->orc.pc_base = shdr->sh_addr; + + err = drgn_debug_info_module_cache_section(module, + DRGN_SCN_ORC_UNWIND_IP); + if (err) + return err; + err = drgn_debug_info_module_cache_section(module, DRGN_SCN_ORC_UNWIND); + if (err) + return err; + Elf_Data *orc_unwind_ip = module->scn_data[DRGN_SCN_ORC_UNWIND_IP]; + Elf_Data *orc_unwind = module->scn_data[DRGN_SCN_ORC_UNWIND]; + + size_t num_entries = orc_unwind_ip->d_size / sizeof(int32_t); + if (orc_unwind_ip->d_size % sizeof(int32_t) != 0 || + orc_unwind->d_size % sizeof(struct drgn_orc_entry) != 0 || + orc_unwind->d_size / sizeof(struct drgn_orc_entry) != num_entries) { + return drgn_error_create(DRGN_ERROR_OTHER, + ".orc_unwind_ip and/or .orc_unwind has invalid size"); + } + if (!num_entries) + return NULL; + + size_t *indices = malloc_array(num_entries, sizeof(indices[0])); + if (!indices) + return &drgn_enomem; + for (size_t i = 0; i < num_entries; i++) + indices[i] = i; + + /* + * Sort the ORC entries for binary search. Since Linux kernel commit + * f14bf6a350df ("x86/unwind/orc: Remove boot-time ORC unwind tables + * sorting") (in v5.6), this is already sorted for vmlinux, so only sort + * it if necessary. 
+ */ + for (size_t i = 1; i < num_entries; i++) { + if (compare_orc_entries(&indices[i - 1], &indices[i], + module) > 0) { + qsort_r(indices, num_entries, sizeof(indices[0]), + compare_orc_entries, module); + break; + } + } + + num_entries = remove_fdes_from_orc(module, indices, num_entries); + + int32_t *pc_offsets = malloc_array(num_entries, sizeof(pc_offsets[0])); + if (!pc_offsets) { + err = &drgn_enomem; + goto out; + } + struct drgn_orc_entry *entries = malloc_array(num_entries, + sizeof(entries[0])); + if (!entries) { + free(pc_offsets); + err = &drgn_enomem; + goto out; + } + const int32_t *orig_offsets = orc_unwind_ip->d_buf; + const struct drgn_orc_entry *orig_entries = orc_unwind->d_buf; + bool bswap = drgn_platform_bswap(&module->platform); + for (size_t i = 0; i < num_entries; i++) { + size_t index = indices[i]; + int32_t offset; + memcpy(&offset, &orig_offsets[index], sizeof(offset)); + struct drgn_orc_entry entry; + memcpy(&entry, &orig_entries[index], sizeof(entry)); + if (bswap) { + offset = bswap_32(offset); + entry.sp_offset = bswap_16(entry.sp_offset); + entry.bp_offset = bswap_16(entry.bp_offset); + entry.flags = bswap_16(entry.flags); + } + pc_offsets[i] = UINT64_C(4) * index + offset - UINT64_C(4) * i; + entries[i] = entry; + } + + module->orc.pc_offsets = pc_offsets; + module->orc.entries = entries; + module->orc.num_entries = num_entries; + + err = NULL; +out: + free(indices); + return err; +} + +static inline uint64_t drgn_orc_pc(struct drgn_debug_info_module *module, + size_t i) +{ + return module->orc.pc_base + UINT64_C(4) * i + module->orc.pc_offsets[i]; +} + +struct drgn_error * +drgn_debug_info_find_orc_cfi(struct drgn_debug_info_module *module, + uint64_t unbiased_pc, + struct drgn_cfi_row **row_ret, + bool *interrupted_ret, + drgn_register_number *ret_addr_regno_ret) +{ + struct drgn_error *err; + + if (!module->parsed_orc) { + err = drgn_debug_info_parse_orc(module); + if (err) + return err; + module->parsed_orc = true; + } + + /* 
+ * We don't know the maximum program counter covered by the ORC data, + * but the last entry seems to always be a terminator, so it doesn't + * matter. All addresses beyond the max will fall into the last entry. + */ + if (!module->orc.num_entries || unbiased_pc < drgn_orc_pc(module, 0)) + return &drgn_not_found; + size_t lo = 0, hi = module->orc.num_entries, found = 0; + while (lo < hi) { + size_t mid = lo + (hi - lo) / 2; + if (drgn_orc_pc(module, mid) <= unbiased_pc) { + found = mid; + lo = mid + 1; + } else { + hi = mid; + } + } + return module->platform.arch->orc_to_cfi(&module->orc.entries[found], + row_ret, interrupted_ret, + ret_addr_regno_ret); +} diff --git a/libdrgn/orc_info.h b/libdrgn/orc_info.h new file mode 100644 index 000000000..4aee69eef --- /dev/null +++ b/libdrgn/orc_info.h @@ -0,0 +1,76 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: GPL-3.0-or-later + +/** + * @file + * + * ORC unwinder support. + * + * See @ref DebugInfo. + */ + +#ifndef DRGN_ORC_INFO_H +#define DRGN_ORC_INFO_H + +#include +#include +#include + +#include "cfi.h" + +struct drgn_debug_info_module; + +/** + * @ingroup DebugInfo + * + * @{ + */ + +/** ORC unwinder data for a @ref drgn_debug_info_module. */ +struct drgn_orc_module_info { + /** + * Base for calculating program counter corresponding to an ORC unwinder + * entry. + * + * This is the address of the `.orc_unwind_ip` ELF section. + * + * @sa drgn_orc_module_info::entries + */ + uint64_t pc_base; + /** + * Offsets for calculating program counter corresponding to an ORC + * unwinder entry. + * + * This is the contents of the `.orc_unwind_ip` ELF section, byte + * swapped to the host's byte order if necessary. + * + * @sa drgn_orc_module_info::entries + */ + int32_t *pc_offsets; + /** + * ORC unwinder entries. + * + * This is the contents of the `.orc_unwind` ELF section, byte swapped + * to the host's byte order if necessary. 
+ * + * Entry `i` specifies how to unwind the stack if + * `orc_pc(i) <= PC < orc_pc(i + 1)`, where + * `orc_pc(i) = pc_base + 4 * i + pc_offsets[i]`. + */ + struct drgn_orc_entry *entries; + /** Number of ORC unwinder entries. */ + size_t num_entries; +}; + +void drgn_orc_module_info_deinit(struct drgn_debug_info_module *module); + +struct drgn_error * +drgn_debug_info_find_orc_cfi(struct drgn_debug_info_module *module, + uint64_t unbiased_pc, + struct drgn_cfi_row **row_ret, + bool *interrupted_ret, + drgn_register_number *ret_addr_regno_ret); + +/** @} */ + +#endif /* DRGN_ORC_INFO_H */ diff --git a/libdrgn/path.c b/libdrgn/path.c index cafc4dea0..653478ec4 100644 --- a/libdrgn/path.c +++ b/libdrgn/path.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include @@ -6,12 +6,13 @@ #include #include "path.h" +#include "util.h" bool path_iterator_next(struct path_iterator *it, const char **component_ret, size_t *component_len_ret) { while (it->num_components) { - struct string *cur = &it->components[it->num_components - 1]; + struct nstring *cur = &it->components[it->num_components - 1]; while (cur->len > 0) { if (cur->str[cur->len - 1] == '/') { if (cur->len == 1) { @@ -95,7 +96,7 @@ bool die_matches_filename(Dwarf_Die *die, const char *filename) if (!filename || !filename[0]) return true; - struct string die_components[2]; + struct nstring die_components[2]; struct path_iterator die_path = { .components = die_components, }; @@ -123,7 +124,7 @@ bool die_matches_filename(Dwarf_Die *die, const char *filename) die_path.num_components++; struct path_iterator needle = { - .components = (struct string []){ + .components = (struct nstring []){ { filename, strlen(filename) } }, .num_components = 1, @@ -136,13 +137,13 @@ LIBDRGN_PUBLIC bool drgn_filename_matches(const char *haystack, const char *needle) { struct path_iterator haystack_path = { - 
.components = (struct string []){ + .components = (struct nstring []){ { haystack, strlen(haystack) } }, .num_components = 1, }; struct path_iterator needle_path = { - .components = (struct string []){ + .components = (struct nstring []){ { needle, strlen(needle) } }, .num_components = 1, diff --git a/libdrgn/path.h b/libdrgn/path.h index 6a36349ce..e03cd2fd9 100644 --- a/libdrgn/path.h +++ b/libdrgn/path.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** @@ -12,12 +12,11 @@ #ifndef DRGN_PATH_H #define DRGN_PATH_H +#include #include #include -#include - -#include "util.h" +#include "nstring.h" // IWYU pragma: export /** * @ingroup Internals @@ -61,10 +60,10 @@ struct path_iterator { * Array of input components. * * The input components are treated as if they were joined with a "/". - * @ref string::str and @ref string::len should be initialized for each - * component. The latter will be modified as the path is iterated. + * @ref nstring::str and @ref nstring::len should be initialized for + * each component. The latter will be modified as the path is iterated. */ - struct string *components; + struct nstring *components; /** Number of components in @ref path_iterator::components. */ size_t num_components; /** diff --git a/libdrgn/platform.c b/libdrgn/platform.c index d778b3a01..0c7059977 100644 --- a/libdrgn/platform.c +++ b/libdrgn/platform.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/platform.h b/libdrgn/platform.h index 288ea4e75..61b109f6e 100644 --- a/libdrgn/platform.h +++ b/libdrgn/platform.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later #ifndef DRGN_PLATFORM_H diff --git a/libdrgn/pp.h b/libdrgn/pp.h index e38cadd3f..67805269a 100644 --- a/libdrgn/pp.h +++ b/libdrgn/pp.h @@ -1,7 +1,7 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later -/* +/** * @file * * Preprocessor utilities. @@ -75,29 +75,36 @@ * Expand and concatenate arguments. * * This expands each argument and then joins them with the `##` operator. + * `PP_CAT` takes two arguments, `PP_CAT3` takes three, `PP_CAT4` takes four, + * etc. * * ``` * #define a foo * #define b bar - * PP_CAT(a, b, baz) // Expands to foobarbaz + * PP_CAT(a, b) // Expands to foobar * ``` * * Intermediate results are not expanded: * ``` * #define HELLO oops - * PP_CAT(HELL, O, WORLD) // Expands to HELLOWORLD, _not_ oopsWORLD + * PP_CAT3(HELL, O, WORLD) // Expands to HELLOWORLD, _not_ oopsWORLD * ``` * * All possible intermediate results must be valid preprocessing tokens: * ``` - * PP_CAT(1e, +, 3) // Undefined because +3 is not a valid preprocessing token + * PP_CAT3(1e, +, 3) // Undefined because +3 is not a valid preprocessing token * ``` * * @hideinitializer */ -#define PP_CAT(...) 
PP_OVERLOAD(PP_CAT_I, __VA_ARGS__)(__VA_ARGS__) +#define PP_CAT(_0, _1) PP_CAT_I2(_0, _1) +#define PP_CAT3(_0, _1, _2) PP_CAT_I3(_0, _1, _2) +#define PP_CAT4(_0, _1, _2, _3) PP_CAT_I4(_0, _1, _2, _3) +#define PP_CAT5(_0, _1, _2, _3, _4) PP_CAT_I5(_0, _1, _2, _3, _4) +#define PP_CAT6(_0, _1, _2, _3, _4, _5) PP_CAT_I6(_0, _1, _2, _3, _4, _5) +#define PP_CAT7(_0, _1, _2, _3, _4, _5, _6) PP_CAT_I7(_0, _1, _2, _3, _4, _5, _6) +#define PP_CAT8(_0, _1, _2, _3, _4, _5, _6, _7) PP_CAT_I8(_0, _1, _2, _3, _4, _5, _6, _7) /** @cond */ -#define PP_CAT_I1(_0) _0 #define PP_CAT_I2(_0, _1) _0##_1 #define PP_CAT_I3(_0, _1, _2) _0##_1##_2 #define PP_CAT_I4(_0, _1, _2, _3) _0##_1##_2##_3 diff --git a/libdrgn/program.c b/libdrgn/program.c index e4b5c6cdd..cc1bcdd6e 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -1,12 +1,10 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include #include -#include #include #include -#include #include #include #include @@ -18,7 +16,6 @@ #include #include "debug_info.h" -#include "dwarf_index.h" #include "error.h" #include "language.h" #include "linux_kernel.h" @@ -31,16 +28,7 @@ #include "util.h" DEFINE_VECTOR_FUNCTIONS(drgn_prstatus_vector) -DEFINE_HASH_TABLE_FUNCTIONS(drgn_prstatus_map, int_key_hash_pair, scalar_key_eq) - -static Elf_Type note_header_type(GElf_Phdr *phdr) -{ -#if _ELFUTILS_PREREQ(0, 175) - if (phdr->p_align == 8) - return ELF_T_NHDR8; -#endif - return ELF_T_NHDR; -} +DEFINE_HASH_MAP_FUNCTIONS(drgn_prstatus_map, int_key_hash_pair, scalar_key_eq) LIBDRGN_PUBLIC enum drgn_program_flags drgn_program_flags(struct drgn_program *prog) @@ -112,7 +100,7 @@ void drgn_program_deinit(struct drgn_program *prog) if (prog->core_fd != -1) close(prog->core_fd); - drgn_debug_info_destroy(prog->_dbinfo); + drgn_debug_info_destroy(prog->dbinfo); } LIBDRGN_PUBLIC struct drgn_error * @@ -280,7 +268,7 @@ drgn_program_set_core_dump(struct 
drgn_program *prog, const char *path) data = elf_getdata_rawchunk(prog->core, phdr->p_offset, phdr->p_filesz, - note_header_type(phdr)); + note_header_type(phdr->p_align)); if (!data) { err = drgn_error_libelf(); goto out_platform; @@ -295,11 +283,13 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) name = (char *)data->d_buf + name_offset; desc = (char *)data->d_buf + desc_offset; - if (strncmp(name, "CORE", nhdr.n_namesz) == 0) { + if (nhdr.n_namesz == sizeof("CORE") && + memcmp(name, "CORE", sizeof("CORE")) == 0) { if (nhdr.n_type == NT_TASKSTRUCT) have_nt_taskstruct = true; - } else if (strncmp(name, "VMCOREINFO", - nhdr.n_namesz) == 0) { + } else if (nhdr.n_namesz == sizeof("VMCOREINFO") && + memcmp(name, "VMCOREINFO", + sizeof("VMCOREINFO")) == 0) { vmcoreinfo_note = desc; vmcoreinfo_size = nhdr.n_descsz; /* @@ -350,8 +340,10 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) goto out_platform; } - if ((is_proc_kcore || vmcoreinfo_note) && - prog->platform.arch->linux_kernel_pgtable_iterator_next) { + bool pgtable_reader = + (is_proc_kcore || vmcoreinfo_note) && + prog->platform.arch->linux_kernel_pgtable_iterator_next; + if (pgtable_reader) { /* * Try to read any memory that isn't in the core dump via the * page table. @@ -381,6 +373,13 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) prog->file_segments[j].fd = prog->core_fd; prog->file_segments[j].eio_is_fault = false; err = drgn_program_add_memory_segment(prog, phdr->p_vaddr, + /* + * Don't override the page + * table reader for + * unsaved regions. + */ + pgtable_reader ? + phdr->p_filesz : phdr->p_memsz, drgn_read_memory_file, &prog->file_segments[j], @@ -391,6 +390,8 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) phdr->p_paddr != (is_64_bit ? UINT64_MAX : UINT32_MAX)) { err = drgn_program_add_memory_segment(prog, phdr->p_paddr, + pgtable_reader ? 
+ phdr->p_filesz : phdr->p_memsz, drgn_read_memory_file, &prog->file_segments[j], @@ -436,6 +437,8 @@ drgn_program_set_core_dump(struct drgn_program *prog, const char *path) phys_addr = phdr->p_vaddr - direct_mapping; err = drgn_program_add_memory_segment(prog, phys_addr, + pgtable_reader ? + phdr->p_filesz : phdr->p_memsz, drgn_read_memory_file, &prog->file_segments[j], @@ -549,70 +552,19 @@ drgn_program_set_pid(struct drgn_program *prog, pid_t pid) return err; } -struct drgn_error *drgn_program_get_dbinfo(struct drgn_program *prog, - struct drgn_debug_info **ret) -{ - struct drgn_error *err; - - if (!prog->_dbinfo) { - struct drgn_debug_info *dbinfo; - err = drgn_debug_info_create(prog, &dbinfo); - if (err) - return err; - err = drgn_program_add_object_finder(prog, - drgn_debug_info_find_object, - dbinfo); - if (err) { - drgn_debug_info_destroy(dbinfo); - return err; - } - err = drgn_program_add_type_finder(prog, - drgn_debug_info_find_type, - dbinfo); - if (err) { - drgn_object_index_remove_finder(&prog->oindex); - drgn_debug_info_destroy(dbinfo); - return err; - } - prog->_dbinfo = dbinfo; - } - *ret = prog->_dbinfo; - return NULL; -} - /* Set the default language from the language of "main". 
*/ -static void drgn_program_set_language_from_main(struct drgn_debug_info *dbinfo) +static void drgn_program_set_language_from_main(struct drgn_program *prog) { struct drgn_error *err; - struct drgn_dwarf_index_iterator it; - static const uint64_t tags[] = { DW_TAG_subprogram }; - err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dindex.global, - "main", strlen("main"), tags, - ARRAY_SIZE(tags)); - if (err) { - drgn_error_destroy(err); - return; - } - struct drgn_dwarf_index_die *index_die; - while ((index_die = drgn_dwarf_index_iterator_next(&it))) { - Dwarf_Die die; - err = drgn_dwarf_index_get_die(index_die, &die); - if (err) { - drgn_error_destroy(err); - continue; - } - const struct drgn_language *lang; - err = drgn_language_from_die(&die, false, &lang); - if (err) { - drgn_error_destroy(err); - continue; - } - if (lang) { - dbinfo->prog->lang = lang; - break; - } - } + if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) + return; + const struct drgn_language *lang; + err = drgn_debug_info_main_language(prog->dbinfo, &lang); + if (err) + drgn_error_destroy(err); + if (lang) + prog->lang = lang; } static int drgn_set_platform_from_dwarf(Dwfl_Module *module, void **userdatap, @@ -644,16 +596,33 @@ drgn_program_load_debug_info(struct drgn_program *prog, const char **paths, if (!n && !load_default && !load_main) return NULL; - struct drgn_debug_info *dbinfo; - err = drgn_program_get_dbinfo(prog, &dbinfo); - if (err) - return err; + struct drgn_debug_info *dbinfo = prog->dbinfo; + if (!dbinfo) { + err = drgn_debug_info_create(prog, &dbinfo); + if (err) + return err; + err = drgn_program_add_object_finder(prog, + drgn_debug_info_find_object, + dbinfo); + if (err) { + drgn_debug_info_destroy(dbinfo); + return err; + } + err = drgn_program_add_type_finder(prog, + drgn_debug_info_find_type, + dbinfo); + if (err) { + drgn_object_index_remove_finder(&prog->oindex); + drgn_debug_info_destroy(dbinfo); + return err; + } + prog->dbinfo = dbinfo; + } err = 
drgn_debug_info_load(dbinfo, paths, n, load_default, load_main); if ((!err || err->code == DRGN_ERROR_MISSING_DEBUG_INFO)) { - if (!prog->lang && - !(prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL)) - drgn_program_set_language_from_main(dbinfo); + if (!prog->lang) + drgn_program_set_language_from_main(prog); if (!prog->has_platform) { dwfl_getdwarf(dbinfo->dwfl, drgn_set_platform_from_dwarf, prog, 0); @@ -691,7 +660,7 @@ struct drgn_error *drgn_program_cache_prstatus_entry(struct drgn_program *prog, size_t size) { if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) { - struct string *entry = + struct nstring *entry = drgn_prstatus_vector_append_entry(&prog->prstatus_vector); if (!entry) return &drgn_enomem; @@ -756,7 +725,7 @@ static struct drgn_error *drgn_program_cache_prstatus(struct drgn_program *prog) data = elf_getdata_rawchunk(prog->core, phdr->p_offset, phdr->p_filesz, - note_header_type(phdr)); + note_header_type(phdr->p_align)); if (!data) { err = drgn_error_libelf(); goto out; @@ -796,7 +765,7 @@ static struct drgn_error *drgn_program_cache_prstatus(struct drgn_program *prog) struct drgn_error *drgn_program_find_prstatus_by_cpu(struct drgn_program *prog, uint32_t cpu, - struct string *ret, + struct nstring *ret, uint32_t *tid_ret) { assert(prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL); @@ -816,7 +785,7 @@ struct drgn_error *drgn_program_find_prstatus_by_cpu(struct drgn_program *prog, struct drgn_error *drgn_program_find_prstatus_by_tid(struct drgn_program *prog, uint32_t tid, - struct string *ret) + struct nstring *ret) { struct drgn_error *err; struct drgn_prstatus_map_iterator it; @@ -1095,8 +1064,8 @@ bool drgn_program_find_symbol_by_address_internal(struct drgn_program *prog, struct drgn_symbol *ret) { if (!module) { - if (prog->_dbinfo) { - module = dwfl_addrmodule(prog->_dbinfo->dwfl, address); + if (prog->dbinfo) { + module = dwfl_addrmodule(prog->dbinfo->dwfl, address); if (!module) return false; } else { @@ -1110,9 +1079,7 @@ bool 
drgn_program_find_symbol_by_address_internal(struct drgn_program *prog, &elf_sym, NULL, NULL, NULL); if (!name) return false; - ret->name = name; - ret->address = address - offset; - ret->size = elf_sym.st_size; + drgn_symbol_from_elf(name, address - offset, &elf_sym, ret); return true; } @@ -1143,8 +1110,9 @@ drgn_program_find_symbol_by_address(struct drgn_program *prog, uint64_t address, struct find_symbol_by_name_arg { const char *name; - struct drgn_symbol **ret; - struct drgn_error *err; + GElf_Sym sym; + GElf_Addr addr; + bool found; bool bad_symtabs; }; @@ -1153,34 +1121,43 @@ static int find_symbol_by_name_cb(Dwfl_Module *dwfl_module, void **userdatap, void *cb_arg) { struct find_symbol_by_name_arg *arg = cb_arg; - int symtab_len, i; - - symtab_len = dwfl_module_getsymtab(dwfl_module); - i = dwfl_module_getsymtab_first_global(dwfl_module); - if (symtab_len == -1 || i == -1) { + int symtab_len = dwfl_module_getsymtab(dwfl_module); + if (symtab_len == -1) { arg->bad_symtabs = true; return DWARF_CB_OK; } - for (; i < symtab_len; i++) { - GElf_Sym elf_sym; - GElf_Addr elf_addr; - const char *name; - - name = dwfl_module_getsym_info(dwfl_module, i, &elf_sym, - &elf_addr, NULL, NULL, NULL); + /* + * Global symbols are after local symbols, so by iterating backwards we + * might find a global symbol faster. Ignore the zeroth null symbol. + */ + for (int i = symtab_len - 1; i > 0; i--) { + GElf_Sym sym; + GElf_Addr addr; + const char *name = dwfl_module_getsym_info(dwfl_module, i, &sym, + &addr, NULL, NULL, + NULL); if (name && strcmp(arg->name, name) == 0) { - struct drgn_symbol *sym; - - sym = malloc(sizeof(*sym)); - if (sym) { - sym->name = name; - sym->address = elf_addr; - sym->size = elf_sym.st_size; - *arg->ret = sym; - } else { - arg->err = &drgn_enomem; + /* + * The order of precedence is + * GLOBAL = GNU_UNIQUE > WEAK > LOCAL = everything else + * + * If we found a global or unique symbol, return it + * immediately. 
If we found a weak symbol, then save it, + * which may overwrite a previously found weak or local + * symbol. Otherwise, save the symbol only if we haven't + * found another symbol. + */ + if (GELF_ST_BIND(sym.st_info) == STB_GLOBAL || + GELF_ST_BIND(sym.st_info) == STB_GNU_UNIQUE || + GELF_ST_BIND(sym.st_info) == STB_WEAK || + !arg->found) { + arg->sym = sym; + arg->addr = addr; + arg->found = true; } - return DWARF_CB_ABORT; + if (GELF_ST_BIND(sym.st_info) == STB_GLOBAL || + GELF_ST_BIND(sym.st_info) == STB_GNU_UNIQUE) + return DWARF_CB_ABORT; } } return DWARF_CB_OK; @@ -1192,13 +1169,19 @@ drgn_program_find_symbol_by_name(struct drgn_program *prog, { struct find_symbol_by_name_arg arg = { .name = name, - .ret = ret, }; - - if (prog->_dbinfo && - dwfl_getmodules(prog->_dbinfo->dwfl, find_symbol_by_name_cb, - &arg, 0)) - return arg.err; + if (prog->dbinfo) { + dwfl_getmodules(prog->dbinfo->dwfl, find_symbol_by_name_cb, + &arg, 0); + if (arg.found) { + struct drgn_symbol *sym = malloc(sizeof(*sym)); + if (!sym) + return &drgn_enomem; + drgn_symbol_from_elf(name, arg.addr, &arg.sym, sym); + *ret = sym; + return NULL; + } + } return drgn_error_format(DRGN_ERROR_LOOKUP, "could not find symbol with name '%s'%s", name, arg.bad_symtabs ? diff --git a/libdrgn/program.h b/libdrgn/program.h index e58434d80..0924cb64d 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later /** @@ -28,7 +28,6 @@ #include "type.h" #include "vector.h" -struct drgn_debug_info; struct drgn_symbol; /** @@ -66,8 +65,8 @@ struct vmcoreinfo { }; DEFINE_VECTOR_TYPE(drgn_typep_vector, struct drgn_type *) -DEFINE_VECTOR_TYPE(drgn_prstatus_vector, struct string) -DEFINE_HASH_MAP_TYPE(drgn_prstatus_map, uint32_t, struct string) +DEFINE_VECTOR_TYPE(drgn_prstatus_vector, struct nstring) +DEFINE_HASH_MAP_TYPE(drgn_prstatus_map, uint32_t, struct nstring) struct drgn_program { /** @privatesection */ @@ -122,7 +121,7 @@ struct drgn_program { * Debugging information. */ struct drgn_object_index oindex; - struct drgn_debug_info *_dbinfo; + struct drgn_debug_info *dbinfo; /* * Program information. @@ -257,9 +256,6 @@ drgn_program_address_mask(const struct drgn_program *prog, uint64_t *ret) return NULL; } -struct drgn_error *drgn_program_get_dbinfo(struct drgn_program *prog, - struct drgn_debug_info **ret); - /** * Find the @c NT_PRSTATUS note for the given CPU. * @@ -271,7 +267,7 @@ struct drgn_error *drgn_program_get_dbinfo(struct drgn_program *prog, */ struct drgn_error *drgn_program_find_prstatus_by_cpu(struct drgn_program *prog, uint32_t cpu, - struct string *ret, + struct nstring *ret, uint32_t *tid_ret); /** @@ -284,7 +280,7 @@ struct drgn_error *drgn_program_find_prstatus_by_cpu(struct drgn_program *prog, */ struct drgn_error *drgn_program_find_prstatus_by_tid(struct drgn_program *prog, uint32_t tid, - struct string *ret); + struct nstring *ret); /** * Cache the @c NT_PRSTATUS note provided by @p data in @p prog. diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index 2094e15d8..c3ed2428b 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later #ifndef DRGNPY_H @@ -97,6 +97,14 @@ typedef struct { struct pyobjectp_set objects; } Program; +typedef struct _GenericIterator { + PyObject_HEAD + Program *prog; + void *iter; + PyObject *(*next)(struct _GenericIterator *); + void (*iter_deinit)(void *); +} GenericIterator; + typedef struct { PyObject_HEAD const struct drgn_register *reg; @@ -161,10 +169,13 @@ extern PyObject *PlatformFlags_class; extern PyObject *PrimitiveType_class; extern PyObject *ProgramFlags_class; extern PyObject *Qualifiers_class; +extern PyObject *SymbolBinding_class; +extern PyObject *SymbolKind_class; extern PyObject *TypeKind_class; extern PyTypeObject DrgnObject_type; extern PyTypeObject DrgnType_type; extern PyTypeObject FaultError_type; +extern PyTypeObject GenericIterator_type; extern PyTypeObject Language_type; extern PyTypeObject ObjectIterator_type; extern PyTypeObject Platform_type; @@ -280,6 +291,10 @@ int enum_converter(PyObject *o, void *p); PyObject *drgnpy_linux_helper_read_vm(PyObject *self, PyObject *args, PyObject *kwds); +DrgnObject *drgnpy_linux_helper_per_cpu_ptr(PyObject *self, PyObject *args, + PyObject *kwds); +DrgnObject *drgnpy_linux_helper_idle_task(PyObject *self, PyObject *args, + PyObject *kwds); DrgnObject *drgnpy_linux_helper_radix_tree_lookup(PyObject *self, PyObject *args, PyObject *kwds); @@ -291,11 +306,21 @@ DrgnObject *drgnpy_linux_helper_pid_task(PyObject *self, PyObject *args, PyObject *kwds); DrgnObject *drgnpy_linux_helper_find_task(PyObject *self, PyObject *args, PyObject *kwds); -PyObject *drgnpy_linux_helper_task_state_to_char(PyObject *self, PyObject *args, - PyObject *kwds); PyObject *drgnpy_linux_helper_kaslr_offset(PyObject *self, PyObject *args, PyObject *kwds); PyObject *drgnpy_linux_helper_pgtable_l5_enabled(PyObject *self, PyObject *args, PyObject *kwds); +GenericIterator *drgnpy_linux_helper_for_each_task(PyObject *self, + PyObject *args, + PyObject *kwds); +GenericIterator 
*drgnpy_linux_helper_for_each_pid(PyObject *self, + PyObject *args, + PyObject *kwds); +GenericIterator *drgnpy_linux_helper_idr_for_each(PyObject *self, + PyObject *args, + PyObject *kwds); +GenericIterator *drgnpy_linux_helper_radix_tree_for_each(PyObject *self, + PyObject *args, + PyObject *kwds); #endif /* DRGNPY_H */ diff --git a/libdrgn/python/error.c b/libdrgn/python/error.c index 5e674ca23..3b749efe6 100644 --- a/libdrgn/python/error.c +++ b/libdrgn/python/error.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include "drgnpy.h" diff --git a/libdrgn/python/helpers.c b/libdrgn/python/helpers.c index 3158be0bd..7b9b90595 100644 --- a/libdrgn/python/helpers.c +++ b/libdrgn/python/helpers.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include "drgnpy.h" @@ -38,6 +38,54 @@ PyObject *drgnpy_linux_helper_read_vm(PyObject *self, PyObject *args, return buf; } +DrgnObject *drgnpy_linux_helper_per_cpu_ptr(PyObject *self, PyObject *args, + PyObject *kwds) +{ + static char *keywords[] = {"ptr", "cpu", NULL}; + struct drgn_error *err; + DrgnObject *ptr; + struct index_arg cpu = {}; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O&:per_cpu_ptr", + keywords, &DrgnObject_type, &ptr, + index_converter, &cpu)) + return NULL; + + DrgnObject *res = DrgnObject_alloc(DrgnObject_prog(ptr)); + if (!res) + return NULL; + err = linux_helper_per_cpu_ptr(&res->obj, &ptr->obj, cpu.uvalue); + if (err) { + Py_DECREF(res); + return set_drgn_error(err); + } + return res; +} + +DrgnObject *drgnpy_linux_helper_idle_task(PyObject *self, PyObject *args, + PyObject *kwds) +{ + static char *keywords[] = {"prog", "cpu", NULL}; + struct drgn_error *err; + Program *prog; + struct index_arg cpu = {}; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, 
"O!O&:idle_task", keywords, + &Program_type, &prog, index_converter, + &cpu)) + return NULL; + + DrgnObject *res = DrgnObject_alloc(prog); + if (!res) + return NULL; + err = linux_helper_idle_task(&res->obj, cpu.uvalue); + if (err) { + Py_DECREF(res); + return set_drgn_error(err); + } + return res; +} + DrgnObject *drgnpy_linux_helper_radix_tree_lookup(PyObject *self, PyObject *args, PyObject *kwds) @@ -249,3 +297,251 @@ PyObject *drgnpy_linux_helper_pgtable_l5_enabled(PyObject *self, PyObject *args, return PyErr_Format(PyExc_ValueError, "not Linux kernel"); Py_RETURN_BOOL(prog->prog.vmcoreinfo.pgtable_l5_enabled); } + +static void GenericIterator_dealloc(GenericIterator *self) +{ + if (self->iter) { + self->iter_deinit(self->iter); + free(self->iter); + } + Py_XDECREF(self->prog); + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static PyObject *GenericIterator_next(GenericIterator *self) +{ + return self->next(self); +} + +PyTypeObject GenericIterator_type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "_drgn._GenericIterator", + .tp_basicsize = sizeof(GenericIterator), + .tp_dealloc = (destructor)GenericIterator_dealloc, + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_iter = PyObject_SelfIter, + .tp_iternext = (iternextfunc)GenericIterator_next, +}; + +static PyObject *for_each_task_next(GenericIterator *self) +{ + struct drgn_error *err; + struct drgn_object *entry; + err = linux_helper_task_iter_next(self->iter, &entry); + if (err) + return set_drgn_error(err); + if (!entry) + return NULL; + DrgnObject *ret = DrgnObject_alloc(self->prog); + if (!ret) + return NULL; + err = drgn_object_copy(&ret->obj, entry); + if (err) { + Py_DECREF(ret); + return set_drgn_error(err); + } + return (PyObject *)ret; +} + +GenericIterator *drgnpy_linux_helper_for_each_task(PyObject *self, + PyObject *args, + PyObject *kwds) +{ + static char *keywords[] = {"prog_or_ns", NULL}; + struct drgn_error *err = NULL; + struct prog_or_ns_arg prog_or_ns; + + if 
(!PyArg_ParseTupleAndKeywords(args, kwds, "O&:for_each_task", + keywords, &prog_or_pid_ns_converter, + &prog_or_ns)) + return NULL; + + GenericIterator *iterator = + (GenericIterator *)GenericIterator_type.tp_alloc( + &GenericIterator_type, 0); + if (!iterator) + goto out; + iterator->prog = prog_or_ns.prog; + Py_INCREF(iterator->prog); + iterator->next = for_each_task_next; + iterator->iter_deinit = (void (*)(void *))linux_helper_task_iter_deinit; + iterator->iter = malloc(sizeof(struct linux_helper_task_iter)); + if (!iterator->iter) { + PyErr_NoMemory(); + Py_DECREF(iterator); + iterator = NULL; + goto out; + } + err = linux_helper_task_iter_init(iterator->iter, prog_or_ns.ns); + if (err) { + set_drgn_error(err); + Py_DECREF(iterator); + iterator = NULL; + } +out: + prog_or_ns_cleanup(&prog_or_ns); + return iterator; +} + +static PyObject *for_each_pid_next(GenericIterator *self) +{ + struct drgn_error *err; + struct drgn_object *entry; + err = linux_helper_pid_iter_next(self->iter, &entry); + if (err) + return set_drgn_error(err); + if (!entry) + return NULL; + DrgnObject *ret = DrgnObject_alloc(self->prog); + if (!ret) + return NULL; + err = drgn_object_copy(&ret->obj, entry); + if (err) { + Py_DECREF(ret); + return set_drgn_error(err); + } + return (PyObject *)ret; +} + +GenericIterator * +drgnpy_linux_helper_for_each_pid(PyObject *self, PyObject *args, PyObject *kwds) +{ + static char *keywords[] = {"prog_or_ns", NULL}; + struct drgn_error *err = NULL; + struct prog_or_ns_arg prog_or_ns; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&:for_each_pid", + keywords, &prog_or_pid_ns_converter, + &prog_or_ns)) + return NULL; + + GenericIterator *iterator = + (GenericIterator *)GenericIterator_type.tp_alloc( + &GenericIterator_type, 0); + if (!iterator) + goto out; + iterator->prog = prog_or_ns.prog; + Py_INCREF(iterator->prog); + iterator->next = for_each_pid_next; + iterator->iter_deinit = (void (*)(void *))linux_helper_pid_iter_deinit; + iterator->iter = 
malloc(sizeof(struct linux_helper_pid_iter)); + if (!iterator->iter) { + PyErr_NoMemory(); + Py_DECREF(iterator); + iterator = NULL; + goto out; + } + err = linux_helper_pid_iter_init(iterator->iter, prog_or_ns.ns); + if (err) { + set_drgn_error(err); + Py_DECREF(iterator); + iterator = NULL; + } +out: + prog_or_ns_cleanup(&prog_or_ns); + return iterator; +} + +static PyObject *idr_iter_entry_wrap(struct linux_helper_radix_tree_iter_entry *entry, + Program *prog) +{ + DrgnObject *node = DrgnObject_alloc(prog); + if (!node) + return NULL; + struct drgn_error *err = drgn_object_copy(&node->obj, &entry->node); + if (err) { + Py_DECREF(node); + return set_drgn_error(err); + } + PyObject *ret = + Py_BuildValue("KO", (unsigned long long)entry->index, node); + Py_DECREF(node); + return ret; +} + +static PyObject *idr_for_each_next(GenericIterator *self) +{ + struct linux_helper_radix_tree_iter_entry *entry; + struct drgn_error *err = linux_helper_idr_iter_next(self->iter, &entry); + if (err) + return set_drgn_error(err); + if (!entry) + return NULL; + return idr_iter_entry_wrap(entry, self->prog); +} + +GenericIterator * +drgnpy_linux_helper_idr_for_each(PyObject *self, PyObject *args, PyObject *kwds) +{ + static char *keywords[] = {"idr", NULL}; + struct drgn_error *err; + DrgnObject *idr; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!:idr_for_each", + keywords, &DrgnObject_type, &idr)) + return NULL; + + GenericIterator *iterator = + (GenericIterator *)GenericIterator_type.tp_alloc( + &GenericIterator_type, 0); + if (!iterator) + return NULL; + iterator->prog = DrgnObject_prog(idr); + Py_INCREF(iterator->prog); + iterator->next = idr_for_each_next; + iterator->iter_deinit = (void (*)(void *))linux_helper_idr_iter_deinit; + iterator->iter = malloc(sizeof(struct linux_helper_idr_iter)); + if (!iterator->iter) { + Py_DECREF(iterator); + return (GenericIterator *)PyErr_NoMemory(); + } + err = linux_helper_idr_iter_init(iterator->iter, &idr->obj); + if (err) { + 
Py_DECREF(iterator); + return set_drgn_error(err); + } + return iterator; +} + +static PyObject *radix_tree_for_each_next(GenericIterator *self) +{ + struct linux_helper_radix_tree_iter_entry *entry; + struct drgn_error *err = linux_helper_radix_tree_iter_next(self->iter, &entry); + if (err) + return set_drgn_error(err); + if (!entry) + return NULL; + return idr_iter_entry_wrap(entry, self->prog); +} + +GenericIterator *drgnpy_linux_helper_radix_tree_for_each(PyObject *self, + PyObject *args, + PyObject *kwds) +{ + static char *keywords[] = {"root", NULL}; + struct drgn_error *err; + DrgnObject *root; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!:radix_tree_for_each", + keywords, &DrgnObject_type, &root)) + return NULL; + + GenericIterator *iterator = + (GenericIterator *)GenericIterator_type.tp_alloc( + &GenericIterator_type, 0); + if (!iterator) + return NULL; + iterator->prog = DrgnObject_prog(root); + Py_INCREF(iterator->prog); + iterator->next = radix_tree_for_each_next; + iterator->iter_deinit = (void (*)(void *))linux_helper_radix_tree_iter_deinit; + iterator->iter = malloc(sizeof(struct linux_helper_radix_tree_iter)); + if (!iterator->iter) { + Py_DECREF(iterator); + return (GenericIterator *)PyErr_NoMemory(); + } + err = linux_helper_radix_tree_iter_init(iterator->iter, &root->obj); + if (err) { + Py_DECREF(iterator); + return set_drgn_error(err); + } + return iterator; +} diff --git a/libdrgn/python/language.c b/libdrgn/python/language.c index d893091db..76f704743 100644 --- a/libdrgn/python/language.c +++ b/libdrgn/python/language.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include "drgnpy.h" diff --git a/libdrgn/python/module.c b/libdrgn/python/module.c index 449e6f7ea..f67cd4e00 100644 --- a/libdrgn/python/module.c +++ b/libdrgn/python/module.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
+// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include @@ -43,7 +43,7 @@ static PyObject *filename_matches(PyObject *self, PyObject *args, return NULL; struct path_iterator haystack = { - .components = (struct string [1]){}, + .components = (struct nstring [1]){}, .num_components = 0, }; if (haystack_arg.path) { @@ -52,7 +52,7 @@ static PyObject *filename_matches(PyObject *self, PyObject *args, haystack.num_components = 1; } struct path_iterator needle = { - .components = (struct string [1]){}, + .components = (struct nstring [1]){}, .num_components = 0, }; if (needle_arg.path) { @@ -124,6 +124,11 @@ static PyMethodDef drgn_methods[] = { METH_VARARGS | METH_KEYWORDS, drgn_program_from_pid_DOC}, {"_linux_helper_read_vm", (PyCFunction)drgnpy_linux_helper_read_vm, METH_VARARGS | METH_KEYWORDS}, + {"_linux_helper_per_cpu_ptr", + (PyCFunction)drgnpy_linux_helper_per_cpu_ptr, + METH_VARARGS | METH_KEYWORDS}, + {"_linux_helper_idle_task", (PyCFunction)drgnpy_linux_helper_idle_task, + METH_VARARGS | METH_KEYWORDS}, {"_linux_helper_radix_tree_lookup", (PyCFunction)drgnpy_linux_helper_radix_tree_lookup, METH_VARARGS | METH_KEYWORDS}, @@ -141,6 +146,18 @@ static PyMethodDef drgn_methods[] = { {"_linux_helper_pgtable_l5_enabled", (PyCFunction)drgnpy_linux_helper_pgtable_l5_enabled, METH_VARARGS | METH_KEYWORDS}, + {"_linux_helper_for_each_task", + (PyCFunction)drgnpy_linux_helper_for_each_task, + METH_VARARGS | METH_KEYWORDS, drgn__linux_helper_for_each_task_DOC}, + {"_linux_helper_for_each_pid", + (PyCFunction)drgnpy_linux_helper_for_each_pid, + METH_VARARGS | METH_KEYWORDS, drgn__linux_helper_for_each_pid_DOC}, + {"_linux_helper_idr_for_each", + (PyCFunction)drgnpy_linux_helper_idr_for_each, + METH_VARARGS | METH_KEYWORDS, drgn__linux_helper_idr_for_each_DOC}, + {"_linux_helper_radix_tree_for_each", + (PyCFunction)drgnpy_linux_helper_radix_tree_for_each, + METH_VARARGS | METH_KEYWORDS, 
drgn__linux_helper_radix_tree_for_each_DOC}, {}, }; @@ -230,6 +247,7 @@ DRGNPY_PUBLIC PyMODINIT_FUNC PyInit__drgn(void) add_type(m, &StackTrace_type) || add_type(m, &Symbol_type) || add_type(m, &DrgnType_type) || + add_type(m, &GenericIterator_type) || add_type(m, &TypeEnumerator_type) || add_type(m, &TypeMember_type) || add_type(m, &TypeParameter_type) || diff --git a/libdrgn/python/object.c b/libdrgn/python/object.c index e8bd59031..2f11fabbc 100644 --- a/libdrgn/python/object.c +++ b/libdrgn/python/object.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include @@ -746,6 +746,70 @@ static DrgnObject *DrgnObject_read(DrgnObject *self) ) } +static PyObject *DrgnObject_to_bytes(DrgnObject *self) +{ + struct drgn_error *err; + PyObject *buf = PyBytes_FromStringAndSize(NULL, + drgn_object_size(&self->obj)); + if (!buf) + return NULL; + err = drgn_object_read_bytes(&self->obj, PyBytes_AS_STRING(buf)); + if (err) { + Py_DECREF(buf); + return set_drgn_error(err); + } + return buf; +} + +static DrgnObject *DrgnObject_from_bytes(PyTypeObject *type, PyObject *args, + PyObject *kwds) +{ + static char *keywords[] = { + "prog", "type", "bytes", "bit_offset", "bit_field_size", NULL + }; + struct drgn_error *err; + Program *prog; + PyObject *type_obj = Py_None; + Py_buffer bytes; + struct index_arg bit_offset = {}; + struct index_arg bit_field_size = { .allow_none = true, .is_none = true }; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!Oy*|O&O&:from_bytes_", + keywords, &Program_type, &prog, + &type_obj, &bytes, index_converter, + &bit_offset, index_converter, + &bit_field_size)) + return NULL; + + DrgnObject *res = NULL; + struct drgn_qualified_type qualified_type; + if (Program_type_arg(prog, type_obj, false, &qualified_type) == -1) + goto out; + + if (!bit_field_size.is_none && bit_field_size.uvalue == 0) { + PyErr_SetString(PyExc_ValueError, + "bit 
field size cannot be zero"); + goto out; + } + + res = DrgnObject_alloc(prog); + if (!res) + goto out; + + err = drgn_object_set_from_buffer(&res->obj, qualified_type, bytes.buf, + bytes.len, bit_offset.uvalue, + bit_field_size.uvalue); + if (err) { + set_drgn_error(err); + Py_DECREF(res); + res = NULL; + goto out; + } + +out: + PyBuffer_Release(&bytes); + return res; +} + static int append_bit_offset(PyObject *parts, uint8_t bit_offset) { if (bit_offset == 0) @@ -1582,6 +1646,11 @@ static PyMethodDef DrgnObject_methods[] = { drgn_Object_address_of__DOC}, {"read_", (PyCFunction)DrgnObject_read, METH_NOARGS, drgn_Object_read__DOC}, + {"to_bytes_", (PyCFunction)DrgnObject_to_bytes, METH_NOARGS, + drgn_Object_to_bytes__DOC}, + {"from_bytes_", (PyCFunction)DrgnObject_from_bytes, + METH_CLASS | METH_VARARGS | METH_KEYWORDS, + drgn_Object_from_bytes__DOC}, {"format_", (PyCFunction)DrgnObject_format, METH_VARARGS | METH_KEYWORDS, drgn_Object_format__DOC}, {"__round__", (PyCFunction)DrgnObject_round, diff --git a/libdrgn/python/platform.c b/libdrgn/python/platform.c index b199e2d9e..9e5116f3e 100644 --- a/libdrgn/python/platform.c +++ b/libdrgn/python/platform.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include "drgnpy.h" diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index 453edf628..309fa6a21 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later #include "drgnpy.h" @@ -7,7 +7,7 @@ #include "../vector.h" #include "../util.h" -DEFINE_HASH_TABLE_FUNCTIONS(pyobjectp_set, ptr_key_hash_pair, scalar_key_eq) +DEFINE_HASH_SET_FUNCTIONS(pyobjectp_set, ptr_key_hash_pair, scalar_key_eq) int Program_hold_object(Program *prog, PyObject *obj) { @@ -217,7 +217,8 @@ static struct drgn_error *py_type_find_fn(enum drgn_type_kind kind, PyObject *type_obj; gstate = PyGILState_Ensure(); - kind_obj = PyObject_CallFunction(TypeKind_class, "k", kind); + kind_obj = PyObject_CallFunction(TypeKind_class, "k", + (unsigned long)kind); if (!kind_obj) { err = drgn_error_from_python(); goto out_gstate; diff --git a/libdrgn/python/stack_trace.c b/libdrgn/python/stack_trace.c index 5fe9fdf6e..c0556e62a 100644 --- a/libdrgn/python/stack_trace.c +++ b/libdrgn/python/stack_trace.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include "drgnpy.h" diff --git a/libdrgn/python/symbol.c b/libdrgn/python/symbol.c index 42140ea7f..6220eaa77 100644 --- a/libdrgn/python/symbol.c +++ b/libdrgn/python/symbol.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later #include @@ -25,22 +25,6 @@ static void Symbol_dealloc(Symbol *self) Py_TYPE(self)->tp_free((PyObject *)self); } -static PyObject *Symbol_repr(Symbol *self) -{ - PyObject *tmp, *ret; - char address[19], size[19]; - - tmp = PyUnicode_FromString(drgn_symbol_name(self->sym)); - if (!tmp) - return NULL; - sprintf(address, "0x%" PRIx64, drgn_symbol_address(self->sym)); - sprintf(size, "0x%" PRIx64, drgn_symbol_size(self->sym)); - ret = PyUnicode_FromFormat("Symbol(name=%R, address=%s, size=%s)", tmp, - address, size); - Py_DECREF(tmp); - return ret; -} - static PyObject *Symbol_richcompare(Symbol *self, PyObject *other, int op) { if (!PyObject_TypeCheck(other, &Symbol_type) || @@ -67,10 +51,54 @@ static PyObject *Symbol_get_size(Symbol *self, void *arg) return PyLong_FromUnsignedLongLong(drgn_symbol_size(self->sym)); } +static PyObject *Symbol_get_binding(Symbol *self, void *arg) +{ + return PyObject_CallFunction(SymbolBinding_class, "k", + (unsigned long)drgn_symbol_binding(self->sym)); +} + +static PyObject *Symbol_get_kind(Symbol *self, void *arg) +{ + return PyObject_CallFunction(SymbolKind_class, "k", + (unsigned long)drgn_symbol_kind(self->sym)); +} + +static PyObject *Symbol_repr(Symbol *self) +{ + PyObject *ret = NULL; + PyObject *tmp = PyUnicode_FromString(drgn_symbol_name(self->sym)); + if (!tmp) + return NULL; + + PyObject *binding = Symbol_get_binding(self, NULL); + if (!binding) + goto out_tmp; + + PyObject *kind = Symbol_get_kind(self, NULL); + if (!kind) + goto out_binding; + + char address[19], size[19]; + sprintf(address, "0x%" PRIx64, drgn_symbol_address(self->sym)); + sprintf(size, "0x%" PRIx64, drgn_symbol_size(self->sym)); + ret = PyUnicode_FromFormat("Symbol(name=%R, address=%s, size=%s, binding=%R, kind=%R)", + tmp, address, size, binding, kind); + + Py_DECREF(kind); +out_binding: + Py_DECREF(binding); +out_tmp: + Py_DECREF(tmp); + return ret; + +} + static PyGetSetDef Symbol_getset[] = { {"name", 
(getter)Symbol_get_name, NULL, drgn_Symbol_name_DOC}, {"address", (getter)Symbol_get_address, NULL, drgn_Symbol_address_DOC}, {"size", (getter)Symbol_get_size, NULL, drgn_Symbol_size_DOC}, + {"binding", (getter)Symbol_get_binding, NULL, drgn_Symbol_binding_DOC}, + {"kind", (getter)Symbol_get_kind, NULL, drgn_Symbol_kind_DOC}, {}, }; diff --git a/libdrgn/python/test.c b/libdrgn/python/test.c index 6728762ea..c6733f98b 100644 --- a/libdrgn/python/test.c +++ b/libdrgn/python/test.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /* diff --git a/libdrgn/python/type.c b/libdrgn/python/type.c index c0f6afbc0..fdb6f8d36 100644 --- a/libdrgn/python/type.c +++ b/libdrgn/python/type.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include @@ -60,7 +60,7 @@ static Program *DrgnType_get_prog(DrgnType *self, void *arg) static PyObject *DrgnType_get_kind(DrgnType *self) { return PyObject_CallFunction(TypeKind_class, "k", - drgn_type_kind(self->type)); + (unsigned long)drgn_type_kind(self->type)); } static PyObject *DrgnType_get_primitive(DrgnType *self) @@ -68,7 +68,7 @@ static PyObject *DrgnType_get_primitive(DrgnType *self) if (drgn_type_primitive(self->type) == DRGN_NOT_PRIMITIVE_TYPE) Py_RETURN_NONE; return PyObject_CallFunction(PrimitiveType_class, "k", - drgn_type_primitive(self->type)); + (unsigned long)drgn_type_primitive(self->type)); } static PyObject *DrgnType_get_qualifiers(DrgnType *self) diff --git a/libdrgn/python/util.c b/libdrgn/python/util.c index f49777338..ad5c5578e 100644 --- a/libdrgn/python/util.c +++ b/libdrgn/python/util.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/register_state.c b/libdrgn/register_state.c index 3fd8f8db7..d44584f4e 100644 --- a/libdrgn/register_state.c +++ b/libdrgn/register_state.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include @@ -86,8 +86,8 @@ void drgn_register_state_set_pc(struct drgn_program *prog, pc &= drgn_platform_address_mask(&prog->platform); regs->_pc = pc; drgn_register_state_set_known(regs, 0); - if (prog->_dbinfo) { - Dwfl_Module *dwfl_module = dwfl_addrmodule(prog->_dbinfo->dwfl, + if (prog->dbinfo) { + Dwfl_Module *dwfl_module = dwfl_addrmodule(prog->dbinfo->dwfl, pc - !regs->interrupted); if (dwfl_module) { void **userdatap; diff --git a/libdrgn/register_state.h b/libdrgn/register_state.h index 9f8b2bf32..5844e73e7 100644 --- a/libdrgn/register_state.h +++ b/libdrgn/register_state.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/serialize.c b/libdrgn/serialize.c index 3904f4fb5..b4228e427 100644 --- a/libdrgn/serialize.c +++ b/libdrgn/serialize.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/serialize.h b/libdrgn/serialize.h index 0f4783f35..b28eab6d6 100644 --- a/libdrgn/serialize.h +++ b/libdrgn/serialize.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** @@ -122,7 +122,7 @@ void copy_bits(void *dst, unsigned int dst_bit_offset, const void *src, * @param[in] bit_offset Offset in bits from the beginning of @p buf to where to * write. 
This is interpreted differently based on @p little_endian. * @param[in] uvalue Bits to write, in host order. - * @param[in] bit_size Number of bits in @p uvalue. This must be grather than + * @param[in] bit_size Number of bits in @p uvalue. This must be greater than * zero and no more than 64. Note that this is not checked or truncated, so if * @p uvalue has more than this many bits, the results will likely be incorrect. * @param[in] little_endian Whether the bits should be written out in @@ -140,7 +140,7 @@ void serialize_bits(void *buf, uint64_t bit_offset, uint64_t uvalue, * @param[in] buf Memory buffer to read from. * @param[in] bit_offset Offset in bits from the beginning of @p buf to where to * read from. This is interpreted differently based on @p little_endian. - * @param[in] bit_size Number of bits to read. This must be grather than zero + * @param[in] bit_size Number of bits to read. This must be greater than zero * and no more than 64. * @param[in] little_endian Whether the bits should be interpreted in * little-endian order. diff --git a/libdrgn/splay_tree.c b/libdrgn/splay_tree.c index a0bfa6964..cd84f1a53 100644 --- a/libdrgn/splay_tree.c +++ b/libdrgn/splay_tree.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later #include "binary_search_tree.h" // IWYU pragma: associated @@ -15,9 +15,9 @@ * 1: "Self-Adjusting Binary Search Trees" (Sleator & Tarjan, 1985): * http://www.cs.cmu.edu/~sleator/papers/self-adjusting.pdf */ -void splay_tree_splay(struct binary_tree_node **root, - struct binary_tree_node *node, - struct binary_tree_node *parent) +void drgn_splay_tree_splay(struct binary_tree_node **root, + struct binary_tree_node *node, + struct binary_tree_node *parent) { for (;;) { struct binary_tree_node *grandparent, *great_grandparent; @@ -146,9 +146,9 @@ void splay_tree_splay(struct binary_tree_node **root, node->parent = NULL; } -static inline void transplant(struct binary_tree_node **root, - struct binary_tree_node *old, - struct binary_tree_node *new) +static inline void drgn_splay_tree_transplant(struct binary_tree_node **root, + struct binary_tree_node *old, + struct binary_tree_node *new) { if (!old->parent) *root = new; @@ -160,13 +160,13 @@ static inline void transplant(struct binary_tree_node **root, new->parent = old->parent; } -void splay_tree_delete(struct binary_tree_node **root, - struct binary_tree_node *node) +void drgn_splay_tree_delete(struct binary_tree_node **root, + struct binary_tree_node *node) { if (node->left == NULL) { - transplant(root, node, node->right); + drgn_splay_tree_transplant(root, node, node->right); } else if (node->right == NULL) { - transplant(root, node, node->left); + drgn_splay_tree_transplant(root, node, node->left); } else { struct binary_tree_node *successor; @@ -175,14 +175,14 @@ void splay_tree_delete(struct binary_tree_node **root, do { successor = successor->left; } while (successor->left); - transplant(root, successor, successor->right); + drgn_splay_tree_transplant(root, successor, successor->right); successor->right = node->right; successor->right->parent = successor; } - transplant(root, node, successor); + drgn_splay_tree_transplant(root, node, successor); successor->left = 
node->left; successor->left->parent = successor; } if (node->parent && node->parent->parent) - splay_tree_splay(root, node->parent, node->parent->parent); + drgn_splay_tree_splay(root, node->parent, node->parent->parent); } diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c index ee330d380..73299643f 100644 --- a/libdrgn/stack_trace.c +++ b/libdrgn/stack_trace.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include @@ -13,10 +13,11 @@ #include "cfi.h" #include "debug_info.h" #include "drgn.h" +#include "dwarf_info.h" #include "error.h" -#include "hash_table.h" #include "helpers.h" #include "minmax.h" +#include "nstring.h" #include "platform.h" #include "program.h" #include "register_state.h" @@ -78,16 +79,18 @@ static void drgn_stack_trace_shrink_to_fit(struct drgn_stack_trace **trace, LIBDRGN_PUBLIC void drgn_stack_trace_destroy(struct drgn_stack_trace *trace) { - struct drgn_register_state *regs = NULL; - for (size_t i = 0; i < trace->num_frames; i++) { - if (trace->frames[i].regs != regs) { - drgn_register_state_destroy(regs); - regs = trace->frames[i].regs; + if (trace) { + struct drgn_register_state *regs = NULL; + for (size_t i = 0; i < trace->num_frames; i++) { + if (trace->frames[i].regs != regs) { + drgn_register_state_destroy(regs); + regs = trace->frames[i].regs; + } + free(trace->frames[i].scopes); } - free(trace->frames[i].scopes); + drgn_register_state_destroy(regs); + free(trace); } - drgn_register_state_destroy(regs); - free(trace); } LIBDRGN_PUBLIC size_t @@ -443,7 +446,7 @@ not_found:; } Dwarf_Die function_die = frame->scopes[frame->function_scope]; - return drgn_object_from_dwarf(trace->prog->_dbinfo, frame->regs->module, + return drgn_object_from_dwarf(trace->prog->dbinfo, frame->regs->module, &die, dwarf_tag(&die) == DW_TAG_enumerator ? 
&type_die : NULL, @@ -525,7 +528,7 @@ drgn_get_initial_registers(struct drgn_program *prog, uint32_t tid, struct drgn_error *err; struct drgn_object obj; struct drgn_object tmp; - struct string prstatus; + struct nstring prstatus; drgn_object_init(&obj, prog); drgn_object_init(&tmp, prog); diff --git a/libdrgn/stack_trace.h b/libdrgn/stack_trace.h index 1a932a1a8..004e556c2 100644 --- a/libdrgn/stack_trace.h +++ b/libdrgn/stack_trace.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** @@ -12,6 +12,7 @@ #ifndef DRGN_STACK_TRACE_H #define DRGN_STACK_TRACE_H +#include #include /** diff --git a/libdrgn/string_builder.c b/libdrgn/string_builder.c index 9a10d0783..d78374583 100644 --- a/libdrgn/string_builder.c +++ b/libdrgn/string_builder.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/libdrgn/string_builder.h b/libdrgn/string_builder.h index bc3b3f04e..f4ca10a25 100644 --- a/libdrgn/string_builder.h +++ b/libdrgn/string_builder.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/symbol.c b/libdrgn/symbol.c index e9ef9a2d0..69030b47e 100644 --- a/libdrgn/symbol.c +++ b/libdrgn/symbol.c @@ -1,10 +1,11 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later -#include +#include #include #include +#include "drgn.h" #include "symbol.h" #include "util.h" @@ -13,6 +14,24 @@ LIBDRGN_PUBLIC void drgn_symbol_destroy(struct drgn_symbol *sym) free(sym); } +void drgn_symbol_from_elf(const char *name, uint64_t address, + const GElf_Sym *elf_sym, struct drgn_symbol *ret) +{ + ret->name = name; + ret->address = address; + ret->size = elf_sym->st_size; + int binding = GELF_ST_BIND(elf_sym->st_info); + if (binding <= STB_WEAK || binding == STB_GNU_UNIQUE) + ret->binding = binding + 1; + else + ret->binding = DRGN_SYMBOL_BINDING_UNKNOWN; + int type = GELF_ST_TYPE(elf_sym->st_info); + if (type <= STT_TLS || type == STT_GNU_IFUNC) + ret->kind = type; + else + ret->kind = DRGN_SYMBOL_KIND_UNKNOWN; +} + LIBDRGN_PUBLIC const char *drgn_symbol_name(struct drgn_symbol *sym) { return sym->name; @@ -28,8 +47,21 @@ LIBDRGN_PUBLIC uint64_t drgn_symbol_size(struct drgn_symbol *sym) return sym->size; } + +LIBDRGN_PUBLIC enum drgn_symbol_binding +drgn_symbol_binding(struct drgn_symbol *sym) +{ + return sym->binding; +} + +LIBDRGN_PUBLIC enum drgn_symbol_kind drgn_symbol_kind(struct drgn_symbol *sym) +{ + return sym->kind; +} + LIBDRGN_PUBLIC bool drgn_symbol_eq(struct drgn_symbol *a, struct drgn_symbol *b) { return (strcmp(a->name, b->name) == 0 && a->address == b->address && - a->size == b->size); + a->size == b->size && a->binding == b->binding && + a->kind == b->kind); } diff --git a/libdrgn/symbol.h b/libdrgn/symbol.h index 508a35ba2..4fb765640 100644 --- a/libdrgn/symbol.h +++ b/libdrgn/symbol.h @@ -1,15 +1,23 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later #ifndef DRGN_SYMBOL_H #define DRGN_SYMBOL_H -#include +#include + +#include "drgn.h" struct drgn_symbol { const char *name; uint64_t address; uint64_t size; + enum drgn_symbol_binding binding; + enum drgn_symbol_kind kind; }; +/** Initialize a @ref drgn_symbol from an ELF symbol. */ +void drgn_symbol_from_elf(const char *name, uint64_t address, + const GElf_Sym *elf_sym, struct drgn_symbol *ret); + #endif /* DRGN_SYMBOL_H */ diff --git a/libdrgn/type.c b/libdrgn/type.c index 25000b123..39f6a27e2 100644 --- a/libdrgn/type.c +++ b/libdrgn/type.c @@ -1,10 +1,11 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include #include #include +#include "array.h" #include "error.h" #include "hash_table.h" #include "language.h" @@ -169,10 +170,10 @@ static bool drgn_member_key_eq(const struct drgn_member_key *a, (!a->name_len || memcmp(a->name, b->name, a->name_len) == 0)); } -DEFINE_HASH_TABLE_FUNCTIONS(drgn_member_map, drgn_member_key_hash_pair, - drgn_member_key_eq) +DEFINE_HASH_MAP_FUNCTIONS(drgn_member_map, drgn_member_key_hash_pair, + drgn_member_key_eq) -DEFINE_HASH_TABLE_FUNCTIONS(drgn_type_set, ptr_key_hash_pair, scalar_key_eq) +DEFINE_HASH_SET_FUNCTIONS(drgn_type_set, ptr_key_hash_pair, scalar_key_eq) LIBDRGN_PUBLIC struct drgn_error * drgn_member_object(struct drgn_type_member *member, @@ -328,8 +329,8 @@ static bool drgn_type_dedupe_eq(struct drgn_type * const *entry_a, * We don't deduplicate types with members, parameters, template parameters, or * enumerators, so the hash and comparison functions ignore those. 
*/ -DEFINE_HASH_TABLE_FUNCTIONS(drgn_dedupe_type_set, drgn_type_dedupe_hash_pair, - drgn_type_dedupe_eq) +DEFINE_HASH_SET_FUNCTIONS(drgn_dedupe_type_set, drgn_type_dedupe_hash_pair, + drgn_type_dedupe_eq) DEFINE_VECTOR_FUNCTIONS(drgn_typep_vector) @@ -351,8 +352,8 @@ static struct drgn_error *find_or_create_type(struct drgn_type *key, return &drgn_enomem; *type = *key; - if (!drgn_dedupe_type_set_insert_searched(&prog->dedupe_types, &type, - hp, NULL)) { + if (drgn_dedupe_type_set_insert_searched(&prog->dedupe_types, &type, hp, + NULL) < 0) { free(type); return &drgn_enomem; } @@ -1294,7 +1295,7 @@ struct drgn_error *drgn_error_incomplete_type(const char *format, void drgn_program_init_types(struct drgn_program *prog) { - for (size_t i = 0; i < ARRAY_SIZE(prog->void_types); i++) { + for (size_t i = 0; i < array_size(prog->void_types); i++) { struct drgn_type *type = &prog->void_types[i]; type->_private.kind = DRGN_TYPE_VOID; type->_private.is_complete = false; @@ -1439,11 +1440,10 @@ default_size_t_or_ptrdiff_t(struct drgn_program *prog, err = drgn_program_address_size(prog, &address_size); if (err) return err; - for (size_t i = 0; i < ARRAY_SIZE(integer_types[0]); i++) { - enum drgn_primitive_type integer_type = - integer_types[type == DRGN_C_TYPE_PTRDIFF_T][i]; + array_for_each(integer_type, + integer_types[type == DRGN_C_TYPE_PTRDIFF_T]) { struct drgn_qualified_type qualified_type; - err = drgn_program_find_primitive_type(prog, integer_type, + err = drgn_program_find_primitive_type(prog, *integer_type, &qualified_type.type); if (err) return err; diff --git a/libdrgn/type.h b/libdrgn/type.h index 24ddeb033..bab54b32e 100644 --- a/libdrgn/type.h +++ b/libdrgn/type.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/libdrgn/util.h b/libdrgn/util.h index fd09f912b..cd524d7ff 100644 --- a/libdrgn/util.h +++ b/libdrgn/util.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** @@ -85,9 +85,6 @@ #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) -#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) - #define container_of(ptr, type, member) ({ \ void *__mptr = (void *)(ptr); \ BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \ @@ -148,26 +145,4 @@ static inline uint64_t uint_max(int n) #define add_to_possibly_null_pointer(ptr, i) \ ((typeof(ptr))((uintptr_t)(ptr) + (i) * sizeof(*(ptr)))) -/** A string with a stored length. */ -struct string { - /** - * The string, which is not necessarily null-terminated and may have - * embedded null bytes. - */ - const char *str; - /** The length in bytes of the string. */ - size_t len; -}; - -/** Compare two @ref string keys for equality. */ -static inline bool string_eq(const struct string *a, const struct string *b) -{ - /* - * len == 0 is a special case because memcmp(NULL, NULL, 0) is - * technically undefined. - */ - return (a->len == b->len && - (a->len == 0 || memcmp(a->str, b->str, a->len) == 0)); -} - #endif /* DRGN_UTIL_H */ diff --git a/libdrgn/vector.c b/libdrgn/vector.c index 96b13c9f9..ab51e9919 100644 --- a/libdrgn/vector.c +++ b/libdrgn/vector.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include "vector.h" diff --git a/libdrgn/vector.h b/libdrgn/vector.h index 70196c477..0fc2c0596 100644 --- a/libdrgn/vector.h +++ b/libdrgn/vector.h @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. 
and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /** diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 000000000..976c84825 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +python_classes = +python_functions = diff --git a/scripts/build_manylinux_in_docker.sh b/scripts/build_manylinux_in_docker.sh index dc556945a..edc0aa3a9 100755 --- a/scripts/build_manylinux_in_docker.sh +++ b/scripts/build_manylinux_in_docker.sh @@ -20,7 +20,7 @@ ln -s /usr/share/aclocal/pkg.m4 /usr/local/share/aclocal/ # Install a recent version of elfutils instead of whatever is in the manylinux # image. -elfutils_version=0.183 +elfutils_version=0.186 elfutils_url=https://sourceware.org/elfutils/ftp/$elfutils_version/elfutils-$elfutils_version.tar.bz2 mkdir /tmp/elfutils cd /tmp/elfutils @@ -35,13 +35,28 @@ curl -L "$elfutils_url" | tar -xj --strip-components=1 make -j$(($(nproc) + 1)) make install -libkdumpfile_commit=v0.4.0 -libkdumpfile_url=https://github.com/ptesarik/libkdumpfile/archive/$libkdumpfile_commit/libkdumpfile-$libkdumpfile_commit.tar.gz +libkdumpfile_version=0.4.1 +libkdumpfile_url=https://github.com/ptesarik/libkdumpfile/releases/download/v$libkdumpfile_version/libkdumpfile-$libkdumpfile_version.tar.gz mkdir /tmp/libkdumpfile cd /tmp/libkdumpfile curl -L "$libkdumpfile_url" | tar -xz --strip-components=1 -autoreconf -fiv -# z_const was added in zlib 1.2.5.2, but CentOS 6 has 1.2.3. +# This file is missing an include of limits.h which it accidentally gets from +# zlib.h via zconf.h, but only since zlib 1.2.7. CentOS 6 has 1.2.3. +patch -p1 << "EOF" +diff --git a/src/kdumpfile/util.c b/src/kdumpfile/util.c +index 4fb2960..14e1ce3 100644 +--- a/src/kdumpfile/util.c ++++ b/src/kdumpfile/util.c +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + + #if USE_ZLIB + # include +EOF +# z_const was added in zlib 1.2.5.2. 
CPPFLAGS="-Dz_const=const" ./configure --with-lzo --with-snappy --with-zlib --without-python make -j$(($(nproc) + 1)) make install diff --git a/scripts/gen_pp_cat.py b/scripts/gen_pp_cat.py new file mode 100755 index 000000000..7109f17c3 --- /dev/null +++ b/scripts/gen_pp_cat.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: GPL-3.0-or-later + +import argparse + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("max", type=int) + args = parser.parse_args() + + for i in range(2, args.max + 1): + print( + f"#define PP_CAT{str(i) if i > 2 else ''}(" + + ", ".join(f"_{j}" for j in range(i)) + + f") PP_CAT_I{i}(" + + ", ".join(f"_{j}" for j in range(i)) + + ")" + ) + print("/** @cond */") + for i in range(2, args.max + 1): + print( + f"#define PP_CAT_I{i}(" + + ", ".join(f"_{j}" for j in range(i)) + + ") " + + "##".join(f"_{j}" for j in range(i)) + ) + print("/** @endcond */") diff --git a/scripts/generate_dwarf_constants.py b/scripts/generate_dwarf_constants.py deleted file mode 100755 index baf224102..000000000 --- a/scripts/generate_dwarf_constants.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
-# SPDX-License-Identifier: GPL-3.0-or-later - -import keyword -import re - -prefixes = [ - "DW_AT", - "DW_ATE", - "DW_CHILDREN", - "DW_END", - "DW_FORM", - "DW_LANG", - "DW_LNE", - "DW_LNS", - "DW_OP", - "DW_TAG", -] - -if __name__ == "__main__": - with open("libdrgn/include/dwarf.h", "r") as f: - dwarf_h = f.read() - dwarf_h = re.sub(r"/\*.*?\*/", "", dwarf_h, flags=re.DOTALL) - dwarf_h = re.sub(r"\\\n", "", dwarf_h) - matches = re.findall( - r"^\s*(" + "|".join(prefixes) + r")_(\w+)\s*=\s*(0x[0-9a-fA-F]+|[0-9]+)", - dwarf_h, - re.MULTILINE, - ) - - enums = {} - for enum, name, value in matches: - try: - enums[enum].append((name, int(value, 0))) - except KeyError: - enums[enum] = [(name, int(value, 0))] - - print( - """\ -# Automatically generated from dwarf.h - -import enum -from typing import Text - -""" - ) - first = True - for enum in prefixes: - assert enums[enum] - if not first: - print() - print() - first = False - print(f"class {enum}(enum.IntEnum):") - for name, value in enums[enum]: - if keyword.iskeyword(name): - name += "_" - print(f" {name} = 0x{value:X}", end="") - if name == "name": - print(" # type: ignore") - else: - print() - print() - print(" @classmethod") - print(" def str(cls, value: int) -> Text:") - print(" try:") - print(f' return f"{enum}_{{cls(value).name}}"') - print(" except ValueError:") - print(" return hex(value)") diff --git a/scripts/generate_primitive_type_spellings.py b/scripts/generate_primitive_type_spellings.py index 425aec517..6f620c666 100755 --- a/scripts/generate_primitive_type_spellings.py +++ b/scripts/generate_primitive_type_spellings.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later import itertools diff --git a/scripts/generate_test_constants.py b/scripts/generate_test_constants.py new file mode 100755 index 000000000..65738a02e --- /dev/null +++ b/scripts/generate_test_constants.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: GPL-3.0-or-later + +import argparse +import keyword +from pathlib import Path +import re + +ENUMS = { + "elf": ( + "ET", + "PT", + "SHN", + "SHT", + "STB", + "STT", + "STV", + ), + "dwarf": ( + "DW_AT", + "DW_ATE", + "DW_CHILDREN", + "DW_END", + "DW_FORM", + "DW_LANG", + "DW_LNE", + "DW_LNS", + "DW_OP", + "DW_TAG", + ), +} + + +VALUE_REGEX = r"(?P0x[0-9a-fA-F]+|[0-9]+)" +REGEXES = { + "elf": r"^\s*#\s*define\s+(?P" + + "|".join(ENUMS["elf"]) + + r")_(?P\w+)\s+" + + VALUE_REGEX, + "dwarf": r"^\s*(?P" + + "|".join(ENUMS["dwarf"]) + + r")_(?P\w+)\s*=\s*" + + VALUE_REGEX, +} + + +def read_header(name: str) -> str: + contents = (Path("libdrgn/include") / name).read_text() + contents = re.sub(r"/\*.*?\*/", "", contents, flags=re.DOTALL) + contents = re.sub(r"\\\n", "", contents) + return contents + + +def generate_constants(file: str) -> None: + contents = read_header(file + ".h") + + enums = {} + for match in re.finditer(REGEXES[file], contents, re.MULTILINE): + enum = match.group("enum") + name = match.group("name") + value = int(match.group("value"), 0) + try: + enums[enum].append((name, value)) + except KeyError: + enums[enum] = [(name, value)] + + print( + f"""\ +# Automatically generated from {file}.h + +import enum +from typing import Text + +""" + ) + first = True + for enum in ENUMS[file]: + assert enums[enum] + if not first: + print() + print() + first = False + print(f"class {enum}(enum.IntEnum):") + for name, value in enums[enum]: + if keyword.iskeyword(name): + name += "_" + print(f" {name} = 0x{value:X}", end="") + if name == "name": + print(" # type: ignore") + else: + print() + print() + 
print(" @classmethod") + print(" def str(cls, value: int) -> Text:") + print(" try:") + print(f' return f"{enum}_{{cls(value).name}}"') + print(" except ValueError:") + print(" return hex(value)") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="generate constants for Python tests from header file" + ) + parser.add_argument("file", choices=list(ENUMS)) + args = parser.parse_args() + generate_constants(args.file) diff --git a/scripts/iwyu.py b/scripts/iwyu.py index 42eb2a067..60cb773a5 100755 --- a/scripts/iwyu.py +++ b/scripts/iwyu.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import argparse @@ -122,7 +122,7 @@ def gen_python_mapping_file(mapping_path): # For some reason, include-what-you-mean wants struct _typeobject, but # find-all-symbols only reports PyTypeObject. Add it manually. imp.write( - f' {{"symbol": ["_typeobject", "private", "", "public"]}}, # From cpython/object.h\n' + ' {"symbol": ["_typeobject", "private", "", "public"]}, # From cpython/object.h\n' ) imp.write("]\n") @@ -130,7 +130,46 @@ def gen_python_mapping_file(mapping_path): os.rename(mapping_path + ".tmp", mapping_path) +def iwyu_associated_header(path): + with open(path, "r") as f: + match = re.search( + r'^\s*#\s*include\s+"([^"]+)"\s+//\s+IWYU\s+pragma:\s+associated', + f.read(), + re.M, + ) + if match: + return os.path.join(os.path.dirname(path), match.group(1)) + if path.endswith(".c"): + return path[:-2] + ".h" + return None + + +def ignore_line(path, state, line): + # include-what-you-use/include-what-you-use#969: iwyu recommends bogus + # forward declarations for the anonymous unions generated by + # BINARY_OP_SIGNED_2C. 
+ if line.endswith("::;"): + return True + + # include-what-you-use/include-what-you-use#971: drgn.h "exports" a forward + # declaration of several opaque types, but iwyu doesn't have such a notion. + if re.fullmatch( + r"struct drgn_(language|platform|program|register|stack_trace|symbol);", line + ): + paths = [path] + associated_header = iwyu_associated_header(path) + if associated_header is not None: + paths.append(associated_header) + for path in paths: + with open(path, "r") as f: + if re.search(r'^#include "(drgn.h|drgnpy.h)"', f.read(), re.M): + return True + + return False + + def main(): + os.environ["CFLAGS"] = "-Wall" parser = argparse.ArgumentParser(description="run include-what-you-use on drgn") parser.add_argument( "source", nargs="*", help="run on given file instead of all source files" @@ -209,14 +248,15 @@ def main(): else: header = None lines.clear() - elif state != "include_list" and line: + elif ( + line + and state != "include_list" + and not ignore_line(path, state, line) + ): if header is not None: print("\n" + header) header = None print(line) - print( - "Please ignore suggestions to declare opaque types if the appropriate header has already been included." - ) if __name__ == "__main__": diff --git a/scripts/test_cityhash.c b/scripts/test_cityhash.c index 503dd7e74..a23dba958 100644 --- a/scripts/test_cityhash.c +++ b/scripts/test_cityhash.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later #include diff --git a/setup.py b/setup.py index d60253092..d53bd92e1 100755 --- a/setup.py +++ b/setup.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later # setuptools must be imported before distutils (see pypa/setuptools#2230). 
-import setuptools # isort: skip +import setuptools # isort: skip # noqa: F401 import contextlib from distutils import log @@ -125,7 +125,20 @@ def make_release_tree(self, base_dir, files): class test(Command): description = "run unit tests after in-place build" - KERNELS = ["5.13", "5.12", "5.11", "5.10", "5.4", "4.19", "4.14", "4.9", "4.4"] + KERNELS = [ + "5.16", + "5.15", + "5.14", + "5.13", + "5.12", + "5.11", + "5.10", + "5.4", + "4.19", + "4.14", + "4.9", + "4.4", + ] user_options = [ ( @@ -174,9 +187,20 @@ def _run_vm(self, kernel_dir): import vmtest.vm - command = fr"""cd {shlex.quote(os.getcwd())} && - DRGN_RUN_LINUX_HELPER_TESTS=1 {shlex.quote(sys.executable)} -Bm \ - unittest discover -t . -s tests/helpers/linux {"-v" if self.verbose else ""}""" + command = fr""" +set -e + +cd {shlex.quote(os.getcwd())} +if "$BUSYBOX" [ -e /proc/vmcore ]; then + "$PYTHON" -Bm unittest discover -t . -s tests/linux_kernel/vmcore {"-v" if self.verbose else ""} +else + DRGN_RUN_LINUX_HELPER_TESTS=1 "$PYTHON" -Bm \ + unittest discover -t . -s tests/helpers/linux {"-v" if self.verbose else ""} + "$PYTHON" vmtest/enter_kdump.py + # We should crash and not reach this. + exit 1 +fi +""" try: returncode = vmtest.vm.run_in_vm( command, Path(kernel_dir), Path(self.vmtest_dir) diff --git a/tests/__init__.py b/tests/__init__.py index 43367aec4..4d9e5d983 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,8 +1,7 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import functools -import types from typing import Any, NamedTuple, Optional import unittest diff --git a/tests/assembler.py b/tests/assembler.py index 3d3e0dff6..94540e180 100644 --- a/tests/assembler.py +++ b/tests/assembler.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later from collections import namedtuple diff --git a/tests/dwarfwriter.py b/tests/dwarfwriter.py index c39574d69..827ed9257 100644 --- a/tests/dwarfwriter.py +++ b/tests/dwarfwriter.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later from collections import namedtuple @@ -203,7 +203,7 @@ def compile_file_names(die): UNIT_HEADER_TYPES = frozenset({DW_TAG.type_unit, DW_TAG.compile_unit}) -def compile_dwarf( +def dwarf_sections( dies, little_endian=True, bits=64, *, lang=None, use_dw_form_indirect=False ): if isinstance(dies, DwarfDie): @@ -238,7 +238,6 @@ def compile_dwarf( ) sections = [ - ElfSection(p_type=PT.LOAD, vaddr=0xFFFF0000, data=b""), ElfSection( name=".debug_abbrev", sh_type=SHT.PROGBITS, @@ -256,5 +255,21 @@ def compile_dwarf( sections.append( ElfSection(name=".debug_types", sh_type=SHT.PROGBITS, data=debug_types) ) + return sections + - return create_elf_file(ET.EXEC, sections, little_endian=little_endian, bits=bits) +def compile_dwarf( + dies, little_endian=True, bits=64, *, lang=None, use_dw_form_indirect=False +): + return create_elf_file( + ET.EXEC, + dwarf_sections( + dies, + little_endian=little_endian, + bits=bits, + lang=lang, + use_dw_form_indirect=use_dw_form_indirect, + ), + little_endian=little_endian, + bits=bits, + ) diff --git a/tests/elf.py b/tests/elf.py index 4a502bdd3..0c82c7a37 100644 --- a/tests/elf.py +++ b/tests/elf.py @@ -1,43 +1,241 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
-# SPDX-License-Identifier: GPL-3.0-or-later +# Automatically generated from elf.h import enum +from typing import Text class ET(enum.IntEnum): - NONE = 0 - REL = 1 - EXEC = 2 - DYN = 3 - CORE = 4 + NONE = 0x0 + REL = 0x1 + EXEC = 0x2 + DYN = 0x3 + CORE = 0x4 + NUM = 0x5 + LOOS = 0xFE00 + HIOS = 0xFEFF + LOPROC = 0xFF00 + HIPROC = 0xFFFF + + @classmethod + def str(cls, value: int) -> Text: + try: + return f"ET_{cls(value).name}" + except ValueError: + return hex(value) class PT(enum.IntEnum): - NULL = 0 - LOAD = 1 - DYNAMIC = 2 - INTERP = 3 - NOTE = 4 - SHLIB = 5 - PHDR = 6 - TLS = 7 + NULL = 0x0 + LOAD = 0x1 + DYNAMIC = 0x2 + INTERP = 0x3 + NOTE = 0x4 + SHLIB = 0x5 + PHDR = 0x6 + TLS = 0x7 + NUM = 0x8 + LOOS = 0x60000000 + GNU_EH_FRAME = 0x6474E550 + GNU_STACK = 0x6474E551 + GNU_RELRO = 0x6474E552 + GNU_PROPERTY = 0x6474E553 + LOSUNW = 0x6FFFFFFA + SUNWBSS = 0x6FFFFFFA + SUNWSTACK = 0x6FFFFFFB + HISUNW = 0x6FFFFFFF + HIOS = 0x6FFFFFFF + LOPROC = 0x70000000 + HIPROC = 0x7FFFFFFF + MIPS_REGINFO = 0x70000000 + MIPS_RTPROC = 0x70000001 + MIPS_OPTIONS = 0x70000002 + MIPS_ABIFLAGS = 0x70000003 + PARISC_ARCHEXT = 0x70000000 + PARISC_UNWIND = 0x70000001 + + @classmethod + def str(cls, value: int) -> Text: + try: + return f"PT_{cls(value).name}" + except ValueError: + return hex(value) + + +class SHN(enum.IntEnum): + UNDEF = 0x0 + LORESERVE = 0xFF00 + LOPROC = 0xFF00 + BEFORE = 0xFF00 + AFTER = 0xFF01 + HIPROC = 0xFF1F + LOOS = 0xFF20 + HIOS = 0xFF3F + ABS = 0xFFF1 + COMMON = 0xFFF2 + XINDEX = 0xFFFF + HIRESERVE = 0xFFFF + MIPS_ACOMMON = 0xFF00 + MIPS_TEXT = 0xFF01 + MIPS_DATA = 0xFF02 + MIPS_SCOMMON = 0xFF03 + MIPS_SUNDEFINED = 0xFF04 + PARISC_ANSI_COMMON = 0xFF00 + PARISC_HUGE_COMMON = 0xFF01 + + @classmethod + def str(cls, value: int) -> Text: + try: + return f"SHN_{cls(value).name}" + except ValueError: + return hex(value) class SHT(enum.IntEnum): - NULL = 0 - PROGBITS = 1 - SYMTAB = 2 - STRTAB = 3 - RELA = 4 - HASH = 5 - DYNAMIC = 6 - NOTE = 7 - NOBITS = 8 - REL = 9 - 
SHLIB = 10 - DYNSYM = 11 - INIT_ARRAY = 14 - FINI_ARRAY = 15 - PREINIT_ARRAY = 16 - GROUP = 17 - SYMTAB_SHNDX = 18 + NULL = 0x0 + PROGBITS = 0x1 + SYMTAB = 0x2 + STRTAB = 0x3 + RELA = 0x4 + HASH = 0x5 + DYNAMIC = 0x6 + NOTE = 0x7 + NOBITS = 0x8 + REL = 0x9 + SHLIB = 0xA + DYNSYM = 0xB + INIT_ARRAY = 0xE + FINI_ARRAY = 0xF + PREINIT_ARRAY = 0x10 + GROUP = 0x11 + SYMTAB_SHNDX = 0x12 + NUM = 0x13 + LOOS = 0x60000000 + GNU_ATTRIBUTES = 0x6FFFFFF5 + GNU_HASH = 0x6FFFFFF6 + GNU_LIBLIST = 0x6FFFFFF7 + CHECKSUM = 0x6FFFFFF8 + LOSUNW = 0x6FFFFFFA + SUNW_move = 0x6FFFFFFA + SUNW_COMDAT = 0x6FFFFFFB + SUNW_syminfo = 0x6FFFFFFC + GNU_verdef = 0x6FFFFFFD + GNU_verneed = 0x6FFFFFFE + GNU_versym = 0x6FFFFFFF + HISUNW = 0x6FFFFFFF + HIOS = 0x6FFFFFFF + LOPROC = 0x70000000 + HIPROC = 0x7FFFFFFF + LOUSER = 0x80000000 + HIUSER = 0x8FFFFFFF + MIPS_LIBLIST = 0x70000000 + MIPS_MSYM = 0x70000001 + MIPS_CONFLICT = 0x70000002 + MIPS_GPTAB = 0x70000003 + MIPS_UCODE = 0x70000004 + MIPS_DEBUG = 0x70000005 + MIPS_REGINFO = 0x70000006 + MIPS_PACKAGE = 0x70000007 + MIPS_PACKSYM = 0x70000008 + MIPS_RELD = 0x70000009 + MIPS_IFACE = 0x7000000B + MIPS_CONTENT = 0x7000000C + MIPS_OPTIONS = 0x7000000D + MIPS_SHDR = 0x70000010 + MIPS_FDESC = 0x70000011 + MIPS_EXTSYM = 0x70000012 + MIPS_DENSE = 0x70000013 + MIPS_PDESC = 0x70000014 + MIPS_LOCSYM = 0x70000015 + MIPS_AUXSYM = 0x70000016 + MIPS_OPTSYM = 0x70000017 + MIPS_LOCSTR = 0x70000018 + MIPS_LINE = 0x70000019 + MIPS_RFDESC = 0x7000001A + MIPS_DELTASYM = 0x7000001B + MIPS_DELTAINST = 0x7000001C + MIPS_DELTACLASS = 0x7000001D + MIPS_DWARF = 0x7000001E + MIPS_DELTADECL = 0x7000001F + MIPS_SYMBOL_LIB = 0x70000020 + MIPS_EVENTS = 0x70000021 + MIPS_TRANSLATE = 0x70000022 + MIPS_PIXIE = 0x70000023 + MIPS_XLATE = 0x70000024 + MIPS_XLATE_DEBUG = 0x70000025 + MIPS_WHIRL = 0x70000026 + MIPS_EH_REGION = 0x70000027 + MIPS_XLATE_OLD = 0x70000028 + MIPS_PDR_EXCEPTION = 0x70000029 + MIPS_XHASH = 0x7000002B + PARISC_EXT = 0x70000000 + PARISC_UNWIND = 0x70000001 + 
PARISC_DOC = 0x70000002 + ALPHA_DEBUG = 0x70000001 + ALPHA_REGINFO = 0x70000002 + X86_64_UNWIND = 0x70000001 + + @classmethod + def str(cls, value: int) -> Text: + try: + return f"SHT_{cls(value).name}" + except ValueError: + return hex(value) + + +class STB(enum.IntEnum): + LOCAL = 0x0 + GLOBAL = 0x1 + WEAK = 0x2 + NUM = 0x3 + LOOS = 0xA + GNU_UNIQUE = 0xA + HIOS = 0xC + LOPROC = 0xD + HIPROC = 0xF + MIPS_SPLIT_COMMON = 0xD + + @classmethod + def str(cls, value: int) -> Text: + try: + return f"STB_{cls(value).name}" + except ValueError: + return hex(value) + + +class STT(enum.IntEnum): + NOTYPE = 0x0 + OBJECT = 0x1 + FUNC = 0x2 + SECTION = 0x3 + FILE = 0x4 + COMMON = 0x5 + TLS = 0x6 + NUM = 0x7 + LOOS = 0xA + GNU_IFUNC = 0xA + HIOS = 0xC + LOPROC = 0xD + HIPROC = 0xF + SPARC_REGISTER = 0xD + PARISC_MILLICODE = 0xD + + @classmethod + def str(cls, value: int) -> Text: + try: + return f"STT_{cls(value).name}" + except ValueError: + return hex(value) + + +class STV(enum.IntEnum): + DEFAULT = 0x0 + INTERNAL = 0x1 + HIDDEN = 0x2 + PROTECTED = 0x3 + + @classmethod + def str(cls, value: int) -> Text: + try: + return f"STV_{cls(value).name}" + except ValueError: + return hex(value) diff --git a/tests/elfwriter.py b/tests/elfwriter.py index b4393f54d..32e9ef1b0 100644 --- a/tests/elfwriter.py +++ b/tests/elfwriter.py @@ -1,16 +1,16 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later import struct -from typing import Optional, Sequence +from typing import List, NamedTuple, Optional, Sequence -from tests.elf import ET, PT, SHT +from tests.elf import ET, PT, SHN, SHT, STB, STT, STV class ElfSection: def __init__( self, - data: bytes, + data: bytes = b"", name: Optional[str] = None, sh_type: Optional[SHT] = None, p_type: Optional[PT] = None, @@ -18,6 +18,9 @@ def __init__( paddr: int = 0, memsz: Optional[int] = None, p_align: int = 0, + sh_link: int = 0, + sh_info: int = 0, + sh_entsize: int = 0, ): self.data = data self.name = name @@ -25,19 +28,94 @@ def __init__( self.p_type = p_type self.vaddr = vaddr self.paddr = paddr - self.memsz = memsz + self.memsz = len(self.data) if memsz is None else memsz self.p_align = p_align + self.sh_link = sh_link + self.sh_info = sh_info + self.sh_entsize = sh_entsize assert (self.name is not None) or (self.p_type is not None) assert (self.name is None) == (self.sh_type is None) - if self.p_type is None: - assert self.memsz is None - elif self.memsz is None: - self.memsz = len(self.data) + + +class ElfSymbol(NamedTuple): + name: str + value: int + size: int + type: STT + binding: STB + shindex: Optional[int] = None + visibility: STV = STV.DEFAULT + + def st_info(self) -> int: + return (self.binding << 4) + (self.type & 0xF) + + +def _create_symtab( + sections: List[ElfSection], + symbols: Sequence[ElfSymbol], + little_endian: bool, + bits: int, +): + assert not any(section.name in (".symtab", ".strtab") for section in sections) + + endian = "<" if little_endian else ">" + if bits == 64: + symbol_struct = struct.Struct(endian + "IBBHQQ") + + def symbol_fields(sym: ElfSymbol): + return ( + sym.st_info(), + sym.visibility, + SHN.UNDEF if sym.shindex is None else sym.shindex, + sym.value, + sym.size, + ) + + else: + symbol_struct = struct.Struct(endian + "IIIBBH") + + def symbol_fields(sym: ElfSymbol): + return ( + sym.value, + sym.size, + sym.st_info(), + sym.visibility, 
+ SHN.UNDEF if sym.shindex is None else sym.shindex, + ) + + symtab_data = bytearray((len(symbols) + 1) * symbol_struct.size) + strtab_data = bytearray(1) + sh_info = 1 + for i, sym in enumerate(symbols, 1): + symbol_struct.pack_into( + symtab_data, i * symbol_struct.size, len(strtab_data), *symbol_fields(sym) + ) + strtab_data.extend(sym.name.encode()) + strtab_data.append(0) + if sym.binding == STB.LOCAL: + assert sh_info == i, "local symbol after non-local symbol" + sh_info = i + 1 + + sections.append( + ElfSection( + name=".symtab", + sh_type=SHT.SYMTAB, + data=symtab_data, + sh_link=sum((1 for section in sections if section.name is not None), 2), + sh_info=sh_info, + sh_entsize=symbol_struct.size, + ) + ) + sections.append(ElfSection(name=".strtab", sh_type=SHT.STRTAB, data=strtab_data)) def create_elf_file( - type: ET, sections: Sequence[ElfSection], little_endian: bool = True, bits: int = 64 + type: ET, + sections: Sequence[ElfSection], + symbols: Sequence[ElfSymbol] = (), + little_endian: bool = True, + bits: int = 64, ): endian = "<" if little_endian else ">" if bits == 64: @@ -52,19 +130,24 @@ def create_elf_file( phdr_struct = struct.Struct(endian + "8I") e_machine = 3 if little_endian else 8 # EM_386 or EM_MIPS - shstrtab = ElfSection(name=".shstrtab", sh_type=SHT.STRTAB, data=bytearray(1)) - tmp = [shstrtab] - tmp.extend(sections) - sections = tmp - shnum = 1 # One for the SHT_NULL section. + sections = list(sections) + if symbols: + _create_symtab(sections, symbols, little_endian=little_endian, bits=bits) + shnum = 0 phnum = 0 + shstrtab = bytearray(1) for section in sections: if section.name is not None: - shstrtab.data.extend(section.name.encode()) - shstrtab.data.append(0) + shstrtab.extend(section.name.encode()) + shstrtab.append(0) shnum += 1 if section.p_type is not None: phnum += 1 + if shnum > 0: + shnum += 2 # One for the SHT_NULL section, one for .shstrtab. 
+ shstrtab.extend(b".shstrtab\0") + sections = list(sections) + sections.append(ElfSection(name=".shstrtab", sh_type=SHT.STRTAB, data=shstrtab)) shdr_offset = ehdr_struct.size phdr_offset = shdr_offset + shdr_struct.size * shnum @@ -93,15 +176,15 @@ def create_elf_file( e_machine, 1, # e_version = EV_CURRENT 0, # e_entry - phdr_offset, # e_phoff - shdr_offset, # e_shoff + phdr_offset if phnum else 0, # e_phoff + shdr_offset if shnum else 0, # e_shoff 0, # e_flags ehdr_struct.size, # e_ehsize phdr_struct.size, # e_phentsize phnum, # e_phnum - shdr_struct.size, # e_shentsize, - shnum, # e_shnum, - 1, # e_shstrndx + shdr_struct.size, # e_shentsize + shnum, # e_shnum + shnum - 1 if shnum else 0, # e_shstrndx ) shdr_offset += shdr_struct.size @@ -113,16 +196,16 @@ def create_elf_file( shdr_struct.pack_into( buf, shdr_offset, - shstrtab.data.index(section.name.encode()), # sh_name + shstrtab.index(section.name.encode()), # sh_name section.sh_type, # sh_type 0, # sh_flags section.vaddr, # sh_addr len(buf), # sh_offset - len(section.data), # sh_size - 0, # sh_link - 0, # sh_info + section.memsz, # sh_size + section.sh_link, # sh_link + section.sh_info, # sh_info 1 if section.p_type is None else bits // 8, # sh_addralign - 0, # sh_entsize + section.sh_entsize, # sh_entsize ) shdr_offset += shdr_struct.size if section.p_type is not None: diff --git a/tests/helpers/linux/__init__.py b/tests/helpers/linux/__init__.py index 5b5754a6c..68df7c258 100644 --- a/tests/helpers/linux/__init__.py +++ b/tests/helpers/linux/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import contextlib @@ -18,10 +18,11 @@ class LinuxHelperTestCase(unittest.TestCase): prog = None skip_reason = None - def setUp(self): - # We only want to create the Program once, so it's cached as a class - # variable. If we can't run these tests for whatever reason, we also - # cache that. 
+ @classmethod + def setUpClass(cls): + # We only want to create the Program once for all tests, so it's cached + # as a class variable (in the base class). If we can't run these tests + # for whatever reason, we also cache that. if LinuxHelperTestCase.prog is not None: return if LinuxHelperTestCase.skip_reason is None: @@ -37,7 +38,7 @@ def setUp(self): elif not force_run and os.geteuid() != 0: LinuxHelperTestCase.skip_reason = ( "Linux helper tests must be run as root " - "(run with env DRGN_RUN_LINUX_HELPER_TESTS=1 to force" + "(run with env DRGN_RUN_LINUX_HELPER_TESTS=1 to force)" ) else: # Some of the tests use the loop module. Open loop-control so @@ -58,7 +59,7 @@ def setUp(self): if force_run: raise LinuxHelperTestCase.skip_reason = str(e) - self.skipTest(LinuxHelperTestCase.skip_reason) + raise unittest.SkipTest(LinuxHelperTestCase.skip_reason) def wait_until(fn, *args, **kwds): @@ -93,6 +94,32 @@ def proc_state(pid): return re.search(r"State:\s*(\S)", f.read(), re.M).group(1) +# Return whether a process is blocked and fully scheduled out. The process +# state is updated while the process is still running, so use this instead of +# proc_state(pid) != "R" to avoid races. This is not accurate if pid is the +# calling thread. 
+def proc_blocked(pid): + with open(f"/proc/{pid}/syscall", "r") as f: + return f.read() != "running\n" + + +def smp_enabled(): + return bool(re.search(r"\bSMP\b", os.uname().version)) + + +def parse_range_list(s): + values = set() + s = s.strip() + if s: + for range_str in s.split(","): + first, sep, last = range_str.partition("-") + if sep: + values.update(range(int(first), int(last) + 1)) + else: + values.add(int(first)) + return values + + _c = ctypes.CDLL(None, use_errno=True) _mount = _c.mount @@ -104,17 +131,41 @@ def proc_state(pid): ctypes.c_ulong, ctypes.c_void_p, ] +MS_RDONLY = 1 +MS_NOSUID = 2 +MS_NODEV = 4 +MS_NOEXEC = 8 +MS_SYNCHRONOUS = 16 +MS_REMOUNT = 32 +MS_MANDLOCK = 64 +MS_DIRSYNC = 128 +MS_NOSYMFOLLOW = 256 +MS_NOATIME = 1024 +MS_NODIRATIME = 2048 MS_BIND = 4096 - - -def mount(source, target, fstype, flags, data): +MS_MOVE = 8192 +MS_REC = 16384 +MS_SILENT = 32768 +MS_POSIXACL = 1 << 16 +MS_UNBINDABLE = 1 << 17 +MS_PRIVATE = 1 << 18 +MS_SLAVE = 1 << 19 +MS_SHARED = 1 << 20 +MS_RELATIME = 1 << 21 +MS_KERNMOUNT = 1 << 22 +MS_I_VERSION = 1 << 23 +MS_STRICTATIME = 1 << 24 +MS_LAZYTIME = 1 << 25 + + +def mount(source, target, fstype, flags=0, data=None): if ( _mount( os.fsencode(source), os.fsencode(target), fstype.encode(), flags, - data.encode(), + None if data is None else data.encode(), ) == -1 ): diff --git a/tests/helpers/linux/test_bitops.py b/tests/helpers/linux/test_bitops.py new file mode 100644 index 000000000..ec510de6e --- /dev/null +++ b/tests/helpers/linux/test_bitops.py @@ -0,0 +1,48 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# SPDX-License-Identifier: GPL-3.0-or-later + +from drgn import Object +from drgn.helpers.linux.bitops import for_each_clear_bit, for_each_set_bit, test_bit +from tests import MockProgramTestCase + + +class TestBitOps(MockProgramTestCase): + BITMAP = [0xB351BC986648A680, 0x80DDB6615A80BC63] + # fmt: off + SET_BITS = [ + 7, 9, 10, 13, 15, 19, 22, 25, 26, 29, 30, 35, 36, 39, 42, 43, 44, 45, + 47, 48, 52, 54, 56, 57, 60, 61, 63, 64, 65, 69, 70, 74, 75, 76, 77, 79, + 87, 89, 91, 92, 94, 96, 101, 102, 105, 106, 108, 109, 111, 112, 114, + 115, 116, 118, 119, 127, + ] + CLEAR_BITS = [ + 0, 1, 2, 3, 4, 5, 6, 8, 11, 12, 14, 16, 17, 18, 20, 21, 23, 24, 27, 28, + 31, 32, 33, 34, 37, 38, 40, 41, 46, 49, 50, 51, 53, 55, 58, 59, 62, 66, + 67, 68, 71, 72, 73, 78, 80, 81, 82, 83, 84, 85, 86, 88, 90, 93, 95, 97, + 98, 99, 100, 103, 104, 107, 110, 113, 117, 120, 121, 122, 123, 124, + 125, 126, + ] + # fmt: on + + def test_for_each_set_bit(self): + bitmap = Object(self.prog, "unsigned long [2]", self.BITMAP) + self.assertEqual(list(for_each_set_bit(bitmap, 128)), self.SET_BITS) + self.assertEqual( + list(for_each_set_bit(bitmap, 101)), + [bit for bit in self.SET_BITS if bit < 101], + ) + + def test_for_each_clear_bit(self): + bitmap = Object(self.prog, "unsigned long [2]", self.BITMAP) + self.assertEqual(list(for_each_clear_bit(bitmap, 128)), self.CLEAR_BITS) + self.assertEqual( + list(for_each_clear_bit(bitmap, 100)), + [bit for bit in self.CLEAR_BITS if bit < 100], + ) + + def test_test_bit(self): + bitmap = Object(self.prog, "unsigned long [2]", self.BITMAP) + for bit in self.SET_BITS: + self.assertTrue(test_bit(bit, bitmap)) + for bit in self.CLEAR_BITS: + self.assertFalse(test_bit(bit, bitmap)) diff --git a/tests/helpers/linux/test_block.py b/tests/helpers/linux/test_block.py index 26f981409..4f5f3bbb5 100644 --- a/tests/helpers/linux/test_block.py +++ b/tests/helpers/linux/test_block.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
+# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import errno diff --git a/tests/helpers/linux/test_boot.py b/tests/helpers/linux/test_boot.py index 595482bec..66454d909 100644 --- a/tests/helpers/linux/test_boot.py +++ b/tests/helpers/linux/test_boot.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import platform diff --git a/tests/helpers/linux/test_cgroup.py b/tests/helpers/linux/test_cgroup.py index fa10d226e..9d89b708e 100644 --- a/tests/helpers/linux/test_cgroup.py +++ b/tests/helpers/linux/test_cgroup.py @@ -1,58 +1,154 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import os +from pathlib import Path +import signal +import tempfile +import unittest +from drgn import NULL from drgn.helpers.linux.cgroup import ( cgroup_name, + cgroup_parent, cgroup_path, css_for_each_child, css_for_each_descendant_pre, ) from drgn.helpers.linux.pid import find_task -from tests.helpers.linux import LinuxHelperTestCase +from tests.helpers.linux import ( + MS_NODEV, + MS_NOEXEC, + MS_NOSUID, + LinuxHelperTestCase, + fork_and_pause, + mount, + umount, +) class TestCgroup(LinuxHelperTestCase): - def setUp(self): - super().setUp() + @classmethod + def setUpClass(cls): + # It'd be nice to just use addClassCleanup(), but that was added in + # Python 3.8. 
+ cls.__cleanups = [] try: - with open("/proc/self/cgroup", "rb") as f: - for line in f: - if line.startswith(b"0::"): - self.cgroup = line[3:].rstrip(b"\n") - break - else: - self.skipTest("process is not using cgroup v2") - except FileNotFoundError: - self.skipTest("kernel does not support cgroup") + super().setUpClass() - def test_cgroup_name(self): - task = find_task(self.prog, os.getpid()) + # Don't enable cgroup2 on systems that aren't already using it (or + # don't support it). + cgroup2_enabled = False + try: + with open("/proc/self/cgroup", "rb") as f: + for line in f: + if line.startswith(b"0::"): + cgroup2_enabled = True + break + except FileNotFoundError: + pass + if not cgroup2_enabled: + raise unittest.SkipTest("cgroup2 not enabled") + + # It's easier to mount the cgroup2 filesystem than to find it. + cgroup2_mount = Path(tempfile.mkdtemp(prefix="drgn-tests-")) + cls.__cleanups.append((cgroup2_mount.rmdir,)) + mount("cgroup2", cgroup2_mount, "cgroup2", MS_NOSUID | MS_NODEV | MS_NOEXEC) + cls.__cleanups.append((umount, cgroup2_mount)) + + cls.root_cgroup = cls.prog["cgrp_dfl_root"].cgrp.address_of_() + + pid = fork_and_pause() + try: + task = find_task(cls.prog, pid) + + parent_cgroup_dir = Path( + tempfile.mkdtemp(prefix="drgn-tests-", dir=cgroup2_mount) + ) + cls.__cleanups.append((parent_cgroup_dir.rmdir,)) + cls.parent_cgroup_name = os.fsencode(parent_cgroup_dir.name) + cls.parent_cgroup_path = b"/" + cls.parent_cgroup_name + + (parent_cgroup_dir / "cgroup.procs").write_text(str(pid)) + cls.parent_cgroup = task.cgroups.dfl_cgrp.read_() + + child_cgroup_dir = parent_cgroup_dir / "child" + child_cgroup_dir.mkdir() + cls.__cleanups.append((child_cgroup_dir.rmdir,)) + cls.child_cgroup_name = os.fsencode(child_cgroup_dir.name) + cls.child_cgroup_path = ( + cls.parent_cgroup_path + b"/" + cls.child_cgroup_name + ) + + (child_cgroup_dir / "cgroup.procs").write_text(str(pid)) + cls.child_cgroup = task.cgroups.dfl_cgrp.read_() + finally: + os.kill(pid, 
signal.SIGKILL) + os.waitpid(pid, 0) + except: + for cleanup in reversed(cls.__cleanups): + cleanup[0](*cleanup[1:]) + raise + + @classmethod + def tearDownClass(cls): + super().tearDownClass() + for cleanup in reversed(cls.__cleanups): + cleanup[0](*cleanup[1:]) + + def test_cgroup_parent(self): + self.assertEqual(cgroup_parent(self.child_cgroup), self.parent_cgroup) + self.assertEqual(cgroup_parent(self.parent_cgroup), self.root_cgroup) self.assertEqual( - cgroup_name(task.cgroups.dfl_cgrp), os.path.basename(self.cgroup) + cgroup_parent(self.root_cgroup), NULL(self.prog, "struct cgroup *") ) + def test_cgroup_name(self): + self.assertEqual(cgroup_name(self.root_cgroup), b"/") + self.assertEqual(cgroup_name(self.parent_cgroup), self.parent_cgroup_name) + self.assertEqual(cgroup_name(self.child_cgroup), self.child_cgroup_name) + def test_cgroup_path(self): - task = find_task(self.prog, os.getpid()) - self.assertEqual(cgroup_path(task.cgroups.dfl_cgrp), self.cgroup) + self.assertEqual(cgroup_path(self.root_cgroup), b"/") + self.assertEqual(cgroup_path(self.parent_cgroup), self.parent_cgroup_path) + self.assertEqual(cgroup_path(self.child_cgroup), self.child_cgroup_path) + + @staticmethod + def _cgroup_iter_paths(fn, cgroup): + return [cgroup_path(css.cgroup) for css in fn(cgroup.self.address_of_())] def test_css_for_each_child(self): - self.assertTrue( - any( - self.cgroup.startswith(cgroup_path(css.cgroup)) - for css in css_for_each_child( - self.prog["cgrp_dfl_root"].cgrp.self.address_of_() - ) - ) + children = self._cgroup_iter_paths(css_for_each_child, self.root_cgroup) + self.assertIn(self.parent_cgroup_path, children) + self.assertNotIn(self.child_cgroup_path, children) + + self.assertEqual( + self._cgroup_iter_paths(css_for_each_child, self.parent_cgroup), + [self.child_cgroup_path], + ) + + self.assertEqual( + self._cgroup_iter_paths(css_for_each_child, self.child_cgroup), [] ) def test_css_for_each_descendant_pre(self): - self.assertTrue( - any( - 
cgroup_path(css.cgroup) == self.cgroup - for css in css_for_each_descendant_pre( - self.prog["cgrp_dfl_root"].cgrp.self.address_of_() - ) - ) + descendants = self._cgroup_iter_paths( + css_for_each_descendant_pre, self.root_cgroup + ) + self.assertEqual(descendants[0], b"/") + self.assertIn(self.parent_cgroup_path, descendants) + self.assertIn(self.child_cgroup_path, descendants) + self.assertLess( + descendants.index(self.parent_cgroup_path), + descendants.index(self.child_cgroup_path), + ) + + self.assertEqual( + self._cgroup_iter_paths(css_for_each_descendant_pre, self.parent_cgroup), + [self.parent_cgroup_path, self.child_cgroup_path], + ) + + self.assertEqual( + self._cgroup_iter_paths(css_for_each_descendant_pre, self.child_cgroup), + [self.child_cgroup_path], ) diff --git a/tests/helpers/linux/test_cpumask.py b/tests/helpers/linux/test_cpumask.py index 17dd4378c..cbe2f4441 100644 --- a/tests/helpers/linux/test_cpumask.py +++ b/tests/helpers/linux/test_cpumask.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later from pathlib import Path @@ -8,27 +8,16 @@ for_each_possible_cpu, for_each_present_cpu, ) -from tests.helpers.linux import LinuxHelperTestCase +from tests.helpers.linux import LinuxHelperTestCase, parse_range_list CPU_PATH = Path("/sys/devices/system/cpu") -def parse_cpulist(cpulist): - cpus = set() - for cpu_range in cpulist.split(","): - first, sep, last = cpu_range.partition("-") - if sep: - cpus.update(range(int(first), int(last) + 1)) - else: - cpus.add(int(first)) - return cpus - - class TestCpuMask(LinuxHelperTestCase): def _test_for_each_cpu(self, func, name): self.assertEqual( list(func(self.prog)), - sorted(parse_cpulist((CPU_PATH / name).read_text())), + sorted(parse_range_list((CPU_PATH / name).read_text())), ) def test_for_each_online_cpu(self): diff --git a/tests/helpers/linux/test_debug_info.py b/tests/helpers/linux/test_debug_info.py index 94dc53b81..7d7e3211e 100644 --- a/tests/helpers/linux/test_debug_info.py +++ b/tests/helpers/linux/test_debug_info.py @@ -1,16 +1,23 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import os +from pathlib import Path +import unittest from drgn import Program from tests.helpers.linux import LinuxHelperTestCase, setenv +KALLSYMS_PATH = Path("/proc/kallsyms") + +@unittest.skipUnless( + KALLSYMS_PATH.exists(), "kernel does not have kallsyms (CONFIG_KALLSYMS)" +) class TestModuleDebugInfo(LinuxHelperTestCase): # Arbitrary symbol that we can use to check that the module debug info was # loaded. 
- SYMBOL = "loop_register_transfer" + SYMBOL = "lo_open" def setUp(self): super().setUp() @@ -21,7 +28,7 @@ def setUp(self): else: self.skipTest("loop module is built in or not loaded") - with open("/proc/kallsyms", "r") as f: + with KALLSYMS_PATH.open() as f: for line in f: tokens = line.split() if tokens[2] == self.SYMBOL: diff --git a/tests/helpers/linux/test_fs.py b/tests/helpers/linux/test_fs.py index aff6e1a9d..069914f29 100644 --- a/tests/helpers/linux/test_fs.py +++ b/tests/helpers/linux/test_fs.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import os @@ -31,7 +31,7 @@ def test_path_lookup(self): def test_path_lookup_bind_mount(self): with tempfile.NamedTemporaryFile(prefix="drgn-tests-") as f: old_mnt = path_lookup(self.prog, os.path.abspath(f.name)).mnt - mount(f.name, f.name, "", MS_BIND, "") + mount(f.name, f.name, "", MS_BIND) try: new_mnt = path_lookup(self.prog, os.path.abspath(f.name)).mnt self.assertNotEqual(old_mnt, new_mnt) @@ -51,9 +51,9 @@ def test_inode_paths(self): with tempfile.TemporaryDirectory(prefix="drgn-tests-") as dir: path1 = os.fsencode(os.path.abspath(os.path.join(dir, "a"))) path2 = os.fsencode(os.path.abspath(os.path.join(dir, "b"))) - with open(path1, "w") as f: + with open(path1, "w"): os.link(path1, path2) - with open(path2, "r") as f: + with open(path2, "r"): inode = path_lookup(self.prog, path1).dentry.d_inode paths = list(inode_paths(inode)) self.assertEqual(len(paths), 2) diff --git a/tests/helpers/linux/test_kconfig.py b/tests/helpers/linux/test_kconfig.py index b5ebdd36f..16226bdfd 100644 --- a/tests/helpers/linux/test_kconfig.py +++ b/tests/helpers/linux/test_kconfig.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later import os.path diff --git a/tests/helpers/linux/test_kernfs.py b/tests/helpers/linux/test_kernfs.py index e31f564f1..1e12e8a83 100644 --- a/tests/helpers/linux/test_kernfs.py +++ b/tests/helpers/linux/test_kernfs.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import os diff --git a/tests/helpers/linux/test_mm.py b/tests/helpers/linux/test_mm.py index 73f800f7b..cd91df340 100644 --- a/tests/helpers/linux/test_mm.py +++ b/tests/helpers/linux/test_mm.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import contextlib diff --git a/tests/helpers/linux/test_net.py b/tests/helpers/linux/test_net.py index b0373fcfc..2620073c0 100644 --- a/tests/helpers/linux/test_net.py +++ b/tests/helpers/linux/test_net.py @@ -1,18 +1,99 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later import os +import socket +import sys +import tempfile from drgn import cast from drgn.helpers.linux.fs import fget -from drgn.helpers.linux.net import sk_fullsock +from drgn.helpers.linux.net import ( + _S_IFMT, + _S_IFSOCK, + SOCK_INODE, + SOCKET_I, + for_each_net, + get_net_ns_by_fd, + netdev_for_each_tx_queue, + netdev_get_by_index, + netdev_get_by_name, + sk_fullsock, +) from drgn.helpers.linux.pid import find_task from tests.helpers.linux import LinuxHelperTestCase, create_socket class TestNet(LinuxHelperTestCase): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.pid = os.getpid() + cls.task = find_task(cls.prog, cls.pid) + with open(f"/proc/{cls.pid}/ns/net") as file: + cls.net = get_net_ns_by_fd(cls.task, file.fileno()) + def test_sk_fullsock(self): - with create_socket() as sock: - file = fget(find_task(self.prog, os.getpid()), sock.fileno()) + with create_socket() as skt: + file = fget(self.task, skt.fileno()) sk = cast("struct socket *", file.private_data).sk.read_() self.assertTrue(sk_fullsock(sk)) + + def test_netdev_get_by_index(self): + for index, name in socket.if_nameindex(): + netdev = netdev_get_by_index(self.net, index) + self.assertEqual(netdev.name.string_().decode(), name) + + def test_netdev_get_by_name(self): + for index, name in socket.if_nameindex(): + netdev = netdev_get_by_name(self.net, name) + self.assertEqual(netdev.ifindex, index) + + def test_for_each_net(self): + self.assertIn(self.prog["init_net"].address_of_(), for_each_net(self.prog)) + + def test_get_net_ns_by_fd(self): + for index, name in socket.if_nameindex(): + netdev = netdev_get_by_index(self.net, index) + self.assertEqual(netdev.name.string_().decode(), name) + + with tempfile.TemporaryFile("rb") as file: + self.assertRaisesRegex( + ValueError, + "not a namespace inode", + get_net_ns_by_fd, + self.task, + file.fileno(), + ) + + with open(f"/proc/{self.pid}/ns/mnt") as file: + self.assertRaisesRegex( + 
ValueError, + "not a network namespace inode", + get_net_ns_by_fd, + self.task, + file.fileno(), + ) + + def test_netdev_for_each_tx_queue(self): + for index, _ in socket.if_nameindex(): + netdev = netdev_get_by_index(self.net, index) + for queue in netdev_for_each_tx_queue(netdev): + self.assertEqual(queue.dev, netdev) + + def test_SOCKET_I(self): + with create_socket(type=socket.SOCK_DGRAM) as skt: + sock = SOCKET_I(fget(self.task, skt.fileno()).f_inode) + self.assertEqual(sock.type, socket.SOCK_DGRAM) + + with open("/dev/null") as null: + file = fget(self.task, null.fileno()) + self.assertRaisesRegex( + ValueError, "not a socket inode", SOCKET_I, file.f_inode + ) + + def test_SOCK_INODE(self): + with create_socket() as skt: + sock = SOCKET_I(fget(self.task, skt.fileno()).f_inode) + inode = SOCK_INODE(sock) + self.assertEqual(inode.i_mode & _S_IFMT, _S_IFSOCK) diff --git a/tests/helpers/linux/test_nodemask.py b/tests/helpers/linux/test_nodemask.py new file mode 100644 index 000000000..f7506fc7d --- /dev/null +++ b/tests/helpers/linux/test_nodemask.py @@ -0,0 +1,42 @@ +# Copyright (c) ByteDance, Inc. and its affiliates. 
+# SPDX-License-Identifier: GPL-3.0-or-later + +from pathlib import Path +import unittest + +from drgn.helpers.linux.nodemask import for_each_node, for_each_online_node, node_state +from tests.helpers.linux import LinuxHelperTestCase, parse_range_list + +NODE_PATH = Path("/sys/devices/system/node") + + +@unittest.skipUnless(NODE_PATH.exists(), "kernel does not support NUMA") +class TestNodeMask(LinuxHelperTestCase): + @staticmethod + def _parse_node_list(name): + return parse_range_list((NODE_PATH / name).read_text()) + + def _test_for_each_node(self, func, name): + self.assertEqual(list(func(self.prog)), sorted(self._parse_node_list(name))) + + def test_for_each_node(self): + self._test_for_each_node(for_each_node, "possible") + + def test_for_each_online_node(self): + self._test_for_each_node(for_each_online_node, "online") + + def _test_node_state(self, state_name, file_name): + possible = self._parse_node_list("possible") + expected = self._parse_node_list(file_name) + state = self.prog[state_name] + for node in possible: + self.assertEqual(node_state(node, state), node in expected) + + def test_node_state(self): + self._test_node_state("N_NORMAL_MEMORY", "has_normal_memory") + # N_GENERIC_INITIATOR was added in Linux kernel commit 894c26a1c274 + # ("ACPI: Support Generic Initiator only domains") (in v5.10). Most of + # the time it is unset, so if it exists we can use it to test the unset + # case. + if (NODE_PATH / "has_generic_initiator").exists(): + self._test_node_state("N_GENERIC_INITIATOR", "has_generic_initiator") diff --git a/tests/helpers/linux/test_percpu.py b/tests/helpers/linux/test_percpu.py index 672532ee7..5efce8b09 100644 --- a/tests/helpers/linux/test_percpu.py +++ b/tests/helpers/linux/test_percpu.py @@ -1,12 +1,20 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later from drgn.helpers.linux.cpumask import for_each_possible_cpu from drgn.helpers.linux.percpu import per_cpu -from tests.helpers.linux import LinuxHelperTestCase +from tests.helpers.linux import LinuxHelperTestCase, smp_enabled class TestPerCpu(LinuxHelperTestCase): def test_per_cpu(self): + smp = smp_enabled() for cpu in for_each_possible_cpu(self.prog): - self.assertEqual(per_cpu(self.prog["runqueues"], cpu).cpu, cpu) + if smp: + self.assertEqual(per_cpu(self.prog["runqueues"], cpu).cpu, cpu) + else: + # struct rq::cpu only exists if CONFIG_SMP=y, so just check + # that we get something valid. + self.assertEqual( + per_cpu(self.prog["runqueues"], cpu).idle.comm.string_(), b"swapper" + ) diff --git a/tests/helpers/linux/test_pid.py b/tests/helpers/linux/test_pid.py index b29dce951..92664346b 100644 --- a/tests/helpers/linux/test_pid.py +++ b/tests/helpers/linux/test_pid.py @@ -1,6 +1,7 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later +from multiprocessing import Barrier, Process import os from drgn.helpers.linux.pid import find_pid, find_task, for_each_pid, for_each_task @@ -30,5 +31,23 @@ def test_find_task(self): self.assertEqual(task.comm.string_(), comm) def test_for_each_task(self): - pid = os.getpid() - self.assertTrue(any(task.pid == pid for task in for_each_task(self.prog))) + NUM_PROCS = 12 + barrier = Barrier(NUM_PROCS + 1) + + def proc_func(): + barrier.wait() + + try: + procs = [Process(target=proc_func) for _ in range(NUM_PROCS)] + for proc in procs: + proc.start() + pids = {task.pid.value_() for task in for_each_task(self.prog)} + for proc in procs: + self.assertIn(proc.pid, pids) + self.assertIn(os.getpid(), pids) + barrier.wait() + except: + barrier.abort() + for proc in procs: + proc.terminate() + raise diff --git a/tests/helpers/linux/test_sched.py b/tests/helpers/linux/test_sched.py index 4a4253c68..841ca0173 100644 --- a/tests/helpers/linux/test_sched.py +++ b/tests/helpers/linux/test_sched.py @@ -1,15 +1,17 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later import os import signal +from drgn.helpers.linux.cpumask import for_each_possible_cpu from drgn.helpers.linux.pid import find_task -from drgn.helpers.linux.sched import task_state_to_char +from drgn.helpers.linux.sched import idle_task, task_state_to_char from tests.helpers.linux import ( LinuxHelperTestCase, fork_and_pause, proc_state, + smp_enabled, wait_until, ) @@ -34,3 +36,12 @@ def test_task_state_to_char(self): self.assertEqual(task_state_to_char(task), "Z") os.waitpid(pid, 0) + + def test_idle_task(self): + if smp_enabled(): + for cpu in for_each_possible_cpu(self.prog): + self.assertEqual( + idle_task(self.prog, cpu).comm.string_(), f"swapper/{cpu}".encode() + ) + else: + self.assertEqual(idle_task(self.prog, 0).comm.string_(), b"swapper") diff --git a/tests/helpers/linux/test_stack_trace.py b/tests/helpers/linux/test_stack_trace.py index 0716439b0..1a9d20f9a 100644 --- a/tests/helpers/linux/test_stack_trace.py +++ b/tests/helpers/linux/test_stack_trace.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later import os @@ -9,7 +9,7 @@ from tests.helpers.linux import ( LinuxHelperTestCase, fork_and_pause, - proc_state, + proc_blocked, setenv, wait_until, ) @@ -18,7 +18,7 @@ class TestStackTrace(LinuxHelperTestCase): def test_by_task_struct(self): pid = fork_and_pause() - wait_until(lambda: proc_state(pid) == "S") + wait_until(proc_blocked, pid) self.assertIn("pause", str(self.prog.stack_trace(find_task(self.prog, pid)))) os.kill(pid, signal.SIGKILL) os.waitpid(pid, 0) @@ -33,7 +33,7 @@ def _test_by_pid(self, orc): prog.set_kernel() prog.load_default_debug_info() pid = fork_and_pause() - wait_until(lambda: proc_state(pid) == "S") + wait_until(proc_blocked, pid) self.assertIn("pause", str(prog.stack_trace(pid))) os.kill(pid, signal.SIGKILL) os.waitpid(pid, 0) @@ -46,7 +46,7 @@ def test_by_pid_orc(self): def test_local_variable(self): pid = fork_and_pause() - wait_until(lambda: proc_state(pid) == "S") + wait_until(proc_blocked, pid) for frame in self.prog.stack_trace(pid): if frame.name in ("context_switch", "__schedule"): try: @@ -75,7 +75,7 @@ def test_registers(self): # Smoke test that we get at least one register and that # StackFrame.registers() agrees with StackFrame.register(). pid = fork_and_pause() - wait_until(lambda: proc_state(pid) == "S") + wait_until(proc_blocked, pid) trace = self.prog.stack_trace(pid) have_registers = False for frame in trace: diff --git a/tests/helpers/linux/test_symbol.py b/tests/helpers/linux/test_symbol.py new file mode 100644 index 000000000..a33ffb870 --- /dev/null +++ b/tests/helpers/linux/test_symbol.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021, Oracle and/or its affiliates. 
+# SPDX-License-Identifier: GPL-3.0-or-later + +from drgn import SymbolBinding, SymbolKind +from tests.helpers.linux import LinuxHelperTestCase + + +class TestSymbol(LinuxHelperTestCase): + def test_global_symbol(self): + symbol = self.prog.symbol("jiffies") + self.assertEqual(symbol.name, "jiffies") + self.assertEqual(symbol.binding, SymbolBinding.GLOBAL) + self.assertEqual(symbol.kind, SymbolKind.OBJECT) diff --git a/tests/helpers/linux/test_tc.py b/tests/helpers/linux/test_tc.py new file mode 100644 index 000000000..6f16205c4 --- /dev/null +++ b/tests/helpers/linux/test_tc.py @@ -0,0 +1,114 @@ +# Copyright (c) ByteDance, Inc. and its affiliates. +# SPDX-License-Identifier: GPL-3.0-or-later + +import os +import random +import string +import unittest + +from drgn.helpers.linux.fs import path_lookup +from drgn.helpers.linux.net import get_net_ns_by_inode, netdev_get_by_name +from drgn.helpers.linux.tc import qdisc_lookup +from tests.helpers.linux import LinuxHelperTestCase + +try: + from pyroute2 import NetNS + from pyroute2.netlink.exceptions import NetlinkError + + have_pyroute2 = True +except ImportError: + have_pyroute2 = False + + +@unittest.skipUnless(have_pyroute2, "pyroute2 not found") +class TestTc(LinuxHelperTestCase): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.ns = None + while cls.ns is None: + try: + cls.name = "".join( + random.choice(string.ascii_letters) for _ in range(16) + ) + cls.ns = NetNS(cls.name, flags=os.O_CREAT | os.O_EXCL) + except FileExistsError: + pass + + @classmethod + def tearDownClass(cls): + cls.ns.remove() + super().tearDownClass() + + def test_qdisc_lookup(self): + try: + self.ns.link("add", ifname="dummy0", kind="dummy") + except NetlinkError: + self.skipTest("kernel does not support dummy interface (CONFIG_DUMMY)") + + dummy = self.ns.link_lookup(ifname="dummy0")[0] + + # tc qdisc add dev dummy0 root handle 1: prio + try: + self.ns.tc( + "add", + kind="prio", + index=dummy, + handle="1:", + # default 
TCA_OPTIONS for sch_prio, see [iproute2] tc/q_prio.c:prio_parse_opt() + bands=3, + priomap=[1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], + ) + except NetlinkError: + self.ns.link("delete", ifname="dummy0") + self.skipTest( + "kernel does not support Multi Band Priority Queueing (CONFIG_NET_SCH_PRIO)" + ) + # tc qdisc add dev dummy0 parent 1:1 handle 10: sfq + try: + self.ns.tc("add", kind="sfq", index=dummy, parent="1:1", handle="10:") + except NetlinkError: + self.ns.link("delete", ifname="dummy0") + self.skipTest( + "kernel does not support Stochastic Fairness Queueing (CONFIG_NET_SCH_SFQ)" + ) + # tc qdisc add dev dummy0 parent 1:2 handle 20: tbf rate 20kbit buffer 1600 limit 3000 + try: + self.ns.tc( + "add", + kind="tbf", + index=dummy, + parent="1:2", + handle="20:", + rate=2500, + burst=1600, + limit=3000, + ) + except NetlinkError: + self.ns.link("delete", ifname="dummy0") + self.skipTest( + "kernel does not support Token Bucket Filter (CONFIG_NET_SCH_TBF)" + ) + # tc qdisc add dev dummy0 parent 1:3 handle 30: sfq + self.ns.tc("add", kind="sfq", index=dummy, parent="1:3", handle="30:") + # tc qdisc add dev dummy0 ingress + try: + self.ns.tc("add", kind="ingress", index=dummy) + except NetlinkError: + self.ns.link("delete", ifname="dummy0") + self.skipTest( + "kernel does not support ingress Qdisc (CONFIG_NET_SCH_INGRESS)" + ) + + inode = path_lookup( + self.prog, os.path.realpath(f"/var/run/netns/{self.name}") + ).dentry.d_inode + netdev = netdev_get_by_name(get_net_ns_by_inode(inode), "dummy0") + + self.assertEqual(qdisc_lookup(netdev, 0x1).ops.id.string_(), b"prio") + self.assertEqual(qdisc_lookup(netdev, 0x10).ops.id.string_(), b"sfq") + self.assertEqual(qdisc_lookup(netdev, 0x20).ops.id.string_(), b"tbf") + self.assertEqual(qdisc_lookup(netdev, 0x30).ops.id.string_(), b"sfq") + self.assertEqual(qdisc_lookup(netdev, 0xFFFF).ops.id.string_(), b"ingress") + + self.ns.link("delete", ifname="dummy0") diff --git a/tests/helpers/linux/test_tcp.py 
b/tests/helpers/linux/test_tcp.py index 0fc7dbd9b..8b6a77a2d 100644 --- a/tests/helpers/linux/test_tcp.py +++ b/tests/helpers/linux/test_tcp.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import os diff --git a/tests/helpers/linux/test_user.py b/tests/helpers/linux/test_user.py index 1659aedb3..219814f8b 100644 --- a/tests/helpers/linux/test_user.py +++ b/tests/helpers/linux/test_user.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import functools diff --git a/tests/helpers/linux/test_uts.py b/tests/helpers/linux/test_uts.py index 9a637cc8b..f5f880cfb 100644 --- a/tests/helpers/linux/test_uts.py +++ b/tests/helpers/linux/test_uts.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import os diff --git a/tests/libdrgn.py b/tests/libdrgn.py index 900ecccae..dba4dbb14 100644 --- a/tests/libdrgn.py +++ b/tests/libdrgn.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later import ctypes @@ -7,7 +7,6 @@ import os import _drgn -import drgn _drgn_pydll = ctypes.PyDLL(_drgn.__file__) _drgn_cdll = ctypes.CDLL(_drgn.__file__) diff --git a/tests/linux_kernel/__init__.py b/tests/linux_kernel/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/linux_kernel/vmcore/__init__.py b/tests/linux_kernel/vmcore/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/linux_kernel/vmcore/test_vmcore.py b/tests/linux_kernel/vmcore/test_vmcore.py new file mode 100644 index 000000000..73468d2dd --- /dev/null +++ b/tests/linux_kernel/vmcore/test_vmcore.py @@ -0,0 +1,15 @@ +from pathlib import Path +import unittest + +from drgn import Program, ProgramFlags + +VMCORE_PATH = Path("/proc/vmcore") + + +@unittest.skipUnless(VMCORE_PATH.exists(), "not running in kdump") +class TestAttachToVMCore(unittest.TestCase): + def test_attach_to_vmcore(self): + prog = Program() + prog.set_core_dump("/proc/vmcore") + self.assertFalse(prog.flags & ProgramFlags.IS_LIVE) + self.assertTrue(prog.flags & ProgramFlags.IS_LINUX_KERNEL) diff --git a/tests/test_docs.py b/tests/test_docs.py index 5b0cc369e..c3f170984 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import pydoc diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index cffc8d7f4..381b44098 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later import functools @@ -6,7 +6,6 @@ import os.path import re import tempfile -import unittest import drgn from drgn import ( @@ -1025,22 +1024,25 @@ def test_filename(self): ), ] - point_type = lambda prog: prog.struct_type( - "point", - 8, - ( - TypeMember(prog.int_type("int", 4, True), "x"), - TypeMember(prog.int_type("int", 4, True), "y", 32), - ), - ) - other_point_type = lambda prog: prog.struct_type( - "point", - 8, - ( - TypeMember(prog.int_type("int", 4, True), "a"), - TypeMember(prog.int_type("int", 4, True), "b", 32), - ), - ) + def point_type(prog): + return prog.struct_type( + "point", + 8, + ( + TypeMember(prog.int_type("int", 4, True), "x"), + TypeMember(prog.int_type("int", 4, True), "y", 32), + ), + ) + + def other_point_type(prog): + return prog.struct_type( + "point", + 8, + ( + TypeMember(prog.int_type("int", 4, True), "a"), + TypeMember(prog.int_type("int", 4, True), "b", 32), + ), + ) prog = dwarf_program(dies) for dir in ["", "src", "usr/src", "/usr/src"]: @@ -1721,6 +1723,65 @@ def test_enum(self): ), ) + def test_enum_typedef(self): + prog = dwarf_program( + wrap_test_type_dies( + ( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "color"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 1), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ( + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "RED"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 0), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "GREEN"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 1), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "BLUE"), + DwarfAttrib(DW_AT.const_value, DW_FORM.data1, 2), + ), + ), + ), + ), + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "__uint32_t"), + DwarfAttrib(DW_AT.type, DW_FORM.ref4, 2), + ), + ), + unsigned_int_die, + ) + ) + ) 
+ self.assertIdentical( + prog.type("TEST").type, + prog.enum_type( + "color", + prog.int_type("unsigned int", 4, False), + ( + TypeEnumerator("RED", 0), + TypeEnumerator("GREEN", 1), + TypeEnumerator("BLUE", 2), + ), + ), + ) + def test_enum_anonymous(self): prog = dwarf_program( wrap_test_type_dies( diff --git a/tests/test_language_c.py b/tests/test_language_c.py index 76b8a8f9e..48a959bdc 100644 --- a/tests/test_language_c.py +++ b/tests/test_language_c.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later from functools import reduce @@ -340,6 +340,67 @@ def test_function(self): "int (void)", ) + def test_pointer_to_anonymous_struct(self): + self.assertTypeName( + self.prog.pointer_type( + self.prog.struct_type( + None, 8, (TypeMember(self.prog.int_type("int", 4, True), "x", 0),) + ) + ), + "struct *", + ) + + def test_array_of_anonymous_struct(self): + self.assertTypeName( + self.prog.array_type( + self.prog.struct_type( + None, 8, (TypeMember(self.prog.int_type("int", 4, True), "x", 0),) + ), + 2, + ), + "struct [2]", + ) + + def test_function_returning_anonymous_struct(self): + self.assertTypeName( + self.prog.function_type( + self.prog.struct_type( + None, 8, (TypeMember(self.prog.int_type("int", 4, True), "x", 0),) + ), + (), + ), + "struct (void)", + ) + + def test_function_of_anonymous_struct(self): + self.assertTypeName( + self.prog.function_type( + self.prog.int_type("int", 4, True), + ( + TypeParameter( + self.prog.struct_type( + None, + 8, + (TypeMember(self.prog.int_type("int", 4, True), "x", 0),), + ), + "x", + ), + ), + ), + "int (struct x)", + ) + + def test_typedef_of_anonymous_struct(self): + self.assertTypeName( + self.prog.typedef_type( + "onymous", + self.prog.struct_type( + None, 8, (TypeMember(self.prog.int_type("int", 4, True), "x", 0),) + ), + ), + "onymous", + ) + class TestPrettyPrintType(MockProgramTestCase): def 
assertPrettyPrint(self, type, expected): diff --git a/tests/test_lexer.py b/tests/test_lexer.py index 0b8a7786c..d9abe8eb7 100644 --- a/tests/test_lexer.py +++ b/tests/test_lexer.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import unittest diff --git a/tests/test_object.py b/tests/test_object.py index 0d6994bfc..523f18600 100644 --- a/tests/test_object.py +++ b/tests/test_object.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import math @@ -11,7 +11,6 @@ ObjectAbsentError, OutOfBoundsError, Qualifiers, - Type, TypeMember, cast, reinterpret, @@ -930,6 +929,233 @@ def test_index(self): Object(self.prog, "int []", address=0), ) + def test_signed_int_value_to_bytes(self): + for byteorder in ("little", "big"): + with self.subTest(byteorder=byteorder): + self.assertEqual( + Object( + self.prog, self.prog.int_type("int", 4, True, byteorder), -100 + ).to_bytes_(), + (-100).to_bytes(4, byteorder, signed=True), + ) + self.assertEqual( + Object( + self.prog, + self.prog.int_type("long", 8, True, byteorder), + -(2 ** 32), + ).to_bytes_(), + (-(2 ** 32)).to_bytes(8, byteorder, signed=True), + ) + + def test_unsigned_int_value_to_bytes(self): + for byteorder in ("little", "big"): + with self.subTest(byteorder=byteorder): + self.assertEqual( + Object( + self.prog, + self.prog.int_type("unsigned int", 4, False, byteorder), + 2 ** 31, + ).to_bytes_(), + (2 ** 31).to_bytes(4, byteorder), + ) + self.assertEqual( + Object( + self.prog, + self.prog.int_type("unsigned long", 8, False, byteorder), + 2 ** 60, + ).to_bytes_(), + (2 ** 60).to_bytes(8, byteorder), + ) + + def test_float64_value_to_bytes(self): + for byteorder in ("little", "big"): + with self.subTest(byteorder=byteorder): + self.assertEqual( + Object( + self.prog, 
self.prog.float_type("double", 8, byteorder), math.e + ).to_bytes_(), + struct.pack(("<" if byteorder == "little" else ">") + "d", math.e), + ) + + def test_float32_value_to_bytes(self): + for byteorder in ("little", "big"): + with self.subTest(byteorder=byteorder): + self.assertEqual( + Object( + self.prog, self.prog.float_type("float", 4, byteorder), math.e + ).to_bytes_(), + struct.pack(("<" if byteorder == "little" else ">") + "f", math.e), + ) + + def test_struct_value_to_bytes(self): + self.assertEqual( + Object(self.prog, self.point_type, {"x": 1, "y": 2}).to_bytes_(), + b"\x01\x00\x00\x00\x02\x00\x00\x00", + ) + + def test_int_reference_to_bytes(self): + self.add_memory_segment(b"\x78\x56\x34\x12", virt_addr=0xFFFF0000) + self.assertEqual( + Object(self.prog, "int", address=0xFFFF0000).to_bytes_(), + b"\x78\x56\x34\x12", + ) + + def test_int_reference_bit_offset_to_bytes(self): + self.add_memory_segment(b"\xe0Y\xd1H\x00", virt_addr=0xFFFF0000) + self.assertEqual( + Object(self.prog, "int", address=0xFFFF0000, bit_offset=2).to_bytes_(), + b"\x78\x56\x34\x12", + ) + + def test_int_reference_big_endian_bit_offset_to_bytes(self): + self.add_memory_segment(b"\x04\x8d\x15\x9e\x00", virt_addr=0xFFFF0000) + self.assertEqual( + Object( + self.prog, + self.prog.int_type("int", 4, True, "big"), + address=0xFFFF0000, + bit_offset=2, + ).to_bytes_(), + b"\x12\x34\x56\x78", + ) + + def test_struct_reference_to_bytes(self): + self.add_memory_segment( + b"\x01\x00\x00\x00\x02\x00\x00\x00", virt_addr=0xFFFF0000 + ) + self.assertEqual( + Object(self.prog, self.point_type, address=0xFFFF0000).to_bytes_(), + b"\x01\x00\x00\x00\x02\x00\x00\x00", + ) + + def test_int_from_bytes(self): + for byteorder in ("little", "big"): + with self.subTest(byteorder=byteorder): + type_ = self.prog.int_type("int", 4, True, byteorder) + self.assertIdentical( + Object.from_bytes_( + self.prog, type_, (0x12345678).to_bytes(4, byteorder) + ), + Object(self.prog, type_, 0x12345678), + ) + + def 
test_int_from_bytes_bit_offset(self): + self.assertIdentical( + Object.from_bytes_(self.prog, "int", b"\xe0Y\xd1H\x00", bit_offset=2), + Object(self.prog, "int", 0x12345678), + ) + + def test_int_from_bytes_big_endian_bit_offset(self): + self.assertIdentical( + Object.from_bytes_( + self.prog, + self.prog.int_type("int", 4, True, "big"), + b"\x04\x8d\x15\x9e\x00", + bit_offset=2, + ), + Object(self.prog, self.prog.int_type("int", 4, True, "big"), 0x12345678), + ) + + def test_int_from_bytes_bit_field(self): + self.assertIdentical( + Object.from_bytes_(self.prog, "int", b"\xcc", bit_field_size=8), + Object(self.prog, "int", 0xCC, bit_field_size=8), + ) + + def test_float64_from_bytes(self): + for byteorder in ("little", "big"): + with self.subTest(byteorder=byteorder): + type_ = self.prog.float_type("double", 8, byteorder) + self.assertIdentical( + Object.from_bytes_( + self.prog, + type_, + struct.pack( + ("<" if byteorder == "little" else ">") + "d", math.e + ), + ), + Object(self.prog, type_, math.e), + ) + + def test_float32_from_bytes(self): + for byteorder in ("little", "big"): + with self.subTest(byteorder=byteorder): + type_ = self.prog.float_type("float", 4, byteorder) + self.assertIdentical( + Object.from_bytes_( + self.prog, + type_, + struct.pack( + ("<" if byteorder == "little" else ">") + "f", math.e + ), + ), + Object(self.prog, type_, math.e), + ) + + def test_struct_from_bytes(self): + self.assertIdentical( + Object.from_bytes_( + self.prog, self.point_type, b"\x01\x00\x00\x00\x02\x00\x00\x00" + ), + Object(self.prog, self.point_type, {"x": 1, "y": 2}), + ) + + def test_struct_from_bytes_bit_offset(self): + self.assertIdentical( + Object.from_bytes_( + self.prog, + self.point_type, + b"\xff\x01\x00\x00\x00\x02\x00\x00\x00", + bit_offset=8, + ), + Object(self.prog, self.point_type, {"x": 1, "y": 2}), + ) + + def test_struct_from_bytes_invalid_bit_offset(self): + self.assertRaisesRegex( + ValueError, + "non-scalar must be byte-aligned", + 
Object.from_bytes_, + self.prog, + self.point_type, + b"\xff\x01\x00\x00\x00\x02\x00\x00\x00", + bit_offset=2, + ) + + def test_from_bytes_invalid_bit_field_size(self): + self.assertRaisesRegex( + ValueError, + "bit field size cannot be zero", + Object.from_bytes_, + self.prog, + "int", + b"", + bit_field_size=0, + ) + + def test_from_bytes_buffer_too_small(self): + self.assertRaisesRegex( + ValueError, + "buffer is too small", + Object.from_bytes_, + self.prog, + "int", + bytes(3), + ) + + def test_from_bytes_incomplete_type(self): + self.assertRaisesRegex( + TypeError, + "cannot create object with void type", + Object.from_bytes_, + self.prog, + "void", + b"", + ) + + def test_from_bytes_bad_type(self): + self.assertRaises(TypeError, Object.from_bytes_, self.prog, None, b"") + class TestInvalidBitField(MockProgramTestCase): def test_integer(self): diff --git a/tests/test_path.py b/tests/test_path.py index 10b96c358..ecf40f663 100644 --- a/tests/test_path.py +++ b/tests/test_path.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import itertools diff --git a/tests/test_platform.py b/tests/test_platform.py index 9e11c65bf..f3152d0be 100644 --- a/tests/test_platform.py +++ b/tests/test_platform.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import itertools diff --git a/tests/test_program.py b/tests/test_program.py index d3a9c94c4..bc8ea8cc2 100644 --- a/tests/test_program.py +++ b/tests/test_program.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later import ctypes @@ -47,9 +47,6 @@ def test_set_pid(self): prog.set_pid(os.getpid()) self.assertEqual(prog.platform, host_platform) self.assertTrue(prog.flags & ProgramFlags.IS_LIVE) - data = b"hello, world!" - buf = ctypes.create_string_buffer(data) - self.assertEqual(prog.read(ctypes.addressof(buf), len(data)), data) self.assertRaisesRegex( ValueError, "program memory was already initialized", @@ -57,6 +54,31 @@ def test_set_pid(self): os.getpid(), ) + def test_pid_memory(self): + data = b"hello, world!" + buf = ctypes.create_string_buffer(data) + address = ctypes.addressof(buf) + + # QEMU user-mode emulation doesn't seem to emulate /proc/$pid/mem + # correctly on a 64-bit host with a 32-bit guest; see + # https://gitlab.com/qemu-project/qemu/-/issues/698. Packit uses mock + # to cross-compile and test packages, which in turn uses QEMU user-mode + # emulation. Skip this test if /proc/$pid/mem doesn't work so that + # those builds succeed. + try: + with open("/proc/self/mem", "rb") as f: + f.seek(address) + functional_proc_pid_mem = f.read(len(data)) == data + except OSError: + functional_proc_pid_mem = False + if not functional_proc_pid_mem: + self.skipTest("/proc/$pid/mem is not functional") + + prog = Program() + prog.set_pid(os.getpid()) + + self.assertEqual(prog.read(ctypes.addressof(buf), len(data)), data) + def test_lookup_error(self): prog = mock_program() self.assertRaisesRegex( @@ -784,7 +806,7 @@ def test_physical(self): self.assertEqual(prog.read(0xFFFF0000, len(data)), data) self.assertEqual(prog.read(0xA0, len(data), physical=True), data) - def test_zero_fill(self): + def test_unsaved(self): data = b"hello, world" prog = Program() with tempfile.NamedTemporaryFile() as f: @@ -803,4 +825,6 @@ def test_zero_fill(self): ) f.flush() prog.set_core_dump(f.name) - self.assertEqual(prog.read(0xFFFF0000, len(data) + 4), data + bytes(4)) + with self.assertRaisesRegex(FaultError, "memory not saved in core dump") as cm: + 
prog.read(0xFFFF0000, len(data) + 4) + self.assertEqual(cm.exception.address, 0xFFFF000C) diff --git a/tests/test_python.py b/tests/test_python.py index 5a108b676..a211fc1cf 100644 --- a/tests/test_python.py +++ b/tests/test_python.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import unittest diff --git a/tests/test_serialize.py b/tests/test_serialize.py index ae03048bc..f8ff1210c 100644 --- a/tests/test_serialize.py +++ b/tests/test_serialize.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import unittest diff --git a/tests/test_symbol.py b/tests/test_symbol.py new file mode 100644 index 000000000..abd439e59 --- /dev/null +++ b/tests/test_symbol.py @@ -0,0 +1,206 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: GPL-3.0-or-later + +import tempfile +from typing import NamedTuple +import unittest + +from drgn import Program, SymbolBinding, SymbolKind +from tests.dwarfwriter import dwarf_sections +from tests.elf import ET, PT, SHT, STB, STT +from tests.elfwriter import ElfSection, ElfSymbol, create_elf_file + + +def create_elf_symbol_file(symbols): + # We need some DWARF data so that libdwfl will load the file. + sections = dwarf_sections(()) + # Create a section for the symbols to reference and the corresponding + # segment for address lookups. 
+ min_address = min(symbol.value for symbol in symbols) + max_address = max(symbol.value + symbol.size for symbol in symbols) + sections.append( + ElfSection( + name=".foo", + sh_type=SHT.NOBITS, + p_type=PT.LOAD, + vaddr=min_address, + memsz=max_address - min_address, + ) + ) + symbols = [ + symbol._replace( + shindex=len(sections) if symbol.shindex is None else symbol.shindex + ) + for symbol in symbols + ] + return create_elf_file(ET.EXEC, sections, symbols) + + +def elf_symbol_program(*modules): + prog = Program() + for symbols in modules: + with tempfile.NamedTemporaryFile() as f: + f.write(create_elf_symbol_file(symbols)) + f.flush() + prog.load_debug_info([f.name]) + return prog + + +# We don't want to support creating drgn.Symbol instances yet, so use this dumb +# class for testing. +class Symbol(NamedTuple): + name: str + address: int + size: int + binding: SymbolBinding + kind: SymbolKind + + +class TestElfSymbol(unittest.TestCase): + def assert_symbol_equal(self, drgn_symbol, symbol): + self.assertEqual( + Symbol( + drgn_symbol.name, + drgn_symbol.address, + drgn_symbol.size, + drgn_symbol.binding, + drgn_symbol.kind, + ), + symbol, + ) + + def test_by_address(self): + elf_first = ElfSymbol("first", 0xFFFF0000, 0x8, STT.OBJECT, STB.LOCAL) + elf_second = ElfSymbol("second", 0xFFFF0008, 0x8, STT.OBJECT, STB.LOCAL) + first = Symbol("first", 0xFFFF0000, 0x8, SymbolBinding.LOCAL, SymbolKind.OBJECT) + second = Symbol( + "second", 0xFFFF0008, 0x8, SymbolBinding.LOCAL, SymbolKind.OBJECT + ) + + same_module = ((elf_first, elf_second),) + different_modules = ((elf_first,), (elf_second,)) + + for modules in same_module, different_modules: + with self.subTest(modules=len(modules)): + prog = elf_symbol_program(*modules) + self.assertRaises(LookupError, prog.symbol, 0xFFFEFFFF) + self.assert_symbol_equal(prog.symbol(0xFFFF0000), first) + self.assert_symbol_equal(prog.symbol(0xFFFF0004), first) + self.assert_symbol_equal(prog.symbol(0xFFFF0008), second) + 
self.assert_symbol_equal(prog.symbol(0xFFFF000C), second) + self.assertRaises(LookupError, prog.symbol, 0xFFFF0010) + + def test_by_address_precedence(self): + precedence = (STB.GLOBAL, STB.WEAK, STB.LOCAL) + + def assert_find_higher(*modules): + self.assertEqual( + elf_symbol_program(*modules).symbol(0xFFFF0000).name, "foo" + ) + + for i in range(len(precedence) - 1): + higher_binding = precedence[i] + for j in range(i + 1, len(precedence)): + lower_binding = precedence[j] + with self.subTest(higher=higher_binding, lower=lower_binding): + higher = ElfSymbol( + "foo", 0xFFFF0000, 0x8, STT.OBJECT, higher_binding + ) + lower = ElfSymbol("bar", 0xFFFF0000, 0x8, STT.OBJECT, lower_binding) + # Local symbols must be before global symbols. + if lower_binding != STB.LOCAL: + with self.subTest("higher before lower"): + assert_find_higher((higher, lower)) + with self.subTest("lower before higher"): + assert_find_higher((lower, higher)) + + def test_by_name(self): + elf_first = ElfSymbol("first", 0xFFFF0000, 0x8, STT.OBJECT, STB.GLOBAL) + elf_second = ElfSymbol("second", 0xFFFF0008, 0x8, STT.OBJECT, STB.GLOBAL) + first = Symbol( + "first", 0xFFFF0000, 0x8, SymbolBinding.GLOBAL, SymbolKind.OBJECT + ) + second = Symbol( + "second", 0xFFFF0008, 0x8, SymbolBinding.GLOBAL, SymbolKind.OBJECT + ) + + same_module = ((elf_first, elf_second),) + different_modules = ((elf_first,), (elf_second,)) + + for modules in same_module, different_modules: + with self.subTest(modules=len(modules)): + prog = elf_symbol_program(*modules) + self.assert_symbol_equal(prog.symbol("first"), first) + self.assert_symbol_equal(prog.symbol("second"), second) + self.assertRaises(LookupError, prog.symbol, "third") + + def test_by_name_precedence(self): + precedence = ( + (STB.GLOBAL, STB.GNU_UNIQUE), + (STB.WEAK,), + (STB.LOCAL, STB.HIPROC), + ) + + expected = 0xFFFF0008 + + def assert_find_higher(*modules): + self.assertEqual( + elf_symbol_program(*modules).symbol("foo").address, expected + ) + + for i in 
range(len(precedence) - 1): + for higher_binding in precedence[i]: + for j in range(i + 1, len(precedence)): + for lower_binding in precedence[j]: + with self.subTest(higher=higher_binding, lower=lower_binding): + higher = ElfSymbol( + "foo", expected, 0x8, STT.OBJECT, higher_binding + ) + lower = ElfSymbol( + "foo", expected - 0x8, 0x8, STT.OBJECT, lower_binding + ) + # Local symbols must be before global symbols. + if lower_binding not in precedence[-1]: + with self.subTest("same module, higher before lower"): + assert_find_higher((higher, lower)) + with self.subTest("same module, lower before higher"): + assert_find_higher((lower, higher)) + with self.subTest("different modules, higher before lower"): + assert_find_higher((higher,), (lower,)) + with self.subTest("different modules, lower before higher"): + assert_find_higher((lower,), (higher,)) + + def test_binding(self): + for by in "name", "address": + for elf_binding, drgn_binding in ( + (STB.LOCAL, SymbolBinding.LOCAL), + (STB.GLOBAL, SymbolBinding.GLOBAL), + (STB.WEAK, SymbolBinding.WEAK), + (STB.GNU_UNIQUE, SymbolBinding.UNIQUE), + (STB.HIPROC, SymbolBinding.UNKNOWN), + ): + with self.subTest(by=by, binding=elf_binding): + prog = elf_symbol_program( + (ElfSymbol("foo", 0xFFFF0000, 1, STT.OBJECT, elf_binding),) + ) + self.assertEqual( + prog.symbol("foo" if by == "name" else 0xFFFF0000).binding, + drgn_binding, + ) + + def test_kind(self): + for elf_type, drgn_kind in ( + (STT.NOTYPE, SymbolKind.UNKNOWN), + (STT.OBJECT, SymbolKind.OBJECT), + (STT.FUNC, SymbolKind.FUNC), + (STT.SECTION, SymbolKind.SECTION), + (STT.FILE, SymbolKind.FILE), + (STT.COMMON, SymbolKind.COMMON), + (STT.TLS, SymbolKind.TLS), + (STT.GNU_IFUNC, SymbolKind.IFUNC), + ): + with self.subTest(type=elf_type): + prog = elf_symbol_program( + (ElfSymbol("foo", 0xFFFF0000, 1, elf_type, STB.GLOBAL),) + ) + self.assertEqual(prog.symbol("foo").kind, drgn_kind) diff --git a/tests/test_type.py b/tests/test_type.py index 7ac393e8c..add26ebbb 100644 
--- a/tests/test_type.py +++ b/tests/test_type.py @@ -1,8 +1,6 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later -import operator - from drgn import ( Language, Object, diff --git a/tests/test_util.py b/tests/test_util.py index 20eadfe94..cb186047e 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later from functools import cmp_to_key diff --git a/tools/bpf_inspect.py b/tools/bpf_inspect.py index 177494050..0ce7959f8 100755 --- a/tools/bpf_inspect.py +++ b/tools/bpf_inspect.py @@ -1,14 +1,8 @@ #!/usr/bin/env drgn -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later -DESCRIPTION = """ -drgn script to list BPF programs or maps and their properties -unavailable via kernel API. -""" - import argparse -import sys from drgn.helpers import enum_type_to_class from drgn.helpers.linux import bpf_map_for_each, bpf_prog_for_each, hlist_for_each_entry @@ -116,7 +110,7 @@ def list_bpf_maps(args): def main(): parser = argparse.ArgumentParser( - description=DESCRIPTION, formatter_class=argparse.RawTextHelpFormatter + description="drgn script to list BPF programs or maps and their properties unavailable via kernel API" ) subparsers = parser.add_subparsers(title="subcommands", dest="subcommand") diff --git a/util.py b/util.py index 23f178692..4f3fc4ff8 100644 --- a/util.py +++ b/util.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later from functools import total_ordering diff --git a/vmtest/asynciosubprocess.py b/vmtest/asynciosubprocess.py index b4b90d0a5..8309b42a2 100644 --- a/vmtest/asynciosubprocess.py +++ b/vmtest/asynciosubprocess.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import asyncio diff --git a/vmtest/download.py b/vmtest/download.py index 8565d7619..588d4768b 100644 --- a/vmtest/download.py +++ b/vmtest/download.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import argparse @@ -27,7 +27,7 @@ def available_kernel_releases( github_release: Dict[str, Any], arch: str ) -> Dict[str, Dict[str, Any]]: - pattern = re.compile(r"kernel-(.*)\." + re.escape(arch) + "\.tar\.zst") + pattern = re.compile(r"kernel-(.*)\." + re.escape(arch) + r"\.tar\.zst") releases = {} for asset in github_release["assets"]: match = pattern.fullmatch(asset["name"]) diff --git a/vmtest/enter_kdump.py b/vmtest/enter_kdump.py new file mode 100644 index 000000000..a34710996 --- /dev/null +++ b/vmtest/enter_kdump.py @@ -0,0 +1,40 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: GPL-3.0-or-later + +# This isn't great: it's specific to x86-64, both by virtue of the syscall +# number and because kexec_file_load isn't implemented on many architectures, +# especially on older kernels. + +import ctypes +import os +import re + +SYS_kexec_file_load = 320 # On x86-64. 
+KEXEC_FILE_ON_CRASH = 2 +KEXEC_FILE_NO_INITRAMFS = 4 + +syscall = ctypes.CDLL(None, use_errno=True).syscall +syscall.restype = ctypes.c_long + +with open("/proc/cmdline", "rb") as f: + cmdline = f.read().rstrip(b"\n") + cmdline = re.sub(rb"(^|\s)crashkernel=\S+", b"", cmdline) + # `nosmp` is required to avoid QEMU sporadically failing an internal assertion + # `nokaslr` is required to avoid sporadically failing to reserve space for the + # capture kernel + cmdline += b" nosmp nokaslr" + +with open(f"/lib/modules/{os.uname().release}/vmlinuz", "rb") as kernel: + if syscall( + ctypes.c_long(SYS_kexec_file_load), + ctypes.c_int(kernel.fileno()), + ctypes.c_int(-1), + ctypes.c_ulong(len(cmdline) + 1), + ctypes.c_char_p(cmdline + b"\0"), + ctypes.c_ulong(KEXEC_FILE_ON_CRASH | KEXEC_FILE_NO_INITRAMFS), + ): + errno = ctypes.get_errno() + raise OSError(errno, os.strerror(errno)) + +with open("/proc/sysrq-trigger", "w") as f: + f.write("c") diff --git a/vmtest/githubapi.py b/vmtest/githubapi.py index d37dd73bc..46a7a32fc 100644 --- a/vmtest/githubapi.py +++ b/vmtest/githubapi.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import json diff --git a/vmtest/kbuild.py b/vmtest/kbuild.py index f41c85265..b7ac4b14e 100644 --- a/vmtest/kbuild.py +++ b/vmtest/kbuild.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import argparse @@ -23,7 +23,7 @@ logger = logging.getLogger(__name__) -KERNEL_LOCALVERSION = "-vmtest3" +KERNEL_LOCALVERSION = "-vmtest8" def kconfig() -> str: @@ -59,29 +59,54 @@ def kconfig() -> str: CONFIG_TMPFS_XATTR=y CONFIG_VIRTIO_CONSOLE=y CONFIG_VIRTIO_PCI=y - -# drgn needs /proc/kcore for live debugging. -CONFIG_PROC_KCORE=y -# In some cases, it also needs /proc/kallsyms. 
-CONFIG_KALLSYMS=y -CONFIG_KALLSYMS_ALL=y +CONFIG_HW_RANDOM=m +CONFIG_HW_RANDOM_VIRTIO=m # drgn needs debug info. CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_DWARF4=y -# Before Linux kernel commit 8757dc970f55 ("x86/crash: Define -# arch_crash_save_vmcoreinfo() if CONFIG_CRASH_CORE=y") (in v5.6), some -# important information in VMCOREINFO is initialized by the kexec code. +# For testing live kernel debugging with /proc/kcore. +CONFIG_PROC_KCORE=y +# drgn needs /proc/kallsyms in some cases. Some test cases also need it. +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y + +# For testing kernel core dumps with /proc/vmcore. +CONFIG_CRASH_DUMP=y +CONFIG_PROC_VMCORE=y CONFIG_KEXEC=y +CONFIG_KEXEC_FILE=y +# Needed for CONFIG_KEXEC_FILE. +CONFIG_CRYPTO=y +CONFIG_CRYPTO_SHA256=y + +# So that we can trigger a crash with /proc/sysrq-trigger. +CONFIG_MAGIC_SYSRQ=y # For block tests. CONFIG_BLK_DEV_LOOP=m +# For cgroup tests. +CONFIG_CGROUPS=y + # For kconfig tests. CONFIG_IKCONFIG=m CONFIG_IKCONFIG_PROC=y + +# For nodemask tests. +CONFIG_NUMA=y + +# For Traffic Control tests. +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_CLS_ACT=y +CONFIG_NETDEVICES=y +CONFIG_DUMMY=m """ diff --git a/vmtest/manage.py b/vmtest/manage.py index daddf592b..90b4fadbf 100644 --- a/vmtest/manage.py +++ b/vmtest/manage.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: GPL-3.0-or-later import argparse @@ -87,7 +87,7 @@ async def fetch_kernel_tags(kernel_dir: Path, kernel_tags: Sequence[str]) -> Non mainline_tags = [] stable_tags = [] for tag in kernel_tags: - if re.fullmatch("v[0-9]+\.[0-9]+\.[0-9]+", tag): + if re.fullmatch(r"v[0-9]+\.[0-9]+\.[0-9]+", tag): stable_tags.append(tag) else: mainline_tags.append(tag) diff --git a/vmtest/onoatimehack.c b/vmtest/onoatimehack.c index 9042c7380..1eb54e363 100644 --- a/vmtest/onoatimehack.c +++ b/vmtest/onoatimehack.c @@ -1,4 +1,4 @@ -// Copyright (c) Facebook, Inc. and its affiliates. +// Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: GPL-3.0-or-later /* diff --git a/vmtest/vm.py b/vmtest/vm.py index c3fe15c78..971e108ca 100644 --- a/vmtest/vm.py +++ b/vmtest/vm.py @@ -1,4 +1,4 @@ -# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: GPL-3.0-or-later import errno @@ -14,6 +14,8 @@ from util import nproc, out_of_date +_9PFS_MSIZE = 1024 * 1024 + # Script run as init in the virtual machine. This only depends on busybox. We # don't assume that any regular commands are built in (not even echo or test), # so we always explicitly run busybox. @@ -22,6 +24,7 @@ set -eu export BUSYBOX={busybox} +export PYTHON={python} trap '"$BUSYBOX" poweroff -f' EXIT @@ -55,8 +58,12 @@ # Mount additional filesystems. "$BUSYBOX" mount -t devtmpfs -o nosuid,noexec dev /dev +"$BUSYBOX" mkdir /dev/shm +"$BUSYBOX" mount -t tmpfs -o nosuid,nodev tmpfs /dev/shm "$BUSYBOX" mount -t proc -o nosuid,nodev,noexec proc /proc "$BUSYBOX" mount -t sysfs -o nosuid,nodev,noexec sys /sys +# cgroup2 was added in Linux v4.5. +"$BUSYBOX" mount -t cgroup2 -o nosuid,nodev,noexec cgroup2 /sys/fs/cgroup || "$BUSYBOX" true # Ideally we'd just be able to create an opaque directory for /tmp on the upper # layer. 
However, before Linux kernel commit 51f7e52dc943 ("ovl: share inode # for hard link") (in v4.8), overlayfs doesn't handle hard links correctly, @@ -65,8 +72,10 @@ # Load kernel modules. "$BUSYBOX" mkdir -p "/lib/modules/$RELEASE" -"$BUSYBOX" mount -t 9p -o trans=virtio,cache=loose,ro modules "/lib/modules/$RELEASE" -"$BUSYBOX" modprobe configs +"$BUSYBOX" mount -t 9p -o trans=virtio,cache=loose,ro,msize={_9PFS_MSIZE} modules "/lib/modules/$RELEASE" +for module in configs rng_core virtio_rng; do + "$BUSYBOX" modprobe "$module" +done # Create static device nodes. "$BUSYBOX" grep -v '^#' "/lib/modules/$RELEASE/modules.devname" | @@ -162,7 +171,7 @@ class LostVMError(Exception): def run_in_vm(command: str, kernel_dir: Path, build_dir: Path) -> int: match = re.search( - "QEMU emulator version ([0-9]+(?:\.[0-9]+)*)", + r"QEMU emulator version ([0-9]+(?:\.[0-9]+)*)", subprocess.check_output( ["qemu-system-x86_64", "-version"], universal_newlines=True ), @@ -204,7 +213,10 @@ def run_in_vm(command: str, kernel_dir: Path, build_dir: Path) -> int: with open(init, "w") as init_file: init_file.write( _INIT_TEMPLATE.format( - busybox=shlex.quote(busybox), command=shlex.quote(command) + _9PFS_MSIZE=_9PFS_MSIZE, + busybox=shlex.quote(busybox), + python=shlex.quote(sys.executable), + command=shlex.quote(command), ) ) os.chmod(init, 0o755) @@ -222,10 +234,12 @@ def run_in_vm(command: str, kernel_dir: Path, build_dir: Path) -> int: "-no-reboot", "-virtfs", - f"local,id=root,path=/,mount_tag=/dev/root,security_model=none,readonly{multidevs}", + f"local,id=root,path=/,mount_tag=/dev/root,security_model=none,readonly=on{multidevs}", "-virtfs", - f"local,path={kernel_dir},mount_tag=modules,security_model=none,readonly", + f"local,path={kernel_dir},mount_tag=modules,security_model=none,readonly=on", + + "-device", "virtio-rng-pci", "-device", "virtio-serial", "-chardev", f"socket,id=vmtest,path={socket_path}", @@ -234,11 +248,11 @@ def run_in_vm(command: str, kernel_dir: Path, build_dir: 
Path) -> int: "-kernel", str(kernel_dir / "vmlinuz"), "-append", - f"rootfstype=9p rootflags=trans=virtio,cache=loose ro console=0,115200 panic=-1 init={init}", + f"rootfstype=9p rootflags=trans=virtio,cache=loose,msize={_9PFS_MSIZE} ro console=0,115200 panic=-1 crashkernel=256M init={init}", # fmt: on ], env=env, - ) as qemu: + ): server_sock.settimeout(5) try: sock = server_sock.accept()[0] @@ -267,6 +281,11 @@ def run_in_vm(command: str, kernel_dir: Path, build_dir: Path) -> int: if __name__ == "__main__": import argparse + import logging + + logging.basicConfig( + format="%(asctime)s:%(levelname)s:%(name)s:%(message)s", level=logging.INFO + ) parser = argparse.ArgumentParser( description="run vmtest virtual machine",